Commit f13e28c

[gaudi] Refine logging for Gaudi warmup (#3222)

* Refine logging for Gaudi warmup
* Make style
* Make style 2
* Flash causal LM case
* Add log_master & VLM cases
* Black

1 parent b4d17f1 commit f13e28c
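
The commit message mentions `log_master`, the helper used throughout the diffs below. For context, a rank-aware logging helper of this kind typically emits a message only from the master shard so a tensor-parallel deployment prints each warmup line once instead of once per process. A minimal sketch of that pattern, assuming a `RANK` environment variable; the actual helper in `text_generation_server.utils.log` may differ in detail:

    import os
    from loguru import logger

    def log_master(log, msg: str):
        # Only the master shard (rank 0) logs, so sharded servers
        # do not print duplicate warmup messages.
        if int(os.getenv("RANK", "0")) == 0:
            log(msg)

    log_master(logger.info, "Prefill warmup successful.\n")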

File tree

10 files changed: +37 −7 lines changed


backends/gaudi/server/text_generation_server/models/custom_modeling/flash_llava_next.py

Lines changed: 1 addition & 1 deletion

@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-""" PyTorch Llava-NeXT model."""
+"""PyTorch Llava-NeXT model."""
 
 from typing import List, Optional, Tuple

backends/gaudi/server/text_generation_server/models/custom_modeling/idefics2.py

Lines changed: 1 addition & 1 deletion

@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-""" PyTorch Idefics2 model."""
+"""PyTorch Idefics2 model."""
 
 from typing import List, Optional, Tuple

backends/gaudi/server/text_generation_server/models/custom_modeling/idefics3.py

Lines changed: 1 addition & 1 deletion

@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-""" PyTorch Idefics3 model."""
+"""PyTorch Idefics3 model."""
 
 from typing import List, Optional, Tuple

backends/gaudi/server/text_generation_server/models/custom_modeling/idefics_config.py

Lines changed: 1 addition & 1 deletion

@@ -17,7 +17,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-""" Idefics model configuration"""
+"""Idefics model configuration"""
 import copy
 
 from transformers import PretrainedConfig

backends/gaudi/server/text_generation_server/models/custom_modeling/idefics_modeling.py

Lines changed: 1 addition & 1 deletion

@@ -17,7 +17,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-""" PyTorch Idefics model."""
+"""PyTorch Idefics model."""
 from typing import List, Optional, Tuple, Union
 
 import torch

backends/gaudi/server/text_generation_server/models/custom_modeling/idefics_vision.py

Lines changed: 1 addition & 1 deletion

@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-""" PyTorch IdeficsVision model: a copy of CLIPVisionModel using a simpler config object"""
+"""PyTorch IdeficsVision model: a copy of CLIPVisionModel using a simpler config object"""
 
 
 from dataclasses import dataclass

backends/gaudi/server/text_generation_server/models/flash_causal_lm.py

Lines changed: 13 additions & 0 deletions

@@ -1721,6 +1721,7 @@ def log_warmup(self, prefilling, i, max_i, batch_size, seq_len):
             f"{dim}:{seq_len} "
             f"bypass:{bypass} "
             f"free_mem:{free_mem}"
+            ", this may take a while..."
         )
         log_master(logger.info, msg)

@@ -1772,6 +1773,11 @@ def ordering_function_min_tokens(b):
         total_batch_seq = 0.001
         total_mem = 0
         available_mem = prompt_available_memory
+        msg = (
+            f"Prefill batch size list:{[bsz[0] for bsz in buckets]}\n"
+            f"Prefill sequence length list:{[seq[1] for seq in buckets]}\n"
+        )
+        log_master(logger.info, msg)
         for i, (batch_size, seq_len) in enumerate(buckets):
             if batch_size * seq_len > self.max_batch_prefill_tokens:
                 continue

@@ -1798,6 +1804,8 @@ def ordering_function_min_tokens(b):
             total_mem += used_mem
             total_batch_seq += batch_seq
 
+        log_master(logger.info, "Prefill warmup successful.\n")
+
         def ordering_function_max_bs(b):
             return (-b[0], b[1])

@@ -1809,6 +1817,9 @@ def ordering_function_max_bs(b):
         total_batch_seq = 0.001
         total_mem = 0
         available_mem = free_mem - self.mem_reserved
+        log_master(
+            logger.info, f"Decode batch size list:{[bsz[0] for bsz in buckets]}\n"
+        )
         for i, (batch_size, block_num) in enumerate(buckets):
             if batch_size > block_num:
                 continue

@@ -1833,6 +1844,8 @@ def ordering_function_max_bs(b):
             total_mem += used_mem
             total_batch_seq += batch_seq
 
+        log_master(logger.info, "Decode warmup successful.\n")
+
         log_master(
             logger.info,
             f"warmup hpu graph time {int(time.time() - start_time)}s warmup shape count {warmup_shape_count}",

backends/gaudi/server/text_generation_server/models/flash_vlm_causal_lm.py

Lines changed: 5 additions & 0 deletions

@@ -822,6 +822,9 @@ def ordering_function_max_bs(b):
         total_batch_seq = 0.001
         total_mem = 0
         available_mem = decode_available_memory
+        log_master(
+            logger.info, f"Decode batch size list:{[bsz[0] for bsz in buckets]}\n"
+        )
         for i, (batch_size, block_num) in enumerate(buckets):
             if batch_size > block_num:
                 continue

@@ -847,6 +850,8 @@ def ordering_function_max_bs(b):
             total_mem += used_mem
             total_batch_seq += batch_seq
 
+        log_master(logger.info, "Decode warmup successful.\n")
+
         log_master(
             logger.info,
             f"warmup hpu graph time {int(time.time() - start_time)}s warmup shape count {warmup_shape_count}",

backends/gaudi/server/text_generation_server/models/mllama_causal_lm.py

Lines changed: 12 additions & 0 deletions

@@ -398,6 +398,11 @@ def ordering_function_min_tokens(b):
         total_batch_seq = 0.001
         total_mem = 0
         available_mem = prompt_available_memory
+        msg = (
+            f"Prefill batch size list:{[bsz[0] for bsz in buckets]}\n"
+            f"Prefill sequence length list:{[seq[1] for seq in buckets]}\n"
+        )
+        log_master(logger.info, msg)
         for i, (batch_size, seq_len) in enumerate(buckets):
             if batch_size * seq_len > self.max_batch_prefill_tokens:
                 continue

@@ -424,6 +429,8 @@ def ordering_function_min_tokens(b):
             total_mem += used_mem
             total_batch_seq += batch_seq
 
+        log_master(logger.info, "Prefill warmup successful.\n")
+
         def ordering_function_max_bs(b):
             return (-b[0], b[1])

@@ -435,6 +442,9 @@ def ordering_function_max_bs(b):
         total_batch_seq = 0.001
         total_mem = 0
         available_mem = free_mem - self.mem_reserved
+        log_master(
+            logger.info, f"Decode batch size list:{[bsz[0] for bsz in buckets]}\n"
+        )
         for i, (batch_size, block_num) in enumerate(buckets):
             if batch_size > block_num:
                 continue

@@ -459,6 +469,8 @@ def ordering_function_max_bs(b):
             total_mem += used_mem
             total_batch_seq += batch_seq
 
+        log_master(logger.info, "Decode warmup successful.\n")
+
         log_master(
             logger.info,
             f"warmup hpu graph time {int(time.time() - start_time)}s warmup shape count {warmup_shape_count}",

backends/gaudi/server/text_generation_server/utils/segments.py

Lines changed: 1 addition & 1 deletion

@@ -8,7 +8,7 @@
 
 
 def find_segments(
-    adapter_indices: Union[torch.Tensor, List[int]]
+    adapter_indices: Union[torch.Tensor, List[int]],
 ) -> Tuple[List[int], List[int]]:
     segments = [0]
     segment_indices = []
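
The segments.py change is formatting only (Black adds a trailing comma to the multi-line signature); behavior is unchanged. As a rough illustration of what a run-length segmentation helper like this computes, here is a standalone sketch — an assumption about the semantics, not the actual implementation in segments.py:

    from typing import List, Tuple

    def find_segments_sketch(adapter_indices: List[int]) -> Tuple[List[int], List[int]]:
        # Collapse consecutive equal adapter indices into segments:
        # `segments` holds boundary offsets, `segment_indices` the
        # adapter id of each segment.
        segments = [0]
        segment_indices = []
        for i, idx in enumerate(adapter_indices):
            if segment_indices and idx == segment_indices[-1]:
                continue
            if i > 0:
                segments.append(i)
            segment_indices.append(idx)
        segments.append(len(adapter_indices))
        return segments, segment_indices

    print(find_segments_sketch([0, 0, 1, 1, 2]))
    # ([0, 2, 4, 5], [0, 1, 2])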
