Skip to content

Commit 564c9e1

Browse files
committed
Flash causal LM case
1 parent 2ba396c commit 564c9e1

File tree

1 file changed: +10 -0 lines changed

1 file changed

+10
-0
lines changed

backends/gaudi/server/text_generation_server/models/flash_causal_lm.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1702,6 +1702,7 @@ def log_warmup(self, prefilling, i, max_i, batch_size, seq_len):
17021702
f"{dim}:{seq_len} "
17031703
f"bypass:{bypass} "
17041704
f"free_mem:{free_mem}"
1705+
", this may take a while..."
17051706
)
17061707
log_master(logger.info, msg)
17071708

@@ -1753,6 +1754,10 @@ def ordering_function_min_tokens(b):
17531754
total_batch_seq = 0.001
17541755
total_mem = 0
17551756
available_mem = prompt_available_memory
1757+
logger.info(
1758+
f"Prefill batch size list:{[bsz[0] for bsz in buckets]}\n"
1759+
f"Prefill sequence length list:{[seq[1] for seq in buckets]}\n"
1760+
)
17561761
for i, (batch_size, seq_len) in enumerate(buckets):
17571762
if batch_size * seq_len > self.max_batch_prefill_tokens:
17581763
continue
@@ -1779,6 +1784,8 @@ def ordering_function_min_tokens(b):
17791784
total_mem += used_mem
17801785
total_batch_seq += batch_seq
17811786

1787+
logger.info("Prefill warmup successful.\n")
1788+
17821789
def ordering_function_max_bs(b):
17831790
return (-b[0], b[1])
17841791

@@ -1790,6 +1797,7 @@ def ordering_function_max_bs(b):
17901797
total_batch_seq = 0.001
17911798
total_mem = 0
17921799
available_mem = free_mem - self.mem_reserved
1800+
logger.info(f"Decode batch size list:{[bsz[0] for bsz in buckets]}\n")
17931801
for i, (batch_size, block_num) in enumerate(buckets):
17941802
if batch_size > block_num:
17951803
continue
@@ -1814,6 +1822,8 @@ def ordering_function_max_bs(b):
18141822
total_mem += used_mem
18151823
total_batch_seq += batch_seq
18161824

1825+
logger.info("Decode warmup successful.\n")
1826+
18171827
log_master(
18181828
logger.info,
18191829
f"warmup hpu graph time {int(time.time() - start_time)}s warmup shape count {warmup_shape_count}",

0 commit comments

Comments (0)