@@ -1702,6 +1702,7 @@ def log_warmup(self, prefilling, i, max_i, batch_size, seq_len):
1702
1702
f"{ dim } :{ seq_len } "
1703
1703
f"bypass:{ bypass } "
1704
1704
f"free_mem:{ free_mem } "
1705
+ ", this may take a while..."
1705
1706
)
1706
1707
log_master (logger .info , msg )
1707
1708
@@ -1753,6 +1754,10 @@ def ordering_function_min_tokens(b):
1753
1754
total_batch_seq = 0.001
1754
1755
total_mem = 0
1755
1756
available_mem = prompt_available_memory
1757
+ logger .info (
1758
+ f"Prefill batch size list:{ [bsz [0 ] for bsz in buckets ]} \n "
1759
+ f"Prefill sequence length list:{ [seq [1 ] for seq in buckets ]} \n "
1760
+ )
1756
1761
for i , (batch_size , seq_len ) in enumerate (buckets ):
1757
1762
if batch_size * seq_len > self .max_batch_prefill_tokens :
1758
1763
continue
@@ -1779,6 +1784,8 @@ def ordering_function_min_tokens(b):
1779
1784
total_mem += used_mem
1780
1785
total_batch_seq += batch_seq
1781
1786
1787
+ logger .info ("Prefill warmup successful.\n " )
1788
+
1782
1789
def ordering_function_max_bs (b ):
1783
1790
return (- b [0 ], b [1 ])
1784
1791
@@ -1790,6 +1797,7 @@ def ordering_function_max_bs(b):
1790
1797
total_batch_seq = 0.001
1791
1798
total_mem = 0
1792
1799
available_mem = free_mem - self .mem_reserved
1800
+ logger .info (f"Decode batch size list:{ [bsz [0 ] for bsz in buckets ]} \n " )
1793
1801
for i , (batch_size , block_num ) in enumerate (buckets ):
1794
1802
if batch_size > block_num :
1795
1803
continue
@@ -1814,6 +1822,8 @@ def ordering_function_max_bs(b):
1814
1822
total_mem += used_mem
1815
1823
total_batch_seq += batch_seq
1816
1824
1825
+ logger .info ("Decode warmup successful.\n " )
1826
+
1817
1827
log_master (
1818
1828
logger .info ,
1819
1829
f"warmup hpu graph time { int (time .time () - start_time )} s warmup shape count { warmup_shape_count } " ,
0 commit comments