Fix may_reinitialize_input_batch: compare kernel_block_sizes instead of block_sizes in the re-initialization check

wangxiyuan · wangxiyuan · commit a03a107f8e7b · 2025-09-14T16:08:14.000+08:00
diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py
@@ -2815,8 +2815,7 @@ def may_reinitialize_input_batch(self,
                 # This is likely Mamba or other non-attention cache,
                 # no splitting.
                 kernel_block_sizes.append([0])
-
-        if block_sizes != [self.cache_config.block_size]:
+        if kernel_block_sizes != [self.cache_config.block_size]:
             assert self.cache_config.cpu_offload_gb == 0, (
                 "Cannot re-initialize the input batch when CPU weight "
                 "offloading is enabled. See https://github.com/vllm-project/vllm/pull/18298 "  # noqa: E501