Skip to content
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions python/sglang/srt/mem_cache/hiradix_cache.py
Original file line number Diff line number Diff line change
Expand Up
@@ -468,16 +468,18 @@ def check_prefetch_progress(self, req_id: str) -> bool:

# todo: more policies for prefetch progress such as timeout
# the current policy is to prefetch with best effort and terminate when queuing is over
last_host_node, token_ids, host_indices, operation = self.ongoing_prefetch.pop(
req_id
)
last_host_node, token_ids, host_indices, operation = self.ongoing_prefetch[req_id]

if operation.host_indices is None:
# prefetch has not been issued due to insufficient host memory
self.ongoing_prefetch.pop(req_id)
return True

if not self.can_terminate_prefetch(operation):
return False

# Only pop when we're ready to process
self.ongoing_prefetch.pop(req_id)

completed_tokens, hash_value = self.cache_controller.terminate_prefetch(
operation
Expand Down