Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 42 additions & 4 deletions pyiceberg/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
Union,
)

from cachetools import LRUCache, cached
from cachetools import LRUCache
from cachetools.keys import hashkey
from pydantic_core import to_json

Expand Down Expand Up @@ -875,11 +875,49 @@ def __hash__(self) -> int:
return hash(self.manifest_path)


@cached(cache=LRUCache(maxsize=128), key=lambda io, manifest_list: hashkey(manifest_list))
class ManifestCache:
def __init__(self, maxsize: int = 1):
self._cache: LRUCache = LRUCache(maxsize=maxsize) # type: ignore

def _manifests(self, io: FileIO, manifest_list: str) -> Tuple[ManifestFile, ...]:
"""Read and cache manifests from the given manifest list, returning a tuple to prevent modification."""
cache_key = hashkey(manifest_list)

if cache_key in self._cache:
return self._cache[cache_key]

file = io.new_input(manifest_list)
result = tuple(read_manifest_list(file))
self._cache[cache_key] = result

return result

def __del__(self) -> None:
"""Clear the cache when the instance is deleted."""
if hasattr(self, "_cache"):
self._cache.clear()


# Global instance that can be garbage collected
_manifest_cache_instance = ManifestCache(maxsize=1)


def _manifests(io: FileIO, manifest_list: str) -> Tuple[ManifestFile, ...]:
"""Read and cache manifests from the given manifest list, returning a tuple to prevent modification."""
file = io.new_input(manifest_list)
return tuple(read_manifest_list(file))
return _manifest_cache_instance._manifests(io, manifest_list)


# Add the cache attribute to maintain the same interface
_manifests.cache = _manifest_cache_instance._cache # type: ignore


# Add cache_clear method to maintain the same interface
def _manifests_cache_clear() -> None:
"""Clear the manifest cache."""
_manifest_cache_instance._cache.clear()


_manifests.cache_clear = _manifests_cache_clear # type: ignore


def read_manifest_list(input_file: InputFile) -> Iterator[ManifestFile]:
Expand Down