Source code for omnipath._core.cache._cache

from abc import ABC, abstractmethod
from copy import copy
from shutil import rmtree
from typing import Any, Union, Optional
from pathlib import Path
import os
import pickle

import pandas as pd


def _is_empty(data: Optional[pd.DataFrame]) -> bool:
    """Return `True` if ``data`` is `None` or a :class:`pandas.DataFrame` with no rows."""
    if data is None:
        return True
    if isinstance(data, pd.DataFrame):
        return len(data) == 0
    # anything else (including other empty containers) counts as non-empty
    return False


class Cache(ABC):
    """
    Abstract base class describing the interface every cache has to implement.

    Empty values (`None` or an empty :class:`pandas.DataFrame`) will not be saved in the cache.
    """

    @abstractmethod
    def __getitem__(self, key: str) -> Optional[Any]:
        """Return the value cached under ``key``."""

    @abstractmethod
    def __setitem__(self, key: str, value: Any) -> None:
        """Cache ``value`` under ``key``."""

    @abstractmethod
    def __len__(self) -> int:
        """Return the number of cached items."""

    @abstractmethod
    def clear(self) -> None:
        """Remove the cached items."""

    @property
    @abstractmethod
    def path(self) -> Optional[Union[str, Path]]:
        """Return the location of the cache, if there is one."""

    @abstractmethod
    def __str__(self) -> str:
        """Return a human readable description of the cache."""

    def __repr__(self) -> str:
        # the representation is deliberately the same as the string form
        return str(self)


class FileCache(Cache):
    """
    Cache which persists the data into :mod:`pickle` files.

    Every item is written to its own file inside :attr:`path`; the file name is
    the string form of the key with the ``.pickle`` suffix appended if missing.

    Parameters
    ----------
    path
        Path to a directory where the files will be stored.

    Raises
    ------
    TypeError
        If ``path`` is neither a `str` nor a :class:`pathlib.Path`.
    ValueError
        If ``path`` is an empty string.
    """

    _suffix = ".pickle"

    def __init__(self, path: Union[str, Path]):
        if not isinstance(path, (str, Path)):
            raise TypeError(
                f"Expected `path` to be either `str` or `pathlib.Path`, "
                f"found `{type(path).__name__}`."
            )
        if not str(path):
            raise ValueError("Empty cache path.")

        self._cache_dir = Path(path)

    def _key_to_path(self, key: Any) -> Path:
        """
        Return the file in which the value for ``key`` is stored.

        The key is converted using :class:`str` so that storing, looking up and
        membership testing all agree on the file name.
        """
        fname = str(key)
        if not fname.endswith(self._suffix):
            fname += self._suffix

        return self._cache_dir / fname

    def __contains__(self, key: str) -> bool:
        """Return `True` if a cache file for ``key`` exists."""
        return self._key_to_path(key).is_file()

    def __setitem__(self, key: str, value: Any) -> None:
        """Pickle ``value`` into the file for ``key``; empty values are skipped."""
        if _is_empty(value):
            return
        # the directory is created lazily, on the first actual write
        self._cache_dir.mkdir(parents=True, exist_ok=True)

        with open(self._key_to_path(key), "wb") as fout:
            pickle.dump(value, fout)

    def __getitem__(self, key: str) -> Any:
        """
        Load the value stored for ``key``.

        Raises
        ------
        KeyError
            If no cache file exists for ``key``.
        """
        fpath = self._key_to_path(key)
        if not fpath.is_file():
            raise KeyError(fpath)

        # NOTE: unpickling can execute arbitrary code, the cache directory
        # must therefore only contain files from a trusted source
        with open(fpath, "rb") as fin:
            return pickle.load(fin)

    def __len__(self) -> int:
        """Return the number of entries in :attr:`path` ending with the cache suffix."""
        if not self.path.is_dir():
            return 0

        return sum(str(f).endswith(self._suffix) for f in os.listdir(self.path))

    @property
    def path(self) -> Path:
        """Return the directory where the cache files are stored."""
        return self._cache_dir

    def clear(self) -> None:
        """Remove all files and the directory under :attr:`path`."""
        if self._cache_dir.is_dir():
            rmtree(self._cache_dir)

    def __str__(self) -> str:
        return f"<{self.__class__.__name__}[size={len(self)}, path={str(self.path)!r}]>"


class MemoryCache(dict, Cache):
    """
    Cache which keeps the data in memory, using :class:`dict` as the storage.

    Objects are copied using :func:`copy.copy` both when they are stored
    through item assignment and when they are retrieved through item access.
    """

    @property
    def path(self) -> Optional[str]:
        """Return `'memory'`."""
        return "memory"

    def __setitem__(self, key: str, value: Any) -> None:
        # empty values are never cached
        if not _is_empty(value):
            # the value is usually a dataframe (copy for safety)
            super().__setitem__(key, copy(value))

    def __getitem__(self, key: str) -> Any:
        stored = super().__getitem__(key)
        # hand out a copy, not the stored object itself
        return copy(stored)

    def __str__(self) -> str:
        return f"<{type(self).__name__}[size={len(self)}]>"

    def __repr__(self) -> str:
        # `dict.__repr__` comes first in the MRO, so it's overridden explicitly
        return str(self)

    def __copy__(self) -> "MemoryCache":
        # copying the cache yields the very same instance
        return self

    def copy(self) -> "MemoryCache":
        """Return self."""
        return self


class NoopCache(MemoryCache):
    """Cache for which item assignment is a no-op, so values set this way are not saved."""

    @property
    def path(self) -> Optional[str]:
        """Return `None`."""
        return None

    def __setitem__(self, key: str, value: Any) -> None:
        """Discard ``value`` without storing it."""
        return None

    def __str__(self) -> str:
        return f"<{type(self).__name__}>"


def clear_cache() -> None:
    """Remove all cached data from :attr:`omnipath.options.cache`."""
    # imported at call time rather than at module level
    # (presumably to avoid a circular import - TODO confirm)
    from omnipath import options

    options.cache.clear()


# `__all__` must list names as strings, otherwise `from ... import *` fails
__all__ = ["clear_cache"]