from io import BytesIO
from abc import ABC, ABCMeta, abstractmethod
from enum import Enum
from typing import (
Any,
Dict,
Tuple,
Union,
Mapping,
Callable,
Iterable,
Optional,
Sequence,
)
from operator import itemgetter
from functools import wraps, partial
import logging
from pandas.api.types import is_float_dtype, is_numeric_dtype
import pandas as pd
from omnipath import options
from omnipath.constants import License, Organism
from omnipath._core.query import QueryType
from omnipath._core.utils._docs import d
from omnipath._core.requests._utils import (
_ERROR_EMPTY_FMT,
_inject_params,
_count_resources,
_count_references,
_inject_api_method,
_strip_resource_label_df,
)
from omnipath.constants._pkg_constants import DEFAULT_FIELD, Key, Format, final
from omnipath._core.downloader._downloader import Downloader
def _error_handler(callback: Callable[[BytesIO], Any]) -> Callable:
@wraps(callback)
def wrapper(cls, *args, **kwargs) -> pd.DataFrame:
res: pd.DataFrame = callback(*args, **kwargs)
if len(res.columns) == 1 and res.columns == ["Something is not entirely good:"]:
raise RuntimeError(" ".join(res.iloc[:, 0]))
return res
return wrapper
class OmnipathRequestMeta(ABCMeta):
    """Metaclass which merges the dtype column sets of the bases and injects the API method."""

    def __new__(cls, clsname, superclasses, attributedict):
        """Create the class after extending its column sets by the ones found on ``superclasses``."""
        tracked = ("__string__", "__logical__", "__categorical__")

        for attr in tracked:
            for base in superclasses:
                # union of what the new class declares and what the base already has
                current = attributedict.get(attr, frozenset())
                inherited = getattr(base, attr, frozenset())
                attributedict[attr] = current | inherited

        created = super().__new__(cls, clsname, superclasses, attributedict)
        _inject_api_method(created)

        return created
class OmnipathRequestABC(ABC, metaclass=OmnipathRequestMeta):
    """Base class for all :mod:`omnipath` requests."""

    # names of the result columns which :meth:`_convert_dtypes` converts
    # to strings, booleans and categoricals, respectively
    __string__ = frozenset({"uniprot", "genesymbol"})
    __logical__ = frozenset()
    __categorical__ = frozenset()

    # readers passed to the downloader as callbacks, chosen by the requested format
    _json_reader = _error_handler(partial(pd.read_json, typ="frame"))
    _tsv_reader = _error_handler(
        partial(pd.read_csv, sep="\t", header=0, low_memory=False)
    )

    _query_type: Optional[QueryType] = None

    def __init__(self):
        self._downloader = Downloader(options)

    @classmethod
    @d.dedent
    def resources(cls, **kwargs) -> Tuple[str]:
        """%(query_resources)s"""
        return cls()._resources(**kwargs)

    @classmethod
    @d.dedent
    def params(cls) -> Dict[str, Any]:
        """%(query_params)s"""
        return {q.param: q.valid for q in cls._query_type.value}

    @classmethod
    def _annotations(cls) -> Dict[str, type]:
        """Return the type annotation for the query parameters."""
        return {q.param: q.annotation for q in cls._query_type.value}

    @classmethod
    def _docs(cls) -> Dict[str, Optional[str]]:
        """Return the documentation for the query parameters."""
        return {q.param: q.doc for q in cls._query_type.value}

    def _get(self, **kwargs) -> pd.DataFrame:
        """
        Prepare the query parameters, download the data and process the result.

        Parameters
        ----------
        **kwargs
            The raw query parameters.

        Returns
        -------
        :class:`pandas.DataFrame`
            The result after the optional dtype conversion and :meth:`_post_process`.
        """
        # keep the parameters around for the duration of the request,
        # they are cleared again only when the request succeeds
        self._last_param = {}
        self._last_param["original"] = kwargs.copy()

        kwargs = self._modify_params(kwargs)
        kwargs = self._inject_fields(kwargs)
        kwargs, callback = self._convert_params(kwargs)
        kwargs = self._validate_params(kwargs)
        kwargs = self._finalize_params(kwargs)
        self._last_param["final"] = kwargs.copy()

        res = self._downloader.maybe_download(
            self._query_type.endpoint, params=kwargs, callback=callback, is_final=False
        )

        if self._downloader._options.convert_dtypes:
            res = self._convert_dtypes(res)
        res = self._post_process(res)

        self._last_param = {}

        return res

    def _convert_params(
        self, params: Dict[str, Any]
    ) -> Tuple[Dict[str, Any], Callable]:
        """
        Normalize the organism, format, license and password parameters.

        Parameters
        ----------
        params
            The query parameters. Modified in-place.

        Returns
        -------
        The modified parameters and the reader used to parse the downloaded data.
        """
        # both the singular and the plural key are removed, the singular one wins
        organism = params.pop("organism", params.pop("organisms", None))
        if organism is not None:
            organism = Organism(organism)
            try:
                params[self._query_type("organism").param] = organism.code
            except ValueError:
                # presumably this query type has no such parameter, skip it
                pass

        # check the requested format
        fmt = params.pop("format", params.pop("formats", None))
        fmt = Format(Format.TSV if fmt is None else fmt)
        if fmt not in (Format.TSV, Format.JSON):
            logging.warning(
                f"Invalid `{Key.FORMAT.s}={fmt.s!r}`. Using `{Key.FORMAT.s}={Format.TSV.s!r}`"
            )
            fmt = Format.TSV

        callback = self._tsv_reader if fmt == Format.TSV else self._json_reader
        try:
            params[self._query_type("format").param] = fmt.s
        except ValueError:
            pass

        # check the license
        license = params.pop(
            "license", params.pop("licenses", self._downloader._options.license)
        )
        if license is not None:
            license = License(license)
            try:
                params[self._query_type("license").param] = license
            except ValueError:
                pass

        if self._downloader._options.password is not None:
            # an explicitly passed password takes precedence over the one in the options
            params.setdefault(Key.PASSWORD.s, self._downloader._options.password)

        return params, callback

    def _inject_fields(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """
        Add the default fields of this query type to the parameters.

        Parameters
        ----------
        params
            The query parameters. Modified in-place, ``'strict_evidences'`` is removed.

        Returns
        -------
        The modified parameters. If injecting the fields fails, a warning is logged instead.
        """
        try:
            # an explicit `fields=None` is treated as if no fields were requested
            requested = params.get("fields") or ()
            defaults = getattr(DEFAULT_FIELD, self._query_type.name).value
            if self._get_strict_evidences(params) and "evidences" not in requested:
                defaults += ("evidences",)
            params.pop("strict_evidences", None)
            _inject_params(
                params,
                key=self._query_type(Key.FIELDS.value).param,
                value=defaults,
            )
        except AttributeError:
            # no default field for this query
            pass
        except Exception as e:
            logging.warning(
                f"Unable to inject `{Key.FIELDS.value}` for `{self}`. Reason: `{e}`"
            )

        return params

    def _validate_params(
        self, params: Dict[str, Any]
    ) -> Dict[str, Optional[Union[str, Sequence[str]]]]:
        """For each passed parameter, validate if it has the correct value."""
        res = {}
        for k, v in params.items():
            # first get the validator for the parameter, then validate
            query = self._query_type(k)
            res[query.param] = query(v)
        return res

    def _finalize_params(self, params: Dict[str, Any]) -> Dict[str, str]:
        """
        Convert all the parameters to strings.

        Parameters
        ----------
        params
            The validated query parameters.

        Returns
        -------
        The converted parameters, sorted by their keys. Parameters whose value is `None` are dropped.
        """
        # this is largely redundant
        res = {}
        for k, v in params.items():
            if isinstance(v, str):
                res[k] = v
            elif isinstance(v, bool):
                # `bool` is a subclass of `int`, it has to be checked first
                res[k] = str(int(v))
            elif isinstance(v, (int, float)):
                res[k] = str(v)
            elif isinstance(v, Iterable):
                # items are converted after sorting, so that non-string items can be joined as well
                res[k] = ",".join(str(item) for item in sorted(v))
            elif isinstance(v, Enum):
                res[k] = str(v.value)
            elif v is not None:
                logging.warning(f"Unable to process parameter `{k}={v}`. Ignoring")

        return dict(sorted(res.items(), key=itemgetter(0)))

    def _convert_dtypes(self, res: pd.DataFrame, **_) -> pd.DataFrame:
        """
        Automatically convert dtypes for this type of query.

        Parameters
        ----------
        res
            The result of the query. Modified in-place.

        Returns
        -------
        :class:`pandas.DataFrame`
            The result with converted logical, categorical and string columns.

        Raises
        ------
        TypeError
            If ``res`` is not a :class:`pandas.DataFrame`.
        """

        def to_logical(col: pd.Series) -> pd.Series:
            if is_numeric_dtype(col):
                return col > 0
            return col.astype(str).str.lower().isin(("y", "t", "yes", "true", "1"))

        def handle_logical(df: pd.DataFrame, columns: frozenset) -> None:
            cols = list(frozenset(df.columns) & columns)
            if cols:
                df[cols] = df[cols].apply(to_logical)

        def handle_categorical(df: pd.DataFrame, columns: frozenset) -> None:
            # index with a list, same as in `handle_logical`, not with a set
            cols = list(frozenset(df.columns) & columns)
            cols = [
                col
                for col, dtype in zip(cols, df[cols].dtypes)
                if not is_float_dtype(dtype)
            ]
            if cols:
                df[cols] = df[cols].astype("category")

        def handle_string(df: pd.DataFrame, columns: frozenset) -> None:
            for col in frozenset(df.columns) & columns:
                # remember the missing values, the cast below would turn them into strings
                mask = pd.isnull(df[col])
                df[col] = df[col].astype(str)
                df.loc[mask, col] = None

        if not isinstance(res, pd.DataFrame):
            raise TypeError(
                f"Expected the result to be of type `pandas.DataFrame`, found `{type(res).__name__}`."
            )

        handle_logical(res, self.__logical__)
        handle_categorical(res, self.__categorical__)
        handle_string(res, self.__string__)

        return res

    def _resources(self, **kwargs) -> Tuple[str]:
        """
        Return available resources for this type of query.

        Parameters
        ----------
        **kwargs
            Keyword arguments used for filtering unwanted resources.

        Returns
        -------
        tuple
            Unique and sorted resources.
        """
        return tuple(
            sorted(
                res
                for res, params in self._downloader.resources.items()
                if self._query_type.endpoint in params.get(Key.QUERIES.s, {})
                and self._resource_filter(
                    params[Key.QUERIES.s][self._query_type.endpoint], **kwargs
                )
            )
        )

    def _modify_params(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """
        Remove parameters from this query.

        The base implementation returns ``params`` unchanged.

        Parameters
        ----------
        params
            The parameters to filter.

        Returns
        -------
        :class:`dict`
            The filtered parameters.
        """
        return params

    @abstractmethod
    def _post_process(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Post process the result, e.g. by adding extra columns.

        Parameters
        ----------
        df
            The result from :meth:`get`.

        Returns
        -------
        :class:`pandas.DataFrame`
            The maybe modified result.
        """
        pass

    @abstractmethod
    def _resource_filter(self, data: Mapping[str, Any], **kwargs) -> bool:
        """
        Filter out resources relevant to this query.

        Parameters
        ----------
        data
            Data which is used as a basis for the filtering.
        kwargs
            Additional keyword arguments.

        Returns
        -------
        bool
            `True` if the resource should be included, otherwise `False`.
        """
        pass

    @classmethod
    def _get_strict_evidences(cls, params: Dict[str, Any]) -> bool:
        """Return ``'strict_evidences'`` from ``params``, falling back to the class attribute or `False`."""
        strict_evidences = params.get("strict_evidences", None)
        if strict_evidences is None:
            strict_evidences = getattr(cls, "_strict_evidences", False)
        return strict_evidences

    def __str__(self) -> str:
        return f"<{self.__class__.__name__}>"

    def __repr__(self) -> str:
        return str(self)
class CommonPostProcessor(OmnipathRequestABC, ABC):
    """
    Class implementing the post-processing steps shared by :class:`omnipath.requests.Enzsub`,
    :class:`omnipath.requests.Intercell`, :class:`omnipath.requests.Complexes` and
    :class:`omnipath.interactions.InteractionRequest`.

    The downloaded result is passed to the helpers which count the resources and the references
    and to the helper which strips the resource labels from the ``'references'`` column.
    """

    def _post_process(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Run the counting and the label-stripping helpers on the resulting ``df``.

        Parameters
        ----------
        df
            The dataframe containing results.

        Returns
        -------
        The same dataframe object that was passed in.
        """
        for annotate in (_count_resources, _count_references):
            annotate(df)
        _strip_resource_label_df(df, col="references")

        return df
class OrganismGenesymbolsRemover(CommonPostProcessor, ABC):
    """Class which drops the organism and the genesymbols keys from the query."""

    def _modify_params(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Discard the organism and the genesymbols keys, if present, and return ``params``."""
        for key in (Key.ORGANISM, Key.GENESYMBOLS):
            params.pop(key.s, None)

        return params

    @classmethod
    @d.dedent
    def params(cls) -> Dict[str, Any]:
        """%(query_params)s"""
        valid = super().params()
        for key in (Key.ORGANISM, Key.GENESYMBOLS):
            valid.pop(key.s, None)

        return valid
class GraphLike(ABC):
    """
    Class that is able to construct a graph.

    Should be injected with any class with :meth:`get`.
    """

    @classmethod
    @abstractmethod
    def _get_source_target_cols(cls, data: pd.DataFrame) -> Tuple[str, str]:
        """Return the names of the columns in ``data`` used as edge sources and edge targets."""
        pass

    @classmethod
    def graph(cls, data: Optional[pd.DataFrame] = None, **kwargs):
        """
        Create a graph.

        Parameters
        ----------
        data
            The interaction data. If `None`, create a new request.
        kwargs
            Keyword arguments for :meth:`get` if ``data = None``.

        Returns
        -------
        :class:`networkx.DiGraph`
            The interaction graph.

        Raises
        ------
        ImportError
            If :mod:`networkx` is not installed.
        TypeError
            If ``data`` is not a :class:`pandas.DataFrame`.
        ValueError
            If ``data`` is empty.
        """
        try:
            import networkx as nx
        except ImportError:
            raise ImportError(
                "Unable to import `networkx`. Please install it as `pip install networkx`."
            ) from None

        data = cls.get(**kwargs) if data is None else data

        if not isinstance(data, pd.DataFrame):
            raise TypeError(
                f"Expected `data` to be of type `pandas.DataFrame`, found `{type(data).__name__}`."
            )

        if data.empty:
            raise ValueError(_ERROR_EMPTY_FMT.format(obj="data"))

        source, target = cls._get_source_target_cols(data)
        attr_cols = data.columns.difference([source, target])

        G = nx.from_pandas_edgelist(
            data,
            source=source,
            target=target,
            # an empty collection of attribute columns is rejected by networkx,
            # `None` creates the edges without any attributes
            edge_attr=tuple(attr_cols) or None,
            create_using=nx.DiGraph,
        )

        # which of the ';'-separated columns are present does not depend on the edge
        list_cols = [
            col
            for col in ("references", "references_stripped", "sources")
            if col in attr_cols
        ]

        for s, t, attr in G.edges(data=True):
            for col in list_cols:
                if ";" in str(attr[col]):
                    G.edges[s, t][col] = sorted(str(attr[col]).split(";"))

        return G
class Enzsub(CommonPostProcessor):
    """
    Request enzyme-substrate relationships from [OmniPath]_.

    Imports the enzyme-substrate (more exactly, enzyme-PTM) relationships `database <https://omnipathdb.org/enzsub>`__.
    """

    # columns converted to strings and categoricals, respectively
    __string__ = frozenset({"enzyme", "substrate"})
    __categorical__ = frozenset({"residue_type", "modification"})

    _query_type = QueryType.ENZSUB

    def _resource_filter(self, data: Mapping[str, Any], **_) -> bool:
        """Include every resource, no filtering is done for this query."""
        return True
@final
class SignedPTMs(Enzsub, GraphLike):
    """
    Request enzyme-substrate relationships and interactions from [OmniPath]_.

    PTM data does not contain sign (activation/inhibition), we generate this information based on the
    interaction network.
    """

    @classmethod
    def _get_source_target_cols(cls, data: pd.DataFrame) -> Tuple[str, str]:
        """Return the enzyme and substrate columns, preferring the genesymbol columns if present."""
        source = "enzyme_genesymbol" if "enzyme_genesymbol" in data else "enzyme"
        target = (
            "substrate_genesymbol" if "substrate_genesymbol" in data else "substrate"
        )

        return source, target

    @classmethod
    def get(
        cls,
        ptms: Optional[pd.DataFrame] = None,
        interactions: Optional[pd.DataFrame] = None,
    ) -> pd.DataFrame:
        """
        Get signs for enzyme-PTM interactions.

        Parameters
        ----------
        ptms
            Data generated by :meth:`omnipath.requests.Enzsub.get`. If `None`, a new request will be performed.
        interactions
            Data generated by :meth:`omnipath.interactions.OmniPath.get`. If `None`, a new request will be performed.

        Returns
        -------
        :class:`pandas.DataFrame`
            The signed PTMs with columns **'is_inhibition'** and **'is_stimulation'**.

        Raises
        ------
        TypeError
            If ``ptms`` or ``interactions`` is not a :class:`pandas.DataFrame`.
        ValueError
            If ``ptms`` or ``interactions`` is empty.
        """
        from omnipath.requests import Enzsub
        from omnipath.interactions import OmniPath

        ptms = Enzsub.get() if ptms is None else ptms
        interactions = OmniPath.get() if interactions is None else interactions

        if not isinstance(ptms, pd.DataFrame):
            raise TypeError(
                f"Expected `ptms` to be of type `pandas.DataFrame`, found `{type(ptms).__name__}`."
            )
        if not isinstance(interactions, pd.DataFrame):
            # report the type of the offending argument, not the type of `ptms`
            raise TypeError(
                f"Expected `interactions` to be of type `pandas.DataFrame`, found `{type(interactions).__name__}`."
            )

        if ptms.empty:
            raise ValueError(_ERROR_EMPTY_FMT.format(obj="PTMs"))
        if interactions.empty:
            raise ValueError(_ERROR_EMPTY_FMT.format(obj="interactions"))

        # left join: every PTM is kept, the sign columns are missing where no interaction matches
        return pd.merge(
            ptms,
            interactions[["source", "target", "is_stimulation", "is_inhibition"]],
            left_on=["enzyme", "substrate"],
            right_on=["source", "target"],
            how="left",
        )
# `__all__` has to contain the public names as strings, not the objects themselves
__all__ = ["Enzsub", "SignedPTMs"]