from abc import ABC, abstractmethod
from typing import Any, Set, Dict, Tuple, Union, Mapping, Iterable, Optional, Sequence
import pandas as pd
from omnipath.constants import InteractionDataset
from omnipath._core.query import QueryType
from omnipath._core.utils._docs import d
from omnipath._core.requests._utils import _inject_params
from omnipath._core.requests._request import GraphLike, CommonPostProcessor
from omnipath.constants._pkg_constants import Key, final
from omnipath._core.requests.interactions._json import _json_cols_hook
from omnipath._core.requests.interactions._evidences import only_from
# Accepted ways of specifying interaction datasets: a single dataset (given by
# its name or as an `InteractionDataset` member) or a sequence of them.
Datasets_t = Union[str, InteractionDataset, Sequence[str], Sequence[InteractionDataset]]
def _to_dataset_set(
    datasets, name: str, none_value: Iterable[InteractionDataset]
) -> Set[InteractionDataset]:
    """
    Normalize a dataset specification into a set of :class:`InteractionDataset`.

    Parameters
    ----------
    datasets
        A single dataset (name or enum member), an iterable of them, or `None`.
    name
        Name of the argument, only used in the error message.
    none_value
        Datasets to use when ``datasets`` is `None`.

    Returns
    -------
    The datasets converted to a set of :class:`InteractionDataset` members.

    Raises
    ------
    TypeError
        If the resulting value is not iterable.
    """
    if datasets is None:
        candidates = none_value
    elif isinstance(datasets, (str, InteractionDataset)):
        # a lone dataset is wrapped so that it is not iterated character by character
        candidates = (datasets,)
    else:
        candidates = datasets

    if not isinstance(candidates, Iterable):
        raise TypeError(
            f"Expected `{name}` to be an `Iterable`, found `{type(candidates).__name__}`."
        )

    return set(map(InteractionDataset, candidates))
@d.dedent
class InteractionRequest(CommonPostProcessor, GraphLike, ABC):
    """
    Base class for retrieving interactions from [OmniPath]_.

    Parameters
    ----------
    datasets
        Name of interaction datasets to download. If `None`, download all datasets, after removing the ones in
        ``exclude``. Can be one or more of the following:

        %(interaction_datasets)s
    exclude
        Interaction datasets to exclude. They are always removed from the selected ``datasets``
        (from all available datasets when ``datasets = None``).
    """

    # NOTE(review): presumably read by the base request classes to decide which
    # result columns are treated as strings / booleans - confirm.
    __string__ = frozenset({"source", "target", "dip_url"})
    __logical__ = frozenset(
        {
            "is_directed",
            "is_stimulation",
            "is_inhibition",
            "consensus_direction",
            "consensus_stimulation",
            "consensus_inhibition",
        }
    )

    _query_type = QueryType.INTERACTIONS

    def __init__(
        self,
        datasets: Optional[Datasets_t] = None,
        exclude: Optional[Datasets_t] = None,
    ):
        super().__init__()

        datasets = _to_dataset_set(datasets, "datasets", set(InteractionDataset))
        exclude = _to_dataset_set(exclude, "exclude", set())
        datasets = datasets - exclude

        if not datasets:
            raise ValueError(
                f"After excluding `{len(exclude)}` datasets, none were left."
            )

        self._datasets = datasets

    def _modify_params(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Add the datasets selected at construction time to the query parameters."""
        params = super()._modify_params(params)
        params[self._query_type("datasets").param] = self._datasets

        return params

    @classmethod
    @abstractmethod
    def _filter_params(cls, params: Dict[str, Any]) -> Dict[str, Any]:
        """
        Remove parameters which cannot be set by the user.

        The datasets are fixed by the constructor, so the corresponding parameter is removed here.
        Subclasses must override this method and may call it through :func:`super`.
        """
        params.pop(cls._query_type(Key.DATASETS.s).param, None)

        return params

    @classmethod
    @d.dedent
    def params(cls) -> Dict[str, Any]:
        """%(query_params)s"""
        params = super().params()

        return cls._filter_params(params)

    @classmethod
    def _get_source_target_cols(cls, data: pd.DataFrame) -> Tuple[str, str]:
        """Return the source and target column names, preferring the gene symbol columns if present."""
        source = "source_genesymbol" if "source_genesymbol" in data else "source"
        target = "target_genesymbol" if "target_genesymbol" in data else "target"

        return source, target

    def _resources(self, **_) -> Tuple[str]:
        """Return the resources, restricted to the datasets of this request."""
        return super()._resources(**{Key.DATASETS.s: self._datasets})

    def _resource_filter(
        self,
        data: Mapping[str, Any],
        datasets: Optional[Sequence[InteractionDataset]] = None,
    ) -> bool:
        """
        Check whether a resource belongs to at least one of the ``datasets``.

        Parameters
        ----------
        data
            Description of the resource; its datasets are read from the ``Key.DATASETS.s`` entry.
        datasets
            Datasets to match against. If `None`, every resource is accepted.

        Returns
        -------
        `True` if ``datasets`` is `None` or if the resource shares at least one dataset with it.
        """
        if datasets is None:
            return True

        available = {InteractionDataset(ds) for ds in data.get(Key.DATASETS.s, {})}
        requested = {InteractionDataset(ds) for ds in datasets}

        # return a real boolean, as annotated, instead of the intersection set
        return bool(available & requested)

    def _post_process(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Post-process the downloaded interactions.

        After the parent post-processing and :func:`_json_cols_hook`, and only if strict evidences
        are requested, the interactions are restricted by :func:`only_from` to the datasets
        of this request and the resources of the last query.
        """
        df = super()._post_process(df)
        df = _json_cols_hook(df)

        original_param = self._last_param["original"]
        strict_evidences = self._get_strict_evidences(original_param)

        if strict_evidences:
            datasets = {ds.value for ds in self._datasets}
            resources = self._last_param["final"].get("resources", ())
            df = only_from(df, datasets=datasets, resources=resources)

            fields_requested = original_param.get("fields", ())

            # the evidences are removed again unless the caller explicitly requested them
            if "evidences" not in fields_requested:
                df.drop(columns="evidences", inplace=True)

        return df
class CommonParamFilter(InteractionRequest, ABC):
    """Filter which tries to remove some common invalid parameters from many interaction queries."""

    @classmethod
    def _filter_params(cls, params: Dict[str, Any]) -> Dict[str, Any]:
        """Remove the `dorothea_*` and `tfregulons_*` parameters, if they are present."""
        params = super()._filter_params(params)
        unwanted = (
            "dorothea_levels",
            "dorothea_methods",
            "tfregulons_levels",
            "tfregulons_methods",
        )

        for key in unwanted:
            try:
                # the lookup fails if `key` is not a valid key anymore
                param = cls._query_type(key).param
            except (KeyError, ValueError):
                continue

            # a parameter which is already absent is silently skipped
            params.pop(param, None)

        return params
@final
class Dorothea(InteractionRequest):
    """
    Request interactions from the `dorothea` dataset.

    Imports the `dataset <https://omnipathdb.org/interactions?datasets=dorothea>`__ which contains transcription factor
    (TF)-target interactions from `DoRothEA <https://github.com/saezlab/DoRothEA>`__.
    """

    # NOTE(review): presumably read by `_get_strict_evidences`, which is defined outside this file - confirm.
    _strict_evidences = True

    def __init__(self):
        # the dataset is fixed, the user cannot choose it
        super().__init__(InteractionDataset.DOROTHEA)

    @classmethod
    def _filter_params(cls, params: Dict[str, Any]) -> Dict[str, Any]:
        """Remove the datasets parameter (in the parent) and the `tfregulons_*` parameters."""
        params = super()._filter_params(params)
        params.pop(cls._query_type("tfregulons_levels").param, None)
        params.pop(cls._query_type("tfregulons_methods").param, None)

        return params
@final
class CollecTRI(InteractionRequest):
    """
    Request interactions from the `collectri` dataset.

    Imports the `dataset
    <https://omnipathdb.org/interactions?datasets=collectri>`__ which contains
    transcription factor (TF)-target interactions from
    `CollecTRI <https://github.com/saezlab/CollecTRI>`__.
    """

    _strict_evidences = True

    def __init__(self):
        # the dataset is fixed, the user cannot choose it
        super().__init__(datasets=InteractionDataset.COLLECTRI)

    @classmethod
    def _filter_params(cls, params: Dict[str, Any]) -> Dict[str, Any]:
        """Apply only the parent's filtering; overridden because the parent method is abstract."""
        return super()._filter_params(params)
@final
class TFtarget(InteractionRequest):
    """
    Request interactions from the `TF-target` dataset.

    Imports the `dataset <https://omnipathdb.org/interactions?datasets=tf_target>`__ which contains transcription
    factor-target protein coding gene interactions.

    Other TF-target datasets in :mod:`omnipath` are :class:`omnipath.interactions.Dorothea` and
    :class:`omnipath.interactions.TFmiRNA` which provides TF-miRNA gene interactions.
    """

    def __init__(self):
        # the dataset is fixed, the user cannot choose it
        super().__init__(InteractionDataset.TF_TARGET)

    @classmethod
    def _filter_params(cls, params: Dict[str, Any]) -> Dict[str, Any]:
        """Remove the datasets parameter (in the parent) and the `dorothea_*` parameters."""
        params = super()._filter_params(params)
        params.pop(cls._query_type("dorothea_levels").param, None)
        params.pop(cls._query_type("dorothea_methods").param, None)

        return params
@final
class Transcriptional(InteractionRequest):
    """
    Request all `TF-target` interactions from [OmniPath]_.

    Imports the `dataset
    <https://omnipathdb.org/interactions?datasets=dorothea,tf_target,collectri>`__
    which contains transcription factor-target protein coding gene interactions.
    """

    def __init__(self):
        # the combination of datasets is fixed, the user cannot choose it
        super().__init__(
            (
                InteractionDataset.DOROTHEA,
                InteractionDataset.TF_TARGET,
                InteractionDataset.COLLECTRI,
            )
        )

    @classmethod
    def _filter_params(cls, params: Dict[str, Any]) -> Dict[str, Any]:
        """Apply only the parent's filtering; overridden because the parent method is abstract."""
        return super()._filter_params(params)
@final
class miRNA(CommonParamFilter):
    """
    Request interactions from the `miRNA-target` dataset.

    Imports the `dataset <https://omnipathdb.org/interactions?datasets=mirnatarget>`__ which contains
    miRNA-mRNA interactions.
    """

    def __init__(self):
        # the dataset is fixed, the user cannot choose it
        super().__init__(InteractionDataset.MIRNA_TARGET)
@final
class TFmiRNA(CommonParamFilter):
    """
    Request interactions from the `TF-miRNA` dataset.

    Imports the `dataset <https://omnipathdb.org/interactions?datasets=tf_mirna>`__ which contains transcription
    factor-miRNA gene interactions.
    """

    def __init__(self):
        # the dataset is fixed, the user cannot choose it
        super().__init__(InteractionDataset.TF_MIRNA)
@final
class lncRNAmRNA(CommonParamFilter):
    """
    Request interactions from the `lncRNA-mRNA` dataset.

    Imports the `dataset <https://omnipathdb.org/interactions?datasets=lncrna_mrna>`__ which contains
    lncRNA-mRNA interactions.
    """

    def __init__(self):
        # the dataset is fixed, the user cannot choose it
        super().__init__(InteractionDataset.LNCRNA_MRNA)
@final
class OmniPath(InteractionRequest):
    """
    Request interactions from the `omnipath` dataset.

    Imports the `database <https://omnipathdb.org/interactions>`__, which contains only interactions supported by
    literature references.

    This part of the interaction database was compiled in a similar way as it has been presented in [OmniPath16]_.
    """

    # same values as in the parent class, restated explicitly
    __string__ = frozenset({"source", "target", "dip_url"})
    __logical__ = frozenset(
        {
            "is_directed",
            "is_stimulation",
            "is_inhibition",
            "consensus_direction",
            "consensus_stimulation",
            "consensus_inhibition",
        }
    )

    def __init__(self):
        # the dataset is fixed, the user cannot choose it
        super().__init__(InteractionDataset.OMNIPATH)

    @classmethod
    def _filter_params(cls, params: Dict[str, Any]) -> Dict[str, Any]:
        """Apply only the parent's filtering; overridden because the parent method is abstract."""
        return super()._filter_params(params)
@d.get_sections(base="all_ints", sections=["Parameters"])
@d.dedent
class AllInteractions(InteractionRequest):
    """
    Request all [OmniPath]_ interaction datasets.

    The available interaction datasets are :class:`omnipath.constants.InteractionDataset`.

    Parameters
    ----------
    include
        Interactions datasets to include from the [OmniPath]_ database. If `None`, include everything.
    exclude
        Interaction datasets to exclude from the [OmniPath]_ database. If `None`, don't exclude anything.
    """

    def __init__(
        self,
        include: Optional[Datasets_t] = None,
        exclude: Optional[Datasets_t] = None,
    ):
        super().__init__(include, exclude=exclude)

    def _inject_fields(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Inject the `type` field into the requested fields, after the parent's injection."""
        params = super()._inject_fields(params)
        _inject_params(params, key=self._query_type("fields").param, value="type")

        return params

    @classmethod
    def _filter_params(cls, params: Dict[str, Any]) -> Dict[str, Any]:
        """Apply only the parent's filtering; overridden because the parent method is abstract."""
        return super()._filter_params(params)

    @classmethod
    @d.dedent
    def get(
        cls,
        include: Optional[Datasets_t] = None,
        exclude: Optional[Datasets_t] = None,
        **kwargs,
    ) -> pd.DataFrame:
        """
        %(get.full_desc)s

        The available interaction datasets are :class:`omnipath.constants.InteractionDataset`.

        Parameters
        ----------
        include
            Interactions datasets to include from the [OmniPath]_ database. If `None`, include everything.
        exclude
            Interaction datasets to exclude from the [OmniPath]_ database. If `None`, don't exclude anything.
        %(get.parameters)s

        Returns
        -------
        %(get.returns)s
        """
        return cls(include, exclude=exclude)._get(**kwargs)
# ideally docrep would be possible, but the metaclass is not perfect and gets rid of the info,
# which is why the parameter documentation is written out again below
@final
class PostTranslational(AllInteractions):
    """
    Request all post-translational interactions from [OmniPath]_ .

    Imports the `dataset <https://omnipathdb.org/interactions?datasets=omnipath,pathwayextra,kinaseextra,ligrecextra>`__
    which contains post-translational (i.e. protein-protein) interactions. This query requests the interactions from
    the following datasets:

        - :attr:`omnipath.constants.InteractionDataset.OMNIPATH`
        - :attr:`omnipath.constants.InteractionDataset.PATHWAY_EXTRA`
        - :attr:`omnipath.constants.InteractionDataset.KINASE_EXTRA`
        - :attr:`omnipath.constants.InteractionDataset.LIGREC_EXTRA`

    Parameters
    ----------
    exclude
        Post-translational interaction datasets to exclude. If `None`, don't exclude anything.
    """

    def __init__(self, exclude: Optional[Datasets_t] = None):
        super().__init__(
            include=[
                InteractionDataset.OMNIPATH,
                InteractionDataset.PATHWAY_EXTRA,
                InteractionDataset.KINASE_EXTRA,
                InteractionDataset.LIGREC_EXTRA,
            ],
            exclude=exclude,
        )

    def _inject_fields(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Inject the fields without the `type` field added by :class:`AllInteractions`."""
        # we don't need the type: starting the lookup after `InteractionRequest` skips
        # the `AllInteractions` implementation, which is the one injecting it
        return super(InteractionRequest, self)._inject_fields(params)

    @classmethod
    @d.dedent
    def get(
        cls,
        exclude: Optional[Datasets_t] = None,
        **kwargs,
    ) -> pd.DataFrame:
        """
        %(get.full_desc)s

        The available interaction datasets are :class:`omnipath.constants.InteractionDataset`.

        Parameters
        ----------
        exclude
            Post-translational interaction datasets to exclude. If `None`, don't exclude anything.
        %(get.parameters)s

        Returns
        -------
        %(get.returns)s
        """
        return cls(exclude=exclude)._get(**kwargs)
# `__all__` has to list the public names as strings, not the objects themselves,
# otherwise `from <module> import *` fails with a `TypeError`.
# NOTE(review): `KinaseExtra`, `LigRecExtra` and `PathwayExtra` are not defined in the
# part of the file visible here - confirm that they exist in this module.
__all__ = [
    "CollecTRI",
    "Dorothea",
    "OmniPath",
    "TFtarget",
    "KinaseExtra",
    "LigRecExtra",
    "PathwayExtra",
    "AllInteractions",
    "Transcriptional",
    "TFmiRNA",
    "miRNA",
    "lncRNAmRNA",
    "PostTranslational",
]