Source code for omnipath._core.requests._complexes

from typing import Any, Union, Mapping, Iterable, Optional
import logging

import pandas as pd

from omnipath._core.query import QueryType
from omnipath._core.requests._request import OrganismGenesymbolsRemover
from omnipath.constants._pkg_constants import final


@final
class Complexes(OrganismGenesymbolsRemover):
    """Request information about protein complexes from [OmniPath]_."""

    # Column-name sets consumed by the base request machinery
    # (presumably to cast columns to string / categorical dtypes -- confirm
    # against the base class).
    __string__ = frozenset(
        {
            "name",
            "components",
            "components_genesymbols",
            "stoichiometry",
            "references",
            "identifiers",
        }
    )
    __categorical__ = frozenset({"sources"})

    _query_type = QueryType.COMPLEXES

    def _resource_filter(self, data: Mapping[str, Any], **_) -> bool:
        """Always return `True`, i.e. never reject anything based on ``data``."""
        return True

    @classmethod
    def complex_genes(
        cls,
        genes: Union[str, Iterable[str]],
        complexes: Optional[pd.DataFrame] = None,
        total_match: bool = False,
    ) -> pd.DataFrame:
        """
        Get all the molecular complexes for a given ``genes``.

        This function returns all the molecular complexes where an input set
        of genes participate. User can choose to retrieve every complex where
        any of the input genes participate or just retrieve these complexes
        where all the genes in input set participate together.

        Parameters
        ----------
        genes
            The genes for which complexes will be retrieved (hgnc format).
            A single string is treated as one gene; duplicates are ignored.
        complexes
            Complex data from :meth:`get`. If `None`, new request will be made.
        total_match
            If `True`, get only complexes where all the genes participate
            together, otherwise get complexes where any of the genes
            participate.

        Returns
        -------
        :class:`pandas.DataFrame`
            The filtered ``complexes`` with a fresh ``0..n-1`` index. If
            ``complexes`` is empty, it is returned as-is.

        Raises
        ------
        ValueError
            If no genes have been supplied.
        TypeError
            If ``complexes`` is not a :class:`pandas.DataFrame`.
        KeyError
            If ``complexes`` has no ``'components_genesymbols'`` column.
        """
        if isinstance(genes, str):
            genes = (genes,)
        # A frozenset both de-duplicates and gives O(1) membership tests.
        genes = frozenset(genes)
        if not genes:
            raise ValueError("No genes have been selected.")

        if complexes is None:
            logging.info("Fetching complexes from the server")
            complexes = cls.get()

        if not isinstance(complexes, pd.DataFrame):
            raise TypeError(
                f"Expected `complexes` to be of type `pandas.DataFrame`, found `{type(complexes)}`."
            )

        if complexes.empty:
            logging.warning("Complexes are empty")
            return complexes

        col = "components_genesymbols"
        if col not in complexes:
            raise KeyError(f"Unable to find `{col}` in `{complexes.columns}`.")

        if total_match:
            # Every requested gene must be a component of the complex
            # (the complex may contain additional components).
            def _matches(components: Iterable[str]) -> bool:
                return genes.issubset(components)

        else:
            # At least one requested gene is a component of the complex.
            def _matches(components: Iterable[str]) -> bool:
                return not genes.isdisjoint(components)

        # Components are stored as a single "_"-delimited string. Missing
        # values survive `.str.split` as non-list scalars; treat them as
        # "no match" rather than failing while iterating over them.
        mask = (
            complexes[col]
            .str.split("_")
            .apply(lambda parts: isinstance(parts, list) and _matches(parts))
            .astype(bool)
        )

        return complexes.loc[mask].reset_index(drop=True)
# `__all__` must contain names (strings), not the objects themselves;
# otherwise `from <module> import *` raises a `TypeError`.
__all__ = ["Complexes"]