from typing import Union, List, Dict, Any, Optional

import requests

from gprofiler.version import __version__


class GProfiler():
    """
    A class representing the g:Profiler toolkit. Contains methods for
    querying the g:GOSt, g:Convert, g:Orth and g:SNPense tools. Please see the
    g:Profiler web tool (https://biit.cs.ut.ee/gprofiler/) for extensive
    documentation on all the options to the methods.

    After every successful query the ``meta`` section of the server response
    is stored in ``self.meta``.
    """

    def __init__(self, user_agent: str = '', base_url: Optional[str] = None, return_dataframe: bool = False):
        '''
        Create a g:Profiler client.

        :param user_agent: free-form suffix appended to the ``User-Agent`` header sent with every request.
        :param base_url: the URL used for the g:Profiler service. Defaults to https://biit.cs.ut.ee/gprofiler
        :param return_dataframe: if True, query results are presented as pandas DataFrames.
        '''
        self.user_agent = 'gprofiler-python {version}/{user_agent}'.format(version=__version__, user_agent=user_agent)

        if base_url is None:
            self.base_url = 'https://biit.cs.ut.ee/gprofiler'
        else:
            self.base_url = base_url

        self.return_dataframe = return_dataframe
        if return_dataframe:
            # Import eagerly so that a missing pandas installation is reported
            # at construction time rather than after the first query.
            self._pandas = self._get_pandas_module()

        self.meta = None

    @staticmethod
    def _get_pandas_module():
        """
        Return the pandas module.

        A module bound to ``pd`` or ``pandas`` in this module's globals is
        reused when present; otherwise pandas is imported on demand.

        :raises ImportError: if pandas is not installed.
        """
        def is_pandas_module(candidate):
            return getattr(candidate, '__name__', '') == 'pandas'

        namespace = globals()
        for alias in ('pd', 'pandas'):
            if alias in namespace and is_pandas_module(namespace[alias]):
                return namespace[alias]

        import importlib
        return importlib.import_module('pandas')

    def __getattr__(self, item):
        """
        Fallback attribute lookup, only called when normal lookup fails.

        :raises NotImplementedError: for the legacy method names ``gprofile``, ``gorth`` and ``gconvert``.
        :raises AttributeError: for every other unknown attribute.
        """
        if item in ['gprofile', 'gorth', 'gconvert']:
            raise NotImplementedError(
                '`{}` has been renamed `{}` and has a new interface\n'
                'To use the previous version use the command '
                '`pip install --upgrade --no-deps --force-reinstall gprofiler-official==0.3.5`\n'
                .format(item, item[1:])
            )
        raise AttributeError('{} is not an attribute of {}'.format(item, self.__class__.__name__))

    def _dataframe_module(self):
        """
        Return the cached pandas module, importing it on first use.

        This keeps ``return_dataframe`` usable when it is switched on after
        the instance was constructed with ``return_dataframe=False``.
        """
        # Read the instance dict directly: a plain ``self._pandas`` would fall
        # through to __getattr__ and raise when the attribute is not set yet.
        pandas = self.__dict__.get('_pandas')
        if pandas is None:
            pandas = self._get_pandas_module()
            self._pandas = pandas
        return pandas

    def _post(self, endpoint: str, payload: Dict[str, Any]) -> Dict[str, Any]:
        """
        POST ``payload`` as JSON to ``endpoint`` and return the decoded response.

        On success the ``meta`` entry of the response is stored in ``self.meta``.

        :param endpoint: path below the base URL, without a leading slash.
        :param payload: JSON-serialisable request body.
        :return: the decoded JSON response.
        :raises AssertionError: if the server does not answer with HTTP 200. The message is the
            ``message`` field of the error response when available, a generic text otherwise.
        """
        # Tolerate a base URL given with a trailing slash.
        url = '{}/{}'.format(self.base_url.rstrip('/'), endpoint)
        r = requests.post(url, json=payload, headers={'User-Agent': self.user_agent})

        if r.status_code != 200:
            try:
                message = r.json()['message']
            except (ValueError, KeyError, TypeError):
                # Body is not JSON, or is JSON without a usable 'message' entry.
                message = 'query failed with error {}'.format(r.status_code)
            raise AssertionError(message)

        res = r.json()
        self.meta = res['meta']
        return res

    def _format_result(self, rows: List[Dict[str, Any]], columns: List[str]):
        """
        Restrict ``rows`` to ``columns`` in the configured output format.

        :param rows: the ``result`` entries of a server response.
        :param columns: names of the fields to keep, in DataFrame column order.
        :return: a list of dicts, or a pandas DataFrame when ``self.return_dataframe`` is set.
            An empty result yields an empty DataFrame that still carries ``columns``.
        """
        if not self.return_dataframe:
            wanted = set(columns)
            return [{k: v for k, v in row.items() if k in wanted} for row in rows]

        pandas = self._dataframe_module()
        df = pandas.DataFrame(rows)
        if len(df) == 0:
            # Selecting columns on an empty frame would raise KeyError.
            return pandas.DataFrame(columns=columns)
        return df[columns]

    @staticmethod
    def _resolve_intersections(results: List[Dict[str, Any]], meta: Dict[str, Any]) -> None:
        """
        Rewrite ``intersections``/``evidences`` of each profile result in place.

        Each result's ``intersections`` list is aligned with the ``ensgs`` list
        of its query in ``meta``. The non-empty entries are stored as
        ``evidences``, and ``intersections`` is replaced by the identifiers of
        the genes at those positions.
        """
        genes_metadata = meta['genes_metadata']['query']

        reverse_mappings = {}
        for query_name in meta['query_metadata']['queries']:
            reverse_mapping = {}
            for identifier, ensgs in genes_metadata[query_name]['mapping'].items():
                if len(ensgs) == 1:
                    # one-to-one mapping
                    reverse_mapping[ensgs[0]] = identifier
                else:
                    # one-to-many mapping, we'll use the gene ID
                    for ensg in ensgs:
                        reverse_mapping[ensg] = ensg
            reverse_mappings[query_name] = reverse_mapping

        for result in results:
            mapping = reverse_mappings[result['query']]
            genes = [mapping[ensg] for ensg in genes_metadata[result['query']]['ensgs']]
            evidences = result['intersections']
            result['evidences'] = [ev for ev in evidences if ev]
            result['intersections'] = [gene for ev, gene in zip(evidences, genes) if ev]

    def profile(
            self,
            query: Union[str, List[str], Dict[str, List[str]]],
            organism: str = 'hsapiens',
            sources: List[str] = tuple(),
            user_threshold: float = 0.05,
            all_results: bool = False,
            ordered: bool = False,
            no_evidences: bool = True,
            combined: bool = False,
            measure_underrepresentation: bool = False,
            no_iea: bool = False,
            domain_scope: str = 'annotated',
            numeric_namespace: str = '',
            significance_threshold_method: str = 'g_SCS',
            background: str = None,

    ) -> List[Dict[str, Any]]:
        """
        performs functional profiling of gene lists using various kinds of biological evidence.
        The tool performs statistical enrichment analysis to find over-representation of information from Gene Ontology terms,
        biological pathways, regulatory DNA elements, human disease gene annotations, and protein-protein interaction networks.

        :param query: list of genes to profile. For running multiple queries at once, accepts a dictionary of lists as well.
        :param organism: Organism id for profiling. For full list see https://biit.cs.ut.ee/gprofiler/page/organism-list
        :param sources: List of annotation sources to include in analysis. Defaults to all known.
        :param user_threshold: Significance threshold for analysis.
        :param all_results: If True, return all analysis results regardless of statistical significance.
        :param ordered: If True, considers the order of input query to be significant. See https://biit.cs.ut.ee/gprofiler/page/docs#ordered_gene_lists
        :param no_evidences: If False, the results include lists of intersections and evidences for the intersections
        :param combined: If True, performs all queries and combines the results into a single table. NB! changes the output format.
        :param measure_underrepresentation: if True, performs test for significantly under-represented functional terms.
        :param no_iea: If True, excludes electronically annotated Gene Ontology terms before analysis.
        :param domain_scope: "known" for using all known genes as background, "annotated" to use all genes annotated for particular datasource.
            Forced to "custom" whenever ``background`` is given.
        :param numeric_namespace: name for the numeric namespace to use if there are numeric values in the query.
        :param significance_threshold_method: method for multiple correction. "g_SCS"|"bonferroni"|"fdr". https://biit.cs.ut.ee/gprofiler/page/docs#significance_threhshold
        :param background: List of genes to use as a statistical background.
        :return: a list of result dicts, or a pandas DataFrame if ``return_dataframe`` is set.
        :raises AssertionError: if the service does not answer with HTTP 200.
        """

        if background is not None:
            domain_scope = 'custom'

        res = self._post(
            'api/gost/profile/',
            {
                'organism': organism,  # string, eg "hsapiens"
                'query': query,  # whitespace-delimited string or list of strings or object of strings to lists of strings
                'sources': sources,  # list of strings
                'user_threshold': user_threshold,  # significance threshold, defaults to 0.05
                'all_results': all_results,  # bool
                'no_evidences': no_evidences,  # bool - if set to true, saves on database lookups
                'combined': combined,  # bool, set to true for g:Cocoa output
                'measure_underrepresentation': measure_underrepresentation,  # bool
                'no_iea': no_iea,  # bool
                'numeric_ns': numeric_namespace,  # string
                'domain_scope': domain_scope,  # string 'known'|'annotated'|'custom'
                'ordered': ordered,  # bool, set to true for ordered query
                'significance_threshold_method': significance_threshold_method,  # string, "g_SCS"|"bonferroni"|"fdr", "g_SCS" by default
                'background': background if background is not None else '',  # string, background name or query string
            },
        )

        if not combined:
            columns = ['source',
                       'native',
                       'name',
                       'p_value',
                       'significant',
                       'description',
                       'term_size',
                       'query_size',
                       'intersection_size',
                       'effective_domain_size',
                       'precision',
                       'recall',
                       'query',
                       'parents']
            if not no_evidences:
                columns.append('intersections')
                columns.append('evidences')
                self._resolve_intersections(res['result'], res['meta'])
        else:
            columns = [
                'source',
                'native',
                'name',
                'p_values',
                'description',
                'term_size',
                'query_sizes',
                'intersection_sizes',
                'effective_domain_size',
                'parents']

        return self._format_result(res['result'], columns)

    def convert(
            self,
            query: Union[str, List[str], Dict[str, List[str]]],
            organism: str = 'hsapiens',
            target_namespace: str = 'ENSG',
            numeric_namespace: str = 'ENTREZGENE'
    ) -> List[Dict[str, Any]]:
        """
        Query g:Convert.

        :param query: list of genes to convert
        :param organism: organism id
        :param target_namespace: namespace to convert into
        :param numeric_namespace: namespace sent as ``numeric_ns`` with the request
        :return: a list of result dicts, or a pandas DataFrame if ``return_dataframe`` is set.
        :raises AssertionError: if the service does not answer with HTTP 200.
        """
        res = self._post(
            'api/convert/convert',
            {
                'organism': organism,
                'query': query,
                'target': target_namespace,
                'numeric_ns': numeric_namespace,
                'output': 'json'
            },
        )
        columns = ['incoming', 'converted', 'n_incoming', 'n_converted', 'name', 'description', 'namespaces', 'query']
        return self._format_result(res['result'], columns)

    def orth(self,
             query: List[str],
             organism: str = "hsapiens",
             target: str = "mmusculus",
             aresolve: Dict[str, str] = None,
             numeric_namespace: str = 'ENTREZGENE'):
        """
        Query g:Orth.

        :param query: list of genes to look up
        :param organism: organism id of the query genes
        :param target: organism id sent as ``target`` with the request
        :param aresolve: optional mapping forwarded unchanged as ``aresolve``; see the g:Profiler documentation for its meaning
        :param numeric_namespace: namespace sent as ``numeric_ns`` with the request
        :return: a list of result dicts, or a pandas DataFrame if ``return_dataframe`` is set.
        :raises AssertionError: if the service does not answer with HTTP 200.
        """
        res = self._post(
            'api/orth/orth',
            {
                'organism': organism,
                'query': query,
                'target': target,
                'numeric_ns': numeric_namespace,
                'aresolve': aresolve,
                'output': 'json'
            },
        )
        columns = ['incoming', 'converted', 'ortholog_ensg', 'n_incoming', 'n_converted', 'n_result', 'name', 'description', 'namespaces']
        return self._format_result(res['result'], columns)

    def snpense(self,
                query: List[str]):
        """
        Query g:SNPense.

        :param query: list of identifiers to look up (presumably SNP rs-ids; see the g:Profiler documentation)
        :return: a list of result dicts, or a pandas DataFrame if ``return_dataframe`` is set.
        :raises AssertionError: if the service does not answer with HTTP 200.
        """
        res = self._post(
            'api/snpense/snpense',
            {
                'query': query,
                'output': 'json+'
            },
        )
        columns = ['rs_id', 'chromosome', 'strand', 'start', 'end', 'ensgs', 'gene_names', 'variants']
        return self._format_result(res['result'], columns)