from typing import Any, Dict, List, Optional, Sequence, Union

import requests

from gprofiler.version import __version__


class GProfiler:
    """
    A class representing the g:Profiler toolkit. Contains methods for
    querying the g:GOSt, g:Convert, g:Orth and g:SNPense tools. Please see the
    g:Profiler web tool (https://biit.cs.ut.ee/gprofiler/) for extensive
    documentation on all the options to the methods.

    After every successful query the ``meta`` section of the response is
    stored in ``self.meta``.
    """

    def __init__(self, user_agent: str = '', base_url: Optional[str] = None, return_dataframe: bool = False):
        """
        :param user_agent: free-form suffix appended to the ``User-Agent`` header
            (``gprofiler-python <version>/<user_agent>``) sent with every request.
        :param base_url: the URL used for the g:Profiler service. Defaults to
            ``https://biit.cs.ut.ee/gprofiler``.
        :param return_dataframe: if True, query results are presented as pandas DataFrames.
        """
        self.user_agent = 'gprofiler-python {version}/{user_agent}'.format(version=__version__, user_agent=user_agent)

        if base_url is None:
            self.base_url = 'https://biit.cs.ut.ee/gprofiler'
        else:
            self.base_url = base_url

        self.return_dataframe = return_dataframe
        # pandas is an optional dependency: import it eagerly only when the
        # caller asked for DataFrames, so a missing install fails at construction.
        self._pandas = self._get_pandas_module() if return_dataframe else None

        self.meta = None

    @staticmethod
    def _get_pandas_module():
        """Return the pandas module, reusing ``pd``/``pandas`` from this module's globals if present."""
        namespace = globals()
        for alias in ('pd', 'pandas'):
            candidate = namespace.get(alias)
            if getattr(candidate, '__name__', '') == 'pandas':
                return candidate

        import importlib
        return importlib.import_module('pandas')

    def __getattr__(self, item):
        """
        Called only for attributes that are not found normally. Gives users of
        the pre-1.0 method names a migration hint instead of a bare AttributeError.

        :raises NotImplementedError: for the legacy names ``gprofile``, ``gorth``, ``gconvert``.
        :raises AttributeError: for every other unknown attribute.
        """
        if item in ('gprofile', 'gorth', 'gconvert'):
            raise NotImplementedError(
                (
                    '`{}` has been renamed `{}` and has a new interface\n'
                    'To use the previous version use the command '
                    '`pip install --upgrade --no-deps --force-reinstall gprofiler-official==0.3.5`\n'
                ).format(item, item[1:])
            )
        raise AttributeError('{} is not an attribute of {}'.format(item, self.__class__.__name__))

    def _post(self, endpoint: str, payload: Dict[str, Any]) -> Dict[str, Any]:
        """
        POST ``payload`` as JSON to ``<base_url>/<endpoint>`` and return the decoded response.

        On success the ``meta`` section of the response is stored in ``self.meta``.

        :param endpoint: API path relative to the base URL, without a leading slash.
        :param payload: JSON-serialisable request body.
        :raises AssertionError: if the service answers with a status other than 200.
            The message is the ``message`` field of the error body when available.
        """
        # Strip a trailing slash so that base URLs given with or without one
        # both produce a single '/' before the API path.
        url = '{}/{}'.format(self.base_url.rstrip('/'), endpoint)
        # NOTE(review): no timeout is passed, so a stalled server blocks the caller.
        response = requests.post(url, json=payload, headers={'User-Agent': self.user_agent})

        if response.status_code != 200:
            try:
                message = response.json()['message']
            except (ValueError, KeyError, TypeError):
                # Body is not JSON (ValueError), has no 'message' key (KeyError)
                # or is not a JSON object at all (TypeError).
                message = 'query failed with error {}'.format(response.status_code)
            # AssertionError is kept for backward compatibility with existing callers.
            raise AssertionError(message)

        res = response.json()
        self.meta = res['meta']
        return res

    def _format_result(self, rows: List[Dict[str, Any]], columns: List[str]):
        """
        Restrict ``rows`` to ``columns`` and return them in the configured output format.

        :param rows: the ``result`` list of an API response.
        :param columns: names of the fields to keep, in DataFrame column order.
        :return: a list of dicts, or a pandas DataFrame if ``return_dataframe`` is set.
            An empty result yields an empty DataFrame that still has ``columns``.
        """
        if not self.return_dataframe:
            wanted = set(columns)
            return [{k: v for k, v in row.items() if k in wanted} for row in rows]

        # `return_dataframe` may have been switched on after construction;
        # resolve pandas lazily in that case.
        pandas = getattr(self, '_pandas', None)
        if pandas is None:
            pandas = self._pandas = self._get_pandas_module()

        df = pandas.DataFrame(rows)
        if len(df) == 0:
            # An empty frame has no columns to select from; build the schema explicitly.
            return pandas.DataFrame(columns=columns)
        return df[columns]

    @staticmethod
    def _attach_evidences(results: List[Dict[str, Any]], meta: Dict[str, Any]) -> None:
        """
        Rewrite, in place, the ``intersections`` field of each g:GOSt result and add ``evidences``.

        ``evidences`` receives the truthy entries of the original ``intersections`` list;
        ``intersections`` is replaced by the gene labels at the positions of those entries.
        """
        genes_metadata = meta['genes_metadata']['query']

        reverse_mappings = {}
        for query_name in meta['query_metadata']['queries']:
            reverse_mapping = {}
            for gene, gene_ids in genes_metadata[query_name]['mapping'].items():
                if len(gene_ids) == 1:
                    # one-to-one mapping: report the name the user submitted
                    reverse_mapping[gene_ids[0]] = gene
                else:
                    # one-to-many mapping, we'll use the gene ID
                    for gene_id in gene_ids:
                        reverse_mapping[gene_id] = gene_id
            reverse_mappings[query_name] = reverse_mapping

        for result in results:
            query_name = result['query']
            mapping = reverse_mappings[query_name]
            genes = [mapping[gene_id] for gene_id in genes_metadata[query_name]['ensgs']]
            # presumably `intersections` is positionally aligned with `ensgs` - TODO confirm
            flags = result['intersections']
            result['evidences'] = [flag for flag in flags if flag]
            result['intersections'] = [gene for flag, gene in zip(flags, genes) if flag]

    def profile(
            self,
            query: Union[str, List[str], Dict[str, List[str]]],
            organism: str = 'hsapiens',
            sources: Sequence[str] = (),
            user_threshold: float = 0.05,
            all_results: bool = False,
            ordered: bool = False,
            no_evidences: bool = True,
            combined: bool = False,
            measure_underrepresentation: bool = False,
            no_iea: bool = False,
            domain_scope: str = 'annotated',
            numeric_namespace: str = '',
            significance_threshold_method: str = 'g_SCS',
            background: Optional[Union[str, List[str]]] = None,

    ) -> List[Dict[str, Any]]:
        """
        performs functional profiling of gene lists using various kinds of biological evidence.
        The tool performs statistical enrichment analysis to find over-representation of information from Gene Ontology terms,
        biological pathways, regulatory DNA elements, human disease gene annotations, and protein-protein interaction networks.

        :param query: list of genes to profile. For running multiple queries at once, accepts a dictionary of lists as well.
        :param organism: Organism id for profiling. For full list see https://biit.cs.ut.ee/gprofiler/page/organism-list
        :param sources: List of annotation sources to include in analysis. Defaults to all known.
        :param user_threshold: Significance threshold for analysis.
        :param all_results: If True, return all analysis results regardless of statistical significance.
        :param ordered: If True, considers the order of input query to be significant. See https://biit.cs.ut.ee/gprofiler/page/docs#ordered_gene_lists
        :param no_evidences: If False, the results include lists of intersections and evidences for the intersections
        :param combined: If True, performs all queries and combines the results into a single table. NB! changes the output format.
        :param measure_underrepresentation: if True, performs test for significantly under-represented functional terms.
        :param no_iea: If True, excludes electronically annotated Gene Ontology terms before analysis.
        :param domain_scope: "known" for using all known genes as background, "annotated" to use all genes annotated for particular datasource.
            Overridden with "custom" whenever `background` is given.
        :param numeric_namespace: name for the numeric namespace to use if there are numeric values in the query.
        :param significance_threshold_method: method for multiple correction. "g_SCS"|"bonferroni"|"fdr". https://biit.cs.ut.ee/gprofiler/page/docs#significance_threhshold
        :param background: List of genes to use as a statistical background.
        :return: a list of result dicts, or a pandas DataFrame if `return_dataframe` was set.
        :raises AssertionError: if the service does not answer with status 200.
        """

        if background is not None:
            domain_scope = 'custom'

        res = self._post(
            'api/gost/profile/',
            {
                'organism': organism,  # string, eg "hsapiens"
                'query': query,  # whitespace-delimited string or list of strings or object of strings to lists of strings
                'sources': sources,  # sequence of source names; empty means all known sources
                'user_threshold': user_threshold,  # significance threshold, defaults to 0.05
                'all_results': all_results,  # bool
                'no_evidences': no_evidences,  # bool - if set to true, saves on database lookups
                'combined': combined,  # bool, set to true for g:Cocoa output
                'measure_underrepresentation': measure_underrepresentation,  # bool
                'no_iea': no_iea,  # bool
                'numeric_ns': numeric_namespace,  # string
                'domain_scope': domain_scope,  # string 'known'|'annotated'|'custom'
                'ordered': ordered,  # bool, set to true for ordered query
                'significance_threshold_method': significance_threshold_method,  # string, "g_SCS"|"bonferroni"|"fdr", "g_SCS" by default
                'background': background if background is not None else '',  # string, background name or query string
            },
        )

        if not combined:
            columns = ['source',
                       'native',
                       'name',
                       'p_value',
                       'significant',
                       'description',
                       'term_size',
                       'query_size',
                       'intersection_size',
                       'effective_domain_size',
                       'precision',
                       'recall',
                       'query',
                       'parents']
            if not no_evidences:
                columns.append('intersections')
                columns.append('evidences')
                # Must run before column filtering: it rewrites `intersections`
                # and creates the `evidences` field on every result.
                self._attach_evidences(res['result'], res['meta'])
        else:
            # The combined (multi-query) output carries per-query lists instead of scalars.
            columns = [
                'source',
                'native',
                'name',
                'p_values',
                'description',
                'term_size',
                'query_sizes',
                'intersection_sizes',
                'effective_domain_size',
                'parents']

        return self._format_result(res['result'], columns)

    def convert(
            self,
            query: Union[str, List[str], Dict[str, List[str]]],
            organism: str = 'hsapiens',
            target_namespace: str = 'ENSG',
            numeric_namespace: str = 'ENTREZGENE'
    ) -> List[Dict[str, Any]]:
        """
        Query g:Convert.

        :param query: list of genes to convert
        :param organism: organism id
        :param target_namespace: namespace to convert into
        :param numeric_namespace: name for the numeric namespace to use if there are numeric values in the query.
        :return: a list of result dicts, or a pandas DataFrame if `return_dataframe` was set.
        :raises AssertionError: if the service does not answer with status 200.
        """
        res = self._post(
            'api/convert/convert',
            {
                'organism': organism,
                'query': query,
                'target': target_namespace,
                'numeric_ns': numeric_namespace,
                'output': 'json'
            },
        )
        columns = ['incoming', 'converted', 'n_incoming', 'n_converted', 'name', 'description', 'namespaces', 'query']
        return self._format_result(res['result'], columns)

    def orth(self,
             query: List[str],
             organism: str = "hsapiens",
             target: str = "mmusculus",
             aresolve: Optional[Dict[str, str]] = None,
             numeric_namespace: str = 'ENTREZGENE'):
        """
        Query g:Orth.

        :param query: list of genes to look up.
        :param organism: organism id of the query genes.
        :param target: organism id to map the genes to.
        :param aresolve: passed through to the service as `aresolve`
            (presumably a mapping that resolves ambiguous identifiers - see the g:Profiler docs).
        :param numeric_namespace: name for the numeric namespace to use if there are numeric values in the query.
        :return: a list of result dicts, or a pandas DataFrame if `return_dataframe` was set.
        :raises AssertionError: if the service does not answer with status 200.
        """
        res = self._post(
            'api/orth/orth',
            {
                'organism': organism,
                'query': query,
                'target': target,
                'numeric_ns': numeric_namespace,
                'aresolve': aresolve,
                'output': 'json'
            },
        )
        columns = ['incoming', 'converted', 'ortholog_ensg', 'n_incoming', 'n_converted', 'n_result', 'name', 'description', 'namespaces']
        return self._format_result(res['result'], columns)

    def snpense(self,
                query: List[str]):
        """
        Query g:SNPense.

        :param query: list of identifiers to look up (presumably SNP rs-ids, given the `rs_id` result field).
        :return: a list of result dicts, or a pandas DataFrame if `return_dataframe` was set.
        :raises AssertionError: if the service does not answer with status 200.
        """
        res = self._post(
            'api/snpense/snpense',
            {
                'query': query,
                'output': 'json+'
            },
        )
        columns = ['rs_id', 'chromosome', 'strand', 'start', 'end', 'ensgs', 'gene_names', 'variants']
        return self._format_result(res['result'], columns)