csp2: CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/gprofiler/gprofiler.py annotate

annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/gprofiler/gprofiler.py @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d

author	jpayne
date	Tue, 18 Mar 2025 17:55:14 -0400
parents
children

rev	line source
jpayne@69	1 from typing import Union, List, Dict, Any
jpayne@69	2
jpayne@69	3 import requests
jpayne@69	4
jpayne@69	5 from gprofiler.version import __version__
jpayne@69	6
jpayne@69	7
class GProfiler:
    """
    Client for the g:Profiler toolkit web API.

    Wraps the g:GOSt (`profile`), g:Convert (`convert`), g:Orth (`orth`) and
    g:SNPense (`snpense`) endpoints. Please see the g:Profiler web tool
    (https://biit.cs.ut.ee/gprofiler/) for extensive documentation on all the
    options to the methods.

    After every successful query the ``meta`` section of the response is
    stored in ``self.meta``.
    """

    def __init__(self, user_agent: str = '', base_url: str = None, return_dataframe: bool = False):
        '''
        Configure the client.

        :param user_agent: free-form suffix appended to the ``User-Agent`` header sent with every request.
        :param base_url: the URL used for the g:Profiler service. Defaults to https://biit.cs.ut.ee/gprofiler
        :param return_dataframe: if True, query results are presented as pandas DataFrames.
        '''
        self.user_agent = 'gprofiler-python {version}/{user_agent}'.format(version=__version__, user_agent=user_agent)

        if base_url is None:
            self.base_url = 'https://biit.cs.ut.ee/gprofiler'
        else:
            self.base_url = base_url

        self.return_dataframe = return_dataframe
        if return_dataframe:
            # pandas is an optional dependency: only resolve it when needed.
            self._pandas = self._get_pandas_module()

        self.meta = None

    @staticmethod
    def _get_pandas_module():
        """
        Return the pandas module.

        Reuses a module already bound to ``pd`` or ``pandas`` in this module's
        globals; otherwise imports pandas (raising ImportError if it is not
        installed).
        """
        namespace = globals()
        for alias in ('pd', 'pandas'):
            candidate = namespace.get(alias)
            if getattr(candidate, '__name__', '') == 'pandas':
                return candidate

        import importlib
        return importlib.import_module('pandas')

    def __getattr__(self, item):
        """
        Give a helpful error for the method names of the pre-1.0 interface.

        Only invoked for attributes that are not found by normal lookup.

        :raises NotImplementedError: for the renamed `gprofile`, `gorth`, `gconvert`.
        :raises AttributeError: for any other unknown attribute.
        """
        if item in ('gprofile', 'gorth', 'gconvert'):
            raise NotImplementedError(
                '`{}` has been renamed `{}` and has a new interface\n'
                'To use the previous version use the command '
                '`pip install --upgrade --no-deps --force-reinstall gprofiler-official==0.3.5`\n'
                .format(item, item[1:]))
        raise AttributeError('{} is not an attribute of {}'.format(item, self.__class__.__name__))

    def _post(self, endpoint: str, payload: Dict[str, Any]) -> Dict[str, Any]:
        """
        POST `payload` as JSON to `endpoint` and return the decoded response.

        Stores the ``meta`` section of the response in ``self.meta``.

        :param endpoint: API path relative to the base URL, without a leading slash.
        :param payload: JSON-serialisable request body.
        :raises AssertionError: if the service answers with a status other than 200.
        """
        # Strip a trailing slash so that a base URL given as ".../gprofiler/"
        # does not produce a double slash in the request URL.
        url = '{}/{}'.format(self.base_url.rstrip('/'), endpoint)
        r = requests.post(url, json=payload, headers={'User-Agent': self.user_agent})

        if r.status_code != 200:
            try:
                message = r.json()['message']
            except (ValueError, KeyError, TypeError):
                # The body is not JSON, or is JSON without a 'message' entry.
                message = 'query failed with error {}'.format(r.status_code)
            raise AssertionError(message)

        res = r.json()
        self.meta = res['meta']
        return res

    def _format_result(self, rows: List[Dict[str, Any]], columns: List[str]):
        """
        Restrict `rows` to `columns` in the configured output format.

        :param rows: the ``result`` list of a response.
        :param columns: names (and, for DataFrames, order) of the fields to keep.
        :return: a list of dicts, or a pandas DataFrame if ``return_dataframe`` is set.
        """
        if not self.return_dataframe:
            wanted = set(columns)
            return [{k: v for k, v in row.items() if k in wanted} for row in rows]

        frame = self._pandas.DataFrame(rows)
        if len(frame) == 0:
            # An empty frame has no columns to select from; selecting would
            # raise KeyError, so build an empty frame with the expected header.
            return self._pandas.DataFrame(columns=columns)
        return frame[columns]

    @staticmethod
    def _resolve_intersections(meta: Dict[str, Any], results: List[Dict[str, Any]]) -> None:
        """
        Rewrite, in place, the per-gene evidence lists of g:GOSt results.

        On input each result's ``intersections`` holds one entry per gene of
        its query (aligned with the query's ``ensgs`` list in `meta`), empty
        where the gene is not in the term. On output ``evidences`` holds the
        non-empty entries and ``intersections`` the names of the matching genes.
        """
        genes_by_query = meta['genes_metadata']['query']
        names_by_query = {}  # query name -> gene names aligned with its 'ensgs'

        for result in results:
            query_name = result['query']
            if query_name not in names_by_query:
                reverse_mapping = {}
                for gene, ensgs in genes_by_query[query_name]['mapping'].items():
                    if len(ensgs) == 1:
                        # one-to-one mapping: report the name the user supplied
                        reverse_mapping[ensgs[0]] = gene
                    else:
                        # one-to-many mapping, we'll use the gene ID
                        for ensg in ensgs:
                            reverse_mapping[ensg] = ensg
                names_by_query[query_name] = [
                    reverse_mapping[ensg] for ensg in genes_by_query[query_name]['ensgs']
                ]

            per_gene_evidence = result['intersections']
            result['evidences'] = [ev for ev in per_gene_evidence if ev]
            result['intersections'] = [
                gene for ev, gene in zip(per_gene_evidence, names_by_query[query_name]) if ev
            ]

    def profile(
            self,
            query: Union[str, List[str], Dict[str, List[str]]],
            organism: str = 'hsapiens',
            sources: List[str] = tuple(),
            user_threshold: float = 0.05,
            all_results: bool = False,
            ordered: bool = False,
            no_evidences: bool = True,
            combined: bool = False,
            measure_underrepresentation: bool = False,
            no_iea: bool = False,
            domain_scope: str = 'annotated',
            numeric_namespace: str = '',
            significance_threshold_method: str = 'g_SCS',
            background: str = None,

    ) -> List[Dict[str, Any]]:
        """
        performs functional profiling of gene lists using various kinds of biological evidence.
        The tool performs statistical enrichment analysis to find over-representation of information from Gene Ontology terms,
        biological pathways, regulatory DNA elements, human disease gene annotations, and protein-protein interaction networks.

        :param query: list of genes to profile. For running multiple queries at once, accepts a dictionary of lists as well.
        :param organism: Organism id for profiling. For full list see https://biit.cs.ut.ee/gprofiler/page/organism-list
        :param sources: List of annotation sources to include in analysis. Defaults to all known.
        :param user_threshold: Significance threshold for analysis.
        :param all_results: If True, return all analysis results regardless of statistical significance.
        :param ordered: If True, considers the order of input query to be significant. See https://biit.cs.ut.ee/gprofiler/page/docs#ordered_gene_lists
        :param no_evidences: If False, the results include lists of intersections and evidences for the intersections
        :param combined: If True, performs all queries and combines the results into a single table. NB! changes the output format.
        :param measure_underrepresentation: if True, performs test for significantly under-represented functional terms.
        :param no_iea: If True, excludes electronically annotated Gene Ontology terms before analysis.
        :param domain_scope: "known" for using all known genes as background, "annotated" to use all genes annotated for particular datasource.
        :param numeric_namespace: name for the numeric namespace to use if there are numeric values in the query.
        :param significance_threshold_method: method for multiple correction. "g_SCS"|"bonferroni"|"fdr". https://biit.cs.ut.ee/gprofiler/page/docs#significance_threhshold
        :param background: List of genes to use as a statistical background. When given, `domain_scope` is forced to "custom".
        :return: one record per enriched term, as a list of dicts or, if `return_dataframe` was set, a pandas DataFrame.
        :raises AssertionError: if the service answers with a status other than 200.
        """

        if background is not None:
            domain_scope = 'custom'

        res = self._post(
            'api/gost/profile/',
            {
                'organism': organism,  # string, eg "hsapiens"
                'query': query,  # whitespace-delimited string or list of strings or object of strings to lists of strings
                'sources': sources,  # list of strings
                'user_threshold': user_threshold,  # significance threshold, defaults to 0.05
                'all_results': all_results,  # bool
                'no_evidences': no_evidences,  # bool - if set to true, saves on database lookups
                'combined': combined,  # bool, set to true for g:Cocoa output
                'measure_underrepresentation': measure_underrepresentation,  # bool
                'no_iea': no_iea,  # bool
                'numeric_ns': numeric_namespace,  # string
                'domain_scope': domain_scope,  # string 'known'|'annotated'|'custom'
                'ordered': ordered,  # bool, set to true for ordered query
                'significance_threshold_method': significance_threshold_method,  # string, "g_SCS"|"bonferroni"|"fdr", "g_SCS" by default
                'background': background if background is not None else '',  # string, background name or query string
            },
        )

        if not combined:
            columns = ['source',
                       'native',
                       'name',
                       'p_value',
                       'significant',
                       'description',
                       'term_size',
                       'query_size',
                       'intersection_size',
                       'effective_domain_size',
                       'precision',
                       'recall',
                       'query',
                       'parents']
            if not no_evidences:
                columns.append('intersections')
                columns.append('evidences')
                self._resolve_intersections(res['meta'], res['result'])
        else:
            # The combined (g:Cocoa) output reports per-query values as lists.
            columns = [
                'source',
                'native',
                'name',
                'p_values',
                'description',
                'term_size',
                'query_sizes',
                'intersection_sizes',
                'effective_domain_size',
                'parents']

        return self._format_result(res['result'], columns)

    def convert(
            self,
            query: Union[str, List[str], Dict[str, List[str]]],
            organism: str = 'hsapiens',
            target_namespace: str = 'ENSG',
            numeric_namespace: str = 'ENTREZGENE'
    ) -> List[Dict[str, Any]]:
        """
        Query g:Convert.

        :param query: list of genes to convert
        :param organism: organism id
        :param target_namespace: namespace to convert into
        :param numeric_namespace: namespace sent to the service as ``numeric_ns``
        :return: list of dicts or, if `return_dataframe` was set, a pandas DataFrame.
        :raises AssertionError: if the service answers with a status other than 200.
        """
        res = self._post(
            'api/convert/convert',
            {
                'organism': organism,
                'query': query,
                'target': target_namespace,
                'numeric_ns': numeric_namespace,
                'output': 'json'
            },
        )
        columns = ['incoming', 'converted', 'n_incoming', 'n_converted', 'name', 'description', 'namespaces', 'query']
        return self._format_result(res['result'], columns)

    def orth(self,
             query: List[str],
             organism: str = "hsapiens",
             target: str = "mmusculus",
             aresolve: Dict[str, str] = None,
             numeric_namespace: str = 'ENTREZGENE'):
        """
        Query g:Orth.

        :param query: list of genes to look up
        :param organism: organism id of the query genes
        :param target: organism id sent to the service as ``target``
        :param aresolve: dict sent to the service unchanged as ``aresolve``
        :param numeric_namespace: namespace sent to the service as ``numeric_ns``
        :return: list of dicts or, if `return_dataframe` was set, a pandas DataFrame.
        :raises AssertionError: if the service answers with a status other than 200.
        """
        res = self._post(
            'api/orth/orth',
            {
                'organism': organism,
                'query': query,
                'target': target,
                'numeric_ns': numeric_namespace,
                'aresolve': aresolve,
                'output': 'json'
            },
        )
        columns = ['incoming', 'converted', 'ortholog_ensg', 'n_incoming', 'n_converted', 'n_result', 'name', 'description', 'namespaces']
        return self._format_result(res['result'], columns)

    def snpense(self,
                query: List[str]):
        """
        Query g:SNPense.

        :param query: list sent to the service as ``query``
        :return: list of dicts or, if `return_dataframe` was set, a pandas DataFrame.
        :raises AssertionError: if the service answers with a status other than 200.
        """
        res = self._post(
            'api/snpense/snpense',
            {
                'query': query,
                'output': 'json+'
            },
        )
        columns = ['rs_id', 'chromosome', 'strand', 'start', 'end', 'ensgs', 'gene_names', 'variants']
        return self._format_result(res['result'], columns)

Mercurial > repos > rliterman > csp2

annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/gprofiler/gprofiler.py @ 69:33d812a61356