# HG changeset patch
# User kkonganti
# Date 1663083144 14400
# Node ID be95a7ce968ae480f4bdbd8874a047ccffc5d98a
# Parent 5244e74657673ad4a6e712f724628912d4a51a52
"planemo upload"
diff -r 5244e7465767 -r be95a7ce968a cfsan_lexmapr2.xml
--- a/cfsan_lexmapr2.xml Wed Aug 31 14:32:14 2022 -0400
+++ b/cfsan_lexmapr2.xml Tue Sep 13 11:32:24 2022 -0400
@@ -28,7 +28,7 @@
#end for
]]>
- $__tool_directory__/nltk_data
+ /tool/tool-data/cfsan_lexmapr2/0/nltk_data
diff -r 5244e7465767 -r be95a7ce968a lexmapr.zip
Binary file lexmapr.zip has changed
diff -r 5244e7465767 -r be95a7ce968a lexmapr/__init__.py
--- a/lexmapr/__init__.py Wed Aug 31 14:32:14 2022 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-
diff -r 5244e7465767 -r be95a7ce968a lexmapr/__pycache__/__init__.cpython-37.pyc
Binary file lexmapr/__pycache__/__init__.cpython-37.pyc has changed
diff -r 5244e7465767 -r be95a7ce968a lexmapr/__pycache__/create_databases.cpython-37.pyc
Binary file lexmapr/__pycache__/create_databases.cpython-37.pyc has changed
diff -r 5244e7465767 -r be95a7ce968a lexmapr/__pycache__/definitions.cpython-37.pyc
Binary file lexmapr/__pycache__/definitions.cpython-37.pyc has changed
diff -r 5244e7465767 -r be95a7ce968a lexmapr/__pycache__/ontology_reasoner.cpython-37.pyc
Binary file lexmapr/__pycache__/ontology_reasoner.cpython-37.pyc has changed
diff -r 5244e7465767 -r be95a7ce968a lexmapr/__pycache__/pipeline.cpython-37.pyc
Binary file lexmapr/__pycache__/pipeline.cpython-37.pyc has changed
diff -r 5244e7465767 -r be95a7ce968a lexmapr/__pycache__/pipeline_helpers.cpython-37.pyc
Binary file lexmapr/__pycache__/pipeline_helpers.cpython-37.pyc has changed
diff -r 5244e7465767 -r be95a7ce968a lexmapr/__pycache__/pipeline_resources.cpython-37.pyc
Binary file lexmapr/__pycache__/pipeline_resources.cpython-37.pyc has changed
diff -r 5244e7465767 -r be95a7ce968a lexmapr/__pycache__/run_summary.cpython-37.pyc
Binary file lexmapr/__pycache__/run_summary.cpython-37.pyc has changed
diff -r 5244e7465767 -r be95a7ce968a lexmapr/create_databases.py
--- a/lexmapr/create_databases.py Wed Aug 31 14:32:14 2022 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,248 +0,0 @@
-"""Builds SQLite3 databases"""
-
-import logging, os, pickle, re, requests, sqlite3, sys, time
-import lexmapr.ontology_reasoner as ontr
-from nltk.tokenize import word_tokenize
-from lexmapr.pipeline_helpers import punctuation_treatment
-from lexmapr.definitions import embl_ontologies, synonym_db, ontol_db
-from lexmapr.definitions import owl_dir, purl_link, missing_ontol_labels
-from lexmapr.pipeline_resources import get_resource_label_permutations
-
-logging.getLogger('requests').setLevel(logging.WARNING)
-logging.getLogger('urllib3').setLevel(logging.WARNING)
-
-
-# TODO: might replace pickle with ujson
-def _pickle_save(data_to_save, file_path):
- '''Write a pickle file'''
- with open(file_path,'wb') as SAVE_file:
- pickle.dump(data_to_save, SAVE_file)
-
-
-def _pickle_load(file_path):
- '''Read a pickle file'''
- with open(file_path,'rb') as LOAD_file:
- return(pickle.load(LOAD_file))
-
-
-def _get_ontols(ontol_interest):
- '''Obtain URLs for ontologies of interest'''
- ontol_dic = {}
- embl_resp = requests.get(embl_ontologies)
- resp_blocks = re.findall('([\s\S]+?)
',embl_resp.content.decode('utf-8'))
- for resp_block in resp_blocks:
- try:
- embl_abbr = re.search('class=\"ontology-source\">([\s\S]+?)<', resp_block).group(1)
- embl_name = re.search('([\s\S]+?)', resp_block).group(1)
- embl_link = re.search('href=\"(\S+)\">Download', resp_block).group(1)
- if embl_link.startswith('ontologies'):
- embl_link = embl_link[len('ontologies'):]
- # TODO: with Python 3.9- embl_link.removeprefix('ontologies')
- except(AttributeError):
- continue
- if embl_abbr in ontol_interest:
- ontol_dic[embl_abbr] = (embl_name, embl_link)
- # Continue if not find all ontologies of interest specified in definitions.py
- not_found = set(ontol_interest).difference(set(ontol_dic.keys()))
- if not_found:
- if len(not_found) == 1:
- logging.warning(f'Did not find ontology: ' + ', '.join(not_found))
- else:
- logging.warning(f'Did not find ontologies: ' + ', '.join(not_found))
- if ontol_dic == {}:
- sys.exit('Zero ontologies found from user-given list')
- return(ontol_dic)
-
-
-def _check_make(db_file, remake_cache, ontol_interest):
- '''Check if database file should be remade'''
- if os.path.exists(db_file) and remake_cache == False:
- if os.path.exists(os.path.join(owl_dir, 'cached_ontologies.pickle')):
- if ontol_interest == _pickle_load(os.path.join(owl_dir, 'cached_ontologies.pickle')):
- return(False)
- try:
- os.remove(db_file)
- except(FileNotFoundError):
- pass
- return(True)
-
-
-def _db_insert(db_cursor, table_name, key_term, val_term):
- '''Insert new data into a database table'''
- if key_term.strip()==val_term.strip() or key_term.strip()=='' or val_term.strip()=='':
- return
- db_cursor.execute(f"INSERT OR IGNORE INTO {table_name} VALUES (:key,:value)",
- {'key':key_term.strip(), 'value':val_term.strip()})
-
-
-def _get_imports(file_handle):
- '''Check for required imports; append any new patterns to pattern_strs'''
- pattern_strs = []
- pattern_strs.append('')
- pattern_strs.append('')
- whole_file = str(file_handle.read())
- for patt_str in pattern_strs:
- import_match = re.findall(patt_str, whole_file)
- if import_match != []:
- import_match = [x if re.search('^http:',x) else purl_link+x for x in import_match]
- break
- return(import_match)
-
-
-def _section_file(file_handle, break_pattern,
- stt_at=' // Classes', end_at=' // Annotations'):
- '''Break OWL files into readable sections for each ontology accession'''
- whole_file = str(file_handle.read())
- if stt_at != '':
- if re.search(stt_at, whole_file):
- whole_file = ''.join(whole_file.split(stt_at)[1:])
- if end_at != '':
- if re.search(end_at, whole_file):
- whole_file = ''.join(whole_file.split(end_at)[:-1])
- file_sections = whole_file.split(break_pattern)
- return(file_sections[1:-1])
-
-
-def _labels_synonyms(obo_list, have_label=False):
- '''Identify labels, ids and exact ontology synonyms'''
- obo_ids = []
- for obo_string in obo_list:
- id_pattern = '(\w+) -->'
- lab_pattern = '\([\s\S]+?)\<\/rdfs:label\>'
- syn_pattern = '\(.*?)\ 1:
- if not re.search('NCBITaxon', ontol_term[0]):
- for permutation in get_resource_label_permutations(ontol_label):
- _db_insert(c,'standard_resource_permutations',permutation,ontol_term[0])
- # Add abbreviated binomials from NCBITaxons; TODO: may get wrong combinations?
- elif len(word_tokenize(ontol_label)) == 2:
- bi_name = ontol_label.split()
- _db_insert(c, 'standard_resource_permutations',
- bi_name[0][0]+' '+bi_name[1], ontol_term[0])
- for syn_term in ontol_term[2]:
- _db_insert(s,'label_synonyms',punctuation_treatment(str(syn_term)),ontol_label)
- conn.commit()
- sonn.commit()
-
- conn.close()
- sonn.close()
- _pickle_save(ontol_interest, os.path.join(owl_dir,'cached_ontologies.pickle'))
- return
diff -r 5244e7465767 -r be95a7ce968a lexmapr/definitions.py
--- a/lexmapr/definitions.py Wed Aug 31 14:32:14 2022 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,210 +0,0 @@
-"""Static definitions"""
-
-import os
-
-
-# root path
-ROOT = os.path.dirname(__file__)
-
-# URL to list of OLS ontologies where download link is given
-embl_ontologies = 'https://www.ebi.ac.uk/ols/ontologies'
-
-# beginning of URL to ontology PURL
-purl_link = 'http://purl.obolibrary.org/obo/'
-
-# directory for downloaded OWL files
-owl_dir = 'lexmapr/owl_files'
-
-# path to database with synonyms from predefined resources and from OWL files
-synonym_db = 'lexmapr/owl_files/label_synonyms.db'
-
-# path to database with all ontologies of interest
-ontol_db = 'lexmapr/owl_files/ontol_table.db'
-
-
-# ontologies of interest
-ontol_interest = [#'BFO',
- #'CHEBI',
- #'ENVO',
- 'FOODON',
- #'GENEPIO',
- 'NCBITAXON', # NCBITaxon is not valid as of April 2022
- #'OGMS',
- #'PATO',
- #'PCO',
- #'UBERON',
- ]
-
-# ontology accessions that do not have labels or are placeholders as of April 2022
-# will skip in database building
-missing_ontol_labels = ['GENEPIO_0001367','GENEPIO_0001368','GENEPIO_0001369','GENEPIO_0001370',
- 'GENEPIO_0001372','GENEPIO_0001373','_MIAA_0000021',
- ]
-
-# terms indicating that the metadata was not given/collected; will output empty results
-not_provided = ['not applicable','unknown','n a','not provided','not available','miscellaneous',
- 'not collected','missing','unidentified','unknown','none','unamed','other',
- 'undetermined','not known','no history given','no source specified','null',
- 'unspecified','not reported','not available not collected','not isolated',
- 'not available','not provided','xxx','mising','misng','other','unidentified',
- 'not determined other','reported later','intact unknown','not determined',
- 'not ascertained','unk','nd','nd others','nd other','etc','na','',' ',
- 'not supplied','not specified',
- ]
-
-# below are bin definitions
-# TODO: food consumer group:FOODON_03510136 changed, formatted as '* as food consumer'
-# can collect as 'for *' in text?
-#fo_consumer = []
-
-fo_product = ['algal food product:FOODON_00001184',
- 'amphibian:FOODON_03411624',
- 'amphibian or reptile food product:FOODON_00002200',
- 'animal based refined or partially-refined food product:FOODON_00001595',
- 'avian egg food product:FOODON_00001105',
- 'avian food product:FOODON_001251',
- 'bakery food product:FOODON_00001626',
- 'cell-based technology food product:FOODON_00003376',
- 'dairy food product:FOODON_00001256',
- 'dietary supplement:FOODON_03401298',
- 'fish egg food product:FOODON_00001250',
- 'fish food product:FOODON_00001248',
- 'food product analog:FOODON_00001871',
- 'food product component:FOODON_00001714',
- 'fungus food product:FOODON_00001143',
- 'game animal food product:FOODON_00002477',
- 'insect food product:FOODON_00001177',
- 'meat food product:FOODON_00002477',
- 'microbial food product:FOODON_00001145',
- 'plant food product:FOODON_00001015',
- 'poultry food product:FOODON_00001283',
- 'prepared food product:FOODON_00001180',
- 'processed food product:FOODON_03311737',
- 'reptile egg food product:FOODON_00002199',
- 'seafood product:FOODON_00001046',
- 'shellfish food product:FOODON_00001293',
- 'soup food product:FOODON_00002257',
- 'sustainable agriculture food product:FOODON_00003375',
- 'vegetarian food product:FOODON_00003194',
- 'vertebrate animal food product:FOODON_00001092',
- ]
-
-fo_quality = ['food (acidified):FOODON_03301625',
- 'food (adulterated):FOODON_00003367',
- 'food (baked):FOODON_00002456',
- 'food (batter-coated):FOODON_00002662',
- 'food (blanched):FOODON_00002767',
- 'food (blend):FOODON_00003889',
- 'food (boiled):FOODON_00002688',
- 'food (breaded):FOODON_00002661',
- 'food (broiled or grilled):FOODON_00002647',
- 'food (canned):FOODON_00002418',
- 'food (chilled):FOODON_00002642',
- 'food (chopped):FOODON_00002777',
- 'food (cleaned):FOODON_00002708',
- 'food (colored):FOODON_00002650',
- 'food (comminuted):FOODON_00002754',
- 'food (cooked):FOODON_00001181',
- 'food (deep-fried):FOODON_03307052',
- 'food (dehydrated):FOODON_00002643',
- 'food (dried):FOODON_03307539',
- 'food (fat or oil coated):FOODON_03460233',
- 'food (fermented):FOODON_00001258',
- 'food (filled):FOODON_00002644',
- 'food (flavored):FOODON_00002646',
- 'food (freeze-dried):FOODON_03301752',
- 'food (fresh):FOODON_00002457',
- 'food (fried):FOODON_00002660',
- 'food (frozen):FOODON_03302148',
- 'food (genetically-modified):FOODON_03530251',
- 'food (ground):FOODON_00002713',
- 'food (harvested):FOODON_00003398',
- 'food (heat treated):FOODON_03316043',
- 'food (hulled):FOODON_00002720',
- 'food (hydrolized):FOODON_00002653',
- 'food (irradiated):FOODON_03305364',
- 'food (juiced):FOODON_00003499',
- 'food (liquid):FOODON_03430130',
- 'food (milled):FOODON_00002649',
- 'food (not genetically-modified):FOODON_00003379',
- 'food (organically grown):FOODON_03306690',
- 'food (packaged):FOODON_00002739',
- 'food (packed in high pressurised containers):FOODON_03317139',
- 'food (pan-fried):FOODON_00002463',
- 'food (paste):FOODON_00003887',
- 'food (pasteurized):FOODON_00002654',
- 'food (peeled):FOODON_00002655',
- 'food (pickled):FOODON_00001079',
- 'food (powdered):FOODON_00002976',
- 'food (precooked):FOODON_00002971',
- 'food (precooked, frozen):FOODON_03305323',
- 'food (preserved):FOODON_00002158',
- 'food (puffed):FOODON_00002656',
- 'food (raw):FOODON_03311126',
- 'food (rehydrated):FOODON_00002755',
- 'food (roasted):FOODON_00002744',
- 'food (salted):FOODON_03460173',
- 'food (seasoned):FOODON_00002733',
- 'fruit (seedless):FOODON_00003461',
- 'food (semiliquid):FOODON_03430103',
- 'food (semisolid):FOODON_03430144',
- 'food (sliced):FOODON_00002455',
- 'food (smoked or smoke-flavored):FOODON_03460172',
- 'food (solid):FOODON_03430151',
- 'food (spoiled):FOODON_00003366',
- 'food (starch or flour thickened):FOODON_03315268',
- 'food (steamed):FOODON_00002657',
- 'food (sugar-free):FOODON_03315838',
- 'food (textured):FOODON_00002658',
- 'food (toasted):FOODON_00002659',
- 'food (unprocessed):FOODON_03316056',
- 'food (unstandardized):FOODON_03315636',
- ]
-
-fo_organism = ['algae:FOODON_03411301',
- 'animal:FOODON_00003004',
- 'fungus:FOODON_03411261',
- 'lichen:FOODON_03412345',
- 'whole plant:PO_0000003',
- ]
-
-ncbi_taxon = ['Actinopterygii:NCBITaxon_7898', #mix of taxon types
- 'Ecdysozoa:NCBITaxon_1206794',
- 'Echinodermata:NCBITaxon_7586',
- 'Fungi:NCBITaxon_4751',
- 'Spiralia:NCBITaxon_2697495',
- 'Viridiplantae:NCBITaxon_33090',
- 'Amphibia:NCBITaxon_8292',
- #'Sauropsida:NCBITaxon_8457',
- 'Aves:NCBITaxon_8782',
- 'Crocodylia:NCBITaxon_1294634',
- 'Testudinata:NCBITaxon_2841271',
- 'Lepidosauria:NCBITaxon_8504',
- #'Mammalia:NCBITaxon_40674',
- 'Artiodactyla:NCBITaxon_91561',
- 'Carnivora:NCBITaxon_33554',
- 'Chiroptera:NCBITaxon_9397',
- 'Chrysochloridae:NCBITaxon_9389',
- 'Eulipotyphla:NCBITaxon_9362',
- 'Hyracoidea:NCBITaxon_9810',
- 'Macroscelidea:NCBITaxon_28734',
- 'Metatheria:NCBITaxon_9263',
- 'Ornithorhynchidae:NCBITaxon_9256',
- 'Perissodactyla:NCBITaxon_9787',
- 'Pholidota:NCBITaxon_9971',
- 'Primates:NCBITaxon_9443',
- 'Proboscidea:NCBITaxon_9779',
- 'Rodentia:NCBITaxon_9989',
- 'Sirenia:NCBITaxon_9774',
- 'Tachyglossidae:NCBITaxon_9259',
- 'Tenrecidae:NCBITaxon_9369',
- 'Tubulidentata:NCBITaxon_9815',
- 'Xenarthra:NCBITaxon_9348',
- ]
-
-arg_bins = {#'fo_consumer':fo_consumer,
- 'fo_product':fo_product,
- 'fo_quality':fo_quality,
- 'fo_organism':fo_organism,
- 'ncbi_taxon':ncbi_taxon,
- }
diff -r 5244e7465767 -r be95a7ce968a lexmapr/ontology_reasoner.py
--- a/lexmapr/ontology_reasoner.py Wed Aug 31 14:32:14 2022 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,449 +0,0 @@
-"""Ontology finder and visualizer"""
-
-import copy, json, logging, requests, time
-import pygraphviz as pgv
-
-logging.getLogger('urllib3').setLevel(logging.WARNING)
-
-
-# TODO: figure out what to do with root Thing:Thing
-class Ontology_accession:
- '''Base class for defining attributes and behavior of single ontology accesions;
- Assume format definition (whitespace and punctuation okay):ontology_id'''
- existing_ontologies = {}
-
- @staticmethod
- def make_instance(acc):
- '''Use instead of default __init__ to enforce one instance per ontology'''
- try:
- return(Ontology_accession.existing_ontologies[acc])
- except(KeyError):
- Ontology_accession.existing_ontologies[acc] = Ontology_accession(acc)
- return(Ontology_accession.existing_ontologies[acc])
-
- def __init__(self, acc):
- '''If ontology is not recognized, just use short form, ex THING'''
- def_split = acc.split(':')
- self.label = ':'.join(def_split[:-1])
- self.id = def_split[-1].replace('_',':')
- self.parents = 'not assigned yet'
- self.children = 'not assigned yet'
- self.ancestors = 'not assigned yet'
- self.descendants = 'not assigned yet'
- self.graph_nodes = 'not assigned yet'
- self.graph_fill = False
- self.ontology = def_split[1].split('_')[0]
- if self.label == '':
- self._get_label()
-
- def _api_results(self, input_list, return_list):
- '''Ignore obsolete terms, not currently checking for \'term_replaced_by\''''
- for x_term in input_list:
- if x_term['is_obsolete']:
- continue
- new_term = x_term['label'] + ':' + x_term['short_form']
- return_list.append(Ontology_accession.make_instance(new_term))
- return(return_list)
-
- def _add_edges(self, family_member, family_list, edge_set, round_num):
- '''Add edges to graph'''
- if edge_set == []:
- return(edge_set)
- elif round_num > 0:
- for x in family_list:
- x.get_family(family_member)
- if family_member == 'parents': # TODO: how get x.family_member to collapse code
- if x.parents == ['none found']:
- continue
- if len(x.parents) > 5:
- time.sleep(0.05)
- new_edges = [(y._graph_label(),x._graph_label()) for y in x.parents]
- edge_set = edge_set + [z for z in new_edges if z not in edge_set]
- edge_set = x._add_edges(family_member, x.parents, edge_set, round_num-1)
- elif family_member == 'children':
- if x.children == ['none found']:
- continue
- if len(x.children) > 5:
- time.sleep(0.05)
- new_edges = [(x._graph_label(),y._graph_label()) for y in x.children]
- edge_set = edge_set + [z for z in new_edges if z not in edge_set]
- edge_set = x._add_edges(family_member, x.children, edge_set, round_num-1)
- return(edge_set)
-
- def _draw_graph(self, o_file, node_color, edge_color):
- '''Draw and save the graph'''
- ontol_graph = pgv.AGraph(name='ontology_graph')
- ontol_graph.add_node(self._graph_label())
- for x in self.graph_nodes:
- ontol_graph.add_edge(x[0], x[1])
- ontol_graph.node_attr.update(shape='box',
- style='rounded,filled',
- fillcolor='lightgrey',
- color=node_color)
- ontol_graph.edge_attr.update(shape='normal',
- color=edge_color,
- dir='back')
- ontol_graph.get_node(self._graph_label()).attr.update(fillcolor='lightblue')
- # TODO: determine best algorithm: neato, fdp, nop, twopi; tried circo; not dot, sfdp
- ontol_graph.draw(o_file, prog='twopi')
-
- def _expand_edge(self, family_member, family_list, edge_set, old_set='', stop_terms=False):
- '''Add edges to graph'''
- while old_set != edge_set:
- old_set = copy.deepcopy(edge_set)
- for x in family_list:
- if x == 'none found':
- break
- if type(stop_terms) == list:
- if x in stop_terms:
- break
- x.get_family(family_member)
- if family_member == 'parents': # TODO: how get x.family_member to collapse code
- if x.parents == ['none found']:
- continue
- if len(x.parents) > 5:
- time.sleep(0.05)
- new_edges = [(y._graph_label(),x._graph_label()) for y in x.parents]
- edge_set = edge_set + [z for z in new_edges if z not in edge_set]
- edge_set = x._expand_edge(family_member,x.parents,edge_set,old_set,stop_terms)
- elif family_member == 'children':
- if x.children == ['none found']:
- continue
- if len(x.children) > 5:
- time.sleep(0.05)
- new_edges = [(x._graph_label(),y._graph_label()) for y in x.children]
- edge_set = edge_set + [z for z in new_edges if z not in edge_set]
- edge_set = x._expand_edge(family_member,x.children,edge_set,old_set,stop_terms)
- return(edge_set)
-
- def _get_label(self):
- '''Retrieve definition is correct for an id; updates instance'''
- query_url = 'http://www.ebi.ac.uk/ols/api/terms?obo_id={}'.format(self.id)
- ols_resp = self._get_request(query_url)
- if ols_resp is None:
- logging.warning(f'Did not retrieve PURL for {self.id}')
- self.label = 'unk'
- return
- try:
- self.label = ols_resp.json()['_embedded']['terms'][0]['label']
- except(KeyError):
- logging.warning(f'Did not find label for {self.id} in OLS')
- self.label = 'unk'
- except json.decoder.JSONDecodeError as err:
- time.sleep(0.05)
- self._get_label()
-
- def _get_request(self, request_url, max_retries=5):
- '''Retrieve URL'''
- while max_retries > 0:
- try:
- return(requests.get(request_url))
- except:
- time.sleep(0.05)
- max_retries -= 1
- return(None)
-
- def _graph_label(self):
- '''Format a graph label'''
- return(self.id+'\\n'+self.label)
-
- def _next_page(self, url_link, return_list):
- '''Get next page of search results'''
- next_resp = self._get_request(url_link)
- if next_resp is None:
- logging.warning(f'Did not retrieve URL for {url_link} during API search')
- return(False, return_list)
- else:
- try:
- next_link = next_resp.json()['_links']['next']['href']
- except(KeyError):
- next_link = False
- return_list = self._api_results(next_resp.json()['_embedded']['terms'], return_list)
- return(next_link, return_list)
-
- def check_label(self):
- '''Check if given definition is correct for an id; returns Boolean or str `unk`'''
- self._get_label()
- if self.label != 'unk':
- return(ols_resp.json()['_embedded']['terms'][0]['label'] == self.label)
- else:
- return(self.label)
-
- def get_family(self, family_member):
- '''Returns list of parents, ancestors, children or descendants'''
- if family_member == 'parents' and self.parents != 'not assigned yet':
- return(self.parents)
- elif family_member == 'children' and self.children != 'not assigned yet':
- return(self.children)
- elif family_member == 'ancestors' and self.ancestors != 'not assigned yet':
- return(self.ancestors)
- elif family_member == 'descendants' and self.descendants != 'not assigned yet':
- return(self.descendants)
-
- if self.id.split(':')[0].lower() == 'gaz':
- query_url = 'https://www.ebi.ac.uk/ols/api/ontologies/gaz/terms?iri='
- query_url += 'http://purl.obolibrary.org/obo/' + self.id.replace(':','_')
- ols_resp = self._get_request(query_url)
- qry_url = ols_resp.json()['_embedded']['terms'][0]['_links']\
- ['hierarchical'+family_member.title()]['href']
- else:
- query_url = 'http://www.ebi.ac.uk/ols/api/ontologies/{}/{}?id={}'
- qry_url = query_url.format(self.id.split(':')[0].lower(),family_member,self.id)
-
- ols_resp = self._get_request(qry_url)
- if ols_resp is None:
- logging.warning(f'Did not get URL for {url_link} during search for {family_member}')
- result_list = ['none found']
- elif ols_resp.status_code > 200:
- result_list = ['none found']
- elif ols_resp.json()['page']['totalElements'] > 0:
- result_list = self._api_results(ols_resp.json()['_embedded']['terms'], [])
- if ols_resp.json()['page']['totalPages'] > 1:
- next_url = ols_resp.json()['_links']['next']['href']
- while next_url:
- next_url,result_list = self._next_page(next_url,result_list)
- else:
- result_list = ['none found']
-
- if family_member == 'parents':
- self.parents = list(set(result_list))
- elif family_member == 'children':
- self.children = list(set(result_list))
- elif family_member == 'ancestors':
- self.ancestors = list(set(result_list))
- elif family_member == 'descendants':
- self.descendants = list(set(result_list))
- return(result_list)
-
- def bin_term(self, bin_package):
- '''Categorize term into given bins as Ontology_package'''
- term_bins = []
- self.get_family('ancestors')
- if self.ancestors == ['none found']:
- ancestor_labels = [x.label + ':' + x.id.replace(':','_') for x in [self]]
- else:
- ancestor_labels = [x.label+':'+x.id.replace(':','_') for x in [self]+self.ancestors]
- return([x for x in ancestor_labels if x in bin_package.ontologies])
-
- def visualize_term(self, o_file, node_color='black', edge_color='black',
- fill_out=False, stop_terms=False, draw_graph=True):
- '''Visualize one term'''
- if self.graph_nodes!='not assigned yet' and self.graph_fill==fill_out:
- if draw_graph:
- self._draw_graph(o_file, node_color, edge_color)
- else:
- self.get_family('parents')
- self.get_family('children')
- edge_set1,edge_set2 = [],[]
- if self.parents != ['none found']:
- edge_set1 = [(x._graph_label(),self._graph_label()) for x in self.parents]
- if self.children != ['none found']:
- edge_set2 = [(self._graph_label(),x._graph_label()) for x in self.children]
- if type(fill_out) == int:
- edge_set1 = self._add_edges('parents', self.parents, edge_set1, fill_out-1)
- edge_set2 = self._add_edges('children', self.children, edge_set2, fill_out-1)
- elif fill_out==True:
- edge_set1 = self._expand_edge('parents',self.parents,edge_set1,'',stop_terms)
- edge_set2 = self._expand_edge('children',self.children,edge_set2,'',stop_terms)
- self.graph_nodes = list(set(edge_set1+edge_set2))
- if draw_graph:
- self._draw_graph(o_file, node_color, edge_color)
-
-
-class Ontology_package:
- '''Associate or package Ontology_accession objects together'''
- def __init__(self, package_label, ontol_list):
- self.label = package_label
- self.ontologies = ontol_list
- self.bins = []
- self.lcp = 'not assigned yet'
- self.hcc = 'not assigned yet'
- self._lcp_state = (True,[])
- self._hcc_state = (True,[])
- self._bin_state = []
- self.graph_nodes = 'not assigned yet'
- self.graph_state = False
-
- def _common_family(self,family_member,incl_terms,excl_terms):
- '''Find common family members'''
- family_candidates = {}
- for ontol_term in [x for x in self.ontologies if x.id not in excl_terms]:
- family_candidates[ontol_term] = ontol_term.get_family(family_member)
- common_members = self._common_list(family_candidates, incl_terms)
- while common_members == []:
- for ontol_term in [x for x in self.ontologies if x.id not in excl_terms]:
- if len(self.ontologies) > 30:
- time.sleep(0.05)
- original_list = list(family_candidates[ontol_term])
- for family_ontol in original_list:
- if len(original_list) > 30:
- time.sleep(0.05)
- try:
- family_candidates[ontol_term].extend(\
- family_ontol.get_family(family_member))
- except(AttributeError):
- family_candidates[ontol_term].extend(['none found'])
- return(common_members)
-
- def _common_list(self, input_dic, incl_terms):
- '''Compare input dictionary keys and list'''
- term_lists = []
- for ontol_key in input_dic:
- append_list = [ontol_key]
- for ontol_val in input_dic[ontol_key]:
- append_list.append(ontol_val)
- term_lists.append(append_list)
- common_set = set.intersection(*map(set, term_lists))
- if incl_terms:
- common_keys = []
- for ontol_acc in common_set:
- if ontol_acc in input_dic.keys():
- common_keys.append(ontol_acc)
- if common_keys != []:
- return(common_keys)
- return(list(common_set - set(input_dic.keys())))
-
- def _draw_graph(self, o_file, node_color, edge_color, show_lcp, show_hcc):
- '''Draw and save graph'''
- ontol_graph = pgv.AGraph(name='ontology_graph')
- for x in self.ontologies:
- ontol_graph.add_node(x._graph_label())
- for x in self.graph_nodes:
- ontol_graph.add_edge(x[0], x[1])
- ontol_graph.node_attr.update(shape='box', style='rounded,filled',
- fillcolor='lightgrey', color=node_color)
- ontol_graph.edge_attr.update(shape='normal', color=edge_color, dir='back')
- if show_lcp:
- for x in self.lcp:
- ontol_graph.get_node(x._graph_label()).attr.update(fillcolor='beige')
- if show_hcc:
- for x in self.hcc:
- ontol_graph.get_node(x._graph_label()).attr.update(fillcolor='beige')
- for x in self.ontologies:
- ontol_graph.get_node(x._graph_label()).attr.update(fillcolor='lightblue')
- ontol_graph.draw(o_file,prog='dot')