# HG changeset patch
# User kkonganti
# Date 1663083144 14400
# Node ID be95a7ce968ae480f4bdbd8874a047ccffc5d98a
# Parent  5244e74657673ad4a6e712f724628912d4a51a52
"planemo upload"

diff -r 5244e7465767 -r be95a7ce968a cfsan_lexmapr2.xml
--- a/cfsan_lexmapr2.xml	Wed Aug 31 14:32:14 2022 -0400
+++ b/cfsan_lexmapr2.xml	Tue Sep 13 11:32:24 2022 -0400
@@ -28,7 +28,7 @@
 #end for
 ]]>
-$__tool_directory__/nltk_data
+/tool/tool-data/cfsan_lexmapr2/0/nltk_data
diff -r 5244e7465767 -r be95a7ce968a lexmapr.zip
Binary file lexmapr.zip has changed
diff -r 5244e7465767 -r be95a7ce968a lexmapr/__init__.py
--- a/lexmapr/__init__.py	Wed Aug 31 14:32:14 2022 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-
diff -r 5244e7465767 -r be95a7ce968a lexmapr/__pycache__/__init__.cpython-37.pyc
Binary file lexmapr/__pycache__/__init__.cpython-37.pyc has changed
diff -r 5244e7465767 -r be95a7ce968a lexmapr/__pycache__/create_databases.cpython-37.pyc
Binary file lexmapr/__pycache__/create_databases.cpython-37.pyc has changed
diff -r 5244e7465767 -r be95a7ce968a lexmapr/__pycache__/definitions.cpython-37.pyc
Binary file lexmapr/__pycache__/definitions.cpython-37.pyc has changed
diff -r 5244e7465767 -r be95a7ce968a lexmapr/__pycache__/ontology_reasoner.cpython-37.pyc
Binary file lexmapr/__pycache__/ontology_reasoner.cpython-37.pyc has changed
diff -r 5244e7465767 -r be95a7ce968a lexmapr/__pycache__/pipeline.cpython-37.pyc
Binary file lexmapr/__pycache__/pipeline.cpython-37.pyc has changed
diff -r 5244e7465767 -r be95a7ce968a lexmapr/__pycache__/pipeline_helpers.cpython-37.pyc
Binary file lexmapr/__pycache__/pipeline_helpers.cpython-37.pyc has changed
diff -r 5244e7465767 -r be95a7ce968a lexmapr/__pycache__/pipeline_resources.cpython-37.pyc
Binary file lexmapr/__pycache__/pipeline_resources.cpython-37.pyc has changed
diff -r 5244e7465767 -r be95a7ce968a lexmapr/__pycache__/run_summary.cpython-37.pyc
Binary file lexmapr/__pycache__/run_summary.cpython-37.pyc has changed
diff -r 5244e7465767 -r be95a7ce968a lexmapr/create_databases.py
--- a/lexmapr/create_databases.py	Wed Aug 31 14:32:14 2022 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,248 +0,0 @@
-"""Builds SQLite3 databases"""
-
-import logging, os, pickle, re, requests, sqlite3, sys, time
-import lexmapr.ontology_reasoner as ontr
-from nltk.tokenize import word_tokenize
-from lexmapr.pipeline_helpers import punctuation_treatment
-from lexmapr.definitions import embl_ontologies, synonym_db, ontol_db
-from lexmapr.definitions import owl_dir, purl_link, missing_ontol_labels
-from lexmapr.pipeline_resources import get_resource_label_permutations
-
-logging.getLogger('requests').setLevel(logging.WARNING)
-logging.getLogger('urllib3').setLevel(logging.WARNING)
-
-
-# TODO: might replace pickle with ujson
-def _pickle_save(data_to_save, file_path):
-    '''Write a pickle file'''
-    with open(file_path,'wb') as SAVE_file:
-        pickle.dump(data_to_save, SAVE_file)
-
-
-def _pickle_load(file_path):
-    '''Read a pickle file'''
-    with open(file_path,'rb') as LOAD_file:
-        return(pickle.load(LOAD_file))
-
-
-def _get_ontols(ontol_interest):
-    '''Obtain URLs for ontologies of interest'''
-    ontol_dic = {}
-    embl_resp = requests.get(embl_ontologies)
-    resp_blocks = re.findall('([\s\S]+?)',embl_resp.content.decode('utf-8'))
-    for resp_block in resp_blocks:
-        try:
-            embl_abbr = re.search('class=\"ontology-source\">([\s\S]+?)<', resp_block).group(1)
-            embl_name = re.search('([\s\S]+?)', resp_block).group(1)
-            embl_link = re.search('href=\"(\S+)\">Download',
-                                  resp_block).group(1)
-            if embl_link.startswith('ontologies'):
-                embl_link = embl_link[len('ontologies'):]
-                # TODO: with Python 3.9+ use embl_link.removeprefix('ontologies')
-        except(AttributeError):
-            continue
-        if embl_abbr in ontol_interest:
-            ontol_dic[embl_abbr] = (embl_name, embl_link)
-    # Continue even if not all ontologies of interest specified in definitions.py were found
-    not_found = set(ontol_interest).difference(set(ontol_dic.keys()))
-    if not_found:
-        if len(not_found) == 1:
-            logging.warning(f'Did not find ontology: ' + ', '.join(not_found))
-        else:
-            logging.warning(f'Did not find ontologies: ' + ', '.join(not_found))
-    if ontol_dic == {}:
-        sys.exit('Zero ontologies found from user-given list')
-    return(ontol_dic)
-
-
-def _check_make(db_file, remake_cache, ontol_interest):
-    '''Check if database file should be remade'''
-    if os.path.exists(db_file) and remake_cache == False:
-        if os.path.exists(os.path.join(owl_dir, 'cached_ontologies.pickle')):
-            if ontol_interest == _pickle_load(os.path.join(owl_dir, 'cached_ontologies.pickle')):
-                return(False)
-    try:
-        os.remove(db_file)
-    except(FileNotFoundError):
-        pass
-    return(True)
-
-
-def _db_insert(db_cursor, table_name, key_term, val_term):
-    '''Insert new data into a database table'''
-    if key_term.strip()==val_term.strip() or key_term.strip()=='' or val_term.strip()=='':
-        return
-    db_cursor.execute(f"INSERT OR IGNORE INTO {table_name} VALUES (:key,:value)",
-                      {'key':key_term.strip(), 'value':val_term.strip()})
-
-
-def _get_imports(file_handle):
-    '''Check for required imports; append any new patterns to pattern_strs'''
-    pattern_strs = []
-    pattern_strs.append('')
-    pattern_strs.append('')
-    whole_file = str(file_handle.read())
-    for patt_str in pattern_strs:
-        import_match = re.findall(patt_str, whole_file)
-        if import_match != []:
-            import_match = [x if re.search('^http:',x) else purl_link+x for x in import_match]
-            break
-    return(import_match)
-
-
-def _section_file(file_handle, break_pattern,
-                  stt_at=' // Classes', end_at=' // Annotations'):
-    '''Break OWL files into readable sections for each ontology accession'''
-    whole_file = str(file_handle.read())
-    if stt_at != '':
-        if re.search(stt_at, whole_file):
-            whole_file = ''.join(whole_file.split(stt_at)[1:])
-    if end_at != '':
-        if re.search(end_at, whole_file):
-            whole_file = ''.join(whole_file.split(end_at)[:-1])
-    file_sections = whole_file.split(break_pattern)
-    return(file_sections[1:-1])
-
-
-def _labels_synonyms(obo_list, have_label=False):
-    '''Identify labels, ids and exact ontology synonyms'''
-    obo_ids = []
-    for obo_string in obo_list:
-        id_pattern = '(\w+) -->'
-        lab_pattern = '\([\s\S]+?)\<\/rdfs:label\>'
-        syn_pattern = '\(.*?)\
-            if len(word_tokenize(ontol_label)) > 1:
-                if not re.search('NCBITaxon', ontol_term[0]):
-                    for permutation in get_resource_label_permutations(ontol_label):
-                        _db_insert(c,'standard_resource_permutations',permutation,ontol_term[0])
-                # Add abbreviated binomials from NCBITaxons; TODO: may get wrong combinations?
-                elif len(word_tokenize(ontol_label)) == 2:
-                    bi_name = ontol_label.split()
-                    _db_insert(c, 'standard_resource_permutations',
-                               bi_name[0][0]+' '+bi_name[1], ontol_term[0])
-            for syn_term in ontol_term[2]:
-                _db_insert(s,'label_synonyms',punctuation_treatment(str(syn_term)),ontol_label)
-        conn.commit()
-        sonn.commit()
-
-    conn.close()
-    sonn.close()
-    _pickle_save(ontol_interest, os.path.join(owl_dir,'cached_ontologies.pickle'))
-    return
diff -r 5244e7465767 -r be95a7ce968a lexmapr/definitions.py
--- a/lexmapr/definitions.py	Wed Aug 31 14:32:14 2022 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,210 +0,0 @@
-"""Static definitions"""
-
-import os
-
-
-# root path
-ROOT = os.path.dirname(__file__)
-
-# URL to list of OLS ontologies where download link is given
-embl_ontologies = 'https://www.ebi.ac.uk/ols/ontologies'
-
-# beginning of URL to ontology PURL
-purl_link = 'http://purl.obolibrary.org/obo/'
-
-# directory for downloaded OWL files
-owl_dir = 'lexmapr/owl_files'
-
-# path to database with synonyms from predefined resources and from OWL files
-synonym_db = 'lexmapr/owl_files/label_synonyms.db'
-
-# path to database with all ontologies of interest
-ontol_db = 'lexmapr/owl_files/ontol_table.db'
-
-
-# ontologies of interest
-ontol_interest = [#'BFO',
-                  #'CHEBI',
-                  #'ENVO',
-                  'FOODON',
-                  #'GENEPIO',
-                  'NCBITAXON', # NCBITaxon is not valid as of April 2022
-                  #'OGMS',
-                  #'PATO',
-                  #'PCO',
-                  #'UBERON',
-                  ]
-
-# ontology accessions that do not have labels or are placeholders as of April 2022
-# will skip in database building
-missing_ontol_labels = ['GENEPIO_0001367','GENEPIO_0001368','GENEPIO_0001369','GENEPIO_0001370',
-                        'GENEPIO_0001372','GENEPIO_0001373','_MIAA_0000021',
-                        ]
-
-# terms indicating that the metadata was not given/collected; will output empty results
-not_provided = ['not applicable','unknown','n a','not provided','not available','miscellaneous',
-                'not collected','missing','unidentified','unknown','none','unamed','other',
-                'undetermined','not known','no history given','no source specified','null',
-                'unspecified','not reported','not available not collected','not isolated',
-                'not available','not provided','xxx','mising','misng','other','unidentified',
-                'not determined other','reported later','intact unknown','not determined',
-                'not ascertained','unk','nd','nd others','nd other','etc','na','',' ',
-                'not supplied','not specified',
-                ]
-
-# below are bin definitions
-# TODO: food consumer group:FOODON_03510136 changed, formatted as '* as food consumer'
-#       can collect as 'for *' in text?
-#fo_consumer = []
-
-fo_product = ['algal food product:FOODON_00001184',
-              'amphibian:FOODON_03411624',
-              'amphibian or reptile food product:FOODON_00002200',
-              'animal based refined or partially-refined food product:FOODON_00001595',
-              'avian egg food product:FOODON_00001105',
-              'avian food product:FOODON_00001251',
-              'bakery food product:FOODON_00001626',
-              'cell-based technology food product:FOODON_00003376',
-              'dairy food product:FOODON_00001256',
-              'dietary supplement:FOODON_03401298',
-              'fish egg food product:FOODON_00001250',
-              'fish food product:FOODON_00001248',
-              'food product analog:FOODON_00001871',
-              'food product component:FOODON_00001714',
-              'fungus food product:FOODON_00001143',
-              'game animal food product:FOODON_00002477',
-              'insect food product:FOODON_00001177',
-              'meat food product:FOODON_00002477',
-              'microbial food product:FOODON_00001145',
-              'plant food product:FOODON_00001015',
-              'poultry food product:FOODON_00001283',
-              'prepared food product:FOODON_00001180',
-              'processed food product:FOODON_03311737',
-              'reptile egg food product:FOODON_00002199',
-              'seafood product:FOODON_00001046',
-              'shellfish food product:FOODON_00001293',
-              'soup food product:FOODON_00002257',
-              'sustainable agriculture food product:FOODON_00003375',
-              'vegetarian food product:FOODON_00003194',
-              'vertebrate animal food product:FOODON_00001092',
-              ]
-
-fo_quality = ['food (acidified):FOODON_03301625',
-              'food (adulterated):FOODON_00003367',
-              'food (baked):FOODON_00002456',
-              'food (batter-coated):FOODON_00002662',
-              'food (blanched):FOODON_00002767',
-              'food (blend):FOODON_00003889',
-              'food (boiled):FOODON_00002688',
-              'food (breaded):FOODON_00002661',
-              'food (broiled or grilled):FOODON_00002647',
-              'food (canned):FOODON_00002418',
-              'food (chilled):FOODON_00002642',
-              'food (chopped):FOODON_00002777',
-              'food (cleaned):FOODON_00002708',
-              'food (colored):FOODON_00002650',
-              'food (comminuted):FOODON_00002754',
-              'food (cooked):FOODON_00001181',
-              'food (deep-fried):FOODON_03307052',
-              'food (dehydrated):FOODON_00002643',
-              'food (dried):FOODON_03307539',
-              'food (fat or oil coated):FOODON_03460233',
-              'food (fermented):FOODON_00001258',
-              'food (filled):FOODON_00002644',
-              'food (flavored):FOODON_00002646',
-              'food (freeze-dried):FOODON_03301752',
-              'food (fresh):FOODON_00002457',
-              'food (fried):FOODON_00002660',
-              'food (frozen):FOODON_03302148',
-              'food (genetically-modified):FOODON_03530251',
-              'food (ground):FOODON_00002713',
-              'food (harvested):FOODON_00003398',
-              'food (heat treated):FOODON_03316043',
-              'food (hulled):FOODON_00002720',
-              'food (hydrolized):FOODON_00002653',
-              'food (irradiated):FOODON_03305364',
-              'food (juiced):FOODON_00003499',
-              'food (liquid):FOODON_03430130',
-              'food (milled):FOODON_00002649',
-              'food (not genetically-modified):FOODON_00003379',
-              'food (organically grown):FOODON_03306690',
-              'food (packaged):FOODON_00002739',
-              'food (packed in high pressurised containers):FOODON_03317139',
-              'food (pan-fried):FOODON_00002463',
-              'food (paste):FOODON_00003887',
-              'food (pasteurized):FOODON_00002654',
-              'food (peeled):FOODON_00002655',
-              'food (pickled):FOODON_00001079',
-              'food (powdered):FOODON_00002976',
-              'food (precooked):FOODON_00002971',
-              'food (precooked, frozen):FOODON_03305323',
-              'food (preserved):FOODON_00002158',
-              'food (puffed):FOODON_00002656',
-              'food (raw):FOODON_03311126',
-              'food (rehydrated):FOODON_00002755',
-              'food (roasted):FOODON_00002744',
-              'food (salted):FOODON_03460173',
-              'food (seasoned):FOODON_00002733',
-              'fruit (seedless):FOODON_00003461',
-              'food (semiliquid):FOODON_03430103',
-              'food (semisolid):FOODON_03430144',
-              'food (sliced):FOODON_00002455',
-              'food (smoked or smoke-flavored):FOODON_03460172',
-              'food (solid):FOODON_03430151',
-              'food (spoiled):FOODON_00003366',
-              'food (starch or flour thickened):FOODON_03315268',
-              'food (steamed):FOODON_00002657',
-              'food (sugar-free):FOODON_03315838',
-              'food (textured):FOODON_00002658',
-              'food (toasted):FOODON_00002659',
-              'food (unprocessed):FOODON_03316056',
-              'food (unstandardized):FOODON_03315636',
-              ]
-
-fo_organism = ['algae:FOODON_03411301',
-               'animal:FOODON_00003004',
-               'fungus:FOODON_03411261',
-               'lichen:FOODON_03412345',
-               'whole plant:PO_0000003',
-               ]
-
-ncbi_taxon = ['Actinopterygii:NCBITaxon_7898', #mix of taxon types
-              'Ecdysozoa:NCBITaxon_1206794',
-              'Echinodermata:NCBITaxon_7586',
-              'Fungi:NCBITaxon_4751',
-              'Spiralia:NCBITaxon_2697495',
-              'Viridiplantae:NCBITaxon_33090',
-              'Amphibia:NCBITaxon_8292',
-              #'Sauropsida:NCBITaxon_8457',
-              'Aves:NCBITaxon_8782',
-              'Crocodylia:NCBITaxon_1294634',
-              'Testudinata:NCBITaxon_2841271',
-              'Lepidosauria:NCBITaxon_8504',
-              #'Mammalia:NCBITaxon_40674',
-              'Artiodactyla:NCBITaxon_91561',
-              'Carnivora:NCBITaxon_33554',
-              'Chiroptera:NCBITaxon_9397',
-              'Chrysochloridae:NCBITaxon_9389',
-              'Eulipotyphla:NCBITaxon_9362',
-              'Hyracoidea:NCBITaxon_9810',
-              'Macroscelidea:NCBITaxon_28734',
-              'Metatheria:NCBITaxon_9263',
-              'Ornithorhynchidae:NCBITaxon_9256',
-              'Perissodactyla:NCBITaxon_9787',
-              'Pholidota:NCBITaxon_9971',
-              'Primates:NCBITaxon_9443',
-              'Proboscidea:NCBITaxon_9779',
-              'Rodentia:NCBITaxon_9989',
-              'Sirenia:NCBITaxon_9774',
-              'Tachyglossidae:NCBITaxon_9259',
-              'Tenrecidae:NCBITaxon_9369',
-              'Tubulidentata:NCBITaxon_9815',
-              'Xenarthra:NCBITaxon_9348',
-              ]
-
-arg_bins = {#'fo_consumer':fo_consumer,
-            'fo_product':fo_product,
-            'fo_quality':fo_quality,
-            'fo_organism':fo_organism,
-            'ncbi_taxon':ncbi_taxon,
-            }
diff -r 5244e7465767 -r be95a7ce968a lexmapr/ontology_reasoner.py
--- a/lexmapr/ontology_reasoner.py	Wed Aug 31 14:32:14 2022 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,449 +0,0 @@
-"""Ontology finder and visualizer"""
-
-import copy, json, logging, requests, time
-import pygraphviz as pgv
-
-logging.getLogger('urllib3').setLevel(logging.WARNING)
-
-
-# TODO: figure out what to do with root Thing:Thing
-class Ontology_accession:
-    '''Base class for defining attributes and behavior of single ontology accessions;
-       Assume format definition (whitespace and punctuation okay):ontology_id'''
-    existing_ontologies = {}
-
-    @staticmethod
-    def make_instance(acc):
-        '''Use instead of default __init__ to enforce one instance per ontology'''
-        try:
-            return(Ontology_accession.existing_ontologies[acc])
-        except(KeyError):
-            Ontology_accession.existing_ontologies[acc] = Ontology_accession(acc)
-            return(Ontology_accession.existing_ontologies[acc])
-
-    def __init__(self, acc):
-        '''If ontology is not recognized, just use short form, ex THING'''
-        def_split = acc.split(':')
-        self.label = ':'.join(def_split[:-1])
-        self.id = def_split[-1].replace('_',':')
-        self.parents = 'not assigned yet'
-        self.children = 'not assigned yet'
-        self.ancestors = 'not assigned yet'
-        self.descendants = 'not assigned yet'
-        self.graph_nodes = 'not assigned yet'
-        self.graph_fill = False
-        self.ontology = def_split[1].split('_')[0]
-        if self.label == '':
-            self._get_label()
-
-    def _api_results(self, input_list, return_list):
-        '''Ignore obsolete terms, not currently checking for \'term_replaced_by\''''
-        for x_term in input_list:
-            if x_term['is_obsolete']:
-                continue
-            new_term = x_term['label'] + ':' + x_term['short_form']
-            return_list.append(Ontology_accession.make_instance(new_term))
-        return(return_list)
-
-    def _add_edges(self, family_member, family_list, edge_set, round_num):
-        '''Add edges to graph'''
-        if edge_set == []:
-            return(edge_set)
-        elif round_num > 0:
-            for x in family_list:
-                x.get_family(family_member)
-                if family_member == 'parents': # TODO: how get x.family_member to collapse code
-                    if x.parents == ['none found']:
-                        continue
-                    if len(x.parents) > 5:
-                        time.sleep(0.05)
-                    new_edges = [(y._graph_label(),x._graph_label()) for y in x.parents]
-                    edge_set = edge_set + [z for z in new_edges if z not in edge_set]
-                    edge_set = x._add_edges(family_member, x.parents, edge_set, round_num-1)
-                elif family_member == 'children':
-                    if x.children == ['none found']:
-                        continue
-                    if len(x.children) > 5:
-                        time.sleep(0.05)
-                    new_edges = [(x._graph_label(),y._graph_label()) for y in x.children]
-                    edge_set = edge_set + [z for z in new_edges if z not in edge_set]
-                    edge_set = x._add_edges(family_member, x.children, edge_set, round_num-1)
-        return(edge_set)
-
-    def _draw_graph(self, o_file, node_color, edge_color):
-        '''Draw and save the graph'''
-        ontol_graph = pgv.AGraph(name='ontology_graph')
-        ontol_graph.add_node(self._graph_label())
-        for x in self.graph_nodes:
-            ontol_graph.add_edge(x[0], x[1])
-        ontol_graph.node_attr.update(shape='box',
-                                     style='rounded,filled',
-                                     fillcolor='lightgrey',
-                                     color=node_color)
-        ontol_graph.edge_attr.update(shape='normal',
-                                     color=edge_color,
-                                     dir='back')
-        ontol_graph.get_node(self._graph_label()).attr.update(fillcolor='lightblue')
-        # TODO: determine best algorithm: neato, fdp, nop, twopi; tried circo; not dot, sfdp
-        ontol_graph.draw(o_file, prog='twopi')
-
-    def _expand_edge(self, family_member, family_list, edge_set, old_set='', stop_terms=False):
-        '''Add edges to graph'''
-        while old_set != edge_set:
-            old_set = copy.deepcopy(edge_set)
-            for x in family_list:
-                if x == 'none found':
-                    break
-                if type(stop_terms) == list:
-                    if x in stop_terms:
-                        break
-                x.get_family(family_member)
-                if family_member == 'parents': # TODO: how get x.family_member to collapse code
-                    if x.parents == ['none found']:
-                        continue
-                    if len(x.parents) > 5:
-                        time.sleep(0.05)
-                    new_edges = [(y._graph_label(),x._graph_label()) for y in x.parents]
-                    edge_set = edge_set + [z for z in new_edges if z not in edge_set]
-                    edge_set = x._expand_edge(family_member,x.parents,edge_set,old_set,stop_terms)
-                elif family_member == 'children':
-                    if x.children == ['none found']:
-                        continue
-                    if len(x.children) > 5:
-                        time.sleep(0.05)
-                    new_edges = [(x._graph_label(),y._graph_label()) for y in x.children]
-                    edge_set = edge_set + [z for z in new_edges if z not in edge_set]
-                    edge_set = x._expand_edge(family_member,x.children,edge_set,old_set,stop_terms)
-        return(edge_set)
-
-    def _get_label(self):
-        '''Retrieve the label for an id; updates instance'''
-        query_url = 'http://www.ebi.ac.uk/ols/api/terms?obo_id={}'.format(self.id)
-        ols_resp = self._get_request(query_url)
-        if ols_resp is None:
-            logging.warning(f'Did not retrieve PURL for {self.id}')
-            self.label = 'unk'
-            return
-        try:
-            self.label = ols_resp.json()['_embedded']['terms'][0]['label']
-        except(KeyError):
-            logging.warning(f'Did not find label for {self.id} in OLS')
-            self.label = 'unk'
-        except json.decoder.JSONDecodeError as err:
-            time.sleep(0.05)
-            self._get_label()
-
-    def _get_request(self, request_url,
-                     max_retries=5):
-        '''Retrieve URL'''
-        while max_retries > 0:
-            try:
-                return(requests.get(request_url))
-            except:
-                time.sleep(0.05)
-                max_retries -= 1
-        return(None)
-
-    def _graph_label(self):
-        '''Format a graph label'''
-        return(self.id+'\\n'+self.label)
-
-    def _next_page(self, url_link, return_list):
-        '''Get next page of search results'''
-        next_resp = self._get_request(url_link)
-        if next_resp is None:
-            logging.warning(f'Did not retrieve URL for {url_link} during API search')
-            return(False, return_list)
-        else:
-            try:
-                next_link = next_resp.json()['_links']['next']['href']
-            except(KeyError):
-                next_link = False
-            return_list = self._api_results(next_resp.json()['_embedded']['terms'], return_list)
-            return(next_link, return_list)
-
-    def check_label(self):
-        '''Check if given definition is correct for an id; returns Boolean or str `unk`'''
-        given_label = self.label
-        self._get_label()
-        if self.label != 'unk':
-            return(given_label == self.label)
-        else:
-            return(self.label)
-
-    def get_family(self, family_member):
-        '''Returns list of parents, ancestors, children or descendants'''
-        if family_member == 'parents' and self.parents != 'not assigned yet':
-            return(self.parents)
-        elif family_member == 'children' and self.children != 'not assigned yet':
-            return(self.children)
-        elif family_member == 'ancestors' and self.ancestors != 'not assigned yet':
-            return(self.ancestors)
-        elif family_member == 'descendants' and self.descendants != 'not assigned yet':
-            return(self.descendants)
-
-        if self.id.split(':')[0].lower() == 'gaz':
-            query_url = 'https://www.ebi.ac.uk/ols/api/ontologies/gaz/terms?iri='
-            query_url += 'http://purl.obolibrary.org/obo/' + self.id.replace(':','_')
-            ols_resp = self._get_request(query_url)
-            qry_url = ols_resp.json()['_embedded']['terms'][0]['_links']\
-                              ['hierarchical'+family_member.title()]['href']
-        else:
-            query_url = 'http://www.ebi.ac.uk/ols/api/ontologies/{}/{}?id={}'
-            qry_url = query_url.format(self.id.split(':')[0].lower(),family_member,self.id)
-
-        ols_resp = self._get_request(qry_url)
-        if ols_resp is None:
-            logging.warning(f'Did not get URL for {qry_url} during search for {family_member}')
-            result_list = ['none found']
-        elif ols_resp.status_code > 200:
-            result_list = ['none found']
-        elif ols_resp.json()['page']['totalElements'] > 0:
-            result_list = self._api_results(ols_resp.json()['_embedded']['terms'], [])
-            if ols_resp.json()['page']['totalPages'] > 1:
-                next_url = ols_resp.json()['_links']['next']['href']
-                while next_url:
-                    next_url,result_list = self._next_page(next_url,result_list)
-        else:
-            result_list = ['none found']
-
-        if family_member == 'parents':
-            self.parents = list(set(result_list))
-        elif family_member == 'children':
-            self.children = list(set(result_list))
-        elif family_member == 'ancestors':
-            self.ancestors = list(set(result_list))
-        elif family_member == 'descendants':
-            self.descendants = list(set(result_list))
-        return(result_list)
-
-    def bin_term(self, bin_package):
-        '''Categorize term into given bins as Ontology_package'''
-        term_bins = []
-        self.get_family('ancestors')
-        if self.ancestors == ['none found']:
-            ancestor_labels = [x.label + ':' + x.id.replace(':','_') for x in [self]]
-        else:
-            ancestor_labels = [x.label+':'+x.id.replace(':','_') for x in [self]+self.ancestors]
-        return([x for x in ancestor_labels if x in bin_package.ontologies])
-
-    def visualize_term(self, o_file, node_color='black', edge_color='black',
-                       fill_out=False, stop_terms=False, draw_graph=True):
-        '''Visualize one term'''
-        if self.graph_nodes!='not assigned yet' and self.graph_fill==fill_out:
-            if draw_graph:
-                self._draw_graph(o_file, node_color, edge_color)
-        else:
-            self.get_family('parents')
-            self.get_family('children')
-            edge_set1,edge_set2 = [],[]
-            if self.parents != ['none found']:
-                edge_set1 = [(x._graph_label(),self._graph_label()) for x in self.parents]
-            if self.children != ['none found']:
-                edge_set2 = [(self._graph_label(),x._graph_label()) for x in self.children]
-            if type(fill_out) == int:
-                edge_set1 = self._add_edges('parents', self.parents, edge_set1, fill_out-1)
-                edge_set2 = self._add_edges('children', self.children, edge_set2, fill_out-1)
-            elif fill_out==True:
-                edge_set1 = self._expand_edge('parents',self.parents,edge_set1,'',stop_terms)
-                edge_set2 = self._expand_edge('children',self.children,edge_set2,'',stop_terms)
-            self.graph_nodes = list(set(edge_set1+edge_set2))
-            if draw_graph:
-                self._draw_graph(o_file, node_color, edge_color)
-
-
-class Ontology_package:
-    '''Associate or package Ontology_accession objects together'''
-    def __init__(self, package_label, ontol_list):
-        self.label = package_label
-        self.ontologies = ontol_list
-        self.bins = []
-        self.lcp = 'not assigned yet'
-        self.hcc = 'not assigned yet'
-        self._lcp_state = (True,[])
-        self._hcc_state = (True,[])
-        self._bin_state = []
-        self.graph_nodes = 'not assigned yet'
-        self.graph_state = False
-
-    def _common_family(self,family_member,incl_terms,excl_terms):
-        '''Find common family members'''
-        family_candidates = {}
-        for ontol_term in [x for x in self.ontologies if x.id not in excl_terms]:
-            family_candidates[ontol_term] = ontol_term.get_family(family_member)
-        common_members = self._common_list(family_candidates, incl_terms)
-        while common_members == []:
-            for ontol_term in [x for x in self.ontologies if x.id not in excl_terms]:
-                if len(self.ontologies) > 30:
-                    time.sleep(0.05)
-                original_list = list(family_candidates[ontol_term])
-                for family_ontol in original_list:
-                    if len(original_list) > 30:
-                        time.sleep(0.05)
-                    try:
-                        family_candidates[ontol_term].extend(\
-                            family_ontol.get_family(family_member))
-                    except(AttributeError):
-                        family_candidates[ontol_term].extend(['none found'])
-            common_members = self._common_list(family_candidates, incl_terms)
-        return(common_members)
-
-    def _common_list(self, input_dic, incl_terms):
-        '''Compare input dictionary keys and list'''
-        term_lists = []
-        for ontol_key in input_dic:
-            append_list = [ontol_key]
-            for ontol_val in input_dic[ontol_key]:
-                append_list.append(ontol_val)
-            term_lists.append(append_list)
-        common_set = set.intersection(*map(set, term_lists))
-        if incl_terms:
-            common_keys = []
-            for ontol_acc in common_set:
-                if ontol_acc in input_dic.keys():
-                    common_keys.append(ontol_acc)
-            if common_keys != []:
-                return(common_keys)
-        return(list(common_set - set(input_dic.keys())))
-
-    def _draw_graph(self, o_file, node_color, edge_color, show_lcp, show_hcc):
-        '''Draw and save graph'''
-        ontol_graph = pgv.AGraph(name='ontology_graph')
-        for x in self.ontologies:
-            ontol_graph.add_node(x._graph_label())
-        for x in self.graph_nodes:
-            ontol_graph.add_edge(x[0], x[1])
-        ontol_graph.node_attr.update(shape='box', style='rounded,filled',
-                                     fillcolor='lightgrey', color=node_color)
-        ontol_graph.edge_attr.update(shape='normal', color=edge_color, dir='back')
-        if show_lcp:
-            for x in self.lcp:
-                ontol_graph.get_node(x._graph_label()).attr.update(fillcolor='beige')
-        if show_hcc:
-            for x in self.hcc:
-                ontol_graph.get_node(x._graph_label()).attr.update(fillcolor='beige')
-        for x in self.ontologies:
-            ontol_graph.get_node(x._graph_label()).attr.update(fillcolor='lightblue')
-        ontol_graph.draw(o_file,prog='dot')
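
For readers reconstructing the removed pipeline: the deleted ontology_reasoner.py above resolves accessions through the EBI OLS REST API (http://www.ebi.ac.uk/ols/api/terms?obo_id=...). Below is a minimal standalone sketch of that lookup pattern; the helper name get_label, the retry count, and the example accession are illustrative assumptions, not part of the patch.

    """Minimal sketch of the OLS label lookup used by the removed
    Ontology_accession._get_label(); the endpoint and JSON shape mirror
    the code above, while get_label, max_retries=5, and the example
    accession are assumptions for the demo."""

    import time
    import requests


    def get_label(obo_id, max_retries=5):
        '''Return the OLS label for an OBO id such as 'FOODON:00001015',
           or 'unk' when the term cannot be resolved.'''
        query_url = 'http://www.ebi.ac.uk/ols/api/terms?obo_id={}'.format(obo_id)
        while max_retries > 0:
            try:
                ols_resp = requests.get(query_url)
                # OLS nests matches under _embedded.terms; take the first hit's label
                return ols_resp.json()['_embedded']['terms'][0]['label']
            except (requests.RequestException, KeyError, IndexError, ValueError):
                # network error, no match, or bad JSON: back off briefly and retry,
                # as _get_request()/_get_label() do in the module above
                time.sleep(0.05)
                max_retries -= 1
        return 'unk'


    if __name__ == '__main__':
        # expected output: 'plant food product' (see the fo_product bin above)
        print(get_label('FOODON:00001015'))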