"""Ontology finder and visualizer"""

import copy
import json
import logging
import time

import pygraphviz as pgv
import requests

logging.getLogger('urllib3').setLevel(logging.WARNING)

# Placeholder values stored on instances before/after a look-up
_NOT_ASSIGNED = 'not assigned yet'
_NONE_FOUND = 'none found'
# Family relations that are cached as instance attributes of the same name
_FAMILY_KINDS = ('parents', 'children', 'ancestors', 'descendants')


# TODO: figure out what to do with root Thing:Thing
class Ontology_accession:
    '''Single ontology accession with family look-ups through the EBI OLS API.

    Accessions are written as definition:ontology_id, for example
    "some label:ABC_0000001"; whitespace and punctuation are okay in the
    definition.  Use make_instance() rather than the constructor so that
    each accession string maps to exactly one shared object.'''
    existing_ontologies = {}

    @staticmethod
    def make_instance(acc):
        '''Return the shared instance for accession string acc, creating
           and registering it in existing_ontologies on first use'''
        registry = Ontology_accession.existing_ontologies
        if acc not in registry:
            registry[acc] = Ontology_accession(acc)
        return registry[acc]

    def __init__(self, acc):
        '''Split acc into label and id; if no label is given (ex THING or
           :ABC_0000001) the label is requested from OLS.

           The id is the text after the last colon with "_" replaced by ":";
           the ontology prefix is the part of that text before the first "_".'''
        def_split = acc.split(':')
        self.label = ':'.join(def_split[:-1])
        self.id = def_split[-1].replace('_', ':')
        self.parents = _NOT_ASSIGNED
        self.children = _NOT_ASSIGNED
        self.ancestors = _NOT_ASSIGNED
        self.descendants = _NOT_ASSIGNED
        self.graph_nodes = _NOT_ASSIGNED
        self.graph_fill = False
        # Arguments that produced the cached graph_nodes, see visualize_term()
        self._graph_request = None
        # Use the last piece so labels containing ':' and accessions without
        # any ':' both resolve to the id's prefix instead of failing
        self.ontology = def_split[-1].split('_')[0]
        if self.label == '':
            self._get_label()

    def _api_results(self, input_list, return_list):
        '''Append an Ontology_accession for every non-obsolete term record in
           input_list to return_list and return it; not currently checking
           for 'term_replaced_by\''''
        for x_term in input_list:
            if x_term['is_obsolete']:
                continue
            new_term = x_term['label'] + ':' + x_term['short_form']
            return_list.append(Ontology_accession.make_instance(new_term))
        return return_list

    def _family_edges(self, family_member):
        '''Return graph edges between this term and its parents or children.

           Edges are (parent_label, child_label) tuples.  Returns None when
           family_member is neither 'parents' nor 'children' or when the
           look-up found none, so callers know not to recurse further.
           get_family(family_member) must have been called beforehand.'''
        if family_member not in ('parents', 'children'):
            return None
        relatives = getattr(self, family_member)
        if relatives == [_NONE_FOUND]:
            return None
        if len(relatives) > 5:
            # Brief pause before fanning out over many relatives
            time.sleep(0.05)
        own_label = self._graph_label()
        if family_member == 'parents':
            return [(rel._graph_label(), own_label) for rel in relatives]
        return [(own_label, rel._graph_label()) for rel in relatives]

    def _add_edges(self, family_member, family_list, edge_set, round_num):
        '''Add edges for round_num further generations of family_member
           ('parents' or 'children') of the terms in family_list.

           Returns edge_set unchanged when it is empty or no rounds remain;
           otherwise returns a new list that also holds the added edges.'''
        if edge_set == [] or round_num <= 0:
            return edge_set
        for term in family_list:
            term.get_family(family_member)
            new_edges = term._family_edges(family_member)
            if new_edges is None:
                continue
            edge_set = edge_set + [z for z in new_edges if z not in edge_set]
            edge_set = term._add_edges(family_member, getattr(term, family_member),
                                       edge_set, round_num - 1)
        return edge_set

    def _draw_graph(self, o_file, node_color, edge_color):
        '''Draw graph_nodes around this term and save the image to o_file;
           this term is filled light blue, all other nodes light grey'''
        ontol_graph = pgv.AGraph(name='ontology_graph')
        ontol_graph.add_node(self._graph_label())
        for x in self.graph_nodes:
            ontol_graph.add_edge(x[0], x[1])
        ontol_graph.node_attr.update(shape='box',
                                     style='rounded,filled',
                                     fillcolor='lightgrey',
                                     color=node_color)
        ontol_graph.edge_attr.update(shape='normal',
                                     color=edge_color,
                                     dir='back')
        ontol_graph.get_node(self._graph_label()).attr.update(fillcolor='lightblue')
        # TODO: determine best algorithm: neato, fdp, nop, twopi; tried circo; not dot, sfdp
        ontol_graph.draw(o_file, prog='twopi')

    def _expand_edge(self, family_member, family_list, edge_set, old_set='', stop_terms=False):
        '''Add edges for family_member ('parents' or 'children') of the terms
           in family_list, recursing until a pass adds no new edge.

           stop_terms may be a list of Ontology_accession objects at which
           expansion stops.  Returns the extended edge list.'''
        while old_set != edge_set:
            # Edges are immutable tuples of strings, a shallow copy is enough
            old_set = copy.copy(edge_set)
            for term in family_list:
                if term == _NONE_FOUND:
                    break
                # NOTE(review): `break` also skips the terms listed after a
                # stop term; kept as-is, confirm `continue` was not intended
                if isinstance(stop_terms, list) and term in stop_terms:
                    break
                term.get_family(family_member)
                new_edges = term._family_edges(family_member)
                if new_edges is None:
                    continue
                edge_set = edge_set + [z for z in new_edges if z not in edge_set]
                edge_set = term._expand_edge(family_member, getattr(term, family_member),
                                             edge_set, old_set, stop_terms)
        return edge_set

    def _get_label(self, max_retries=5):
        '''Retrieve the label for this id from OLS and store it in self.label.

           The label becomes 'unk' when the request fails, when the reply has
           no term label, or when the reply is still not valid JSON after
           max_retries attempts.'''
        query_url = 'http://www.ebi.ac.uk/ols/api/terms?obo_id={}'.format(self.id)
        for _ in range(max_retries):
            ols_resp = self._get_request(query_url)
            if ols_resp is None:
                logging.warning(f'Did not retrieve PURL for {self.id}')
                self.label = 'unk'
                return
            try:
                self.label = ols_resp.json()['_embedded']['terms'][0]['label']
                return
            except (KeyError, IndexError):
                logging.warning(f'Did not find label for {self.id} in OLS')
                self.label = 'unk'
                return
            except ValueError:
                # JSON decode errors are ValueError subclasses; retry the
                # request a bounded number of times instead of recursing
                time.sleep(0.05)
        logging.warning(f'Did not get valid JSON for {self.id} from OLS')
        self.label = 'unk'

    def _get_request(self, request_url, max_retries=5):
        '''Return the response for a GET of request_url, retrying up to
           max_retries times on request errors; returns None if all fail'''
        while max_retries > 0:
            try:
                return requests.get(request_url)
            except requests.exceptions.RequestException:
                time.sleep(0.05)
                max_retries -= 1
        return None

    def _graph_label(self):
        '''Format a graph label: id, a literal backslash-n, then the label'''
        return self.id + '\\n' + self.label

    def _next_page(self, url_link, return_list):
        '''Get next page of search results.

           Returns (next_link, return_list) where next_link is the URL of the
           following page or False when there is none or the request failed.'''
        next_resp = self._get_request(url_link)
        if next_resp is None:
            logging.warning(f'Did not retrieve URL for {url_link} during API search')
            return (False, return_list)
        payload = next_resp.json()
        try:
            next_link = payload['_links']['next']['href']
        except KeyError:
            next_link = False
        return_list = self._api_results(payload['_embedded']['terms'], return_list)
        return (next_link, return_list)

    def check_label(self):
        '''Check if given definition is correct for an id.

           Returns True or False depending on whether the label held before
           the call equals the label OLS reports, or the str `unk` when OLS
           gave no label.  Side effect: self.label is replaced by the OLS
           label (or `unk`) because _get_label() updates the instance.'''
        given_label = self.label
        self._get_label()
        if self.label == 'unk':
            return self.label
        return self.label == given_label

    def _family_url(self, family_member):
        '''Return the OLS URL listing family_member of this term, or None
           when it cannot be determined'''
        ontology = self.id.split(':')[0].lower()
        if ontology != 'gaz':
            query_url = 'http://www.ebi.ac.uk/ols/api/ontologies/{}/{}?id={}'
            return query_url.format(ontology, family_member, self.id)
        # gaz terms are fetched by IRI and the family URL is read from the
        # hierarchical link of the returned term record
        query_url = 'https://www.ebi.ac.uk/ols/api/ontologies/gaz/terms?iri='
        query_url += 'http://purl.obolibrary.org/obo/' + self.id.replace(':', '_')
        ols_resp = self._get_request(query_url)
        if ols_resp is None:
            logging.warning(f'Did not get URL for {query_url} during search for {family_member}')
            return None
        try:
            term_links = ols_resp.json()['_embedded']['terms'][0]['_links']
            return term_links['hierarchical' + family_member.title()]['href']
        except (KeyError, IndexError, ValueError):
            # No such link (or unreadable reply): treated as nothing found
            return None

    def get_family(self, family_member):
        '''Returns list of parents, ancestors, children or descendants.

           Results are Ontology_accession objects, or ['none found'] when the
           look-up fails or is empty.  The de-duplicated result is cached on
           the attribute named by family_member and returned directly by
           later calls.'''
        if family_member in _FAMILY_KINDS:
            cached = getattr(self, family_member)
            if cached != _NOT_ASSIGNED:
                return cached

        result_list = [_NONE_FOUND]
        qry_url = self._family_url(family_member)
        if qry_url is not None:
            ols_resp = self._get_request(qry_url)
            if ols_resp is None:
                logging.warning(f'Did not get URL for {qry_url} during search for {family_member}')
            elif ols_resp.status_code <= 200:
                payload = ols_resp.json()
                if payload['page']['totalElements'] > 0:
                    result_list = self._api_results(payload['_embedded']['terms'], [])
                    if payload['page']['totalPages'] > 1:
                        next_url = payload['_links']['next']['href']
                        while next_url:
                            next_url, result_list = self._next_page(next_url, result_list)

        if family_member in _FAMILY_KINDS:
            setattr(self, family_member, list(set(result_list)))
        return result_list

    def bin_term(self, bin_package):
        '''Categorize term into given bins as Ontology_package.

           Returns the label:ontology_id strings of this term and its
           ancestors that are found in bin_package.ontologies.'''
        # NOTE(review): membership is tested with strings, so this presumably
        # expects bin_package.ontologies to hold accession strings - confirm
        self.get_family('ancestors')
        lineage = [self]
        if self.ancestors != [_NONE_FOUND]:
            lineage = lineage + self.ancestors
        ancestor_labels = [x.label + ':' + x.id.replace(':', '_') for x in lineage]
        return [x for x in ancestor_labels if x in bin_package.ontologies]

    def visualize_term(self, o_file, node_color='black', edge_color='black',
                       fill_out=False, stop_terms=False, draw_graph=True):
        '''Visualize one term.

           Collects edges to parents and children into self.graph_nodes and,
           if draw_graph, saves the drawing to o_file.  fill_out as an int
           adds that many generations in total; fill_out=True expands until
           no new edge is found, optionally stopping at stop_terms.  Edges
           are reused when the same fill_out and stop_terms are requested
           again.'''
        stop_key = list(stop_terms) if isinstance(stop_terms, list) else stop_terms
        # type() is part of the key because 1 == True but they expand differently
        request = (type(fill_out), fill_out, stop_key)
        if self.graph_nodes == _NOT_ASSIGNED or self._graph_request != request:
            self.get_family('parents')
            self.get_family('children')
            edge_set1, edge_set2 = [], []
            if self.parents != [_NONE_FOUND]:
                edge_set1 = [(x._graph_label(), self._graph_label()) for x in self.parents]
            if self.children != [_NONE_FOUND]:
                edge_set2 = [(self._graph_label(), x._graph_label()) for x in self.children]
            if type(fill_out) is int:  # exact type: bool must not match here
                edge_set1 = self._add_edges('parents', self.parents, edge_set1, fill_out - 1)
                edge_set2 = self._add_edges('children', self.children, edge_set2, fill_out - 1)
            elif fill_out == True:  # noqa: E712 - keeps original equality semantics
                edge_set1 = self._expand_edge('parents', self.parents, edge_set1, '', stop_terms)
                edge_set2 = self._expand_edge('children', self.children, edge_set2, '', stop_terms)
            self.graph_nodes = list(set(edge_set1 + edge_set2))
            self.graph_fill = fill_out
            self._graph_request = request
        if draw_graph:
            self._draw_graph(o_file, node_color, edge_color)


class Ontology_package:
    '''Associate or package Ontology_accession objects together'''
    def __init__(self, package_label, ontol_list):
        '''Store the package label and its list of ontology terms; lcp, hcc
           and graph_nodes start as the 'not assigned yet' placeholder'''
        self.label = package_label
        self.ontologies = ontol_list
        self.bins = []
        self.lcp = _NOT_ASSIGNED
        self.hcc = _NOT_ASSIGNED
        self._lcp_state = (True, [])
        self._hcc_state = (True, [])
        self._bin_state = []
        self.graph_nodes = _NOT_ASSIGNED
        self.graph_state = False
        # Read by visualize_terms(); was previously never defined
        self.graph_fill = False
        # Arguments that produced the cached graph_nodes
        self._graph_request = None

    @staticmethod
    def _usable_terms(terms):
        '''Return the entries of terms that are real accessions: [] for a
           str placeholder, and 'none found' markers are dropped'''
        if isinstance(terms, str):
            return []
        return [x for x in terms if x != _NONE_FOUND]

    def _common_family(self, family_member, incl_terms, excl_terms):
        '''Find common family members.

           Widens the family_member relatives of every non-excluded term one
           generation at a time until _common_list() reports something in
           common.  Returns that list, or [] when there is nothing to compare
           or every look-up is exhausted without a common member.'''
        included = [x for x in self.ontologies if x.id not in excl_terms]
        if not included:
            return []
        family_candidates = {}  # term -> unique relatives collected so far
        frontier = {}           # term -> relatives added last pass, not yet expanded
        for ontol_term in included:
            # Copy: get_family() may hand back its cached list, which must
            # not be extended in place
            first_gen = list(dict.fromkeys(ontol_term.get_family(family_member)))
            family_candidates[ontol_term] = first_gen
            frontier[ontol_term] = list(first_gen)
        common_members = self._common_list(family_candidates, incl_terms)
        while common_members == [] and any(frontier.values()):
            for ontol_term in included:
                if len(self.ontologies) > 30:
                    time.sleep(0.05)
                to_expand = frontier[ontol_term]
                seen = set(family_candidates[ontol_term])
                fresh = []
                for family_ontol in to_expand:
                    if len(to_expand) > 30:
                        time.sleep(0.05)
                    try:
                        found = family_ontol.get_family(family_member)
                    except AttributeError:
                        # 'none found' marker strings have no get_family()
                        found = [_NONE_FOUND]
                    for relative in found:
                        if relative not in seen:
                            seen.add(relative)
                            fresh.append(relative)
                family_candidates[ontol_term].extend(fresh)
                frontier[ontol_term] = fresh
            # Re-evaluate after each widening pass; without this the loop
            # condition could never change
            common_members = self._common_list(family_candidates, incl_terms)
        return common_members

    def _common_list(self, input_dic, incl_terms):
        '''Compare input dictionary keys and list.

           Each key together with its values forms one set; returns the
           members common to all sets.  With incl_terms, keys found in the
           common set are returned in preference; otherwise (or when there
           are none) the common members that are not keys are returned.'''
        if not input_dic:
            return []
        term_sets = [{ontol_key, *ontol_vals} for ontol_key, ontol_vals in input_dic.items()]
        common_set = set.intersection(*term_sets)
        if incl_terms:
            common_keys = [x for x in common_set if x in input_dic]
            if common_keys:
                return common_keys
        return list(common_set - set(input_dic))

    def _draw_graph(self, o_file, node_color, edge_color, show_lcp, show_hcc):
        '''Draw graph_nodes and save the image to o_file; package terms are
           filled light blue and, if requested, lcp/hcc terms beige'''
        ontol_graph = pgv.AGraph(name='ontology_graph')
        for x in self.ontologies:
            ontol_graph.add_node(x._graph_label())
        for x in self.graph_nodes:
            ontol_graph.add_edge(x[0], x[1])
        ontol_graph.node_attr.update(shape='box', style='rounded,filled',
                                     fillcolor='lightgrey', color=node_color)
        ontol_graph.edge_attr.update(shape='normal', color=edge_color, dir='back')
        highlighted = []
        if show_lcp:
            highlighted += self._usable_terms(self.lcp)
        if show_hcc:
            highlighted += self._usable_terms(self.hcc)
        for x in highlighted:
            # Only nodes that made it into the graph can be coloured
            if ontol_graph.has_node(x._graph_label()):
                ontol_graph.get_node(x._graph_label()).attr.update(fillcolor='beige')
        for x in self.ontologies:
            ontol_graph.get_node(x._graph_label()).attr.update(fillcolor='lightblue')
        ontol_graph.draw(o_file, prog='dot')

    def _list_hierarchy(self, input_list, input_position):
        '''Get lowest or highest terms.

           For 'lowest', terms that are an ancestor of another listed term
           are removed; for 'highest', terms that are a descendant of one.
           input_list is modified in place and returned.  Raises ValueError
           for any other input_position.'''
        if input_list == [_NONE_FOUND]:
            return input_list
        position_family = {'lowest': 'ancestors', 'highest': 'descendants'}
        if input_position not in position_family:
            raise ValueError(f'input_position must be lowest or highest, not {input_position!r}')
        family_kind = position_family[input_position]
        family_lists = {}
        for input_term in input_list:
            if len(input_list) > 30:
                time.sleep(0.05)
            if input_term == _NONE_FOUND:
                family_lists[input_term] = _NONE_FOUND
            else:
                family_lists[input_term] = input_term.get_family(family_kind)
        while True:
            remove_terms = []
            for input_term in input_list:
                if any(input_term in members for members in family_lists.values()):
                    family_lists.pop(input_term, None)
                    remove_terms.append(input_term)
            if not remove_terms:
                break
            for x_term in remove_terms:
                input_list.remove(x_term)
        return input_list

    def _trim_tips(self):
        '''Repeatedly drop edges whose parent-side node has no parent edge or
           whose child-side node has no child edge, unless that node is one
           of self.ontologies or self.lcp; updates self.graph_nodes'''
        tip_nodes = {x._graph_label() for x in self.ontologies}
        tip_nodes.update(x._graph_label() for x in self._usable_terms(self.lcp))
        old_nodes = []
        while old_nodes != self.graph_nodes:
            old_nodes = self.graph_nodes
            left_nodes = {x[0] for x in self.graph_nodes}
            right_nodes = {x[1] for x in self.graph_nodes}
            top_nodes = (left_nodes - right_nodes) - tip_nodes
            bot_nodes = (right_nodes - left_nodes) - tip_nodes
            self.graph_nodes = [x for x in self.graph_nodes
                                if x[0] not in top_nodes and x[1] not in bot_nodes]

    def get_lcp(self, incl_terms=True, excl_terms=None):  # TODO: missing excl_terms
        '''Find lowest common parent(s); can include input terms as lcp,
           exclude terms by obo id; saves results in lcp attribute.

           Nothing is recomputed when lcp is already assigned for the same
           incl_terms and excl_terms.'''
        excl_terms = [] if excl_terms is None else list(excl_terms)
        if self._lcp_state == (incl_terms, excl_terms) and self.lcp != _NOT_ASSIGNED:
            return
        common_members = self._common_family('parents', incl_terms, excl_terms)
        common_members = self._list_hierarchy(common_members, 'lowest')
        if common_members != []:
            self.lcp = common_members
            self._lcp_state = (incl_terms, excl_terms)

    def get_hcc(self, incl_terms=True, excl_terms=None):
        '''Get highest common child(ren); can include input terms as hcc;
           exclude terms by obo id; saves results in hcc attribute.

           Nothing is recomputed when hcc is already assigned for the same
           incl_terms and excl_terms.'''
        excl_terms = [] if excl_terms is None else list(excl_terms)
        if self._hcc_state == (incl_terms, excl_terms) and self.hcc != _NOT_ASSIGNED:
            return
        common_members = self._common_family('children', incl_terms, excl_terms)
        common_members = self._list_hierarchy(common_members, 'highest')
        if common_members != []:
            self.hcc = common_members
            self._hcc_state = (incl_terms, excl_terms)

    def set_lcp(self, lcp_acc, incl_terms=True, excl_terms=None):
        '''Assign lcp directly and record the settings it corresponds to'''
        excl_terms = [] if excl_terms is None else list(excl_terms)
        self.lcp = lcp_acc
        self._lcp_state = (incl_terms, excl_terms)

    def set_hcc(self, hcc_acc, incl_terms=True, excl_terms=None):
        '''Assign hcc directly and record the settings it corresponds to'''
        excl_terms = [] if excl_terms is None else list(excl_terms)
        self.hcc = hcc_acc
        self._hcc_state = (incl_terms, excl_terms)

    def bin_terms(self, bin_package):
        '''Categorize terms by those in Ontology_package; saves results in bins attribute'''
        if self._bin_state == bin_package:
            return
        package_bins = []
        for x in self.ontologies:
            package_bins.extend(x.bin_term(bin_package))
        self.bins = list(set(package_bins))
        # Remember which package was used so the early return above can fire
        self._bin_state = bin_package

    def _stop_terms(self, lcp_stop, hcc_stop):
        '''Return the terms at which graph expansion stops (lcp and/or hcc),
           or False when neither stop was requested'''
        if not (lcp_stop or hcc_stop):
            return False
        stops = []
        for wanted, attr_name in ((lcp_stop, 'lcp'), (hcc_stop, 'hcc')):
            if not wanted:
                continue
            attr_terms = getattr(self, attr_name)
            if isinstance(attr_terms, str):
                logging.warning(f'{attr_name}_stop requested but {attr_name} is not assigned')
            stops.extend(self._usable_terms(attr_terms))
        return stops

    def visualize_terms(self, o_file, fill_out=False, show_lcp=False, show_hcc=False,
                        node_color='black', edge_color='black',
                        lcp_stop=False, hcc_stop=False, trim_nodes=False):
        '''Visualize terms.

           Merges the edges of every term in the package into
           self.graph_nodes (skipping terms that are themselves stop terms),
           optionally trims tips, and draws to o_file.  Graphs with more than
           150 edges are not drawn; their edge table is logged instead.
           Edges are reused when called again with the same settings.'''
        stop_terms = self._stop_terms(lcp_stop, hcc_stop)
        request = (type(fill_out), fill_out, stop_terms, list(self.ontologies),
                   self._usable_terms(self.lcp) if trim_nodes else None)
        if self.graph_nodes == _NOT_ASSIGNED or self._graph_request != request:
            merged_edges = []
            seen_edges = set()
            for x in self.ontologies:
                if stop_terms is not False and x in stop_terms:
                    continue
                x.visualize_term(o_file, fill_out=fill_out,
                                 stop_terms=stop_terms, draw_graph=False)
                for z in x.graph_nodes:
                    if z not in seen_edges:
                        seen_edges.add(z)
                        merged_edges.append(z)
            self.graph_nodes = merged_edges
            if trim_nodes:
                self._trim_tips()
            self.graph_fill = fill_out
            self._graph_request = request
        if len(self.graph_nodes) > 150:
            edge_rows = ['Parent node\tChild node']
            edge_rows.extend('\t'.join(edge_tuple) for edge_tuple in self.graph_nodes)
            edge_string = '\n'.join(edge_rows)
            logging.info(f'Not drawing graph with {len(self.graph_nodes)} edges:'
                         f'\n\n{edge_string}\n')
        else:
            self._draw_graph(o_file, node_color, edge_color, show_lcp, show_hcc)