"""Reports and visualizes results"""

import logging
import os
import re
import shutil
import time

import matplotlib.pyplot as plt
import pandas
import seaborn as sns

import lexmapr.ontology_reasoner as ontr

# Keep matplotlib's chatty DEBUG/INFO records out of the pipeline log.
logging.getLogger('matplotlib').setLevel(logging.WARNING)


def _split_results(pandas_series, x_col, y_col, split_delim=True):
    '''Format a value-count series as a dataframe, splitting |-delimited terms.

    Each index entry of `pandas_series` may hold several terms joined by
    ``|``; counts are summed per individual term.

    :param pandas_series: value counts (term -> count), e.g. from
        ``Series.value_counts()``
    :param x_col: column name for the term labels
    :param y_col: column name for the counts
    :param split_delim: if True, drop the last ``:``-separated component of
        each term (e.g. ``'name:ACC_123'`` -> ``'name'``) for the x column
    :return: two-column ``pandas.DataFrame``
    '''
    graph_dic = {}
    for term, count in pandas_series.items():
        for single_term in term.split('|'):
            graph_dic[single_term] = graph_dic.get(single_term, 0) + count
    if split_delim:
        # Strip the trailing ':accession' portion of each key.
        x_vals = [':'.join(k.split(':')[:-1]) for k in graph_dic]
    else:
        x_vals = list(graph_dic.keys())
    return pandas.DataFrame({x_col: x_vals, y_col: list(graph_dic.values())})


def _get_ontols(map_res, match_col, bin_col):
    '''Make instances of Ontology_accession and group them under their bins.

    :param map_res: mapping-results dataframe
    :param match_col: column holding matched ontology terms
    :param bin_col: column holding bin accessions
    :return: ``(lcp_list, term_list)`` — bins that gained at least one child
        term, and all child terms found; both empty when more than 100 terms
        were collected (too many nodes to draw legibly)
    '''
    red_res = map_res[map_res[bin_col].notna()]
    mapped_terms = _split_results(red_res[match_col].value_counts(), 'x', 'y', split_delim=False)
    mapped_bins = _split_results(red_res[bin_col].value_counts(), 'x', 'y', split_delim=False)
    ontol_sets = {}
    for bin_acc in list(mapped_bins['x']):
        ontol_sets[ontr.Ontology_accession.make_instance(bin_acc)] = set()
        time.sleep(0.05)  # throttle ontology-service lookups
    for term_acc in list(mapped_terms['x']):
        if term_acc == 'No Match':
            continue
        term_ontol = ontr.Ontology_accession.make_instance(term_acc)
        if term_ontol.ancestors == 'not assigned yet':
            term_ontol.get_family('ancestors')
            time.sleep(0.05)  # throttle ontology-service lookups
        if term_ontol.ancestors == ['none found']:
            continue
        for bin_ontol in ontol_sets:
            if bin_ontol in term_ontol.ancestors:
                ontol_sets[bin_ontol].add(term_ontol)
    lcp_set = set()
    term_set = set()
    for bin_ontol, children in ontol_sets.items():
        if children:
            lcp_set.add(bin_ontol)
            term_set |= children
    if len(term_set) > 100:
        # Too many child nodes to graph; log the ids (4 per line) instead.
        term_list = [t.id for t in term_set]
        terms_string = ''
        for a, b, c, d in zip(term_list[::4], term_list[1::4],
                              term_list[2::4], term_list[3::4]):
            terms_string += f'\n\t\t{a}\t{b}\t{c}\t{d}'
        logging.info(f'Not drawing {bin_col} graph with {len(term_list)} '
                     f'child nodes:\n{terms_string}\n')
        return [], []
    return list(lcp_set), list(term_set)


def report_results(out_file, arg_bins):
    '''Print mapping counts to the log.

    :param out_file: path to a tab-delimited mapping-results file
    :param arg_bins: list of bin column names to count
    '''
    mapping_results = pandas.read_csv(out_file, header=0, delimiter='\t')
    match_status = mapping_results['Match_Status (Macro Level)'].value_counts()
    logging.info(f'\t\tNo. unique terms: {len(mapping_results["Sample_Desc"])}')
    for status, count in match_status.items():
        logging.info(f'\t\tNo. {status}: {count}')
    for bin_col in arg_bins:
        logging.info(f'\t\tNo. mapped under {bin_col}: {mapping_results[bin_col].count()}')


def report_cache(term_cache):
    '''Print mapping counts to log from cache, only count unique terms.

    :param term_cache: dict mapping each term to its result string
    :return: dict of match-status label -> unique-term count
    '''
    # TODO: add counts for bins?
    # -1 presumably excludes a non-term (header/sentinel) cache entry —
    # retained from original; confirm against the cache writer.
    logging.info(f'\t\tNo. unique terms: {len(term_cache)-1}')
    # Insertion order matters: it fixes the key order of the returned dict,
    # which downstream graphing (visualize_cache) iterates.
    counts = {'No Match': 0, 'Full Term Match': 0,
              'Synonym Match': 0, 'Component Match': 0}
    for term in term_cache:
        # A result string can mention several statuses; count each it matches.
        for status in counts:
            if re.search(status, term_cache[term]):
                counts[status] += 1
    logging.info(f'\t\tNo. Unique Full Term Match: {counts["Full Term Match"]}')
    logging.info(f'\t\tNo. Unique Synonym Match: {counts["Synonym Match"]}')
    logging.info(f'\t\tNo. Unique Component Match: {counts["Component Match"]}')
    logging.info(f'\t\tNo. Unique No Match: {counts["No Match"]}')
    return counts


def figure_folder():
    '''Prepare an empty lexmapr_figures/ folder, replacing any existing one.'''
    try:
        shutil.rmtree('lexmapr_figures/')
    except FileNotFoundError:
        pass  # nothing to remove on first run
    os.mkdir('lexmapr_figures/')


def visualize_cache(match_counts):
    '''Generate the match-status bar graph from cached counts.

    :param match_counts: dict of match-status -> count (see report_cache)
    '''
    # TODO: add graphing for bins?
    # NOTE: ci=None (no error bars) is deprecated in newer seaborn in
    # favor of errorbar=None; kept for compatibility with the pinned version.
    sns_fig = sns.barplot(x=list(match_counts.keys()),
                          y=list(match_counts.values()), ci=None).get_figure()
    plt.xticks(rotation=90)
    plt.tight_layout()
    sns_fig.savefig('lexmapr_figures/mapping_results.png')
    logging.info('Did not attempt to make bin graphs')


def visualize_results(out_file, arg_bins):
    '''Generate mapping, binning, and ontology-hierarchy graphs.

    Writes PNG files under lexmapr_figures/.  Bin graphs are skipped for
    large inputs (>= 1000 rows) or when no bins were requested.

    :param out_file: path to a tab-delimited mapping-results file
    :param arg_bins: list of bin column names to graph
    '''
    map_res = pandas.read_csv(out_file, delimiter='\t')
    x_col = 'Match status'
    y_col = 'No. samples matched'
    match_status = map_res['Match_Status (Macro Level)'].value_counts()
    match_res = _split_results(match_status, x_col, y_col, False)
    match_res = match_res.sort_values(y_col, ascending=False)
    sns_fig = sns.barplot(x=x_col, y=y_col, data=match_res, ci=None).get_figure()
    plt.xticks(rotation=90)
    plt.tight_layout()
    sns_fig.savefig('lexmapr_figures/mapping_results.png')

    if map_res.shape[0] >= 1000:
        logging.info('Did not attempt to make bin because too many rows')
        return

    if arg_bins != []:
        x_col = 'Bin'
        bin_counts = {}
        # One graph per bin column, counting samples mapped to each bin term.
        for bin_col in arg_bins:
            bin_counts[bin_col] = sum(map_res[bin_col].value_counts())
            bin_res = _split_results(map_res[bin_col].value_counts(), x_col, y_col)
            if not bin_res.empty:
                bin_res = bin_res.sort_values(y_col, ascending=False)
                plt.clf()
                sns_fig = sns.barplot(x=x_col, y=y_col, data=bin_res, ci=None).get_figure()
                plt.xticks(rotation=90)
                plt.tight_layout()
                sns_fig.savefig(f'lexmapr_figures/{bin_col}_binning.png')

        # Summary graph: total samples mapped under each bin column.
        plt.clf()
        bin_pd = pandas.DataFrame({x_col: list(bin_counts.keys()),
                                   y_col: list(bin_counts.values())})
        bin_pd = bin_pd.sort_values(y_col, ascending=False)
        sns_fig = sns.barplot(x=x_col, y=y_col, data=bin_pd, ci=None).get_figure()
        plt.xticks(rotation=90)
        plt.tight_layout()
        sns_fig.savefig('lexmapr_figures/binning_results.png')

        # TODO: make node colors vary with frequency and color ones that are both top and bottom?
        for bin_col in arg_bins:
            print(f'\tMight generate {bin_col} ontology graph...'.ljust(80), end='\r')
            lcp_list, term_list = _get_ontols(map_res, 'Matched_Components', bin_col)
            if lcp_list != [] and term_list != []:
                bin_package = ontr.Ontology_package('.', list(term_list))
                bin_package.set_lcp(lcp_list)
                bin_package.visualize_terms(f'lexmapr_figures/{bin_col}_terms.png',
                                            show_lcp=True, fill_out=True, trim_nodes=True)