kkonganti@0
|
1 """Reports and visualizes results"""
|
kkonganti@0
|
2
|
kkonganti@0
|
3 import logging, os, pandas, re, shutil, time
|
kkonganti@0
|
4 import matplotlib.pyplot as plt
|
kkonganti@0
|
5 import seaborn as sns
|
kkonganti@0
|
6 import lexmapr.ontology_reasoner as ontr
|
kkonganti@0
|
7
|
kkonganti@0
|
8 logging.getLogger('matplotlib').setLevel(logging.WARNING)
|
kkonganti@0
|
9
|
kkonganti@0
|
10
|
kkonganti@0
|
def _split_results(pandas_series, x_col, y_col, split_delim=True):
    '''Tally a value-count series into a two-column dataframe.

    Each index entry of ``pandas_series`` is split on ``|`` and the entry's
    count is credited to every resulting term, summing across entries.

    pandas_series -- series whose index holds (possibly |-joined) strings
                     and whose values are counts
    x_col         -- name of the output column holding the terms
    y_col         -- name of the output column holding the summed counts
    split_delim   -- when true, everything from the last ``:`` onwards is
                     removed from each term (a term with no ``:`` becomes
                     an empty string); when false, terms are kept verbatim

    Returns a dataframe with one row per distinct term, in first-seen order.
    '''
    totals = {}
    for joined_terms, count in pandas_series.items():
        for term in joined_terms.split('|'):
            if term in totals:
                totals[term] += count
            else:
                totals[term] = count

    if split_delim:
        # Keep only the text before the final ':' of each term.
        labels = [term.rpartition(':')[0] for term in totals]
    else:
        labels = list(totals)
    return pandas.DataFrame({x_col: labels, y_col: list(totals.values())})
|
kkonganti@0
|
27
|
kkonganti@0
|
28
|
kkonganti@0
|
def _get_ontols(map_res, match_col, bin_col):
    '''Make Ontology_accession instances and group terms under their bins.

    Only rows of ``map_res`` with a non-missing ``bin_col`` value are used.
    Every distinct (|-split) value of ``bin_col`` becomes a candidate bin
    accession and every distinct (|-split) value of ``match_col`` except
    'No Match' becomes a term accession. A term is grouped under a bin when
    the bin accession is found in the term's ``ancestors``.

    map_res   -- dataframe of mapping results
    match_col -- column holding the matched term accessions
    bin_col   -- column holding the bin accessions

    Returns ``(bins, terms)``: a list of the bins that received at least one
    term and a list of all terms grouped under any bin. When more than 100
    terms were grouped, the term IDs are logged and ``([], [])`` is returned.
    '''
    red_res = map_res[map_res[bin_col].notna()]
    mapped_terms = _split_results(red_res[match_col].value_counts(), 'x', 'y',
                                  split_delim=False)
    mapped_bins = _split_results(red_res[bin_col].value_counts(), 'x', 'y',
                                 split_delim=False)

    ontol_sets = {}
    for bin_name in mapped_bins['x']:
        ontol_sets[ontr.Ontology_accession.make_instance(bin_name)] = set()
        # Short pause after each instance is made; presumably throttles
        # lookups done by ontology_reasoner -- TODO confirm.
        time.sleep(0.05)

    for term_name in mapped_terms['x']:
        if term_name == 'No Match':
            continue
        term_ontol = ontr.Ontology_accession.make_instance(term_name)
        if term_ontol.ancestors == 'not assigned yet':
            term_ontol.get_family('ancestors')
            time.sleep(0.05)
        if term_ontol.ancestors == ['none found']:
            continue
        for bin_ontol in ontol_sets:
            if bin_ontol in term_ontol.ancestors:
                ontol_sets[bin_ontol].add(term_ontol)

    lcp_set = set()
    term_set = set()
    for bin_ontol, grouped_terms in ontol_sets.items():
        if grouped_terms:
            lcp_set.add(bin_ontol)
            term_set |= grouped_terms

    if len(term_set) > 100:
        term_ids = [term.id for term in term_set]
        # Lay the IDs out four per row. Slicing in steps of four keeps the
        # final, possibly shorter, row; zipping four strided slices would
        # drop up to three trailing IDs from the message.
        rows = [term_ids[start:start + 4] for start in range(0, len(term_ids), 4)]
        terms_string = ''.join(
            '\n\t\t' + '\t'.join(str(term_id) for term_id in row) for row in rows)
        logging.info(f'Not drawing {bin_col} graph with {len(term_ids)} child nodes:\n'
                     f'{terms_string}\n')
        return([], [])
    return(list(lcp_set), list(term_set))
|
kkonganti@0
|
65
|
kkonganti@0
|
66
|
kkonganti@0
|
def report_results(out_file, arg_bins):
    '''Log summary counts read from a tab-delimited mapping results file.

    Logs the number of rows in the 'Sample_Desc' column, one line per
    distinct 'Match_Status (Macro Level)' value with its frequency, and,
    for each column named in ``arg_bins``, its number of non-missing values.

    out_file -- path of the tab-delimited results file (first row is header)
    arg_bins -- iterable of bin column names to report on
    '''
    results = pandas.read_csv(out_file, header=0, delimiter='\t')
    status_counts = results['Match_Status (Macro Level)'].value_counts()
    row_total = len(results['Sample_Desc'])
    logging.info(f'\t\tNo. unique terms: {row_total}')
    for status, frequency in status_counts.items():
        logging.info(f'\t\tNo. {status}: {frequency}')
    for bin_name in arg_bins:
        mapped_total = results[bin_name].count()
        logging.info(f'\t\tNo. mapped under {bin_name}: {mapped_total}')
|
kkonganti@0
|
76
|
kkonganti@0
|
77
|
kkonganti@0
|
def report_cache(term_cache):
    '''Log and return match-status counts taken from the term cache.

    Each cached value is searched for the four status labels; a value may
    count towards more than one label. The logged number of unique terms is
    the cache size minus one.

    term_cache -- mapping whose values are strings searched for the labels

    Returns a dict keyed by 'No Match', 'Full Term Match', 'Synonym Match'
    and 'Component Match' holding the respective counts.
    '''
    # TODO: add counts for bins?
    logging.info(f'\t\tNo. unique terms: {len(term_cache)-1}')

    # Key order here is the key order of the returned dict.
    tallies = {'No Match': 0, 'Full Term Match': 0,
               'Synonym Match': 0, 'Component Match': 0}
    for cache_key in term_cache:
        cached_value = term_cache[cache_key]
        for label in tallies:
            if re.search(label, cached_value):
                tallies[label] += 1

    # The log lines use a different order from the returned dict.
    for label in ('Full Term Match', 'Synonym Match', 'Component Match', 'No Match'):
        logging.info(f'\t\tNo. Unique {label}: {tallies[label]}')
    return(tallies)
|
kkonganti@0
|
101
|
kkonganti@0
|
102
|
kkonganti@0
|
def figure_folder():
    '''Create an empty lexmapr_figures/ folder in the working directory.

    Any existing folder of that name is removed, with its contents, first.
    '''
    figures_dir = 'lexmapr_figures/'
    try:
        shutil.rmtree(figures_dir)
    except FileNotFoundError:
        # No earlier folder to clear.
        pass
    os.mkdir(figures_dir)
|
kkonganti@0
|
110
|
kkonganti@0
|
111
|
kkonganti@0
|
def visualize_cache(match_counts):
    '''Draw a bar chart of match-status counts and save it as a PNG.

    match_counts -- dict mapping a match-status label to its count; keys
                    become the bars and values their heights

    The chart is written to lexmapr_figures/mapping_results.png. No bin
    graphs are made, which is noted in the log.
    '''
    # TODO: add graphing for bins?
    x_col = 'Match status'
    y_col = 'No. samples matched'
    # NOTE(review): newer seaborn releases deprecate `ci` in favour of
    # `errorbar`; kept as-is for the seaborn version in use -- confirm.
    axes = sns.barplot(x=list(match_counts.keys()),
                       y=list(match_counts.values()), ci=None)
    # The data is passed as plain lists, so seaborn has no column names to
    # label the axes with; set the labels explicitly.
    axes.set(xlabel=x_col, ylabel=y_col)
    plt.xticks(rotation=90)
    plt.tight_layout()
    axes.get_figure().savefig('lexmapr_figures/mapping_results.png')
    logging.info('Did not attempt to make bin graphs')
|
kkonganti@0
|
123
|
kkonganti@0
|
124
|
kkonganti@0
|
def visualize_results(out_file, arg_bins):
    '''Generate result graphs from a tab-delimited mapping results file.

    Always saves a bar chart of 'Match_Status (Macro Level)' frequencies to
    lexmapr_figures/mapping_results.png. When the file has fewer than 1000
    rows and ``arg_bins`` is not an empty list, it also saves:
      * lexmapr_figures/<bin>_binning.png for each bin with any values,
      * lexmapr_figures/binning_results.png with the totals per bin,
      * lexmapr_figures/<bin>_terms.png for each bin for which
        _get_ontols returns non-empty bin and term lists.

    out_file -- path of the tab-delimited results file
    arg_bins -- list of bin column names
    '''
    results = pandas.read_csv(out_file, delimiter='\t')
    status_label = 'Match status'
    count_label = 'No. samples matched'

    # Overall match-status chart.
    status_counts = results['Match_Status (Macro Level)'].value_counts()
    status_table = _split_results(status_counts, status_label, count_label, False)
    status_table = status_table.sort_values(count_label, ascending=False)
    status_axes = sns.barplot(x=status_label, y=count_label, data=status_table, ci=None)
    status_figure = status_axes.get_figure()
    plt.xticks(rotation=90)
    plt.tight_layout()
    status_figure.savefig('lexmapr_figures/mapping_results.png')

    if results.shape[0] >= 1000:
        logging.info('Did not attempt to make bin because too many rows')
        return

    if arg_bins == []:
        return

    # One chart per bin, collecting each bin's total along the way.
    bin_label = 'Bin'
    bin_totals = {}
    for bin_name in arg_bins:
        bin_value_counts = results[bin_name].value_counts()
        bin_totals[bin_name] = sum(bin_value_counts)
        bin_table = _split_results(bin_value_counts, bin_label, count_label)
        if bin_table.empty:
            continue
        bin_table = bin_table.sort_values(count_label, ascending=False)
        plt.clf()
        sns.barplot(x=bin_label, y=count_label, data=bin_table, ci=None)
        plt.xticks(rotation=90)
        plt.tight_layout()
        plt.savefig(f'lexmapr_figures/{bin_name}_binning.png')

    # Summary chart of totals across bins.
    plt.clf()
    totals_table = pandas.DataFrame({bin_label: list(bin_totals.keys()),
                                     count_label: list(bin_totals.values())})
    totals_table = totals_table.sort_values(count_label, ascending=False)
    totals_axes = sns.barplot(x=bin_label, y=count_label, data=totals_table, ci=None)
    totals_figure = totals_axes.get_figure()
    plt.xticks(rotation=90)
    plt.tight_layout()
    totals_figure.savefig('lexmapr_figures/binning_results.png')

    # TODO: make node colors vary with frequency and color ones that are both top and bottom?
    for bin_name in arg_bins:
        print(f'\tMight generate {bin_name} ontology graph...'.ljust(80), end='\r')
        lcp_list, term_list = _get_ontols(results, 'Matched_Components', bin_name)
        if lcp_list and term_list:
            bin_package = ontr.Ontology_package('.', list(term_list))
            bin_package.set_lcp(lcp_list)
            bin_package.visualize_terms(f'lexmapr_figures/{bin_name}_terms.png',
                                        show_lcp=True, fill_out=True, trim_nodes=True)
|