Mercurial > repos > jpayne > cfsan_qaqc
comparison qa_core/gims_qa_plot.py @ 0:dafec28bd37e
planemo upload
author | jpayne |
---|---|
date | Tue, 08 May 2018 18:54:20 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:dafec28bd37e |
---|---|
1 import subprocess | |
2 from qa_core import new_output_file, qa_params | |
3 from collections import defaultdict | |
4 import warnings | |
5 warnings.filterwarnings("ignore") | |
6 import pandas | |
7 from pandas import DataFrame | |
8 import matplotlib.pyplot as plot | |
9 from numpy import zeros, add, mean, array, percentile, arange | |
10 import traceback | |
11 from gims_qa_tests import cluster | |
12 import logging | |
13 | |
14 bases = ("A", "T", "G", "C", "N") | |
15 | |
16 def qa_plots(df, annotation, log=logging.getLogger('qa.plots'), **kwargs): | |
17 #log.debug(df) | |
18 #log.debug(annotation) | |
19 plot_quality_histo(df['qualityHisto'], annotation, **kwargs) | |
20 plot_nucleotide_distribution_histo(df['nucleotideHisto'], annotation) | |
21 plot_quality_distribution_histo(df['baseQScoreHisto'], annotation) | |
22 plot_base_balance(df, annotation, **kwargs) | |
23 #plot.show() | |
24 | |
25 def plot_base_balance(df, | |
26 annotation=qa_params.get('annotation', ''), | |
27 threshold=qa_params.get('permitted base balance variance', .2), | |
28 **kwargs): | |
29 try: | |
30 plot.figure() | |
31 plot.axis((0,2,0,2)) | |
32 plot.suptitle("{} Base Balance".format(annotation)) | |
33 plot.xlabel("G/C") | |
34 plot.ylabel("A/T") | |
35 thres_min = 1 - threshold | |
36 thres_max = 1 + threshold | |
37 for coord in (thres_min, thres_max): | |
38 for linef in (plot.axvline, plot.axhline): | |
39 linef(coord, linestyle='dashed', label=str(coord)) | |
40 | |
41 plot.annotate(thres_max, xy=(0, thres_max)) | |
42 plot.annotate(thres_min, xy=(0, thres_min)) | |
43 plot.annotate(thres_max, xy=(thres_max, 0)) | |
44 plot.annotate(thres_min, xy=(thres_min, 0)) | |
45 | |
46 a, t, g, c = [sum(df['nucleotideHisto'][b]) for b in ('A', 'T', 'G', 'C')] | |
47 y, x = a / float(t), g / float(c) | |
48 plot.plot((x,), (y,), marker="+") | |
49 plot.annotate("({:.2},{:.2})".format(x, y), xy=(x+.05, y+.05), textcoords='data') | |
50 with new_output_file("base_balance.png") as base_bal: | |
51 plot.savefig(base_bal, format='png', dpi=92) | |
52 except Exception: | |
53 traceback.print_exc() | |
54 | |
55 | |
56 def plot_quality_histo(histo, | |
57 annotation=qa_params.get('annotation', ''), | |
58 q=qa_params.get('user Q threshold', 27)): | |
59 try: | |
60 plot.figure() | |
61 labels = range(max(histo.keys()) + 1) | |
62 plot.bar(left=labels, | |
63 height=[histo[n] for n in labels], | |
64 color='y', | |
65 linewidth=1) | |
66 # plot.hist(histo, bins=range(max(histo))) | |
67 plot.xticks() | |
68 plot.axvline(x=q, label="Q {}".format(q)) | |
69 #plot.show() | |
70 plot.xlabel(r"""$Q (-10 log_(10) \rho)$""") | |
71 plot.ylabel('Number of Bases') | |
72 plot.suptitle('{} Q-Score Histogram'.format(annotation)) | |
73 plot.legend(loc='upper left', fontsize='x-small') | |
74 with new_output_file("qscore_histogram.png") as qual_histogram: | |
75 plot.savefig(qual_histogram, format='png', dpi=92) | |
76 # except ValueError: | |
77 # pass | |
78 except Exception: | |
79 traceback.print_exc() | |
80 | |
81 def plot_nucleotide_distribution_histo(dataframe, | |
82 annotation=qa_params.get('annotation', '')): | |
83 try: | |
84 #dataframe = defaultdict(list) | |
85 colors = iter(('r', 'y', 'g', 'b', 'pink')) | |
86 plot.figure() | |
87 last_base = zeros((len(dataframe.values()[0]),)) #set all zeros for bottom of first bar row | |
88 for base in bases: | |
89 try: | |
90 plot.bar(left=range(len(dataframe[base])), | |
91 height=dataframe[base], | |
92 width=2, | |
93 bottom=last_base, | |
94 color=colors.next(), | |
95 label=base, | |
96 linewidth=0) | |
97 last_base = add(last_base, dataframe[base]) | |
98 except: | |
99 #print [len(d) for d in dataframe.data.values()] | |
100 raise | |
101 plot.autoscale(tight=True) | |
102 plot.xlabel('Base Position') | |
103 plot.ylabel('Nucleotide Count') | |
104 plot.suptitle('{} Nucleotide Distribution Plot'.format(annotation)) | |
105 plot.legend(loc='upper right', fontsize='xx-small') | |
106 with new_output_file("nucleotide_distribution.png") as nuc_histogram: | |
107 plot.savefig(nuc_histogram, format='png', dpi=108) | |
108 except Exception: | |
109 traceback.print_exc() | |
110 | |
111 def plot_quality_distribution_histo(histo, | |
112 annotation=qa_params.get('annotation', ''), | |
113 q=qa_params.get('user Q threshold', 27) | |
114 ): | |
115 try: | |
116 binning = 20 | |
117 plot.figure(figsize=(12, 6)) | |
118 vec = [tuple(c.elements()) for c in histo] | |
119 means = [mean(t) for t in vec] | |
120 upper_iqr = array([percentile(t, 75) for t in vec]) | |
121 lower_iqr = array([percentile(t, 25) for t in vec]) | |
122 # plot.boxplot(vec, | |
123 # widths=.000005, | |
124 # whis=(5,95), | |
125 # showfliers=False, | |
126 # showbox=True, | |
127 # showmeans=True, | |
128 # meanline=True, | |
129 # showcaps=True, | |
130 # whiskerprops=dict(linestyle='-', color='0.60') | |
131 # ) | |
132 | |
133 plot.fill_between(arange(len(upper_iqr)), upper_iqr, lower_iqr, facecolor='0.50', alpha=0.5, label='interquartile range') | |
134 plot.plot(means, 'r-', label='Mean') | |
135 plot.plot(upper_iqr, 'b-', label='interquartile range', alpha=0.25) | |
136 plot.autoscale(tight=True) | |
137 plot.axhline(y=q, label="Q {}".format(q)) | |
138 plot.xlabel('Base Position') | |
139 plot.ylabel(r"""$Q (-10 log_(10) \rho)$""") | |
140 plot.xticks(range(0, len(histo), binning), range(0, len(histo), binning), rotation='vertical') | |
141 plot.suptitle('{} Q-Score Distribution Plot'.format(annotation)) | |
142 plot.legend(loc='lower left', fontsize='x-small') | |
143 plot.grid(True) | |
144 with new_output_file("qscore_distribution.png") as q_distro: | |
145 plot.savefig(q_distro, format='png', dpi=108) | |
146 | |
147 except Exception: | |
148 traceback.print_exc() |