comparison qa_core/gims_qa_plot.py @ 0:dafec28bd37e

planemo upload
author jpayne
date Tue, 08 May 2018 18:54:20 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:dafec28bd37e
1 import subprocess
2 from qa_core import new_output_file, qa_params
3 from collections import defaultdict
4 import warnings
5 warnings.filterwarnings("ignore")
6 import pandas
7 from pandas import DataFrame
8 import matplotlib.pyplot as plot
9 from numpy import zeros, add, mean, array, percentile, arange
10 import traceback
11 from gims_qa_tests import cluster
12 import logging
13
14 bases = ("A", "T", "G", "C", "N")
15
16 def qa_plots(df, annotation, log=logging.getLogger('qa.plots'), **kwargs):
17 #log.debug(df)
18 #log.debug(annotation)
19 plot_quality_histo(df['qualityHisto'], annotation, **kwargs)
20 plot_nucleotide_distribution_histo(df['nucleotideHisto'], annotation)
21 plot_quality_distribution_histo(df['baseQScoreHisto'], annotation)
22 plot_base_balance(df, annotation, **kwargs)
23 #plot.show()
24
25 def plot_base_balance(df,
26 annotation=qa_params.get('annotation', ''),
27 threshold=qa_params.get('permitted base balance variance', .2),
28 **kwargs):
29 try:
30 plot.figure()
31 plot.axis((0,2,0,2))
32 plot.suptitle("{} Base Balance".format(annotation))
33 plot.xlabel("G/C")
34 plot.ylabel("A/T")
35 thres_min = 1 - threshold
36 thres_max = 1 + threshold
37 for coord in (thres_min, thres_max):
38 for linef in (plot.axvline, plot.axhline):
39 linef(coord, linestyle='dashed', label=str(coord))
40
41 plot.annotate(thres_max, xy=(0, thres_max))
42 plot.annotate(thres_min, xy=(0, thres_min))
43 plot.annotate(thres_max, xy=(thres_max, 0))
44 plot.annotate(thres_min, xy=(thres_min, 0))
45
46 a, t, g, c = [sum(df['nucleotideHisto'][b]) for b in ('A', 'T', 'G', 'C')]
47 y, x = a / float(t), g / float(c)
48 plot.plot((x,), (y,), marker="+")
49 plot.annotate("({:.2},{:.2})".format(x, y), xy=(x+.05, y+.05), textcoords='data')
50 with new_output_file("base_balance.png") as base_bal:
51 plot.savefig(base_bal, format='png', dpi=92)
52 except Exception:
53 traceback.print_exc()
54
55
56 def plot_quality_histo(histo,
57 annotation=qa_params.get('annotation', ''),
58 q=qa_params.get('user Q threshold', 27)):
59 try:
60 plot.figure()
61 labels = range(max(histo.keys()) + 1)
62 plot.bar(left=labels,
63 height=[histo[n] for n in labels],
64 color='y',
65 linewidth=1)
66 # plot.hist(histo, bins=range(max(histo)))
67 plot.xticks()
68 plot.axvline(x=q, label="Q {}".format(q))
69 #plot.show()
70 plot.xlabel(r"""$Q (-10 log_(10) \rho)$""")
71 plot.ylabel('Number of Bases')
72 plot.suptitle('{} Q-Score Histogram'.format(annotation))
73 plot.legend(loc='upper left', fontsize='x-small')
74 with new_output_file("qscore_histogram.png") as qual_histogram:
75 plot.savefig(qual_histogram, format='png', dpi=92)
76 # except ValueError:
77 # pass
78 except Exception:
79 traceback.print_exc()
80
81 def plot_nucleotide_distribution_histo(dataframe,
82 annotation=qa_params.get('annotation', '')):
83 try:
84 #dataframe = defaultdict(list)
85 colors = iter(('r', 'y', 'g', 'b', 'pink'))
86 plot.figure()
87 last_base = zeros((len(dataframe.values()[0]),)) #set all zeros for bottom of first bar row
88 for base in bases:
89 try:
90 plot.bar(left=range(len(dataframe[base])),
91 height=dataframe[base],
92 width=2,
93 bottom=last_base,
94 color=colors.next(),
95 label=base,
96 linewidth=0)
97 last_base = add(last_base, dataframe[base])
98 except:
99 #print [len(d) for d in dataframe.data.values()]
100 raise
101 plot.autoscale(tight=True)
102 plot.xlabel('Base Position')
103 plot.ylabel('Nucleotide Count')
104 plot.suptitle('{} Nucleotide Distribution Plot'.format(annotation))
105 plot.legend(loc='upper right', fontsize='xx-small')
106 with new_output_file("nucleotide_distribution.png") as nuc_histogram:
107 plot.savefig(nuc_histogram, format='png', dpi=108)
108 except Exception:
109 traceback.print_exc()
110
111 def plot_quality_distribution_histo(histo,
112 annotation=qa_params.get('annotation', ''),
113 q=qa_params.get('user Q threshold', 27)
114 ):
115 try:
116 binning = 20
117 plot.figure(figsize=(12, 6))
118 vec = [tuple(c.elements()) for c in histo]
119 means = [mean(t) for t in vec]
120 upper_iqr = array([percentile(t, 75) for t in vec])
121 lower_iqr = array([percentile(t, 25) for t in vec])
122 # plot.boxplot(vec,
123 # widths=.000005,
124 # whis=(5,95),
125 # showfliers=False,
126 # showbox=True,
127 # showmeans=True,
128 # meanline=True,
129 # showcaps=True,
130 # whiskerprops=dict(linestyle='-', color='0.60')
131 # )
132
133 plot.fill_between(arange(len(upper_iqr)), upper_iqr, lower_iqr, facecolor='0.50', alpha=0.5, label='interquartile range')
134 plot.plot(means, 'r-', label='Mean')
135 plot.plot(upper_iqr, 'b-', label='interquartile range', alpha=0.25)
136 plot.autoscale(tight=True)
137 plot.axhline(y=q, label="Q {}".format(q))
138 plot.xlabel('Base Position')
139 plot.ylabel(r"""$Q (-10 log_(10) \rho)$""")
140 plot.xticks(range(0, len(histo), binning), range(0, len(histo), binning), rotation='vertical')
141 plot.suptitle('{} Q-Score Distribution Plot'.format(annotation))
142 plot.legend(loc='lower left', fontsize='x-small')
143 plot.grid(True)
144 with new_output_file("qscore_distribution.png") as q_distro:
145 plot.savefig(q_distro, format='png', dpi=108)
146
147 except Exception:
148 traceback.print_exc()