jpayne@0
|
1 import subprocess
|
jpayne@0
|
2 from qa_core import new_output_file, qa_params
|
jpayne@0
|
3 from collections import defaultdict
|
jpayne@0
|
4 import warnings
|
jpayne@0
|
5 warnings.filterwarnings("ignore")
|
jpayne@0
|
6 import pandas
|
jpayne@0
|
7 from pandas import DataFrame
|
jpayne@0
|
8 import matplotlib.pyplot as plot
|
jpayne@0
|
9 from numpy import zeros, add, mean, array, percentile, arange
|
jpayne@0
|
10 import traceback
|
jpayne@0
|
11 from gims_qa_tests import cluster
|
jpayne@0
|
12 import logging
|
jpayne@0
|
13
|
jpayne@0
|
14 bases = ("A", "T", "G", "C", "N")
|
jpayne@0
|
15
|
jpayne@0
|
16 def qa_plots(df, annotation, log=logging.getLogger('qa.plots'), **kwargs):
|
jpayne@0
|
17 #log.debug(df)
|
jpayne@0
|
18 #log.debug(annotation)
|
jpayne@0
|
19 plot_quality_histo(df['qualityHisto'], annotation, **kwargs)
|
jpayne@0
|
20 plot_nucleotide_distribution_histo(df['nucleotideHisto'], annotation)
|
jpayne@0
|
21 plot_quality_distribution_histo(df['baseQScoreHisto'], annotation)
|
jpayne@0
|
22 plot_base_balance(df, annotation, **kwargs)
|
jpayne@0
|
23 #plot.show()
|
jpayne@0
|
24
|
jpayne@0
|
25 def plot_base_balance(df,
|
jpayne@0
|
26 annotation=qa_params.get('annotation', ''),
|
jpayne@0
|
27 threshold=qa_params.get('permitted base balance variance', .2),
|
jpayne@0
|
28 **kwargs):
|
jpayne@0
|
29 try:
|
jpayne@0
|
30 plot.figure()
|
jpayne@0
|
31 plot.axis((0,2,0,2))
|
jpayne@0
|
32 plot.suptitle("{} Base Balance".format(annotation))
|
jpayne@0
|
33 plot.xlabel("G/C")
|
jpayne@0
|
34 plot.ylabel("A/T")
|
jpayne@0
|
35 thres_min = 1 - threshold
|
jpayne@0
|
36 thres_max = 1 + threshold
|
jpayne@0
|
37 for coord in (thres_min, thres_max):
|
jpayne@0
|
38 for linef in (plot.axvline, plot.axhline):
|
jpayne@0
|
39 linef(coord, linestyle='dashed', label=str(coord))
|
jpayne@0
|
40
|
jpayne@0
|
41 plot.annotate(thres_max, xy=(0, thres_max))
|
jpayne@0
|
42 plot.annotate(thres_min, xy=(0, thres_min))
|
jpayne@0
|
43 plot.annotate(thres_max, xy=(thres_max, 0))
|
jpayne@0
|
44 plot.annotate(thres_min, xy=(thres_min, 0))
|
jpayne@0
|
45
|
jpayne@0
|
46 a, t, g, c = [sum(df['nucleotideHisto'][b]) for b in ('A', 'T', 'G', 'C')]
|
jpayne@0
|
47 y, x = a / float(t), g / float(c)
|
jpayne@0
|
48 plot.plot((x,), (y,), marker="+")
|
jpayne@0
|
49 plot.annotate("({:.2},{:.2})".format(x, y), xy=(x+.05, y+.05), textcoords='data')
|
jpayne@0
|
50 with new_output_file("base_balance.png") as base_bal:
|
jpayne@0
|
51 plot.savefig(base_bal, format='png', dpi=92)
|
jpayne@0
|
52 except Exception:
|
jpayne@0
|
53 traceback.print_exc()
|
jpayne@0
|
54
|
jpayne@0
|
55
|
jpayne@0
|
56 def plot_quality_histo(histo,
|
jpayne@0
|
57 annotation=qa_params.get('annotation', ''),
|
jpayne@0
|
58 q=qa_params.get('user Q threshold', 27)):
|
jpayne@0
|
59 try:
|
jpayne@0
|
60 plot.figure()
|
jpayne@0
|
61 labels = range(max(histo.keys()) + 1)
|
jpayne@0
|
62 plot.bar(left=labels,
|
jpayne@0
|
63 height=[histo[n] for n in labels],
|
jpayne@0
|
64 color='y',
|
jpayne@0
|
65 linewidth=1)
|
jpayne@0
|
66 # plot.hist(histo, bins=range(max(histo)))
|
jpayne@0
|
67 plot.xticks()
|
jpayne@0
|
68 plot.axvline(x=q, label="Q {}".format(q))
|
jpayne@0
|
69 #plot.show()
|
jpayne@0
|
70 plot.xlabel(r"""$Q (-10 log_(10) \rho)$""")
|
jpayne@0
|
71 plot.ylabel('Number of Bases')
|
jpayne@0
|
72 plot.suptitle('{} Q-Score Histogram'.format(annotation))
|
jpayne@0
|
73 plot.legend(loc='upper left', fontsize='x-small')
|
jpayne@0
|
74 with new_output_file("qscore_histogram.png") as qual_histogram:
|
jpayne@0
|
75 plot.savefig(qual_histogram, format='png', dpi=92)
|
jpayne@0
|
76 # except ValueError:
|
jpayne@0
|
77 # pass
|
jpayne@0
|
78 except Exception:
|
jpayne@0
|
79 traceback.print_exc()
|
jpayne@0
|
80
|
jpayne@0
|
81 def plot_nucleotide_distribution_histo(dataframe,
|
jpayne@0
|
82 annotation=qa_params.get('annotation', '')):
|
jpayne@0
|
83 try:
|
jpayne@0
|
84 #dataframe = defaultdict(list)
|
jpayne@0
|
85 colors = iter(('r', 'y', 'g', 'b', 'pink'))
|
jpayne@0
|
86 plot.figure()
|
jpayne@0
|
87 last_base = zeros((len(dataframe.values()[0]),)) #set all zeros for bottom of first bar row
|
jpayne@0
|
88 for base in bases:
|
jpayne@0
|
89 try:
|
jpayne@0
|
90 plot.bar(left=range(len(dataframe[base])),
|
jpayne@0
|
91 height=dataframe[base],
|
jpayne@0
|
92 width=2,
|
jpayne@0
|
93 bottom=last_base,
|
jpayne@0
|
94 color=colors.next(),
|
jpayne@0
|
95 label=base,
|
jpayne@0
|
96 linewidth=0)
|
jpayne@0
|
97 last_base = add(last_base, dataframe[base])
|
jpayne@0
|
98 except:
|
jpayne@0
|
99 #print [len(d) for d in dataframe.data.values()]
|
jpayne@0
|
100 raise
|
jpayne@0
|
101 plot.autoscale(tight=True)
|
jpayne@0
|
102 plot.xlabel('Base Position')
|
jpayne@0
|
103 plot.ylabel('Nucleotide Count')
|
jpayne@0
|
104 plot.suptitle('{} Nucleotide Distribution Plot'.format(annotation))
|
jpayne@0
|
105 plot.legend(loc='upper right', fontsize='xx-small')
|
jpayne@0
|
106 with new_output_file("nucleotide_distribution.png") as nuc_histogram:
|
jpayne@0
|
107 plot.savefig(nuc_histogram, format='png', dpi=108)
|
jpayne@0
|
108 except Exception:
|
jpayne@0
|
109 traceback.print_exc()
|
jpayne@0
|
110
|
jpayne@0
|
111 def plot_quality_distribution_histo(histo,
|
jpayne@0
|
112 annotation=qa_params.get('annotation', ''),
|
jpayne@0
|
113 q=qa_params.get('user Q threshold', 27)
|
jpayne@0
|
114 ):
|
jpayne@0
|
115 try:
|
jpayne@0
|
116 binning = 20
|
jpayne@0
|
117 plot.figure(figsize=(12, 6))
|
jpayne@0
|
118 vec = [tuple(c.elements()) for c in histo]
|
jpayne@0
|
119 means = [mean(t) for t in vec]
|
jpayne@0
|
120 upper_iqr = array([percentile(t, 75) for t in vec])
|
jpayne@0
|
121 lower_iqr = array([percentile(t, 25) for t in vec])
|
jpayne@0
|
122 # plot.boxplot(vec,
|
jpayne@0
|
123 # widths=.000005,
|
jpayne@0
|
124 # whis=(5,95),
|
jpayne@0
|
125 # showfliers=False,
|
jpayne@0
|
126 # showbox=True,
|
jpayne@0
|
127 # showmeans=True,
|
jpayne@0
|
128 # meanline=True,
|
jpayne@0
|
129 # showcaps=True,
|
jpayne@0
|
130 # whiskerprops=dict(linestyle='-', color='0.60')
|
jpayne@0
|
131 # )
|
jpayne@0
|
132
|
jpayne@0
|
133 plot.fill_between(arange(len(upper_iqr)), upper_iqr, lower_iqr, facecolor='0.50', alpha=0.5, label='interquartile range')
|
jpayne@0
|
134 plot.plot(means, 'r-', label='Mean')
|
jpayne@0
|
135 plot.plot(upper_iqr, 'b-', label='interquartile range', alpha=0.25)
|
jpayne@0
|
136 plot.autoscale(tight=True)
|
jpayne@0
|
137 plot.axhline(y=q, label="Q {}".format(q))
|
jpayne@0
|
138 plot.xlabel('Base Position')
|
jpayne@0
|
139 plot.ylabel(r"""$Q (-10 log_(10) \rho)$""")
|
jpayne@0
|
140 plot.xticks(range(0, len(histo), binning), range(0, len(histo), binning), rotation='vertical')
|
jpayne@0
|
141 plot.suptitle('{} Q-Score Distribution Plot'.format(annotation))
|
jpayne@0
|
142 plot.legend(loc='lower left', fontsize='x-small')
|
jpayne@0
|
143 plot.grid(True)
|
jpayne@0
|
144 with new_output_file("qscore_distribution.png") as q_distro:
|
jpayne@0
|
145 plot.savefig(q_distro, format='png', dpi=108)
|
jpayne@0
|
146
|
jpayne@0
|
147 except Exception:
|
jpayne@0
|
148 traceback.print_exc() |