annotate qa_core/gims_qa_plot.py @ 2:021864b5a0a5 tip

planemo upload
author jpayne
date Fri, 11 May 2018 10:25:14 -0400
parents dafec28bd37e
children
rev   line source
jpayne@0 1 import subprocess
jpayne@0 2 from qa_core import new_output_file, qa_params
jpayne@0 3 from collections import defaultdict
jpayne@0 4 import warnings
jpayne@0 5 warnings.filterwarnings("ignore")
jpayne@0 6 import pandas
jpayne@0 7 from pandas import DataFrame
jpayne@0 8 import matplotlib.pyplot as plot
jpayne@0 9 from numpy import zeros, add, mean, array, percentile, arange
jpayne@0 10 import traceback
jpayne@0 11 from gims_qa_tests import cluster
jpayne@0 12 import logging
jpayne@0 13
jpayne@0 14 bases = ("A", "T", "G", "C", "N")
jpayne@0 15
jpayne@0 16 def qa_plots(df, annotation, log=logging.getLogger('qa.plots'), **kwargs):
jpayne@0 17 #log.debug(df)
jpayne@0 18 #log.debug(annotation)
jpayne@0 19 plot_quality_histo(df['qualityHisto'], annotation, **kwargs)
jpayne@0 20 plot_nucleotide_distribution_histo(df['nucleotideHisto'], annotation)
jpayne@0 21 plot_quality_distribution_histo(df['baseQScoreHisto'], annotation)
jpayne@0 22 plot_base_balance(df, annotation, **kwargs)
jpayne@0 23 #plot.show()
jpayne@0 24
jpayne@0 25 def plot_base_balance(df,
jpayne@0 26 annotation=qa_params.get('annotation', ''),
jpayne@0 27 threshold=qa_params.get('permitted base balance variance', .2),
jpayne@0 28 **kwargs):
jpayne@0 29 try:
jpayne@0 30 plot.figure()
jpayne@0 31 plot.axis((0,2,0,2))
jpayne@0 32 plot.suptitle("{} Base Balance".format(annotation))
jpayne@0 33 plot.xlabel("G/C")
jpayne@0 34 plot.ylabel("A/T")
jpayne@0 35 thres_min = 1 - threshold
jpayne@0 36 thres_max = 1 + threshold
jpayne@0 37 for coord in (thres_min, thres_max):
jpayne@0 38 for linef in (plot.axvline, plot.axhline):
jpayne@0 39 linef(coord, linestyle='dashed', label=str(coord))
jpayne@0 40
jpayne@0 41 plot.annotate(thres_max, xy=(0, thres_max))
jpayne@0 42 plot.annotate(thres_min, xy=(0, thres_min))
jpayne@0 43 plot.annotate(thres_max, xy=(thres_max, 0))
jpayne@0 44 plot.annotate(thres_min, xy=(thres_min, 0))
jpayne@0 45
jpayne@0 46 a, t, g, c = [sum(df['nucleotideHisto'][b]) for b in ('A', 'T', 'G', 'C')]
jpayne@0 47 y, x = a / float(t), g / float(c)
jpayne@0 48 plot.plot((x,), (y,), marker="+")
jpayne@0 49 plot.annotate("({:.2},{:.2})".format(x, y), xy=(x+.05, y+.05), textcoords='data')
jpayne@0 50 with new_output_file("base_balance.png") as base_bal:
jpayne@0 51 plot.savefig(base_bal, format='png', dpi=92)
jpayne@0 52 except Exception:
jpayne@0 53 traceback.print_exc()
jpayne@0 54
jpayne@0 55
jpayne@0 56 def plot_quality_histo(histo,
jpayne@0 57 annotation=qa_params.get('annotation', ''),
jpayne@0 58 q=qa_params.get('user Q threshold', 27)):
jpayne@0 59 try:
jpayne@0 60 plot.figure()
jpayne@0 61 labels = range(max(histo.keys()) + 1)
jpayne@0 62 plot.bar(left=labels,
jpayne@0 63 height=[histo[n] for n in labels],
jpayne@0 64 color='y',
jpayne@0 65 linewidth=1)
jpayne@0 66 # plot.hist(histo, bins=range(max(histo)))
jpayne@0 67 plot.xticks()
jpayne@0 68 plot.axvline(x=q, label="Q {}".format(q))
jpayne@0 69 #plot.show()
jpayne@0 70 plot.xlabel(r"""$Q (-10 log_(10) \rho)$""")
jpayne@0 71 plot.ylabel('Number of Bases')
jpayne@0 72 plot.suptitle('{} Q-Score Histogram'.format(annotation))
jpayne@0 73 plot.legend(loc='upper left', fontsize='x-small')
jpayne@0 74 with new_output_file("qscore_histogram.png") as qual_histogram:
jpayne@0 75 plot.savefig(qual_histogram, format='png', dpi=92)
jpayne@0 76 # except ValueError:
jpayne@0 77 # pass
jpayne@0 78 except Exception:
jpayne@0 79 traceback.print_exc()
jpayne@0 80
jpayne@0 81 def plot_nucleotide_distribution_histo(dataframe,
jpayne@0 82 annotation=qa_params.get('annotation', '')):
jpayne@0 83 try:
jpayne@0 84 #dataframe = defaultdict(list)
jpayne@0 85 colors = iter(('r', 'y', 'g', 'b', 'pink'))
jpayne@0 86 plot.figure()
jpayne@0 87 last_base = zeros((len(dataframe.values()[0]),)) #set all zeros for bottom of first bar row
jpayne@0 88 for base in bases:
jpayne@0 89 try:
jpayne@0 90 plot.bar(left=range(len(dataframe[base])),
jpayne@0 91 height=dataframe[base],
jpayne@0 92 width=2,
jpayne@0 93 bottom=last_base,
jpayne@0 94 color=colors.next(),
jpayne@0 95 label=base,
jpayne@0 96 linewidth=0)
jpayne@0 97 last_base = add(last_base, dataframe[base])
jpayne@0 98 except:
jpayne@0 99 #print [len(d) for d in dataframe.data.values()]
jpayne@0 100 raise
jpayne@0 101 plot.autoscale(tight=True)
jpayne@0 102 plot.xlabel('Base Position')
jpayne@0 103 plot.ylabel('Nucleotide Count')
jpayne@0 104 plot.suptitle('{} Nucleotide Distribution Plot'.format(annotation))
jpayne@0 105 plot.legend(loc='upper right', fontsize='xx-small')
jpayne@0 106 with new_output_file("nucleotide_distribution.png") as nuc_histogram:
jpayne@0 107 plot.savefig(nuc_histogram, format='png', dpi=108)
jpayne@0 108 except Exception:
jpayne@0 109 traceback.print_exc()
jpayne@0 110
jpayne@0 111 def plot_quality_distribution_histo(histo,
jpayne@0 112 annotation=qa_params.get('annotation', ''),
jpayne@0 113 q=qa_params.get('user Q threshold', 27)
jpayne@0 114 ):
jpayne@0 115 try:
jpayne@0 116 binning = 20
jpayne@0 117 plot.figure(figsize=(12, 6))
jpayne@0 118 vec = [tuple(c.elements()) for c in histo]
jpayne@0 119 means = [mean(t) for t in vec]
jpayne@0 120 upper_iqr = array([percentile(t, 75) for t in vec])
jpayne@0 121 lower_iqr = array([percentile(t, 25) for t in vec])
jpayne@0 122 # plot.boxplot(vec,
jpayne@0 123 # widths=.000005,
jpayne@0 124 # whis=(5,95),
jpayne@0 125 # showfliers=False,
jpayne@0 126 # showbox=True,
jpayne@0 127 # showmeans=True,
jpayne@0 128 # meanline=True,
jpayne@0 129 # showcaps=True,
jpayne@0 130 # whiskerprops=dict(linestyle='-', color='0.60')
jpayne@0 131 # )
jpayne@0 132
jpayne@0 133 plot.fill_between(arange(len(upper_iqr)), upper_iqr, lower_iqr, facecolor='0.50', alpha=0.5, label='interquartile range')
jpayne@0 134 plot.plot(means, 'r-', label='Mean')
jpayne@0 135 plot.plot(upper_iqr, 'b-', label='interquartile range', alpha=0.25)
jpayne@0 136 plot.autoscale(tight=True)
jpayne@0 137 plot.axhline(y=q, label="Q {}".format(q))
jpayne@0 138 plot.xlabel('Base Position')
jpayne@0 139 plot.ylabel(r"""$Q (-10 log_(10) \rho)$""")
jpayne@0 140 plot.xticks(range(0, len(histo), binning), range(0, len(histo), binning), rotation='vertical')
jpayne@0 141 plot.suptitle('{} Q-Score Distribution Plot'.format(annotation))
jpayne@0 142 plot.legend(loc='lower left', fontsize='x-small')
jpayne@0 143 plot.grid(True)
jpayne@0 144 with new_output_file("qscore_distribution.png") as q_distro:
jpayne@0 145 plot.savefig(q_distro, format='png', dpi=108)
jpayne@0 146
jpayne@0 147 except Exception:
jpayne@0 148 traceback.print_exc()