jpayne@0: import subprocess jpayne@0: from qa_core import new_output_file, qa_params jpayne@0: from collections import defaultdict jpayne@0: import warnings jpayne@0: warnings.filterwarnings("ignore") jpayne@0: import pandas jpayne@0: from pandas import DataFrame jpayne@0: import matplotlib.pyplot as plot jpayne@0: from numpy import zeros, add, mean, array, percentile, arange jpayne@0: import traceback jpayne@0: from gims_qa_tests import cluster jpayne@0: import logging jpayne@0: jpayne@0: bases = ("A", "T", "G", "C", "N") jpayne@0: jpayne@0: def qa_plots(df, annotation, log=logging.getLogger('qa.plots'), **kwargs): jpayne@0: #log.debug(df) jpayne@0: #log.debug(annotation) jpayne@0: plot_quality_histo(df['qualityHisto'], annotation, **kwargs) jpayne@0: plot_nucleotide_distribution_histo(df['nucleotideHisto'], annotation) jpayne@0: plot_quality_distribution_histo(df['baseQScoreHisto'], annotation) jpayne@0: plot_base_balance(df, annotation, **kwargs) jpayne@0: #plot.show() jpayne@0: jpayne@0: def plot_base_balance(df, jpayne@0: annotation=qa_params.get('annotation', ''), jpayne@0: threshold=qa_params.get('permitted base balance variance', .2), jpayne@0: **kwargs): jpayne@0: try: jpayne@0: plot.figure() jpayne@0: plot.axis((0,2,0,2)) jpayne@0: plot.suptitle("{} Base Balance".format(annotation)) jpayne@0: plot.xlabel("G/C") jpayne@0: plot.ylabel("A/T") jpayne@0: thres_min = 1 - threshold jpayne@0: thres_max = 1 + threshold jpayne@0: for coord in (thres_min, thres_max): jpayne@0: for linef in (plot.axvline, plot.axhline): jpayne@0: linef(coord, linestyle='dashed', label=str(coord)) jpayne@0: jpayne@0: plot.annotate(thres_max, xy=(0, thres_max)) jpayne@0: plot.annotate(thres_min, xy=(0, thres_min)) jpayne@0: plot.annotate(thres_max, xy=(thres_max, 0)) jpayne@0: plot.annotate(thres_min, xy=(thres_min, 0)) jpayne@0: jpayne@0: a, t, g, c = [sum(df['nucleotideHisto'][b]) for b in ('A', 'T', 'G', 'C')] jpayne@0: y, x = a / float(t), g / float(c) jpayne@0: plot.plot((x,), (y,), marker="+") jpayne@0: plot.annotate("({:.2},{:.2})".format(x, y), xy=(x+.05, y+.05), textcoords='data') jpayne@0: with new_output_file("base_balance.png") as base_bal: jpayne@0: plot.savefig(base_bal, format='png', dpi=92) jpayne@0: except Exception: jpayne@0: traceback.print_exc() jpayne@0: jpayne@0: jpayne@0: def plot_quality_histo(histo, jpayne@0: annotation=qa_params.get('annotation', ''), jpayne@0: q=qa_params.get('user Q threshold', 27)): jpayne@0: try: jpayne@0: plot.figure() jpayne@0: labels = range(max(histo.keys()) + 1) jpayne@0: plot.bar(left=labels, jpayne@0: height=[histo[n] for n in labels], jpayne@0: color='y', jpayne@0: linewidth=1) jpayne@0: # plot.hist(histo, bins=range(max(histo))) jpayne@0: plot.xticks() jpayne@0: plot.axvline(x=q, label="Q {}".format(q)) jpayne@0: #plot.show() jpayne@0: plot.xlabel(r"""$Q (-10 log_(10) \rho)$""") jpayne@0: plot.ylabel('Number of Bases') jpayne@0: plot.suptitle('{} Q-Score Histogram'.format(annotation)) jpayne@0: plot.legend(loc='upper left', fontsize='x-small') jpayne@0: with new_output_file("qscore_histogram.png") as qual_histogram: jpayne@0: plot.savefig(qual_histogram, format='png', dpi=92) jpayne@0: # except ValueError: jpayne@0: # pass jpayne@0: except Exception: jpayne@0: traceback.print_exc() jpayne@0: jpayne@0: def plot_nucleotide_distribution_histo(dataframe, jpayne@0: annotation=qa_params.get('annotation', '')): jpayne@0: try: jpayne@0: #dataframe = defaultdict(list) jpayne@0: colors = iter(('r', 'y', 'g', 'b', 'pink')) jpayne@0: plot.figure() jpayne@0: last_base = zeros((len(dataframe.values()[0]),)) #set all zeros for bottom of first bar row jpayne@0: for base in bases: jpayne@0: try: jpayne@0: plot.bar(left=range(len(dataframe[base])), jpayne@0: height=dataframe[base], jpayne@0: width=2, jpayne@0: bottom=last_base, jpayne@0: color=colors.next(), jpayne@0: label=base, jpayne@0: linewidth=0) jpayne@0: last_base = add(last_base, dataframe[base]) jpayne@0: except: jpayne@0: #print [len(d) for d in dataframe.data.values()] jpayne@0: raise jpayne@0: plot.autoscale(tight=True) jpayne@0: plot.xlabel('Base Position') jpayne@0: plot.ylabel('Nucleotide Count') jpayne@0: plot.suptitle('{} Nucleotide Distribution Plot'.format(annotation)) jpayne@0: plot.legend(loc='upper right', fontsize='xx-small') jpayne@0: with new_output_file("nucleotide_distribution.png") as nuc_histogram: jpayne@0: plot.savefig(nuc_histogram, format='png', dpi=108) jpayne@0: except Exception: jpayne@0: traceback.print_exc() jpayne@0: jpayne@0: def plot_quality_distribution_histo(histo, jpayne@0: annotation=qa_params.get('annotation', ''), jpayne@0: q=qa_params.get('user Q threshold', 27) jpayne@0: ): jpayne@0: try: jpayne@0: binning = 20 jpayne@0: plot.figure(figsize=(12, 6)) jpayne@0: vec = [tuple(c.elements()) for c in histo] jpayne@0: means = [mean(t) for t in vec] jpayne@0: upper_iqr = array([percentile(t, 75) for t in vec]) jpayne@0: lower_iqr = array([percentile(t, 25) for t in vec]) jpayne@0: # plot.boxplot(vec, jpayne@0: # widths=.000005, jpayne@0: # whis=(5,95), jpayne@0: # showfliers=False, jpayne@0: # showbox=True, jpayne@0: # showmeans=True, jpayne@0: # meanline=True, jpayne@0: # showcaps=True, jpayne@0: # whiskerprops=dict(linestyle='-', color='0.60') jpayne@0: # ) jpayne@0: jpayne@0: plot.fill_between(arange(len(upper_iqr)), upper_iqr, lower_iqr, facecolor='0.50', alpha=0.5, label='interquartile range') jpayne@0: plot.plot(means, 'r-', label='Mean') jpayne@0: plot.plot(upper_iqr, 'b-', label='interquartile range', alpha=0.25) jpayne@0: plot.autoscale(tight=True) jpayne@0: plot.axhline(y=q, label="Q {}".format(q)) jpayne@0: plot.xlabel('Base Position') jpayne@0: plot.ylabel(r"""$Q (-10 log_(10) \rho)$""") jpayne@0: plot.xticks(range(0, len(histo), binning), range(0, len(histo), binning), rotation='vertical') jpayne@0: plot.suptitle('{} Q-Score Distribution Plot'.format(annotation)) jpayne@0: plot.legend(loc='lower left', fontsize='x-small') jpayne@0: plot.grid(True) jpayne@0: with new_output_file("qscore_distribution.png") as q_distro: jpayne@0: plot.savefig(q_distro, format='png', dpi=108) jpayne@0: jpayne@0: except Exception: jpayne@0: traceback.print_exc()