Mercurial > repos > jpayne > cfsan_qaqc
view qa_core/gims_qa_plot.py @ 0:dafec28bd37e
planemo upload
author | jpayne |
---|---|
date | Tue, 08 May 2018 18:54:20 -0400 |
parents | |
children |
line wrap: on
line source
import subprocess from qa_core import new_output_file, qa_params from collections import defaultdict import warnings warnings.filterwarnings("ignore") import pandas from pandas import DataFrame import matplotlib.pyplot as plot from numpy import zeros, add, mean, array, percentile, arange import traceback from gims_qa_tests import cluster import logging bases = ("A", "T", "G", "C", "N") def qa_plots(df, annotation, log=logging.getLogger('qa.plots'), **kwargs): #log.debug(df) #log.debug(annotation) plot_quality_histo(df['qualityHisto'], annotation, **kwargs) plot_nucleotide_distribution_histo(df['nucleotideHisto'], annotation) plot_quality_distribution_histo(df['baseQScoreHisto'], annotation) plot_base_balance(df, annotation, **kwargs) #plot.show() def plot_base_balance(df, annotation=qa_params.get('annotation', ''), threshold=qa_params.get('permitted base balance variance', .2), **kwargs): try: plot.figure() plot.axis((0,2,0,2)) plot.suptitle("{} Base Balance".format(annotation)) plot.xlabel("G/C") plot.ylabel("A/T") thres_min = 1 - threshold thres_max = 1 + threshold for coord in (thres_min, thres_max): for linef in (plot.axvline, plot.axhline): linef(coord, linestyle='dashed', label=str(coord)) plot.annotate(thres_max, xy=(0, thres_max)) plot.annotate(thres_min, xy=(0, thres_min)) plot.annotate(thres_max, xy=(thres_max, 0)) plot.annotate(thres_min, xy=(thres_min, 0)) a, t, g, c = [sum(df['nucleotideHisto'][b]) for b in ('A', 'T', 'G', 'C')] y, x = a / float(t), g / float(c) plot.plot((x,), (y,), marker="+") plot.annotate("({:.2},{:.2})".format(x, y), xy=(x+.05, y+.05), textcoords='data') with new_output_file("base_balance.png") as base_bal: plot.savefig(base_bal, format='png', dpi=92) except Exception: traceback.print_exc() def plot_quality_histo(histo, annotation=qa_params.get('annotation', ''), q=qa_params.get('user Q threshold', 27)): try: plot.figure() labels = range(max(histo.keys()) + 1) plot.bar(left=labels, height=[histo[n] for n in labels], color='y', linewidth=1) # plot.hist(histo, bins=range(max(histo))) plot.xticks() plot.axvline(x=q, label="Q {}".format(q)) #plot.show() plot.xlabel(r"""$Q (-10 log_(10) \rho)$""") plot.ylabel('Number of Bases') plot.suptitle('{} Q-Score Histogram'.format(annotation)) plot.legend(loc='upper left', fontsize='x-small') with new_output_file("qscore_histogram.png") as qual_histogram: plot.savefig(qual_histogram, format='png', dpi=92) # except ValueError: # pass except Exception: traceback.print_exc() def plot_nucleotide_distribution_histo(dataframe, annotation=qa_params.get('annotation', '')): try: #dataframe = defaultdict(list) colors = iter(('r', 'y', 'g', 'b', 'pink')) plot.figure() last_base = zeros((len(dataframe.values()[0]),)) #set all zeros for bottom of first bar row for base in bases: try: plot.bar(left=range(len(dataframe[base])), height=dataframe[base], width=2, bottom=last_base, color=colors.next(), label=base, linewidth=0) last_base = add(last_base, dataframe[base]) except: #print [len(d) for d in dataframe.data.values()] raise plot.autoscale(tight=True) plot.xlabel('Base Position') plot.ylabel('Nucleotide Count') plot.suptitle('{} Nucleotide Distribution Plot'.format(annotation)) plot.legend(loc='upper right', fontsize='xx-small') with new_output_file("nucleotide_distribution.png") as nuc_histogram: plot.savefig(nuc_histogram, format='png', dpi=108) except Exception: traceback.print_exc() def plot_quality_distribution_histo(histo, annotation=qa_params.get('annotation', ''), q=qa_params.get('user Q threshold', 27) ): try: binning = 20 plot.figure(figsize=(12, 6)) vec = [tuple(c.elements()) for c in histo] means = [mean(t) for t in vec] upper_iqr = array([percentile(t, 75) for t in vec]) lower_iqr = array([percentile(t, 25) for t in vec]) # plot.boxplot(vec, # widths=.000005, # whis=(5,95), # showfliers=False, # showbox=True, # showmeans=True, # meanline=True, # showcaps=True, # whiskerprops=dict(linestyle='-', color='0.60') # ) plot.fill_between(arange(len(upper_iqr)), upper_iqr, lower_iqr, facecolor='0.50', alpha=0.5, label='interquartile range') plot.plot(means, 'r-', label='Mean') plot.plot(upper_iqr, 'b-', label='interquartile range', alpha=0.25) plot.autoscale(tight=True) plot.axhline(y=q, label="Q {}".format(q)) plot.xlabel('Base Position') plot.ylabel(r"""$Q (-10 log_(10) \rho)$""") plot.xticks(range(0, len(histo), binning), range(0, len(histo), binning), rotation='vertical') plot.suptitle('{} Q-Score Distribution Plot'.format(annotation)) plot.legend(loc='lower left', fontsize='x-small') plot.grid(True) with new_output_file("qscore_distribution.png") as q_distro: plot.savefig(q_distro, format='png', dpi=108) except Exception: traceback.print_exc()