kkonganti@0: #!/usr/bin/env python3 kkonganti@0: kkonganti@0: # Kranti Konganti kkonganti@0: # 03/06/2024 kkonganti@0: kkonganti@0: import argparse kkonganti@0: import glob kkonganti@0: import inspect kkonganti@0: import logging kkonganti@0: import os kkonganti@0: import pprint kkonganti@0: import re kkonganti@0: from collections import defaultdict kkonganti@0: kkonganti@0: kkonganti@0: # Multiple inheritence for pretty printing of help text. kkonganti@0: class MultiArgFormatClasses( kkonganti@0: argparse.RawTextHelpFormatter, argparse.ArgumentDefaultsHelpFormatter kkonganti@0: ): kkonganti@0: pass kkonganti@0: kkonganti@0: kkonganti@0: # Main kkonganti@0: def main() -> None: kkonganti@0: """ kkonganti@0: The succesful execution of this script requires access to properly formatted kkonganti@0: lineages.csv file which has no more than 9 columns. kkonganti@0: kkonganti@0: It takes the lineages.csv file, the *_hits.csv results from `sourmash gather` kkonganti@0: mentioned with -smres option and and a root parent directory of the kkonganti@0: `salmon quant` results mentioned with -sal option and generates a final kkonganti@0: results table with the TPM values and a .krona.tsv file for each sample kkonganti@0: to be used by KronaTools. kkonganti@0: """ kkonganti@0: # Set logging. kkonganti@0: logging.basicConfig( kkonganti@0: format="\n" kkonganti@0: + "=" * 55 kkonganti@0: + "\n%(asctime)s - %(levelname)s\n" kkonganti@0: + "=" * 55 kkonganti@0: + "\n%(message)s\n\n", kkonganti@0: level=logging.DEBUG, kkonganti@0: ) kkonganti@0: kkonganti@0: # Debug print. kkonganti@0: ppp = pprint.PrettyPrinter(width=55) kkonganti@0: prog_name = inspect.stack()[0].filename kkonganti@0: kkonganti@0: parser = argparse.ArgumentParser( kkonganti@0: prog=prog_name, description=main.__doc__, formatter_class=MultiArgFormatClasses kkonganti@0: ) kkonganti@0: kkonganti@0: required = parser.add_argument_group("required arguments") kkonganti@0: kkonganti@0: required.add_argument( kkonganti@0: "-sal", kkonganti@0: dest="salmon_res_dir", kkonganti@0: default=False, kkonganti@0: required=True, kkonganti@0: help="Absolute UNIX path to the parent directory that contains the\n" kkonganti@0: + "`salmon quant` results. For example, if path to\n" kkonganti@0: + "`quant.sf` is in /hpc/john_doe/test/salmon_res/sampleA/quant.sf, then\n" kkonganti@0: + "use this command-line option as:\n" kkonganti@0: + "-sal /hpc/john_doe/test/salmon_res", kkonganti@0: ) kkonganti@0: required.add_argument( kkonganti@0: "-lin", kkonganti@0: dest="lin", kkonganti@0: default=False, kkonganti@0: required=True, kkonganti@0: help="Absolute UNIX Path to the lineages CSV file.\n" kkonganti@0: + "This file should have only 9 columns.", kkonganti@0: ) kkonganti@0: required.add_argument( kkonganti@0: "-smres", kkonganti@0: dest="sm_res_dir", kkonganti@0: default=False, kkonganti@0: required=True, kkonganti@0: help="Absolute UNIX path to the parent directory that contains the\n" kkonganti@0: + "filtered `sourmas gather` results. For example, if path to\n" kkonganti@0: + "`sampleA.csv` is in /hpc/john_doe/test/sourmash_gather/sampleA.csv,\n" kkonganti@0: + "then use this command-line option as:\n" kkonganti@0: + "-sal /hpc/john_doe/test", kkonganti@0: ) kkonganti@0: parser.add_argument( kkonganti@0: "-op", kkonganti@0: dest="out_prefix", kkonganti@0: default="nowayout.tblsum", kkonganti@0: required=False, kkonganti@0: help="Set the output file(s) prefix for output(s) generated\n" kkonganti@0: + "by this program.", kkonganti@0: ) kkonganti@0: parser.add_argument( kkonganti@0: "-sf", kkonganti@0: dest="scale_down_factor", kkonganti@0: default=float(10000), kkonganti@0: required=False, kkonganti@0: help="Set the scaling factor by which TPM values are scaled\ndown.", kkonganti@0: ) kkonganti@0: parser.add_argument( kkonganti@0: "-smres-suffix", kkonganti@0: dest="sm_res_suffix", kkonganti@0: default="_hits.csv", kkonganti@0: required=False, kkonganti@0: help="Find the `sourmash gather` result files ending in this\nsuffix.", kkonganti@0: ) kkonganti@0: parser.add_argument( kkonganti@0: "-failed-suffix", kkonganti@0: dest="failed_suffix", kkonganti@0: default="_FAILED.txt", kkonganti@0: required=False, kkonganti@0: help="Find the sample names which failed classification stored\n" kkonganti@0: + "inside the files ending in this suffix.", kkonganti@0: ) kkonganti@0: parser.add_argument( kkonganti@0: "-num-lin-cols", kkonganti@0: dest="num_lin_cols", kkonganti@0: default=int(9), kkonganti@0: required=False, kkonganti@0: help="Number of columns expected in the lineages CSV file.", kkonganti@0: ) kkonganti@0: parser.add_argument( kkonganti@0: "-lin-acc-regex", kkonganti@0: dest="lin_acc_regex", kkonganti@0: default=re.compile(r"\w+[\-\.]{1}[0-9]+"), kkonganti@0: required=False, kkonganti@0: help="The pattern of the lineage's accession.", kkonganti@0: ) kkonganti@0: kkonganti@0: args = parser.parse_args() kkonganti@0: salmon_res_dir = args.salmon_res_dir kkonganti@0: sm_res_dir = args.sm_res_dir kkonganti@0: sm_res_suffix = args.sm_res_suffix kkonganti@0: failed_suffix = args.failed_suffix kkonganti@0: out_prefix = args.out_prefix kkonganti@0: lin = args.lin kkonganti@0: num_lin_cols = args.num_lin_cols kkonganti@0: acc_pat = args.lin_acc_regex kkonganti@0: scale_down = float(args.scale_down_factor) kkonganti@0: no_hit = "Unclassified" kkonganti@0: no_hit_reads = "reads mapped to the database" kkonganti@0: tpm_const = float(1000000.0000000000) kkonganti@0: round_to = 10 kkonganti@0: all_samples = set() kkonganti@0: ( kkonganti@0: lineage2sample, kkonganti@0: unclassified2sample, kkonganti@0: lineage2sm, kkonganti@0: sm2passed, kkonganti@0: reads_total, kkonganti@0: per_taxon_reads, kkonganti@0: lineages, kkonganti@0: ) = ( kkonganti@0: defaultdict(defaultdict), kkonganti@0: defaultdict(defaultdict), kkonganti@0: defaultdict(defaultdict), kkonganti@0: defaultdict(defaultdict), kkonganti@0: defaultdict(defaultdict), kkonganti@0: defaultdict(defaultdict), kkonganti@0: defaultdict(int), kkonganti@0: ) kkonganti@0: kkonganti@0: salmon_comb_res = os.path.join(os.getcwd(), out_prefix + ".txt") kkonganti@0: # salmon_comb_res_reads_mapped = os.path.join( kkonganti@0: # os.getcwd(), re.sub(".tblsum", "_reads_mapped.tblsum", out_prefix) + ".txt" kkonganti@0: # ) kkonganti@0: salmon_comb_res_indiv_reads_mapped = os.path.join( kkonganti@0: os.getcwd(), kkonganti@0: re.sub(".tblsum", "_indiv_reads_mapped.tblsum", out_prefix) + ".txt", kkonganti@0: ) kkonganti@0: salmon_res_files = glob.glob( kkonganti@0: os.path.join(salmon_res_dir, "*", "quant.sf"), recursive=True kkonganti@0: ) kkonganti@0: sample_res_files_failed = glob.glob( kkonganti@0: os.path.join(salmon_res_dir, "*" + failed_suffix), recursive=True kkonganti@0: ) kkonganti@0: sm_res_files = glob.glob( kkonganti@0: os.path.join(sm_res_dir, "*" + sm_res_suffix), recursive=True kkonganti@0: ) kkonganti@0: kkonganti@0: # Basic checks kkonganti@0: if lin and not (os.path.exists(lin) and os.path.getsize(lin) > 0): kkonganti@0: logging.error( kkonganti@0: "The lineages file,\n" kkonganti@0: + f"{os.path.basename(lin)} does not exist or is empty!" kkonganti@0: ) kkonganti@0: exit(1) kkonganti@0: kkonganti@0: if salmon_res_dir: kkonganti@0: if not os.path.isdir(salmon_res_dir): kkonganti@0: logging.error("UNIX path\n" + f"{salmon_res_dir}\n" + "does not exist!") kkonganti@0: exit(1) kkonganti@0: if len(salmon_res_files) <= 0: kkonganti@0: with open(salmon_comb_res, "w") as salmon_comb_res_fh, open( kkonganti@0: salmon_comb_res_indiv_reads_mapped, "w" kkonganti@0: ) as salmon_comb_res_indiv_reads_mapped_fh: kkonganti@0: salmon_comb_res_fh.write(f"Sample\n{no_hit} reads in all samples\n") kkonganti@0: salmon_comb_res_indiv_reads_mapped_fh.write( kkonganti@0: f"Sample\nNo {no_hit_reads} from all samples\n" kkonganti@0: ) kkonganti@0: salmon_comb_res_fh.close() kkonganti@0: salmon_comb_res_indiv_reads_mapped_fh.close() kkonganti@0: exit(0) kkonganti@0: kkonganti@0: # Only proceed if lineages.csv exists. kkonganti@0: if lin and os.path.exists(lin) and os.path.getsize(lin) > 0: kkonganti@0: lin_fh = open(lin, "r") kkonganti@0: _ = lin_fh.readline() kkonganti@0: kkonganti@0: # Index lineages.csv kkonganti@0: for line in lin_fh: kkonganti@0: cols = line.strip().split(",") kkonganti@0: kkonganti@0: if len(cols) < num_lin_cols: kkonganti@0: logging.error( kkonganti@0: f"The file {os.path.basename(lin)} seems to\n" kkonganti@0: + "be malformed. It contains less than required 9 columns." kkonganti@0: ) kkonganti@0: exit(1) kkonganti@0: kkonganti@0: if cols[0] in lineages.keys(): kkonganti@0: continue kkonganti@0: # logging.info( kkonganti@0: # f"There is a duplicate accession [{cols[0]}]" kkonganti@0: # + f" in the lineages file {os.path.basename(lin)}!" kkonganti@0: # ) kkonganti@0: elif acc_pat.match(cols[0]): kkonganti@0: lineages[cols[0]] = ",".join(cols[1:]) kkonganti@0: kkonganti@0: lin_fh.close() kkonganti@0: kkonganti@0: # Index each samples' filtered sourmash results. kkonganti@0: for sm_res_file in sm_res_files: kkonganti@0: sample_name = re.sub(sm_res_suffix, "", os.path.basename(sm_res_file)) kkonganti@0: kkonganti@0: with open(sm_res_file, "r") as sm_res_fh: kkonganti@0: _ = sm_res_fh.readline() kkonganti@0: for line in sm_res_fh: kkonganti@0: acc = acc_pat.findall(line.strip().split(",")[9]) kkonganti@0: kkonganti@0: if len(acc) == 0: kkonganti@0: logging.info( kkonganti@0: f"Got empty lineage accession: {acc}" kkonganti@0: + f"\nRow elements: {line.strip().split(',')}" kkonganti@0: ) kkonganti@0: exit(1) kkonganti@0: if len(acc) not in [1]: kkonganti@0: logging.info( kkonganti@0: f"Got more than one lineage accession: {acc}" kkonganti@0: + f"\nRow elements: {line.strip().split(',')}" kkonganti@0: ) kkonganti@0: logging.info(f"Considering first element: {acc[0]}") kkonganti@0: if acc[0] not in lineages.keys(): kkonganti@0: logging.error( kkonganti@0: f"The lineage accession {acc[0]} is not found in {os.path.basename(lin)}" kkonganti@0: ) kkonganti@0: exit(1) kkonganti@0: lineage2sm[lineages[acc[0]]].setdefault(sample_name, 1) kkonganti@0: sm2passed["sourmash_passed"].setdefault(sample_name, 1) kkonganti@0: sm_res_fh.close() kkonganti@0: kkonganti@0: # Index each samples' salmon results. kkonganti@0: for salmon_res_file in salmon_res_files: kkonganti@0: sample_name = re.match( kkonganti@0: r"(^.+?)((\_salmon\_res)|(\.salmon))$", kkonganti@0: os.path.basename(os.path.dirname(salmon_res_file)), kkonganti@0: )[1] kkonganti@0: salmon_meta_json = os.path.join( kkonganti@0: os.path.dirname(salmon_res_file), "aux_info", "meta_info.json" kkonganti@0: ) kkonganti@0: kkonganti@0: if ( kkonganti@0: not os.path.exists(salmon_meta_json) kkonganti@0: or not os.path.getsize(salmon_meta_json) > 0 kkonganti@0: ): kkonganti@0: logging.error( kkonganti@0: "The file\n" kkonganti@0: + f"{salmon_meta_json}\ndoes not exist or is empty!\n" kkonganti@0: + "Did `salmon quant` fail?" kkonganti@0: ) kkonganti@0: exit(1) kkonganti@0: kkonganti@0: if ( kkonganti@0: not os.path.exists(salmon_res_file) kkonganti@0: or not os.path.getsize(salmon_res_file) > 0 kkonganti@0: ): kkonganti@0: logging.error( kkonganti@0: "The file\n" kkonganti@0: + f"{salmon_res_file}\ndoes not exist or is empty!\n" kkonganti@0: + "Did `salmon quant` fail?" kkonganti@0: ) kkonganti@0: exit(1) kkonganti@0: kkonganti@0: # Initiate all_tpm, rem_tpm and reads_mapped kkonganti@0: # all_tpm kkonganti@0: reads_total[sample_name].setdefault("all_tpm", []).append(float(0.0)) kkonganti@0: # rem_tpm kkonganti@0: reads_total[sample_name].setdefault("rem_tpm", []).append(float(0.0)) kkonganti@0: # reads_mapped kkonganti@0: reads_total[sample_name].setdefault("reads_mapped", []).append(float(0.0)) kkonganti@0: kkonganti@0: with open(salmon_res_file, "r") as salmon_res_fh: kkonganti@0: for line in salmon_res_fh.readlines(): kkonganti@0: if re.match(r"^Name.+", line): kkonganti@0: continue kkonganti@0: cols = line.strip().split("\t") kkonganti@0: ref_acc = cols[0] kkonganti@0: tpm = cols[3] kkonganti@0: num_reads_mapped = cols[4] kkonganti@0: kkonganti@0: ( kkonganti@0: reads_total[sample_name] kkonganti@0: .setdefault("all_tpm", []) kkonganti@0: .append( kkonganti@0: round(float(tpm), round_to), kkonganti@0: ) kkonganti@0: ) kkonganti@0: kkonganti@0: ( kkonganti@0: reads_total[sample_name] kkonganti@0: .setdefault("reads_mapped", []) kkonganti@0: .append( kkonganti@0: round(float(num_reads_mapped), round_to), kkonganti@0: ) kkonganti@0: ) kkonganti@0: kkonganti@0: if lineages[ref_acc] in lineage2sm.keys(): kkonganti@0: ( kkonganti@0: lineage2sample[lineages[ref_acc]] kkonganti@0: .setdefault(sample_name, []) kkonganti@0: .append(round(float(tpm), round_to)) kkonganti@0: ) kkonganti@0: ( kkonganti@0: per_taxon_reads[sample_name] kkonganti@0: .setdefault(lineages[ref_acc], []) kkonganti@0: .append(round(float(num_reads_mapped))) kkonganti@0: ) kkonganti@0: else: kkonganti@0: ( kkonganti@0: reads_total[sample_name] kkonganti@0: .setdefault("rem_tpm", []) kkonganti@0: .append( kkonganti@0: round(float(tpm), round_to), kkonganti@0: ) kkonganti@0: ) kkonganti@0: kkonganti@0: salmon_res_fh.close() kkonganti@0: kkonganti@0: # Index each samples' complete failure results i.e., 100% unclassified. kkonganti@0: for sample_res_file_failed in sample_res_files_failed: kkonganti@0: sample_name = re.sub( kkonganti@0: failed_suffix, "", os.path.basename(sample_res_file_failed) kkonganti@0: ) kkonganti@0: with open("".join(sample_res_file_failed), "r") as no_calls_fh: kkonganti@0: for line in no_calls_fh.readlines(): kkonganti@0: if line in ["\n", "\n\r", "\r"]: kkonganti@0: continue kkonganti@0: unclassified2sample[sample_name].setdefault(no_hit, tpm_const) kkonganti@0: no_calls_fh.close() kkonganti@0: kkonganti@0: # Finally, write all results. kkonganti@0: for sample in sorted(reads_total.keys()) + sorted(unclassified2sample.keys()): kkonganti@0: all_samples.add(sample) kkonganti@0: kkonganti@0: # Check if sourmash results exist but salmon `quant` failed kkonganti@0: # and if so, set the sample to 100% Unclassified as well. kkonganti@0: for sample in sm2passed["sourmash_passed"].keys(): kkonganti@0: if sample not in all_samples: kkonganti@0: unclassified2sample[sample].setdefault(no_hit, tpm_const) kkonganti@0: all_samples.add(sample) kkonganti@0: kkonganti@0: # Write total number of reads mapped to nowayout database. kkonganti@0: # with open(salmon_comb_res_reads_mapped, "w") as nowo_reads_mapped_fh: kkonganti@0: # nowo_reads_mapped_fh.write( kkonganti@0: # "\t".join( kkonganti@0: # [ kkonganti@0: # "Sample", kkonganti@0: # "All reads", kkonganti@0: # "Classified reads", kkonganti@0: # "Unclassified reads (Reads failed thresholds )", kkonganti@0: # ] kkonganti@0: # ) kkonganti@0: # ) kkonganti@0: kkonganti@0: # for sample in all_samples: kkonganti@0: # if sample in reads_total.keys(): kkonganti@0: # nowo_reads_mapped_fh.write( kkonganti@0: # "\n" kkonganti@0: # + "\t".join( kkonganti@0: # [ kkonganti@0: # f"\n{sample}", kkonganti@0: # f"{int(sum(reads_total[sample]['reads_mapped']))}", kkonganti@0: # f"{int(reads_total[sample]['reads_mapped'])}", kkonganti@0: # f"{int(reads_total[sample]['rem_tpm'])}", kkonganti@0: # ], kkonganti@0: # ) kkonganti@0: # ) kkonganti@0: # else: kkonganti@0: # nowo_reads_mapped_fh.write(f"\n{sample}\t{int(0.0)}") kkonganti@0: # nowo_reads_mapped_fh.close() kkonganti@0: kkonganti@0: # Write scaled down TPM values for each sample. kkonganti@0: with open(salmon_comb_res, "w") as salmon_comb_res_fh, open( kkonganti@0: salmon_comb_res_indiv_reads_mapped, "w" kkonganti@0: ) as salmon_comb_res_indiv_reads_mapped_fh: kkonganti@0: salmon_comb_res_fh.write("Lineage\t" + "\t".join(all_samples) + "\n") kkonganti@0: salmon_comb_res_indiv_reads_mapped_fh.write( kkonganti@0: "Lineage\t" + "\t".join(all_samples) + "\n" kkonganti@0: ) kkonganti@0: kkonganti@0: # Write *.krona.tsv header for all samples. kkonganti@0: for sample in all_samples: kkonganti@0: krona_fh = open( kkonganti@0: os.path.join(salmon_res_dir, sample + ".krona.tsv"), "w" kkonganti@0: ) kkonganti@0: krona_fh.write( kkonganti@0: "\t".join( kkonganti@0: [ kkonganti@0: "fraction", kkonganti@0: "superkingdom", kkonganti@0: "phylum", kkonganti@0: "class", kkonganti@0: "order", kkonganti@0: "family", kkonganti@0: "genus", kkonganti@0: "species", kkonganti@0: ] kkonganti@0: ) kkonganti@0: ) kkonganti@0: krona_fh.close() kkonganti@0: kkonganti@0: # Write the TPM values (TPM/scale_down) for valid lineages. kkonganti@0: for lineage in lineage2sm.keys(): kkonganti@0: salmon_comb_res_fh.write(lineage) kkonganti@0: salmon_comb_res_indiv_reads_mapped_fh.write(lineage) kkonganti@0: kkonganti@0: for sample in all_samples: kkonganti@0: krona_fh = open( kkonganti@0: os.path.join(salmon_res_dir, sample + ".krona.tsv"), "a" kkonganti@0: ) kkonganti@0: kkonganti@0: if sample in unclassified2sample.keys(): kkonganti@0: salmon_comb_res_fh.write(f"\t0.0") kkonganti@0: salmon_comb_res_indiv_reads_mapped_fh.write(f"\t0") kkonganti@0: elif sample in lineage2sample[lineage].keys(): kkonganti@0: reads = sum(per_taxon_reads[sample][lineage]) kkonganti@0: tpm = sum(lineage2sample[lineage][sample]) kkonganti@0: tph = round(tpm / scale_down, round_to) kkonganti@0: lineage2sample[sample].setdefault("hits_tpm", []).append( kkonganti@0: float(tpm) kkonganti@0: ) kkonganti@0: kkonganti@0: salmon_comb_res_fh.write(f"\t{tph}") kkonganti@0: salmon_comb_res_indiv_reads_mapped_fh.write(f"\t{reads}") kkonganti@0: krona_lin_row = lineage.split(",") kkonganti@0: kkonganti@0: if len(krona_lin_row) > num_lin_cols - 1: kkonganti@0: logging.error( kkonganti@0: "Taxonomy columns are more than 8 for the following lineage:" kkonganti@0: + f"{krona_lin_row}" kkonganti@0: ) kkonganti@0: exit(1) kkonganti@0: else: kkonganti@0: krona_fh.write( kkonganti@0: "\n" kkonganti@0: + str(round((tpm / tpm_const), round_to)) kkonganti@0: + "\t" kkonganti@0: + "\t".join(krona_lin_row[:-1]) kkonganti@0: ) kkonganti@0: else: kkonganti@0: salmon_comb_res_fh.write(f"\t0.0") kkonganti@0: salmon_comb_res_indiv_reads_mapped_fh.write(f"\t0") kkonganti@0: krona_fh.close() kkonganti@0: kkonganti@0: salmon_comb_res_fh.write("\n") kkonganti@0: salmon_comb_res_indiv_reads_mapped_fh.write(f"\n") kkonganti@0: kkonganti@0: # Finally write TPH (TPM/scale_down) for Unclassified kkonganti@0: # Row = Unclassified / No reads mapped to the database ... kkonganti@0: salmon_comb_res_fh.write(f"{no_hit}") kkonganti@0: salmon_comb_res_indiv_reads_mapped_fh.write(f"Total {no_hit_reads}") kkonganti@0: kkonganti@0: for sample in all_samples: kkonganti@0: krona_ufh = open( kkonganti@0: os.path.join(salmon_res_dir, sample + ".krona.tsv"), "a" kkonganti@0: ) kkonganti@0: # krona_ufh.write("\t") kkonganti@0: if sample in unclassified2sample.keys(): kkonganti@0: salmon_comb_res_fh.write( kkonganti@0: f"\t{round((unclassified2sample[sample][no_hit] / scale_down), round_to)}" kkonganti@0: ) kkonganti@0: salmon_comb_res_indiv_reads_mapped_fh.write(f"\t0") kkonganti@0: krona_ufh.write( kkonganti@0: f"\n{round((unclassified2sample[sample][no_hit] / tpm_const), round_to)}" kkonganti@0: ) kkonganti@0: else: kkonganti@0: trace_tpm = tpm_const - sum(reads_total[sample]["all_tpm"]) kkonganti@0: trace_tpm = float(f"{trace_tpm:.{round_to}f}") kkonganti@0: if trace_tpm <= 0: kkonganti@0: trace_tpm = float(0.0) kkonganti@0: tph_unclassified = float( kkonganti@0: f"{(sum(reads_total[sample]['rem_tpm']) + trace_tpm) / scale_down:{round_to}f}" kkonganti@0: ) kkonganti@0: krona_unclassified = float( kkonganti@0: f"{(sum(reads_total[sample]['rem_tpm']) + trace_tpm) / tpm_const:{round_to}f}" kkonganti@0: ) kkonganti@0: salmon_comb_res_fh.write(f"\t{tph_unclassified}") kkonganti@0: salmon_comb_res_indiv_reads_mapped_fh.write( kkonganti@0: f"\t{int(sum(sum(per_taxon_reads[sample].values(), [])))}" kkonganti@0: ) kkonganti@0: krona_ufh.write(f"\n{krona_unclassified}") kkonganti@0: krona_ufh.write("\t" + "\t".join(["unclassified"] * (num_lin_cols - 2))) kkonganti@0: krona_ufh.close() kkonganti@0: kkonganti@0: salmon_comb_res_fh.close() kkonganti@0: salmon_comb_res_indiv_reads_mapped_fh.close() kkonganti@0: # ppp.pprint(lineage2sample) kkonganti@0: # ppp.pprint(lineage2sm) kkonganti@0: # ppp.pprint(reads_total) kkonganti@0: kkonganti@0: kkonganti@0: if __name__ == "__main__": kkonganti@0: main()