annotate 1.0.0/bin/gen_salmon_res_table.py @ 0:801b85b03a17 draft default tip

planemo upload
author galaxytrakr
date Thu, 28 May 2026 20:31:42 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
1 #!/usr/bin/env python3
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
2
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
3 # Kranti Konganti
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
4
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
5 import argparse
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
6 import glob
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
7 import inspect
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
8 import json
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
9 import logging
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
10 import os
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
11 import pickle
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
12 import pprint
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
13 import re
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
14 from collections import defaultdict
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
15
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
16 import yaml
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
17
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
18
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
19 # Multiple inheritance for pretty printing of help text.
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
class MultiArgFormatClasses(argparse.RawTextHelpFormatter, argparse.ArgumentDefaultsHelpFormatter):
    """Help formatter combining two stock argparse formatters.

    Inheriting from both ``RawTextHelpFormatter`` and
    ``ArgumentDefaultsHelpFormatter`` keeps the explicit line breaks written
    into each option's help string and also appends the option's default
    value to its help text. The class adds no behavior of its own; it exists
    only so both formatters can be passed as one ``formatter_class``.
    """
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
22
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
23
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
24 # Main
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
25 def main() -> None:
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
26 """
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
27 The succesful execution of this script requires access to bettercallsal formatted
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
28 db flat files. On raven2, they are at /hpc/db/bettercallsall/PDGXXXXXXXXXX.XXXXX
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
29
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
30 It takes the ACC2SERO.pickle file and *.reference_target.cluster_list.tsv file
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
31 for that particular NCBI Pathogens release from the db directory mentioned with
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
32 -db option and a root parent directory of the `salmon quant` results mentioned
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
33 with -sal option and generates a final results table with number of reads
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
34 mapped and a .json file to be used with MultiQC to generate a stacked bar plot.
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
35
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
36 Using -url option optionally adds an extra column of NCBI Pathogens Isolates
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
37 Browser, which directly links out to NCBI Pathogens Isolates SNP viewer tool.
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
38 """
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
39 # Set logging.
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
40 logging.basicConfig(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
41 format="\n" + "=" * 55 + "\n%(asctime)s - %(levelname)s\n" + "=" * 55 + "\n%(message)s\n\n",
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
42 level=logging.DEBUG,
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
43 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
44
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
45 # Debug print.
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
46 ppp = pprint.PrettyPrinter(width=55)
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
47 prog_name = inspect.stack()[0].filename
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
48
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
49 parser = argparse.ArgumentParser(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
50 prog=prog_name, description=main.__doc__, formatter_class=MultiArgFormatClasses
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
51 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
52
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
53 required = parser.add_argument_group("required arguments")
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
54
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
55 required.add_argument(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
56 "-sal",
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
57 dest="salmon_res_dir",
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
58 default=False,
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
59 required=True,
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
60 help="Absolute UNIX path to the parent directory that contains the\n"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
61 + "`salmon quant` results directory. For example, if path to\n"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
62 + "`quant.sf` is in /hpc/john_doe/test/salmon_res/quant.sf, then\n"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
63 + "use this command-line option as:\n"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
64 + "-sal /hpc/john_doe/test",
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
65 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
66 required.add_argument(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
67 "-snp",
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
68 dest="rtc",
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
69 default=False,
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
70 required=True,
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
71 help="Absolute UNIX Path to the PDG SNP reference target cluster\n"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
72 + "metadata file. On raven2, these are located at\n"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
73 + "/hpc/db/bettercallsal/PDGXXXXXXXXXX.XXXXX\n"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
74 + "Required if -sal is on.",
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
75 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
76 required.add_argument(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
77 "-pickle",
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
78 dest="acc2sero",
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
79 default=False,
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
80 required=True,
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
81 help="Absolute UNIX Path to the *ACC2SERO.pickle\n"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
82 + "metadata file. On raven2, these are located at\n"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
83 + "/hpc/db/bettercallsal/PDGXXXXXXXXXX.XXXXX\n"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
84 + "Required if -sal is on.",
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
85 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
86 parser.add_argument(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
87 "-op",
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
88 dest="out_prefix",
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
89 default="bettercallsal.tblsum",
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
90 required=False,
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
91 help="Set the output file(s) prefix for output(s) generated\n" + "by this program.",
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
92 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
93 parser.add_argument(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
94 "-url",
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
95 dest="show_snp_clust_info",
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
96 default=False,
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
97 required=False,
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
98 action="store_true",
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
99 help="Show SNP cluster participation information of the final genome hit.\n"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
100 + "This may be useful to see a relative placement of your sample in\n"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
101 + "NCBI Isolates SNP Tree Viewer based on genome similarity but however\n"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
102 + "due to rapid nature of the updates at NCBI Pathogen Detection Project,\n"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
103 + "the placement may be in an outdated cluster.",
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
104 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
105
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
106 args = parser.parse_args()
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
107 salmon_res_dir = args.salmon_res_dir
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
108 out_prefix = args.out_prefix
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
109 show_snp_clust_col = args.show_snp_clust_info
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
110 rtc = args.rtc
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
111 pickled_sero = args.acc2sero
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
112 no_hit = "No genome hit"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
113 no_presence = "Salmonella presence not detected"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
114 bcs_sal_yn_prefix = "bettercallsal_salyn"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
115 sal_y = "Detected"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
116 sal_n = "Not detected"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
117 null_value = "NULL"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
118 assm_pat = re.compile(r"GC[AF]\_[0-9]+\.*[0-9]*")
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
119 ncbi_pathogens_base_url = "https://www.ncbi.nlm.nih.gov/pathogens/"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
120 ncbi_pathogens_genome_base = "https://www.ncbi.nlm.nih.gov/datasets/genome/"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
121
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
122 sample2salmon, snp_clusters, multiqc_salmon_counts, seen_sero, sal_yn = (
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
123 defaultdict(defaultdict),
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
124 defaultdict(defaultdict),
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
125 defaultdict(defaultdict),
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
126 defaultdict(int),
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
127 defaultdict(int),
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
128 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
129
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
130 cell_colors_yml = {
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
131 bcs_sal_yn_prefix: {sal_y: "#c8e6c9 !important;", sal_n: "#ffcdd2 !important;"}
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
132 }
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
133
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
134 salmon_comb_res = os.path.join(os.getcwd(), out_prefix + ".txt")
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
135 bcs_sal_yn = re.sub(out_prefix, bcs_sal_yn_prefix + ".tblsum", salmon_comb_res)
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
136 cell_colors_yml_file = re.sub(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
137 out_prefix + ".txt", bcs_sal_yn_prefix + ".cellcolors.yml", salmon_comb_res
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
138 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
139 salmon_comb_res_mqc = os.path.join(os.getcwd(), str(out_prefix).split(".")[0] + "_mqc.json")
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
140 salmon_res_files = glob.glob(os.path.join(salmon_res_dir, "*", "quant.sf"), recursive=True)
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
141 salmon_res_file_failed = glob.glob(os.path.join(salmon_res_dir, "BCS_NO_CALLS.txt"))
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
142
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
143 if rtc and (not os.path.exists(rtc) or not os.path.getsize(rtc) > 0):
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
144 logging.error(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
145 "The reference target cluster metadata file,\n"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
146 + f"{os.path.basename(rtc)} does not exist or is empty!"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
147 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
148 exit(1)
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
149
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
150 if rtc and (not salmon_res_dir or not pickled_sero):
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
151 logging.error("When -rtc is on, -sal and -ps are also required.")
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
152 exit(1)
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
153
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
154 if pickled_sero and (not os.path.exists(pickled_sero) or not os.path.getsize(pickled_sero)):
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
155 logging.error(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
156 "The pickle file,\n" + f"{os.path.basename(pickled_sero)} does not exist or is empty!"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
157 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
158 exit(1)
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
159
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
160 if salmon_res_dir:
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
161 if not os.path.isdir(salmon_res_dir):
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
162 logging.error("UNIX path\n" + f"{salmon_res_dir}\n" + "does not exist!")
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
163 exit(1)
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
164 if len(salmon_res_files) <= 0:
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
165 # logging.error(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
166 # "Parent directory,\n"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
167 # + f"{salmon_res_dir}"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
168 # + "\ndoes not seem to have any directories that contain\n"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
169 # + "the `quant.sf` file(s)."
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
170 # )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
171 # exit(1)
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
172 with open(salmon_comb_res, "w") as salmon_comb_res_fh:
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
173 salmon_comb_res_fh.write(f"Sample\n{no_hit}s in any samples\n")
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
174 salmon_comb_res_fh.close()
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
175
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
176 with open(bcs_sal_yn, "w") as bcs_sal_yn_fh:
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
177 bcs_sal_yn_fh.write(f"Sample\n{no_presence} in any samples\n")
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
178 bcs_sal_yn_fh.close()
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
179
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
180 exit(0)
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
181
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
182 if rtc and os.path.exists(rtc) and os.path.getsize(rtc) > 0:
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
183
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
184 # pdg_release = re.match(r"(^PDG\d+\.\d+)\..+", os.path.basename(rtc))[1] + "/"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
185 acc2sero = pickle.load(file=open(pickled_sero, "rb"))
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
186
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
187 with open(rtc, "r") as rtc_fh:
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
188
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
189 for line in rtc_fh:
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
190 cols = line.strip().split("\t")
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
191
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
192 if len(cols) < 4:
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
193 logging.error(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
194 f"The file {os.path.basename(rtc)} seems to\n"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
195 + "be malformed. It contains less than required 4 columns."
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
196 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
197 exit(1)
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
198 elif cols[3] != null_value:
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
199 snp_clusters[cols[0]].setdefault("assembly_accs", []).append(cols[3])
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
200 snp_clusters[cols[3]].setdefault("snp_clust_id", []).append(cols[0])
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
201 snp_clusters[cols[3]].setdefault("pathdb_acc_id", []).append(cols[1])
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
202 if len(snp_clusters[cols[3]]["snp_clust_id"]) > 1:
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
203 logging.error(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
204 f"There is a duplicate reference accession [{cols[3]}]"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
205 + f"in the metadata file{os.path.basename(rtc)}!"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
206 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
207 exit(1)
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
208
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
209 rtc_fh.close()
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
210
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
211 for salmon_res_file in salmon_res_files:
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
212 sample_name = re.match(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
213 r"(^.+?)((\_salmon\_res)|(\.salmon))$",
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
214 os.path.basename(os.path.dirname(salmon_res_file)),
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
215 )[1]
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
216 salmon_meta_json = os.path.join(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
217 os.path.dirname(salmon_res_file), "aux_info", "meta_info.json"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
218 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
219
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
220 if not os.path.exists(salmon_meta_json) or not os.path.getsize(salmon_meta_json) > 0:
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
221 logging.error(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
222 "The file\n"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
223 + f"{salmon_meta_json}\ndoes not exist or is empty!\n"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
224 + "Did `salmon quant` fail?"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
225 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
226 exit(1)
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
227
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
228 if not os.path.exists(salmon_res_file) or not os.path.getsize(salmon_res_file):
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
229 logging.error(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
230 "The file\n"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
231 + f"{salmon_res_file}\ndoes not exist or is empty!\n"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
232 + "Did `salmon quant` fail?"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
233 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
234 exit(1)
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
235
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
236 with open(salmon_res_file, "r") as salmon_res_fh:
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
237 for line in salmon_res_fh.readlines():
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
238 if re.match(r"^Name.+", line):
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
239 continue
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
240 cols = line.strip().split("\t")
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
241 ref_acc = "_".join(cols[0].split("_")[:2])
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
242
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
243 if ref_acc not in snp_clusters.keys():
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
244 snp_clusters[ref_acc]["snp_clust_id"] = ref_acc
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
245 snp_clusters[ref_acc]["pathdb_acc_id"] = ref_acc
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
246
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
247 (
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
248 sample2salmon[sample_name]
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
249 .setdefault(acc2sero[cols[0]], [])
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
250 .append(int(round(float(cols[4]), 2)))
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
251 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
252 (
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
253 sample2salmon[sample_name]
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
254 .setdefault("snp_clust_ids", {})
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
255 .setdefault("".join(snp_clusters[ref_acc]["snp_clust_id"]), [])
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
256 .append("".join(snp_clusters[ref_acc]["pathdb_acc_id"]))
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
257 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
258 seen_sero[acc2sero[cols[0]]] = 1
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
259
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
260 salmon_meta_json_read = json.load(open(salmon_meta_json, "r"))
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
261 (
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
262 sample2salmon[sample_name]
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
263 .setdefault("tot_reads", [])
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
264 .append(salmon_meta_json_read["num_processed"])
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
265 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
266
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
267 with open(salmon_comb_res, "w") as salmon_comb_res_fh:
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
268
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
269 # snp_clust_col_header = (
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
270 # "\tSNP Cluster(s) by Genome Hit\n" if show_snp_clust_col else "\n"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
271 # )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
272 snp_clust_col_header = (
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
273 "\tNCBI Pathogens Isolate Browser\n" if show_snp_clust_col else "\n"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
274 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
275 serotypes = sorted(seen_sero.keys())
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
276 formatted_serotypes = [
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
277 re.sub(r"\,antigen_formula=", " | ", s)
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
278 for s in [re.sub(r"serotype=", "", s) for s in serotypes]
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
279 ]
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
280 salmon_comb_res_fh.write(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
281 "Sample\t" + "\t".join(formatted_serotypes) + snp_clust_col_header
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
282 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
283 # sample_snp_relation = (
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
284 # ncbi_pathogens_base_url
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
285 # + pdg_release
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
286 # + "".join(snp_clusters[ref_acc]["snp_clust_id"])
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
287 # + "?accessions="
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
288 # )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
289 if len(salmon_res_file_failed) == 1:
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
290 with (open("".join(salmon_res_file_failed), "r")) as no_calls_fh:
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
291 for line in no_calls_fh.readlines():
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
292 if line in ["\n", "\n\r", "\r"]:
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
293 continue
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
294 salmon_comb_res_fh.write(line.strip())
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
295 sal_yn[line.strip()] += 0
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
296 for serotype in serotypes:
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
297 salmon_comb_res_fh.write("\t-")
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
298 salmon_comb_res_fh.write(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
299 "\t-\n"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
300 ) if show_snp_clust_col else salmon_comb_res_fh.write("\n")
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
301 no_calls_fh.close()
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
302
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
303 for sample, counts in sorted(sample2salmon.items()):
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
304 salmon_comb_res_fh.write(sample)
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
305 snp_cluster_res_col = list()
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
306
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
307 for snp_clust_id in sample2salmon[sample]["snp_clust_ids"].keys():
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
308 # print(snp_clust_id)
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
309 # print(",".join(sample2salmon[sample]["snp_clust_ids"][snp_clust_id]))
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
310 # ppp.pprint(sample2salmon[sample]["snp_clust_ids"])
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
311 # ppp.pprint(sample2salmon[sample]["snp_clust_ids"][snp_clust_id])
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
312 # final_url_text = ",".join(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
313 # sample2salmon[sample]["snp_clust_ids"][snp_clust_id]
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
314 # )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
315 # final_url_text_to_show = snp_clust_id
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
316 # snp_cluster_res_col.append(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
317 # "".join(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
318 # [
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
319 # f'<a href="',
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
320 # sample_snp_relation,
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
321 # ",".join(sample2salmon[sample]["snp_clust_ids"][snp_clust_id]),
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
322 # f'" target="_blank">{snp_clust_id}</a>',
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
323 # ]
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
324 # )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
325 # )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
326 # ppp.pprint(sample2salmon[sample])
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
327 for pathdbacc in sample2salmon[sample]["snp_clust_ids"][snp_clust_id]:
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
328 # final_url_text_to_show = " ".join(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
329 # sample2salmon[sample]["snp_clust_ids"][snp_clust_id]
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
330 # )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
331 sample_snp_relation = (
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
332 ncbi_pathogens_genome_base
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
333 if assm_pat.match(pathdbacc)
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
334 else ncbi_pathogens_base_url + "isolates/#"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
335 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
336
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
337 snp_cluster_res_col.append(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
338 "".join(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
339 [
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
340 f'<a href="',
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
341 sample_snp_relation,
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
342 pathdbacc,
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
343 f'" target="_blank">{pathdbacc}</a>',
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
344 ]
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
345 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
346 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
347
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
348 per_serotype_counts = 0
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
349 for serotype in serotypes:
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
350
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
351 if serotype in sample2salmon[sample].keys():
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
352 # ppp.pprint(counts)
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
353 sample_perc_mapped = round(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
354 sum(counts[serotype]) / sum(counts["tot_reads"]) * 100, 2
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
355 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
356 salmon_comb_res_fh.write(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
357 f"\t{sum(counts[serotype])} ({sample_perc_mapped}%)"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
358 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
359 multiqc_salmon_counts[sample].setdefault(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
360 re.match(r"^serotype=(.+?)\,antigen_formula.*", serotype)[1],
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
361 sum(counts[serotype]),
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
362 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
363 per_serotype_counts += sum(counts[serotype])
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
364 sal_yn[sample] += 1
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
365 else:
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
366 salmon_comb_res_fh.write(f"\t-")
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
367 sal_yn[sample] += 0
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
368
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
369 multiqc_salmon_counts[sample].setdefault(
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
370 no_hit, sum(counts["tot_reads"]) - per_serotype_counts
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
371 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
372 snp_clust_col_val = (
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
373 f'\t{" ".join(snp_cluster_res_col)}\n' if show_snp_clust_col else "\n"
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
374 )
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
375 # ppp.pprint(multiqc_salmon_counts)
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
376 salmon_comb_res_fh.write(snp_clust_col_val)
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
377
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
378 with open(bcs_sal_yn, "w") as bcs_sal_yn_fh:
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
379 bcs_sal_yn_fh.write("Sample\tSalmonella Presence\tNo. of Serotypes\n")
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
380 for sample in sal_yn.keys():
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
381 if sal_yn[sample] > 0:
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
382 bcs_sal_yn_fh.write(f"{sample}\tDetected\t{sal_yn[sample]}\n")
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
383 else:
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
384 bcs_sal_yn_fh.write(f"{sample}\tNot detected\t{sal_yn[sample]}\n")
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
385
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
386 with open(cell_colors_yml_file, "w") as cell_colors_fh:
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
387 yaml.dump(cell_colors_yml, cell_colors_fh, default_flow_style=False)
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
388
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
389 salmon_plot_json(salmon_comb_res_mqc, multiqc_salmon_counts, no_hit)
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
390
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
391 salmon_comb_res_fh.close()
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
392 bcs_sal_yn_fh.close()
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
393 cell_colors_fh.close()
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
394
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
395
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
def salmon_plot_json(file: str, sample_salmon_counts: dict, no_hit: str) -> None:
    """
    This method will take a dictionary of salmon counts per sample
    and will dump a JSON that will be used by MultiQC.

    Args:
        file: Path of the JSON file to write.
        sample_salmon_counts: Mapping of sample name to a mapping of
            category name (a serotype or the ``no_hit`` label) to read count.
        no_hit: Category label for reads not attributed to any serotype.
            It is always given its own fixed color.

    Raises:
        ValueError: If ``file`` or ``sample_salmon_counts`` is None.
    """

    if file is None or sample_salmon_counts is None:
        err_msg = (
            "Either the output file to dump the JSON for MultiQC or the "
            + "dictionary holding the salmon counts was not passed."
        )
        logging.error(err_msg)
        # Stop here: continuing would only fail later with an unrelated
        # TypeError / AttributeError on the missing argument.
        raise ValueError(err_msg)

    # Credit: http://phrogz.net/tmp/24colors.html
    # Default palette of distinct colors; cycled through if needed.
    distinct_color_palette = [
        "#FF0000",
        "#FFFF00",
        "#00EAFF",
        "#AA00FF",
        "#FF7F00",
        "#BFFF00",
        "#0095FF",
        "#FF00AA",
        "#FFD400",
        "#6AFF00",
        "#0040FF",
        "#EDB9B9",
        "#B9D7ED",
        "#E7E9B9",
        "#DCB9ED",
        "#B9EDE0",
        "#8F2323",
        "#23628F",
        "#8F6A23",
        "#6B238F",
        "#4F8F23",
    ]

    # Credit: https://mokole.com/palette.html
    # Will use this palette if the default one is too small for the
    # number of categories. More than 50 serotypes per run is probably
    # rare but if not, the colors of this palette are cycled through.
    distinct_color_palette2 = [
        "#2F4F4F",  # darkslategray
        "#556B2F",  # darkolivegreen
        "#A0522D",  # sienna
        "#2E8B57",  # seagreen
        "#006400",  # darkgreen
        "#8B0000",  # darkred
        "#808000",  # olive
        "#BC8F8F",  # rosybrown
        "#663399",  # rebeccapurple
        "#B8860B",  # darkgoldenrod
        "#4682B4",  # steelblue
        "#000080",  # navy
        "#D2691E",  # chocolate
        "#9ACD32",  # yellowgreen
        "#20B2AA",  # lightseagreen
        "#CD5C5C",  # indianred
        "#8FBC8F",  # darkseagreen
        "#800080",  # purple
        "#B03060",  # maroon3
        "#FF8C00",  # darkorange
        "#FFD700",  # gold
        "#FFFF00",  # yellow
        "#DEB887",  # burlywood
        "#00FF00",  # lime
        "#BA55D3",  # mediumorchid
        "#00FA9A",  # mediumspringgreen
        "#4169E1",  # royalblue
        "#E9967A",  # darksalmon
        "#DC143C",  # crimson
        "#00FFFF",  # aqua
        "#F4A460",  # sandybrown
        "#9370DB",  # mediumpurple
        "#0000FF",  # blue
        "#ADFF2F",  # greenyellow
        "#FF6347",  # tomato
        "#D8BFD8",  # thistle
        "#FF00FF",  # fuchsia
        "#DB7093",  # palevioletred
        "#F0E68C",  # khaki
        "#6495ED",  # cornflower
        "#DDA0DD",  # plum
        "#EE82EE",  # violet
        "#7FFFD4",  # aquamarine
        "#FAFAD2",  # lightgoldenrod
        "#FF69B4",  # hotpink
        "#FFB6C1",  # lightpink
    ]

    no_hit_color = "#434348"
    serotypes = set()

    # Top-level keys are plain values; nested sections ("pconfig", "data",
    # "categories") are created on first access by the defaultdict.
    salmon_counts = defaultdict(defaultdict)
    salmon_counts["id"] = "BETTERCALLSAL_SALMON_COUNTS"
    salmon_counts["section_name"] = "Salmon read counts"
    salmon_counts["description"] = (
        "This section shows the read counts from running <code>salmon</code> "
        + "in <code>--meta</code> mode using SE, merged PE or concatenated PE reads against "
        + "an on-the-fly <code>salmon</code> index generated from the genome hits "
        + "of <code>kma</code>."
    )
    salmon_counts["plot_type"] = "bargraph"
    salmon_counts["pconfig"]["id"] = "bettercallsal_salmon_counts_plot"
    salmon_counts["pconfig"]["title"] = "Salmon: Read counts"
    salmon_counts["pconfig"]["ylab"] = "Number of reads"
    # NOTE(review): this is the string "false", not the boolean False.
    # Kept as-is to leave the emitted JSON unchanged -- confirm intent.
    salmon_counts["pconfig"]["xDecimals"] = "false"
    salmon_counts["pconfig"]["cpswitch_counts_label"] = "Number of reads (Counts)"
    salmon_counts["pconfig"]["cpswitch_percent_label"] = "Number of reads (Percentages)"

    for sample in sorted(sample_salmon_counts):
        serotypes.update(sample_salmon_counts[sample])
        salmon_counts["data"][sample] = sample_salmon_counts[sample]

    # Switch to the larger palette when the default one cannot give every
    # category its own color.
    palette = distinct_color_palette
    if len(serotypes) > len(palette):
        palette = distinct_color_palette2

    # Assign colors in sorted serotype order, starting at the first palette
    # entry and wrapping around once the palette is exhausted.
    named_serotypes = [serotype for serotype in sorted(serotypes) if serotype != no_hit]
    for idx, serotype in enumerate(named_serotypes):
        salmon_counts["categories"][serotype] = {"color": palette[idx % len(palette)]}

    salmon_counts["categories"][no_hit] = {"color": no_hit_color}

    # Context manager guarantees the JSON is flushed and the handle closed.
    with open(file, "w") as json_fh:
        json.dump(salmon_counts, json_fh)
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
526
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
527
801b85b03a17 planemo upload
galaxytrakr
parents:
diff changeset
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()