annotate run_fastq_scan.py @ 0:4e629e82c5b1 draft default tip

planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
author estrain
date Fri, 13 Mar 2026 12:51:10 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
4e629e82c5b1 planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff changeset
1 #!/usr/bin/env python
4e629e82c5b1 planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff changeset
2
4e629e82c5b1 planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff changeset
3 ## Run fastq-scan to get mean read length and mean quality score
4e629e82c5b1 planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff changeset
4 ## author: errol strain, estrain@gmail.com
4e629e82c5b1 planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff changeset
5
4e629e82c5b1 planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff changeset
6 from argparse import (ArgumentParser, FileType)
4e629e82c5b1 planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff changeset
7 import sys
4e629e82c5b1 planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff changeset
8 import glob
4e629e82c5b1 planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff changeset
9 import subprocess
4e629e82c5b1 planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff changeset
10 import json
4e629e82c5b1 planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff changeset
11
4e629e82c5b1 planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff changeset
def parse_args(argv=None):
    """Parse the command-line arguments; use '-h' for help.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default) argparse falls back to ``sys.argv[1:]``, which is the
            behavior existing callers rely on.

    Returns:
        argparse.Namespace with three attributes, each a list because every
        option is declared with ``nargs``:
          * ``fastq``  - two FASTQ file paths (read 1, read 2)
          * ``output`` - one-element list holding the output file path
          * ``type``   - one-element list holding the file type string
    """
    parser = ArgumentParser(
        description='Run fastq-scan on a pair of FASTQ files (plain text or gzipped)'
    )

    # Read inputs
    parser.add_argument('--fastq', type=str, required=True, nargs=2, help='FASTQ files')
    parser.add_argument('--output', type=str, required=True, nargs=1, help='Output File')
    parser.add_argument('--type', type=str, required=True, nargs=1, help='File Type (text or gz)')

    return parser.parse_args(argv)
4e629e82c5b1 planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff changeset
23
4e629e82c5b1 planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff changeset
args = parse_args()

# Paired FASTQ inputs: read 1 and read 2
r1 = args.fastq[0]
r2 = args.fastq[1]


def _scan_fastq(path, file_type):
    """Stream one FASTQ file through fastq-scan and return its parsed JSON output.

    Args:
        path: Path of the FASTQ file to scan.
        file_type: "gz" to decompress with zcat; any other value streams the
            file unchanged with cat.

    Returns:
        The object produced by ``json.loads`` on fastq-scan's standard output.

    Raises:
        ValueError: (json.JSONDecodeError) if fastq-scan's output is not
            valid JSON, e.g. because the pipeline failed.
    """
    reader_cmd = ["zcat", path] if file_type == "gz" else ["cat", path]
    reader = subprocess.Popen(reader_cmd, stdout=subprocess.PIPE, shell=False)
    scanner = subprocess.Popen(
        ["fastq-scan"], stdin=reader.stdout, stdout=subprocess.PIPE, shell=False
    )
    # Close the parent's copy of the pipe so the reader gets SIGPIPE if
    # fastq-scan exits before consuming all of its input.
    reader.stdout.close()
    report = scanner.communicate()[0]
    # Reap the reader process so it is not left behind as a zombie.
    reader.wait()
    return json.loads(report)


file_type = str(args.type[0])

# Read 1: mean quality and mean read length, rounded to one decimal place
r1json = _scan_fastq(r1, file_type)
r1q = round(r1json["qc_stats"]["qual_mean"], 1)
r1l = round(r1json["qc_stats"]["read_mean"], 1)

# Read 2: same statistics for the second file of the pair
r2json = _scan_fastq(r2, file_type)
r2q = round(r2json["qc_stats"]["qual_mean"], 1)
r2l = round(r2json["qc_stats"]["read_mean"], 1)

# Write output to be used by sum_mlst.py: one tab-separated record
# (read1 length, read2 length, read1 quality, read2 quality), no trailing
# newline. The context manager guarantees the file is flushed and closed.
with open(args.output[0], "w") as output:
    output.write(str(r1l) + "\t" + str(r2l) + "\t" + str(r1q) + "\t" + str(r2q))