Mercurial > repos > estrain > microrunqc
annotate run_fastq_scan.py @ 0:4e629e82c5b1 draft default tip
planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
| author | estrain |
|---|---|
| date | Fri, 13 Mar 2026 12:51:10 +0000 |
| parents | |
| children |
| rev | line source |
|---|---|
|
0
4e629e82c5b1
planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
|
4e629e82c5b1
planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff
changeset
|
2 |
|
4e629e82c5b1
planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff
changeset
|
3 ## Run fastq-scan to get mean read length and mean quality score |
|
4e629e82c5b1
planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff
changeset
|
4 ## author: errol strain, estrain@gmail.com |
|
4e629e82c5b1
planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff
changeset
|
5 |
|
4e629e82c5b1
planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff
changeset
|
6 from argparse import (ArgumentParser, FileType) |
|
4e629e82c5b1
planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff
changeset
|
7 import sys |
|
4e629e82c5b1
planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff
changeset
|
8 import glob |
|
4e629e82c5b1
planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff
changeset
|
9 import subprocess |
|
4e629e82c5b1
planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff
changeset
|
10 import json |
|
4e629e82c5b1
planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff
changeset
|
11 |
|
4e629e82c5b1
planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff
changeset
|
def parse_args(argv=None):
    """Parse the command-line arguments; use '-h' for help.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default) argparse falls back to ``sys.argv[1:]``, which is the
            behaviour the script relies on when run from the command line.

    Returns:
        The argparse namespace with three list-valued attributes:
        ``fastq`` (two entries), ``output`` (one entry) and ``type``
        (one entry), because each option is declared with ``nargs``.
    """
    parser = ArgumentParser(description='Run fastq-scan on a pair of gzipped FASTQ files')

    # Read inputs
    parser.add_argument('--fastq', type=str, required=True, nargs=2, help='FASTQ files')
    parser.add_argument('--output', type=str, required=True, nargs=1, help='Output File')
    parser.add_argument('--type', type=str, required=True, nargs=1, help='File Type (text or gz)')

    return parser.parse_args(argv)
|
4e629e82c5b1
planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff
changeset
|
23 |
|
4e629e82c5b1
planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff
changeset
|
def _run_fastq_scan(path, file_type):
    """Pipe one FASTQ file through fastq-scan and return its parsed JSON report.

    Args:
        path: Path of the FASTQ file to read.
        file_type: "gz" streams the file with ``zcat``; any other value
            streams it with ``cat``.

    Returns:
        The object produced by ``json.loads`` on fastq-scan's standard output.

    Raises:
        RuntimeError: If the reader command or fastq-scan exits non-zero.
    """
    reader_cmd = ["zcat" if file_type == "gz" else "cat", path]
    reader = subprocess.Popen(reader_cmd, stdout=subprocess.PIPE, shell=False)
    scanner = subprocess.Popen(
        ["fastq-scan"], stdin=reader.stdout, stdout=subprocess.PIPE, shell=False
    )
    # Drop the parent's copy of the pipe so the reader receives SIGPIPE
    # (instead of blocking forever) if fastq-scan exits early.
    reader.stdout.close()

    report = scanner.communicate()[0]
    # Reap the reader and make sure the data fed to fastq-scan was complete.
    reader_status = reader.wait()
    if reader_status != 0:
        raise RuntimeError(
            reader_cmd[0] + " exited with status " + str(reader_status) + " for " + path
        )
    if scanner.returncode != 0:
        raise RuntimeError(
            "fastq-scan exited with status " + str(scanner.returncode) + " for " + path
        )
    return json.loads(report)


def _mean_stats(report):
    """Return (mean read length, mean quality) from a report, rounded to 1 decimal."""
    stats = report["qc_stats"]
    return round(stats["read_mean"], 1), round(stats["qual_mean"], 1)


def main():
    """Scan both FASTQ files and write their mean lengths and qualities."""
    args = parse_args()
    file_type = str(args.type[0])

    # FASTQ files (read 1 and read 2)
    r1, r2 = args.fastq

    r1l, r1q = _mean_stats(_run_fastq_scan(r1, file_type))
    r2l, r2q = _mean_stats(_run_fastq_scan(r2, file_type))

    # Write output to be used by sum_mlst.py: one tab-separated line, no
    # trailing newline, in the order R1 length, R2 length, R1 quality, R2 quality.
    with open(args.output[0], "w") as output:
        output.write("\t".join(str(value) for value in (r1l, r2l, r1q, r2q)))


if __name__ == "__main__":
    main()
