comparison microrunqc.xml @ 0:4e629e82c5b1 draft default tip

planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
author estrain
date Fri, 13 Mar 2026 12:51:10 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4e629e82c5b1
1 <tool id="microrunqc" name="microrunqc" version="1.0.6">
2
3 <requirements> <!-- Package dependencies: skesa, mlst and bwa are invoked directly by the command template; fastq-scan is presumably called by run_fastq_scan.py (confirm against that script) -->
4 <requirement type="package" version="2.4.0">skesa</requirement>
5 <requirement type="package" version="2.23.0">mlst</requirement>
6 <requirement type="package" version="0.7.17">bwa</requirement>
7 <requirement type="package" version="1.0.1">fastq-scan</requirement>
8 </requirements>
9
10 <command detect_errors="exit_code"><![CDATA[
11 #import re
12 skesa
13 ## Step 1: assemble the paired reads. outname is the input name with every character outside [A-Za-z0-9_.-] replaced by "_" so it is safe to use unquoted in the shell.
14 #set fqscan = "text"
15 #if $jobtype.select == "fastq_fr"
16 #set outname = re.sub('[^\w\-\.]', '_', str($jobtype.fastq1.name))
17 #set bwalist = str($jobtype.fastq1) + " " + str($jobtype.fastq2)
18 --fastq $jobtype.fastq1,$jobtype.fastq2
19 #if $jobtype.fastq1.is_of_type("fastq.gz")
20 #set fqscan = "gz"
21 #else if $jobtype.fastq1.is_of_type("fastqsanger.gz")
22 #set fqscan = "gz"
23 #end if
24 #else if $jobtype.select == "fastq_pair"
25 #set outname = re.sub('[^\w\-\.]', '_', str($jobtype.coll.name))
26 #set bwalist = str($jobtype.coll.forward) + " " + str($jobtype.coll.reverse)
27 --fastq $jobtype.coll.forward,$jobtype.coll.reverse
28 #if $jobtype.coll.forward.is_of_type("fastq.gz")
29 #set fqscan = "gz"
30 #else if $jobtype.coll.forward.is_of_type("fastqsanger.gz")
31 #set fqscan = "gz"
32 #end if
33 #end if
34 ## num_cores is reused below as the thread count for bwa mem and mlst; advanced mode may override it.
35 #set num_cores = 1
36
37 #if $options.select =="basic"
38 --cores $num_cores
39 --memory 8
40 #else if $options.select=="advanced"
41 #if $options.cores
42 #set num_cores = $options.cores
43 --cores $options.cores
44 #end if
45 #if $options.memory
46 --memory $options.memory
47 #end if
48 #if $options.hash_count
49 --hash_count
50 #end if
51 #if $options.estimated_kmers
52 --estimated_kmers $options.estimated_kmers
53 #end if
54 #if $options.skip
55 --skip_bloom_filter
56 #end if
57 #if $options.kmer
58 --kmer $options.kmer
59 #end if
60 #if $options.min_count
61 --min_count $options.min_count
62 #end if
63 #if $options.max_kmer_count
64 --max_kmer_count $options.max_kmer_count
65 #end if
66 #if $options.vector_percent
67 --vector_percent $options.vector_percent
68 #end if
69 #if $options.insert_size
70 --insert_size $options.insert_size
71 #end if
72 #if $options.steps
73 --steps $options.steps
74 #end if
75 #if $options.fraction
76 --fraction $options.fraction
77 #end if
78 #if $options.max_snp_len
79 --max_snp_len $options.max_snp_len
80 #end if
81 #if $options.min_contig
82 --min_contig $options.min_contig
83 #end if
84 #if $options.allow_snps
85 --allow_snps
86 #end if
87 #end if
88 ## The standard output of skesa is redirected into the contigs file.
89 > ${outname}.fasta;
90 ## Step 2: index the contigs, map the reads back and pipe the alignments to median_size.py, whose output is stored in insert.median.
91 bwa index ${outname}.fasta;
92 bwa mem -t $num_cores ${outname}.fasta ${bwalist} | python $__tool_directory__/median_size.py > insert.median;
93 ## Step 3: MLST typing of the contigs against the selected database; thresholds are only passed in advanced mode.
94 mlst --nopath --threads $num_cores --datadir $mlst_databases.fields.path/pubmlst --blastdb $mlst_databases.fields.path/blast/mlst.fa
95 #if $options.select=="advanced"
96 #if $options.minid
97 --minid $options.minid
98 #end if
99 #if $options.mincov
100 --mincov $options.mincov
101 #end if
102 #if $options.minscore
103 --minscore $options.minscore
104 #end if
105 #end if
106 ${outname}.fasta > ${outname}.mlst_raw.tsv;
107 ## Step 4: post-process the raw MLST table with mlstAddFields.py, which also receives the pubmlst directory.
108 python $__tool_directory__/mlstAddFields.py ${outname}.mlst_raw.tsv $mlst_databases.fields.path/pubmlst > ${outname}.mlst.tsv;
109 ## Step 5: read statistics; fqscan is "gz" for gzip-compressed inputs and "text" otherwise.
110 python $__tool_directory__/run_fastq_scan.py --fastq ${bwalist} --out fq_out.tab --type ${fqscan};
111 ## Step 6: combine contigs, MLST table, insert.median and read statistics into sum_qc.txt.
112 python $__tool_directory__/sum_mlst.py --fasta ${outname}.fasta --mlst ${outname}.mlst.tsv --med insert.median --fqscan fq_out.tab --out sum_qc.txt
113 ## NOTE(review): the steps are chained with ";", so only the exit status of the last command reaches detect_errors. Confirm whether "&&" is wanted.
114 ]]></command>
115 <inputs>
116 <conditional name="jobtype">
117 <param name="select" type="select" label="Select Input">
118 <option value="fastq_fr">Forward and Reverse FASTQ</option>
119 <option value="fastq_pair">Paired FASTQ Collection</option>
120 </param>
121 <when value="fastq_fr">
122 <param name="fastq1" type="data" format="fastq,fastqsanger,fastq.gz,fastqsanger.gz" label="Forward FASTQ" />
123 <param name="fastq2" type="data" format="fastq,fastqsanger,fastq.gz,fastqsanger.gz" label="Reverse FASTQ" />
124 </when>
125 <when value="fastq_pair">
126 <param name="coll" label="Paired FASTQ" type="data_collection" format="fastq,fastqsanger,fastq.gz,fastqsanger.gz" collection_type="paired" />
127 </when>
128 </conditional>
129 <!-- Basic mode runs skesa with one core and a memory setting of 8; advanced mode exposes the skesa and mlst tuning parameters listed in its when block. -->
130 <conditional name="options">
131 <param name="select" type="select" label="Options Type">
132 <option value="basic">Basic</option>
133 <option value="advanced">Advanced</option>
134 </param>
135 <when value="advanced">
136 <param name="cores" optional="true" type="integer" label="Number of cores to use (Default=16)" value=""/>
137 <param name="memory" optional="true" type="integer" label="Memory available (Default=32GB)" value=""/>
138 <param name="hash_count" optional="true" type="boolean" label="hash counter"/>
139 <param name="estimated_kmers" optional="true" type="integer" label="Estimated number of unique kmers for bloom filter (Default=100)" value=""/>
140 <param name="skip" optional="true" type="boolean" label="skip bloom filter, use estimate kmers as the hash"/>
141 <param name="kmer" optional="true" type="integer" label="Minimal kmer length for assembly (Default=21)" value=""/>
142 <param name="min_count" optional="true" type="integer" label="Minimal count for kmers retained for comparing alternate choices" value=""/>
143 <param name="max_kmer_count" optional="true" type="integer" label="Minimum acceptable average count for estimating the maximal kmer length in reads" value=""/>
144 <param name="vector_percent" optional="true" type="float" label="Count for vectors as a fraction of the read number (0-1,1=disabled)" value="">
145 <validator type="in_range" message="Must be float(0,1)." min="0" max="1"/>
146 </param>
147 <param name="insert_size" optional="true" type="integer" label="Expected insert size for paired reads" value=""/>
148 <param name="steps" optional="true" type="integer" label="Number of assembly iterations from minimal to maximal kmer length in reads (Default=11)" value=""/>
149 <param name="fraction" optional="true" type="float" label="Maximum noise to signal ratio acceptable for extension (Default=0.1)" value="">
150 <validator type="in_range" message="Must be float(0,1)." min="0" max="1"/>
151 </param>
152 <param name="max_snp_len" optional="true" type="integer" label="Maximal snp length (Default=150)" value=""/>
153 <param name="min_contig" optional="true" type="integer" label="Minimal contig length reported in output (Default=200)" value=""/>
154 <param name="allow_snps" optional="true" type="boolean" label="Turn on SNP discovery (Default=false)"/>
155 <param name="mincov" type="integer" label="Minimum DNA %coverage" value="10" min="0" max="100" help="Minimum DNA %coverage to report partial allele at all (default 10, must be between 0-100)" optional="true" />
156 <param name="minid" type="integer" label="Minimum DNA %identity" value="95" min="0" max="100" help="Minimum DNA %identity of full allele to consider 'similar' (default 95, must be between 0-100)" optional="true" />
157 <param name="minscore" type="integer" label="Minimum score to match scheme" value="50" min="0" max="100" help="Minimum score out of 100 to match a scheme" optional="true" />
158 </when>
159 <when value="basic"/>
160 </conditional>
161 <!-- Choices come from the "mlst" tool data table; the command template reads the path field of the selected entry. -->
162 <param name="mlst_databases" label="Select a mlst database" type="select">
163 <options from_data_table="mlst">
164 <validator message="No database is available" type="no_options" />
165 </options>
166 </param>
167
168 </inputs>
169 <outputs> <!-- Contigs and MLST table are picked up by suffix because their file names derive from the input name; the QC summary is always written as sum_qc.txt and has its own label so it is not confused with the MLST table -->
170 <data format="fasta" name="results.skesa.fasta" label="${tool.name} on ${on_string}: Contigs" from_work_dir="*.fasta"/>
171 <data format="tabular" name="results.mlst.tsv" label="${tool.name} on ${on_string}: MLST" from_work_dir="*.mlst.tsv"/>
172 <data format="tabular" name="qc_results.tsv" label="${tool.name} on ${on_string}: QC Summary" from_work_dir="sum_qc.txt"/>
173 </outputs>
174
175 <help><![CDATA[
176 **microrunqc** assembles paired-end FASTQ reads with SKESA, maps the reads back to the contigs with BWA to derive a median insert size, types the assembly with mlst against the selected database, collects read statistics with fastq-scan, and returns the contigs, the MLST table and a combined QC summary.
177 ]]></help>
178 <citations> <!-- One BibTeX entry per wrapped tool: SKESA (assembler) and mlst (typing) -->
179 <citation type="bibtex">
180 @misc{ncbi_skesa,
181 title={skesa: SKESA is a de-novo sequence read assembler for cultured single isolate genomes
182 based on DeBruijn graphs. It uses conservative heuristics and is designed to
183 create breaks at repeat regions in the genome. This leads to excellent sequence
184 quality but not necessarily a large N50 statistic. It is a multi-threaded
185 application that scales well with the number of processors. For different runs
186 with the same inputs, including the order of reads, the order and orientation
187 of contigs in the output is deterministic. },
188 url={https://github.com/ncbi/ngs-tools/tree/master/tools/skesa/},
189 author={National Center for Biotechnology Information },
190 }</citation>
191
192 <citation type="bibtex">
193 @UNPUBLISHED{Seemann2016,
194 author = "Seemann T",
195 title = "MLST: Scan contig files against PubMLST typing schemes",
196 year = "2016",
197 url = {https://github.com/tseemann/mlst}
198 }</citation>
199 </citations>
200 </tool>