comparison cfsan_centriflaken.xml @ 47:30191f39a957

"planemo upload"
author kkonganti
date Mon, 11 Jul 2022 12:19:48 -0400
parents 0dd0ebe5cddf
children 8202a3cedcc4
comparison
equal deleted inserted replaced
46:0dd0ebe5cddf 47:30191f39a957
41 --fq_filter_by_len $fq_filter_by_len 41 --fq_filter_by_len $fq_filter_by_len
42 #end if 42 #end if
43 --fq_filename_delim '${fq_filename_delim}' 43 --fq_filename_delim '${fq_filename_delim}'
44 --fq_filename_delim_idx $fq_filename_delim_idx 44 --fq_filename_delim_idx $fq_filename_delim_idx
45 --centrifuge_extract_bug '${centrifuge_extract_bug}' 45 --centrifuge_extract_bug '${centrifuge_extract_bug}'
46 -profile $runtime_profile 46 -profile kondagac;
47 -resume;
48 mv './cpipes-output/${pipeline}-multiqc/multiqc_report.html' './multiqc_report.html'; 47 mv './cpipes-output/${pipeline}-multiqc/multiqc_report.html' './multiqc_report.html';
49 mv './cpipes-output/${pipeline}-results/kraken2_extract_contigs' kraken2_extract_contigs; 48 mv './cpipes-output/${pipeline}-results/kraken2_extract_contigs' kraken2_extract_contigs;
50 rm -rf ./cpipes-output; 49 rm -rf ./cpipes-output;
51 rm -rf ./work 50 rm -rf ./work
52 ]]></command> 51 ]]></command>
70 <option value="pacbio_raw">PacBio regular CLR reads (&lt;20% error)</option> 69 <option value="pacbio_raw">PacBio regular CLR reads (&lt;20% error)</option>
71 <option value="pacbio_corr">PacBio reads that were corrected with other methods (&lt;3% error)</option> 70 <option value="pacbio_corr">PacBio reads that were corrected with other methods (&lt;3% error)</option>
72 <option value="pacbio_hifi">PacBio HiFi reads (&lt;1% error)</option> 71 <option value="pacbio_hifi">PacBio HiFi reads (&lt;1% error)</option>
73 </param> 72 </param>
74 <param name="fq_suffix" value=".fastq.gz" type="text" label="Suffix of the R1 FASTQ or Unpaired FASTQ"/> 73 <param name="fq_suffix" value=".fastq.gz" type="text" label="Suffix of the R1 FASTQ or Unpaired FASTQ"/>
75 <param name="fq2_suffix" value="_R2_001.fastq.gz" type="text" label="Suffix of the R2 FASTQ"/> 74 <param name="fq2_suffix" value="_R2_001.fastq.gz" type="text" label="Suffix of the R2 FASTQ"
75 help="THIS OPTION IS IGNORED IF THE INPUT READS ARE UNPAIRED/LONG READS."/>
76 <param name="fq_filter_by_len" optional="true" value="" type="integer" label="Enter minimum read length to retain before starting the analysis" 76 <param name="fq_filter_by_len" optional="true" value="" type="integer" label="Enter minimum read length to retain before starting the analysis"
77 help="Keep this option empty to use default values. Default for centriflaken (long reads) is 4000 bp and for centriflaken_hy (short reads) is 75 bp."/> 77 help="Keep this option empty to use default values. Default for centriflaken (long reads) is 4000 bp and for centriflaken_hy (short reads) is 75 bp."/>
78 <param name="fq_filename_delim" type="text" value="_" label="File name delimiter by which samples are grouped together (--fq_filename_delim)" 78 <param name="fq_filename_delim" type="text" value="_" label="File name delimiter by which samples are grouped together (--fq_filename_delim)"
79 help="This is the delimiter by which samples are grouped together to display in the final MultiQC report. For example, if your input data sets are mango_replicate1.fastq.gz, mango_replicate2.fastq.gz, orange_replicate1_maryland.fastq.gz, orange_replicate2_maryland.fastq.gz, then to create 2 samples mango and orange, the value for --fq_filename_delim would be _ (underscore) and the value for --fq_filename_delim_idx would be 1, since you want to group by the first word (i.e. mango or orange) after splitting the filename based on _ (underscore)"/> 79 help="This is the delimiter by which samples are grouped together to display in the final MultiQC report. For example, if your input data sets are mango_replicate1.fastq.gz, mango_replicate2.fastq.gz, orange_replicate1_maryland.fastq.gz, orange_replicate2_maryland.fastq.gz, then to create 2 samples mango and orange, the value for --fq_filename_delim would be _ (underscore) and the value for --fq_filename_delim_idx would be 1, since you want to group by the first word (i.e. mango or orange) after splitting the filename based on _ (underscore)"/>
80 <param name="fq_filename_delim_idx" type="integer" value="1" label="File name delimiter index (--fq_filename_delim_idx)" /> 80 <param name="fq_filename_delim_idx" type="integer" value="1" label="File name delimiter index (--fq_filename_delim_idx)" />
81 <param name="centrifuge_extract_bug" type="text" value="Escherichia coli" label="Reads belonging to this taxa are extracted and a MAG is generated to allow for serotyping"/> 81 <param name="centrifuge_extract_bug" type="text" value="Escherichia coli" label="Reads belonging to this taxa are extracted and a MAG is generated to allow for serotyping"/>
82 <param name="genome_size" type="text" optional="true" value="5.5m" label="Estimated genome size" help="For example, 5m or 2.6g."> 82 <param name="genome_size" type="text" optional="true" value="5.5m" label="Estimated genome size" help="For example, 5m or 2.6g.">
83 <validator type="regex" message="Genome size must be a float or integer, optionally followed by a unit prefix (kmg)">^([0-9]*[.])?[0-9]+[kmg]?$</validator> 83 <validator type="regex" message="Genome size must be a float or integer, optionally followed by a unit prefix (kmg)">^([0-9]*[.])?[0-9]+[kmg]?$</validator>
84 </param> 84 </param>
85 <param name="runtime_profile" type="select" label="Run time profile"> 85 <!-- <param name="runtime_profile" type="select" label="Run time profile">
86 <option value="kondagac" selected="true">conda</option> 86 <option value="kondagac" selected="true">conda</option>
87 <option value="cingularitygac">singularity</option> 87 <option value="cingularitygac">singularity</option>
88 </param> 88 </param> -->
89 </inputs> 89 </inputs>
90 <outputs> 90 <outputs>
91 <data name="multiqc_report" format="html" label="${pipeline}: MultiQC Report on ${on_string}" from_work_dir="multiqc_report.html"/> 91 <data name="multiqc_report" format="html" label="${pipeline}: MultiQC Report on ${on_string}" from_work_dir="multiqc_report.html"/>
92 <collection name="assembled_mags" type="list" label="${pipeline}: Assembled MAGs on ${on_string}"> 92 <collection name="assembled_mags" type="list" label="${pipeline}: Assembled MAGs on ${on_string}">
93 <discover_datasets pattern="(?P&lt;name&gt;.*)\.assembly_filtered_contigs\.fasta" ext="fasta" directory="kraken2_extract_contigs"/> 93 <discover_datasets pattern="(?P&lt;name&gt;.*)\.assembly_filtered_contigs\.fasta" ext="fasta" directory="kraken2_extract_contigs"/>
122 122
123 .. class:: infomark 123 .. class:: infomark
124 124
125 **Testing and Validation** 125 **Testing and Validation**
126 126
127 The pipeline has been wrapped to make it work in Galaxy. It takes in either paired or unpaired short reads or long reads, generates MAGs and performs 127 The CPIPES - Centriflaken Nextflow pipeline has been wrapped to make it work in Galaxy. It takes in either paired or unpaired short reads or long reads, generates MAGs and performs
128 in silico-based analysis (i.e., virulence gene finding). Additionally, AMR gene finding analysis is also included in Centriflaken and performed on MAGs 128 in silico-based analysis (i.e., virulence gene finding). Additionally, AMR gene finding analysis is also included in Centriflaken and performed on MAGs
129 of interest. The final summary plots and tables can be downloaded from the provided MultiQC HTML report generated as part of the pipeline. 129 of interest. The final summary plots and tables can be downloaded from the provided MultiQC HTML report generated as part of the pipeline.
130 The Centriflaken pipeline was validated with data from our previously published method (Maguire et al, 2021) and was able to replicate the detection 130 The Centriflaken pipeline was validated with data from our previously published method (Maguire et al, 2021) and was able to replicate the detection
131 and classification of STECs for each sample. We tested the pipeline with nanopore data obtained from 21 additional enriched samples from 131 and classification of STECs for each sample. We tested the pipeline with Nanopore data obtained from 21 additional enriched samples from
132 irrigation water, and it was able to perform the entire precision metagenomics analysis in less than 5 hours for all of them. All the original testing and validation was 132 irrigation water, and it was able to perform the entire precision metagenomics analysis in less than 5 hours for all of them. All the original testing and validation was
133 done on the command line on the CFSAN Raven2 HPC Cluster. 133 done on the command line on the CFSAN Raven2 HPC Cluster.
134 134
135 135
136 ---- 136 ----