Mercurial > repos > kkonganti > cfsan_centriflaken
comparison cfsan_centriflaken.xml @ 47:30191f39a957
"planemo upload"
author | kkonganti |
---|---|
date | Mon, 11 Jul 2022 12:19:48 -0400 |
parents | 0dd0ebe5cddf |
children | 8202a3cedcc4 |
comparison
equal
deleted
inserted
replaced
46:0dd0ebe5cddf | 47:30191f39a957 |
---|---|
41 --fq_filter_by_len $fq_filter_by_len | 41 --fq_filter_by_len $fq_filter_by_len |
42 #end if | 42 #end if |
43 --fq_filename_delim '${fq_filename_delim}' | 43 --fq_filename_delim '${fq_filename_delim}' |
44 --fq_filename_delim_idx $fq_filename_delim_idx | 44 --fq_filename_delim_idx $fq_filename_delim_idx |
45 --centrifuge_extract_bug '${centrifuge_extract_bug}' | 45 --centrifuge_extract_bug '${centrifuge_extract_bug}' |
46 -profile $runtime_profile | 46 -profile kondagac; |
47 -resume; | |
48 mv './cpipes-output/${pipeline}-multiqc/multiqc_report.html' './multiqc_report.html'; | 47 mv './cpipes-output/${pipeline}-multiqc/multiqc_report.html' './multiqc_report.html'; |
49 mv './cpipes-output/${pipeline}-results/kraken2_extract_contigs' kraken2_extract_contigs; | 48 mv './cpipes-output/${pipeline}-results/kraken2_extract_contigs' kraken2_extract_contigs; |
50 rm -rf ./cpipes-output; | 49 rm -rf ./cpipes-output; |
51 rm -rf ./work | 50 rm -rf ./work |
52 ]]></command> | 51 ]]></command> |
70 <option value="pacbio_raw">PacBio regular CLR reads (<20% error)</option> | 69 <option value="pacbio_raw">PacBio regular CLR reads (<20% error)</option> |
71 <option value="pacbio_corr">PacBio reads that were corrected with other methods (<3% error)</option> | 70 <option value="pacbio_corr">PacBio reads that were corrected with other methods (<3% error)</option> |
72 <option value="pacbio_hifi">PacBio HiFi reads (<1% error)</option> | 71 <option value="pacbio_hifi">PacBio HiFi reads (<1% error)</option> |
73 </param> | 72 </param> |
74 <param name="fq_suffix" value=".fastq.gz" type="text" label="Suffix of the R1 FASTQ or Unpaired FASTQ"/> | 73 <param name="fq_suffix" value=".fastq.gz" type="text" label="Suffix of the R1 FASTQ or Unpaired FASTQ"/> |
75 <param name="fq2_suffix" value="_R2_001.fastq.gz" type="text" label="Suffix of the R2 FASTQ"/> | 74 <param name="fq2_suffix" value="_R2_001.fastq.gz" type="text" label="Suffix of the R2 FASTQ" |
 | 75 help="THIS OPTION IS IGNORED IF THE INPUT READS ARE UNPAIRED/LONG READS."/> |
76 <param name="fq_filter_by_len" optional="true" value="" type="integer" label="Enter minimum read length to retain before starting the analysis" | 76 <param name="fq_filter_by_len" optional="true" value="" type="integer" label="Enter minimum read length to retain before starting the analysis" |
77 help="Keep this option empty to use default values. Default for centriflaken (long reads) is 4000 bp and for centriflaken_hy (short reads) is 75 bp)"/> | 77 help="Keep this option empty to use default values. Default for centriflaken (long reads) is 4000 bp and for centriflaken_hy (short reads) is 75 bp)"/> |
78 <param name="fq_filename_delim" type="text" value="_" label="File name delimitor by which samples are grouped together (--fq_filename_delim)" | 78 <param name="fq_filename_delim" type="text" value="_" label="File name delimitor by which samples are grouped together (--fq_filename_delim)" |
79 help="This is the delimitor by which samples are grouped together to display in the final MultiQC report. For example, if your input data sets are mango_replicate1.fastq.gz, mango_replicate2.fastq.gz, orange_replicate1_maryland.fastq.gz, orange_replicate2_maryland.fastq.gz, then to create 2 samples mango and orange, the value for --fq_filename_delim would be _ (underscore) and the value for --fq_filename_delim_idx would be 1, since you want to group by the first word (i.e. mango or orange) after splitting the filename based on _ (underscore)"/> | 79 help="This is the delimitor by which samples are grouped together to display in the final MultiQC report. For example, if your input data sets are mango_replicate1.fastq.gz, mango_replicate2.fastq.gz, orange_replicate1_maryland.fastq.gz, orange_replicate2_maryland.fastq.gz, then to create 2 samples mango and orange, the value for --fq_filename_delim would be _ (underscore) and the value for --fq_filename_delim_idx would be 1, since you want to group by the first word (i.e. mango or orange) after splitting the filename based on _ (underscore)"/> |
80 <param name="fq_filename_delim_idx" type="integer" value="1" label="File name delimitor index (--fq_filename_delim_idx)" /> | 80 <param name="fq_filename_delim_idx" type="integer" value="1" label="File name delimitor index (--fq_filename_delim_idx)" /> |
81 <param name="centrifuge_extract_bug" type="text" value="Escherichia coli" label="Reads belonging to this taxa are extracted and a MAG is generated to allow for serotyping"/> | 81 <param name="centrifuge_extract_bug" type="text" value="Escherichia coli" label="Reads belonging to this taxa are extracted and a MAG is generated to allow for serotyping"/> |
82 <param name="genome_size" type="text" optional="true" value="5.5m" label="Estimated genome size" help="For example, 5m or 2.6g."> | 82 <param name="genome_size" type="text" optional="true" value="5.5m" label="Estimated genome size" help="For example, 5m or 2.6g."> |
83 <validator type="regex" message="Genome size must be a float or integer, optionally followed by the a unit prefix (kmg)">^([0-9]*[.])?[0-9]+[kmg]?$</validator> | 83 <validator type="regex" message="Genome size must be a float or integer, optionally followed by the a unit prefix (kmg)">^([0-9]*[.])?[0-9]+[kmg]?$</validator> |
84 </param> | 84 </param> |
85 <param name="runtime_profile" type="select" label="Run time profile"> | 85 <!-- <param name="runtime_profile" type="select" label="Run time profile"> |
86 <option value="kondagac" selected="true">conda</option> | 86 <option value="kondagac" selected="true">conda</option> |
87 <option value="cingularitygac">singularity</option> | 87 <option value="cingularitygac">singularity</option> |
88 </param> | 88 </param> --> |
89 </inputs> | 89 </inputs> |
90 <outputs> | 90 <outputs> |
91 <data name="multiqc_report" format="html" label="${pipeline}: MultiQC Report on ${on_string}" from_work_dir="multiqc_report.html"/> | 91 <data name="multiqc_report" format="html" label="${pipeline}: MultiQC Report on ${on_string}" from_work_dir="multiqc_report.html"/> |
92 <collection name="assembled_mags" type="list" label="${pipeline}: Assembled MAGs on ${on_string}"> | 92 <collection name="assembled_mags" type="list" label="${pipeline}: Assembled MAGs on ${on_string}"> |
93 <discover_datasets pattern="(?P<name>.*)\.assembly_filtered_contigs\.fasta" ext="fasta" directory="kraken2_extract_contigs"/> | 93 <discover_datasets pattern="(?P<name>.*)\.assembly_filtered_contigs\.fasta" ext="fasta" directory="kraken2_extract_contigs"/> |
122 | 122 |
123 .. class:: infomark | 123 .. class:: infomark |
124 | 124 |
125 **Testing and Validation** | 125 **Testing and Validation** |
126 | 126 |
127 The pipeline has been wrapped to make it work in Galaxy. It takes in either paired or unpaired short reads or long reads, generates MAGs and performs | 127 The CPIPES - Centriflaken Nextflow pipeline has been wrapped to make it work in Galaxy. It takes in either paired or unpaired short reads or long reads, generates MAGs and performs |
128 in silico-based analysis (i.e., virulence gene finding). Additionally, AMR gene finding analysis is also included in Centriflaken and performed on MAGs | 128 in silico-based analysis (i.e., virulence gene finding). Additionally, AMR gene finding analysis is also included in Centriflaken and performed on MAGs |
129 of interest. The final summary plots and tables can be downloaded from the provided MultiQC HTML report generated as part of the pipeline. | 129 of interest. The final summary plots and tables can be downloaded from the provided MultiQC HTML report generated as part of the pipeline. |
130 The Centriflaken pipeline was validated with data from our previously published method (Maguire et al, 2021) and was able to replicate the detection | 130 The Centriflaken pipeline was validated with data from our previously published method (Maguire et al, 2021) and was able to replicate the detection |
131 and classification of STECs for each sample. We tested the pipeline with nanopore data obtained from 21 additional enriched samples from | 131 and classification of STECs for each sample. We tested the pipeline with Nanopore data obtained from 21 additional enriched samples from |
132 irrigation water and was able to perform the entire precision metagenomics analysis in less than 5 hours for all of them. All the original testing and validation was | 132 irrigation water and was able to perform the entire precision metagenomics analysis in less than 5 hours for all of them. All the original testing and validation was |
133 done on the command line on the CFSAN Raven2 HPC Cluster. | 133 done on the command line on the CFSAN Raven2 HPC Cluster. |
134 | 134 |
135 | 135 |
136 ---- | 136 ---- |