Mercurial > repos > kkonganti > cfsan_centriflaken
comparison cfsan_centriflaken.xml @ 0:77494b0fa3c7
"planemo upload"
author | kkonganti |
---|---|
date | Mon, 27 Jun 2022 15:55:37 -0400 |
parents | |
children | e0d902b50cff |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:77494b0fa3c7 |
---|---|
<!--
  Galaxy wrapper for the CPIPES "Centriflaken" Nextflow pipeline.
  The wrapper links the selected FASTQ datasets into ./cpipes-input, runs the
  bundled cpipes launcher and collects the MultiQC report and the filtered
  assembly FASTA files from ./cpipes-output.
  NOTE(review): the wrapper version is 0.2.0 while the command runs the bundled
  0.2.1/cpipes launcher; confirm which version is intended.
-->
<tool id="cfsan_centriflaken" name="Centriflaken" version="0.2.0+galaxy0">
    <description>An automated pipeline to generate a MAG of interest (E.coli or Salmonella) and perform serotyping.</description>
    <requirements>
        <requirement type="package" version="22.04">nextflow</requirement>
        <requirement type="package">graphviz</requirement>
    </requirements>
    <version_command>nextflow -version</version_command>
    <command detect_errors="exit_code"><![CDATA[
        ## Galaxy joins the rendered template into a single shell line, so every
        ## shell statement must be chained explicitly with '&&'.
        #import re
        mkdir -p cpipes-input &&
        #for $fq in $input
            ## The element identifier is user controlled: reduce it to a safe file name
            ## before interpolating it into the quoted shell path.
            #set $safe_name = re.sub('[^\w\-\.]', '_', str($fq.element_identifier))
            ln -sf '$fq' './cpipes-input/${safe_name}' &&
        #end for
        ## Must be a separate statement: as a bare prefix assignment the variable
        ## would not be visible while the shell expands the arguments below.
        pwd_path=\$(pwd) &&
        '$__tool_directory__/0.2.1/cpipes'
        ## NOTE(review): the original template passed an undefined placeholder as the
        ## pipeline name in both branches. The names below are assumed to be the
        ## long-read and short-read workflows shipped with cpipes; confirm.
        #if str($reads.type) == "long"
            --pipeline centriflaken
        #else
            --pipeline centriflaken_hy
        #end if
        --input "\${pwd_path}/cpipes-input"
        --output "\${pwd_path}/cpipes-output"
        ## The suffix parameters live inside the nested conditionals, so they have
        ## to be addressed through their full path.
        #if str($reads.type) == "short" and str($reads.reads_lib.paired_end) == "true"
            --fq_single_end false
            --fq_suffix '${reads.reads_lib.fq_suffix}'
            --fq2_suffix '${reads.reads_lib.fq2_suffix}'
        #else if str($reads.type) == "short"
            --fq_single_end true
            --fq_suffix '${reads.reads_lib.fq_suffix}'
        #else
            --fq_single_end true
            --fq_suffix '${reads.fq_suffix}'
        #end if
        --fq_filename_delim '${fq_filename_delim}'
        --fq_filename_delim_idx $fq_filename_delim_idx
        --centrifuge_extract_bug '${centrifuge_extract_bug}'
        ## The genome size is optional: do not pass an empty value to the pipeline.
        #if str($genome_size).strip() not in ("", "None")
            --flye_genome_size '${genome_size}'
        #end if
        -profile '$runtime_profile'
    ]]></command>
    <inputs>
        <!-- Several FASTQ files can be selected; paired-end runs need both the R1 and the R2 file of every sample. -->
        <param name="input" type="data" multiple="true" format="fastq,fastq.gz,fastqsanger.gz,fastqsanger" label="Input reads" />
        <conditional name="reads">
            <param name="type" type="select" label="Sequencing Read Library Type" value="long">
                <option value="long">Long reads</option>
                <option value="short">Short reads</option>
            </param>
            <when value="short">
                <conditional name="reads_lib">
                    <param name="paired_end" type="select" label="Sequencing Read Library Layout" value="false">
                        <option value="false">Short read Single-End</option>
                        <option value="true">Short read Paired-End</option>
                    </param>
                    <when value="true">
                        <param name="fq_suffix" value="_R1_001.fastq.gz" type="text" label="Suffix of the FASTQ R1 file of Paired-End reads."/>
                        <param name="fq2_suffix" value="_R2_001.fastq.gz" type="text" label="Suffix of the FASTQ R2 file of Paired-End reads."/>
                    </when>
                    <when value="false">
                        <param name="fq_suffix" value="_R1_001.fastq.gz" type="text" label="Suffix of the FASTQ file of Single-End reads."/>
                    </when>
                </conditional>
            </when>
            <when value="long">
                <param name="fq_suffix" value=".fastq.gz" type="text" label="Suffix of the FASTQ file of Long reads."/>
            </when>
        </conditional>
        <param name="fq_filename_delim" type="text" value="_" label="File name delimiter by which samples are grouped together (--fq_filename_delim)"
               help="This is the delimiter by which samples are grouped together to display in the final MultiQC report. For example, if your input data sets are mango_replicate1.fastq.gz, mango_replicate2.fastq.gz, orange_replicate1_maryland.fastq.gz, orange_replicate2_maryland.fastq.gz, then to create 2 samples mango and orange, the value for --fq_filename_delim would be _ (underscore) and the value for --fq_filename_delim_idx would be 1, since you want to group by the first word (i.e. mango or orange) after splitting the filename based on _ (underscore)"/>
        <param name="fq_filename_delim_idx" type="integer" value="1" label="File name delimiter index (--fq_filename_delim_idx)" />
        <param name="centrifuge_extract_bug" type="text" value="Escherichia coli" label="Reads belonging to this taxon are extracted and a MAG is generated to allow for serotyping"/>
        <param name="genome_size" type="text" optional="true" value="5.5m" label="Estimated genome size" help="For example, 5m or 2.6g.">
            <validator type="regex" message="Genome size must be a float or integer, optionally followed by a unit prefix (kmg)">^([0-9]*[.])?[0-9]+[kmg]?$</validator>
        </param>
        <!-- The option values are the Nextflow profile names handed to the launcher via -profile. -->
        <param name="runtime_profile" type="select" label="Run time profile" value="kondagac">
            <option value="kondagac">conda</option>
            <option value="cingularitygac">singularity</option>
        </param>
    </inputs>
    <outputs>
        <!-- Discovery patterns carry a named "designation" group and escape literal dots. -->
        <data name="multiqc_report" format="html" label="MultiQC Report on ${on_string}">
            <discover_datasets pattern="(?P&lt;designation&gt;multiqc_report)\.html" assign_primary_output="true" directory="cpipes-output"/>
        </data>
        <data name="assembled_mags" format="fasta" label="CENTRIFLAKEN: Assembled MAGs">
            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.assembly_filtered_contigs\.fasta" visible="true" directory="cpipes-output"/>
        </data>
    </outputs>
    <help><![CDATA[

.. class:: infomark

**Purpose**

Centriflaken is a suite of automated data analysis pipelines based on Nextflow DSL2 developed at CFSAN, FDA. These pipelines allow rapid
and effective construction of metagenomic assembled genomes (MAGs) to enable bacterial source-tracking. It is based on methods described in our
previous publication (https://doi.org/10.1371/journal.pone.0245172).

----

.. class:: infomark

**Testing and Validation**

The pipeline has been wrapped to make it work in Galaxy. It takes in either paired or unpaired short reads or long reads, generates MAGs and performs
in silico-based analysis (i.e., virulence gene finding). Additionally, AMR gene finding analysis is also included in Centriflaken and performed on MAGs
of interest. The final summary plots and tables can be downloaded from the provided MultiQC HTML report generated as part of the pipeline.
The Centriflaken pipeline was validated with data from our previously published method (Maguire et al, 2021) and was able to replicate the detection
and classification of STECs for each sample. We tested the pipeline with nanopore data obtained from 21 additional enriched samples from
irrigation water and were able to perform the entire precision metagenomics analysis in less than 5 hours for all of them. All the original testing and validation was
done on the command line on the CFSAN Raven2 HPC Cluster.

----

.. class:: infomark

**Outputs**

The main output files are:

::

    - MultiQC Report: Contains a brief summary report including any serotyping and AMR result tables.
    - Final assembly: contains contigs and possibly scaffolds.

    ]]></help>
    <citations>
        <citation type="bibtex">
            @misc{gitlabCPIPES,
            author = {Konganti, Kranti},
            year = {2022},
            title = {CPIPES - Centriflaken},
            publisher = {GitLab},
            journal = {GitLab repository},
            url = {https://cfsan-git.fda.gov/Kranti.Konganti/cpipes}}
        </citation>
    </citations>
</tool>