annotate fastq_dump.xml @ 1:c9fb9ecae076

planemo upload
author jasmine_amir
date Thu, 02 Feb 2023 18:29:47 -0500
parents 878e5d4becef
children
rev   line source
jasmine_amir@1 1 <tool id="fastq_dump" name="Download and Extract Reads in FASTQ" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
jasmine_amir@0 2 <description>format from NCBI SRA</description>
jasmine_amir@0 3 <macros>
jasmine_amir@0 4 <import>macros.xml</import>
jasmine_amir@0 5 </macros>
jasmine_amir@0 6 <expand macro="edam_ontology"/>
jasmine_amir@0 7 <expand macro="bio_tools"/>
jasmine_amir@0 8 <expand macro="requirements"/>
jasmine_amir@0 9 <version_command>fastq-dump --version | tr -d $'\n'</version_command>
jasmine_amir@0 10 <command detect_errors="exit_code"><![CDATA[
jasmine_amir@0 11 @COPY_CONFIGFILE@
jasmine_amir@0 12 @SET_ACCESSIONS@
jasmine_amir@0 13
jasmine_amir@0 14 #if $input.input_select == "sra_file":
jasmine_amir@0 15 fastq-dump --log-level fatal --accession '${input.file.name}'
jasmine_amir@0 16 #else:
jasmine_amir@0 17 ## Do not use prefetch if region is specified, to avoid downloading
jasmine_amir@0 18 ## the complete sra file.
jasmine_amir@0 19 #if ( str( $adv.region ) == "" ) and ( str( $adv.minID ) == "" ) and ( str( $adv.maxID ) == "" ):
jasmine_amir@0 20 prefetch -X 200000000 "\$acc" &&
jasmine_amir@0 21 #end if
jasmine_amir@0 22 fastq-dump --accession "\$acc"
jasmine_amir@0 23 --split-files
jasmine_amir@0 24 #end if
jasmine_amir@0 25 --defline-seq '@\$sn[_\$rn]/\$ri'
jasmine_amir@0 26 --defline-qual '+'
jasmine_amir@0 27
jasmine_amir@0 28 $adv.split
jasmine_amir@0 29 #if str( $adv.alignments ) == "aligned":
jasmine_amir@0 30 --aligned
jasmine_amir@0 31 #end if
jasmine_amir@0 32 #if str( $adv.alignments ) == "unaligned":
jasmine_amir@0 33 --unaligned
jasmine_amir@0 34 #end if
jasmine_amir@0 35 #if str( $adv.minID ) != "":
jasmine_amir@0 36 --minSpotId "$adv.minID"
jasmine_amir@0 37 #end if
jasmine_amir@0 38 #if str( $adv.maxID ) != "":
jasmine_amir@0 39 --maxSpotId "$adv.maxID"
jasmine_amir@0 40 #end if
jasmine_amir@0 41 #if str( $adv.minlen ) != "":
jasmine_amir@0 42 --minReadLen "$adv.minlen"
jasmine_amir@0 43 #end if
jasmine_amir@0 44 #if str( $adv.readfilter ) != "":
jasmine_amir@0 45 --read-filter "$adv.readfilter"
jasmine_amir@0 46 #end if
jasmine_amir@0 47 #if str( $adv.region ) != "":
jasmine_amir@0 48 --aligned-region "$adv.region"
jasmine_amir@0 49 #end if
jasmine_amir@0 50 #if str( $adv.spotgroups ) != "":
jasmine_amir@0 51 --spot-groups "$adv.spotgroups"
jasmine_amir@0 52 #end if
jasmine_amir@0 53 #if str( $adv.matepairDist ) != "":
jasmine_amir@0 54 --matepair-distance "$adv.matepairDist"
jasmine_amir@0 55 #end if
jasmine_amir@0 56 $adv.clip
jasmine_amir@0 57 $adv.skip_technical
jasmine_amir@0 58
jasmine_amir@0 59 #if str( $outputformat ) == "fastqsanger.gz":
jasmine_amir@0 60 --gzip
jasmine_amir@0 61 #elif str( $outputformat ) == "fastqsanger.bz2":
jasmine_amir@0 62 --bzip2
jasmine_amir@0 63 #end if
jasmine_amir@0 64
jasmine_amir@0 65 #if str($adv.table) != "":
jasmine_amir@0 66 --table $adv.table
jasmine_amir@0 67 #end if
jasmine_amir@0 68 ;
jasmine_amir@0 69
jasmine_amir@0 70 mkdir -p output &&
jasmine_amir@0 71 data=(\$(ls ./*.fast*));
jasmine_amir@0 72 if [ \${\#data[@]} -eq 2 ]; then
jasmine_amir@0 73 mv "\${data[0]}" output/"\${data[0]}"_forward.$outputformat;
jasmine_amir@0 74 mv "\${data[1]}" output/"\${data[1]}"_reverse.$outputformat;
jasmine_amir@0 75 elif [ \${\#data[@]} -eq 1 ]; then
jasmine_amir@0 76 mv "\${data[0]}" output/"\${data[0]}"__single.$outputformat;
jasmine_amir@0 77 fi;
jasmine_amir@0 78
jasmine_amir@0 79 #if $input.input_select != "sra_file":
jasmine_amir@0 80 ); done;
jasmine_amir@0 81 #end if
jasmine_amir@0 82 echo "Done with all accessions."
jasmine_amir@0 83 ]]>
jasmine_amir@0 84 </command>
jasmine_amir@0 85 <expand macro="configfile_hack"/>
jasmine_amir@0 86 <inputs>
jasmine_amir@0 87 <expand macro="input_conditional"/>
jasmine_amir@0 88 <param name="outputformat" type="select" display="radio" label="Select output format" help="Compression will greatly reduce the amount of space occupied by downloaded data. Downstream applications such as short-read mappers will accept compressed data as input. Consider this example: an uncompressed 400 Mb fastq dataset compresses to 100 Mb or 80 Mb by gzip or bzip2, respectively. " argument="--gzip --bzip2">
jasmine_amir@0 89 <option value="fastqsanger.gz">gzip compressed fastq</option>
jasmine_amir@0 90 <option value="fastqsanger">Uncompressed fastq</option>
jasmine_amir@0 91 <option value="fastqsanger.bz2">bzip2 compressed fastq</option>
jasmine_amir@0 92 </param>
jasmine_amir@0 93 <section name="adv" title="Advanced Options" expanded="False">
jasmine_amir@0 94 <param name="minID" type="integer" label="Minimum spot ID" optional="true" help="Minimum spot id to be dumped." argument="--minSpotId"/>
jasmine_amir@0 95 <param name="maxID" type="integer" label="Maximum spot ID" optional="true" help="Maximum spot id to be dumped." argument="--maxSpotId"/>
jasmine_amir@0 96 <param name="minlen" type="integer" label="Minimum read length" optional="true" help="Filter by sequence length. Will dump only reads longer or equal to this value." argument="--minReadLen"/>
jasmine_amir@0 97 <param name="split" type="boolean" checked="true" truevalue="--split-spot" falsevalue="" label="Split spot by read pairs" help="Split spots into individual reads." argument="--split-spot"/>
jasmine_amir@0 98 <expand macro="alignments"/>
jasmine_amir@0 99 <expand macro="region"/>
jasmine_amir@0 100 <expand macro="matepairDist"/>
jasmine_amir@0 101 <param name="readfilter" type="select" value="" label="filter by value" argument="--read-filter">
jasmine_amir@0 102 <option value="">None</option>
jasmine_amir@0 103 <option value="pass">pass</option>
jasmine_amir@0 104 <option value="reject">reject</option>
jasmine_amir@0 105 <option value="criteria">criteria</option>
jasmine_amir@0 106 <option value="redacted">redacted</option>
jasmine_amir@0 107 </param>
jasmine_amir@0 108 <param name="spotgroups" type="text" label="Filter by spot-groups" optional="true" argument="--spot-groups"/>
jasmine_amir@0 109 <param type="boolean" truevalue="--clip" falsevalue="" argument="--clip" label="Apply left and right clips" />
jasmine_amir@0 110 <param type="boolean" truevalue="--skip-technical" falsevalue="" checked="False" label="Dump only biological reads" argument="--skip-technical"/>
jasmine_amir@0 111 <param label="Table name within cSRA object" type="text" value="" optional="true" help="For SRA of noisy long-reads put SEQUENCE" argument="--table"/>
jasmine_amir@0 112 </section>
jasmine_amir@0 113 </inputs>
jasmine_amir@0 114 <outputs>
jasmine_amir@0 115 <collection name="list_paired" type="list:paired" label="Paired-end data (fastq-dump)">
jasmine_amir@0 116 <!-- Use named regex group to grab pattern
jasmine_amir@0 117 <identifier_0>_<identifier_1>.fq. Here identifier_0 is the list
jasmine_amir@0 118 identifier in the nested collection and identifier_1 is either
jasmine_amir@0 119 forward or reverse (for instance samp1_forward.fq).
jasmine_amir@0 120 -->
jasmine_amir@0 121 <discover_datasets pattern="(?P&lt;identifier_0&gt;[^_]+)_\d+.fastq_(?P&lt;identifier_1&gt;[^_]+)\.fastqsanger" ext="fastqsanger" directory="output"/>
jasmine_amir@0 122 <discover_datasets pattern="(?P&lt;identifier_0&gt;[^_]+)_\d+.fastq.gz_(?P&lt;identifier_1&gt;[^_]+)\.fastqsanger.gz" ext="fastqsanger.gz" directory="output"/>
jasmine_amir@0 123 <discover_datasets pattern="(?P&lt;identifier_0&gt;[^_]+)_\d+.fastq.bz2_(?P&lt;identifier_1&gt;[^_]+)\.fastqsanger.bz2" ext="fastqsanger.bz2" directory="output"/>
jasmine_amir@0 124 </collection>
jasmine_amir@0 125 <collection name="list_single" type='list' label="Single-end data (fastq-dump)">
jasmine_amir@0 126 <discover_datasets pattern="(?P&lt;designation&gt;.+)_\d+.fastq__single\.fastqsanger" directory="output" ext='fastqsanger'/>
jasmine_amir@0 127 <discover_datasets pattern="(?P&lt;designation&gt;.+)_\d+.fastq.gz__single\.fastqsanger.gz" directory="output" ext='fastqsanger.gz'/>
jasmine_amir@0 128 <discover_datasets pattern="(?P&lt;designation&gt;.+)_\d+.fastq.bz2__single\.fastqsanger.bz2" directory="output" ext='fastqsanger.bz2'/>
jasmine_amir@0 129 </collection>
jasmine_amir@0 130 </outputs>
jasmine_amir@0 131 <tests>
jasmine_amir@0 132 <test expect_num_outputs="2">
jasmine_amir@0 133 <param name="input_select" value="accession_number"/>
jasmine_amir@0 134 <param name="outputformat" value="fastqsanger"/>
jasmine_amir@0 135 <param name="accession" value="SRR044777"/>
jasmine_amir@0 136 <param name="skip_technical" value="True"/>
jasmine_amir@0 137 <output_collection name="list_single" type="list" count="1">
jasmine_amir@0 138 <element name="SRR044777">
jasmine_amir@0 139 <assert_contents>
jasmine_amir@0 140 <not_has_text text="rRNA_primer"/>
jasmine_amir@0 141 <has_text text="F47USSH02GNP1D"/>
jasmine_amir@0 142 </assert_contents>
jasmine_amir@0 143 </element>
jasmine_amir@0 144 </output_collection>
jasmine_amir@0 145 </test>
jasmine_amir@0 146 <test expect_num_outputs="2">
jasmine_amir@0 147 <param name="input_select" value="accession_number"/>
jasmine_amir@0 148 <param name="outputformat" value="fastqsanger.gz"/>
jasmine_amir@0 149 <param name="accession" value="SRR925743"/>
jasmine_amir@0 150 <param name="maxID" value="5"/>
jasmine_amir@0 151 <output_collection name="list_paired" type="list:paired" count="1">
jasmine_amir@0 152 <element name="SRR925743">
jasmine_amir@0 153 <element name="forward" file="SRR925743_forward.fastqsanger" decompress="True"/>
jasmine_amir@0 154 <element name="reverse" file="SRR925743_reverse.fastqsanger" decompress="True"/>
jasmine_amir@0 155 </element>
jasmine_amir@0 156 </output_collection>
jasmine_amir@0 157 </test>
jasmine_amir@0 158 <test expect_num_outputs="2">
jasmine_amir@0 159 <param name="input_select" value="accession_number"/>
jasmine_amir@0 160 <param name="outputformat" value="fastqsanger"/>
jasmine_amir@0 161 <param name="accession" value="SRR925743"/>
jasmine_amir@0 162 <param name="maxID" value="5"/>
jasmine_amir@0 163 <output_collection name="list_paired" type="list:paired" count="1">
jasmine_amir@0 164 <element name="SRR925743">
jasmine_amir@0 165 <element name="forward" file="SRR925743_forward.fastqsanger"/>
jasmine_amir@0 166 <element name="reverse" file="SRR925743_reverse.fastqsanger"/>
jasmine_amir@0 167 </element>
jasmine_amir@0 168 </output_collection>
jasmine_amir@0 169 </test>
jasmine_amir@0 170 <test expect_num_outputs="2">
jasmine_amir@0 171 <param name="input_select" value="file_list"/>
jasmine_amir@0 172 <param name="outputformat" value="fastqsanger"/>
jasmine_amir@0 173 <param name="file_list" value="list_pe"/>
jasmine_amir@0 174 <param name="maxID" value="5"/>
jasmine_amir@0 175 <output_collection name="list_paired" type="list:paired" count="1">
jasmine_amir@0 176 <element name="DRR015708">
jasmine_amir@0 177 <element name="forward" file="DRR015708_forward.fastqsanger"/>
jasmine_amir@0 178 <element name="reverse" file="DRR015708_reverse.fastqsanger"/>
jasmine_amir@0 179 </element>
jasmine_amir@0 180 </output_collection>
jasmine_amir@0 181 </test>
jasmine_amir@0 182 <test expect_num_outputs="2">
jasmine_amir@0 183 <param name="input_select" value="file_list"/>
jasmine_amir@0 184 <param name="outputformat" value="fastqsanger"/>
jasmine_amir@0 185 <param name="file_list" value="list_pe2"/>
jasmine_amir@0 186 <param name="maxID" value="5"/>
jasmine_amir@0 187 <output_collection name="list_paired" type="list:paired" count="1">
jasmine_amir@0 188 <element name="ERR027433">
jasmine_amir@0 189 <element name="forward" file="ERR027433_forward.fastqsanger"/>
jasmine_amir@0 190 <element name="reverse" file="ERR027433_reverse.fastqsanger"/>
jasmine_amir@0 191 </element>
jasmine_amir@0 192 </output_collection>
jasmine_amir@0 193 </test>
jasmine_amir@0 194 <test expect_num_outputs="2">
jasmine_amir@0 195 <param name="input_select" value="file_list"/>
jasmine_amir@0 196 <param name="outputformat" value="fastqsanger"/>
jasmine_amir@0 197 <param name="file_list" value="list_se"/>
jasmine_amir@0 198 <param name="maxID" value="5"/>
jasmine_amir@0 199 <output_collection name="list_single" type="list" count="1">
jasmine_amir@0 200 <element name="SRR1993644" file="SRR1993644.fastqsanger"/>
jasmine_amir@0 201 </output_collection>
jasmine_amir@0 202 </test>
jasmine_amir@0 203 <test expect_num_outputs="2">
jasmine_amir@0 204 <param name="input_select" value="accession_number"/>
jasmine_amir@0 205 <param name="outputformat" value="fastqsanger.gz"/>
jasmine_amir@0 206 <param name="accession" value="SRR6982805"/>
jasmine_amir@0 207 <param name="maxID" value="2"/>
jasmine_amir@0 208 <param name="table" value="SEQUENCE"/>
jasmine_amir@0 209 <output_collection name="list_single" type="list" count="1">
jasmine_amir@0 210 <element name="SRR6982805" file="SRR6982805.fastqsanger.gz" ftype="fastqsanger.gz" decompress="True"/>
jasmine_amir@0 211 </output_collection>
jasmine_amir@0 212 </test>
jasmine_amir@0 213 <test expect_num_outputs="2">
jasmine_amir@0 214 <param name="input_select" value="accession_number"/>
jasmine_amir@0 215 <param name="outputformat" value="fastqsanger.gz"/>
jasmine_amir@0 216 <param name="accession" value="ERR086330, SRR11953971"/>
jasmine_amir@0 217 <output_collection name="list_paired" type="list:paired" count="2">
jasmine_amir@0 218 <element name="ERR086330">
jasmine_amir@0 219 <element name="forward" file="ERR086330_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
jasmine_amir@0 220 <element name="reverse" file="ERR086330_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
jasmine_amir@0 221 </element>
jasmine_amir@0 222 <element name="SRR11953971">
jasmine_amir@0 223 <element name="forward" file="SRR11953971_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
jasmine_amir@0 224 <element name="reverse" file="SRR11953971_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
jasmine_amir@0 225 </element>
jasmine_amir@0 226 </output_collection>
jasmine_amir@0 227 </test>
jasmine_amir@0 228 </tests>
jasmine_amir@0 229 <help><![CDATA[
jasmine_amir@0 230 **What it does?**
jasmine_amir@0 231
jasmine_amir@0 232 This tool extracts data (in fastq_ format) from the Short Read Archive (SRA) at the National Center for Biotechnology Information (NCBI). It is based on the fastq-dump utility of the SRA Toolkit. The following applies:
jasmine_amir@0 233
jasmine_amir@0 234 - if data is paired-ended (or mate-pair) the tool will generate a collection of file pairs, in which each element will be a pair of fastq_ files containing forward and reverse mates.
jasmine_amir@0 235 - if data is single ended, each element of the collection will be a single fastq_ dataset.
jasmine_amir@0 236
jasmine_amir@0 237
jasmine_amir@0 238 @HOW_TO_USE_IT@
jasmine_amir@0 239
jasmine_amir@0 240 -----
jasmine_amir@0 241
jasmine_amir@0 242 **Output**
jasmine_amir@0 243
jasmine_amir@0 244 In every case, fastq datasets produced will be saved in Galaxy's history as a collection_ - a single history element containing multiple datasets. In fact, regardless of the experimental design, two collections will be produced: one containing paired-end data and another containing single-end data.
jasmine_amir@0 245 Some collections may be empty if the accessions provided in the list do not contain one of the types of data.
jasmine_amir@0 246
jasmine_amir@0 247 .. class:: warningmark
jasmine_amir@0 248
jasmine_amir@0 249 When you decide to dump technical reads (in Advanced Options Dump only biological reads is set to No), you will probably find your PAIRED data in the other data collection as it is impossible to determine if it was 2 biological reads or one biological and one technical.
jasmine_amir@0 250
jasmine_amir@0 251 .. class:: warningmark
jasmine_amir@0 252
jasmine_amir@0 253 By default, only biological reads are dumped and in case of PAIRED dataset only the spots which have both reads will be in the paired-end collection. The remaining single reads will be in the other collection.
jasmine_amir@0 254 To keep all reads, and potentially not have the same number of reads in forward and reverse use the --split-files option in Advanced Options, Select how to split the spots.
jasmine_amir@0 255
jasmine_amir@0 256 @ACCESSION_LIST_HOWTO@
jasmine_amir@0 257
jasmine_amir@0 258 -----
jasmine_amir@0 259
jasmine_amir@0 260
jasmine_amir@0 261 .. _fastq: https://en.wikipedia.org/wiki/FASTQ_format
jasmine_amir@0 262 .. _fasterq-dump: https://github.com/ncbi/sra-tools/wiki/HowTo:-fasterq-dump
jasmine_amir@0 263 .. _collection: https://galaxyproject.org/tutorials/collections/
jasmine_amir@0 264 .. _link: https://trace.ncbi.nlm.nih.gov/Traces/index.html?view=run_browser&display=reads
jasmine_amir@0 265
jasmine_amir@0 266 @SRATOOLS_ATTRRIBUTION@
jasmine_amir@0 267 ]]>
jasmine_amir@0 268 </help>
jasmine_amir@0 269 <expand macro="citation"/>
jasmine_amir@0 270 </tool>