comparison aws_sra.xml @ 30:73ee30eb273a draft

planemo upload for repository https://github.com/CFSAN-Biostatistics/galaxytrakr-tools commit d0c08749588099d40db3c23bfd554800ac307a30
author galaxytrakr
date Tue, 24 Mar 2026 03:09:41 +0000
parents 569a598c7e68
children 90ae4d437133
comparison
equal deleted inserted replaced
29:569a598c7e68 30:73ee30eb273a
1 <tool id="aws_sra" name="NCBI SRA AWS Fetch" version="0.4.0+gt_0.8" profile="23.0"> 1 <tool id="aws_sra" name="NCBI SRA AWS Fetch" version="0.4.0+gt_0.50" profile="22.01">
2 <description>Fetches one or more SRA runs from AWS S3 and converts them to FASTQ</description> 2 <description>Fetches one or more SRA runs from AWS S3 and converts them to FASTQ</description>
3 3
4 <requirements> 4 <requirements>
5 <requirement type="package" version="3.1.1">sra-tools</requirement>
6 <requirement type="package" version="2.8">pigz</requirement>
5 <requirement type="package" version="2.34.8">awscli</requirement> 7 <requirement type="package" version="2.34.8">awscli</requirement>
6 <requirement type="package" version="3.2.1">sra-tools</requirement>
7 <requirement type="package" version="2.8">pigz</requirement>
8 </requirements> 8 </requirements>
9 9
10 <version_command>fasterq-dump --version</version_command> 10 <version_command>fasterq-dump --version</version_command>
11 11
12 <command detect_errors="aggressive"><![CDATA[ 12 <command detect_errors="exit_code"><![CDATA[
13 ## Create a clean list of accessions from the user input 13 #if $input.input_select == "accession_number":
14 echo "$accession" | sed 's/,/\n/g; s/ \+/\n/g' | grep . > accessions.txt && 14 echo '${input.accession}' | sed -r 's/(\,|\;|__cn__)/\n/g' > accessions &&
15 15 #else:
16 ## Loop over each clean accession 16 grep '^[[:space:]]*[ESD]RR[0-9]\{1,\}[[:space:]]*$' '${input.file_list}' > accessions &&
17 for acc in $(cat accessions.txt); 17 #end if
18 do 18 mkdir -p output &&
19 echo "Processing accession: $acc" && 19 mkdir -p outputOther &&
20 20 for acc in \$(cat ./accessions);
21 ## 1. Create unique directories for this accession 21 do (
22 mkdir -p sra_cache_${acc} fastq_out_${acc} && 22 echo "Processing accession: \$acc" &&
23 23 mkdir -p sra_cache_\${acc} &&
24 ## 2. Download the file from S3 using aws s3 cp 24 aws s3 cp --no-sign-request "s3://sra-pub-run-odp/sra/\${acc}/\${acc}" ./sra_cache_\${acc}/\${acc} &&
25 aws s3 cp --no-sign-request "s3://sra-pub-run-odp/sra/${acc}/${acc}" ./sra_cache_${acc}/ && 25 fasterq-dump -e \${GALAXY_SLOTS:-4} -t . --split-3 ./sra_cache_\${acc}/\${acc} &&
26 26 rm -rf sra_cache_\${acc} &&
27 ## 3. Convert with fasterq-dump 27 count="\$(ls \${acc}*.fastq 2>/dev/null | wc -l)" &&
28 fasterq-dump --outdir ./fastq_out_${acc} --temp . --threads \${GALAXY_SLOTS:-4} --split-files ./sra_cache_${acc}/${acc} && 28 echo "Found \$count fastq file(s) for \$acc" &&
29 29 data=(\$(ls \${acc}*.fastq 2>/dev/null)) &&
30 ## 4. Compress with pigz 30 if [ "\$count" -eq 1 ]; then
31 pigz -p \${GALAXY_SLOTS:-4} ./fastq_out_${acc}/*.fastq && 31 pigz -cqp \${GALAXY_SLOTS:-4} "\${data[0]}" > output/"\${acc}".fastqsanger.gz &&
32 32 rm "\${data[0]}";
33 ## 5. Move outputs for collection discovery 33 elif [ -e "\${acc}".fastq ]; then
34 #if str($layout) == 'paired' 34 pigz -cqp \${GALAXY_SLOTS:-4} "\${acc}".fastq > outputOther/"\${acc}"__single.fastqsanger.gz &&
35 mv ./fastq_out_${acc}/${acc}_1.fastq.gz '$output_r1.files_path/${acc}_1.fastq.gz' && 35 pigz -cqp \${GALAXY_SLOTS:-4} "\${acc}"_1.fastq > output/"\${acc}"_1.fastqsanger.gz &&
36 mv ./fastq_out_${acc}/${acc}_2.fastq.gz '$output_r2.files_path/${acc}_2.fastq.gz' 36 pigz -cqp \${GALAXY_SLOTS:-4} "\${acc}"_2.fastq > output/"\${acc}"_2.fastqsanger.gz &&
37 #else 37 rm "\${acc}"*.fastq;
38 mv ./fastq_out_${acc}/${acc}.fastq.gz '$output_r1.files_path/${acc}.fastq.gz' 38 elif [ "\$count" -eq 2 ]; then
39 #end if && 39 pigz -cqp \${GALAXY_SLOTS:-4} "\${acc}"_1.fastq > output/"\${acc}"_1.fastqsanger.gz &&
40 40 pigz -cqp \${GALAXY_SLOTS:-4} "\${acc}"_2.fastq > output/"\${acc}"_2.fastqsanger.gz &&
41 ## 6. Clean up 41 rm "\${acc}"*.fastq;
42 rm -rf sra_cache_${acc} fastq_out_${acc} 42 else
43 done 43 for file in \${data[*]}; do
44 pigz -cqp \${GALAXY_SLOTS:-4} "\$file" > outputOther/"\$file"sanger.gz &&
45 rm "\$file";
46 done;
47 fi
48 ); done;
49 echo "Done with all accessions."
44 ]]></command> 50 ]]></command>
45 51
46 <inputs> 52 <inputs>
47 <param name="accession" type="text" label="SRA Accession(s)" help="Provide one or more accession numbers (separated by commas, spaces, or newlines)."/> 53 <conditional name="input">
48 <param name="layout" type="select" label="Read layout" help="This setting is applied to all accessions."> 54 <param name="input_select" type="select" label="Select input type">
49 <option value="paired" selected="true">Paired-end (R1 + R2)</option> 55 <option value="accession_number">SRA Accession number(s)</option>
50 <option value="single">Single-end</option> 56 <option value="file_list">File containing accession list</option>
51 </param> 57 </param>
58 <when value="accession_number">
59 <param name="accession" type="text" label="SRA Accession(s)"
60 help="One or more SRA run accessions (SRR, ERR, or DRR), separated by commas or spaces.">
61 <validator type="empty_field" message="At least one SRA accession is required."/>
62 <sanitizer>
63 <valid initial="string.printable">
64 <remove value="&apos;"/>
65 </valid>
66 <mapping initial="none">
67 <add source="&apos;" target="&apos;&quot;&apos;&quot;&apos;"/>
68 </mapping>
69 </sanitizer>
70 </param>
71 </when>
72 <when value="file_list">
73 <param name="file_list" type="data" format="txt,tabular" label="Accession list file"
74 help="A text file with one SRA accession (SRR, ERR, or DRR) per line."/>
75 </when>
76 </conditional>
52 </inputs> 77 </inputs>
53 78
54 <outputs> 79 <outputs>
55 <collection name="output_r1" type="list" label="FASTQ Reads (R1) for ${accession}"> 80 <collection name="list_paired" type="list:paired" label="Paired-end FASTQ (aws_sra)">
56 <discover_datasets pattern="(?P&lt;designation&gt;.+)_1\.fastq\.gz" format="fastqsanger.gz" /> 81 <discover_datasets
82 pattern="(?P&lt;identifier_0&gt;[^_]+)_(?P&lt;identifier_1&gt;[12])\.fastqsanger\.gz"
83 directory="output"
84 ext="fastqsanger.gz"/>
57 </collection> 85 </collection>
58 <collection name="output_r2" type="list" label="FASTQ Reads (R2) for ${accession}"> 86 <collection name="output_single" type="list" label="Single-end FASTQ (aws_sra)">
59 <discover_datasets pattern="(?P&lt;designation&gt;.+)_2\.fastq\.gz" format="fastqsanger.gz" /> 87 <discover_datasets
60 <filter>layout == 'paired'</filter> 88 pattern="(?P&lt;designation&gt;.+)\.fastqsanger\.gz"
89 directory="output"
90 ext="fastqsanger.gz"/>
91 </collection>
92 <collection name="output_other" type="list" label="Other FASTQ (aws_sra)">
93 <discover_datasets
94 pattern="(?P&lt;designation&gt;.+)\.fastqsanger\.gz"
95 directory="outputOther"
96 format="fastqsanger.gz"/>
61 </collection> 97 </collection>
62 </outputs> 98 </outputs>
63 99
64 <tests> 100 <tests>
65 <test expect_num_outputs="2"> 101 <test expect_num_outputs="3">
66 <param name="accession" value="SRR13333333"/> 102 <conditional name="input">
67 <param name="layout" value="paired"/> 103 <param name="input_select" value="accession_number"/>
68 <output_collection name="output_r1" type="list" count="1"> 104 <param name="accession" value="SRR13333333"/>
69 <element name="SRR13333333_1" ftype="fastqsanger.gz"> 105 </conditional>
70 <assert_contents> 106 <output_collection name="list_paired" type="list:paired" count="1">
71 <has_text text="@SRR13333333"/> 107 <element name="SRR13333333">
72 </assert_contents> 108 <element name="forward" ftype="fastqsanger.gz">
73 </element> 109 <assert_contents>
74 </output_collection> 110 <has_text text="@SRR13333333"/>
75 <output_collection name="output_r2" type="list" count="1"> 111 </assert_contents>
76 <element name="SRR13333333_2" ftype="fastqsanger.gz"> 112 </element>
77 <assert_contents> 113 <element name="reverse" ftype="fastqsanger.gz">
78 <has_text text="@SRR13333333"/> 114 <assert_contents>
79 </assert_contents> 115 <has_text text="@SRR13333333"/>
116 </assert_contents>
117 </element>
80 </element> 118 </element>
81 </output_collection> 119 </output_collection>
82 </test> 120 </test>
121
83 </tests> 122 </tests>
84 123
85 <help><![CDATA[ 124 <help><![CDATA[
86 **NCBI SRA AWS Fetch** 125 **NCBI SRA AWS Fetch**
87 126
88 Fetches one or more SRA runs from the public `sra-pub-run-odp` S3 bucket and converts them to gzip-compressed FASTQ using `fasterq-dump`. This tool uses `aws s3 cp` for direct downloads within the AWS environment. 127 Fetches one or more SRA runs from the public ``sra-pub-run-odp`` S3 bucket and converts them to
128 gzip-compressed FASTQ using ``fasterq-dump``. Downloads use ``aws s3 cp`` with no credentials
129 required (public bucket).
130
131 **Inputs**
132
133 - **SRA Accession number(s)**: Type one or more accessions (SRR, ERR, or DRR) directly, comma- or space-separated.
134 - **File containing accession list**: A text file with one accession per line. This option is connectable as a workflow input.
135
136 **Outputs**
137
138 Three collections are always created (some may be empty depending on the data):
139
140 - **Paired-end FASTQ**: A nested ``list:paired`` collection — each accession contains a forward/reverse pair.
141 - **Single-end FASTQ**: A flat list for reads confirmed single-end by ``--split-3``.
142 - **Other FASTQ**: Reads that could not be cleanly classified.
89 ]]></help> 143 ]]></help>
90 144
91 <citations> 145 <citations>
92 <citation type="bibtex"> 146 <citation type="bibtex">
93 @misc{ncbi_sra_aws, 147 @misc{ncbi_sra_aws,
94 title = {{NCBI} {SRA} on {AWS} Open Data}, 148 title = {{NCBI} {SRA} on {AWS} Open Data},
95 author = {{National Center for Biotechnology Information}}, 149 author = {{National Center for Biotechnology Information}},
96 howpublished = {\\url{https://registry.opendata.aws/ncbi-sra/}}, 150 howpublished = {\url{https://registry.opendata.aws/ncbi-sra/}},
97 note = {Accessed via AWS S3 without credentials} 151 note = {Accessed via AWS S3 without credentials}
98 } 152 }
99 </citation> 153 </citation>
100 <citation type="bibtex"> 154 <citation type="doi">10.1093/nar/gkq1019</citation>
101 @article{sra_toolkit,
102 title = {The {NCBI} {SRA} and portable data in biology},
103 author = {Leinonen, Rasko and Sugawara, Hideaki and Shumway, Martin and
104 {International Nucleotide Sequence Database Collaboration}},
105 journal = {Nucleic Acids Research},
106 volume = {39},
107 number = {suppl\\\_1},
108 pages = {D19--D21},
109 year = {2011},
110 doi = {10.1093/nar/gkq1019}
111 }
112 </citation>
113 </citations> 155 </citations>
114 </tool> 156 </tool>