comparison aws_sra.xml @ 21:02f45c03c306 draft

planemo upload for repository https://github.com/CFSAN-Biostatistics/galaxytrakr-tools commit f72940592c22c9ba88f1dcb23ef8bb5199ce434e
author galaxytrakr
date Mon, 23 Mar 2026 23:33:12 +0000
parents 2b4efa539c71
children 5ecb94ab82c3
comparison
equal deleted inserted replaced
20:2b4efa539c71 21:02f45c03c306
1 <tool id="aws_sra" name="NCBI SRA AWS Fetch" version="0.3.0+gt_0.20" profile="23.0"> 1 <tool id="aws_sra" name="NCBI SRA AWS Fetch" version="1.3.0" profile="23.0">
2 <description>Fetches SRA runs from AWS and converts them to FASTQ</description> 2 <description>Fetches a single SRA run from AWS and converts it to FASTQ</description>
3
3 <requirements> 4 <requirements>
4 <requirement type="package" version="2.34.8">awscli</requirement> 5 <requirement type="package" version="2.34.8">awscli</requirement>
5 <requirement type="package" version="3.2.1">sra-tools</requirement> 6 <requirement type="package" version="3.2.1">sra-tools</requirement>
6 <requirement type="package" version="2.8">pigz</requirement> 7 <requirement type="package" version="2.8">pigz</requirement>
7 </requirements> 8 </requirements>
9
8 <version_command>fasterq-dump --version</version_command> 10 <version_command>fasterq-dump --version</version_command>
9 11
10 <command detect_errors="aggressive"><![CDATA[ 12 <command detect_errors="aggressive"><![CDATA[
11 ## Single Run Mode 13 #set $acc = str($accession).strip()
12 #if $run_type.mode == 'single'
13 #set $acc = str($run_type.accession).strip()
14 echo "Processing single accession: $acc" &&
15 mkdir -p sra_cache fastq_out &&
16 aws s3 cp --no-sign-request 's3://sra-pub-run-odp/sra/${acc}/${acc}' ./sra_cache/ &&
17 fasterq-dump --outdir ./fastq_out --temp . --threads \${GALAXY_SLOTS:-4} --split-files ./sra_cache/${acc} &&
18 pigz -p \${GALAXY_SLOTS:-4} ./fastq_out/*.fastq &&
19 #if str($layout) == 'paired'
20 mv ./fastq_out/${acc}_1.fastq.gz '$output_r1_single' &&
21 mv ./fastq_out/${acc}_2.fastq.gz '$output_r2_single'
22 #else
23 mv ./fastq_out/*.fastq.gz '$output_r1_single'
24 #end if
25 14
26 ## Batch Run Mode 15 echo "Processing single accession: $acc" &&
16
17 ## 1. Create temporary directories
18 mkdir -p sra_cache fastq_out &&
19
20 ## 2. Download the file from S3 using the discovered path format (no .sra)
21 aws s3 cp --no-sign-request 's3://sra-pub-run-odp/sra/${acc}/${acc}' ./sra_cache/ &&
22
23 ## 3. Convert with fasterq-dump, using the correct argument order
24 fasterq-dump --outdir ./fastq_out --temp . --threads \${GALAXY_SLOTS:-4} --split-files ./sra_cache/${acc} &&
25
26 ## 4. Compress with pigz
27 pigz -p \${GALAXY_SLOTS:-4} ./fastq_out/*.fastq &&
28
29 ## 5. Move the final outputs to their Galaxy dataset paths
30 #if str($layout) == 'paired'
31 mv ./fastq_out/${acc}_1.fastq.gz '$output_r1' &&
32 mv ./fastq_out/${acc}_2.fastq.gz '$output_r2'
27 #else 33 #else
28 #for $acc in $run_type.accession_list.lines: 34 # Be explicit about the single-end filename, removing the wildcard
29 #set $acc = $acc.strip() 35 mv ./fastq_out/${acc}.fastq.gz '$output_r1'
30 #if $acc:
31 echo "Processing batch accession: $acc" &&
32 mkdir -p sra_cache_${acc} fastq_out_${acc} &&
33 aws s3 cp --no-sign-request 's3://sra-pub-run-odp/sra/${acc}/${acc}' ./sra_cache_${acc}/ &&
34 fasterq-dump --outdir ./fastq_out_${acc} --temp . --threads \${GALAXY_SLOTS:-4} --split-files ./sra_cache_${acc}/${acc} &&
35 pigz -p \${GALAXY_SLOTS:-4} ./fastq_out_${acc}/*.fastq &&
36 #if str($layout) == 'paired'
37 mv ./fastq_out_${acc}/${acc}_1.fastq.gz '$output_r1_batch.files_path/${acc}_1.fastq.gz' &&
38 mv ./fastq_out_${acc}/${acc}_2.fastq.gz '$output_r2_batch.files_path/${acc}_2.fastq.gz'
39 #else
40 mv ./fastq_out_${acc}/${acc}.fastq.gz '$output_r1_batch.files_path/${acc}.fastq.gz'
41 #end if &&
42 rm -rf sra_cache_${acc} fastq_out_${acc}
43 #end if
44 # end for
45 #end if 36 #end if
46 ]]></command> 37 ]]></command>
47 38
48 <inputs> 39 <inputs>
49 <conditional name="run_type"> 40 <param name="accession" type="text" label="SRA Accession" help="e.g., SRR13333333"/>
50 <param name="mode" type="select" label="Execution Mode"> 41 <param name="layout" type="select" label="Read layout" help="Check the SRA record to confirm layout before running.">
51 <option value="single" selected="true">Single Accession</option>
52 <option value="batch">Batch of Accessions</option>
53 </param>
54 <when value="single">
55 <param name="accession" type="text" label="SRA Accession"/>
56 </when>
57 <when value="batch">
58 <param name="accession_list" type="data" format="txt" label="List of SRA Accessions"/>
59 </when>
60 </conditional>
61 <param name="layout" type="select" label="Read layout">
62 <option value="paired" selected="true">Paired-end (R1 + R2)</option> 42 <option value="paired" selected="true">Paired-end (R1 + R2)</option>
63 <option value="single">Single-end</option> 43 <option value="single">Single-end</option>
64 </param> 44 </param>
65 </inputs> 45 </inputs>
66 46
67 <outputs> 47 <outputs>
68 <!-- Outputs for Single Run Mode --> 48 <data name="output_r1" format="fastqsanger.gz" label="${accession}_1.fastq.gz"/>
69 <data name="output_r1_single" format="fastqsanger.gz" label="${run_type.accession}_1.fastq.gz"> 49 <data name="output_r2" format="fastqsanger.gz" label="${accession}_2.fastq.gz">
70 <filter>run_type['mode'] == 'single'</filter> 50 <filter>layout == 'paired'</filter>
71 </data> 51 </data>
72 <data name="output_r2_single" format="fastqsanger.gz" label="${run_type.accession}_2.fastq.gz"> 52 </outputs>
73 <filter>run_type['mode'] == 'single' and layout == 'paired'</filter>
74 </data>
75 53
76 <!-- Outputs for Batch Mode --> 54 <tests>
77 <collection name="output_r1_batch" type="list" label="FASTQ Reads (R1)"> 55 <test expect_num_outputs="2">
78 <discover_datasets pattern="(?P&lt;designation&gt;.+)_1\.fastq\.gz" format="fastqsanger.gz" /> 56 <param name="accession" value="SRR13333333"/>
79 <filter>run_type['mode'] == 'batch'</filter> 57 <param name="layout" value="paired"/>
80 </collection> 58 <output name="output_r1" ftype="fastqsanger.gz">
81 <collection name="output_r2_batch" type="list" label="FASTQ Reads (R2)"> 59 <assert_contents>
82 <discover_datasets pattern="(?P&lt;designation&gt;.+)_2\.fastq\.gz" format="fastqsanger.gz" /> 60 <has_text text="@SRR13333333"/>
83 <filter>run_type['mode'] == 'batch' and layout == 'paired'</filter> 61 </assert_contents>
84 </collection> 62 </output>
85 </outputs> 63 <output name="output_r2" ftype="fastqsanger.gz">
86 64 <assert_contents>
65 <has_text text="@SRR13333333"/>
66 </assert_contents>
67 </output>
68 </test>
69 <test expect_num_outputs="1">
70 <param name="accession" value="SRR11181815"/> <!-- A known single-end run -->
71 <param name="layout" value="single"/>
72 <output name="output_r1" ftype="fastqsanger.gz">
73 <assert_contents>
74 <has_text text="@SRR11181815"/>
75 </assert_contents>
76 </output>
77 </test>
78 </tests>
79
87 <help><![CDATA[ 80 <help><![CDATA[
88 **NCBI SRA AWS Fetch** 81 **NCBI SRA AWS Fetch**
89 82
90 Fetches SRA runs from the public `sra-pub-run-odp` bucket on Amazon S3 and converts them to gzip-compressed FASTQ using `fasterq-dump`. 83 Fetches a single SRA run from the public `sra-pub-run-odp` bucket on Amazon S3 and converts it to gzip-compressed FASTQ using `fasterq-dump`.
91
92 This tool can be run on a single SRA accession or a list of accessions provided as a text file (one per line).
93
94 Outputs are automatically organized into collections suitable for downstream analysis.
95 ]]></help> 84 ]]></help>
96 85
97 <citations> 86 <citations>
98 <citation type="bibtex"> 87 <citation type="bibtex">
99 @misc{ncbi_sra_aws, 88 @misc{ncbi_sra_aws,