comparison aws_sra.xml @ 23:d7f68b3cde39 draft

planemo upload for repository https://github.com/CFSAN-Biostatistics/galaxytrakr-tools commit e9adf514c1b6b341c9e5bf8cc5a41c79b738d48e
author galaxytrakr
date Mon, 23 Mar 2026 23:55:36 +0000
parents 5ecb94ab82c3
children 7dbb60c48056
comparison
equal deleted inserted replaced
22:5ecb94ab82c3 23:d7f68b3cde39
1 <tool id="aws_sra" name="NCBI SRA AWS Fetch" version="0.4+gt_0.1" profile="23.0"> 1 <tool id="aws_sra" name="NCBI SRA AWS Fetch" version="0.4.0+gt+0.2" profile="23.0">
2 <description>Fetches a single SRA run from AWS and converts it to FASTQ</description> 2 <description>Fetches one or more SRA runs from AWS S3 and converts them to FASTQ</description>
3 3
4 <requirements> 4 <requirements>
5 <requirement type="package" version="2.34.8">awscli</requirement> 5 <requirement type="package" version="2.34.8">awscli</requirement>
6 <requirement type="package" version="3.2.1">sra-tools</requirement> 6 <requirement type="package" version="3.2.1">sra-tools</requirement>
7 <requirement type="package" version="2.8">pigz</requirement> 7 <requirement type="package" version="2.8">pigz</requirement>
8 </requirements> 8 </requirements>
9 9
10 <version_command>fasterq-dump --version</version_command> 10 <version_command>fasterq-dump --version</version_command>
11 11
12 <command detect_errors="aggressive"><![CDATA[ 12 <command detect_errors="aggressive"><![CDATA[
13 #set $acc = str($accession).strip() 13 ## Create a clean list of accessions from the user input
14 echo "$accession" | sed 's/,/\n/g; s/ \+/\n/g' | grep . > accessions.txt &&
14 15
15 echo "Processing single accession: $acc" && 16 ## Loop over each clean accession
17 for acc in $(cat accessions.txt);
18 do
19 echo "Processing accession: $acc" &&
16 20
17 ## 1. Create temporary directories 21 ## 1. Create unique directories for this accession
18 mkdir -p sra_cache fastq_out && 22 mkdir -p sra_cache_${acc} fastq_out_${acc} &&
19 23
20 ## 2. Download the file from S3 using the discovered path format (no .sra) 24 ## 2. Download the file from S3 using aws s3 cp
21 aws s3 cp --no-sign-request 's3://sra-pub-run-odp/sra/${acc}/${acc}' ./sra_cache/ && 25 aws s3 cp --no-sign-request "s3://sra-pub-run-odp/sra/${acc}/${acc}" ./sra_cache_${acc}/ &&
22 26
23 ## 3. Convert with fasterq-dump, using the correct argument order 27 ## 3. Convert with fasterq-dump
24 fasterq-dump --outdir ./fastq_out --temp . --threads \${GALAXY_SLOTS:-4} --split-files ./sra_cache/${acc} && 28 fasterq-dump --outdir ./fastq_out_${acc} --temp . --threads \${GALAXY_SLOTS:-4} --split-files ./sra_cache_${acc}/${acc} &&
25 29
26 ## 4. Compress with pigz 30 ## 4. Compress with pigz
27 pigz -p \${GALAXY_SLOTS:-4} ./fastq_out/*.fastq && 31 pigz -p \${GALAXY_SLOTS:-4} ./fastq_out_${acc}/*.fastq &&
28 32
29 ## 5. Move the final outputs to their Galaxy dataset paths 33 ## 5. Move outputs for collection discovery
30 #if str($layout) == 'paired' 34 #if str($layout) == 'paired'
31 mv ./fastq_out/${acc}_1.fastq.gz '$output_r1' && 35 mv ./fastq_out_${acc}/${acc}_1.fastq.gz '$output_r1.files_path/${acc}_1.fastq.gz' &&
32 mv ./fastq_out/${acc}_2.fastq.gz '$output_r2' 36 mv ./fastq_out_${acc}/${acc}_2.fastq.gz '$output_r2.files_path/${acc}_2.fastq.gz'
33 #else 37 #else
34 # Be explicit about the single-end filename, removing the wildcard 38 mv ./fastq_out_${acc}/${acc}.fastq.gz '$output_r1.files_path/${acc}.fastq.gz'
35 mv ./fastq_out/${acc}.fastq.gz '$output_r1' 39 #end if &&
36 #end if 40
41 ## 6. Clean up
42 rm -rf sra_cache_${acc} fastq_out_${acc}
43 done
37 ]]></command> 44 ]]></command>
38 45
39 <inputs> 46 <inputs>
40 <param name="accession" type="text" label="SRA Accession" help="e.g., SRR13333333"/> 47 <param name="accession" type="text" multiple="true" label="SRA Accession(s)" help="Provide one or more accession numbers (separated by commas, spaces, or newlines). This field accepts a dataset list of accessions in a workflow."/>
41 <param name="layout" type="select" label="Read layout" help="Check the SRA record to confirm layout before running."> 48 <param name="layout" type="select" label="Read layout" help="This setting is applied to all accessions.">
42 <option value="paired" selected="true">Paired-end (R1 + R2)</option> 49 <option value="paired" selected="true">Paired-end (R1 + R2)</option>
43 <option value="single">Single-end</option> 50 <option value="single">Single-end</option>
44 </param> 51 </param>
45 </inputs> 52 </inputs>
46 53
47 <outputs> 54 <outputs>
48 <data name="output_r1" format="fastqsanger.gz" label="${accession}_1.fastq.gz"/> 55 <collection name="output_r1" type="list" label="FASTQ Reads (R1) for ${accession}">
49 <data name="output_r2" format="fastqsanger.gz" label="${accession}_2.fastq.gz"> 56 <discover_datasets pattern="(?P&lt;designation&gt;.+)_1\.fastq\.gz" format="fastqsanger.gz" />
57 </collection>
58 <collection name="output_r2" type="list" label="FASTQ Reads (R2) for ${accession}">
59 <discover_datasets pattern="(?P&lt;designation&gt;.+)_2\.fastq\.gz" format="fastqsanger.gz" />
50 <filter>layout == 'paired'</filter> 60 <filter>layout == 'paired'</filter>
51 </data> 61 </collection>
52 </outputs> 62 </outputs>
53 63
54 <tests> 64 <tests>
55 <test expect_num_outputs="2"> 65 <test expect_num_outputs="2">
56 <param name="accession" value="SRR13333333"/> 66 <param name="accession" value="SRR13333333"/>
57 <param name="layout" value="paired"/> 67 <param name="layout" value="paired"/>
58 <output name="output_r1" ftype="fastqsanger.gz"> 68 <output_collection name="output_r1" type="list" count="1">
59 <assert_contents> 69 <element name="SRR13333333_1" ftype="fastqsanger.gz" has_text="@SRR13333333"/>
60 <has_text text="@SRR13333333"/> 70 </output_collection>
61 </assert_contents> 71 <output_collection name="output_r2" type="list" count="1">
62 </output> 72 <element name="SRR13333333_2" ftype="fastqsanger.gz" has_text="@SRR13333333"/>
63 <output name="output_r2" ftype="fastqsanger.gz"> 73 </output_collection>
64 <assert_contents>
65 <has_text text="@SRR13333333"/>
66 </assert_contents>
67 </output>
68 </test>
69 <test expect_num_outputs="1">
70 <param name="accession" value="SRR11181815"/> <!-- A known single-end run -->
71 <param name="layout" value="single"/>
72 <output name="output_r1" ftype="fastqsanger.gz">
73 <assert_contents>
74 <has_text text="@SRR11181815"/>
75 </assert_contents>
76 </output>
77 </test> 74 </test>
78 </tests> 75 </tests>
79 76
80 <help><![CDATA[ 77 <help><![CDATA[
81 **NCBI SRA AWS Fetch** 78 **NCBI SRA AWS Fetch**
82 79
83 Fetches a single SRA run from the public `sra-pub-run-odp` bucket on Amazon S3 and converts it to gzip-compressed FASTQ using `fasterq-dump`. 80 Fetches one or more SRA runs from the public `sra-pub-run-odp` S3 bucket and converts them to gzip-compressed FASTQ using `fasterq-dump`. This tool uses `aws s3 cp` for direct downloads within the AWS environment.
84 ]]></help> 81 ]]></help>
85 82
86 <citations> 83 <citations>
87 <citation type="bibtex"> 84 <citation type="bibtex">
88 @misc{ncbi_sra_aws, 85 @misc{ncbi_sra_aws,