comparison aws_sra.xml @ 17:9fb80e0392ce draft

planemo upload for repository https://github.com/CFSAN-Biostatistics/galaxytrakr-tools commit 9707fa5e3ca6db5b58f271d133484d078cf65390
author galaxytrakr
date Mon, 23 Mar 2026 20:44:25 +0000
parents 58cc45662c63
children 5680c31cd031
comparison
equal deleted inserted replaced
16:58cc45662c63 17:9fb80e0392ce
1 <tool id="aws_sra" name="NCBI SRA AWS Fetch" version="0.3.0+gt_0.16" profile="23.0"> 1 <tool id="aws_sra" name="NCBI SRA AWS Fetch" version="0.3.0+gt_0.17" profile="23.0">
2 <description>Fetches SRA runs from AWS and converts them to FASTQ</description> 2 <description>Fetches SRA runs from AWS and converts them to FASTQ</description>
3
4 <requirements> 3 <requirements>
5 <requirement type="package" version="2.34.8">awscli</requirement> 4 <requirement type="package" version="2.34.8">awscli</requirement>
6 <requirement type="package" version="3.2.1">sra-tools</requirement> 5 <requirement type="package" version="3.2.1">sra-tools</requirement>
7 <requirement type="package" version="2.8">pigz</requirement> 6 <requirement type="package" version="2.8">pigz</requirement>
8 </requirements> 7 </requirements>
9
10 <version_command>fasterq-dump --version</version_command> 8 <version_command>fasterq-dump --version</version_command>
11 9
12 <command detect_errors="aggressive"><![CDATA[ 10 <command detect_errors="aggressive"><![CDATA[
13 ## This loop handles both 'single' and 'batch' modes. 11 ## Single Run Mode
14 #for $acc_line in $run_type.mode == 'single' and str($run_type.accession).split() or $run_type.accession_list.lines: 12 #if $run_type.mode == 'single'
15 #set $acc = $acc_line.strip() 13 #set $acc = str($run_type.accession).strip()
16 #if $acc: 14 echo "Processing single accession: $acc" &&
15 mkdir -p sra_cache fastq_out &&
16 aws s3 cp --no-sign-request 's3://sra-pub-run-odp/sra/${acc}/${acc}' ./sra_cache/ &&
17 fasterq-dump --outdir ./fastq_out --temp . --threads \${GALAXY_SLOTS:-4} --split-files ./sra_cache/${acc} &&
18 pigz -p \${GALAXY_SLOTS:-4} ./fastq_out/*.fastq &&
19 #if str($layout) == 'paired'
20 # Move files directly to the single output datasets
21 mv ./fastq_out/${acc}_1.fastq.gz '$output_r1_single' &&
22 mv ./fastq_out/${acc}_2.fastq.gz '$output_r2_single'
23 #else
24 mv ./fastq_out/*.fastq.gz '$output_r1_single'
25 #end if
17 26
18 echo "Processing accession: $acc" && 27 ## Batch Run Mode
19 28 #else
20 ## 1. Create unique directories for this accession 29 #for $acc in $run_type.accession_list.lines:
21 mkdir -p sra_cache_${acc} fastq_out_${acc} && 30 #set $acc = $acc.strip()
22 31 #if $acc:
23 ## 2. Download the file from S3 using the discovered path format 32 echo "Processing batch accession: $acc" &&
24 aws s3 cp --no-sign-request 's3://sra-pub-run-odp/sra/${acc}/${acc}' ./sra_cache_${acc}/ && 33 mkdir -p sra_cache_${acc} fastq_out_${acc} &&
25 34 aws s3 cp --no-sign-request 's3://sra-pub-run-odp/sra/${acc}/${acc}' ./sra_cache_${acc}/ &&
26 ## 3. Convert with fasterq-dump, using the correct argument order 35 fasterq-dump --outdir ./fastq_out_${acc} --temp . --threads \${GALAXY_SLOTS:-4} --split-files ./sra_cache_${acc}/${acc} &&
27 fasterq-dump --outdir ./fastq_out_${acc} --temp . --threads \${GALAXY_SLOTS:-4} --split-files ./sra_cache_${acc}/${acc} && 36 pigz -p \${GALAXY_SLOTS:-4} ./fastq_out_${acc}/*.fastq &&
28 37 #if str($layout) == 'paired'
29 ## 4. Compress with pigz 38 # Move files to the special path for collection discovery
30 pigz -p \${GALAXY_SLOTS:-4} ./fastq_out_${acc}/*.fastq && 39 mv ./fastq_out_${acc}/${acc}_1.fastq.gz '$output_r1_batch.files_path/${acc}_1.fastq.gz' &&
31 40 mv ./fastq_out_${acc}/${acc}_2.fastq.gz '$output_r2_batch.files_path/${acc}_2.fastq.gz'
32 ## 5. Move outputs to special directories Galaxy can discover 41 #else
33 #if $layout == 'paired' 42 mv ./fastq_out_${acc}/*.fastq.gz '$output_r1_batch.files_path/${acc}.fastq.gz'
34 mv ./fastq_out_${acc}/${acc}_1.fastq.gz '$output_r1.files_path/${acc}_1.fastq.gz' && 43 #end if &&
35 mv ./fastq_out_${acc}/${acc}_2.fastq.gz '$output_r2.files_path/${acc}_2.fastq.gz' 44 rm -rf sra_cache_${acc} fastq_out_${acc}
36 #else 45 #end if
37 mv ./fastq_out_${acc}/*.fastq.gz '$output_r1.files_path/${acc}.fastq.gz' 46 #end for
38 #end if && 47 #end if
39
40 ## 6. Clean up temporary files
41 rm -rf sra_cache_${acc} fastq_out_${acc}
42
43 #end if
44 #end for
45 ]]></command> 48 ]]></command>
46 49
47 <inputs> 50 <inputs>
48 <!-- This conditional allows the user to choose a single run or a list of runs -->
49 <conditional name="run_type"> 51 <conditional name="run_type">
50 <param name="mode" type="select" label="Execution Mode" help="Run on a single accession or a list of accessions from a file."> 52 <param name="mode" type="select" label="Execution Mode">
51 <option value="single" selected="true">Single Accession</option> 53 <option value="single" selected="true">Single Accession</option>
52 <option value="batch">Batch of Accessions</option> 54 <option value="batch">Batch of Accessions</option>
53 </param> 55 </param>
54 <when value="single"> 56 <when value="single">
55 <param name="accession" type="text" label="SRA Accession" help="e.g., SRR13333333"/> 57 <param name="accession" type="text" label="SRA Accession"/>
56 </when> 58 </when>
57 <when value="batch"> 59 <when value="batch">
58 <param name="accession_list" type="data" format="txt" label="List of SRA Accessions" help="A plain text file with one SRA accession per line."/> 60 <param name="accession_list" type="data" format="txt" label="List of SRA Accessions"/>
59 </when> 61 </when>
60 </conditional> 62 </conditional>
61 63 <param name="layout" type="select" label="Read layout">
62 <!-- This layout parameter is always required -->
63 <param name="layout" type="select" label="Read layout" help="Check the SRA record to confirm layout before running.">
64 <option value="paired" selected="true">Paired-end (R1 + R2)</option> 64 <option value="paired" selected="true">Paired-end (R1 + R2)</option>
65 <option value="single">Single-end</option> 65 <option value="single">Single-end</option>
66 </param> 66 </param>
67 </inputs> 67 </inputs>
68 68
69 <outputs> 69 <outputs>
70 <!-- These collections will gather all the files produced by the loop --> 70 <!-- Outputs for Single Run Mode -->
71 <collection name="output_r1" type="list" label="${run_type.accession or 'FASTQ Reads (R1)'}"> 71 <data name="output_r1_single" format="fastqsanger.gz" label="${run_type.accession}_1.fastq.gz">
72 <filter>run_type['mode'] == 'single'</filter>
73 </data>
74 <data name="output_r2_single" format="fastqsanger.gz" label="${run_type.accession}_2.fastq.gz">
75 <filter>run_type['mode'] == 'single' and layout == 'paired'</filter>
76 </data>
77
78 <!-- Outputs for Batch Mode -->
79 <collection name="output_r1_batch" type="list" label="FASTQ Reads (R1)">
72 <discover_datasets pattern="(?P&lt;designation&gt;.+)_1\.fastq\.gz" format="fastqsanger.gz" /> 80 <discover_datasets pattern="(?P&lt;designation&gt;.+)_1\.fastq\.gz" format="fastqsanger.gz" />
81 <filter>run_type['mode'] == 'batch'</filter>
73 </collection> 82 </collection>
74 <collection name="output_r2" type="list" label="${run_type.accession or 'FASTQ Reads (R2)'}"> 83 <collection name="output_r2_batch" type="list" label="FASTQ Reads (R2)">
75 <discover_datasets pattern="(?P&lt;designation&gt;.+)_2\.fastq\.gz" format="fastqsanger.gz" /> 84 <discover_datasets pattern="(?P&lt;designation&gt;.+)_2\.fastq\.gz" format="fastqsanger.gz" />
76 <filter>layout == 'paired'</filter> 85 <filter>run_type['mode'] == 'batch' and layout == 'paired'</filter>
77 </collection> 86 </collection>
78 </outputs> 87 </outputs>
79 88
80 <tests>
81 <test expect_num_outputs="2">
82 <param name="mode" value="single"/>
83 <param name="accession" value="SRR13333333"/>
84 <param name="layout" value="paired"/>
85 <output_collection name="output_r1" type="list" count="1">
86 <element name="SRR13333333_1" ftype="fastqsanger.gz">
87 <assert_contents>
88 <has_text text="@SRR13333333"/>
89 </assert_contents>
90 </element>
91 </output_collection>
92 <output_collection name="output_r2" type="list" count="1">
93 <element name="SRR13333333_2" ftype="fastqsanger.gz">
94 <assert_contents>
95 <has_text text="@SRR13333333"/>
96 </assert_contents>
97 </element>
98 </output_collection>
99 </test>
100 </tests>
101
102 <help><![CDATA[ 89 <help><![CDATA[
103 **NCBI SRA AWS Fetch** 90 **NCBI SRA AWS Fetch**
104 91
105 Fetches SRA runs from the public `sra-pub-run-odp` bucket on Amazon S3 and converts them to gzip-compressed FASTQ using `fasterq-dump`. 92 Fetches SRA runs from the public `sra-pub-run-odp` bucket on Amazon S3 and converts them to gzip-compressed FASTQ using `fasterq-dump`.
106 93