Mercurial > repos > galaxytrakr > aws_sra
comparison aws_sra.xml @ 17:9fb80e0392ce draft
planemo upload for repository https://github.com/CFSAN-Biostatistics/galaxytrakr-tools commit 9707fa5e3ca6db5b58f271d133484d078cf65390
| author | galaxytrakr |
|---|---|
| date | Mon, 23 Mar 2026 20:44:25 +0000 |
| parents | 58cc45662c63 |
| children | 5680c31cd031 |
comparison
equal
deleted
inserted
replaced
| 16:58cc45662c63 | 17:9fb80e0392ce |
|---|---|
| 1 <tool id="aws_sra" name="NCBI SRA AWS Fetch" version="0.3.0+gt_0.16" profile="23.0"> | 1 <tool id="aws_sra" name="NCBI SRA AWS Fetch" version="0.3.0+gt_0.17" profile="23.0"> |
| 2 <description>Fetches SRA runs from AWS and converts them to FASTQ</description> | 2 <description>Fetches SRA runs from AWS and converts them to FASTQ</description> |
| 3 | |
| 4 <requirements> | 3 <requirements> |
| 5 <requirement type="package" version="2.34.8">awscli</requirement> | 4 <requirement type="package" version="2.34.8">awscli</requirement> |
| 6 <requirement type="package" version="3.2.1">sra-tools</requirement> | 5 <requirement type="package" version="3.2.1">sra-tools</requirement> |
| 7 <requirement type="package" version="2.8">pigz</requirement> | 6 <requirement type="package" version="2.8">pigz</requirement> |
| 8 </requirements> | 7 </requirements> |
| 9 | |
| 10 <version_command>fasterq-dump --version</version_command> | 8 <version_command>fasterq-dump --version</version_command> |
| 11 | 9 |
| 12 <command detect_errors="aggressive"><![CDATA[ | 10 <command detect_errors="aggressive"><![CDATA[ |
| 13 ## This loop handles both 'single' and 'batch' modes. | 11 ## Single Run Mode |
| 14 #for $acc_line in $run_type.mode == 'single' and str($run_type.accession).split() or $run_type.accession_list.lines: | 12 #if $run_type.mode == 'single' |
| 15 #set $acc = $acc_line.strip() | 13 #set $acc = str($run_type.accession).strip() |
| 16 #if $acc: | 14 echo "Processing single accession: $acc" && |
| 15 mkdir -p sra_cache fastq_out && | |
| 16 aws s3 cp --no-sign-request 's3://sra-pub-run-odp/sra/${acc}/${acc}' ./sra_cache/ && | |
| 17 fasterq-dump --outdir ./fastq_out --temp . --threads \${GALAXY_SLOTS:-4} --split-files ./sra_cache/${acc} && | |
| 18 pigz -p \${GALAXY_SLOTS:-4} ./fastq_out/*.fastq && | |
| 19 #if str($layout) == 'paired' | |
| 20 # Move files directly to the single output datasets | |
| 21 mv ./fastq_out/${acc}_1.fastq.gz '$output_r1_single' && | |
| 22 mv ./fastq_out/${acc}_2.fastq.gz '$output_r2_single' | |
| 23 #else | |
| 24 mv ./fastq_out/*.fastq.gz '$output_r1_single' | |
| 25 #end if | |
| 17 | 26 |
| 18 echo "Processing accession: $acc" && | 27 ## Batch Run Mode |
| 19 | 28 #else |
| 20 ## 1. Create unique directories for this accession | 29 #for $acc in $run_type.accession_list.lines: |
| 21 mkdir -p sra_cache_${acc} fastq_out_${acc} && | 30 #set $acc = $acc.strip() |
| 22 | 31 #if $acc: |
| 23 ## 2. Download the file from S3 using the discovered path format | 32 echo "Processing batch accession: $acc" && |
| 24 aws s3 cp --no-sign-request 's3://sra-pub-run-odp/sra/${acc}/${acc}' ./sra_cache_${acc}/ && | 33 mkdir -p sra_cache_${acc} fastq_out_${acc} && |
| 25 | 34 aws s3 cp --no-sign-request 's3://sra-pub-run-odp/sra/${acc}/${acc}' ./sra_cache_${acc}/ && |
| 26 ## 3. Convert with fasterq-dump, using the correct argument order | 35 fasterq-dump --outdir ./fastq_out_${acc} --temp . --threads \${GALAXY_SLOTS:-4} --split-files ./sra_cache_${acc}/${acc} && |
| 27 fasterq-dump --outdir ./fastq_out_${acc} --temp . --threads \${GALAXY_SLOTS:-4} --split-files ./sra_cache_${acc}/${acc} && | 36 pigz -p \${GALAXY_SLOTS:-4} ./fastq_out_${acc}/*.fastq && |
| 28 | 37 #if str($layout) == 'paired' |
| 29 ## 4. Compress with pigz | 38 # Move files to the special path for collection discovery |
| 30 pigz -p \${GALAXY_SLOTS:-4} ./fastq_out_${acc}/*.fastq && | 39 mv ./fastq_out_${acc}/${acc}_1.fastq.gz '$output_r1_batch.files_path/${acc}_1.fastq.gz' && |
| 31 | 40 mv ./fastq_out_${acc}/${acc}_2.fastq.gz '$output_r2_batch.files_path/${acc}_2.fastq.gz' |
| 32 ## 5. Move outputs to special directories Galaxy can discover | 41 #else |
| 33 #if $layout == 'paired' | 42 mv ./fastq_out_${acc}/*.fastq.gz '$output_r1_batch.files_path/${acc}.fastq.gz' |
| 34 mv ./fastq_out_${acc}/${acc}_1.fastq.gz '$output_r1.files_path/${acc}_1.fastq.gz' && | 43 #end if && |
| 35 mv ./fastq_out_${acc}/${acc}_2.fastq.gz '$output_r2.files_path/${acc}_2.fastq.gz' | 44 rm -rf sra_cache_${acc} fastq_out_${acc} |
| 36 #else | 45 #end if |
| 37 mv ./fastq_out_${acc}/*.fastq.gz '$output_r1.files_path/${acc}.fastq.gz' | 46 #end for |
| 38 #end if && | 47 #end if |
| 39 | |
| 40 ## 6. Clean up temporary files | |
| 41 rm -rf sra_cache_${acc} fastq_out_${acc} | |
| 42 | |
| 43 #end if | |
| 44 #end for | |
| 45 ]]></command> | 48 ]]></command> |
| 46 | 49 |
| 47 <inputs> | 50 <inputs> |
| 48 <!-- This conditional allows the user to choose a single run or a list of runs --> | |
| 49 <conditional name="run_type"> | 51 <conditional name="run_type"> |
| 50 <param name="mode" type="select" label="Execution Mode" help="Run on a single accession or a list of accessions from a file."> | 52 <param name="mode" type="select" label="Execution Mode"> |
| 51 <option value="single" selected="true">Single Accession</option> | 53 <option value="single" selected="true">Single Accession</option> |
| 52 <option value="batch">Batch of Accessions</option> | 54 <option value="batch">Batch of Accessions</option> |
| 53 </param> | 55 </param> |
| 54 <when value="single"> | 56 <when value="single"> |
| 55 <param name="accession" type="text" label="SRA Accession" help="e.g., SRR13333333"/> | 57 <param name="accession" type="text" label="SRA Accession"/> |
| 56 </when> | 58 </when> |
| 57 <when value="batch"> | 59 <when value="batch"> |
| 58 <param name="accession_list" type="data" format="txt" label="List of SRA Accessions" help="A plain text file with one SRA accession per line."/> | 60 <param name="accession_list" type="data" format="txt" label="List of SRA Accessions"/> |
| 59 </when> | 61 </when> |
| 60 </conditional> | 62 </conditional> |
| 61 | 63 <param name="layout" type="select" label="Read layout"> |
| 62 <!-- This layout parameter is always required --> | |
| 63 <param name="layout" type="select" label="Read layout" help="Check the SRA record to confirm layout before running."> | |
| 64 <option value="paired" selected="true">Paired-end (R1 + R2)</option> | 64 <option value="paired" selected="true">Paired-end (R1 + R2)</option> |
| 65 <option value="single">Single-end</option> | 65 <option value="single">Single-end</option> |
| 66 </param> | 66 </param> |
| 67 </inputs> | 67 </inputs> |
| 68 | 68 |
| 69 <outputs> | 69 <outputs> |
| 70 <!-- These collections will gather all the files produced by the loop --> | 70 <!-- Outputs for Single Run Mode --> |
| 71 <collection name="output_r1" type="list" label="${run_type.accession or 'FASTQ Reads (R1)'}"> | 71 <data name="output_r1_single" format="fastqsanger.gz" label="${run_type.accession}_1.fastq.gz"> |
| 72 <filter>run_type['mode'] == 'single'</filter> | |
| 73 </data> | |
| 74 <data name="output_r2_single" format="fastqsanger.gz" label="${run_type.accession}_2.fastq.gz"> | |
| 75 <filter>run_type['mode'] == 'single' and layout == 'paired'</filter> | |
| 76 </data> | |
| 77 | |
| 78 <!-- Outputs for Batch Mode --> | |
| 79 <collection name="output_r1_batch" type="list" label="FASTQ Reads (R1)"> | |
| 72 <discover_datasets pattern="(?P<designation>.+)_1\.fastq\.gz" format="fastqsanger.gz" /> | 80 <discover_datasets pattern="(?P<designation>.+)_1\.fastq\.gz" format="fastqsanger.gz" /> |
| 81 <filter>run_type['mode'] == 'batch'</filter> | |
| 73 </collection> | 82 </collection> |
| 74 <collection name="output_r2" type="list" label="${run_type.accession or 'FASTQ Reads (R2)'}"> | 83 <collection name="output_r2_batch" type="list" label="FASTQ Reads (R2)"> |
| 75 <discover_datasets pattern="(?P<designation>.+)_2\.fastq\.gz" format="fastqsanger.gz" /> | 84 <discover_datasets pattern="(?P<designation>.+)_2\.fastq\.gz" format="fastqsanger.gz" /> |
| 76 <filter>layout == 'paired'</filter> | 85 <filter>run_type['mode'] == 'batch' and layout == 'paired'</filter> |
| 77 </collection> | 86 </collection> |
| 78 </outputs> | 87 </outputs> |
| 79 | 88 |
| 80 <tests> | |
| 81 <test expect_num_outputs="2"> | |
| 82 <param name="mode" value="single"/> | |
| 83 <param name="accession" value="SRR13333333"/> | |
| 84 <param name="layout" value="paired"/> | |
| 85 <output_collection name="output_r1" type="list" count="1"> | |
| 86 <element name="SRR13333333_1" ftype="fastqsanger.gz"> | |
| 87 <assert_contents> | |
| 88 <has_text text="@SRR13333333"/> | |
| 89 </assert_contents> | |
| 90 </element> | |
| 91 </output_collection> | |
| 92 <output_collection name="output_r2" type="list" count="1"> | |
| 93 <element name="SRR13333333_2" ftype="fastqsanger.gz"> | |
| 94 <assert_contents> | |
| 95 <has_text text="@SRR13333333"/> | |
| 96 </assert_contents> | |
| 97 </element> | |
| 98 </output_collection> | |
| 99 </test> | |
| 100 </tests> | |
| 101 | |
| 102 <help><