Mercurial > repos > galaxytrakr > aws_sra
changeset 23:d7f68b3cde39 draft
planemo upload for repository https://github.com/CFSAN-Biostatistics/galaxytrakr-tools commit e9adf514c1b6b341c9e5bf8cc5a41c79b738d48e
| author | galaxytrakr |
|---|---|
| date | Mon, 23 Mar 2026 23:55:36 +0000 |
| parents | 5ecb94ab82c3 |
| children | 7dbb60c48056 |
| files | aws_sra.xml |
| diffstat | 1 files changed, 43 insertions(+), 46 deletions(-) [+] |
line wrap: on
line diff
--- a/aws_sra.xml	Mon Mar 23 23:34:21 2026 +0000
+++ b/aws_sra.xml	Mon Mar 23 23:55:36 2026 +0000
@@ -1,5 +1,5 @@
-<tool id="aws_sra" name="NCBI SRA AWS Fetch" version="0.4+gt_0.1" profile="23.0">
-    <description>Fetches a single SRA run from AWS and converts it to FASTQ</description>
+<tool id="aws_sra" name="NCBI SRA AWS Fetch" version="0.4.0+gt+0.2" profile="23.0">
+    <description>Fetches one or more SRA runs from AWS S3 and converts them to FASTQ</description>
     <requirements>
         <requirement type="package" version="2.34.8">awscli</requirement>
@@ -10,77 +10,74 @@
     <version_command>fasterq-dump --version</version_command>
     <command detect_errors="aggressive"><![CDATA[
-        #set $acc = str($accession).strip()
-
-        echo "Processing single accession: $acc" &&
+        ## Create a clean list of accessions from the user input
+        echo "$accession" | sed 's/,/\n/g; s/ \+/\n/g' | grep . > accessions.txt &&
-        ## 1. Create temporary directories
-        mkdir -p sra_cache fastq_out &&
+        ## Loop over each clean accession
+        for acc in $(cat accessions.txt);
+        do
+            echo "Processing accession: $acc" &&
-        ## 2. Download the file from S3 using the discovered path format (no .sra)
-        aws s3 cp --no-sign-request 's3://sra-pub-run-odp/sra/${acc}/${acc}' ./sra_cache/ &&
+            ## 1. Create unique directories for this accession
+            mkdir -p sra_cache_${acc} fastq_out_${acc} &&
+
+            ## 2. Download the file from S3 using aws s3 cp
+            aws s3 cp --no-sign-request "s3://sra-pub-run-odp/sra/${acc}/${acc}" ./sra_cache_${acc}/ &&
-        ## 3. Convert with fasterq-dump, using the correct argument order
-        fasterq-dump --outdir ./fastq_out --temp . --threads \${GALAXY_SLOTS:-4} --split-files ./sra_cache/${acc} &&
+            ## 3. Convert with fasterq-dump
+            fasterq-dump --outdir ./fastq_out_${acc} --temp . --threads \${GALAXY_SLOTS:-4} --split-files ./sra_cache_${acc}/${acc} &&
-        ## 4. Compress with pigz
-        pigz -p \${GALAXY_SLOTS:-4} ./fastq_out/*.fastq &&
+            ## 4. Compress with pigz
+            pigz -p \${GALAXY_SLOTS:-4} ./fastq_out_${acc}/*.fastq &&
-        ## 5. Move the final outputs to their Galaxy dataset paths
-        #if str($layout) == 'paired'
-            mv ./fastq_out/${acc}_1.fastq.gz '$output_r1' &&
-            mv ./fastq_out/${acc}_2.fastq.gz '$output_r2'
-        #else
-            # Be explicit about the single-end filename, removing the wildcard
-            mv ./fastq_out/${acc}.fastq.gz '$output_r1'
-        #end if
+            ## 5. Move outputs for collection discovery
+            #if str($layout) == 'paired'
+                mv ./fastq_out_${acc}/${acc}_1.fastq.gz '$output_r1.files_path/${acc}_1.fastq.gz' &&
+                mv ./fastq_out_${acc}/${acc}_2.fastq.gz '$output_r2.files_path/${acc}_2.fastq.gz'
+            #else
+                mv ./fastq_out_${acc}/${acc}.fastq.gz '$output_r1.files_path/${acc}.fastq.gz'
+            #end if &&
+
+            ## 6. Clean up
+            rm -rf sra_cache_${acc} fastq_out_${acc}
+        done
     ]]></command>
     <inputs>
-        <param name="accession" type="text" label="SRA Accession" help="e.g., SRR13333333"/>
-        <param name="layout" type="select" label="Read layout" help="Check the SRA record to confirm layout before running.">
+        <param name="accession" type="text" multiple="true" label="SRA Accession(s)" help="Provide one or more accession numbers (separated by commas, spaces, or newlines). This field accepts a dataset list of accessions in a workflow."/>
+        <param name="layout" type="select" label="Read layout" help="This setting is applied to all accessions.">
             <option value="paired" selected="true">Paired-end (R1 + R2)</option>
             <option value="single">Single-end</option>
         </param>
     </inputs>
     <outputs>
-        <data name="output_r1" format="fastqsanger.gz" label="${accession}_1.fastq.gz"/>
-        <data name="output_r2" format="fastqsanger.gz" label="${accession}_2.fastq.gz">
+        <collection name="output_r1" type="list" label="FASTQ Reads (R1) for ${accession}">
+            <discover_datasets pattern="(?P<designation>.+)_1\.fastq\.gz" format="fastqsanger.gz" />
+        </collection>
+        <collection name="output_r2" type="list" label="FASTQ Reads (R2) for ${accession}">
+            <discover_datasets pattern="(?P<designation>.+)_2\.fastq\.gz" format="fastqsanger.gz" />
             <filter>layout == 'paired'</filter>
-        </data>
+        </collection>
     </outputs>
     <tests>
         <test expect_num_outputs="2">
             <param name="accession" value="SRR13333333"/>
             <param name="layout" value="paired"/>
-            <output name="output_r1" ftype="fastqsanger.gz">
-                <assert_contents>
-                    <has_text text="@SRR13333333"/>
-                </assert_contents>
-            </output>
-            <output name="output_r2" ftype="fastqsanger.gz">
-                <assert_contents>
-                    <has_text text="@SRR13333333"/>
-                </assert_contents>
-            </output>
-        </test>
-        <test expect_num_outputs="1">
-            <param name="accession" value="SRR11181815"/> <!-- A known single-end run -->
-            <param name="layout" value="single"/>
-            <output name="output_r1" ftype="fastqsanger.gz">
-                <assert_contents>
-                    <has_text text="@SRR11181815"/>
-                </assert_contents>
-            </output>
+            <output_collection name="output_r1" type="list" count="1">
+                <element name="SRR13333333_1" ftype="fastqsanger.gz" has_text="@SRR13333333"/>
+            </output_collection>
+            <output_collection name="output_r2" type="list" count="1">
+                <element name="SRR13333333_2" ftype="fastqsanger.gz" has_text="@SRR13333333"/>
+            </output_collection>
         </test>
     </tests>
     <help><![CDATA[
 **NCBI SRA AWS Fetch**
-Fetches a single SRA run from the public `sra-pub-run-odp` bucket on Amazon S3 and converts it to gzip-compressed FASTQ using `fasterq-dump`.
+Fetches one or more SRA runs from the public `sra-pub-run-odp` S3 bucket and converts them to gzip-compressed FASTQ using `fasterq-dump`.
 This tool uses `aws s3 cp` for direct downloads within the AWS environment.
 ]]></help>
     <citations>
