Mercurial > repos > galaxytrakr > aws_sra
diff aws_sra.xml @ 21:02f45c03c306 draft
planemo upload for repository https://github.com/CFSAN-Biostatistics/galaxytrakr-tools commit f72940592c22c9ba88f1dcb23ef8bb5199ce434e
| author | galaxytrakr |
|---|---|
| date | Mon, 23 Mar 2026 23:33:12 +0000 |
| parents | 2b4efa539c71 |
| children | 5ecb94ab82c3 |
line wrap: on
line diff
--- a/aws_sra.xml Mon Mar 23 22:09:06 2026 +0000 +++ b/aws_sra.xml Mon Mar 23 23:33:12 2026 +0000 @@ -1,97 +1,86 @@ -<tool id="aws_sra" name="NCBI SRA AWS Fetch" version="0.3.0+gt_0.20" profile="23.0"> - <description>Fetches SRA runs from AWS and converts them to FASTQ</description> +<tool id="aws_sra" name="NCBI SRA AWS Fetch" version="1.3.0" profile="23.0"> + <description>Fetches a single SRA run from AWS and converts it to FASTQ</description> + <requirements> <requirement type="package" version="2.34.8">awscli</requirement> <requirement type="package" version="3.2.1">sra-tools</requirement> <requirement type="package" version="2.8">pigz</requirement> </requirements> + <version_command>fasterq-dump --version</version_command> <command detect_errors="aggressive"><![CDATA[ - ## Single Run Mode - #if $run_type.mode == 'single' - #set $acc = str($run_type.accession).strip() - echo "Processing single accession: $acc" && - mkdir -p sra_cache fastq_out && - aws s3 cp --no-sign-request 's3://sra-pub-run-odp/sra/${acc}/${acc}' ./sra_cache/ && - fasterq-dump --outdir ./fastq_out --temp . --threads \${GALAXY_SLOTS:-4} --split-files ./sra_cache/${acc} && - pigz -p \${GALAXY_SLOTS:-4} ./fastq_out/*.fastq && - #if str($layout) == 'paired' - mv ./fastq_out/${acc}_1.fastq.gz '$output_r1_single' && - mv ./fastq_out/${acc}_2.fastq.gz '$output_r2_single' - #else - mv ./fastq_out/*.fastq.gz '$output_r1_single' - #end if + #set $acc = str($accession).strip() + + echo "Processing single accession: $acc" && + + ## 1. Create temporary directories + mkdir -p sra_cache fastq_out && + + ## 2. Download the file from S3 using the discovered path format (no .sra) + aws s3 cp --no-sign-request 's3://sra-pub-run-odp/sra/${acc}/${acc}' ./sra_cache/ && - ## Batch Run Mode + ## 3. Convert with fasterq-dump, using the correct argument order + fasterq-dump --outdir ./fastq_out --temp . --threads \${GALAXY_SLOTS:-4} --split-files ./sra_cache/${acc} && + + ## 4. 
Compress with pigz + pigz -p \${GALAXY_SLOTS:-4} ./fastq_out/*.fastq && + + ## 5. Move the final outputs to their Galaxy dataset paths + #if str($layout) == 'paired' + mv ./fastq_out/${acc}_1.fastq.gz '$output_r1' && + mv ./fastq_out/${acc}_2.fastq.gz '$output_r2' #else - #for $acc in $run_type.accession_list.lines: - #set $acc = $acc.strip() - #if $acc: - echo "Processing batch accession: $acc" && - mkdir -p sra_cache_${acc} fastq_out_${acc} && - aws s3 cp --no-sign-request 's3://sra-pub-run-odp/sra/${acc}/${acc}' ./sra_cache_${acc}/ && - fasterq-dump --outdir ./fastq_out_${acc} --temp . --threads \${GALAXY_SLOTS:-4} --split-files ./sra_cache_${acc}/${acc} && - pigz -p \${GALAXY_SLOTS:-4} ./fastq_out_${acc}/*.fastq && - #if str($layout) == 'paired' - mv ./fastq_out_${acc}/${acc}_1.fastq.gz '$output_r1_batch.files_path/${acc}_1.fastq.gz' && - mv ./fastq_out_${acc}/${acc}_2.fastq.gz '$output_r2_batch.files_path/${acc}_2.fastq.gz' - #else - mv ./fastq_out_${acc}/${acc}.fastq.gz '$output_r1_batch.files_path/${acc}.fastq.gz' - #end if && - rm -rf sra_cache_${acc} fastq_out_${acc} - #end if - # end for + ## Be explicit about the single-end filename, removing the wildcard + mv ./fastq_out/${acc}.fastq.gz '$output_r1' #end if ]]></command> <inputs> - <conditional name="run_type"> - <param name="mode" type="select" label="Execution Mode"> - <option value="single" selected="true">Single Accession</option> - <option value="batch">Batch of Accessions</option> - </param> - <when value="single"> - <param name="accession" type="text" label="SRA Accession"/> - </when> - <when value="batch"> - <param name="accession_list" type="data" format="txt" label="List of SRA Accessions"/> - </when> - </conditional> - <param name="layout" type="select" label="Read layout"> + <param name="accession" type="text" label="SRA Accession" help="e.g., SRR13333333"/> + <param name="layout" type="select" label="Read layout" help="Check the SRA record to confirm layout before running."> <option 
value="paired" selected="true">Paired-end (R1 + R2)</option> <option value="single">Single-end</option> </param> </inputs> <outputs> - <!-- Outputs for Single Run Mode --> - <data name="output_r1_single" format="fastqsanger.gz" label="${run_type.accession}_1.fastq.gz"> - <filter>run_type['mode'] == 'single'</filter> + <data name="output_r1" format="fastqsanger.gz" label="${accession}_1.fastq.gz"/> + <data name="output_r2" format="fastqsanger.gz" label="${accession}_2.fastq.gz"> + <filter>layout == 'paired'</filter> </data> - <data name="output_r2_single" format="fastqsanger.gz" label="${run_type.accession}_2.fastq.gz"> - <filter>run_type['mode'] == 'single' and layout == 'paired'</filter> - </data> + </outputs> - <!-- Outputs for Batch Mode --> - <collection name="output_r1_batch" type="list" label="FASTQ Reads (R1)"> - <discover_datasets pattern="(?P<designation>.+)_1\.fastq\.gz" format="fastqsanger.gz" /> - <filter>run_type['mode'] == 'batch'</filter> - </collection> - <collection name="output_r2_batch" type="list" label="FASTQ Reads (R2)"> - <discover_datasets pattern="(?P<designation>.+)_2\.fastq\.gz" format="fastqsanger.gz" /> - <filter>run_type['mode'] == 'batch' and layout == 'paired'</filter> - </collection> - </outputs> - + <tests> + <test expect_num_outputs="2"> + <param name="accession" value="SRR13333333"/> + <param name="layout" value="paired"/> + <output name="output_r1" ftype="fastqsanger.gz"> + <assert_contents> + <has_text text="@SRR13333333"/> + </assert_contents> + </output> + <output name="output_r2" ftype="fastqsanger.gz"> + <assert_contents> + <has_text text="@SRR13333333"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="1"> + <param name="accession" value="SRR11181815"/> <!-- A known single-end run --> + <param name="layout" value="single"/> + <output name="output_r1" ftype="fastqsanger.gz"> + <assert_contents> + <has_text text="@SRR11181815"/> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ 
**NCBI SRA AWS Fetch** -Fetches SRA runs from the public `sra-pub-run-odp` bucket on Amazon S3 and converts them to gzip-compressed FASTQ using `fasterq-dump`. - -This tool can be run on a single SRA accession or a list of accessions provided as a text file (one per line). - -Outputs are automatically organized into collections suitable for downstream analysis. +Fetches a single SRA run from the public `sra-pub-run-odp` bucket on Amazon S3 and converts it to gzip-compressed FASTQ using `fasterq-dump`. ]]></help> <citations>
