view aws_sra.xml @ 22:5ecb94ab82c3 draft

planemo upload for repository https://github.com/CFSAN-Biostatistics/galaxytrakr-tools commit 36274160d2834d0b3338b3be5f900ce9bc5d7c99
author galaxytrakr
date Mon, 23 Mar 2026 23:34:21 +0000
parents 02f45c03c306
children d7f68b3cde39
line wrap: on
line source

<tool id="aws_sra" name="NCBI SRA AWS Fetch" version="0.4+gt_0.1" profile="23.0">
    <description>Fetches a single SRA run from AWS and converts it to FASTQ</description>

    <requirements>
        <!-- One package per step of the command block: awscli supplies `aws s3 cp` for the
             download, sra-tools supplies `fasterq-dump` for the conversion, and pigz
             performs the final compression. -->
        <requirement type="package" version="2.34.8">awscli</requirement>
        <requirement type="package" version="3.2.1">sra-tools</requirement>
        <requirement type="package" version="2.8">pigz</requirement>
    </requirements>

    <!-- Reports the version of fasterq-dump, the converter that the command block invokes. -->
    <version_command>fasterq-dump --version</version_command>

    <command detect_errors="aggressive"><![CDATA[
        ## Pipeline: download one run object from the public S3 bucket, convert it to
        ## FASTQ, gzip the result, then move the file(s) onto the Galaxy output datasets.
        ## Every step is chained with && so the first failure aborts the job.
        ##
        ## NOTE: comments in this template must start with a double hash. A single hash
        ## is not a Cheetah comment; it is emitted into the shell command, where it
        ## comments out everything that follows on the joined command line.

        ## The accession is normalised once; every path below is derived from it.
        #set $acc = str($accession).strip()

        echo "Processing single accession: $acc" &&

        ## 1. Create the working directories inside the job directory
        mkdir -p sra_cache fastq_out &&

        ## 2. Download the run object from S3 (the object key carries no .sra suffix)
        aws s3 cp --no-sign-request 's3://sra-pub-run-odp/sra/${acc}/${acc}' ./sra_cache/ &&

        ## 3. Convert with fasterq-dump; options first, the local run file last.
        ##    GALAXY_SLOTS is escaped so the shell, not Cheetah, expands it at run time.
        fasterq-dump --outdir ./fastq_out --temp . --threads \${GALAXY_SLOTS:-4} --split-files './sra_cache/${acc}' &&

        ## 4. Compress every FASTQ that was produced
        pigz -p \${GALAXY_SLOTS:-4} ./fastq_out/*.fastq &&

        ## 5. Move the final outputs to their Galaxy dataset paths
        #if str($layout) == 'paired'
            mv './fastq_out/${acc}_1.fastq.gz' '$output_r1' &&
            mv './fastq_out/${acc}_2.fastq.gz' '$output_r2'
        #else
            ## A single-read run may be written as ACC.fastq or ACC_1.fastq depending on
            ## the split mode / sra-tools version (assumption to confirm against the
            ## pinned sra-tools), so take whichever exists. If neither exists the final
            ## mv fails and the job is reported as an error.
            if [ -f './fastq_out/${acc}.fastq.gz' ]; then
                mv './fastq_out/${acc}.fastq.gz' '$output_r1';
            else
                mv './fastq_out/${acc}_1.fastq.gz' '$output_r1';
            fi
        #end if
    ]]></command>

    <inputs>
        <!-- The accession is interpolated into shell paths and an S3 key by the command
             block, so it is restricted to run accessions (SRR/ERR/DRR followed by digits).
             Surrounding whitespace is tolerated because the command block strips it. -->
        <param name="accession" type="text" label="SRA Accession" help="e.g., SRR13333333">
            <validator type="regex" message="Enter a single SRA run accession such as SRR13333333 (SRR, ERR or DRR followed by digits).">^\s*[SED]RR[0-9]+\s*$</validator>
        </param>
        <!-- The layout decides how many outputs are created and which converted files the
             command block moves into them. -->
        <param name="layout" type="select" label="Read layout" help="Check the SRA record to confirm layout before running.">
            <option value="paired" selected="true">Paired-end (R1 + R2)</option>
            <option value="single">Single-end</option>
        </param>
    </inputs>

    <outputs>
        <!-- output_r1 is always declared: the command block moves either the first mate
             file (paired layout) or the only FASTQ (single layout) into it. -->
        <data name="output_r1" format="fastqsanger.gz" label="${accession}_1.fastq.gz"/>
        <!-- output_r2 is only created when the "paired" layout is selected (see filter). -->
        <data name="output_r2" format="fastqsanger.gz" label="${accession}_2.fastq.gz">
            <filter>layout == 'paired'</filter>
        </data>
    </outputs>

    <tests>
        <!-- Both tests fetch real runs from the public S3 bucket, so they need network access.
             decompress="true" is set on every output because the datasets are gzip files:
             the read header can only be matched against the decompressed text, never
             against the raw compressed bytes. -->
        <test expect_num_outputs="2">
            <param name="accession" value="SRR13333333"/>
            <param name="layout" value="paired"/>
            <output name="output_r1" ftype="fastqsanger.gz" decompress="true">
                <assert_contents>
                    <has_text text="@SRR13333333"/>
                </assert_contents>
            </output>
            <output name="output_r2" ftype="fastqsanger.gz" decompress="true">
                <assert_contents>
                    <has_text text="@SRR13333333"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <param name="accession" value="SRR11181815"/> <!-- A known single-end run -->
            <param name="layout" value="single"/>
            <output name="output_r1" ftype="fastqsanger.gz" decompress="true">
                <assert_contents>
                    <has_text text="@SRR11181815"/>
                </assert_contents>
            </output>
        </test>
    </tests>

    <help><![CDATA[
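**NCBI SRA AWS Fetch**

Fetches a single SRA run from the public `sra-pub-run-odp` bucket on Amazon S3 and converts it to gzip-compressed FASTQ using `fasterq-dump`.

**Inputs**

- *SRA Accession*: the run to fetch, e.g. SRR13333333.
- *Read layout*: paired-end or single-end; check the SRA record to confirm the layout before running.

**Outputs**

- Paired-end layout: two gzip-compressed FASTQ datasets (R1 and R2).
- Single-end layout: one gzip-compressed FASTQ dataset.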
    ]]></help>

    <citations>
        <!-- BibTeX bodies are taken literally from the XML text, so LaTeX commands need a
             single backslash (\url, \_); doubled backslashes would be read as line breaks. -->
        <citation type="bibtex">
@misc{ncbi_sra_aws,
  title        = {{NCBI} {SRA} on {AWS} Open Data},
  author       = {{National Center for Biotechnology Information}},
  howpublished = {\url{https://registry.opendata.aws/ncbi-sra/}},
  note         = {Accessed via AWS S3 without credentials}
}
        </citation>
        <citation type="bibtex">
@article{sra_toolkit,
  title   = {The {Sequence} {Read} {Archive}},
  author  = {Leinonen, Rasko and Sugawara, Hideaki and Shumway, Martin and
             {International Nucleotide Sequence Database Collaboration}},
  journal = {Nucleic Acids Research},
  volume  = {39},
  number  = {suppl\_1},
  pages   = {D19--D21},
  year    = {2011},
  doi     = {10.1093/nar/gkq1019}
}
        </citation>
    </citations>
</tool>