Mercurial > repos > galaxytrakr > aws_sra
view aws_sra.xml @ 21:02f45c03c306 draft
planemo upload for repository https://github.com/CFSAN-Biostatistics/galaxytrakr-tools commit f72940592c22c9ba88f1dcb23ef8bb5199ce434e
| author | galaxytrakr |
|---|---|
| date | Mon, 23 Mar 2026 23:33:12 +0000 |
| parents | 2b4efa539c71 |
| children | 5ecb94ab82c3 |
line wrap: on
line source
<tool id="aws_sra" name="NCBI SRA AWS Fetch" version="1.3.0" profile="23.0">
    <description>Fetches a single SRA run from AWS and converts it to FASTQ</description>

    <!-- Command-line tools invoked by the command template below. -->
    <requirements>
        <requirement type="package" version="2.34.8">awscli</requirement>
        <requirement type="package" version="3.2.1">sra-tools</requirement>
        <requirement type="package" version="2.8">pigz</requirement>
    </requirements>

    <version_command>fasterq-dump --version</version_command>

    <!--
        Cheetah template rendered into a single shell command chain.
        NOTE: only "##" starts a Cheetah comment. A line starting with a single
        "#" that is not a directive is emitted verbatim into the shell command,
        where it comments out everything that follows it.
    -->
    <command detect_errors="aggressive"><![CDATA[
        ## Drop surrounding whitespace that tends to come along with pasted accessions.
        #set $acc = str($accession).strip()

        echo "Processing single accession: $acc" &&

        ## 1. Create working directories for the raw download and the FASTQ output.
        mkdir -p sra_cache fastq_out &&

        ## 2. Download the run object from S3; the object key has no .sra suffix.
        aws s3 cp --no-sign-request 's3://sra-pub-run-odp/sra/${acc}/${acc}' ./sra_cache/ &&

        ## 3. Convert to FASTQ; options come first, the input path last.
        fasterq-dump --outdir ./fastq_out --temp . --threads \${GALAXY_SLOTS:-4} --split-files './sra_cache/${acc}' &&

        ## 4. Compress every FASTQ file that was produced.
        pigz -p \${GALAXY_SLOTS:-4} ./fastq_out/*.fastq &&

        ## 5. Move the final outputs to their Galaxy dataset paths.
        #if str($layout) == 'paired'
            mv './fastq_out/${acc}_1.fastq.gz' '$output_r1' &&
            mv './fastq_out/${acc}_2.fastq.gz' '$output_r2'
        #else
            ## Single-end: use the explicit file name rather than a wildcard.
            mv './fastq_out/${acc}.fastq.gz' '$output_r1'
        #end if
    ]]></command>

    <inputs>
        <param name="accession" type="text" label="SRA Accession" help="e.g., SRR13333333">
            <!-- The value ends up in shell arguments and an S3 key, so accept run accessions only.
                 Surrounding whitespace is tolerated because the command template strips it. -->
            <validator type="regex" message="Enter a single run accession such as SRR13333333 (SRR, ERR or DRR followed by digits).">^\s*[DES]RR[0-9]+\s*$</validator>
        </param>
        <param name="layout" type="select" label="Read layout" help="Check the SRA record to confirm layout before running.">
            <option value="paired" selected="true">Paired-end (R1 + R2)</option>
            <option value="single">Single-end</option>
        </param>
    </inputs>

    <outputs>
        <data name="output_r1" format="fastqsanger.gz" label="${accession}_1.fastq.gz"/>
        <!-- The second mate only exists for paired-end runs. -->
        <data name="output_r2" format="fastqsanger.gz" label="${accession}_2.fastq.gz">
            <filter>layout == 'paired'</filter>
        </data>
    </outputs>

    <!-- Outputs are gzip-compressed, so they must be decompressed before text assertions run. -->
    <tests>
        <test expect_num_outputs="2">
            <param name="accession" value="SRR13333333"/>
            <param name="layout" value="paired"/>
            <output name="output_r1" ftype="fastqsanger.gz" decompress="true">
                <assert_contents>
                    <has_text text="@SRR13333333"/>
                </assert_contents>
            </output>
            <output name="output_r2" ftype="fastqsanger.gz" decompress="true">
                <assert_contents>
                    <has_text text="@SRR13333333"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <param name="accession" value="SRR11181815"/> <!-- A known single-end run -->
            <param name="layout" value="single"/>
            <output name="output_r1" ftype="fastqsanger.gz" decompress="true">
                <assert_contents>
                    <has_text text="@SRR11181815"/>
                </assert_contents>
            </output>
        </test>
    </tests>

    <help><![CDATA[
**NCBI SRA AWS Fetch**

Fetches a single SRA run from the public `sra-pub-run-odp` bucket on Amazon S3
and converts it to gzip-compressed FASTQ using `fasterq-dump`.

**Inputs**

- *SRA Accession*: one run accession, e.g. ``SRR13333333``.
- *Read layout*: whether the run is paired-end or single-end.

**Outputs**

- Paired-end: two ``fastqsanger.gz`` datasets, taken from ``<accession>_1.fastq.gz``
  and ``<accession>_2.fastq.gz``.
- Single-end: one ``fastqsanger.gz`` dataset, taken from ``<accession>.fastq.gz``.

**Note**

The selected layout must match the run: the job fails if the files expected for
that layout were not produced by the conversion.
    ]]></help>

    <citations>
        <citation type="bibtex">
@misc{ncbi_sra_aws,
    title = {{NCBI} {SRA} on {AWS} Open Data},
    author = {{National Center for Biotechnology Information}},
    howpublished = {\url{https://registry.opendata.aws/ncbi-sra/}},
    note = {Accessed via AWS S3 without credentials}
}
        </citation>
        <citation type="bibtex">
@article{sra_toolkit,
    title = {The {Sequence} {Read} {Archive}},
    author = {Leinonen, Rasko and Sugawara, Hideaki and Shumway, Martin and {International Nucleotide Sequence Database Collaboration}},
    journal = {Nucleic Acids Research},
    volume = {39},
    number = {suppl\_1},
    pages = {D19--D21},
    year = {2011},
    doi = {10.1093/nar/gkq1019}
}
        </citation>
    </citations>
</tool>
