Mercurial > repos > galaxytrakr > aws_sra
view aws_sra.xml @ 19:a4186132e1c4 draft
planemo upload for repository https://github.com/CFSAN-Biostatistics/galaxytrakr-tools commit ba119cea4881b4ff3a27470d8b426902977290b3
| author | galaxytrakr |
|---|---|
| date | Mon, 23 Mar 2026 22:01:24 +0000 |
| parents | 5680c31cd031 |
| children | 2b4efa539c71 |
line wrap: on
line source
<tool id="aws_sra" name="NCBI SRA AWS Fetch" version="0.3.0+gt_0.19" profile="23.0">
    <description>Fetches SRA runs from AWS and converts them to FASTQ</description>
    <requirements>
        <requirement type="package" version="2.34.8">awscli</requirement>
        <requirement type="package" version="3.2.1">sra-tools</requirement>
        <requirement type="package" version="2.8">pigz</requirement>
    </requirements>
    <version_command>fasterq-dump --version</version_command>
    <!--
        The command downloads each run from the public sra-pub-run-odp bucket,
        converts it with fasterq-dump and compresses the FASTQ files with pigz.
        Single mode moves the results onto the two dataset outputs.
        Batch mode places results in batch_r1/ and batch_r2/ (relative to the
        job working directory), which the discover_datasets rules below scan.
        Every shell statement is joined with an explicit separator so that the
        command does not depend on line breaks surviving templating.
    -->
    <command detect_errors="aggressive"><![CDATA[
#if $run_type.mode == 'single'
    ## ---- Single Run Mode ----
    ## The accession is validated by the parameter's regex validator, so it is
    ## limited to letters and digits by the time it reaches the shell.
    #set $acc = str($run_type.accession).strip()
    echo "Processing single accession: $acc" &&
    mkdir -p sra_cache fastq_out &&
    aws s3 cp --no-sign-request 's3://sra-pub-run-odp/sra/${acc}/${acc}' ./sra_cache/ &&
    fasterq-dump --outdir ./fastq_out --temp . --threads \${GALAXY_SLOTS:-4} --split-files './sra_cache/${acc}' &&
    pigz -p \${GALAXY_SLOTS:-4} ./fastq_out/*.fastq &&
    #if str($layout) == 'paired'
        mv './fastq_out/${acc}_1.fastq.gz' '$output_r1_single' &&
        mv './fastq_out/${acc}_2.fastq.gz' '$output_r2_single'
    #else
        mv ./fastq_out/*.fastq.gz '$output_r1_single'
    #end if
#else
    ## ---- Batch Run Mode ----
    ## The accession list is read by the shell at job run time (on file
    ## descriptor 3, so that the tools inside the loop cannot consume it).
    ## Blank lines are skipped, whitespace and carriage returns are removed,
    ## and anything that is not purely alphanumeric aborts the job.
    ## The loop runs in a subshell so that "exit 1" only ends the loop.
    mkdir -p batch_r1 batch_r2 &&
    (
        while IFS= read -r line <&3 || [ -n "\$line" ]; do
            sra_acc=\$(printf '%s' "\$line" | tr -d '[:space:]');
            if [ -z "\$sra_acc" ]; then continue; fi;
            case "\$sra_acc" in
                (*[!A-Za-z0-9]*) echo "Invalid SRA run accession: \$sra_acc" >&2; exit 1;;
            esac;
            echo "Processing batch accession: \$sra_acc" &&
            mkdir -p "sra_cache_\$sra_acc" "fastq_out_\$sra_acc" &&
            aws s3 cp --no-sign-request "s3://sra-pub-run-odp/sra/\$sra_acc/\$sra_acc" "./sra_cache_\$sra_acc/" &&
            fasterq-dump --outdir "./fastq_out_\$sra_acc" --temp . --threads \${GALAXY_SLOTS:-4} --split-files "./sra_cache_\$sra_acc/\$sra_acc" &&
            pigz -p \${GALAXY_SLOTS:-4} "./fastq_out_\$sra_acc"/*.fastq &&
            ## Files are named <accession>.fastq.gz in both directories so the
            ## R1 and R2 collections share element identifiers.
            #if str($layout) == 'paired'
                mv "./fastq_out_\$sra_acc/\${sra_acc}_1.fastq.gz" "batch_r1/\$sra_acc.fastq.gz" &&
                mv "./fastq_out_\$sra_acc/\${sra_acc}_2.fastq.gz" "batch_r2/\$sra_acc.fastq.gz" &&
            #else
                mv "./fastq_out_\$sra_acc"/*.fastq.gz "batch_r1/\$sra_acc.fastq.gz" &&
            #end if
            rm -rf "sra_cache_\$sra_acc" "fastq_out_\$sra_acc" || exit 1;
        done 3< '$run_type.accession_list'
    )
#end if
    ]]></command>
    <inputs>
        <conditional name="run_type">
            <param name="mode" type="select" label="Execution Mode">
                <option value="single" selected="true">Single Accession</option>
                <option value="batch">Batch of Accessions</option>
            </param>
            <when value="single">
                <param name="accession" type="text" label="SRA Accession">
                    <!-- Only letters and digits may reach the shell command. -->
                    <validator type="regex" message="Enter one SRA run accession made of letters and digits only, e.g. SRR1234567">^\s*[A-Za-z0-9]+\s*$</validator>
                </param>
            </when>
            <when value="batch">
                <param name="accession_list" type="data" format="txt" label="List of SRA Accessions"/>
            </when>
        </conditional>
        <param name="layout" type="select" label="Read layout">
            <option value="paired" selected="true">Paired-end (R1 + R2)</option>
            <option value="single">Single-end</option>
        </param>
    </inputs>
    <outputs>
        <!-- Outputs for Single Run Mode -->
        <data name="output_r1_single" format="fastqsanger.gz" label="${run_type.accession}_1.fastq.gz">
            <filter>run_type['mode'] == 'single'</filter>
        </data>
        <data name="output_r2_single" format="fastqsanger.gz" label="${run_type.accession}_2.fastq.gz">
            <filter>run_type['mode'] == 'single' and layout == 'paired'</filter>
        </data>
        <!--
            Outputs for Batch Mode. Each collection scans its own directory
            under the job working directory; the element identifier is the
            file name without the .fastq.gz suffix, i.e. the accession.
        -->
        <collection name="output_r1_batch" type="list" label="FASTQ Reads (R1)">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fastq\.gz" format="fastqsanger.gz" directory="batch_r1" />
            <filter>run_type['mode'] == 'batch'</filter>
        </collection>
        <collection name="output_r2_batch" type="list" label="FASTQ Reads (R2)">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fastq\.gz" format="fastqsanger.gz" directory="batch_r2" />
            <filter>run_type['mode'] == 'batch' and layout == 'paired'</filter>
        </collection>
    </outputs>
    <help><![CDATA[
**NCBI SRA AWS Fetch**

Fetches SRA runs from the public `sra-pub-run-odp` bucket on Amazon S3 and
converts them to gzip-compressed FASTQ using `fasterq-dump`.

This tool can be run on a single SRA accession or on a list of accessions
provided as a text file (one accession per line; blank lines are ignored).

**Outputs**

- *Single Accession*: one FASTQ dataset (single-end layout) or two FASTQ
  datasets, R1 and R2 (paired-end layout).
- *Batch of Accessions*: one list collection of R1 reads and, for the
  paired-end layout, a second list collection of R2 reads. Elements in both
  collections are named after the accession.
    ]]></help>
    <citations>
        <citation type="bibtex">
@misc{ncbi_sra_aws,
  title        = {{NCBI} {SRA} on {AWS} Open Data},
  author       = {{National Center for Biotechnology Information}},
  howpublished = {\url{https://registry.opendata.aws/ncbi-sra/}},
  note         = {Accessed via AWS S3 without credentials}
}
        </citation>
        <!-- Leinonen et al. 2011, Nucleic Acids Research 39:D19-D21 -->
        <citation type="doi">10.1093/nar/gkq1019</citation>
    </citations>
</tool>
