Mercurial > repos > galaxytrakr > aws_sra
view aws_sra.xml @ 31:90ae4d437133 draft
planemo upload for repository https://github.com/CFSAN-Biostatistics/galaxytrakr-tools commit 616821dc026ebd3f34572b82059228672f7b67ec
| author | galaxytrakr |
|---|---|
| date | Tue, 24 Mar 2026 03:14:28 +0000 |
| parents | 73ee30eb273a |
| children | 8b8a63786853 |
line wrap: on
line source
<tool id="aws_sra" name="NCBI SRA AWS Fetch" version="0.4.0+gt_1.0" profile="23.0">
    <!--
        Galaxy wrapper: downloads SRA run objects from the public AWS Open Data
        bucket (sra-pub-run-odp) with the AWS CLI, converts each run to FASTQ
        with fasterq-dump, compresses the result with pigz and sorts the files
        into three discovered collections (paired, single, other).
    -->
    <description>Fetches one or more SRA runs from AWS S3 and converts them to FASTQ</description>
    <requirements>
        <requirement type="package" version="3.1.1">sra-tools</requirement>
        <requirement type="package" version="2.8">pigz</requirement>
        <requirement type="package" version="2.34.8">awscli</requirement>
    </requirements>
    <version_command>fasterq-dump --version</version_command>
    <command detect_errors="exit_code"><![CDATA[
        ## Galaxy joins these lines with spaces before running them, so every
        ## shell statement must end with an explicit '&&' or ';' and only
        ## Cheetah comments (double hash) may be used in this section.

        ## Step 1: write one accession per line to ./accessions.
        #if $input.input_select == "accession_number":
            echo '${input.accession}' | sed -r 's/(\,|\;|__cn__)/\n/g' > accessions &&
        #else:
            grep '^[[:space:]]*[ESD]RR[0-9]\{1,\}[[:space:]]*$' '${input.file_list}' > accessions &&
        #end if

        mkdir -p output &&
        mkdir -p outputOther &&

        ## Step 2: process each accession in its own subshell so that one
        ## failing run does not abort the remaining ones.
        for acc in \$(cat ./accessions); do
        (
            echo "Processing accession: \$acc" &&
            mkdir -p "sra_cache_\${acc}" &&
            aws s3 cp --no-sign-request "s3://sra-pub-run-odp/sra/\${acc}/\${acc}" "./sra_cache_\${acc}/\${acc}" &&
            fasterq-dump -e \${GALAXY_SLOTS:-4} -t . --split-3 "./sra_cache_\${acc}/\${acc}" &&
            rm -rf "sra_cache_\${acc}" &&
            count="\$(ls "\${acc}"*.fastq 2>/dev/null | wc -l)" &&
            echo "Found \$count fastq file(s) for \$acc" &&
            data=(\$(ls "\${acc}"*.fastq 2>/dev/null)) &&

            ## Exactly one file: single-end run.
            if [ "\$count" -eq 1 ]; then
                pigz -cqp \${GALAXY_SLOTS:-4} "\${data[0]}" > output/"\${acc}".fastqsanger.gz &&
                rm "\${data[0]}";

            ## Several files including an unsuffixed one: a pair plus orphan reads.
            ## Paired files are named forward/reverse because Galaxy requires
            ## those element identifiers inside a paired collection.
            elif [ -e "\${acc}".fastq ]; then
                pigz -cqp \${GALAXY_SLOTS:-4} "\${acc}".fastq > outputOther/"\${acc}"__single.fastqsanger.gz &&
                pigz -cqp \${GALAXY_SLOTS:-4} "\${acc}"_1.fastq > output/"\${acc}"_forward.fastqsanger.gz &&
                pigz -cqp \${GALAXY_SLOTS:-4} "\${acc}"_2.fastq > output/"\${acc}"_reverse.fastqsanger.gz &&
                rm "\${acc}"*.fastq;

            ## Exactly two files: a clean pair.
            elif [ "\$count" -eq 2 ]; then
                pigz -cqp \${GALAXY_SLOTS:-4} "\${acc}"_1.fastq > output/"\${acc}"_forward.fastqsanger.gz &&
                pigz -cqp \${GALAXY_SLOTS:-4} "\${acc}"_2.fastq > output/"\${acc}"_reverse.fastqsanger.gz &&
                rm "\${acc}"*.fastq;

            ## Anything else goes to the catch-all collection; appending
            ## 'sanger.gz' turns NAME.fastq into NAME.fastqsanger.gz.
            else
                for file in \${data[*]}; do
                    pigz -cqp \${GALAXY_SLOTS:-4} "\$file" > outputOther/"\$file"sanger.gz &&
                    rm "\$file";
                done;
            fi
        ) || echo "WARNING: processing failed for accession \$acc, continuing with the next one." >&2;
        done;
        echo "Done with all accessions."
    ]]></command>
    <inputs>
        <conditional name="input">
            <param name="input_select" type="select" label="Select input type">
                <option value="accession_number">SRA Accession number(s)</option>
                <option value="file_list">File containing accession list</option>
            </param>
            <when value="accession_number">
                <param name="accession" type="text" label="SRA Accession(s)" help="One or more SRA run accessions (SRR, ERR, or DRR), separated by commas or spaces.">
                    <validator type="empty_field" message="At least one SRA accession is required."/>
                    <!--
                        The value is interpolated inside single quotes in the command,
                        so a single quote is rewritten to the shell-safe sequence
                        that closes, escapes and reopens the quoting.
                    -->
                    <sanitizer>
                        <valid initial="string.printable">
                            <remove value="'"/>
                        </valid>
                        <mapping initial="none">
                            <add source="'" target="'&quot;'&quot;'"/>
                        </mapping>
                    </sanitizer>
                </param>
            </when>
            <when value="file_list">
                <param name="file_list" type="data" format="txt,tabular" label="Accession list file" help="A text file with one SRA accession (SRR, ERR, or DRR) per line."/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Pairs: ACCESSION_forward / ACCESSION_reverse files in the output directory. -->
        <collection name="list_paired" type="list:paired" label="Paired-end FASTQ (aws_sra)">
            <discover_datasets pattern="(?P&lt;identifier_0&gt;[^_]+)_(?P&lt;identifier_1&gt;forward|reverse)\.fastqsanger\.gz" directory="output" ext="fastqsanger.gz"/>
        </collection>
        <!-- Single-end: names without an underscore, so paired files in the same directory are not picked up again. -->
        <collection name="output_single" type="list" label="Single-end FASTQ (aws_sra)">
            <discover_datasets pattern="(?P&lt;designation&gt;[^_]+)\.fastqsanger\.gz" directory="output" ext="fastqsanger.gz"/>
        </collection>
        <!-- Catch-all: everything written to the outputOther directory. -->
        <collection name="output_other" type="list" label="Other FASTQ (aws_sra)">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fastqsanger\.gz" directory="outputOther" ext="fastqsanger.gz"/>
        </collection>
    </outputs>
    <tests>
        <test expect_num_outputs="3">
            <conditional name="input">
                <param name="input_select" value="accession_number"/>
                <param name="accession" value="SRR13333333"/>
            </conditional>
            <output_collection name="list_paired" type="list:paired" count="1">
                <element name="SRR13333333">
                    <element name="forward" ftype="fastqsanger.gz">
                        <assert_contents>
                            <has_text text="@SRR13333333"/>
                        </assert_contents>
                    </element>
                    <element name="reverse" ftype="fastqsanger.gz">
                        <assert_contents>
                            <has_text text="@SRR13333333"/>
                        </assert_contents>
                    </element>
                </element>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[
**NCBI SRA AWS Fetch**

Fetches one or more SRA runs from the public ``sra-pub-run-odp`` S3 bucket and converts them to gzip-compressed FASTQ using ``fasterq-dump``. Downloads use ``aws s3 cp`` with no credentials required (public bucket).

**Inputs**

- **SRA Accession number(s)**: Type one or more accessions (SRR, ERR, or DRR) directly, comma- or space-separated.
- **File containing accession list**: A text file with one accession per line. This option is connectable as a workflow input.

**Outputs**

Three collections are always created (some may be empty depending on the data):

- **Paired-end FASTQ**: A nested ``list:paired`` collection — each accession contains a forward/reverse pair.
- **Single-end FASTQ**: A flat list for reads confirmed single-end by ``--split-3``.
- **Other FASTQ**: Reads that could not be cleanly classified.
    ]]></help>
    <citations>
        <citation type="bibtex">
@misc{ncbi_sra_aws,
    title = {{NCBI} {SRA} on {AWS} Open Data},
    author = {{National Center for Biotechnology Information}},
    howpublished = {\url{https://registry.opendata.aws/ncbi-sra/}},
    note = {Accessed via AWS S3 without credentials}
}
        </citation>
        <citation type="doi">10.1093/nar/gkq1019</citation>
    </citations>
</tool>
