Mercurial > repos > galaxytrakr > aws_sra
comparison aws_sra.xml @ 23:d7f68b3cde39 draft
planemo upload for repository https://github.com/CFSAN-Biostatistics/galaxytrakr-tools commit e9adf514c1b6b341c9e5bf8cc5a41c79b738d48e
| author | galaxytrakr |
|---|---|
| date | Mon, 23 Mar 2026 23:55:36 +0000 |
| parents | 5ecb94ab82c3 |
| children | 7dbb60c48056 |
comparison
Legend: equal, deleted, inserted, replaced
| 22:5ecb94ab82c3 | 23:d7f68b3cde39 |
|---|---|
| 1 <tool id="aws_sra" name="NCBI SRA AWS Fetch" version="0.4+gt_0.1" profile="23.0"> | 1 <tool id="aws_sra" name="NCBI SRA AWS Fetch" version="0.4.0+gt+0.2" profile="23.0"> |
| 2 <description>Fetches a single SRA run from AWS and converts it to FASTQ</description> | 2 <description>Fetches one or more SRA runs from AWS S3 and converts them to FASTQ</description> |
| 3 | 3 |
| 4 <requirements> | 4 <requirements> |
| 5 <requirement type="package" version="2.34.8">awscli</requirement> | 5 <requirement type="package" version="2.34.8">awscli</requirement> |
| 6 <requirement type="package" version="3.2.1">sra-tools</requirement> | 6 <requirement type="package" version="3.2.1">sra-tools</requirement> |
| 7 <requirement type="package" version="2.8">pigz</requirement> | 7 <requirement type="package" version="2.8">pigz</requirement> |
| 8 </requirements> | 8 </requirements> |
| 9 | 9 |
| 10 <version_command>fasterq-dump --version</version_command> | 10 <version_command>fasterq-dump --version</version_command> |
| 11 | 11 |
| 12 <command detect_errors="aggressive"><![CDATA[ | 12 <command detect_errors="aggressive"><![CDATA[ |
| 13 #set $acc = str($accession).strip() | 13 ## Create a clean list of accessions from the user input |
| 14 echo "$accession" | sed 's/,/\n/g; s/ \+/\n/g' | grep . > accessions.txt && | |
| 14 | 15 |
| 15 echo "Processing single accession: $acc" && | 16 ## Loop over each clean accession |
| 17 for acc in $(cat accessions.txt); | |
| 18 do | |
| 19 echo "Processing accession: $acc" && | |
| 16 | 20 |
| 17 ## 1. Create temporary directories | 21 ## 1. Create unique directories for this accession |
| 18 mkdir -p sra_cache fastq_out && | 22 mkdir -p sra_cache_${acc} fastq_out_${acc} && |
| 19 | 23 |
| 20 ## 2. Download the file from S3 using the discovered path format (no .sra) | 24 ## 2. Download the file from S3 using aws s3 cp |
| 21 aws s3 cp --no-sign-request 's3://sra-pub-run-odp/sra/${acc}/${acc}' ./sra_cache/ && | 25 aws s3 cp --no-sign-request "s3://sra-pub-run-odp/sra/${acc}/${acc}" ./sra_cache_${acc}/ && |
| 22 | 26 |
| 23 ## 3. Convert with fasterq-dump, using the correct argument order | 27 ## 3. Convert with fasterq-dump |
| 24 fasterq-dump --outdir ./fastq_out --temp . --threads \${GALAXY_SLOTS:-4} --split-files ./sra_cache/${acc} && | 28 fasterq-dump --outdir ./fastq_out_${acc} --temp . --threads \${GALAXY_SLOTS:-4} --split-files ./sra_cache_${acc}/${acc} && |
| 25 | 29 |
| 26 ## 4. Compress with pigz | 30 ## 4. Compress with pigz |
| 27 pigz -p \${GALAXY_SLOTS:-4} ./fastq_out/*.fastq && | 31 pigz -p \${GALAXY_SLOTS:-4} ./fastq_out_${acc}/*.fastq && |
| 28 | 32 |
| 29 ## 5. Move the final outputs to their Galaxy dataset paths | 33 ## 5. Move outputs for collection discovery |
| 30 #if str($layout) == 'paired' | 34 #if str($layout) == 'paired' |
| 31 mv ./fastq_out/${acc}_1.fastq.gz '$output_r1' && | 35 mv ./fastq_out_${acc}/${acc}_1.fastq.gz '$output_r1.files_path/${acc}_1.fastq.gz' && |
| 32 mv ./fastq_out/${acc}_2.fastq.gz '$output_r2' | 36 mv ./fastq_out_${acc}/${acc}_2.fastq.gz '$output_r2.files_path/${acc}_2.fastq.gz' |
| 33 #else | 37 #else |
| 34 # Be explicit about the single-end filename, removing the wildcard | 38 mv ./fastq_out_${acc}/${acc}.fastq.gz '$output_r1.files_path/${acc}.fastq.gz' |
| 35 mv ./fastq_out/${acc}.fastq.gz '$output_r1' | 39 #end if && |
| 36 #end if | 40 |
| 41 ## 6. Clean up | |
| 42 rm -rf sra_cache_${acc} fastq_out_${acc} | |
| 43 done | |
| 37 ]]></command> | 44 ]]></command> |
| 38 | 45 |
| 39 <inputs> | 46 <inputs> |
| 40 <param name="accession" type="text" label="SRA Accession" help="e.g., SRR13333333"/> | 47 <param name="accession" type="text" multiple="true" label="SRA Accession(s)" help="Provide one or more accession numbers (separated by commas, spaces, or newlines). This field accepts a dataset list of accessions in a workflow."/> |
| 41 <param name="layout" type="select" label="Read layout" help="Check the SRA record to confirm layout before running."> | 48 <param name="layout" type="select" label="Read layout" help="This setting is applied to all accessions."> |
| 42 <option value="paired" selected="true">Paired-end (R1 + R2)</option> | 49 <option value="paired" selected="true">Paired-end (R1 + R2)</option> |
| 43 <option value="single">Single-end</option> | 50 <option value="single">Single-end</option> |
| 44 </param> | 51 </param> |
| 45 </inputs> | 52 </inputs> |
| 46 | 53 |
| 47 <outputs> | 54 <outputs> |
| 48 <data name="output_r1" format="fastqsanger.gz" label="${accession}_1.fastq.gz"/> | 55 <collection name="output_r1" type="list" label="FASTQ Reads (R1) for ${accession}"> |
| 49 <data name="output_r2" format="fastqsanger.gz" label="${accession}_2.fastq.gz"> | 56 <discover_datasets pattern="(?P<designation>.+)_1\.fastq\.gz" format="fastqsanger.gz" /> |
| 57 </collection> | |
| 58 <collection name="output_r2" type="list" label="FASTQ Reads (R2) for ${accession}"> | |
| 59 <discover_datasets pattern="(?P<designation>.+)_2\.fastq\.gz" format="fastqsanger.gz" /> | |
| 50 <filter>layout == 'paired'</filter> | 60 <filter>layout == 'paired'</filter> |
| 51 </data> | 61 </collection> |
| 52 </outputs> | 62 </outputs> |
| 53 | 63 |
| 54 <tests> | 64 <tests> |
| 55 <test expect_num_outputs="2"> | 65 <test expect_num_outputs="2"> |
| 56 <param name="accession" value="SRR13333333"/> | 66 <param name="accession" value="SRR13333333"/> |
| 57 <param name="layout" value="paired"/> | 67 <param name="layout" value="paired"/> |
| 58 <output name="output_r1" ftype="fastqsanger.gz"> | 68 <output_collection name="output_r1" type="list" count="1"> |
| 59 <assert_contents> | 69 <element name="SRR13333333_1" ftype="fastqsanger.gz" has_text="@SRR13333333"/> |
| 60 <has_text text="@SRR13333333"/> | 70 </output_collection> |
| 61 </assert_contents> | 71 <output_collection name="output_r2" type="list" count="1"> |
| 62 </output> | 72 <element name="SRR13333333_2" ftype="fastqsanger.gz" has_text="@SRR13333333"/> |
| 63 <output name="output_r2" ftype="fastqsanger.gz"> | 73 </output_collection> |
| 64 <assert_contents> | |
| 65 <has_text text="@SRR13333333"/> | |
| 66 </assert_contents> | |
| 67 </output> | |
| 68 </test> | |
| 69 <test expect_num_outputs="1"> | |
| 70 <param name="accession" value="SRR11181815"/> <!-- A known single-end run --> | |
| 71 <param name="layout" value="single"/> | |
| 72 <output name="output_r1" ftype="fastqsanger.gz"> | |
| 73 <assert_contents> | |
| 74 <has_text text="@SRR11181815"/> | |
| 75 </assert_contents> | |
| 76 </output> | |
| 77 </test> | 74 </test> |
| 78 </tests> | 75 </tests> |
| 79 | 76 |
| 80 <help><![CDATA[ | 77 <help><![CDATA[ |
| 81 **NCBI SRA AWS Fetch** | 78 **NCBI SRA AWS Fetch** |
| 82 | 79 |
| 83 Fetches a single SRA run from the public `sra-pub-run-odp` bucket on Amazon S3 and converts it to gzip-compressed FASTQ using `fasterq-dump`. | 80 Fetches one or more SRA runs from the public `sra-pub-run-odp` S3 bucket and converts them to gzip-compressed FASTQ using `fasterq-dump`. This tool uses `aws s3 cp` for direct downloads within the AWS environment. |
| 84 ]]></help> | 81 ]]></help> |
| 85 | 82 |
| 86 <citations> | 83 <citations> |
| 87 <citation type="bibtex"> | 84 <citation type="bibtex"> |
| 88 @misc{ncbi_sra_aws, | 85 @misc{ncbi_sra_aws, |
