Mercurial > repos > galaxytrakr > aws_sra
diff aws_sra.xml @ 17:9fb80e0392ce draft
planemo upload for repository https://github.com/CFSAN-Biostatistics/galaxytrakr-tools commit 9707fa5e3ca6db5b58f271d133484d078cf65390
| field | value |
|---|---|
| author | galaxytrakr |
| date | Mon, 23 Mar 2026 20:44:25 +0000 |
| parents | 58cc45662c63 |
| children | 5680c31cd031 |
line wrap: on
line diff
--- a/aws_sra.xml Mon Mar 23 20:23:58 2026 +0000
+++ b/aws_sra.xml Mon Mar 23 20:44:25 2026 +0000
@@ -1,104 +1,91 @@
-<tool id="aws_sra" name="NCBI SRA AWS Fetch" version="0.3.0+gt_0.16" profile="23.0">
+<tool id="aws_sra" name="NCBI SRA AWS Fetch" version="0.3.0+gt_0.17" profile="23.0">
 <description>Fetches SRA runs from AWS and converts them to FASTQ</description>
-
 <requirements>
 <requirement type="package" version="2.34.8">awscli</requirement>
 <requirement type="package" version="3.2.1">sra-tools</requirement>
 <requirement type="package" version="2.8">pigz</requirement>
 </requirements>
-
 <version_command>fasterq-dump --version</version_command>
 <command detect_errors="aggressive"><![CDATA[
- ## This loop handles both 'single' and 'batch' modes.
- #for $acc_line in $run_type.mode == 'single' and str($run_type.accession).split() or $run_type.accession_list.lines:
- #set $acc = $acc_line.strip()
- #if $acc:
-
- echo "Processing accession: $acc" &&
-
- ## 1. Create unique directories for this accession
- mkdir -p sra_cache_${acc} fastq_out_${acc} &&
-
- ## 2. Download the file from S3 using the discovered path format
- aws s3 cp --no-sign-request 's3://sra-pub-run-odp/sra/${acc}/${acc}' ./sra_cache_${acc}/ &&
-
- ## 3. Convert with fasterq-dump, using the correct argument order
- fasterq-dump --outdir ./fastq_out_${acc} --temp . --threads \${GALAXY_SLOTS:-4} --split-files ./sra_cache_${acc}/${acc} &&
+ ## Single Run Mode
+ #if $run_type.mode == 'single'
+ #set $acc = str($run_type.accession).strip()
+ echo "Processing single accession: $acc" &&
+ mkdir -p sra_cache fastq_out &&
+ aws s3 cp --no-sign-request 's3://sra-pub-run-odp/sra/${acc}/${acc}' ./sra_cache/ &&
+ fasterq-dump --outdir ./fastq_out --temp . --threads \${GALAXY_SLOTS:-4} --split-files ./sra_cache/${acc} &&
+ pigz -p \${GALAXY_SLOTS:-4} ./fastq_out/*.fastq &&
+ #if str($layout) == 'paired'
+ # Move files directly to the single output datasets
+ mv ./fastq_out/${acc}_1.fastq.gz '$output_r1_single' &&
+ mv ./fastq_out/${acc}_2.fastq.gz '$output_r2_single'
+ #else
+ mv ./fastq_out/*.fastq.gz '$output_r1_single'
+ #end if
- ## 4. Compress with pigz
- pigz -p \${GALAXY_SLOTS:-4} ./fastq_out_${acc}/*.fastq &&
-
- ## 5. Move outputs to special directories Galaxy can discover
- #if $layout == 'paired'
- mv ./fastq_out_${acc}/${acc}_1.fastq.gz '$output_r1.files_path/${acc}_1.fastq.gz' &&
- mv ./fastq_out_${acc}/${acc}_2.fastq.gz '$output_r2.files_path/${acc}_2.fastq.gz'
- #else
- mv ./fastq_out_${acc}/*.fastq.gz '$output_r1.files_path/${acc}.fastq.gz'
- #end if &&
-
- ## 6. Clean up temporary files
- rm -rf sra_cache_${acc} fastq_out_${acc}
-
- #end if
- #end for
+ ## Batch Run Mode
+ #else
+ #for $acc in $run_type.accession_list.lines:
+ #set $acc = $acc.strip()
+ #if $acc:
+ echo "Processing batch accession: $acc" &&
+ mkdir -p sra_cache_${acc} fastq_out_${acc} &&
+ aws s3 cp --no-sign-request 's3://sra-pub-run-odp/sra/${acc}/${acc}' ./sra_cache_${acc}/ &&
+ fasterq-dump --outdir ./fastq_out_${acc} --temp . --threads \${GALAXY_SLOTS:-4} --split-files ./sra_cache_${acc}/${acc} &&
+ pigz -p \${GALAXY_SLOTS:-4} ./fastq_out_${acc}/*.fastq &&
+ #if str($layout) == 'paired'
+ # Move files to the special path for collection discovery
+ mv ./fastq_out_${acc}/${acc}_1.fastq.gz '$output_r1_batch.files_path/${acc}_1.fastq.gz' &&
+ mv ./fastq_out_${acc}/${acc}_2.fastq.gz '$output_r2_batch.files_path/${acc}_2.fastq.gz'
+ #else
+ mv ./fastq_out_${acc}/*.fastq.gz '$output_r1_batch.files_path/${acc}.fastq.gz'
+ #end if &&
+ rm -rf sra_cache_${acc} fastq_out_${acc}
+ #end if
+ #end for
+ #end if
 ]]></command>
 <inputs>
- <!-- This conditional allows the user to choose a single run or a list of runs -->
 <conditional name="run_type">
- <param name="mode" type="select" label="Execution Mode" help="Run on a single accession or a list of accessions from a file.">
+ <param name="mode" type="select" label="Execution Mode">
 <option value="single" selected="true">Single Accession</option>
 <option value="batch">Batch of Accessions</option>
 </param>
 <when value="single">
- <param name="accession" type="text" label="SRA Accession" help="e.g., SRR13333333"/>
+ <param name="accession" type="text" label="SRA Accession"/>
 </when>
 <when value="batch">
- <param name="accession_list" type="data" format="txt" label="List of SRA Accessions" help="A plain text file with one SRA accession per line."/>
+ <param name="accession_list" type="data" format="txt" label="List of SRA Accessions"/>
 </when>
 </conditional>
-
- <!-- This layout parameter is always required -->
- <param name="layout" type="select" label="Read layout" help="Check the SRA record to confirm layout before running.">
+ <param name="layout" type="select" label="Read layout">
 <option value="paired" selected="true">Paired-end (R1 + R2)</option>
 <option value="single">Single-end</option>
 </param>
 </inputs>
 <outputs>
- <!-- These collections will gather all the files produced by the loop -->
- <collection name="output_r1" type="list" label="${run_type.accession or 'FASTQ Reads (R1)'}">
+ <!-- Outputs for Single Run Mode -->
+ <data name="output_r1_single" format="fastqsanger.gz" label="${run_type.accession}_1.fastq.gz">
+ <filter>run_type['mode'] == 'single'</filter>
+ </data>
+ <data name="output_r2_single" format="fastqsanger.gz" label="${run_type.accession}_2.fastq.gz">
+ <filter>run_type['mode'] == 'single' and layout == 'paired'</filter>
+ </data>
+
+ <!-- Outputs for Batch Mode -->
+ <collection name="output_r1_batch" type="list" label="FASTQ Reads (R1)">
 <discover_datasets pattern="(?P<designation>.+)_1\.fastq\.gz" format="fastqsanger.gz" />
+ <filter>run_type['mode'] == 'batch'</filter>
 </collection>
- <collection name="output_r2" type="list" label="${run_type.accession or 'FASTQ Reads (R2)'}">
+ <collection name="output_r2_batch" type="list" label="FASTQ Reads (R2)">
 <discover_datasets pattern="(?P<designation>.+)_2\.fastq\.gz" format="fastqsanger.gz" />
- <filter>layout == 'paired'</filter>
+ <filter>run_type['mode'] == 'batch' and layout == 'paired'</filter>
 </collection>
 </outputs>
-
- <tests>
- <test expect_num_outputs="2">
- <param name="mode" value="single"/>
- <param name="accession" value="SRR13333333"/>
- <param name="layout" value="paired"/>
- <output_collection name="output_r1" type="list" count="1">
- <element name="SRR13333333_1" ftype="fastqsanger.gz">
- <assert_contents>
- <has_text text="@SRR13333333"/>
- </assert_contents>
- </element>
- </output_collection>
- <output_collection name="output_r2" type="list" count="1">
- <element name="SRR13333333_2" ftype="fastqsanger.gz">
- <assert_contents>
- <has_text text="@SRR13333333"/>
- </assert_contents>
- </element>
- </output_collection>
- </test>
- </tests>
-
+
 <help><