Mercurial > repos > galaxytrakr > aws_sra

diff aws_sra.xml @ 17:9fb80e0392ce draft
planemo upload for repository https://github.com/CFSAN-Biostatistics/galaxytrakr-tools commit 9707fa5e3ca6db5b58f271d133484d078cf65390
author: galaxytrakr
date: Mon, 23 Mar 2026 20:44:25 +0000
parents: 58cc45662c63
children: 5680c31cd031
--- a/aws_sra.xml	Mon Mar 23 20:23:58 2026 +0000
+++ b/aws_sra.xml	Mon Mar 23 20:44:25 2026 +0000
@@ -1,104 +1,91 @@
-<tool id="aws_sra" name="NCBI SRA AWS Fetch" version="0.3.0+gt_0.16" profile="23.0">
+<tool id="aws_sra" name="NCBI SRA AWS Fetch" version="0.3.0+gt_0.17" profile="23.0">
     <description>Fetches SRA runs from AWS and converts them to FASTQ</description>
-
     <requirements>
         <requirement type="package" version="2.34.8">awscli</requirement>
         <requirement type="package" version="3.2.1">sra-tools</requirement>
         <requirement type="package" version="2.8">pigz</requirement>
     </requirements>
-
     <version_command>fasterq-dump --version</version_command>
 
     <command detect_errors="aggressive"><![CDATA[
-        ## This loop handles both 'single' and 'batch' modes.
-        #for $acc_line in $run_type.mode == 'single' and str($run_type.accession).split() or $run_type.accession_list.lines:
-            #set $acc = $acc_line.strip()
-            #if $acc:
-
-                echo "Processing accession: $acc" &&
-
-                ## 1. Create unique directories for this accession
-                mkdir -p sra_cache_${acc} fastq_out_${acc} &&
-
-                ## 2. Download the file from S3 using the discovered path format
-                aws s3 cp --no-sign-request 's3://sra-pub-run-odp/sra/${acc}/${acc}' ./sra_cache_${acc}/ &&
-
-                ## 3. Convert with fasterq-dump, using the correct argument order
-                fasterq-dump --outdir ./fastq_out_${acc} --temp . --threads \${GALAXY_SLOTS:-4} --split-files ./sra_cache_${acc}/${acc} &&
+        ## Single Run Mode
+        #if $run_type.mode == 'single'
+            #set $acc = str($run_type.accession).strip()
+            echo "Processing single accession: $acc" &&
+            mkdir -p sra_cache fastq_out &&
+            aws s3 cp --no-sign-request 's3://sra-pub-run-odp/sra/${acc}/${acc}' ./sra_cache/ &&
+            fasterq-dump --outdir ./fastq_out --temp . --threads \${GALAXY_SLOTS:-4} --split-files ./sra_cache/${acc} &&
+            pigz -p \${GALAXY_SLOTS:-4} ./fastq_out/*.fastq &&
+            #if str($layout) == 'paired'
+                # Move files directly to the single output datasets
+                mv ./fastq_out/${acc}_1.fastq.gz '$output_r1_single' &&
+                mv ./fastq_out/${acc}_2.fastq.gz '$output_r2_single'
+            #else
+                mv ./fastq_out/*.fastq.gz '$output_r1_single'
+            #end if
 
-                ## 4. Compress with pigz
-                pigz -p \${GALAXY_SLOTS:-4} ./fastq_out_${acc}/*.fastq &&
-
-                ## 5. Move outputs to special directories Galaxy can discover
-                #if $layout == 'paired'
-                    mv ./fastq_out_${acc}/${acc}_1.fastq.gz '$output_r1.files_path/${acc}_1.fastq.gz' &&
-                    mv ./fastq_out_${acc}/${acc}_2.fastq.gz '$output_r2.files_path/${acc}_2.fastq.gz'
-                #else
-                    mv ./fastq_out_${acc}/*.fastq.gz '$output_r1.files_path/${acc}.fastq.gz'
-                #end if &&
-
-                ## 6. Clean up temporary files
-                rm -rf sra_cache_${acc} fastq_out_${acc}
-
-            #end if
-        #end for
+        ## Batch Run Mode
+        #else
+            #for $acc in $run_type.accession_list.lines:
+                #set $acc = $acc.strip()
+                #if $acc:
+                    echo "Processing batch accession: $acc" &&
+                    mkdir -p sra_cache_${acc} fastq_out_${acc} &&
+                    aws s3 cp --no-sign-request 's3://sra-pub-run-odp/sra/${acc}/${acc}' ./sra_cache_${acc}/ &&
+                    fasterq-dump --outdir ./fastq_out_${acc} --temp . --threads \${GALAXY_SLOTS:-4} --split-files ./sra_cache_${acc}/${acc} &&
+                    pigz -p \${GALAXY_SLOTS:-4} ./fastq_out_${acc}/*.fastq &&
+                    #if str($layout) == 'paired'
+                        # Move files to the special path for collection discovery
+                        mv ./fastq_out_${acc}/${acc}_1.fastq.gz '$output_r1_batch.files_path/${acc}_1.fastq.gz' &&
+                        mv ./fastq_out_${acc}/${acc}_2.fastq.gz '$output_r2_batch.files_path/${acc}_2.fastq.gz'
+                    #else
+                        mv ./fastq_out_${acc}/*.fastq.gz '$output_r1_batch.files_path/${acc}.fastq.gz'
+                    #end if &&
+                    rm -rf sra_cache_${acc} fastq_out_${acc}
+                #end if
+            #end for
+        #end if
     ]]></command>
 
     <inputs>
-        <!-- This conditional allows the user to choose a single run or a list of runs -->
         <conditional name="run_type">
-            <param name="mode" type="select" label="Execution Mode" help="Run on a single accession or a list of accessions from a file.">
+            <param name="mode" type="select" label="Execution Mode">
                 <option value="single" selected="true">Single Accession</option>
                 <option value="batch">Batch of Accessions</option>
             </param>
             <when value="single">
-                <param name="accession" type="text" label="SRA Accession" help="e.g., SRR13333333"/>
+                <param name="accession" type="text" label="SRA Accession"/>
             </when>
             <when value="batch">
-                <param name="accession_list" type="data" format="txt" label="List of SRA Accessions" help="A plain text file with one SRA accession per line."/>
+                <param name="accession_list" type="data" format="txt" label="List of SRA Accessions"/>
             </when>
         </conditional>
-
-        <!-- This layout parameter is always required -->
-        <param name="layout" type="select" label="Read layout" help="Check the SRA record to confirm layout before running.">
+        <param name="layout" type="select" label="Read layout">
             <option value="paired" selected="true">Paired-end (R1 + R2)</option>
             <option value="single">Single-end</option>
         </param>
     </inputs>
 
     <outputs>
-        <!-- These collections will gather all the files produced by the loop -->
-        <collection name="output_r1" type="list" label="${run_type.accession or 'FASTQ Reads (R1)'}">
+        <!-- Outputs for Single Run Mode -->
+        <data name="output_r1_single" format="fastqsanger.gz" label="${run_type.accession}_1.fastq.gz">
+            <filter>run_type['mode'] == 'single'</filter>
+        </data>
+        <data name="output_r2_single" format="fastqsanger.gz" label="${run_type.accession}_2.fastq.gz">
+            <filter>run_type['mode'] == 'single' and layout == 'paired'</filter>
+        </data>
+
+        <!-- Outputs for Batch Mode -->
+        <collection name="output_r1_batch" type="list" label="FASTQ Reads (R1)">
             <discover_datasets pattern="(?P&lt;designation&gt;.+)_1\.fastq\.gz" format="fastqsanger.gz" />
+            <filter>run_type['mode'] == 'batch'</filter>
         </collection>
-        <collection name="output_r2" type="list" label="${run_type.accession or 'FASTQ Reads (R2)'}">
+        <collection name="output_r2_batch" type="list" label="FASTQ Reads (R2)">
             <discover_datasets pattern="(?P&lt;designation&gt;.+)_2\.fastq\.gz" format="fastqsanger.gz" />
-            <filter>layout == 'paired'</filter>
+            <filter>run_type['mode'] == 'batch' and layout == 'paired'</filter>
         </collection>
     </outputs>
-
-    <tests>
-        <test expect_num_outputs="2">
-            <param name="mode" value="single"/>
-            <param name="accession" value="SRR13333333"/>
-            <param name="layout" value="paired"/>
-            <output_collection name="output_r1" type="list" count="1">
-                <element name="SRR13333333_1" ftype="fastqsanger.gz">
-                    <assert_contents>
-                        <has_text text="@SRR13333333"/>
-                    </assert_contents>
-            </element>
-            </output_collection>
-            <output_collection name="output_r2" type="list" count="1">
-                <element name="SRR13333333_2" ftype="fastqsanger.gz">
-                    <assert_contents>
-                        <has_text text="@SRR13333333"/>
-                    </assert_contents>
-                </element>
-            </output_collection>
-        </test>
-    </tests>
-
+    
     <help><![CDATA[
 **NCBI SRA AWS Fetch**
author	galaxytrakr
date	Mon, 23 Mar 2026 20:44:25 +0000
parents	58cc45662c63
children	5680c31cd031