changeset 0:878e5d4becef

planemo upload
author jasmine_amir
date Thu, 02 Feb 2023 18:11:05 -0500
parents
children c9fb9ecae076
files LICENSE README.rst fasterq_dump.xml fastq_dump.xml macros.xml sam_dump.xml test-data/DRR015708_forward.fastqsanger test-data/DRR015708_reverse.fastqsanger test-data/ERR027433_forward.fastqsanger test-data/ERR027433_reverse.fastqsanger test-data/ERR086330_1.fastq.gz test-data/ERR086330_2.fastq.gz test-data/SRR002702_1.fastq.gz test-data/SRR002702_2.fastq.gz test-data/SRR11859153.fastq.gz test-data/SRR11953971_1.fastq.gz test-data/SRR11953971_2.fastq.gz test-data/SRR1993644.fastqsanger test-data/SRR522874.fastq.gz test-data/SRR522874.sra test-data/SRR522874.sra_1.fastq.gz test-data/SRR522874.sra_2.fastq.gz test-data/SRR522874.sra_3.fastq.gz test-data/SRR522874.sra_4.fastq.gz test-data/SRR522874_1.fastq.gz test-data/SRR522874_2.fastq.gz test-data/SRR522874_sam_dump_result.sam test-data/SRR6982805.fastqsanger.gz test-data/SRR925743_forward.fastqsanger test-data/SRR925743_reverse.fastqsanger test-data/SRR925743_sam_dump_result.sam test-data/bam_dump_result.bam test-data/list_pe test-data/list_pe2 test-data/list_se test-data/list_sra test-data/sra_manifest.tabular test-data/sra_pileup_result.pileup
diffstat 38 files changed, 1081 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/LICENSE	Thu Feb 02 18:11:05 2023 -0500
@@ -0,0 +1,60 @@
+CONTENTS
+
+  Public Domain Notice
+  Exceptions (for bundled 3rd-party code)
+  Copyright F.A.Q.
+
+
+==============================================================
+                     PUBLIC DOMAIN NOTICE
+         National Center for Biotechnology Information
+
+With the exception of certain third-party files summarized below, this
+software is a "United States Government Work" under the terms of the
+United States Copyright Act.  It was written as part of the authors'
+official duties as United States Government employees and thus cannot
+be copyrighted.  This software is freely available to the public for
+use. The National Library of Medicine and the U.S. Government have not
+placed any restriction on its use or reproduction.
+
+Although all reasonable efforts have been taken to ensure the accuracy
+and reliability of the software and data, the NLM and the U.S.
+Government do not and cannot warrant the performance or results that
+may be obtained by using this software or data. The NLM and the U.S.
+Government disclaim all warranties, express or implied, including
+warranties of performance, merchantability or fitness for any
+particular purpose.
+
+Please cite the authors in any work or product based on this material.
+
+
+==============================================================
+Copyright F.A.Q.
+
+
+--------------------------------------------------------------
+Q. Our product makes use of the NCBI source code, and we made changes
+   and additions to that version of the NCBI code to better fit it to
+   our needs. Can we copyright the code, and how?
+
+A. You can copyright only the *changes* or the *additions* you made to the
+   NCBI source code. You should identify unambiguously those sections of
+   the code that were modified, e.g. by commenting any changes you made
+   in the code you distribute. Therefore, your license has to make clear
+   to users that your product is a combination of code that is public domain
+   within the U.S. (but may be subject to copyright by the U.S. in foreign
+   countries) and code that has been created or modified by you.
+
+--------------------------------------------------------------
+Q. Can we (re)license all or part of the NCBI source code?
+
+A. No, you cannot license or relicense the source code written by NCBI
+   since you cannot claim any copyright in the software that was developed
+   at NCBI as a 'government work' and consequently is in the public domain
+   within the U.S.
+
+--------------------------------------------------------------
+Q. What if these copyright guidelines are not clear enough or are not
+   applicable to my particular case?
+
+A. Contact us. Send your questions to 'sra-tools@ncbi.nlm.nih.gov'.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst	Thu Feb 02 18:11:05 2023 -0500
@@ -0,0 +1,32 @@
+The Galaxy tool wrappers contained in this tool shed repository rely on software developed by
+the NCBI: https://github.com/ncbi/sra-tools.
+
+NCBI Sequence Read Archive Toolkit: https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software
+
+# Installation of Aspera connect ascp binary
+
+The sra-tools suite is ready to benefit from increased transfer speed and reliability by using Aspera Connect ascp.
+To benefit, download the ascp command-line client and place ascp and the required SSH keys in a directory on the PATH that is accessible to Galaxy's job handler.
+
+A convenience package for linux and OS X is available at https://toolshed.g2.bx.psu.edu/view/mvdbeek/package_ascp_3/e109f0ec22c3 .
+It suffices to copy the contents of the $INSTALL_DIR/bin into PATH.
+
+Alternatively go to http://downloads.asperasoft.com/connect2/ .
+
+Aspera connect is not provided by the IUC due to its closed-source nature.
+
+# Firewall settings for highspeed transfer
+
+To benefit from increased transfer speeds using ascp3 your local firewall must permit UDP data transfer in both
+directions on ports 33001-33009 for the following IP ranges:
+
+    130.14.*.*
+
+    165.112.*.*
+
+The firewall must also allow ssh traffic outbound to NCBI.
+The wrapper will fall back to http download if these requirements are not met.
+
+CONTROLLED-ACCESS DATA
+
+Encrypted, controlled-access data is not supported.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fasterq_dump.xml	Thu Feb 02 18:11:05 2023 -0500
@@ -0,0 +1,231 @@
+<tool id="fasterq_dump" name="Faster Download and Extract Reads in FASTQ" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>format from NCBI SRA</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="edam_ontology"/>
+    <expand macro="bio_tools"/>
+    <expand macro="requirements"/>
+    <version_command>fasterq-dump --version | tr -d $'\n'</version_command>
+    <command detect_errors="exit_code"><![CDATA[
+    ## Turn on pipefail when the shell offers it, so that the status of the
+    ## `fasterq-dump ... | tee` pipeline below reflects fasterq-dump itself
+    ## rather than only the trailing tee.
+    set -o | grep -q pipefail && set -o pipefail;
+    @COPY_CONFIGFILE@
+    @CONFIGURE_RETRY@
+    @SET_ACCESSIONS@
+    ## Retry the dump until it succeeds and leaves at least one fastq file
+    ## behind, or until SRA_PREFETCH_RETRIES attempts have been made.
+    while [ \$SRA_PREFETCH_ATTEMPT -le \$SRA_PREFETCH_RETRIES ] ; do
+        fasterq-dump "\$acc" -e \${GALAXY_SLOTS:-1}
+        --seq-defline '@\$sn/\$ri'
+        --qual-defline '+'
+        $adv.split
+        #if str( $adv.minlen ) != "":
+            --min-read-len "$adv.minlen"
+        #end if
+        $adv.skip_technical 2>&1 | tee -a '$log';
+        ## Capture the pipeline status immediately: every later command,
+        ## including the [ ... ] tests below, overwrites it. Without this the
+        ## failure message would report the status of the test instead.
+        dump_status=\$?;
+        if [ "\$dump_status" -eq 0 ] && [ \$(ls *.fastq | wc -l) -ge 1 ]; then
+            break ;
+        else
+            echo "Prefetch attempt \$SRA_PREFETCH_ATTEMPT of \$SRA_PREFETCH_RETRIES exited with code \$dump_status" ;
+            SRA_PREFETCH_ATTEMPT=`expr \$SRA_PREFETCH_ATTEMPT + 1` ;
+            sleep 1 ;
+        fi ;
+    done &&
+    ## Compressed results are sorted into two directories that the output
+    ## collections discover from: "output" (paired and single-end data) and
+    ## "outputOther" (everything else).
+    mkdir -p output &&
+    mkdir -p outputOther &&
+    count="\$(ls *.fastq | wc -l)" &&
+    echo "There are \$count fastq files" &&
+    data=(\$(ls *.fastq)) &&
+    if [ "\$count" -eq 1 ]; then
+        ## Exactly one file: treat it as single-end data.
+        @COMPRESS@ "\${data[0]}" > output/"\${acc}"__single.fastqsanger.gz &&
+        rm "\${data[0]}";
+    elif [ "$adv.split" = "--split-3" ]; then
+        ## split-3 layout: an optional <acc>.fastq goes to the "other"
+        ## collection, <acc>_1 / <acc>_2 become the forward / reverse pair.
+        if [ -e "\${acc}".fastq ]; then
+            @COMPRESS@ "\${acc}".fastq > outputOther/"\${acc}"__single.fastqsanger.gz;
+        fi &&
+        @COMPRESS@ "\${acc}"_1.fastq > output/"\${acc}"_forward.fastqsanger.gz &&
+        @COMPRESS@ "\${acc}"_2.fastq > output/"\${acc}"_reverse.fastqsanger.gz &&
+        rm "\${acc}"*.fastq;
+    elif [ "\$count" -eq 2 ]; then
+        ## Two files outside split-3: they are only labelled forward / reverse
+        ## when technical reads were skipped; otherwise they go to "other".
+        #if $adv.skip_technical:
+            @COMPRESS@ "\${data[0]}" > output/"\${acc}"_forward.fastqsanger.gz &&
+            @COMPRESS@ "\${data[1]}" > output/"\${acc}"_reverse.fastqsanger.gz &&
+        #else
+            @COMPRESS@ "\${data[0]}" > outputOther/"\${data[0]}"sanger.gz &&
+            @COMPRESS@ "\${data[1]}" > outputOther/"\${data[1]}"sanger.gz &&
+        #end if
+        rm "\${data[0]}" &&
+        rm "\${data[1]}";
+    else
+        ## Any other file count: everything goes to the "other" collection.
+        for file in \${data[*]}; do
+            @COMPRESS@ "\$file" > outputOther/"\$file"sanger.gz &&
+            rm "\$file";
+        done;
+    fi;
+
+    ## For every input mode except a single SRA file, close the construct that
+    ## the SET_ACCESSIONS token opened above.
+    #if $input.input_select != "sra_file":
+        ); done;
+    #end if
+    echo "Done with all accessions."
+    ]]>
+    </command>
+    <expand macro="configfile_hack"/>
+    <inputs>
+        <!-- Input chooser expanded from the imported macros; the command block
+             reads its "input_select" value. -->
+        <expand macro="input_conditional"/>
+        <section name="adv" title="Advanced Options" expanded="False">
+            <!-- Optional: the command only passes the minimum-read-length
+                 option when a value has been entered. -->
+            <param name="minlen" type="integer" label="Minimum read length" optional="true" help="Filter by sequence length. Will dump only reads longer or equal to this value." argument="--min-read-len"/>
+            <!-- The selected value is inserted verbatim into the fasterq-dump
+                 call; the command also branches on the split-3 value when it
+                 sorts the resulting files. -->
+            <param name="split" type="select" display="radio" label="Select how to split the spots" help="This option will only be used when there are multiple reads per spot (for example paired-end).">
+                <option value="--split-3">--split-3: write properly paired biological reads into different files and single reads in another file</option>
+                <option value="--split-files">--split-files: write reads into different files (forward and reverse may not match if one read is empty)</option>
+                <option value="--split-spot">--split-spot: split spots into reads (only one output file)</option>
+                <option value="--concatenate-reads">--concatenate-reads: writes whole spots into one file</option>
+            </param>
+            <!-- Always emits one of the two flags (skip or include technical
+                 reads); checked by default. -->
+            <param name="skip_technical" type="boolean" truevalue="--skip-technical" falsevalue="--include-technical" checked="True" label="Dump only biological reads" help="Will not be used if --split-3 is selected." argument="--skip-technical/--include-technical"/>
+        </section>
+    </inputs>
+    <outputs>
+        <!-- Combined stdout/stderr of every fasterq-dump attempt (written via tee). -->
+        <data name="log" format="txt" label="fasterq-dump log"/>
+        <collection name="list_paired" type="list:paired" label="Paired-end data (fasterq-dump)">
+            <!-- Named regex groups pick apart files called
+                 <identifier_0>_<identifier_1>.fastqsanger.gz in "output".
+                 identifier_0 is the list identifier of the nested collection
+                 and identifier_1 is either forward or reverse (for instance
+                 samp1_forward.fastqsanger.gz). Every dot is escaped so that
+                 it only matches a literal dot.
+            -->
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;[^_]+)_(?P&lt;identifier_1&gt;[^_]+)\.fastqsanger\.gz" directory="output" ext="fastqsanger.gz"/>
+        </collection>
+        <!-- Single-end files are written to "output" as <accession>__single.fastqsanger.gz. -->
+        <collection name="output_collection" type="list" label="Single-end data (fasterq-dump)">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)__single\.fastqsanger\.gz" directory="output" ext="fastqsanger.gz"/>
+        </collection>
+        <!-- Everything the command could not classify is written to "outputOther". -->
+        <collection name="output_collection_other" type="list" label="Other data (fasterq-dump)">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fastqsanger\.gz" directory="outputOther" ext="fastqsanger.gz"/>
+        </collection>
+    </outputs>
+    <tests>
+        <!-- Single accession, default options: one forward/reverse pair expected. -->
+        <test expect_num_outputs="4">
+            <param name="input_select" value="accession_number"/>
+            <param name="accession" value="ERR086330"/>
+            <output_collection name="list_paired" type="list:paired" count="1">
+                <element name="ERR086330">
+                    <element name="forward" file="ERR086330_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+                    <element name="reverse" file="ERR086330_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+                </element>
+            </output_collection>
+        </test>
+        <!-- Split into files with technical reads included: both files are
+             expected in the "other" collection. -->
+        <test expect_num_outputs="4">
+            <param name="input_select" value="accession_number"/>
+            <param name="accession" value="SRR002702"/>
+            <param name="split" value="--split-files"/>
+            <param name="skip_technical" value="False"/>
+            <output_collection name="output_collection_other" type="list" count="2">
+                <element name="SRR002702_1" file="SRR002702_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+                <element name="SRR002702_2" file="SRR002702_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+            </output_collection>
+        </test>
+        <!-- Two comma-separated accessions: one pair expected per accession. -->
+        <test expect_num_outputs="4">
+            <param name="input_select" value="accession_number"/>
+            <param name="accession" value="ERR086330, SRR11953971"/>
+            <output_collection name="list_paired" type="list:paired" count="2">
+                <element name="ERR086330">
+                    <element name="forward" file="ERR086330_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+                    <element name="reverse" file="ERR086330_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+                </element>
+                <element name="SRR11953971">
+                    <element name="forward" file="SRR11953971_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+                    <element name="reverse" file="SRR11953971_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+                </element>
+            </output_collection>
+        </test>
+        <!-- Local SRA file, split into files, technical reads skipped: the
+             two remaining files are expected as a forward/reverse pair. -->
+        <test expect_num_outputs="4">
+            <param name="input_select" value="sra_file"/>
+            <param name="sra_file" value="SRR522874.sra"/>
+            <param name="split" value="--split-files"/>
+            <param name="skip_technical" value="True"/>
+            <output_collection name="list_paired" type="list:paired" count="1">
+                <element name="SRR522874.sra">
+                    <element name="forward" file="SRR522874.sra_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+                    <element name="reverse" file="SRR522874.sra_4.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+                </element>
+            </output_collection>
+        </test>
+        <!-- Same SRA file with technical reads included: four files are
+             expected in the "other" collection. -->
+        <test expect_num_outputs="4">
+            <param name="input_select" value="sra_file"/>
+            <param name="sra_file" value="SRR522874.sra"/>
+            <param name="split" value="--split-files"/>
+            <param name="skip_technical" value="False"/>
+            <output_collection name="output_collection_other" type="list" count="4">
+                <element name="SRR522874_1" file="SRR522874.sra_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+                <element name="SRR522874_2" file="SRR522874.sra_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+                <element name="SRR522874_3" file="SRR522874.sra_3.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+                <element name="SRR522874_4" file="SRR522874.sra_4.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+            </output_collection>
+        </test>
+        <!-- Accession list with a minimum read length: expects one element in
+             each of the three collections. -->
+        <test expect_num_outputs="4">
+            <param name="input_select" value="file_list"/>
+            <param name="file_list" value="list_sra"/>
+            <param name="minlen" value="21"/>
+            <output_collection name="output_collection_other" type="list" count="1">
+                <element name="SRR522874__single" file="SRR522874.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+            </output_collection>
+            <output_collection name="list_paired" type="list:paired" count="1">
+                <element name="SRR522874">
+                    <element name="forward" file="SRR522874_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+                    <element name="reverse" file="SRR522874_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+                </element>
+            </output_collection>
+            <output_collection name="output_collection" type="list" count="1">
+                <element name="SRR002702" file="SRR002702_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+            </output_collection>
+        </test>
+        <!-- Accession list supplied as an sra_manifest.tabular dataset. -->
+        <test expect_num_outputs="4">
+            <param name="input_select" value="file_list"/>
+            <param name="file_list" value="sra_manifest.tabular" ftype="sra_manifest.tabular"/>
+            <output_collection name="list_paired" type="list:paired" count="1">
+                <element name="SRR11953971">
+                    <element name="forward" file="SRR11953971_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+                    <element name="reverse" file="SRR11953971_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+                </element>
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does?**
+
+This tool extracts data (in fastq_ format) from the Short Read Archive (SRA) at the National Center for Biotechnology Information (NCBI). It is based on the fasterq-dump_ utility of the SRA Toolkit.  The following applies:
+
+ - if data is paired-ended (or mate-pair) the tool will generate a collection of file pairs, in which each element will be a pair of fastq_ files containing forward and reverse mates.
+ - if data is single ended, each element of the collection will be a single fastq_ dataset.
+
+
+@HOW_TO_USE_IT@
+
+-----
+
+**Output**
+
+In every case, fastq datasets produced will be saved in Galaxy's history as a collection_ - a single history element containing multiple datasets. In fact, regardless of the experimental design, three collections will be produced: one containing paired-end data, another containing single-end data, and a third one which contains reads which could not be classified.
+Some collections may be empty if the accessions provided in the list do not contain that type of data.
+
+.. class:: warningmark
+
+When you decide to dump technical reads (in Advanced Options Dump only biological reads is set to No), you will probably find your PAIRED data in the other data collection as it is impossible to determine if it was 2 biological reads or one biological and one technical.
+
+.. class:: warningmark
+
+By default, only biological reads are dumped and, in the case of a PAIRED dataset, only the spots which have both reads will be in the paired-end collection. The remaining single reads will be in the other collection.
+To keep all reads, and potentially not have the same number of reads in forward and reverse use the --split-files option in Advanced Options, Select how to split the spots.
+
+@ACCESSION_LIST_HOWTO@
+
+-----
+
+
+.. _fastq: https://en.wikipedia.org/wiki/FASTQ_format
+.. _fasterq-dump: https://github.com/ncbi/sra-tools/wiki/HowTo:-fasterq-dump
+.. _collection: https://galaxyproject.org/tutorials/collections/
+.. _link: https://trace.ncbi.nlm.nih.gov/Traces/index.html?view=run_browser&display=reads
+
+@SRATOOLS_ATTRRIBUTION@
+]]>
+    </help>
+    <expand macro="citation"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_dump.xml	Thu Feb 02 18:11:05 2023 -0500
@@ -0,0 +1,270 @@
+<tool id="fastq_dump" name="Download and Extract Reads in FASTQ" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>format from NCBI SRA</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="edam_ontology"/>
+    <expand macro="bio_tools"/>
+    <expand macro="requirements"/>
+    <version_command>fastq-dump --version | tr -d $'\n'</version_command>
+    <command detect_errors="exit_code"><![CDATA[
+    @COPY_CONFIGFILE@
+    @SET_ACCESSIONS@
+
+    #if $input.input_select == "sra_file":
+        ## A supplied SRA dataset is dumped directly.
+        fastq-dump --log-level fatal --accession '${input.file.name}'
+    #else:
+        ## Do not use prefetch if region is specified, to avoid downloading
+        ## the complete sra file.
+        #if ( str( $adv.region ) == "" ) and ( str( $adv.minID ) == "" ) and ( str( $adv.maxID ) == "" ):
+            prefetch -X 200000000 "\$acc" &&
+        #end if
+        fastq-dump --accession "\$acc"
+        --split-files
+    #end if
+    --defline-seq '@\$sn[_\$rn]/\$ri'
+    --defline-qual '+'
+
+    ## Optional filters: each flag is only emitted when its parameter is set.
+    $adv.split
+    #if str( $adv.alignments ) == "aligned":
+        --aligned
+    #end if
+    #if str( $adv.alignments ) == "unaligned":
+        --unaligned
+    #end if
+    #if str( $adv.minID ) != "":
+        --minSpotId "$adv.minID"
+    #end if
+    #if str( $adv.maxID ) != "":
+        --maxSpotId "$adv.maxID"
+    #end if
+    #if str( $adv.minlen ) != "":
+        --minReadLen "$adv.minlen"
+    #end if
+    #if str( $adv.readfilter ) != "":
+        --read-filter "$adv.readfilter"
+    #end if
+    #if str( $adv.region ) != "":
+        --aligned-region "$adv.region"
+    #end if
+    #if str( $adv.spotgroups ) != "":
+        --spot-groups "$adv.spotgroups"
+    #end if
+    #if str( $adv.matepairDist ) != "":
+        --matepair-distance "$adv.matepairDist"
+    #end if
+    $adv.clip
+    $adv.skip_technical
+
+    ## Let fastq-dump compress its own output when a compressed format is chosen.
+    #if str( $outputformat ) == "fastqsanger.gz":
+        --gzip
+    #elif str( $outputformat ) == "fastqsanger.bz2":
+        --bzip2
+    #end if
+
+    ## Quoted like the other free-text options so that a table name containing
+    ## spaces stays a single argument.
+    #if str($adv.table) != "":
+        --table "$adv.table"
+    #end if
+    ;
+
+    ## Rename the dumped files to the names the output discovery patterns
+    ## expect: two files become a forward/reverse pair, one file becomes a
+    ## single-end dataset. Any other file count is left in place and is
+    ## therefore not picked up from the output directory.
+    mkdir -p output &&
+    data=(\$(ls ./*.fast*));
+    if [ \${\#data[@]} -eq 2 ]; then
+        mv "\${data[0]}" output/"\${data[0]}"_forward.$outputformat;
+        mv "\${data[1]}" output/"\${data[1]}"_reverse.$outputformat;
+    elif [ \${\#data[@]} -eq 1 ]; then
+        mv "\${data[0]}" output/"\${data[0]}"__single.$outputformat;
+    fi;
+
+    ## For every input mode except a single SRA file, close the construct that
+    ## the SET_ACCESSIONS token opened above.
+    #if $input.input_select != "sra_file":
+        ); done;
+    #end if
+    echo "Done with all accessions."
+    ]]>
+    </command>
+    <expand macro="configfile_hack"/>
+    <inputs>
+        <expand macro="input_conditional"/>
+        <!-- The selected value doubles as the Galaxy datatype extension: the
+             command appends it to the renamed output files and uses it to
+             decide whether to pass a compression flag to fastq-dump. -->
+        <param name="outputformat" type="select" display="radio" label="Select output format" help="Compression will greatly reduce the amount of space occupied by downloaded data. Downstream applications such as short-read mappers will accept compressed data as input. Consider this example: an uncompressed 400 Mb fastq dataset compresses to 100 Mb or 80 Mb by gzip or bzip2, respectively." argument="--gzip --bzip2">
+            <option value="fastqsanger.gz">gzip compressed fastq</option>
+            <option value="fastqsanger">Uncompressed fastq</option>
+            <option value="fastqsanger.bz2">bzip2 compressed fastq</option>
+        </param>
+        <!-- Advanced fastq-dump options. In the command block each optional
+             value is only turned into a flag when it is non-empty. -->
+        <section name="adv" title="Advanced Options" expanded="False">
+            <!-- Setting either spot ID limit also disables the prefetch step
+                 in the command block. -->
+            <param name="minID" type="integer" label="Minimum spot ID" optional="true" help="Minimum spot id to be dumped." argument="--minSpotId"/>
+            <param name="maxID" type="integer" label="Maximum spot ID" optional="true" help="Maximum spot id to be dumped." argument="--maxSpotId"/>
+            <param name="minlen" type="integer" label="Minimum read length" optional="true" help="Filter by sequence length. Will dump only reads longer or equal to this value." argument="--minReadLen"/>
+            <!-- Checked by default; an unchecked box contributes nothing to the command line. -->
+            <param name="split" type="boolean" checked="true" truevalue="--split-spot" falsevalue="" label="Split spot by read pairs" help="Split spots into individual reads." argument="--split-spot"/>
+            <!-- The three macros below are expanded from the imported macros;
+                 the command block reads adv.alignments, adv.region and
+                 adv.matepairDist. -->
+            <expand macro="alignments"/>
+            <expand macro="region"/>
+            <expand macro="matepairDist"/>
+            <!-- The empty "None" option means no read filter is passed. -->
+            <param name="readfilter" type="select" value="" label="filter by value" argument="--read-filter">
+                <option value="">None</option>
+                <option value="pass">pass</option>
+                <option value="reject">reject</option>
+                <option value="criteria">criteria</option>
+                <option value="redacted">redacted</option>
+            </param>
+            <param name="spotgroups" type="text" label="Filter by spot-groups" optional="true" argument="--spot-groups"/>
+            <!-- The next three parameters carry no name attribute; the command
+                 block refers to them as adv.clip, adv.skip_technical and
+                 adv.table. -->
+            <param type="boolean" truevalue="--clip" falsevalue="" argument="--clip" label="Apply left and right clips" />
+            <param type="boolean" truevalue="--skip-technical" falsevalue="" checked="False" label="Dump only biological reads" argument="--skip-technical"/>
+            <param label="Table name within cSRA object" type="text" value="" optional="true" help="For SRA of noisy long-reads put SEQUENCE" argument="--table"/>
+        </section>
+    </inputs>
+    <outputs>
+        <collection name="list_paired" type="list:paired" label="Paired-end data (fastq-dump)">
+            <!-- The command renames each dumped file to
+                 <original name>_<forward|reverse>.<outputformat> inside
+                 "output", for instance
+                 SRR925743_1.fastq.gz_forward.fastqsanger.gz. identifier_0 (the
+                 text before the first underscore) is the list identifier of
+                 the nested collection and identifier_1 is either forward or
+                 reverse. There is one pattern per output format, and every
+                 dot is escaped so that it only matches a literal dot.
+            -->
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;[^_]+)_\d+\.fastq_(?P&lt;identifier_1&gt;[^_]+)\.fastqsanger" ext="fastqsanger" directory="output"/>
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;[^_]+)_\d+\.fastq\.gz_(?P&lt;identifier_1&gt;[^_]+)\.fastqsanger\.gz" ext="fastqsanger.gz" directory="output"/>
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;[^_]+)_\d+\.fastq\.bz2_(?P&lt;identifier_1&gt;[^_]+)\.fastqsanger\.bz2" ext="fastqsanger.bz2" directory="output"/>
+        </collection>
+        <!-- Single-end files are renamed to <original name>__single.<outputformat>. -->
+        <collection name="list_single" type="list" label="Single-end data (fastq-dump)">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)_\d+\.fastq__single\.fastqsanger" directory="output" ext="fastqsanger"/>
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)_\d+\.fastq\.gz__single\.fastqsanger\.gz" directory="output" ext="fastqsanger.gz"/>
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)_\d+\.fastq\.bz2__single\.fastqsanger\.bz2" directory="output" ext="fastqsanger.bz2"/>
+        </collection>
+    </outputs>
+    <tests>
+        <!-- Single accession, uncompressed, technical reads skipped: checks
+             the content of the one single-end dataset. -->
+        <test expect_num_outputs="2">
+            <param name="input_select" value="accession_number"/>
+            <param name="outputformat" value="fastqsanger"/>
+            <param name="accession" value="SRR044777"/>
+            <param name="skip_technical" value="True"/>
+            <output_collection name="list_single" type="list" count="1">
+                <element name="SRR044777">
+                    <assert_contents>
+                        <not_has_text text="rRNA_primer"/>
+                        <has_text text="F47USSH02GNP1D"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <!-- Paired accession limited to the first 5 spots, gzip output. -->
+        <test expect_num_outputs="2">
+            <param name="input_select" value="accession_number"/>
+            <param name="outputformat" value="fastqsanger.gz"/>
+            <param name="accession" value="SRR925743"/>
+            <param name="maxID" value="5"/>
+            <output_collection name="list_paired" type="list:paired" count="1">
+                <element name="SRR925743">
+                    <element name="forward" file="SRR925743_forward.fastqsanger" decompress="True"/>
+                    <element name="reverse" file="SRR925743_reverse.fastqsanger" decompress="True"/>
+                </element>
+            </output_collection>
+        </test>
+        <!-- Same accession and spot limit, uncompressed output. -->
+        <test expect_num_outputs="2">
+            <param name="input_select" value="accession_number"/>
+            <param name="outputformat" value="fastqsanger"/>
+            <param name="accession" value="SRR925743"/>
+            <param name="maxID" value="5"/>
+            <output_collection name="list_paired" type="list:paired" count="1">
+                <element name="SRR925743">
+                    <element name="forward" file="SRR925743_forward.fastqsanger"/>
+                    <element name="reverse" file="SRR925743_reverse.fastqsanger"/>
+                </element>
+            </output_collection>
+        </test>
+        <!-- Accession list (list_pe): one pair expected. -->
+        <test expect_num_outputs="2">
+            <param name="input_select" value="file_list"/>
+            <param name="outputformat" value="fastqsanger"/>
+            <param name="file_list" value="list_pe"/>
+            <param name="maxID" value="5"/>
+            <output_collection name="list_paired" type="list:paired" count="1">
+                <element name="DRR015708">
+                    <element name="forward" file="DRR015708_forward.fastqsanger"/>
+                    <element name="reverse" file="DRR015708_reverse.fastqsanger"/>
+                </element>
+            </output_collection>
+        </test>
+        <!-- Accession list (list_pe2): one pair expected. -->
+        <test expect_num_outputs="2">
+            <param name="input_select" value="file_list"/>
+            <param name="outputformat" value="fastqsanger"/>
+            <param name="file_list" value="list_pe2"/>
+            <param name="maxID" value="5"/>
+            <output_collection name="list_paired" type="list:paired" count="1">
+                <element name="ERR027433">
+                    <element name="forward" file="ERR027433_forward.fastqsanger"/>
+                    <element name="reverse" file="ERR027433_reverse.fastqsanger"/>
+                </element>
+            </output_collection>
+        </test>      
+        <!-- Accession list (list_se): one single-end dataset expected. -->
+        <test expect_num_outputs="2">
+            <param name="input_select" value="file_list"/>
+            <param name="outputformat" value="fastqsanger"/>
+            <param name="file_list" value="list_se"/>
+            <param name="maxID" value="5"/>
+            <output_collection name="list_single" type="list" count="1">
+                <element name="SRR1993644" file="SRR1993644.fastqsanger"/>
+            </output_collection>
+        </test>
+        <!-- Explicit table name (SEQUENCE) with the first 2 spots only. -->
+        <test expect_num_outputs="2">
+            <param name="input_select" value="accession_number"/>
+            <param name="outputformat" value="fastqsanger.gz"/>
+            <param name="accession" value="SRR6982805"/>
+            <param name="maxID" value="2"/>
+            <param name="table" value="SEQUENCE"/>
+            <output_collection name="list_single" type="list" count="1">
+                <element name="SRR6982805" file="SRR6982805.fastqsanger.gz" ftype="fastqsanger.gz" decompress="True"/>
+            </output_collection>
+        </test>
+        <!-- Two comma-separated accessions: one pair expected per accession. -->
+        <test expect_num_outputs="2">
+            <param name="input_select" value="accession_number"/>
+            <param name="outputformat" value="fastqsanger.gz"/>
+            <param name="accession" value="ERR086330, SRR11953971"/>
+            <output_collection name="list_paired" type="list:paired" count="2">
+                <element name="ERR086330">
+                    <element name="forward" file="ERR086330_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+                    <element name="reverse" file="ERR086330_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+                </element>
+                <element name="SRR11953971">
+                    <element name="forward" file="SRR11953971_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+                    <element name="reverse" file="SRR11953971_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+                </element>
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does?**
+
+This tool extracts data (in fastq_ format) from the Short Read Archive (SRA) at the National Center for Biotechnology Information (NCBI). It is based on the `fastq-dump <https://ncbi.github.io/sra-tools/fastq-dump.html>`_ utility of the SRA Toolkit.  The following applies:
+
+ - if data is paired-ended (or mate-pair) the tool will generate a collection of file pairs, in which each element will be a pair of fastq_ files containing forward and reverse mates.
+ - if data is single ended, each element of the collection will be a single fastq_ dataset.
+
+
+@HOW_TO_USE_IT@
+
+-----
+
+**Output**
+
+In every case, fastq datasets produced will be saved in Galaxy's history as a collection_ - a single history element containing multiple datasets. In fact, regardless of the experimental design, two collections will be produced: one containing paired-end data and another containing single-end data.
+Some collections may be empty if the accessions provided in the list do not contain one of the type of data.
+
+.. class:: warningmark
+
+When you decide to dump technical reads (in Advanced Options Dump only biological reads is set to No), you will probably find your PAIRED data in the other data collection as it is impossible to determine if it was 2 biological reads or one biological and one technical.
+
+.. class:: warningmark
+
+By default, only biological reads are dumped and, in the case of a PAIRED dataset, only the spots which have both reads will be in the paired-end collection. The remaining single reads will be in the other collection.
+To keep all reads, and potentially not have the same number of reads in forward and reverse use the --split-files option in Advanced Options, Select how to split the spots.
+
+@ACCESSION_LIST_HOWTO@
+
+-----
+
+
+.. _fastq: https://en.wikipedia.org/wiki/FASTQ_format
+.. _fasterq-dump: https://github.com/ncbi/sra-tools/wiki/HowTo:-fasterq-dump
+.. _collection: https://galaxyproject.org/tutorials/collections/
+.. _link: https://trace.ncbi.nlm.nih.gov/Traces/index.html?view=run_browser&display=reads
+
+@SRATOOLS_ATTRRIBUTION@
+]]>
+    </help>
+    <expand macro="citation"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Thu Feb 02 18:11:05 2023 -0500
@@ -0,0 +1,188 @@
+<macros>
+    <token name="@TOOL_VERSION@">3.0.0</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">21.09</token>
+    <xml name="edam_ontology">
+        <edam_topics>
+            <edam_topic>topic_0622</edam_topic> <!-- Genomics -->
+            <edam_topic>topic_0091</edam_topic> <!-- Bioinformatics -->
+        </edam_topics>
+        <edam_operations>
+            <edam_operation>operation_2422</edam_operation> <!-- Data retrieval -->
+            <edam_operation>operation_0335</edam_operation> <!-- Formatting -->
+        </edam_operations>
+    </xml>
+    <macro name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">sra-tools</requirement>
+            <requirement type="package" version="2.6">pigz</requirement>
+            <requirement type="package" version="1.16.1">samtools</requirement>
+            <yield/>
+        </requirements>
+    </macro>
+    <token name="@ACCESSIONS_FROM_FILE@">
+        grep '^[[:space:]]*[ESD]RR[0-9]\{1,\}[[:space:]]*$'
+    </token> <!-- grep filter keeping only lines that consist of one ERR/SRR/DRR run accession, optionally padded with whitespace -->
+    <token name="@COMPRESS@"><![CDATA[pigz -cqp \${GALAXY_SLOTS:-1}]]></token>
+    <token name="@CONFIGURE_RETRY@"><![CDATA[
+        export SRA_PREFETCH_RETRIES=3 &&
+        export SRA_PREFETCH_ATTEMPT=1 &&
+    ]]></token>
+    <xml name="bio_tools">
+        <xrefs>
+            <xref type="bio.tools">sra-tools</xref>
+        </xrefs>
+    </xml>
+    <token name="@COPY_CONFIGFILE@"><![CDATA[
+        mkdir -p ~/.ncbi &&
+        cp '$user_settings_mkfg' ~/.ncbi/user-settings.mkfg &&
+        vdb-config -s "/repository/user/main/public/root=\$PWD" &&
+        vdb-config -s "/repository/user/ad/public/root=\$PWD" &&
+        vdb-config -s "/repository/user/default-path=\$PWD" &&
+        vdb-config -s "/repository/user/main/public/root=\$PWD" &&
+        vdb-config -s /http/timeout/read=10000 &&
+    ]]></token>
+    <token name="@SET_ACCESSIONS@"><![CDATA[
+        #if $input.input_select == "sra_file":
+            acc='${input.sra_file.name}' &&
+            ln -s '${input.sra_file}' "\$acc" &&
+        #else    
+            #if $input.input_select == "file_list":
+                #if $input.file_list.is_of_type('sra_manifest.tabular'):
+                    #set $column = $input.file_list.unsanitized.metadata.column_names.index('Run') + 1
+                    cut -f $column '$input.file_list'| tail -n +2 > accessions &&
+                #else
+                    @ACCESSIONS_FROM_FILE@ '$input.file_list' > accessions &&
+                #end if
+            #elif $input.input_select == "accession_number":
+                echo '${input.accession}' | sed -r 's/(\,|\;|__cn__)/\n/g' > accessions &&
+            #end if
+            for acc in \$(cat ./accessions);
+            do (
+                echo "Downloading accession: \$acc..." &&
+        #end if  
+    ]]></token>
+    <macro name="configfile_hack">
+        <configfiles>
+            <configfile name="user_settings_mkfg"><![CDATA[
+/LIBS/GUID = "3cdc38d0-711a-49ce-9536-f544eaf69eec"
+/config/default = "false"
+/libs/temp_cache = "."
+/tools/prefetch/download_to_cache = "false"
+            ]]></configfile>
+        </configfiles>
+    </macro>
+    <macro name="sanitize_query">
+        <sanitizer>
+            <valid initial="string.printable">
+                <remove value=" "/>
+                <remove value="&apos;" />
+            </valid>
+            <mapping initial="none">
+                <add source=" " target=""/>
+                <add source="&apos;" target="&apos;&quot;&apos;&quot;&apos;"/>
+            </mapping>
+        </sanitizer>
+    </macro>
+    <macro name="input_conditional">
+        <conditional name="input">
+            <param name="input_select" type="select" label="select input type">
+                <option value="accession_number">SRR accession</option>
+                <option value="file_list">List of SRA accession, one per line</option>
+                <option value="sra_file">SRA archive in current history</option>
+            </param>
+            <when value="accession_number">
+                <param name="accession" type="text" label="Accession" multiple="true" help="Must start with SRR, DRR or ERR, e.g. SRR925743, ERR343809">
+                    <expand macro="sanitize_query"/>
+                    <validator type="empty_field" message="An accession is required"/>
+                </param>
+            </when>
+            <when value="sra_file">
+                <param format="sra" name="sra_file" type="data" label="sra archive"/>
+            </when>
+            <when value="file_list">
+                <param format="txt" name="file_list" type="data" label="sra accession list"/>
+            </when>
+        </conditional>
+    </macro>
+    <macro name="alignments">
+        <param name="alignments" type="select" value="both" label="Output aligned or unaligned reads" help="Output reads according to their alignment status." argument="--aligned and --unaligned">
+            <option value="both">both</option>
+            <option value="aligned">aligned only</option>
+            <option value="unaligned">unaligned only</option>
+        </param>
+    </macro>
+    <macro name="minMapq">
+        <param name="minMapq" type="integer" min="0" max="42" label="Minimum mapping quality" optional="true" help="Minimum mapping quality an alignment has to have, to be dumped." argument="--min-mapq"/>
+    </macro>
+    <macro name="region">
+        <param format="text" name="region" type="text" label="aligned region" optional="true"
+               help="Filter by position on genome. Can be either accession.version (ex: NC_000001.10), chromosome name (ex:chr1 or 1) or 1-based coordinates (ex: chr1:1-101)." argument="--aligned-region"/>
+    </macro>
+    <macro name="matepairDist">
+        <param name="matepairDist" type="text" label="mate-pair distance (from-to|unknown)" optional="true"
+               help="Filter by distance between matepairs. Use unknown to find matepairs split between the references. Use from-to (inclusive) to limit matepair distance on the same reference" argument="--matepair-distance"/>
+    </macro>
+    <macro name="citation">
+        <citations>
+            <citation type="doi">10.1093/nar/gkq1019</citation>
+            <citation type="bibtex">
+@misc{github_sratools,
+  author = {NCBI},
+  title = {sra-tools},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  url = {https://github.com/ncbi/sra-tools},
+}</citation>
+        </citations>
+    </macro>
+    <token name="@HOW_TO_USE_IT@">
+    **How to use it?**
+
+There are three ways in which you can download data:
+
+ 1. Plain text input of accession number(s)
+ 2. Providing a list of accessions from file
+ 3. Extracting data from an already uploaded SRA dataset
+
+Below we discuss each in detail.
+
+------
+
+**Plain text input of accession number(s)**
+
+When you type an accession number (e.g., `SRR1582967`) into the **Accession** box and click **Execute**, the tool will fetch the data for you. You can also provide a list of multiple accession numbers (e.g. `SRR3141592, SRR271828, SRR112358`).
+
+-----
+
+**Providing a list of accessions from file**
+
+A more realistic scenario is when you want to upload a number of datasets at once. To do this you need a list of accessions, where there is only one accession per line (see below for information on how to generate such a file). Once you have this file:
+
+ 1. Upload it into your history using Galaxy's upload tool
+ 2. Once the list of accessions is uploaded choose *List of SRA accessions, one per line* from **select input type** dropdown
+ 3. Choose uploaded file within the **sra accession list** field
+ 4. Click **Execute**
+
+-----
+
+**Extract data from an already uploaded SRA dataset**
+
+If an SRA dataset is already present in the history, the sequencing data can be extracted in a human-readable data format (fastq, sam, bam) by setting **select input type** drop-down to *SRA archive in current history*.
+    </token>
+    <token name="@ACCESSION_LIST_HOWTO@">
+-----
+
+**How to generate accession lists**
+
+ 1. Go to **SRA Run Selector** by clicking this link_
+ 2. Find the study you are interested in by typing a search term within the **Search** box. This can be a word (e.g., *mitochondria*) or an accession you have gotten from a paper (e.g., *SRR1582967*).
+ 3. Once you click on the study of interest you will see the number of datasets in this study within the **Related SRA data** box
+ 4. Click on the Runs number
+ 5. On the page that opens you will see the **Accession List** button
+ 6. Clicking on this button will produce a file that you will need to upload into Galaxy and use as the input to this tool.
+    </token>
+    <token name="@SRATOOLS_ATTRRIBUTION@">
+For credits, information, support and bug reports, please refer to https://github.com/galaxyproject/tools-iuc.
+    </token>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sam_dump.xml	Thu Feb 02 18:11:05 2023 -0500
@@ -0,0 +1,127 @@
+<tool id="sam_dump" name="Download and Extract Reads in BAM" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>format from NCBI SRA</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="edam_ontology"/>
+    <expand macro="bio_tools"/>
+    <expand macro="requirements"/>
+    <version_command>sam-dump --version | tr -d $'\n'</version_command>
+    <command detect_errors="exit_code"><![CDATA[
+        @COPY_CONFIGFILE@
+        @SET_ACCESSIONS@
+        ## The SET_ACCESSIONS token above defines the shell variable acc; for accession input it also opens a per-accession "do (" loop that is closed at the end of this block.
+        ## Do not use prefetch if region is specified, to avoid downloading
+        ## the complete sra file.
+
+        #if $input.input_select == "sra_file":
+            sam-dump --log-level fatal  --accession "\$acc"
+        #else:
+            #if ( str( $adv.region ) == "" ):
+                prefetch -X 200000000 "\$acc" &&
+            #end if
+            sam-dump --log-level fatal --disable-multithreading
+        #end if
+
+        #if str( $adv.region ) != "":
+            --aligned-region '$adv.region'
+        #end if
+        #if str( $adv.matepairDist ) != "":
+            --matepair-distance '$adv.matepairDist'
+        #end if
+        #if str( $adv.minMapq ) != "":
+            --min-mapq '$adv.minMapq'
+        #end if
+        --header
+        #if str( $adv.alignments ) == "both":
+            --unaligned
+        #end if
+        ## "unaligned only" selects --unaligned-spots-only; --primary is then skipped because no aligned records are requested.
+        #if str( $adv.alignments ) == "unaligned":
+            --unaligned-spots-only
+        #end if
+        #if (str( $adv.primary ) == "yes") and (str( $adv.alignments ) != "unaligned"):
+            --primary
+        #end if
+        "\$acc"
+        ## Write one output file per accession; the branch after it must terminate the command (loop close or &&) so the final echo is not taken as extra arguments.
+        #if str( $outputformat ) == "bam":
+            | samtools view -Sb - 2> /dev/null > "\$acc.bam"
+        #elif str( $outputformat ) == "sam":
+            > "\$acc.sam"
+        #end if
+        #if $input.input_select != "sra_file":
+            ); done; echo "Done with all accessions."
+        #else
+            && echo "Done with all accessions."
+        #end if
+        ]]>
+    </command>
+    <expand macro="configfile_hack"/>
+    <inputs>
+        <expand macro="input_conditional"/>
+        <param name="outputformat" type="select" display="radio" label="select output format" help="In vast majority of cases you want to download data in bam format. It is more compact and is accepted by all downstream tools.">
+            <option value="bam">bam</option>
+            <option value="sam">sam</option>
+        </param>
+        <section name="adv" title="Advanced Options" expanded="False"> <!-- values in this section are read by the command block as $adv.* -->
+            <expand macro="alignments"/>
+            <expand macro="region"/>
+            <expand macro="matepairDist"/>
+            <param name="primary" type="select" value="no">
+                <label>only primary alignments</label>
+                <option value="no">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <expand macro="minMapq"/>
+        </section>
+    </inputs>
+    <outputs>
+        <collection name="output_collection" type="list" label="sam-dump: Downloaded SAM/BAM data">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.bam" directory="." ext='bam'/>
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.sam" directory="." ext='sam'/>
+        </collection>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_select" value="accession_number"/>
+            <param name="accession" value="SRR925743"/>
+            <param name="outputformat" value="sam"/>
+            <param name="region" value="17:41243452-41277500"/>
+            <output_collection name="output_collection" type="list" count="1">
+                <element name="SRR925743" file="SRR925743_sam_dump_result.sam" compare="contains" ftype="sam"/>
+            </output_collection>
+        </test>
+        <test>
+            <param name="input_select" value="accession_number"/>
+            <param name="accession" value="SRR925743,SRR522874"/>
+            <param name="outputformat" value="sam"/>
+            <param name="region" value="17:41243452-41277500"/>
+            <output_collection name="output_collection" type="list" count="2">
+                <element name="SRR522874" file="SRR522874_sam_dump_result.sam" compare="contains" ftype="sam"/>
+                <element name="SRR925743" file="SRR925743_sam_dump_result.sam" compare="contains" ftype="sam"/>
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+**What it does?**
+
+This tool extracts data (in BAM_ format) from the Sequence Read Archive (SRA) at the National Center for Biotechnology Information (NCBI). It is based on the sam-dump_ utility of the SRA Toolkit and returns a collection of NGS data containing one file for each accession number provided.
+
+
+@HOW_TO_USE_IT@
+
+@ACCESSION_LIST_HOWTO@
+
+-----
+
+.. _sam-dump: https://github.com/ncbi/sra-tools
+.. _BAM: https://samtools.github.io/hts-specs/SAMv1.pdf
+.. _collection: https://galaxyproject.org/tutorials/collections/
+.. _link: https://trace.ncbi.nlm.nih.gov/Traces/index.html?view=run_browser&display=reads
+
+@SRATOOLS_ATTRRIBUTION@
+    ]]></help>
+    <expand macro="citation"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/DRR015708_forward.fastqsanger	Thu Feb 02 18:11:05 2023 -0500
@@ -0,0 +1,20 @@
+@HWI-ST895_0148_AC1GFWACXX:5:1101:1258:1938/1
+NTATTGTAGCGCACTATGAGGTCGCTCATTANTNTACATCNNCATCCAAATTTCAGCATNANTTTNNNNNTGTTTGATATTCTCAAAGNGATNAACGTTT
++
+#1=DDFFFHGHHHJIIIIGIJFHIJJJJJJI#0#08DFII##-7@FHIIJJJJGEHEEE#,#,,;#####,,;?DCCDEFEEFCDDDD#+2+#+2<?<CC
+@HWI-ST895_0148_AC1GFWACXX:5:1101:1667:1930/1
+NTTCTAAACTCATTCCAGAAGTAATTGATGCGTCACCAATAATTGCAATATGTTTTCTATTGATTTGATTGATTTTATCTGCTGTTGCCATCCCAACAAT
++
+#4=DDFFFHHHHHJJJJJJIIHIJJJJJJJJIHJJJJJIJJJJJJJJJJJJJIJJJJJJJJJJJJJJIIIICHHGEHHGIFEHHHHFFDFFFDDCEB@AC
+@HWI-ST895_0148_AC1GFWACXX:5:1101:1833:1936/1
+NTCACCTACAACTCGAATATTCGAATGATATGTGATATTGTTAGATGATTTTGGCATGCTTGCAGAAAGTGCATAAACACCTGTATTGATTCCCGAATTT
++
+#4=DFFFFHHHHHIJIJJIHIIJIIJJJJIJIHIIGGIIJHIEHGGHGHIIJJJIIJJJJJHHCHGGGE@FHGHIIJIEHHEDCCDCCDEEED@AB/=A#
+@HWI-ST895_0148_AC1GFWACXX:5:1101:2003:1864/1
+NCTTAAAACTTATTAAATGAATCAATTAGATAAAAGACTTTTTTTGTTAGATGCTTATGCCTTAATTTTTAGAGGATATTATGCATTTATCAAAAATCCA
++
+#4=DFFFFHHHHHJJJIJJJJJJJJJJJJJJJJIHGDGHIJJJJJJGHHJGIJIJJJJJJJIJIJJJJJHFFHGFDDFEEFEECCEEDEEDDCC3<A5>C
+@HWI-ST895_0148_AC1GFWACXX:5:1101:2399:1887/1
+NTAAAGGTTATTGTGCTTCACAGAAAATGCATTATTATGGGTATAAACTTCATGCGGTTTGCTCAGCGGAAGGTGTCTTTCAAAGTTTGGATATTAGTCC
++
+#1=DDFFEHHHHHGIJJJJGIJJJJJJJJJJJJIJJJJJJJCFGFIIHJIJHIJJJJHIJJJJIJIJIFHBDE;@ACEEEDDCCCCCDDDCCDCCFECCC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/DRR015708_reverse.fastqsanger	Thu Feb 02 18:11:05 2023 -0500
@@ -0,0 +1,20 @@
+@HWI-ST895_0148_AC1GFWACXX:5:1101:1258:1938/2
+CATTANNAGNNANNNTTNNNNGANNNNNTCGANNNNNANTTTTAANCGNNNNNCTCTTNGAGAATATCAAANAGNTNAAAAATTANGCTGANNNTNGNAT
++
+CCCFF##4<##2###32####22#####11?F#####0#07CHGI#.;#####--;BB#,;?ADDDEEDDD#,8#,#,58<?BD:#+2?@?#########
+@HWI-ST895_0148_AC1GFWACXX:5:1101:1667:1930/2
+GAAAGATGGTATTTCTGGTTTTCCAACGCGTACAGAAAGTGAGTTTGATACATTTGGAACGGGACATTCTTCTACATCTATTTCAGCAATTGTTGGGATG
++
+BBCFFDFFHCFHHJJJJJIJJJJJJJIJJJJJJJIHGIIDHGIFIIJIIGHIIHIIJDHIIJH=DFFFFFEEEEEDEDDDEDEDDDDDCDDDCDDDBBDB
+@HWI-ST895_0148_AC1GFWACXX:5:1101:1833:1936/2
+AAATATAATTGGAGAATCAACTTTATTTTCGAATACAATTCCAAATCAAAAAGAAGATAAAACACTAGAATTATCTCAAAAGAATTCAACTCAAAAAGAT
++
+CCCFFFFFHHHHGJIJIJJIJJJJJJJJJJJIJJJJJJJJJJJJEIJJJJHIJJIJJJIJJJIJJIJJJIJIJJJHHHGEHFFDFF>C@C>CEC;A538:
+@HWI-ST895_0148_AC1GFWACXX:5:1101:2003:1864/2
+ATTGAAACCTTTTGAGTTGATTCGTGGATTTTTGATAAATGCATAATATCCTCTAAAAATTAAGGCATAAGCATCTAACAAAAAAAGTCTTTTATCTAAT
++
+CCCFFFFFHHHHHJEGGHIHIIJJHHJHIJIJJJGJIJJJJJJJJJJJJJIIIJIJJIJIJJIJJJJIGHEHIIHHHHFHFFEDDDDCDEEDDDDDDEDD
+@HWI-ST895_0148_AC1GFWACXX:5:1101:2399:1887/2
+TTGGTATTCTGCGGATAAATATCCTTTATCAGCTAAGAGCGTACAATTTTTAAACTGCTGTTTTATATCTTTCAGATAATGAATGTCATGCACTGAAGCT
++
+BCCFBDFFHHHHHJJJJJJJJJJJJJJJJJJJJJIJIFIJJHHIIIJJJJGHJJJJJJJJHIIJJIJJHHHHHHGEFFFFFFCDCEEEFDDDDCDD>CDD
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ERR027433_forward.fastqsanger	Thu Feb 02 18:11:05 2023 -0500
@@ -0,0 +1,20 @@
+@IL4_4847:5:1:1051:7109/1
+GCCGCCAACGTCCGACGGCGCGTCCCGCACGACTTGAACCGATCTCACCGAGACAGAACTAT
++
+C@CC@@@C2BBCBB:>@8@@5@>>@@@@@>@97@@==<@>>@4<'@8>:35=%&B#######
+@IL4_4847:5:1:1058:16093/1
+AAGGTCGGGCATTCGTTCGAGCCGACGACCGCGAGGAAGCGGTTCGGCCGGGCGTAGAATCC
++
+?ABBABB;<@>@=<?@CBBB:@2>BCBB,,?):7@/3$54'818->1-+=+()$6--C8+?1
+@IL4_4847:5:1:1111:15034/1
+TCGATCCGCAGGGCAATCTGTGGATCCCGGCGTTCGACGACGGCGGGCGGGTGGCTCGGCAT
++
+2524;@@@/4<@6@;9?C@@93@>BAB2>>C@>B>C4BC4@-+B<@525++%=)########
+@IL4_4847:5:1:1122:6048/1
+ATCTCCTCGACGCGCACCAGATAGCGGTCGTAGCAATCGCCGTCCACGCCGACGGGCCCACT
++
+BBB@@@@@@@AB.<@2)7;B-@==:@>@@@A7@<AA4:A3'@9+4;&00'9+AC5--DA5,?
+@IL4_4847:5:1:1138:20693/1
+CTGCTCGCGCGAGAACGCGCGCAAGCTCGGGGGCGGTGGCGGGGTCGGGCGACACCGATGTA
++
+CB<C7@>);9@@@@=1B@@@@2@=*:4@@@0%(-$,'3%%'%$%$,44%3&$=$C;CC-C##
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ERR027433_reverse.fastqsanger	Thu Feb 02 18:11:05 2023 -0500
@@ -0,0 +1,20 @@
+@IL4_4847:5:1:1051:7109/3
+AACGCACTGCGCGCGGACTCGCCCCCGCGGCCACTCCGTGGGCCGGCGTACGGA
++
+)(664;(>>/:1<+/'())0)',8%'*7'+9904)'(+()'''':+4+'.7)'(
+@IL4_4847:5:1:1058:16093/3
+GCATAGCTGCCCTCGCCTTCTACTCGCACCGAACAAGCGGCCCGCCTACGCGGG
++
+;=>1(/2(:/=@(:'9/<(*'&.(?.6)(';+.5''-3.&''7(96%0:+($2$
+@IL4_4847:5:1:1111:15034/3
+GAGTGGAGCCGCGGACGGGAGCGCGGCGGCTGCCTCACAGCACCCGGGGGGTCG
++
+::3+:2655(*('((3*&&&4+')6'3/2,+++*.+')-/)4((<-+&&'&%<(
+@IL4_4847:5:1:1122:6048/3
+GAGGTCGGCGTTCTGGATGACAGGCGCGAAGCCCCCGCCGTCGGTGCCTTCACG
++
+=8+(-(,(3.5*=99+;).)8'(,(/(+(-6@'-3<6&,%/4++)')1/)>(.&
+@IL4_4847:5:1:1138:20693/3
+TGGACGTTGTGGTGGTTGTAAGAGATTTCGCTCCCCATGTTGGCGAGCTGCGAT
++
+;)?5;=9996@((((097:41=,A((+)5>9,:''''67+9)=(968-(8;8)3
Binary file test-data/ERR086330_1.fastq.gz has changed
Binary file test-data/ERR086330_2.fastq.gz has changed
Binary file test-data/SRR002702_1.fastq.gz has changed
Binary file test-data/SRR002702_2.fastq.gz has changed
Binary file test-data/SRR11859153.fastq.gz has changed
Binary file test-data/SRR11953971_1.fastq.gz has changed
Binary file test-data/SRR11953971_2.fastq.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/SRR1993644.fastqsanger	Thu Feb 02 18:11:05 2023 -0500
@@ -0,0 +1,20 @@
+@1/2
+TATACTACTGTCATGTTTGCTTTTTTCGTGCTCATTACCTTATCGTATGCTTCCATCCAAAGATCTAGTTGTTTATAATATTCTCCCGGCCTTACTTCAAC
++
+?@<DDDADDFAAC@>GG<FDCBHIIGB:8??DEAHGAFEDF<B?F9DBFFCDHF>48BBCF)8877=@=@C7=E??EBD:AADA;>C3;BB6:A@>>@:3>
+@2/2
+AAAAAAACTTTCTTTACAGGCGTAAAGAAAGTGAAATTGACAGTATTTATACATGAAATAGCAATGTCTTTCCCACTTCCCTACGCTGGCATTAACCAGAT
++
+<@@DA6DDF42ABGF9F?F@C<EDDDFBGI>04BGC>BFF><?*88BDFDEAFFDGCGEFEEFIFFFEF>EBDBB@@:ACCCAB8@?=;B<@BABBBBB?#
+@3/2
+GTGCCATCATTTTCTATCCATTATTATGGATTATTGGCTCATCGTTTAATCCGGGTGATAGTTTATCTGGATCAAGTATTATTCCACAAAATGCAACGTTA
++
+=BBFFFFFHHHHHJJJJJJJJJJJJIJIJEIIGIJJJHGIGGIIGHIDIJGHIIJ?FFHGIIJJJJJJJJHGIHHHCEHFFFFFFFFEAECCDADDDDDDD
+@4/2
+TTCAAAACACATAAAGCTAATTGCCGCATATGACAATATTGCTAAAATAATTTTTTTACCAGATATCGGTGTTAATCGAAATAATGTACTTTCGGTCATTT
++
+BBCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJJJJIJJJJJJIJJJJJJIIJJJJJJJIJJHHHHHFFDDCDEEEDDDDDDDDEDDDFFFEDDDDDDDDE
+@5/2
+ACTTGCCAATGCGATGCACCAATCTTTTCAGCAATAATCGGCAAAATTGGGTCGACTACTCCTATACCTGAAAAGGCAAGGAAAGTAGCCAACACTGTAAT
++
+BCCFFFFFHHHHHIJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJIHIIJIJHHHHFFFFFFEEEEEEDDDDDDDDDDDDDCDDDDDDDDDDDDDED
Binary file test-data/SRR522874.fastq.gz has changed
Binary file test-data/SRR522874.sra has changed
Binary file test-data/SRR522874.sra_1.fastq.gz has changed
Binary file test-data/SRR522874.sra_2.fastq.gz has changed
Binary file test-data/SRR522874.sra_3.fastq.gz has changed
Binary file test-data/SRR522874.sra_4.fastq.gz has changed
Binary file test-data/SRR522874_1.fastq.gz has changed
Binary file test-data/SRR522874_2.fastq.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/SRR522874_sam_dump_result.sam	Thu Feb 02 18:11:05 2023 -0500
@@ -0,0 +1,6 @@
+ETOOIVN07D9GPH	13	*	0	0	*	*	0	0	GAATCCCGATATCATCATGAA	2L5AW?.A@BAB?A@ABBCN8
+ETOOIVN07D9GPH	141	*	0	0	*	*	0	0	CATTGCTGAAAAACTCGGCGGCTGAGCGGGCTGGCAAGGC	8CN8=BC?]C7.%BA?I5?K7@>AA@AT@/A=K8BK8K7@
+ETOOIVN07ED00L	13	*	0	0	*	*	0	0	ACTGAACACCACGAAGTAGA	5B@@G/:BN8A>AM6CCAA@
+ETOOIVN07ED00L	141	*	0	0	*	*	0	0	AGTCGTACAGACGACGGTTGTCTGAGCGGGCTGGCAAGGC	B7@A?CB>>ABB=BCM6N8==BBBA=AV@1=@K8AJ7K8A
+ETOOIVN07EE1GA	13	*	0	0	*	*	0	0	GGAATTTTTCCGTTGCTGAT	7#K5]B7-$N8BM7BBCB>B
+ETOOIVN07EE1GA	141	*	0	0	*	*	0	0	GCCAGGTGGACGTTAAATATCTGAGCGGGCTGGCAAGGC	9M7BK7AK4A>:N8Z@1<CAC@BA??S?-?@K7AK8K8@
Binary file test-data/SRR6982805.fastqsanger.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/SRR925743_forward.fastqsanger	Thu Feb 02 18:11:05 2023 -0500
@@ -0,0 +1,20 @@
+@1/1
+AGGGATGTGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGT
++
+EGGEGGGDFGEEEAEECGDEGGFEEGEFGBEEDDECFEFDD@CDD<EDDE?2@?AEEDEED=D+C?5@A=6:>BA8:>@:4+36945&4354445>/B>@
+@2/1
+AGGGATGTGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGGTAGGGTTAGGGT
++
+HHHHHHEGFHEEFEEHEEHHGGEGGGGEFGFGGGGHHHHFBEEEEEFGGEEAEEEDD0ADDBD9BDBDDA@6?BA;?CD=:-7<<::)1;5,6-6A@?=9
+@3/1
+CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCA
++
+HHHHFHHHHHHHHHHHFHHHHHHHHHHHHHHHHHHHHHHHGEFHGEGGFGGGGGGHHHHEFEIDDEEEEEDD4DD;??:6>6<@.<==@?.@@<?#####
+@4/1
+CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACACTAACCCTAACCCTAACCCTAACCCTA
++
+HHGBHHHHGFHHHHHFDHHFHGEHHHHHEFHHHHEGEGEFFFAFFFDCFGF?@GCDFGFEFHHEFDF*F6DC4DD:A8>@D@CD8->=>=<@CB@#####
+@5/1
+CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACACTAACCCTCACACTCACCCTA
++
+GGGFGFFGGGFBGGEGGFFGGGCFFGGGGGEGFFFFFGFFGFFFDFFB+FGGFEE?FCF::7B5A?+BB###############################
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/SRR925743_reverse.fastqsanger	Thu Feb 02 18:11:05 2023 -0500
@@ -0,0 +1,20 @@
+@1/2
+CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTA
++
+GHHHDFDFGFGEGFBGEGGEGEGGGHGFGHFHFHHHHHHHEF?EFEFF?EHGGHHHGHFHEHDEHGHFFEEE9BDDBEBAD.AD:ACBBC=4@>?5>=+?
+@2/2
+CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTA
++
+HHHHHHHHHHHHHGHHHHHHGHHHHHHHHHHHFHHHFHHHHHHHHHHHHHHHHHHHGHHHGHHBHGHHFDBDDED5FCFCEEGF<@>>>@,<5<@@?>;D
+@3/2
+ATGGATGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGT
++
+HHHHHHHHHFIEGFHFHDHHHHGFFHGEGDIGGEGGHHHAGEGGGDHHHHHHHHHHHFGDGGGEGDFFF>BEEEE3B;BB;>B7C@DA=DFBDD.BEE=9
+@4/2
+ATGGATGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGT
++
+GFFFHHFFHHHFHHFEFGGBGEEEE?<9?6=>>:=DDDD@DBGDB;DBDBA.ADD8<2<>6A@=A5>550=>>>>BE;EEEDEEE79+336<68=BCEB@
+@5/2
+ATGGATGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGGTAGGGTTAGGGTTAGGGTTAGGGGTAGGGT
++
+479<.>><:<A7BABBE8@=:<<:@BB?C75:2?;.5<<3FEFGEEC88FEDEE=AB><AA@B<ABDC8.27<9:58.58??6<:@>+?=9@########
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/SRR925743_sam_dump_result.sam	Thu Feb 02 18:11:05 2023 -0500
@@ -0,0 +1,10 @@
+44155511	165	*	0	0	*	17	41275978	0	CATTAATGCTATGCAGATAATCATAGGAATCCCAAATTAATACACTCTTGTGCTGACTTACCAGATGGGACACTCTAAGATTTTCTGCATAGCATTAATG	HHHHHHHHHHHHHHHHHGHDHHHHHHHHHHHHHEHHHHHHHGHHHGHHHHHHHHHHHHHHHHHHHGFHHFHHHHHFHEBGHHHHHD<EFGBBBCAGFGE;	RG:Z:0
+44155516	165	*	0	0	*	17	41275988	0	CATTAATGCTATGCAGATAATCATAGGAATCCCAAATTAATACACTCTTGTGCTGACTTACCAGATGGGACACTCTAAGATTTTCTGCATAGCATTAATG	HHHHHHHHHHHHHHHHFGHHHHHHGHHHHHHHHGHFHHHHHHHEHHHHHFHHHHHFHHHHHHHHHFCCDDHFFHGFHHHBBHFHHFFF@FEFCCBBEE=:	RG:Z:0
+44155520	133	*	0	0	*	17	41276001	0	ATCCCAAATTAATACACTCTTAGAGTGTCCCATCTGGTAAGTCAGCACAAGAGTGTATTAATTTGGGATAGATCGGAAGAGCGTCGTGTAGGGAAAGAGG	HHHHHHHHHHHHHHHHHHHHHGHFHDHFHFHHHHHHHFHBHEHHFHHHHHGGGBGEHGGIHHHHHDHEHHEHHHHBHHHHHFFFFFEHEECHEBDEFEF#	RG:Z:0
+44155522	133	*	0	0	*	17	41276005	0	CAAATTAATACACTCTTCGCGTTGAAGAAGTACAAAATGTCATTAATGCTATGCAGAAAATCTTAGAGTGTCCCATCTGGTAAGTCAGCACAAGAGTGTA	HHHHHHHHHHHGHHHGHHHHGHHHFHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHEHFHEHGHHHHGHHHBHEHFHHHEF3FF######	RG:Z:0
+44155523	133	*	0	0	*	17	41276005	0	CAAATTAATACACTCTTCGCGTTGAAGAAGTACAAAATGTCATTAATGCTATGCAGAAAATCTTAGAGTGTCCCATCTGGTAAGTCAGCACAAGAGTGTA	FGDBGEFFDGGDEFGFFGFG=EACE>CBDDFCFBBDBCCDEGGFEEEE=ECADDFFGD@BGFFEEC8EEE=EEGDBDDDEGBDFG7@B>BAGBADGDEEB	RG:Z:0
+44155531	165	*	0	0	*	17	41276036	0	AAGTTCATTGGGACACTCTAAGATTTTCTGCATAGCATTAATGACATTTTGTACTTCTTCAACGCGAAGAGCAGATAAATCCATTTCTTTCTGTTCCAAT	HFHHEHHHHHHHHHFHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHEHHHHHACFIFGIGHHHFHFGHHHHHFHHFF5HH	RG:Z:0
+44155532	165	*	0	0	*	17	41276056	0	AGAAAGAAATGGATTTTCTGCATAGCATTAATGACATTTTGTACTTCTTCAACGCGAAGAGCAGATAAATCCATTTCTTTCTAGATCGGAAGAGCGTCGT	HHHHHHHHHHHHHHHHHGHHHHHHHHHHHFHHHHHHGHHHHFGHHGHHHHHHHHHHHHHHHEHHFGBGGGFHHHHHHDHHHHHHFGHHC:EA9BEEDDGB	RG:Z:0
+44155533	165	*	0	0	*	17	41276058	0	AGAAAGAAATGGATTTTCTGCATAGCATTAATGACATTTTGTACTTCTTCAACGCGAAGAGCAGATAAATCCATTTCTTTCTAGATCGGAAGAGCGTCGT	HHHHHHHHHHHHHHHHHHHHHHDEHEHHHHEHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHGHEHHEHHHHHHHHHHHHEHHHHHFHHFHHHEEHF9	RG:Z:0
+44155535	165	*	0	0	*	17	41276061	0	AGAAAGAAATGGATTTTCTGCATAGCATTAATGACATTTTGTACTTCTTCAACGCGAAGAGCAGATAAATCCATTTCTTTCTAGATCGGAAGAGCGTCGT	HHHHHHFHHHHHHHHHHHHGHHHFHHHHHFHHHHHFHHHHHHHHHHFHHHGFHHFGHHHHHHHHHEFHHHHHGHHGGHHGHHHHEGH=CHG@E<G@CEA?	RG:Z:0
+44155536	165	*	0	0	*	17	41276063	0	AACAGAAAGAAATGGATTTTCTGCATAGCATTAATGACATTTTGTACTTCTTCAACGCGAAGAGCAGATAAATCCATTTCTTTCTGTTAGATCGGAAGAG	HHHHHHHHHFHHHHHHHHHHHHHHHFHHHHHHHHHHHHHHHHHHHHHHHFHHHHEHEHHHEHGHHHFEHFHHHHHHHHHFHEHHGHFHHHHFBFHHHHHF	RG:Z:0
Binary file test-data/bam_dump_result.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/list_pe	Thu Feb 02 18:11:05 2023 -0500
@@ -0,0 +1,1 @@
+DRR015708
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/list_pe2	Thu Feb 02 18:11:05 2023 -0500
@@ -0,0 +1,1 @@
+ERR027433
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/list_se	Thu Feb 02 18:11:05 2023 -0500
@@ -0,0 +1,1 @@
+SRR1993644
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/list_sra	Thu Feb 02 18:11:05 2023 -0500
@@ -0,0 +1,2 @@
+SRR522874
+SRR002702
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sra_manifest.tabular	Thu Feb 02 18:11:05 2023 -0500
@@ -0,0 +1,2 @@
+Run	Assay Type	BioProject	BioSample	Center Name	Consent	DATASTORE filetype	DATASTORE provider	DATASTORE region	Experiment	Instrument	LibraryLayout	LibrarySelection	LibrarySource	Organism	Platform	Sample Name	SRA Study	Bytes	ReleaseDate	Library Name	Collection_Date	isolate	AvgSpotLen	Bases	BioSampleModel	Host	geo_loc_name_country	geo_loc_name_country_continent	geo_loc_name	host_disease	collected_by	isolation_source	lat_lon	host_sex	passage_history	strain	host_age	host_subject_id	ENA-FIRST-PUBLIC (run)	ENA-FIRST-PUBLIC	ENA-LAST-UPDATE (run)	ENA-LAST-UPDATE	geographic_location_(country_and/or_sea)	host_health_state	collecting_institution	collector_name	geographic_location_(region_and_locality)	host_common_name	host_scientific_name	gisaid_id	isolation_source_host-associated	sample_capture_status	isolate (run)	ARTIC_barcode_identifiers	host_tissue_sampled	library_id	AssemblyName	GEO_Accession (exp)	source_name	tissue	barcode_identifiers	Cell_type	host_description	sample_type	age_event	Age	Biomaterial_provider	cell_number	cell_processing_protocol	cell_quality	disease	disease_stage	ETHNICITY	health_state	sex	single_cell	synthetic	tissue_processing	cell_subsets	patient_group	disease_state	subject_status	tissue/cell_type	sample_origin	organ	host_disease_stage	stock_production_date	culture_collection	COUNTRY	Extraction-method	host_disease_outcome	identification_method	Laboratory_Host	link_addit_analys	ref_biomaterial	datatype (run)
+SRR11953971	RNA-Seq	PRJNA622837	SAMN14938999	BROAD INSTITUTE OF HARVARD AND MIT	public	bam,sra	gs,ncbi,s3	gs.US,ncbi.public,s3.us-east-1	SRX8498148	Illumina NovaSeq 6000	PAIRED	cDNA	VIRAL RNA	Severe acute respiratory syndrome coronavirus 2	ILLUMINA	MA_MGH_00524	SRP266465	207470	2020-06-08T00:00:00Z	SAMN14938999_ERCC-00040_RandomHexamer_NexteraXT	2020-03-17	not applicable	202	415514	Pathogen.cl	Homo sapiens	USA	North America	USA: Massachusetts	COVID-19	Massachusetts General Hospital	nasopharyngeal swab	missing		not applicable	not applicable		MA_MGH_00524																																																										
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sra_pileup_result.pileup	Thu Feb 02 18:11:05 2023 -0500
@@ -0,0 +1,10 @@
+CM000679.1	41277491	A	8	....,,,,	E4BHHHHG
+CM000679.1	41277492	T	8	....,,,,	G<FHCHEF
+CM000679.1	41277493	C	8	....,,,,	H7HGHEE8
+CM000679.1	41277494	A	8	....,,,,	ECGFHHHG
+CM000679.1	41277495	A	8	....,,,,	F@GGHHHE
+CM000679.1	41277496	G	8	....,,,,	DAGFHHHG
+CM000679.1	41277497	G	8	....,,,,	=AE@HHHF
+CM000679.1	41277498	T	8	....,,,,	F.?GFGFG
+CM000679.1	41277499	A	8	....,,,,	G:EGHGH=
+CM000679.1	41277500	C	8	....,,,,	E:EGHGGE