Mercurial > repos > jpayne > seqsero2s
diff SeqSero2S/bin/deinterleave_fastq.sh @ 19:cfc91e1d2c9b draft
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
| author | jpayne |
|---|---|
| date | Fri, 15 May 2026 17:50:45 +0000 |
| parents | |
| children |
line wrap: on
line diff
#!/bin/bash
# Usage: deinterleave_fastq.sh < interleaved.fastq f.fastq r.fastq [compress]
#
# Deinterleaves a FASTQ file of paired reads, read from standard input, into
# two FASTQ files specified on the command line. Optionally GZip compresses the
# output FASTQ files using pigz if the 3rd command line argument is the word
# "compress". Any other 3rd argument is ignored and plain output is written.
#
# Environment:
#   PIGZ_COMPRESSION_THREADS  pigz processes used per output file
#                             (positive integer, default 10).
#
# Exit status:
#   0  success
#   1  compression requested but the thread count is invalid or pigz is missing
#   2  usage error (fewer than two output paths given)
#   otherwise the status of the failing pipeline stage (pipefail is enabled)
#
# Can deinterleave 100 million paired reads (200 million total
# reads; a 43Gbyte file), in memory (/dev/shm), in 4m15s (255s)
#
# Latest code: https://gist.github.com/3521724
# Also see my interleaving script: https://gist.github.com/4544979
#
# Inspired by Torsten Seemann's blog post:
# http://thegenomefactory.blogspot.com.au/2012/05/cool-use-of-unix-paste-with-ngs.html

# Report a failure in any pipeline stage, not just the last one.
set -o pipefail

# Print the command-line synopsis to stderr.
usage() {
  printf 'Usage: %s < interleaved.fastq f.fastq r.fastq [compress]\n' \
    "${0##*/}" >&2
}

#######################################
# Split interleaved FASTQ records read from stdin into two files.
#
# paste joins every 8 input lines (one read pair) into a single
# tab-separated row; fields 1-4 are the first read and fields 5-8 the
# second. Each half is turned back into 4 lines by mapping tabs to
# newlines. Because cut splits on tabs, input lines are assumed not to
# contain literal tab characters.
#
# Globals:   PIGZ_COMPRESSION_THREADS (read, optional)
# Arguments: $1 - output path for the first read of each pair
#            $2 - output path for the second read of each pair
#            $3 - the word "compress" to gzip both outputs with pigz
# Returns:   0 on success, non-zero on error
#######################################
deinterleave_fastq() {
  local forward_out=$1
  local reverse_out=$2
  local mode=${3:-}
  local threads=${PIGZ_COMPRESSION_THREADS:-10}

  if [[ "$mode" != "compress" ]]; then
    paste - - - - - - - - \
      | tee >(cut -f 1-4 | tr '\t' '\n' > "$forward_out") \
      | cut -f 5-8 \
      | tr '\t' '\n' > "$reverse_out"
    return
  fi

  # Validate the compression setup before consuming any input or creating
  # any output file.
  if [[ ! "$threads" =~ ^[1-9][0-9]*$ ]]; then
    printf 'error: PIGZ_COMPRESSION_THREADS must be a positive integer, got "%s"\n' \
      "$threads" >&2
    return 1
  fi
  if ! command -v pigz > /dev/null 2>&1; then
    printf 'error: pigz is required for compressed output but was not found\n' >&2
    return 1
  fi

  paste - - - - - - - - \
    | tee >(cut -f 1-4 | tr '\t' '\n' \
        | pigz --best --processes "$threads" > "$forward_out") \
    | cut -f 5-8 \
    | tr '\t' '\n' \
    | pigz --best --processes "$threads" > "$reverse_out"
}

# Entry point: check the argument count, then deinterleave stdin.
main() {
  if (( $# < 2 )); then
    usage
    return 2
  fi
  deinterleave_fastq "$1" "$2" "${3:-}"
}

# Last command of the script, so its status becomes the exit status.
main "$@"
