#!/bin/bash
# Usage: deinterleave_fastq.sh < interleaved.fastq f.fastq r.fastq [compress]
#
# Deinterleaves a FASTQ file of paired reads (read from stdin) into two FASTQ
# files specified on the command line. Optionally GZip compresses the output
# FASTQ files using pigz if the 3rd command line argument is the word "compress"
#
# Arguments:
#   $1  output path for the first mate of every pair  (records 1, 3, 5, ...)
#   $2  output path for the second mate of every pair (records 2, 4, 6, ...)
#   $3  optional; the literal word "compress" enables pigz compression.
#       Any other value is ignored and plain FASTQ is written.
#
# Environment:
#   PIGZ_COMPRESSION_THREADS  number of pigz processes per output (default 10)
#
# Exit status:
#   0 on success, 2 on a usage error, 1 if pigz is requested but not installed,
#   otherwise the status of the failing pipeline stage.
#
# Limitation: records are joined with TABs by paste and split again by cut, so
# a FASTQ line that itself contains a TAB character will be mis-split.
#
# Can deinterleave 100 million paired reads (200 million total
# reads; a 43Gbyte file), in memory (/dev/shm), in 4m15s (255s)
#
# Latest code: https://gist.github.com/3521724
# Also see my interleaving script: https://gist.github.com/4544979
#
# Inspired by Torsten Seemann's blog post:
# http://thegenomefactory.blogspot.com.au/2012/05/cool-use-of-unix-paste-with-ngs.html

# Report a failure of any pipeline stage, not only of the last one.
set -o pipefail

# Set up some defaults
GZIP_OUTPUT=0
PIGZ_COMPRESSION_THREADS=${PIGZ_COMPRESSION_THREADS:-10}

# Print the usage line to stderr.
usage() {
  printf 'Usage: %s < interleaved.fastq f.fastq r.fastq [compress]\n' \
    "${0##*/}" >&2
}

#######################################
# Split interleaved FASTQ from stdin into two files.
# Globals:   PIGZ_COMPRESSION_THREADS (read)
# Arguments: $1 - output path for the first mates
#            $2 - output path for the second mates
#            $3 - 1 to compress both outputs with pigz, 0 (default) otherwise
# Returns:   status of the pipeline (non-zero if any stage failed)
#######################################
deinterleave() {
  local fwd=$1
  local rev=$2
  local gzip_output=${3:-0}

  # paste folds every 8 input lines (one read pair) into a single
  # TAB-separated line; fields 1-4 are the first mate, fields 5-8 the second.
  # tee duplicates that stream so each mate is cut out and unfolded separately.
  # Output paths are quoted so names with spaces or glob characters work.
  if [[ "${gzip_output}" == 0 ]]; then
    paste - - - - - - - - \
      | tee >(cut -f 1-4 | tr '\t' '\n' >"${fwd}") \
      | cut -f 5-8 \
      | tr '\t' '\n' >"${rev}"
  else
    paste - - - - - - - - \
      | tee >(cut -f 1-4 | tr '\t' '\n' \
          | pigz --best --processes "${PIGZ_COMPRESSION_THREADS}" >"${fwd}") \
      | cut -f 5-8 \
      | tr '\t' '\n' \
      | pigz --best --processes "${PIGZ_COMPRESSION_THREADS}" >"${rev}"
  fi
}

#######################################
# Validate the command line and run the deinterleaving.
# Globals:   GZIP_OUTPUT (written)
# Arguments: the script's positional parameters
# Returns:   0 on success, 2 on usage error, 1 if pigz is missing,
#            otherwise the pipeline's status
#######################################
main() {
  # Both output paths are mandatory; refuse to start (and to consume stdin)
  # without them instead of failing later with a redirect error.
  if [[ -z "${1:-}" || -z "${2:-}" ]]; then
    usage
    return 2
  fi

  # If the third argument is the word "compress" then we'll compress the
  # output using pigz
  GZIP_OUTPUT=0
  if [[ "${3:-}" == "compress" ]]; then
    GZIP_OUTPUT=1
    if ! command -v pigz >/dev/null 2>&1; then
      printf '%s: pigz is required for compressed output but was not found\n' \
        "${0##*/}" >&2
      return 1
    fi
  fi

  deinterleave "$1" "$2" "${GZIP_OUTPUT}"
}

# Must stay the last statement: its return status is the script's exit status.
main "$@"