annotate SeqSero2/bin/deinterleave_fastq.sh @ 18:a573df21536d tip

planemo upload
author jpayne
date Wed, 26 Mar 2025 00:28:55 -0400
parents 03f7b358d57f
children
rev   line source
#!/bin/bash
# Usage: deinterleave_fastq.sh < interleaved.fastq f.fastq r.fastq [compress]
#
# Deinterleaves a FASTQ file of paired reads (read on stdin) into two FASTQ
# files specified on the command line. Optionally gzip-compresses the output
# FASTQ files using pigz if the 3rd command line argument is the word "compress".
#
# Input is consumed in groups of 8 lines (two 4-line FASTQ records): lines 1-4
# of each group go to the first output file, lines 5-8 to the second.
# The lines of a group are joined with tabs internally, so an input line that
# itself contains a tab character will be split incorrectly.
#
# Exit status: 2 on bad usage, 1 if compression is requested but pigz is not
# installed, otherwise the status of the main pipeline (non-zero if any stage
# of it fails).
#
# Can deinterleave 100 million paired reads (200 million total
# reads; a 43Gbyte file), in memory (/dev/shm), in 4m15s (255s)
#
# Latest code: https://gist.github.com/3521724
# Also see my interleaving script: https://gist.github.com/4544979
#
# Inspired by Torsten Seemann's blog post:
# http://thegenomefactory.blogspot.com.au/2012/05/cool-use-of-unix-paste-with-ngs.html

# Report a failure of any stage of the main pipeline, not only the last one.
set -o pipefail

# Number of threads each of the two pigz processes is allowed to use.
readonly PIGZ_COMPRESSION_THREADS=10

#######################################
# Print the usage line to stderr and exit with status 2.
#######################################
usage() {
  printf 'Usage: %s < interleaved.fastq f.fastq r.fastq [compress]\n' \
    "${0##*/}" >&2
  exit 2
}

# Both output paths are mandatory; refuse to start (and to consume stdin)
# without them instead of failing later inside the pipeline.
if (( $# < 2 )) || [[ -z $1 || -z $2 ]]; then
  usage
fi

forward_out=$1
reverse_out=$2

# If the third argument is the word "compress" then we'll compress the output
# using pigz. Any other value (or no third argument) means plain-text output.
GZIP_OUTPUT=0
if [[ ${3:-} == "compress" ]]; then
  GZIP_OUTPUT=1
fi

# Check for pigz up front so that a missing binary is reported before any
# output file is created or truncated.
if (( GZIP_OUTPUT )) && ! command -v pigz >/dev/null; then
  printf '%s: pigz is required for compressed output but was not found\n' \
    "${0##*/}" >&2
  exit 1
fi

#######################################
# Turn tab-joined records read from stdin back into one line per field and
# write them to a file, gzip-compressed with pigz when GZIP_OUTPUT is 1.
# Globals:   GZIP_OUTPUT (read), PIGZ_COMPRESSION_THREADS (read)
# Arguments: $1 - path of the output file (created or truncated)
# Returns:   status of the tr | [pigz] pipeline
#######################################
write_reads() {
  local out=$1
  if (( GZIP_OUTPUT )); then
    tr "\t" "\n" \
      | pigz --best --processes "${PIGZ_COMPRESSION_THREADS}" > "$out"
  else
    tr "\t" "\n" > "$out"
  fi
}

# paste joins every 8 input lines into one tab-separated line; tee duplicates
# that stream so that fields 1-4 (first mate) and fields 5-8 (second mate) can
# be cut out and written concurrently in a single pass over the input.
paste - - - - - - - - \
  | tee >(cut -f 1-4 | write_reads "$forward_out") \
  | cut -f 5-8 \
  | write_reads "$reverse_out"