Mercurial > repos > jpayne > seqsero_v2
annotate SeqSero2/deinterleave_fastq.sh @ 14:496f5f2b5e75
Uploaded
author | estrain |
---|---|
date | Tue, 14 Feb 2023 12:11:27 -0500 |
parents | fae43708974d |
children |
rev | line source |
---|---|
#!/bin/bash
# Usage: deinterleave_fastq.sh < interleaved.fastq f.fastq r.fastq [compress]
#
# Deinterleaves a FASTQ file of paired reads, read from stdin, into the two
# FASTQ files named on the command line: the first record of every pair is
# written to f.fastq and the second to r.fastq. Optionally GZip compresses
# the output FASTQ files using pigz if the 3rd command line argument is the
# word "compress" (any other value leaves the output uncompressed).
#
# Environment:
#   PIGZ_COMPRESSION_THREADS  number of pigz processes per output file
#                             (default: 10); only used with "compress".
#
# Exit status: 0 on success, 2 on a usage error, 1 if pigz is required but
# missing, otherwise the status of the failing pipeline stage.
#
# Limitations of the paste/cut approach used below:
#   * every record must be exactly 4 lines; if the number of input lines is
#     not a multiple of 8, paste pads the last row with empty fields, which
#     end up as blank lines in the output;
#   * a literal tab inside an input line shifts the fields and corrupts the
#     split, because tab is used as the temporary column separator.
#
# Can deinterleave 100 million paired reads (200 million total
# reads; a 43Gbyte file), in memory (/dev/shm), in 4m15s (255s)
#
# Latest code: https://gist.github.com/3521724
# Also see my interleaving script: https://gist.github.com/4544979
#
# Inspired by Torsten Seemann's blog post:
# http://thegenomefactory.blogspot.com.au/2012/05/cool-use-of-unix-paste-with-ngs.html

# Fail on unset variables and on a failure in any stage of a pipeline, not
# just the last one. (Failures inside the >(...) branch are not covered by
# pipefail.)
set -euo pipefail

# Print the usage line to stderr.
usage() {
  printf 'Usage: %s < interleaved.fastq f.fastq r.fastq [compress]\n' \
    "${0##*/}" >&2
}

# Both output paths are mandatory; without them the redirections below would
# fail with an unhelpful "ambiguous redirect" error.
if (( $# < 2 )) || [[ -z "$1" || -z "$2" ]]; then
  usage
  exit 2
fi

forward_fastq=$1
reverse_fastq=$2

# Set up some defaults
GZIP_OUTPUT=0
PIGZ_COMPRESSION_THREADS=${PIGZ_COMPRESSION_THREADS:-10}

# If the third argument is the word "compress" then we'll compress the output using pigz
if [[ "${3:-}" == "compress" ]]; then
  GZIP_OUTPUT=1
fi

# `paste - - - - - - - -` joins every 8 consecutive input lines (one read
# pair) into a single tab-separated row. tee duplicates each row: the copy
# sent to the process substitution keeps fields 1-4 (first mate), the copy
# continuing down the pipe keeps fields 5-8 (second mate). tr then turns the
# tabs back into newlines to restore the 4-line FASTQ records.
if [[ "${GZIP_OUTPUT}" == 0 ]]; then
  paste - - - - - - - - \
    | tee >(cut -f 1-4 | tr "\t" "\n" > "${forward_fastq}") \
    | cut -f 5-8 \
    | tr "\t" "\n" > "${reverse_fastq}"
else
  # Check for pigz up front so a missing binary is reported clearly instead
  # of surfacing as a "command not found" from the middle of the pipeline.
  if ! command -v pigz > /dev/null; then
    printf '%s: pigz is required for compressed output but was not found in PATH\n' \
      "${0##*/}" >&2
    exit 1
  fi

  paste - - - - - - - - \
    | tee >(cut -f 1-4 | tr "\t" "\n" \
        | pigz --best --processes "${PIGZ_COMPRESSION_THREADS}" > "${forward_fastq}") \
    | cut -f 5-8 \
    | tr "\t" "\n" \
    | pigz --best --processes "${PIGZ_COMPRESSION_THREADS}" > "${reverse_fastq}"
fi