Mercurial > repos > jpayne > seqsero2s
comparison SeqSero2S/bin/deinterleave_fastq.sh @ 19:cfc91e1d2c9b draft
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
| author | jpayne |
|---|---|
| date | Fri, 15 May 2026 17:50:45 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 18:6ae6c7a51b22 | 19:cfc91e1d2c9b |
|---|---|
| 1 #!/bin/bash | |
| 2 # Usage: deinterleave_fastq.sh < interleaved.fastq f.fastq r.fastq [compress] | |
| 3 # | |
| 4 # Deinterleaves a FASTQ file of paired reads into two FASTQ | |
| 5 # files specified on the command line. Optionally GZip compresses the output | |
| 6 # FASTQ files using pigz if the 3rd command line argument is the word "compress" | |
| 7 # | |
| 8 # Can deinterleave 100 million paired reads (200 million total | |
| 9 # reads; a 43Gbyte file), in memory (/dev/shm), in 4m15s (255s) | |
| 10 # | |
| 11 # Latest code: https://gist.github.com/3521724 | |
| 12 # Also see my interleaving script: https://gist.github.com/4544979 | |
| 13 # | |
| 14 # Inspired by Torsten Seemann's blog post: | |
| 15 # http://thegenomefactory.blogspot.com.au/2012/05/cool-use-of-unix-paste-with-ngs.html | |
| 16 | |
| 17 # Set up some defaults | |
| 18 GZIP_OUTPUT=0 | |
| 19 PIGZ_COMPRESSION_THREADS=10 | |
| 20 | |
| 21 # Both output paths are required. Fail early with a usage message instead of | |
| 22 # letting the redirections below fail on an empty file name. | |
| 23 if [[ $# -lt 2 ]]; then | |
| 24 printf 'Usage: %s < interleaved.fastq f.fastq r.fastq [compress]\n' "${0##*/}" >&2 | |
| 25 exit 1 | |
| 26 fi | |
| 27 | |
| 28 # If the third argument is the word "compress" then we'll compress the output using pigz | |
| 29 if [[ "${3:-}" == "compress" ]]; then | |
| 30 GZIP_OUTPUT=1 | |
| 31 fi | |
| 32 | |
| 33 # paste joins every 8 input lines (one read pair) into a single tab-separated record. | |
| 34 # tee duplicates each record: fields 1-4 go to the first output file through the | |
| 35 # process substitution, fields 5-8 go to the second output file through the main pipeline. | |
| 36 # tr turns the tabs back into newlines to restore the 4-line FASTQ layout. | |
| 37 # Output paths are quoted so names containing spaces or glob characters work. | |
| 38 # NOTE(review): bash does not wait for the >( ) process, so the first output may | |
| 39 # still be flushing when this script exits - confirm callers tolerate that. | |
| 40 if [[ "${GZIP_OUTPUT}" == 0 ]]; then | |
| 41 paste - - - - - - - - | tee >(cut -f 1-4 | tr "\t" "\n" > "$1") | cut -f 5-8 | tr "\t" "\n" > "$2" | |
| 42 else | |
| 43 paste - - - - - - - - | tee >(cut -f 1-4 | tr "\t" "\n" | pigz --best --processes "${PIGZ_COMPRESSION_THREADS}" > "$1") | cut -f 5-8 | tr "\t" "\n" | pigz --best --processes "${PIGZ_COMPRESSION_THREADS}" > "$2" | |
| 44 fi |
