Mercurial > repos > jpayne > seqsero_v2
comparison SeqSero2/deinterleave_fastq.sh @ 1:fae43708974d
planemo upload
author | jpayne |
---|---|
date | Fri, 09 Nov 2018 11:30:45 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:4ff2aee11e5b | 1:fae43708974d |
---|---|
1 #!/bin/bash | |
2 # Usage: deinterleave_fastq.sh < interleaved.fastq f.fastq r.fastq [compress] | |
3 # | |
4 # Deinterleaves a FASTQ file of paired reads into two FASTQ | |
5 # files specified on the command line. Optionally GZip compresses the output | |
6 # FASTQ files using pigz if the 3rd command line argument is the word "compress" | |
7 # | |
8 # Can deinterleave 100 million paired reads (200 million total | |
9 # reads; a 43Gbyte file), in memory (/dev/shm), in 4m15s (255s) | |
10 # | |
11 # Latest code: https://gist.github.com/3521724 | |
12 # Also see my interleaving script: https://gist.github.com/4544979 | |
13 # | |
14 # Inspired by Torsten Seemann's blog post: | |
15 # http://thegenomefactory.blogspot.com.au/2012/05/cool-use-of-unix-paste-with-ngs.html | |
16 | |
# Set up some defaults
GZIP_OUTPUT=0
# Number of pigz threads; defaults to 10 but may be overridden from the
# environment.
PIGZ_COMPRESSION_THREADS=${PIGZ_COMPRESSION_THREADS:-10}

# Make a pipeline fail when any of its stages fails, not only the last one.
set -o pipefail

#######################################
# Convert tab-joined FASTQ records read from stdin back to one field per
# line and write them to a file, compressing with pigz when GZIP_OUTPUT is 1.
# Globals:   GZIP_OUTPUT (read), PIGZ_COMPRESSION_THREADS (read)
# Arguments: $1 - output file path
# Returns:   status of the tr/pigz pipeline
#######################################
write_fastq() {
  if [[ ${GZIP_OUTPUT} == 1 ]]; then
    tr '\t' '\n' \
      | pigz --best --processes "${PIGZ_COMPRESSION_THREADS}" > "$1"
  else
    tr '\t' '\n' > "$1"
  fi
}

#######################################
# Deinterleave a FASTQ stream of paired reads (stdin) into two FASTQ files.
# Every 8 input lines form one pair: lines 1-4 go to the first file and
# lines 5-8 to the second.
# NOTE: records are joined and split on tabs, so input lines that themselves
# contain a tab character are not supported.
# Globals:   GZIP_OUTPUT (written)
# Arguments: $1 - output file for the first read of each pair
#            $2 - output file for the second read of each pair
#            $3 - optional; the word "compress" gzips both outputs with pigz
# Outputs:   usage/diagnostics on stderr
# Returns:   0 on success, 2 on bad usage, 127 if pigz is required but
#            missing, otherwise the status of the failed pipeline stage
#######################################
deinterleave_fastq() {
  if [[ $# -lt 2 || -z $1 || -z $2 ]]; then
    printf 'Usage: %s < interleaved.fastq f.fastq r.fastq [compress]\n' \
      "${0##*/}" >&2
    return 2
  fi

  local forward_out=$1
  local reverse_out=$2

  # If the third argument is the word "compress" then we'll compress the
  # output using pigz
  GZIP_OUTPUT=0
  if [[ ${3:-} == "compress" ]]; then
    GZIP_OUTPUT=1
    if ! command -v pigz > /dev/null; then
      printf '%s: pigz is required for compressed output\n' "${0##*/}" >&2
      return 127
    fi
  fi

  # paste joins each group of 8 lines into one tab-separated record. tee
  # duplicates every record: one copy continues down the inner pipeline
  # (fields 5-8, second read), the other is written to fd 3, which is the
  # outer pipe (fields 1-4, first read). Both writers are ordinary pipeline
  # stages, so the shell waits for both and pipefail reports a failure in
  # either of them.
  {
    paste - - - - - - - - \
      | tee /dev/fd/3 \
      | cut -f 5-8 \
      | write_fastq "${reverse_out}"
  } 3>&1 \
    | cut -f 1-4 \
    | write_fastq "${forward_out}"
}

# Last command of the script, so its status becomes the script's exit status.
deinterleave_fastq "$@"