#!/bin/bash

# Print the help text for splitnextera.sh to stdout.
# Takes no arguments. The output starts and ends with an empty line.
# The here-document delimiter is quoted so nothing in the text is expanded.
# NOTE(review): the column alignment of the option tables (and the indentation
# of continuation lines) could not be recovered; the text is reproduced with
# single spaces -- re-align against the upstream script if desired.
usage(){
cat <<'EOF'

Written by Brian Bushnell
Last modified March 6, 2015

Description: Splits Nextera LMP libraries into subsets based on linker orientation:
LMP, fragment, unknown, and singleton.
Please read bbmap/docs/guides/SplitNexteraGuide.txt for more information.

Usage: splitnextera.sh in=<file> out=<file> outf=<file> outu=<file> outs=<file>

For pairs in two files, use in1, in2, out1, out2, etc.

*** Note ***
For maximal speed, before running splitnextera, the linkers can be replaced with a constant first.

In other words, you can either do this (which is slightly faster):
bbduk.sh in=reads.fq out=replaced.fq ktmask=J k=19 hdist=1 mink=11 hdist2=0 literal=CTGTCTCTTATACACATCTAGATGTGTATAAGAGACAG
splitnextera.sh in=replaced.fq out=longmate.fq outf=frag.fq outu=unknown.fq outs=singleton.fq

Or this:
splitnextera.sh in=reads.fq out=longmate.fq outf=frag.fq outu=unknown.fq outs=singleton.fq mask=t


I/O parameters:
in=<file> Input reads. Set to 'stdin.fq' to read from stdin.
out=<file> Output for pairs with LMP orientation.
outf=<file> Output for pairs with fragment orientation.
outu=<file> Pairs with unknown orientation.
outs=<file> Singleton output.
ow=f (overwrite) Overwrites files that already exist.
app=f (append) Append to files that already exist.
zl=4 (ziplevel) Set compression level, 1 (low) to 9 (max).
int=f (interleaved) Determines whether INPUT file is considered interleaved.
qin=auto ASCII offset for input quality. May be 33 (Sanger), 64 (Illumina), or auto.
qout=auto ASCII offset for output quality. May be 33 (Sanger), 64 (Illumina), or auto (same as input).

Processing Parameters:
mask=f Set to true if you did not already convert junctions to some symbol, and it will be done automatically.
junction=J Look for this symbol to designate the junction bases.
innerlmp=f Generate long mate pairs from the inner pair also, when the junction is found in both reads.
rename=t Rename read 2 of output when using single-ended input.
minlength=40 (ml) Do not output reads shorter than this.
merge=f Attempt to merge overlapping reads before looking for junctions.
testmerge=0.0 If nonzero, only merge reads if at least the fraction of input reads are mergable.

Sampling parameters:

reads=-1 Set to a positive number to only process this many INPUT reads (or pairs), then quit.
samplerate=1 Randomly output only this fraction of reads; 1 means sampling is disabled.
sampleseed=-1 Set to a positive number to use that prng seed for sampling (allowing deterministic sampling).

Java Parameters:
-Xmx This will set Java's memory usage, overriding autodetection.
-Xmx20g will specify 20 gigs of RAM, and -Xmx200m will specify 200 megs.
The max is typically 85% of physical memory.
-eoom This flag will cause the process to exit if an out-of-memory
exception occurs. Requires Java 8u92+.
-da Disable assertions.

Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems.

EOF
}

#This block allows symlinked shellscripts to correctly set classpath.
# It follows the chain of symlinks starting at this script's own path and
# leaves DIR set to the directory that holds the final target, with a
# trailing slash. pushd/popd bracket the walk so the caller's working
# directory is restored afterwards.
pushd . > /dev/null
DIR="${BASH_SOURCE[0]}"
while [ -h "$DIR" ]; do
	# cd first so that a relative link target is resolved against the
	# directory containing the link, not the original working directory.
	cd "$(dirname "$DIR")"
	DIR="$(readlink "$(basename "$DIR")")"
done
cd "$(dirname "$DIR")"
DIR="$(pwd)/"
popd > /dev/null

# Simpler alternative that does not follow symlinks (kept for reference):
#DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"

# Classpath handed to java via -cp: the "current/" directory under DIR.
CP="$DIR""current/"

# Default JVM heap flag, used in the java command line.
# NOTE(review): presumably replaced by parseXmx when the user passes -Xmx
# or memory is autodetected -- confirm in calcmem.sh.
z="-Xmx200m"
# NOTE(review): not read anywhere in this script; presumably a flag consumed
# by calcmem.sh -- confirm before removing.
set=0

# Show the help text and stop when the script is run without arguments or
# when the first argument is -h or --help. The bare `exit` returns the status
# of the preceding `usage` call.
if [[ -z "$1" || "$1" == -h || "$1" == --help ]]; then
	usage
	exit
fi

# Load the shared memory helpers and let them process the command line.
# Globals:   DIR (read) - directory containing calcmem.sh
# Arguments: the script's full argument list, forwarded to parseXmx
# Sources calcmem.sh from DIR, then calls its setEnvironment and parseXmx.
# NOTE(review): these helpers presumably set the EA, EOOM and z variables
# used when launching java -- confirm in calcmem.sh.
calcXmx () {
	source "$DIR""/calcmem.sh"
	setEnvironment
	parseXmx "$@"
}
# Run the memory/environment setup with the script's original arguments.
calcXmx "$@"

# Launch the Java class jgi.SplitNexteraLMP with the given arguments.
# Globals:   EA, EOOM, z (read) - JVM flags; CP (read) - classpath
# Arguments: forwarded unchanged to the Java program
# Outputs:   echoes the command line to stderr before running it
# Returns:   the exit status of java
#
# The command is built as an array and executed directly rather than being
# flattened into a string and passed to eval, so arguments (and a classpath)
# containing spaces or shell metacharacters reach java intact and are never
# re-interpreted by the shell.
function splitnextera() {
	# EA, EOOM and z are deliberately unquoted: an empty value must vanish
	# instead of becoming an empty argument, and a value holding several
	# flags must split into separate words.
	# shellcheck disable=SC2206
	local -a cmd=(java $EA $EOOM $z -cp "$CP" jgi.SplitNexteraLMP "$@")
	printf '%s\n' "${cmd[*]}" >&2
	"${cmd[@]}"
}

# Entry point: run the tool with the script's original arguments.
splitnextera "$@"