jpayne@69
|
1 #!/bin/bash
|
jpayne@69
|
2
|
jpayne@69
|
#######################################
# Print the help text for statswrapper.sh.
# Arguments: none
# Outputs:   the help text on stdout, preceded and followed by one blank line
#######################################
usage() {
  # The delimiter is quoted so the text is emitted literally (no expansion).
  cat <<'EOF'

Written by Brian Bushnell
Last modified August 1, 2017

Description: Runs stats.sh on multiple assemblies to produce one output line per file.

Usage: statswrapper.sh in=<input file>

Parameters:
in=<file> Specify the input fasta file, or stdin. For multiple files a, b, and c: 'statswrapper.sh in=a,b,c'.
'in=' may be omitted if this is the first arg, and asterisks may be used; e.g. statswrapper.sh *.fa
gc=<file> Writes ACGTN content per scaffold to a file.
gchist=<file> Filename to output scaffold gc content histogram.
gcbins=<200> Number of bins for gc histogram.
n=<10> Number of contiguous Ns to signify a break between contigs.
k=<13> Estimate memory usage of BBMap with this kmer length.
minscaf=<0> Ignore scaffolds shorter than this.
n_=<t> This flag will prefix the terms 'contigs' and 'scaffolds' with 'n_' in formats 3-6.
addname=<t> Adds a column for input file name, for formats 3-6.

format=<1 through 6> Format of the stats information. Default is format=3.
format=1 uses variable units like MB and KB, and is designed for compatibility with existing tools.
format=2 uses only whole numbers of bases, with no commas in numbers, and is designed for machine parsing.
format=3 outputs stats in 2 rows of tab-delimited columns: a header row and a data row.
format=4 is like 3 but with scaffold data only.
format=5 is like 3 but with contig data only.
format=6 is like 3 but the header starts with a #.

gcformat=<1 or 2> Select GC output format.
gcformat=1: name start stop A C G T N GC
gcformat=2: name GC
Note that in gcformat 1, A+C+G+T=1 even when N is nonzero.

EOF
}
|
jpayne@69
|
38
|
jpayne@69
|
# Resolve the directory that really contains this script, following any chain
# of symlinks, so the classpath is correct even when the script is symlinked.
# The caller's working directory is saved here and restored afterwards.
pushd . > /dev/null || exit 1
DIR="${BASH_SOURCE[0]}"
while [ -h "$DIR" ]; do
  # A relative link target is relative to the link's own directory, so move
  # there before reading the link.
  cd "$(dirname "$DIR")" || exit 1
  DIR="$(readlink "$(basename "$DIR")")"
done
# Abort rather than continue with a wrong DIR if the directory is unreachable.
cd "$(dirname "$DIR")" || exit 1
DIR="$(pwd)/"
popd > /dev/null || exit 1

#DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"
# Classpath handed to java: the "current/" directory next to this script.
CP="${DIR}current/"

# Default JVM heap flag, and a flag initialised to 0.
# NOTE(review): both are presumably read or updated by calcmem.sh's parseXmx - confirm.
z="-Xmx200m"
set=0
|
jpayne@69
|
55
|
jpayne@69
|
# Print the help text and stop when the first argument is missing, empty,
# or one of the help flags.
case "${1-}" in
  '' | -h | --help)
    usage
    exit
    ;;
esac
|
jpayne@69
|
60
|
jpayne@69
|
#######################################
# Load the memory helper script and run its setup on the given arguments.
# Globals:   DIR (read) - directory holding calcmem.sh
# Arguments: the script's command-line arguments, forwarded to parseXmx
# NOTE(review): setEnvironment and parseXmx are not defined in this file;
# they are presumably provided by calcmem.sh - confirm.
#######################################
calcXmx() {
  source "$DIR/calcmem.sh"
  setEnvironment
  parseXmx "$@"
}
|
jpayne@69
|
# Run the memory setup with the script's own arguments.
calcXmx "$@"
|
jpayne@69
|
67
|
jpayne@69
|
#######################################
# Run jgi.AssemblyStatsWrapper under java with format=3 plus the caller's
# arguments.
# Globals:   EA, EOOM, z (read) - JVM flags; CP (read) - classpath
# Arguments: parameters forwarded to AssemblyStatsWrapper, placed after
#            format=3
# Outputs:   the command line on stderr, then whatever java prints
# Returns:   the exit status of java
#######################################
stats() {
  # Build the command as an array and run it directly instead of through
  # eval: arguments and classpaths containing spaces stay intact, and shell
  # metacharacters inside arguments are never executed.
  # EA, EOOM and z are deliberately unquoted so an empty or unset flag adds
  # no argument at all.
  # shellcheck disable=SC2086
  local -a cmd=(java $EA $EOOM $z -cp "$CP" jgi.AssemblyStatsWrapper format=3 "$@")
  printf '%s\n' "${cmd[*]}" >&2
  "${cmd[@]}"
}
|
jpayne@69
|
73
|
jpayne@69
|
# Entry point: forward every command-line argument to stats.
stats "$@"
|