#!/bin/bash

# Print the help text for statswrapper.sh to stdout.
# Arguments: none.
# Outputs:   the help text, preceded and followed by one blank line.
# NOTE(review): the <file> placeholders on the Usage line and on in=, gc= and
# gchist= were restored from their descriptions (each names a file); n_= and
# addname= may have lost placeholders as well - confirm against stats.sh.
usage(){
  # Quoted delimiter: the text is emitted literally, so '*.fa' is not expanded.
  cat <<'EOF'

Written by Brian Bushnell
Last modified August 1, 2017

Description: Runs stats.sh on multiple assemblies to produce one output line per file.

Usage: statswrapper.sh in=<file>

Parameters:
in=<file>       Specify the input fasta file, or stdin. For multiple files a, b, and c: 'statswrapper.sh in=a,b,c'.
                'in=' may be omitted if this is the first arg, and asterisks may be used; e.g. statswrapper.sh *.fa
gc=<file>       Writes ACGTN content per scaffold to a file.
gchist=<file>   Filename to output scaffold gc content histogram.
gcbins=<200>    Number of bins for gc histogram.
n=<10>          Number of contiguous Ns to signify a break between contigs.
k=<13>          Estimate memory usage of BBMap with this kmer length.
minscaf=<0>     Ignore scaffolds shorter than this.
n_=             This flag will prefix the terms 'contigs' and 'scaffolds' with 'n_' in formats 3-6.
addname=        Adds a column for input file name, for formats 3-6.

format=<1 through 6>    Format of the stats information. Default is format=3.
        format=1 uses variable units like MB and KB, and is designed for compatibility with existing tools.
        format=2 uses only whole numbers of bases, with no commas in numbers, and is designed for machine parsing.
        format=3 outputs stats in 2 rows of tab-delimited columns: a header row and a data row.
        format=4 is like 3 but with scaffold data only.
        format=5 is like 3 but with contig data only.
        format=6 is like 3 but the header starts with a #.

gcformat=<1 or 2>       Select GC output format.
        gcformat=1: name start stop A C G T N GC
        gcformat=2: name GC
        Note that in gcformat 1, A+C+G+T=1 even when N is nonzero.

EOF
}

#This block allows symlinked shellscripts to correctly set classpath.
# Starting from the path this script was invoked as, follow symlinks until a
# real file is reached and record the directory holding it (with a trailing
# slash) in DIR. The walk runs in a subshell so the caller's working directory
# and directory stack are never touched, and every cd is checked so that a
# broken link aborts instead of silently producing the wrong directory.
# readlink may return a path relative to the directory of the link, which is
# why each hop changes into that directory before resolving the next one.
DIR="$(
  path="${BASH_SOURCE[0]}"
  while [ -h "$path" ]; do
    cd "$(dirname "$path")" > /dev/null || exit 1
    path="$(readlink "$(basename "$path")")"
  done
  cd "$(dirname "$path")" > /dev/null || exit 1
  pwd
)" || { echo "Unable to resolve the directory of ${BASH_SOURCE[0]}" >&2; exit 1; }
DIR="$DIR/"

#DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"
# Java classpath: the 'current' directory next to the resolved script.
CP="${DIR}current/"

# z is the JVM max-heap flag that stats() passes to java; 200 MB by default.
# NOTE(review): 'set' is not read anywhere in this file; presumably parseXmx
# in calcmem.sh uses it (and overrides z) - confirm there.
z="-Xmx200m"
set=0

# With no first argument, or an explicit help flag, print the help text and
# stop. The exit status is whatever usage returned.
case "$1" in
  ''|-h|--help)
    usage
    exit
    ;;
esac

# Load the shared memory helpers and let them configure the JVM settings.
# Globals:   DIR (read) - directory that contains calcmem.sh.
# Arguments: the script's command-line arguments, forwarded to parseXmx.
# Returns:   1, leaving the current settings untouched, when calcmem.sh cannot
#            be read; otherwise the status of parseXmx.
# NOTE(review): setEnvironment and parseXmx are defined by calcmem.sh;
# presumably they set EA, EOOM and z, which stats() reads - confirm there.
calcXmx () {
  # Tolerate DIR with or without its trailing slash and avoid a double slash.
  local calcmem_path="${DIR%/}/calcmem.sh"
  if [[ ! -r "$calcmem_path" ]]; then
    # Without the helpers the defaults still work, so warn once and carry on
    # rather than emitting a cascade of "command not found" errors.
    echo "Warning: cannot read $calcmem_path; using default JVM settings." >&2
    return 1
  fi
  source "$calcmem_path"
  setEnvironment
  parseXmx "$@"
}
calcXmx "$@"

# Run jgi.AssemblyStatsWrapper with the configured JVM options.
# Globals:   EA, EOOM, z (read) - JVM flags; CP (read) - Java classpath.
# Arguments: passed through to the Java program after the built-in format=3,
#            so a user-supplied format= appears later on the command line.
# Outputs:   the command line on stderr, then whatever java prints.
# Returns:   the exit status of java.
stats() {
  # The command is built as an array and executed directly instead of being
  # flattened into a string and re-parsed with eval: each argument (and the
  # classpath) stays one word even if it contains spaces, and shell
  # metacharacters in arguments are passed to java rather than executed.
  # EA, EOOM and z are left unquoted on purpose so that an empty or unset
  # flag contributes no argument at all.
  # shellcheck disable=SC2086
  local -a cmd=(java $EA $EOOM $z -cp "$CP" jgi.AssemblyStatsWrapper format=3 "$@")
  printf '%s\n' "${cmd[*]}" >&2
  "${cmd[@]}"
}

stats "$@"