#!/bin/bash
#
# Launcher for the Java class prok.CallGenes.
# Prints help when run without arguments (or with -h / --help); otherwise
# resolves its own install directory, sources calcmem.sh from that directory,
# and runs java with the classpath "<install dir>/current/".

# Print the help text to stdout.
usage(){
echo "
Written by Brian Bushnell
Last modified February 10, 2020

Description:  Finds orfs and calls genes in unspliced prokaryotes.
This includes bacteria, archaea, viruses, and mitochondria.
Can also predict 16S, 23S, 5S, and tRNAs.

Usage:  callgenes.sh in=contigs.fa out=calls.gff outa=aminos.faa out16S=16S.fa

File parameters:
in=             A fasta file; the only required parameter.
out=            Output gff file.
outa=           Amino acid output.
out16s=         16S output.
model=          A pgm file or comma-delimited list.
                If unspecified a default model will be used.
stats=stderr    Stats output (may be stderr, stdout, a file, or null).
hist=null       Gene length histogram.
compareto=      Optional reference gff file to compare with the gene calls.
                'auto' will name it based on the input file name.

Formatting parameters:
json=false      Print stats in JSON.
binlen=20       Histogram bin length.
bins=2000       Maximum histogram bins.
pz=f            (printzero) Print histogram lines with zero count.



Other parameters:
minlen=60       Don't call genes shorter than this.
trd=f           (trimreaddescription) Set to true to trim read headers after
                the first whitespace.  Necessary for IGV.
merge=f         For paired reads, merge before calling.
detranslate=f   Output canonical nucleotide sequences instead of amino acids.
recode=f        Re-encode nucleotide sequences over called genes, leaving
                non-coding regions unchanged.

Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems.
"
}

#This block allows symlinked shellscripts to correctly set classpath.
# It follows the chain of symlinks starting at this script's path and leaves
# DIR set to the absolute directory of the final target, with a trailing "/".
pushd . > /dev/null
DIR="${BASH_SOURCE[0]}"
while [ -h "$DIR" ]; do
  # A failed cd would leave DIR pointing at the wrong place, so stop here.
  cd "$(dirname "$DIR")" || { echo "Cannot cd to $(dirname "$DIR")" >&2; exit 1; }
  DIR="$(readlink "$(basename "$DIR")")"
done
cd "$(dirname "$DIR")" || { echo "Cannot cd to $(dirname "$DIR")" >&2; exit 1; }
DIR="$(pwd)/"
popd > /dev/null

#DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"
CP="$DIR""current/"

# Default JVM heap flags and a flag variable.
# NOTE(review): presumably parseXmx (from calcmem.sh) may override z/z2 and
# reads or writes "set" - confirm against calcmem.sh before removing any of them.
z="-Xmx6g"
z2="-Xms6g"
set=0

# Show help and quit when there are no arguments or help is requested.
if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then
  usage
  exit
fi

# Source calcmem.sh from the install directory, then call its setEnvironment
# and parseXmx helpers with this script's arguments.
# NOTE(review): $EA and $EOOM used below are presumably defined by these
# helpers; they are not set anywhere in this file.
calcXmx () {
  # DIR already ends with "/", so no extra separator is needed.
  source "${DIR}calcmem.sh"
  setEnvironment
  parseXmx "$@"
}
calcXmx "$@"

# Build the java command line from $EA, $EOOM, $z, $z2, $CP and the script
# arguments, then execute it.
# NOTE(review): eval re-parses the assembled string, so arguments containing
# spaces or shell metacharacters are re-split and interpreted by the shell.
# It is kept because the comment below indicates wildcard expansion of
# arguments is expected; do not pass untrusted input to this script.
function callgenes() {
  local CMD="java $EA $EOOM $z $z2 -cp $CP prok.CallGenes $@"
  #Too long to echo sometimes since wildcards can be expanded
  #echo $CMD >&2
  eval $CMD
}

callgenes "$@"