#!/bin/bash
#
# Launcher for the Java class gff.CutGff.
# Resolves the directory this script lives in (following symlinks), derives
# the Java classpath from it, lets calcmem.sh pick JVM memory flags, and then
# runs java with all command-line arguments forwarded unchanged.

# Print the help text to stdout.
usage(){
echo "
Written by Brian Bushnell
Last modified October 15, 2019

Description:  Cuts out features defined by a gff file, and writes them
to a new fasta.  Features are output in their sense strand.

Usage:  cutgff.sh in= gff= out=

in= is optional, and gff filenames will be automatically assumed based on
the fasta name if not specified.  This allows running on multiple files
like this:

cutgff.sh types=rRNA out=16S.fa minlen=1440 maxlen=1620 attributes=16S bacteria/*.fna.gz


File Parameters:
in=                 Input FNA (fasta) file.
gff=                Input GFF file (optional).
out=                Output FNA file.

Other Parameters:
types=CDS           Types of features to cut.
invert=false        Invert selection: rather than outputting the features,
                    mask them with Ns in the original sequences.
attributes=         A comma-delimited list of strings.  If present, one of
                    these strings must be in the gff line attributes.
bannedattributes=   A comma-delimited list of banned strings.
banpartial=t        Ignore lines with 'partial=true' in attributes.
minlen=1            Ignore lines shorter than this.
maxlen=2147483647   Ignore lines longer than this.
renamebytaxid=f     Rename sequences with their taxID.  Input sequences
                    must be named appropriately, e.g. in NCBI format.
taxmode=accession   Valid modes are:
                       accession: Sequence names must start with an accession.
                       gi:        Sequence names must start with gi|number
                       taxid:     Sequence names must start with tid|number
                       header:    Best effort for various header formats.
requirepresent=t    Crash if a taxID cannot be found for a sequence.
oneperfile=f        Only output one sequence per file.
align=f             Align ribosomal sequences to consensus (if available);
                    discard those with low identity, and flip those
                    annotated on the wrong strand.
maxns=-1            If non-negative, ignore features with more than this many
                    undefined bases (Ns or IUPAC symbols).
maxnfraction=-1.0   If non-negative, ignore features with more than this
                    fraction of undefined bases (Ns or IUPAC symbols).
                    Should be 0.0 to 1.0.
"
}

#This block allows symlinked shellscripts to correctly set classpath.
#Each symlink hop is followed from inside the link's own directory so that
#relative link targets resolve correctly.  A failed cd would silently yield
#a wrong DIR (and therefore a wrong classpath), so it aborts instead.
pushd . > /dev/null
DIR="${BASH_SOURCE[0]}"
while [ -h "$DIR" ]; do
  cd "$(dirname "$DIR")" || exit 1
  DIR="$(readlink "$(basename "$DIR")")"
done
cd "$(dirname "$DIR")" || exit 1
DIR="$(pwd)/"
popd > /dev/null

#DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"
CP="$DIR""current/"

#Default JVM heap flag and a flag variable; both are left as globals for
#calcmem.sh.  NOTE(review): presumably parseXmx overrides z and sets 'set'
#when the user passes -Xmx -- confirm against calcmem.sh.
z="-Xmx200m"
set=0

if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then
	usage
	exit
fi

# Load the helpers from calcmem.sh (located next to this script) and run
# them; setEnvironment and parseXmx are defined there, not in this file.
# Arguments: the script's full command line.
calcXmx () {
	# DIR already ends with '/', so no extra separator is needed.
	source "${DIR}calcmem.sh"
	setEnvironment
	parseXmx "$@"
}
calcXmx "$@"

# Run gff.CutGff with every user argument passed through verbatim.
# Arguments: the script's full command line.
# Returns:   the exit status of java.
gff() {
	# Invoke java directly instead of eval-ing a flattened string: "$@" keeps
	# each argument intact (paths with spaces) and stops the shell from
	# re-interpreting metacharacters inside arguments.
	# EA, EOOM and z are intentionally unquoted so that an empty value
	# disappears and a multi-flag value splits into separate JVM options.
	# NOTE(review): EA and EOOM are not defined in this file; presumably they
	# come from calcmem.sh -- confirm.
	# Debug aid: printf '%q ' java $EA $EOOM $z -cp "$CP" gff.CutGff "$@" >&2
	# shellcheck disable=SC2086
	java $EA $EOOM $z -cp "$CP" gff.CutGff "$@"
}

gff "$@"