annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/cutgff.sh @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 #!/bin/bash
jpayne@69 2
# Print the help text for cutgff.sh to stdout.
# Takes no arguments. The output starts and ends with a blank line.
usage(){
echo "
Written by Brian Bushnell
Last modified October 15, 2019

Description: Cuts out features defined by a gff file, and writes them
to a new fasta. Features are output in their sense strand.

Usage: cutgff.sh in=<fna file> gff=<gff file> out=<fna file>

in= is optional, and gff filenames will be automatically assumed based on
the fasta name if not specified. This allows running on multiple files
like this:

cutgff.sh types=rRNA out=16S.fa minlen=1440 maxlen=1620 attributes=16S bacteria/*.fna.gz


File Parameters:
in=<file> Input FNA (fasta) file.
gff=<file> Input GFF file (optional).
out=<file> Output FNA file.

Other Parameters:
types=CDS Types of features to cut.
invert=false Invert selection: rather than outputting the features,
mask them with Ns in the original sequences.
attributes= A comma-delimited list of strings. If present, one of
these strings must be in the gff line attributes.
bannedattributes= A comma-delimited list of banned strings.
banpartial=t Ignore lines with 'partial=true' in attributes.
minlen=1 Ignore lines shorter than this.
maxlen=2147483647 Ignore lines longer than this.
renamebytaxid=f Rename sequences with their taxID. Input sequences
must be named appropriately, e.g. in NCBI format.
taxmode=accession Valid modes are:
accession: Sequence names must start with an accession.
gi: Sequence names must start with gi|number
taxid: Sequence names must start with tid|number
header: Best effort for various header formats.
requirepresent=t Crash if a taxID cannot be found for a sequence.
oneperfile=f Only output one sequence per file.
align=f Align ribosomal sequences to consensus (if available);
discard those with low identity, and flip those
annotated on the wrong strand.
maxns=-1 If non-negative, ignore features with more than this many
undefined bases (Ns or IUPAC symbols).
maxnfraction=-1.0 If non-negative, ignore features with more than this
fraction of undefined bases (Ns or IUPAC symbols).
Should be 0.0 to 1.0.
"
}
jpayne@69 54
#This block allows symlinked shellscripts to correctly set classpath.
# Starting from the path this script was invoked as, follow symlinks one hop
# at a time. Each hop first changes into the link's directory so that a
# relative link target is resolved against the right place.
# pushd/popd bracket the walk so the caller's working directory is restored.
pushd . > /dev/null
DIR="${BASH_SOURCE[0]}"
while [ -h "$DIR" ]; do
    # A failed cd would make every later step resolve against the wrong
    # directory and silently produce a bad classpath, so stop instead.
    cd "$(dirname "$DIR")" || { echo "cutgff.sh: cannot enter directory of $DIR" >&2; exit 1; }
    DIR="$(readlink "$(basename "$DIR")")"
done
cd "$(dirname "$DIR")" || { echo "cutgff.sh: cannot enter directory of $DIR" >&2; exit 1; }
# DIR is now the absolute directory containing the real script, with a trailing slash.
DIR="$(pwd)/"
popd > /dev/null

#DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"
# Classpath handed to java: the "current/" directory next to the script.
CP="$DIR""current/"

# Default JVM heap flag.
z="-Xmx200m"
# NOTE(review): 'set' is not read anywhere in the visible script; presumably
# consumed by the sourced calcmem.sh helpers - confirm before removing.
set=0
jpayne@69 71
# Print the help text and stop when the first argument is missing or empty,
# or is one of the help flags -h / --help.
case "$1" in
    ""|-h|--help)
        usage
        exit
        ;;
esac
jpayne@69 76
# Load calcmem.sh from the script directory, then run its setEnvironment and
# parseXmx helpers, passing this script's arguments to parseXmx.
# NOTE(review): presumably parseXmx adjusts the JVM memory flag (z) from any
# -Xmx style argument; the helpers are not visible here - confirm in calcmem.sh.
calcXmx () {
    # DIR already ends in "/", so the resulting path contains a harmless "//".
    . "${DIR}/calcmem.sh"
    setEnvironment
    parseXmx "$@"
}
calcXmx "$@"
jpayne@69 83
# Run the gff.CutGff Java class on the classpath in CP.
# Globals:   EA, EOOM, z (read) - JVM flags; CP (read) - classpath.
#            EA and EOOM are not assigned in the visible part of this script;
#            presumably set by the sourced calcmem.sh helpers - confirm.
# Arguments: every argument is forwarded to CutGff exactly as received.
# Returns:   the exit status of the java process.
gff() {
    # Invoke java directly rather than flattening the command into a string
    # and eval-ing it: eval re-splits and re-interprets every argument, so
    # file names or install paths containing spaces, quotes, ';' or globs
    # would be broken or executed as shell code.
    # EA, EOOM and z stay unquoted on purpose: an empty value must vanish and
    # a value holding several flags must split into separate words.
    # echo "java $EA $EOOM $z -cp $CP gff.CutGff $*" >&2
    # shellcheck disable=SC2086
    java $EA $EOOM $z -cp "$CP" gff.CutGff "$@"
}

gff "$@"