#!/bin/bash

# usage
# Prints the help text for cutgff.sh (description, usage line and parameter
# list) to stdout. Takes no arguments. The text starts and ends with an empty
# line because the quoted string begins and ends with a newline.
usage(){
	echo "
Written by Brian Bushnell
Last modified October 15, 2019

Description: Cuts out features defined by a gff file, and writes them
to a new fasta. Features are output in their sense strand.

Usage: cutgff.sh in=<fna file> gff=<gff file> out=<fna file>

in= is optional, and gff filenames will be automatically assumed based on
the fasta name if not specified. This allows running on multiple files
like this:

cutgff.sh types=rRNA out=16S.fa minlen=1440 maxlen=1620 attributes=16S bacteria/*.fna.gz


File Parameters:
in=<file>           Input FNA (fasta) file.
gff=<file>          Input GFF file (optional).
out=<file>          Output FNA file.

Other Parameters:
types=CDS           Types of features to cut.
invert=false        Invert selection: rather than outputting the features,
                    mask them with Ns in the original sequences.
attributes=         A comma-delimited list of strings. If present, one of
                    these strings must be in the gff line attributes.
bannedattributes=   A comma-delimited list of banned strings.
banpartial=t        Ignore lines with 'partial=true' in attributes.
minlen=1            Ignore lines shorter than this.
maxlen=2147483647   Ignore lines longer than this.
renamebytaxid=f     Rename sequences with their taxID. Input sequences
                    must be named appropriately, e.g. in NCBI format.
taxmode=accession   Valid modes are:
                       accession: Sequence names must start with an accession.
                       gi:        Sequence names must start with gi|number
                       taxid:     Sequence names must start with tid|number
                       header:    Best effort for various header formats.
requirepresent=t    Crash if a taxID cannot be found for a sequence.
oneperfile=f        Only output one sequence per file.
align=f             Align ribosomal sequences to consensus (if available);
                    discard those with low identity, and flip those
                    annotated on the wrong strand.
maxns=-1            If non-negative, ignore features with more than this many
                    undefined bases (Ns or IUPAC symbols).
maxnfraction=-1.0   If non-negative, ignore features with more than this
                    fraction of undefined bases (Ns or IUPAC symbols).
                    Should be 0.0 to 1.0.
"
}

#This block allows symlinked shellscripts to correctly set classpath.

# resolveScriptDir PATH
# Prints the logical absolute directory that contains PATH after following
# every symlink in the chain. The body is a subshell (parentheses instead of
# braces), so the caller's working directory is never changed and no
# pushd/popd bookkeeping is needed. Returns non-zero when a directory cannot be
# entered or a link cannot be read.
resolveScriptDir() (
	path="$1"
	while [ -h "$path" ]; do
		# A relative link target is relative to the link's own directory, so
		# move there before reading the link. CDPATH is cleared for the cd so a
		# relative directory name can never be looked up somewhere else.
		CDPATH= cd -- "$(dirname -- "$path")" || exit 1
		path="$(readlink -- "$(basename -- "$path")")" || exit 1
	done
	CDPATH= cd -- "$(dirname -- "$path")" || exit 1
	pwd
)

# DIR always ends with '/'. Stop early instead of continuing with a classpath
# that points at the wrong place.
DIR="$(resolveScriptDir "${BASH_SOURCE[0]}")/" || {
	echo "Error: could not determine the directory of ${BASH_SOURCE[0]}" >&2
	exit 1
}

# Simpler alternative that does not follow symlinks:
#DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"
CP="${DIR}current/"

# Default maximum-heap flag handed to java.
z="-Xmx200m"
# NOTE(review): 'set' is not read anywhere in this file; presumably it is a
# flag consumed by the sourced calcmem.sh helpers -- confirm.
set=0

# Print the help text and stop when the script is started without a first
# argument, or when the first argument is -h or --help.
case "$1" in
	''|-h|--help)
		usage
		exit
		;;
esac

# calcXmx ARGS...
# Sources calcmem.sh from the script directory (DIR), then calls the
# setEnvironment and parseXmx functions it is expected to provide, forwarding
# all arguments unchanged to parseXmx.
# NOTE(review): presumably parseXmx picks memory flags such as -Xmx out of the
# arguments and updates the JVM option variables -- confirm in calcmem.sh.
# Returns non-zero without calling either helper when calcmem.sh cannot be
# read; the caller is not terminated.
calcXmx () {
	# DIR already ends with '/'; the extra slash is harmless and guarantees the
	# name always contains a slash, so 'source' never searches PATH for it.
	local helper="$DIR""/calcmem.sh"
	if [ ! -r "$helper" ]; then
		echo "Warning: could not read $helper; memory settings were not adjusted." >&2
		return 1
	fi
	source "$helper"
	setEnvironment
	parseXmx "$@"
}
calcXmx "$@"

# gff ARGS...
# Runs the Java class gff.CutGff with classpath CP, passing every argument
# through exactly as received. The exit status is java's exit status.
# The command is invoked directly rather than flattened into a string and
# eval'ed, so arguments and classpaths containing spaces, globs or shell
# metacharacters reach java intact and are never re-interpreted by the shell.
gff() {
	# EA, EOOM and z hold JVM flags; they are left unquoted on purpose so an
	# empty value disappears and a value holding several flags is split.
	# NOTE(review): EA and EOOM are not set in this file; presumably they come
	# from the sourced calcmem.sh -- confirm.
	# Debug aid: printf '%q ' java $EA $EOOM $z -cp "$CP" gff.CutGff "$@" >&2
	# shellcheck disable=SC2086
	java $EA $EOOM $z -cp "$CP" gff.CutGff "$@"
}

gff "$@"