comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/cutgff.sh @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 69:33d812a61356
1 #!/bin/bash
2
# Print the help text for cutgff.sh to stdout.
# Arguments: none
# Outputs:   the help text, preceded and followed by one blank line
# The here-document delimiter is quoted so the text is emitted literally;
# nothing in it is ever subject to parameter or command expansion.
usage(){
cat <<'EOF'

Written by Brian Bushnell
Last modified October 15, 2019

Description: Cuts out features defined by a gff file, and writes them
to a new fasta. Features are output in their sense strand.

Usage: cutgff.sh in=<fna file> gff=<gff file> out=<fna file>

in= is optional, and gff filenames will be automatically assumed based on
the fasta name if not specified. This allows running on multiple files
like this:

cutgff.sh types=rRNA out=16S.fa minlen=1440 maxlen=1620 attributes=16S bacteria/*.fna.gz


File Parameters:
in=<file> Input FNA (fasta) file.
gff=<file> Input GFF file (optional).
out=<file> Output FNA file.

Other Parameters:
types=CDS Types of features to cut.
invert=false Invert selection: rather than outputting the features,
mask them with Ns in the original sequences.
attributes= A comma-delimited list of strings. If present, one of
these strings must be in the gff line attributes.
bannedattributes= A comma-delimited list of banned strings.
banpartial=t Ignore lines with 'partial=true' in attributes.
minlen=1 Ignore lines shorter than this.
maxlen=2147483647 Ignore lines longer than this.
renamebytaxid=f Rename sequences with their taxID. Input sequences
must be named appropriately, e.g. in NCBI format.
taxmode=accession Valid modes are:
accession: Sequence names must start with an accession.
gi: Sequence names must start with gi|number
taxid: Sequence names must start with tid|number
header: Best effort for various header formats.
requirepresent=t Crash if a taxID cannot be found for a sequence.
oneperfile=f Only output one sequence per file.
align=f Align ribosomal sequences to consensus (if available);
discard those with low identity, and flip those
annotated on the wrong strand.
maxns=-1 If non-negative, ignore features with more than this many
undefined bases (Ns or IUPAC symbols).
maxnfraction=-1.0 If non-negative, ignore features with more than this
fraction of undefined bases (Ns or IUPAC symbols).
Should be 0.0 to 1.0.

EOF
}
54
#This block allows symlinked shellscripts to correctly set classpath.
# Follow the chain of symlinks starting at this script's path and store the
# directory of the final target, with a trailing slash, in DIR.
# The walk runs inside a command-substitution subshell, so the caller's
# working directory and directory stack are never touched. Each cd is checked
# (a failed cd would otherwise yield a wrong DIR) and silenced (cd can print
# the new directory to stdout when CDPATH is set, which would pollute DIR).
DIR="$(
	link="${BASH_SOURCE[0]}"
	while [ -h "$link" ]; do
		# Relative link targets are resolved from the link's own directory.
		cd "$(dirname "$link")" > /dev/null || exit 1
		link="$(readlink "$(basename "$link")")"
	done
	cd "$(dirname "$link")" > /dev/null && pwd
)" || {
	echo "Unable to determine the directory of ${BASH_SOURCE[0]}" >&2
	exit 1
}
DIR="$DIR/"
65
#DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"
# Java classpath: the "current/" directory directly under DIR.
CP="${DIR}current/"

# Default JVM heap flag handed to java by gff().
z='-Xmx200m'
# NOTE(review): "set" is not read anywhere in this file; presumably consumed
# by calcmem.sh - confirm before removing.
set='0'
71
# Print the help text and stop when the first argument is missing, empty,
# -h or --help. A bare "exit" returns the status of the preceding usage call.
case "$1" in
	""|-h|--help)
		usage
		exit
		;;
esac
76
# Load calcmem.sh from DIR, then run its setEnvironment and parseXmx
# functions, forwarding every argument of this function to parseXmx.
# NOTE(review): both helpers are defined in calcmem.sh, which is not visible
# here; presumably they set EA, EOOM and z, which gff() reads - confirm.
# Returns: the exit status of parseXmx.
calcXmx () {
	. "${DIR}/calcmem.sh"
	setEnvironment
	parseXmx "$@"
}
82 calcXmx "$@"
83
# Run the gff.CutGff Java class with the JVM flags held in EA, EOOM and z,
# the classpath CP, and every argument given to this function.
# Globals:   EA, EOOM, z, CP (read)
# Arguments: passed through to gff.CutGff unchanged
# Returns:   the exit status of java
# The command is built as an argv array and executed directly rather than
# flattened into a string and eval'ed: each argument reaches java exactly as
# the caller supplied it (spaces preserved, no second round of shell parsing),
# and a CP that contains spaces stays a single classpath entry.
gff() {
	# EA, EOOM and z are expanded unquoted on purpose so that an empty value
	# contributes no argument and a multi-flag value splits into separate flags.
	# NOTE(review): EA and EOOM are not assigned in this file; presumably they
	# are set by calcmem.sh - confirm.
	# shellcheck disable=SC2086
	local -a CMD=(java $EA $EOOM $z -cp "$CP" gff.CutGff "$@")
	# printf '%q ' "${CMD[@]}" >&2; echo >&2
	"${CMD[@]}"
}
89
90 gff "$@"