jpayne@69
|
1 #!/bin/bash
|
jpayne@69
|
2
|
jpayne@69
|
# Print the help text for gi2taxid.sh on stdout.
# The quoted heredoc delimiter keeps the text literal (no expansion); the
# blank first and last lines are part of the emitted message.
usage() {
  cat <<'EOF'

Written by Brian Bushnell.
Last modified December 19, 2019

Description: Renames sequences to indicate their NCBI taxIDs.
The headers must be in NCBI or Silva format with gi numbers,
accessions, or organism names. Only supports fasta and gff files.

Usage: gi2taxid.sh in=<file> out=<file> server

Parameters:
in=<file> Input sequences; required parameter. Must be fasta.
This can alternatively be a comma-delimited list,
or just a bunch of space-delimited filenames, e.g.:
gi2taxid.sh x.fa y.fa z.fa out=tid.fa tree=auto table=auto
out=<file> Destination for renamed sequences.
invalid=<file> Destination for headers with no taxid.
keepall=t Keep sequences with no taxid in normal output.
prefix=t Append the taxid as a prefix to the old header, but keep
the old header.
title=tid Set the title of the new number (e.g. ncbi, taxid, tid).
ziplevel=2 (zl) Compression level for gzip output.
pigz=t Spawn a pigz (parallel gzip) process for faster
compression than Java. Requires pigz to be installed.
silva=f Parse headers in Silva format.
shrinknames=f Replace multiple concatenated headers with the first.
deleteinvalid=f Delete the output file if there are any invalid headers.

Taxonomy file flags:
server=f Use the taxonomy server instead of local files.
Server mode only works for accessions (like RefSeq).
tree= Specify a taxtree file. On Genepool, use 'auto'.
gi= Specify a gitable file. On Genepool, use 'auto'.
accession= Specify one or more comma-delimited NCBI accession to
taxid files. On Genepool, use 'auto'.

Java Parameters:
-Xmx This will set Java's memory usage, overriding autodetection.
-Xmx20g will specify 20 gigs of RAM, and -Xmx800m will specify 800 megs.
The max is typically 85% of physical memory.
-eoom This flag will cause the process to exit if an out-of-memory
exception occurs. Requires Java 8u92+.
-da Disable assertions.

Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems.

EOF
}
|
jpayne@69
|
51
|
jpayne@69
|
# This block allows symlinked shellscripts to correctly set classpath.
# It walks the symlink chain starting at this script's path and leaves DIR
# holding the absolute directory of the final target, with a trailing slash.
# pushd/popd restore the caller's working directory afterwards.
pushd . > /dev/null
DIR="${BASH_SOURCE[0]}"
while [ -h "$DIR" ]; do
  # A relative link target is relative to the link's own directory, so move
  # there before reading the link. CDPATH is cleared for the call so cd cannot
  # jump to an unrelated directory or print a path on stdout.
  CDPATH= cd -- "$(dirname "$DIR")" || {
    echo "Error: cannot enter the directory of '$DIR'." >&2
    exit 1
  }
  DIR="$(readlink "$(basename "$DIR")")"
done
# A failed cd here would silently yield the wrong install directory and hence
# the wrong classpath, so stop instead of continuing.
CDPATH= cd -- "$(dirname "$DIR")" || {
  echo "Error: cannot enter the directory of '$DIR'." >&2
  exit 1
}
DIR="$(pwd)/"
popd > /dev/null

#DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"
# Classpath root: the "current/" directory next to this script.
CP="$DIR""current/"
# The JNI library path is computed and then immediately cleared, so JNI ends
# up empty. NOTE(review): this looks like a deliberate off-switch; confirm.
JNI="-Djava.library.path=""$DIR""jni/"
JNI=""
|
jpayne@69
|
67
|
jpayne@69
|
# Initial JVM heap flags (maximum and starting heap) and the "set" flag that
# calcXmx later compares against 1.
set=0
z2="-Xms7g"
z="-Xmx7g"

# With no first argument, or with -h / --help, print the help text and stop.
case "$1" in
  '' | -h | --help)
    usage
    exit
    ;;
esac
|
jpayne@69
|
76
|
jpayne@69
|
# Choose the JVM heap flags.
# Globals:   DIR (read), set (read), RAM (read after freeRam), z and z2 (written)
# Arguments: the script's command-line arguments, forwarded to parseXmx
# The helpers setEnvironment, parseXmx and freeRam come from calcmem.sh, which
# is not visible here. Presumably parseXmx sets set=1 (and z/z2) when the user
# passed an explicit -Xmx, and freeRam stores a megabyte count in RAM — confirm
# against calcmem.sh.
calcXmx () {
  . "${DIR}/calcmem.sh"
  setEnvironment
  parseXmx "$@"
  # Only derive the heap size automatically when set was not flipped to 1.
  if [[ $set != 1 ]]; then
    freeRam 7000m 84
    z="-Xmx${RAM}m"
    z2="-Xms${RAM}m"
  fi
}
calcXmx "$@"
|
jpayne@69
|
89
|
jpayne@69
|
90
|
jpayne@69
|
91 gi2taxid() {
|
jpayne@69
|
92 local CMD="java $EA $EOOM $z $z2 -cp $CP tax.RenameGiToTaxid $@"
|
jpayne@69
|
93 echo $CMD >&2
|
jpayne@69
|
94 eval $CMD
|
jpayne@69
|
95 }
|
jpayne@69
|
96
|
jpayne@69
|
97 gi2taxid "$@"
|