kkonganti@0
|
1 #!/usr/bin/env bash
|
kkonganti@0
|
2
|
kkonganti@0
|
3 ##########################################################
|
kkonganti@0
|
4 # Constants
|
kkonganti@0
|
5 ##########################################################
|
kkonganti@0
|
# Colour escape sequences used in the report output.
# They are only populated when stdout is a terminal and tput can resolve the
# terminal type; otherwise they are left empty so that piped or redirected
# output stays free of escape codes and tput does not complain on stderr
# when TERM is unset or unknown.
if [[ -t 1 ]] && tput setaf 1 >/dev/null 2>&1; then
  GREEN=$(tput setaf 2)
  RED=$(tput setaf 1)
  CYAN=$(tput setaf 6)
  CLRESET=$(tput sgr0)
else
  GREEN=""
  RED=""
  CYAN=""
  CLRESET=""
fi

# Program name; used in messages and as a component of the database path.
prog_name="nowayout"
# Database build identifier (looks like an MMDDYYYY date stamp - TODO confirm).
dbBuild="03182024"
# Root directory of this database build.
dbPath="/hpc/db/${prog_name}/$dbBuild"
# Directory whose sub-directories are the individual databases; each one is
# expected to hold a lineages.csv file.
taxonomyPath="$dbPath/taxonomy"
|
kkonganti@0
|
14
|
kkonganti@0
|
#######################################
# Print the help text, followed by an optional trailing message.
# Globals:   prog_name (read)
# Arguments: $1 - optional message (e.g. an error) printed as the last line;
#                 an empty line is printed when it is omitted
# Outputs:   writes the help text to stdout
#######################################
usage() {
  # printf with quoted arguments is used instead of a series of echo calls:
  # the literal "[-h]" must not be left unquoted (it is a glob pattern that
  # would expand to a file named "h" or "-" in the current directory), and
  # a message such as "-n" must not be swallowed as an echo option.
  printf '%s\n' \
    "" \
    "usage: $0 [-h]" \
    "" \
    "Check for species presence in ${prog_name} database(s)." \
    "" \
    'Example usage:' \
    "" \
    'dbcheck -l' \
    'dbcheck -g Cathartus' \
    'dbcheck -d mitomine -g Cathartus' \
    'dbcheck -d mitomine -s "Cathartus quadriculus"' \
    "" \
    'Options:' \
    " -l : List ${prog_name} databases" \
    ' -d : Search this database. Default: mitomine.' \
    ' -g : Genus to search for.' \
    ' -s : "Genus Species" to search for.' \
    ' -h : Show this help message and exit' \
    "" \
    "$1"
}
|
kkonganti@0
|
38
|
kkonganti@0
|
#######################################
# Parse the command-line options into the globals used by the rest of the
# script.
# Globals:   listdb, dbname, genus, species (written)
#            OPTIND (advanced by getopts; deliberately left global)
# Arguments: the command-line arguments to parse
# Outputs:   help text (via usage) on -h or on a parse error
# Exits:     0 after -h; 1 on an unknown option or a missing option argument
#######################################
parse_args() {
  local opt
  # Leading ':' selects getopts' silent mode: a missing argument is reported
  # as ':' and an unknown option as '?', with the offending letter in OPTARG.
  while getopts ":d:g:s:lh" opt; do
    case "${opt}" in
      l)
        listdb="list"
        ;;
      d)
        dbname=${OPTARG}
        ;;
      g)
        genus=${OPTARG}
        ;;
      s)
        species=${OPTARG}
        ;;
      h)
        usage
        exit 0
        ;;
      :)
        usage "ERROR: Option -${OPTARG} requires an argument!"
        exit 1
        ;;
      \?)
        # The '?' must be escaped: a bare ? is a glob that matches any
        # single character, including ':'.
        usage "ERROR: Unknown option -${OPTARG}!"
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
|
kkonganti@0
|
59
|
kkonganti@0
|
60
|
kkonganti@0
|
61
|
kkonganti@0
|
#######################################
# Print a numbered list of the databases found under a taxonomy directory,
# followed by their total count. A database is an immediate sub-directory
# of the given directory; nested directories are not counted.
# Globals:   prog_name (read)
# Arguments: $1 - taxonomy directory to scan
# Outputs:   the listing on stdout; an error message on stderr
# Returns:   0 on success, 1 if the directory does not exist
#######################################
list_databases() {
  local root=$1
  local -a dbs=()
  local db
  local db_num=1

  if [[ ! -d "$root" ]]; then
    printf '%s\n' "ERROR: taxonomy directory not found: $root" >&2
    return 1
  fi

  # Scan once, immediate children only, in a stable (byte-wise sorted) order.
  # Process substitution keeps the loop in the current shell so the array
  # survives it.
  while IFS= read -r db; do
    dbs+=("${db##*/}")
  done < <(find "$root" -mindepth 1 -maxdepth 1 -type d | LC_ALL=C sort)

  echo "=============================================="
  for db in "${dbs[@]}"; do
    echo "${db_num}. $db"
    db_num=$(( db_num + 1 ))
  done
  echo "=============================================="
  echo "Number of ${prog_name} databases: ${#dbs[@]}"
  echo "=============================================="
}

# -l was given: list the available databases and stop.
if [ -n "$listdb" ]; then
  list_databases "$taxonomyPath" || exit 1
  exit 0
fi
|
kkonganti@0
|
78
|
kkonganti@0
|
79
|
kkonganti@0
|
80
|
kkonganti@0
|
# Fall back to the default database when -d was not supplied.
dbname=${dbname:-mitomine}

# -g and -s are mutually exclusive.
if [[ -n "$genus" && -n "$species" ]]; then
  usage "ERROR: Only one of -g or -s needs to be defined!"
  exit 1
fi

# The search term is the genus when given, otherwise the species
# (which may itself be empty).
check=${genus:-$species}

# Exactly one of the two must have been provided.
if [[ -z "$check" ]]; then
  usage "ERROR: -g or -s is required! check:$check"
  exit 1
fi
|
kkonganti@0
|
100
|
kkonganti@0
|
# Lineage table of the selected database.
lineages="$taxonomyPath/$dbname/lineages.csv"

# Fail early when the table cannot be read. Without this guard a mistyped
# -d value makes grep/tail fail, every count comes back as 0 and the taxon
# is misleadingly reported as "absent".
if [[ ! -r "$lineages" ]]; then
  printf '%s\n' "${RED}ERROR: cannot read ${lineages}${CLRESET}" >&2
  printf '%s\n' "Is '${dbname}' a valid ${prog_name} database? Use -l to list them." >&2
  exit 1
fi

# printf with %s keeps backslashes in the user-supplied search term literal
# (echo -e would interpret them as escape sequences).
printf '\nChecking %s for %s...\nPlease wait...\n\n' \
  "$dbname" "${CYAN}${check}${CLRESET}"

# Distinct first-column values among rows that contain ",<term>," verbatim.
num=$(grep -F -- ",$check," "$lineages" | cut -f1 -d, | sort -u | wc -l)
# Distinct values of the 8th column, skipping the first (header) line.
num_species=$(tail -n+2 "$lineages" | cut -f8 -d, | sort -u | wc -l)
# Number of rows, skipping the first (header) line.
num_entries=$(tail -n+2 "$lineages" | wc -l)

echo "$dbname brief stats"
echo "=============================================="
echo "DB Build: $dbBuild"
echo "Number of unique species: $num_species"
echo "Number of accessions in database: $num_entries"
echo "=============================================="

if [ "$num" -gt 0 ]; then
  echo
  echo "${GREEN}$check is present in ${dbname}${CLRESET}."
  echo "Number of accessions representing $check: $num"
  echo "=============================================="
else
  echo "${RED}$check is absent in ${dbname}${CLRESET}."
  printf '%s\n%s\n' \
    "No worries. Please request the developer of" \
    "${prog_name} to augment the database!"
  echo "=============================================="
fi