diff 0.5.0/dbcheck @ 0:97cd2f532efe

planemo upload
author kkonganti
date Mon, 31 Mar 2025 14:50:40 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/0.5.0/dbcheck	Mon Mar 31 14:50:40 2025 -0400
@@ -0,0 +1,128 @@
+#!/usr/bin/env bash
+
+##########################################################
+# Constants
+##########################################################
+GREEN=$(tput setaf 2)
+RED=$(tput setaf 1)
+CYAN=$(tput setaf 6)
+CLRESET=$(tput sgr0)
+prog_name="nowayout"
+dbBuild="03182024"
+dbPath="/hpc/db/${prog_name}/$dbBuild"
+taxonomyPath="$dbPath/taxonomy"
+
+usage()
+{
+    echo
+    echo usage: "$0" [-h] 
+    echo
+    echo "Check for species presence in ${prog_name} database(s)."
+    echo
+    echo 'Example usage:'
+    echo
+    echo 'dbcheck -l'
+    echo 'dbcheck -g Cathartus'
+    echo 'dbcheck -d mitomine -g Cathartus'
+    echo 'dbcheck -d mitomine -s "Cathartus quadriculus"'
+    echo
+    echo 'Options:'
+    echo " -l        : List ${prog_name} databases"
+    echo ' -d        : Search this database. Default: mitomine.'
+    echo ' -g        : Genus to search for.'
+    echo ' -s        : "Genus Species" to search for.'
+    echo ' -h        : Show this help message and exit'
+    echo
+    echo "$1"
+}
+
+while getopts ":d:g:s:l" OPT; do
+    case "${OPT}" in
+        l)
+            listdb="list"
+            ;;
+        d)
+            dbname=${OPTARG}
+            ;;
+        g)
+            genus=${OPTARG}
+            ;;
+        s)
+            species=${OPTARG}
+            ;;
+        ?)
+            usage
+            exit 0
+            ;;
+    esac
+done
+
+
+
+if [ -n "$listdb" ]; then
+    num_dbs=$(find "$taxonomyPath" -type d | tail -n+2 | wc -l)
+    echo "=============================================="
+    
+    db_num="1"
+    find $taxonomyPath -type d | tail -n+2 | while read -r db; do
+        dbName=$(basename "$db")
+        echo "${db_num}. $dbName"
+        db_num=$(( db_num + 1 ))
+    done
+    echo "=============================================="
+    echo "Number of ${prog_name} databases: $num_dbs"
+    echo "=============================================="
+
+    exit 0
+fi
+
+
+
+if [ -z "$dbname" ]; then
+    dbname="mitomine"
+fi
+
+if [[ -n "$genus" && -n "$species" ]]; then
+    usage "ERROR: Only one of -g or -s needs to be defined!"
+    exit 1
+elif [ -n "$genus" ]; then
+    check="$genus"
+elif [ -n "$species" ]; then
+    check="$species"
+else 
+    check=""
+fi
+
+if [ -z "$check" ]; then
+    usage "ERROR: -g or -s is required! check:$check"
+    exit 1
+fi
+
+lineages="$taxonomyPath/$dbname/lineages.csv"
+
+echo
+echo -e "Checking ${dbname} for ${CYAN}${check}${CLRESET}...\nPlease wait..."
+echo
+
+num=$(grep -F ",$check," "$lineages" | cut -f1 -d, | sort -u | wc -l)
+num_species=$(tail -n+2 "$lineages" | cut -f8 -d, | sort -u | wc -l)
+num_entries=$(tail -n+2 "$lineages" | wc -l)
+
+echo "$dbname brief stats"
+echo "=============================================="
+echo "DB Build: $dbBuild"
+echo "Number of unique species: $num_species"
+echo "Number of accessions in database: $num_entries"
+echo "=============================================="
+
+
+if [ "$num" -gt 0 ]; then
+    echo
+    echo "${GREEN}$check is present in ${dbname}${CLRESET}."
+    echo "Number of accessions representing $check: $num"
+    echo "=============================================="
+else
+    echo "${RED}$check is absent in ${dbname}${CLRESET}."
+    echo -e "No worries. Please request the developer of\n${prog_name} to augment the database!"
+    echo "=============================================="
+fi 
\ No newline at end of file