view CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/mash/CommandScreen.h @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
line wrap: on
line source
// Copyright © 2015, Battelle National Biodefense Institute (BNBI);
// all rights reserved. Authored by: Brian Ondov, Todd Treangen,
// Sergey Koren, and Adam Phillippy
//
// See the LICENSE.txt file included with this software for license information.

#ifndef INCLUDED_CommandScreen
#define INCLUDED_CommandScreen

#include "Command.h"
#include "Sketch.h"
#include <list>
#include <string>
#include <vector>
#include <atomic>
#include "robin_hood.h"
#include "MinHashHeap.h"

namespace mash {

struct HashTableEntry
{
	HashTableEntry() : count(0) {}
	
	uint32_t count;
	robin_hood::unordered_set<uint64_t> indices;
};

//typedef robin_hood::unordered_map< uint64_t, HashTableEntry > HashTable;
typedef robin_hood::unordered_map< uint64_t, robin_hood::unordered_set<uint64_t> > HashTable;

static const robin_hood::unordered_map< std::string, char > codons =
{
	{"AAA",	'K'},
	{"AAC",	'N'},
	{"AAG",	'K'},
	{"AAT",	'N'},
	{"ACA",	'T'},
	{"ACC",	'T'},
	{"ACG",	'T'},
	{"ACT",	'T'},
	{"AGA",	'R'},
	{"AGC",	'S'},
	{"AGG",	'R'},
	{"AGT",	'S'},
	{"ATA",	'I'},
	{"ATC",	'I'},
	{"ATG",	'M'},
	{"ATT",	'I'},
	{"CAA",	'Q'},
	{"CAC",	'H'},
	{"CAG",	'Q'},
	{"CAT",	'H'},
	{"CCA",	'P'},
	{"CCC",	'P'},
	{"CCG",	'P'},
	{"CCT",	'P'},
	{"CGA",	'R'},
	{"CGC",	'R'},
	{"CGG",	'R'},
	{"CGT",	'R'},
	{"CTA",	'L'},
	{"CTC",	'L'},
	{"CTG",	'L'},
	{"CTT",	'L'},
	{"GAA",	'E'},
	{"GAC",	'D'},
	{"GAG",	'E'},
	{"GAT",	'D'},
	{"GCA",	'A'},
	{"GCC",	'A'},
	{"GCG",	'A'},
	{"GCT",	'A'},
	{"GGA",	'G'},
	{"GGC",	'G'},
	{"GGG",	'G'},
	{"GGT",	'G'},
	{"GTA",	'V'},
	{"GTC",	'V'},
	{"GTG",	'V'},
	{"GTT",	'V'},
	{"TAA",	'*'},
	{"TAC",	'Y'},
	{"TAG",	'*'},
	{"TAT",	'Y'},
	{"TCA",	'S'},
	{"TCC",	'S'},
	{"TCG",	'S'},
	{"TCT",	'S'},
	{"TGA",	'*'},
	{"TGC",	'C'},
	{"TGG",	'W'},
	{"TGT",	'C'},
	{"TTA",	'L'},
	{"TTC",	'F'},
	{"TTG",	'L'},
	{"TTT",	'F'}
};

class CommandScreen : public Command
{
public:
    
    struct HashInput
    {
    	HashInput(robin_hood::unordered_map<uint64_t, std::atomic<uint32_t> > & hashCountsNew, MinHashHeap * minHashHeapNew, char * seqNew, uint64_t lengthNew, const Sketch::Parameters & parametersNew, bool transNew)
    	:
    	hashCounts(hashCountsNew),
    	minHashHeap(minHashHeapNew),
    	seq(seqNew),
    	length(lengthNew),
    	parameters(parametersNew),
    	trans(transNew)
    	{}
    	
    	~HashInput()
    	{
    		if ( seq != 0 )
    		{
	    		delete [] seq;
	    	}
    	}
    	
    	std::string fileName;
    	
    	char * seq;
    	uint64_t length;
    	bool trans;
    	
    	Sketch::Parameters parameters;
		robin_hood::unordered_map<uint64_t, std::atomic<uint32_t> > & hashCounts;
		MinHashHeap * minHashHeap;
    };
    
    struct HashOutput
    {
    	HashOutput(MinHashHeap * minHashHeapNew)
    	:
    	minHashHeap(minHashHeapNew)
    	{}
    	
		MinHashHeap * minHashHeap;
    };
    
    CommandScreen();
    
    int run() const; // override

private:
	
	struct Reference
	{
		Reference(uint64_t amerCountNew, std::string nameNew, std::string commentNew)
		: amerCount(amerCountNew), name(nameNew), comment(commentNew) {}
		
		uint64_t amerCount;
		std::string name;
		std::string comment;
	};
};

char aaFromCodon(const char * codon);
double estimateIdentity(uint64_t common, uint64_t denom, int kmerSize, double kmerSpace);
CommandScreen::HashOutput * hashSequence(CommandScreen::HashInput * input);
double pValueWithin(uint64_t x, uint64_t setSize, double kmerSpace, uint64_t sketchSize);
void translate(const char * src, char * dst, uint64_t len);
void useThreadOutput(CommandScreen::HashOutput * output, robin_hood::unordered_set<MinHashHeap *> & minHashHeaps);

} // namespace mash

#endif