diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/ResultLineParser.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/ResultLineParser.java	Tue Mar 18 16:23:26 2025 -0400
@@ -0,0 +1,211 @@
+package sketch;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+
+import fileIO.ByteStreamWriter;
+import fileIO.ReadWrite;
+import shared.Parse;
+import shared.Tools;
+import structures.FloatList;
+import tax.TaxNode;
+import tax.TaxTree;
+
+class ResultLineParser {
+
+	/**
+	 * Builds a parser and allocates one ANI list and one SSU list per taxonomic level.
+	 * @param mode_ Input format selector; parse() compares it against
+	 * AnalyzeSketchResults.BBSKETCH_MODE and AnalyzeSketchResults.MASH_MODE.
+	 * @param tree_ Taxonomy used for node and common-ancestor lookups when parsing Mash lines.
+	 * @param bswBad_ Optional writer (may be null); header lines are echoed to it by parse().
+	 * @param recordSets_ Optional list (may be null) that receives every RecordSet started by processData().
+	 * @param keepText_ True to retain the raw bytes of the most recent line in the text field.
+	 */
+	ResultLineParser(int mode_, TaxTree tree_, ByteStreamWriter bswBad_, ArrayList<RecordSet> recordSets_, boolean keepText_){
+		this.mode=mode_;
+		this.tree=tree_;
+		this.bswBad=bswBad_;
+		this.recordSets=recordSets_;
+		//Raw text is also retained whenever a bswBad writer was supplied
+		this.keepText=(keepText_ || bswBad_!=null);
+
+		//The arrays themselves are created by the field initializers; fill every slot here
+		for(int level=0; level<AnalyzeSketchResults.taxLevels; level++){
+			this.aniLists[level]=new FloatList();
+			this.ssuLists[level]=new FloatList();
+		}
+	}
+
+	/**
+	 * Consumes one line of result text.
+	 * A line whose first byte is '#' is treated as a header: its column names are
+	 * scanned by parseHeader() and the line is echoed to bswBad when that writer exists.
+	 * Any other line is parsed as a data record according to mode.
+	 * Null or empty lines are ignored and leave all parser state untouched.
+	 * @param line Raw bytes of a single line.
+	 */
+	void parse(byte[] line){
+		//Blank lines carry no record; without this guard line[0] below would throw
+		if(line==null || line.length==0){return;}
+		if(keepText){text=line;}
+		if(line[0]=='#'){
+			parseHeader(line);
+			if(bswBad!=null){bswBad.println(line);}
+		}else if(mode==AnalyzeSketchResults.BBSKETCH_MODE){
+			parseData(line);
+		}else if(mode==AnalyzeSketchResults.MASH_MODE){
+			parseDataMash(line);
+		}else{
+			//Only reported when assertions are enabled; otherwise the line is dropped
+			assert(false) : "Bad mode: "+mode;
+		}
+	}
+
+	private synchronized void parseHeader(byte[] line){
+		ArrayList<byte[]> split=Tools.split(line, 0, (byte)'\t');
+		for(int col=0; col<split.size(); col++){
+			byte[] array=split.get(col);
+			if(Tools.equals(array, "ANI") || Tools.equals(array, "AAI")){
+				aniColumn=col;
+			}else if(Tools.equals(array, "QTaxID")){
+				qTaxIDColumn=col;
+			}else if(Tools.equals(array, "RTaxID")){
+				rTaxIDColumn=col;
+			}else if(Tools.equals(array, "SSU")){
+				ssuColumn=col;
+			}else if(Tools.equals(array, "CALevel")){
+				caLevelColumn=col;
+			}
+
+			else if(Tools.equals(array, "QSize")){
+				qSizeColumn=col;
+			}else if(Tools.equals(array, "RefSize") || Tools.equals(array, "RSize")){
+				rSizeColumn=col;
+			}else if(Tools.equals(array, "QBases")){
+				qBasesColumn=col;
+			}else if(Tools.equals(array, "RBases")){
+				rBasesColumn=col;
+			}
+		}
+	}
+
+	private void parseData(byte[] line){
+		ArrayList<byte[]> split=Tools.split(line, 0, (byte)'\t');
+		qTaxID=Parse.parseInt(split.get(qTaxIDColumn), 0);
+		rTaxID=Parse.parseInt(split.get(rTaxIDColumn), 0);
+		qBases=Parse.parseLong(split.get(qBasesColumn), 0);
+		rBases=Parse.parseLong(split.get(rBasesColumn), 0);
+		qSize=Parse.parseLong(split.get(qSizeColumn), 0);
+		rSize=Parse.parseLong(split.get(rSizeColumn), 0);
+		ani=Parse.parseDouble(split.get(aniColumn), 0);
+		byte[] ssuArray=split.get(ssuColumn);
+		ssu=ssuArray.length==1 && ssuArray[0]=='.' ? -1 : Parse.parseDouble(ssuArray, 0);
+		taxLevelExtended=TaxTree.stringToLevelExtended(new String(split.get(caLevelColumn)));
+		if(taxLevelExtended<0) {
+			System.err.println(new String(split.get(caLevelColumn)));
+			taxLevelExtended=0;
+		}
+		processed=false;
+	}
+	
+	private TaxNode getTaxNode(String fname){
+		String name=ReadWrite.stripToCore(fname);
+		if(name.startsWith("tid_")){
+			int idx2=fname.indexOf('_', 4);
+			int x=Parse.parseInt(fname, 4, idx2);
+			return x>0 ? tree.getNode(x) : null;
+			//name=name.substring(idx2+1); //This would allow fall-through to name parsing
+		}
+		try {
+			return tree.getNodeByName(name);
+		} catch (Throwable e) {
+			return null;
+		}
+	}
+
+	private void parseDataMash(byte[] line){
+		///dev/shm/tid_123_Zymomonas_mobilis.fna.gz	/dev/shm/tid_456_bacterium_endosymbiont_of_Bathymodiolus_sp._5_South.fna.gz	0.43859	0.00515848	1/20000
+
+		String[] split=new String(line).split("\t");
+
+		String fraction=split[split.length-1];
+		int numerator=Integer.parseInt(fraction.split("/")[0]);
+		if(numerator<MIN_HITS){return;}
+		int denominator=Integer.parseInt(fraction.split("/")[1]);
+
+		//The default ordering is reversed since mash output is ordered first by ref, then query
+		//The normal ordering (as below) requires a linux sort
+		{
+			TaxNode qNode=getTaxNode(split[0]);
+			TaxNode rNode=getTaxNode(split[1]);
+
+			if(qNode==null || rNode==null){return;}
+			qTaxID=qNode.id;
+			rTaxID=rNode.id;
+			TaxNode ancestor=tree.commonAncestor(qNode, rNode);
+			taxLevelExtended=ancestor.levelExtended;
+		}
+		
+		ani=numerator/(float)denominator;
+		ssu=-1;
+		if(taxLevelExtended<0){taxLevelExtended=0;}
+		processed=false;
+	}
+
+	//Returns a complete set when a new set is started
+	RecordSet processData(HashMap<Long, Float> map, boolean saveRecord){
+		RecordSet old=null;
+		if(processed){return null;}
+		levelAniSums[taxLevelExtended]+=ani;
+		levelCounts[taxLevelExtended]++;
+		aniLists[taxLevelExtended].add((float)ani);
+
+		if(ssu>0){
+			levelSSUSums[taxLevelExtended]+=ssu;
+			levelCountsSSU[taxLevelExtended]++;
+			ssuLists[taxLevelExtended].add((float)ssu);
+		}
+		if(map!=null){
+			long key=(((long)qTaxID)<<32)|rTaxID;
+			map.put(key, (float)ani);
+		}
+		if(saveRecord){
+			if(currentSet==null || currentSet.qID!=qTaxID){
+				old=currentSet;
+				currentSet=new RecordSet(qTaxID);
+				if(recordSets!=null){
+					recordSets.add(currentSet);
+				}
+			}
+			currentSet.records.add(new Record(this));
+		}
+		processed=true;
+		return old;
+	}
+
+	/*--------------------------------------------------------------*/
+
+	//		final static int taxLevels=TaxTree.numTaxaNamesExtended;
+	//Per-level accumulators, indexed by taxLevelExtended and updated in processData().
+	//Number of records seen at each level, and number of those with a positive SSU value.
+	final long[] levelCounts=new long[AnalyzeSketchResults.taxLevels];
+	final long[] levelCountsSSU=new long[AnalyzeSketchResults.taxLevels];
+
+	//Running sums of ANI and of positive SSU values at each level.
+	final double[] levelAniSums=new double[AnalyzeSketchResults.taxLevels];
+	final double[] levelSSUSums=new double[AnalyzeSketchResults.taxLevels];
+
+	//Individual ANI and positive SSU values at each level; slots are filled by the constructor.
+	final FloatList[] aniLists=new FloatList[AnalyzeSketchResults.taxLevels];
+	final FloatList[] ssuLists=new FloatList[AnalyzeSketchResults.taxLevels];
+
+	//Optional (nullable) list that receives each RecordSet started by processData().
+	final ArrayList<RecordSet> recordSets;
+
+	//Input format selector checked in parse().
+	final int mode;
+	//Taxonomy used for lookups when parsing Mash lines.
+	final TaxTree tree;
+	//Optional (nullable) writer; header lines are echoed to it.
+	final ByteStreamWriter bswBad;
+
+	//Values of the most recently parsed data line.
+	int qTaxID=-1;
+	int rTaxID=-1;
+	//The four size/base fields are assigned by parseData() only, not by parseDataMash().
+	long qBases;
+	long rBases;
+	long qSize;
+	long rSize;
+	double ani=-1;
+	//-1 when the SSU column held "." or the line came from Mash.
+	double ssu=-1;
+	int taxLevelExtended=-1;
+	//False after a data line is parsed; set back to true by processData().
+	boolean processed=true;
+	//Set currently being filled by processData(); replaced when qTaxID changes.
+	RecordSet currentSet=null;
+	//True when raw text was requested or bswBad is non-null.
+	final boolean keepText;
+
+	//Last non-null line handed to parse(), retained only when keepText is true.
+	byte[] text=null;
+
+	//Column positions used by parseData(). These are defaults that parseHeader()
+	//overwrites; being static, they are shared by every parser instance.
+	private static int qTaxIDColumn=7;
+	private static int rTaxIDColumn=8;
+	private static int qSizeColumn=3;
+	private static int rSizeColumn=4;
+	private static int qBasesColumn=5;
+	private static int rBasesColumn=6;
+	private static int aniColumn=2;
+	private static int ssuColumn=11;
+	private static int caLevelColumn=12;
+
+	//Mash lines whose fraction numerator is below this value are skipped.
+	static int MIN_HITS=3;
+	
+}
\ No newline at end of file