annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/ResultLineParser.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 package sketch;
jpayne@68 2
jpayne@68 3 import java.util.ArrayList;
jpayne@68 4 import java.util.HashMap;
jpayne@68 5
jpayne@68 6 import fileIO.ByteStreamWriter;
jpayne@68 7 import fileIO.ReadWrite;
jpayne@68 8 import shared.Parse;
jpayne@68 9 import shared.Tools;
jpayne@68 10 import structures.FloatList;
jpayne@68 11 import tax.TaxNode;
jpayne@68 12 import tax.TaxTree;
jpayne@68 13
jpayne@68 14 class ResultLineParser {
jpayne@68 15
jpayne@68 16 ResultLineParser(int mode_, TaxTree tree_, ByteStreamWriter bswBad_, ArrayList<RecordSet> recordSets_, boolean keepText_){
jpayne@68 17 mode=mode_;
jpayne@68 18 tree=tree_;
jpayne@68 19 bswBad=bswBad_;
jpayne@68 20 recordSets=recordSets_;
jpayne@68 21 keepText=keepText_ || bswBad!=null;
jpayne@68 22 for(int i=0; i<AnalyzeSketchResults.taxLevels; i++){
jpayne@68 23 aniLists[i]=new FloatList();
jpayne@68 24 ssuLists[i]=new FloatList();
jpayne@68 25 }
jpayne@68 26 }
jpayne@68 27
jpayne@68 28 void parse(byte[] line){
jpayne@68 29 if(keepText){text=line;}
jpayne@68 30 if(line[0]!='#'){
jpayne@68 31 if(mode==AnalyzeSketchResults.BBSKETCH_MODE){
jpayne@68 32 parseData(line);
jpayne@68 33 }else if(mode==AnalyzeSketchResults.MASH_MODE){
jpayne@68 34 parseDataMash(line);
jpayne@68 35 }else{
jpayne@68 36 assert(false) : "Bad mode: "+mode;
jpayne@68 37 }
jpayne@68 38 }else{
jpayne@68 39 parseHeader(line);
jpayne@68 40 if(bswBad!=null){bswBad.println(line);}
jpayne@68 41 }
jpayne@68 42 }
jpayne@68 43
jpayne@68 44 private synchronized void parseHeader(byte[] line){
jpayne@68 45 ArrayList<byte[]> split=Tools.split(line, 0, (byte)'\t');
jpayne@68 46 for(int col=0; col<split.size(); col++){
jpayne@68 47 byte[] array=split.get(col);
jpayne@68 48 if(Tools.equals(array, "ANI") || Tools.equals(array, "AAI")){
jpayne@68 49 aniColumn=col;
jpayne@68 50 }else if(Tools.equals(array, "QTaxID")){
jpayne@68 51 qTaxIDColumn=col;
jpayne@68 52 }else if(Tools.equals(array, "RTaxID")){
jpayne@68 53 rTaxIDColumn=col;
jpayne@68 54 }else if(Tools.equals(array, "SSU")){
jpayne@68 55 ssuColumn=col;
jpayne@68 56 }else if(Tools.equals(array, "CALevel")){
jpayne@68 57 caLevelColumn=col;
jpayne@68 58 }
jpayne@68 59
jpayne@68 60 else if(Tools.equals(array, "QSize")){
jpayne@68 61 qSizeColumn=col;
jpayne@68 62 }else if(Tools.equals(array, "RefSize") || Tools.equals(array, "RSize")){
jpayne@68 63 rSizeColumn=col;
jpayne@68 64 }else if(Tools.equals(array, "QBases")){
jpayne@68 65 qBasesColumn=col;
jpayne@68 66 }else if(Tools.equals(array, "RBases")){
jpayne@68 67 rBasesColumn=col;
jpayne@68 68 }
jpayne@68 69 }
jpayne@68 70 }
jpayne@68 71
jpayne@68 72 private void parseData(byte[] line){
jpayne@68 73 ArrayList<byte[]> split=Tools.split(line, 0, (byte)'\t');
jpayne@68 74 qTaxID=Parse.parseInt(split.get(qTaxIDColumn), 0);
jpayne@68 75 rTaxID=Parse.parseInt(split.get(rTaxIDColumn), 0);
jpayne@68 76 qBases=Parse.parseLong(split.get(qBasesColumn), 0);
jpayne@68 77 rBases=Parse.parseLong(split.get(rBasesColumn), 0);
jpayne@68 78 qSize=Parse.parseLong(split.get(qSizeColumn), 0);
jpayne@68 79 rSize=Parse.parseLong(split.get(rSizeColumn), 0);
jpayne@68 80 ani=Parse.parseDouble(split.get(aniColumn), 0);
jpayne@68 81 byte[] ssuArray=split.get(ssuColumn);
jpayne@68 82 ssu=ssuArray.length==1 && ssuArray[0]=='.' ? -1 : Parse.parseDouble(ssuArray, 0);
jpayne@68 83 taxLevelExtended=TaxTree.stringToLevelExtended(new String(split.get(caLevelColumn)));
jpayne@68 84 if(taxLevelExtended<0) {
jpayne@68 85 System.err.println(new String(split.get(caLevelColumn)));
jpayne@68 86 taxLevelExtended=0;
jpayne@68 87 }
jpayne@68 88 processed=false;
jpayne@68 89 }
jpayne@68 90
jpayne@68 91 private TaxNode getTaxNode(String fname){
jpayne@68 92 String name=ReadWrite.stripToCore(fname);
jpayne@68 93 if(name.startsWith("tid_")){
jpayne@68 94 int idx2=fname.indexOf('_', 4);
jpayne@68 95 int x=Parse.parseInt(fname, 4, idx2);
jpayne@68 96 return x>0 ? tree.getNode(x) : null;
jpayne@68 97 //name=name.substring(idx2+1); //This would allow fall-through to name parsing
jpayne@68 98 }
jpayne@68 99 try {
jpayne@68 100 return tree.getNodeByName(name);
jpayne@68 101 } catch (Throwable e) {
jpayne@68 102 return null;
jpayne@68 103 }
jpayne@68 104 }
jpayne@68 105
jpayne@68 106 private void parseDataMash(byte[] line){
jpayne@68 107 ///dev/shm/tid_123_Zymomonas_mobilis.fna.gz /dev/shm/tid_456_bacterium_endosymbiont_of_Bathymodiolus_sp._5_South.fna.gz 0.43859 0.00515848 1/20000
jpayne@68 108
jpayne@68 109 String[] split=new String(line).split("\t");
jpayne@68 110
jpayne@68 111 String fraction=split[split.length-1];
jpayne@68 112 int numerator=Integer.parseInt(fraction.split("/")[0]);
jpayne@68 113 if(numerator<MIN_HITS){return;}
jpayne@68 114 int denominator=Integer.parseInt(fraction.split("/")[1]);
jpayne@68 115
jpayne@68 116 //The default ordering is reversed since mash output is ordered first by ref, then query
jpayne@68 117 //The normal ordering (as below) requires a linux sort
jpayne@68 118 {
jpayne@68 119 TaxNode qNode=getTaxNode(split[0]);
jpayne@68 120 TaxNode rNode=getTaxNode(split[1]);
jpayne@68 121
jpayne@68 122 if(qNode==null || rNode==null){return;}
jpayne@68 123 qTaxID=qNode.id;
jpayne@68 124 rTaxID=rNode.id;
jpayne@68 125 TaxNode ancestor=tree.commonAncestor(qNode, rNode);
jpayne@68 126 taxLevelExtended=ancestor.levelExtended;
jpayne@68 127 }
jpayne@68 128
jpayne@68 129 ani=numerator/(float)denominator;
jpayne@68 130 ssu=-1;
jpayne@68 131 if(taxLevelExtended<0){taxLevelExtended=0;}
jpayne@68 132 processed=false;
jpayne@68 133 }
jpayne@68 134
jpayne@68 135 //Returns a complete set when a new set is started
jpayne@68 136 RecordSet processData(HashMap<Long, Float> map, boolean saveRecord){
jpayne@68 137 RecordSet old=null;
jpayne@68 138 if(processed){return null;}
jpayne@68 139 levelAniSums[taxLevelExtended]+=ani;
jpayne@68 140 levelCounts[taxLevelExtended]++;
jpayne@68 141 aniLists[taxLevelExtended].add((float)ani);
jpayne@68 142
jpayne@68 143 if(ssu>0){
jpayne@68 144 levelSSUSums[taxLevelExtended]+=ssu;
jpayne@68 145 levelCountsSSU[taxLevelExtended]++;
jpayne@68 146 ssuLists[taxLevelExtended].add((float)ssu);
jpayne@68 147 }
jpayne@68 148 if(map!=null){
jpayne@68 149 long key=(((long)qTaxID)<<32)|rTaxID;
jpayne@68 150 map.put(key, (float)ani);
jpayne@68 151 }
jpayne@68 152 if(saveRecord){
jpayne@68 153 if(currentSet==null || currentSet.qID!=qTaxID){
jpayne@68 154 old=currentSet;
jpayne@68 155 currentSet=new RecordSet(qTaxID);
jpayne@68 156 if(recordSets!=null){
jpayne@68 157 recordSets.add(currentSet);
jpayne@68 158 }
jpayne@68 159 }
jpayne@68 160 currentSet.records.add(new Record(this));
jpayne@68 161 }
jpayne@68 162 processed=true;
jpayne@68 163 return old;
jpayne@68 164 }
jpayne@68 165
jpayne@68 166 /*--------------------------------------------------------------*/
jpayne@68 167
jpayne@68 168 // final static int taxLevels=TaxTree.numTaxaNamesExtended;
jpayne@68 169 final long[] levelCounts=new long[AnalyzeSketchResults.taxLevels];
jpayne@68 170 final long[] levelCountsSSU=new long[AnalyzeSketchResults.taxLevels];
jpayne@68 171
jpayne@68 172 final double[] levelAniSums=new double[AnalyzeSketchResults.taxLevels];
jpayne@68 173 final double[] levelSSUSums=new double[AnalyzeSketchResults.taxLevels];
jpayne@68 174
jpayne@68 175 final FloatList[] aniLists=new FloatList[AnalyzeSketchResults.taxLevels];
jpayne@68 176 final FloatList[] ssuLists=new FloatList[AnalyzeSketchResults.taxLevels];
jpayne@68 177
jpayne@68 178 final ArrayList<RecordSet> recordSets;
jpayne@68 179
jpayne@68 180 final int mode;
jpayne@68 181 final TaxTree tree;
jpayne@68 182 final ByteStreamWriter bswBad;
jpayne@68 183
jpayne@68 184 int qTaxID=-1;
jpayne@68 185 int rTaxID=-1;
jpayne@68 186 long qBases;
jpayne@68 187 long rBases;
jpayne@68 188 long qSize;
jpayne@68 189 long rSize;
jpayne@68 190 double ani=-1;
jpayne@68 191 double ssu=-1;
jpayne@68 192 int taxLevelExtended=-1;
jpayne@68 193 boolean processed=true;
jpayne@68 194 RecordSet currentSet=null;
jpayne@68 195 final boolean keepText;
jpayne@68 196
jpayne@68 197 byte[] text=null;
jpayne@68 198
jpayne@68 199 private static int qTaxIDColumn=7;
jpayne@68 200 private static int rTaxIDColumn=8;
jpayne@68 201 private static int qSizeColumn=3;
jpayne@68 202 private static int rSizeColumn=4;
jpayne@68 203 private static int qBasesColumn=5;
jpayne@68 204 private static int rBasesColumn=6;
jpayne@68 205 private static int aniColumn=2;
jpayne@68 206 private static int ssuColumn=11;
jpayne@68 207 private static int caLevelColumn=12;
jpayne@68 208
jpayne@68 209 static int MIN_HITS=3;
jpayne@68 210
jpayne@68 211 }