Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/ResultLineParser.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package sketch; | |
2 | |
3 import java.util.ArrayList; | |
4 import java.util.HashMap; | |
5 | |
6 import fileIO.ByteStreamWriter; | |
7 import fileIO.ReadWrite; | |
8 import shared.Parse; | |
9 import shared.Tools; | |
10 import structures.FloatList; | |
11 import tax.TaxNode; | |
12 import tax.TaxTree; | |
13 | |
14 class ResultLineParser { | |
15 | |
16 ResultLineParser(int mode_, TaxTree tree_, ByteStreamWriter bswBad_, ArrayList<RecordSet> recordSets_, boolean keepText_){ | |
17 mode=mode_; | |
18 tree=tree_; | |
19 bswBad=bswBad_; | |
20 recordSets=recordSets_; | |
21 keepText=keepText_ || bswBad!=null; | |
22 for(int i=0; i<AnalyzeSketchResults.taxLevels; i++){ | |
23 aniLists[i]=new FloatList(); | |
24 ssuLists[i]=new FloatList(); | |
25 } | |
26 } | |
27 | |
28 void parse(byte[] line){ | |
29 if(keepText){text=line;} | |
30 if(line[0]!='#'){ | |
31 if(mode==AnalyzeSketchResults.BBSKETCH_MODE){ | |
32 parseData(line); | |
33 }else if(mode==AnalyzeSketchResults.MASH_MODE){ | |
34 parseDataMash(line); | |
35 }else{ | |
36 assert(false) : "Bad mode: "+mode; | |
37 } | |
38 }else{ | |
39 parseHeader(line); | |
40 if(bswBad!=null){bswBad.println(line);} | |
41 } | |
42 } | |
43 | |
44 private synchronized void parseHeader(byte[] line){ | |
45 ArrayList<byte[]> split=Tools.split(line, 0, (byte)'\t'); | |
46 for(int col=0; col<split.size(); col++){ | |
47 byte[] array=split.get(col); | |
48 if(Tools.equals(array, "ANI") || Tools.equals(array, "AAI")){ | |
49 aniColumn=col; | |
50 }else if(Tools.equals(array, "QTaxID")){ | |
51 qTaxIDColumn=col; | |
52 }else if(Tools.equals(array, "RTaxID")){ | |
53 rTaxIDColumn=col; | |
54 }else if(Tools.equals(array, "SSU")){ | |
55 ssuColumn=col; | |
56 }else if(Tools.equals(array, "CALevel")){ | |
57 caLevelColumn=col; | |
58 } | |
59 | |
60 else if(Tools.equals(array, "QSize")){ | |
61 qSizeColumn=col; | |
62 }else if(Tools.equals(array, "RefSize") || Tools.equals(array, "RSize")){ | |
63 rSizeColumn=col; | |
64 }else if(Tools.equals(array, "QBases")){ | |
65 qBasesColumn=col; | |
66 }else if(Tools.equals(array, "RBases")){ | |
67 rBasesColumn=col; | |
68 } | |
69 } | |
70 } | |
71 | |
72 private void parseData(byte[] line){ | |
73 ArrayList<byte[]> split=Tools.split(line, 0, (byte)'\t'); | |
74 qTaxID=Parse.parseInt(split.get(qTaxIDColumn), 0); | |
75 rTaxID=Parse.parseInt(split.get(rTaxIDColumn), 0); | |
76 qBases=Parse.parseLong(split.get(qBasesColumn), 0); | |
77 rBases=Parse.parseLong(split.get(rBasesColumn), 0); | |
78 qSize=Parse.parseLong(split.get(qSizeColumn), 0); | |
79 rSize=Parse.parseLong(split.get(rSizeColumn), 0); | |
80 ani=Parse.parseDouble(split.get(aniColumn), 0); | |
81 byte[] ssuArray=split.get(ssuColumn); | |
82 ssu=ssuArray.length==1 && ssuArray[0]=='.' ? -1 : Parse.parseDouble(ssuArray, 0); | |
83 taxLevelExtended=TaxTree.stringToLevelExtended(new String(split.get(caLevelColumn))); | |
84 if(taxLevelExtended<0) { | |
85 System.err.println(new String(split.get(caLevelColumn))); | |
86 taxLevelExtended=0; | |
87 } | |
88 processed=false; | |
89 } | |
90 | |
91 private TaxNode getTaxNode(String fname){ | |
92 String name=ReadWrite.stripToCore(fname); | |
93 if(name.startsWith("tid_")){ | |
94 int idx2=fname.indexOf('_', 4); | |
95 int x=Parse.parseInt(fname, 4, idx2); | |
96 return x>0 ? tree.getNode(x) : null; | |
97 //name=name.substring(idx2+1); //This would allow fall-through to name parsing | |
98 } | |
99 try { | |
100 return tree.getNodeByName(name); | |
101 } catch (Throwable e) { | |
102 return null; | |
103 } | |
104 } | |
105 | |
106 private void parseDataMash(byte[] line){ | |
107 ///dev/shm/tid_123_Zymomonas_mobilis.fna.gz /dev/shm/tid_456_bacterium_endosymbiont_of_Bathymodiolus_sp._5_South.fna.gz 0.43859 0.00515848 1/20000 | |
108 | |
109 String[] split=new String(line).split("\t"); | |
110 | |
111 String fraction=split[split.length-1]; | |
112 int numerator=Integer.parseInt(fraction.split("/")[0]); | |
113 if(numerator<MIN_HITS){return;} | |
114 int denominator=Integer.parseInt(fraction.split("/")[1]); | |
115 | |
116 //The default ordering is reversed since mash output is ordered first by ref, then query | |
117 //The normal ordering (as below) requires a linux sort | |
118 { | |
119 TaxNode qNode=getTaxNode(split[0]); | |
120 TaxNode rNode=getTaxNode(split[1]); | |
121 | |
122 if(qNode==null || rNode==null){return;} | |
123 qTaxID=qNode.id; | |
124 rTaxID=rNode.id; | |
125 TaxNode ancestor=tree.commonAncestor(qNode, rNode); | |
126 taxLevelExtended=ancestor.levelExtended; | |
127 } | |
128 | |
129 ani=numerator/(float)denominator; | |
130 ssu=-1; | |
131 if(taxLevelExtended<0){taxLevelExtended=0;} | |
132 processed=false; | |
133 } | |
134 | |
135 //Returns a complete set when a new set is started | |
136 RecordSet processData(HashMap<Long, Float> map, boolean saveRecord){ | |
137 RecordSet old=null; | |
138 if(processed){return null;} | |
139 levelAniSums[taxLevelExtended]+=ani; | |
140 levelCounts[taxLevelExtended]++; | |
141 aniLists[taxLevelExtended].add((float)ani); | |
142 | |
143 if(ssu>0){ | |
144 levelSSUSums[taxLevelExtended]+=ssu; | |
145 levelCountsSSU[taxLevelExtended]++; | |
146 ssuLists[taxLevelExtended].add((float)ssu); | |
147 } | |
148 if(map!=null){ | |
149 long key=(((long)qTaxID)<<32)|rTaxID; | |
150 map.put(key, (float)ani); | |
151 } | |
152 if(saveRecord){ | |
153 if(currentSet==null || currentSet.qID!=qTaxID){ | |
154 old=currentSet; | |
155 currentSet=new RecordSet(qTaxID); | |
156 if(recordSets!=null){ | |
157 recordSets.add(currentSet); | |
158 } | |
159 } | |
160 currentSet.records.add(new Record(this)); | |
161 } | |
162 processed=true; | |
163 return old; | |
164 } | |
165 | |
166 /*--------------------------------------------------------------*/ | |
167 | |
168 // final static int taxLevels=TaxTree.numTaxaNamesExtended; | |
169 final long[] levelCounts=new long[AnalyzeSketchResults.taxLevels]; | |
170 final long[] levelCountsSSU=new long[AnalyzeSketchResults.taxLevels]; | |
171 | |
172 final double[] levelAniSums=new double[AnalyzeSketchResults.taxLevels]; | |
173 final double[] levelSSUSums=new double[AnalyzeSketchResults.taxLevels]; | |
174 | |
175 final FloatList[] aniLists=new FloatList[AnalyzeSketchResults.taxLevels]; | |
176 final FloatList[] ssuLists=new FloatList[AnalyzeSketchResults.taxLevels]; | |
177 | |
178 final ArrayList<RecordSet> recordSets; | |
179 | |
180 final int mode; | |
181 final TaxTree tree; | |
182 final ByteStreamWriter bswBad; | |
183 | |
184 int qTaxID=-1; | |
185 int rTaxID=-1; | |
186 long qBases; | |
187 long rBases; | |
188 long qSize; | |
189 long rSize; | |
190 double ani=-1; | |
191 double ssu=-1; | |
192 int taxLevelExtended=-1; | |
193 boolean processed=true; | |
194 RecordSet currentSet=null; | |
195 final boolean keepText; | |
196 | |
197 byte[] text=null; | |
198 | |
199 private static int qTaxIDColumn=7; | |
200 private static int rTaxIDColumn=8; | |
201 private static int qSizeColumn=3; | |
202 private static int rSizeColumn=4; | |
203 private static int qBasesColumn=5; | |
204 private static int rBasesColumn=6; | |
205 private static int aniColumn=2; | |
206 private static int ssuColumn=11; | |
207 private static int caLevelColumn=12; | |
208 | |
209 static int MIN_HITS=3; | |
210 | |
211 } |