comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/ResultLineParser.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package sketch;
2
3 import java.util.ArrayList;
4 import java.util.HashMap;
5
6 import fileIO.ByteStreamWriter;
7 import fileIO.ReadWrite;
8 import shared.Parse;
9 import shared.Tools;
10 import structures.FloatList;
11 import tax.TaxNode;
12 import tax.TaxTree;
13
14 class ResultLineParser {
15
16 ResultLineParser(int mode_, TaxTree tree_, ByteStreamWriter bswBad_, ArrayList<RecordSet> recordSets_, boolean keepText_){
17 mode=mode_;
18 tree=tree_;
19 bswBad=bswBad_;
20 recordSets=recordSets_;
21 keepText=keepText_ || bswBad!=null;
22 for(int i=0; i<AnalyzeSketchResults.taxLevels; i++){
23 aniLists[i]=new FloatList();
24 ssuLists[i]=new FloatList();
25 }
26 }
27
28 void parse(byte[] line){
29 if(keepText){text=line;}
30 if(line[0]!='#'){
31 if(mode==AnalyzeSketchResults.BBSKETCH_MODE){
32 parseData(line);
33 }else if(mode==AnalyzeSketchResults.MASH_MODE){
34 parseDataMash(line);
35 }else{
36 assert(false) : "Bad mode: "+mode;
37 }
38 }else{
39 parseHeader(line);
40 if(bswBad!=null){bswBad.println(line);}
41 }
42 }
43
44 private synchronized void parseHeader(byte[] line){
45 ArrayList<byte[]> split=Tools.split(line, 0, (byte)'\t');
46 for(int col=0; col<split.size(); col++){
47 byte[] array=split.get(col);
48 if(Tools.equals(array, "ANI") || Tools.equals(array, "AAI")){
49 aniColumn=col;
50 }else if(Tools.equals(array, "QTaxID")){
51 qTaxIDColumn=col;
52 }else if(Tools.equals(array, "RTaxID")){
53 rTaxIDColumn=col;
54 }else if(Tools.equals(array, "SSU")){
55 ssuColumn=col;
56 }else if(Tools.equals(array, "CALevel")){
57 caLevelColumn=col;
58 }
59
60 else if(Tools.equals(array, "QSize")){
61 qSizeColumn=col;
62 }else if(Tools.equals(array, "RefSize") || Tools.equals(array, "RSize")){
63 rSizeColumn=col;
64 }else if(Tools.equals(array, "QBases")){
65 qBasesColumn=col;
66 }else if(Tools.equals(array, "RBases")){
67 rBasesColumn=col;
68 }
69 }
70 }
71
72 private void parseData(byte[] line){
73 ArrayList<byte[]> split=Tools.split(line, 0, (byte)'\t');
74 qTaxID=Parse.parseInt(split.get(qTaxIDColumn), 0);
75 rTaxID=Parse.parseInt(split.get(rTaxIDColumn), 0);
76 qBases=Parse.parseLong(split.get(qBasesColumn), 0);
77 rBases=Parse.parseLong(split.get(rBasesColumn), 0);
78 qSize=Parse.parseLong(split.get(qSizeColumn), 0);
79 rSize=Parse.parseLong(split.get(rSizeColumn), 0);
80 ani=Parse.parseDouble(split.get(aniColumn), 0);
81 byte[] ssuArray=split.get(ssuColumn);
82 ssu=ssuArray.length==1 && ssuArray[0]=='.' ? -1 : Parse.parseDouble(ssuArray, 0);
83 taxLevelExtended=TaxTree.stringToLevelExtended(new String(split.get(caLevelColumn)));
84 if(taxLevelExtended<0) {
85 System.err.println(new String(split.get(caLevelColumn)));
86 taxLevelExtended=0;
87 }
88 processed=false;
89 }
90
91 private TaxNode getTaxNode(String fname){
92 String name=ReadWrite.stripToCore(fname);
93 if(name.startsWith("tid_")){
94 int idx2=fname.indexOf('_', 4);
95 int x=Parse.parseInt(fname, 4, idx2);
96 return x>0 ? tree.getNode(x) : null;
97 //name=name.substring(idx2+1); //This would allow fall-through to name parsing
98 }
99 try {
100 return tree.getNodeByName(name);
101 } catch (Throwable e) {
102 return null;
103 }
104 }
105
106 private void parseDataMash(byte[] line){
107 ///dev/shm/tid_123_Zymomonas_mobilis.fna.gz /dev/shm/tid_456_bacterium_endosymbiont_of_Bathymodiolus_sp._5_South.fna.gz 0.43859 0.00515848 1/20000
108
109 String[] split=new String(line).split("\t");
110
111 String fraction=split[split.length-1];
112 int numerator=Integer.parseInt(fraction.split("/")[0]);
113 if(numerator<MIN_HITS){return;}
114 int denominator=Integer.parseInt(fraction.split("/")[1]);
115
116 //The default ordering is reversed since mash output is ordered first by ref, then query
117 //The normal ordering (as below) requires a linux sort
118 {
119 TaxNode qNode=getTaxNode(split[0]);
120 TaxNode rNode=getTaxNode(split[1]);
121
122 if(qNode==null || rNode==null){return;}
123 qTaxID=qNode.id;
124 rTaxID=rNode.id;
125 TaxNode ancestor=tree.commonAncestor(qNode, rNode);
126 taxLevelExtended=ancestor.levelExtended;
127 }
128
129 ani=numerator/(float)denominator;
130 ssu=-1;
131 if(taxLevelExtended<0){taxLevelExtended=0;}
132 processed=false;
133 }
134
135 //Returns a complete set when a new set is started
136 RecordSet processData(HashMap<Long, Float> map, boolean saveRecord){
137 RecordSet old=null;
138 if(processed){return null;}
139 levelAniSums[taxLevelExtended]+=ani;
140 levelCounts[taxLevelExtended]++;
141 aniLists[taxLevelExtended].add((float)ani);
142
143 if(ssu>0){
144 levelSSUSums[taxLevelExtended]+=ssu;
145 levelCountsSSU[taxLevelExtended]++;
146 ssuLists[taxLevelExtended].add((float)ssu);
147 }
148 if(map!=null){
149 long key=(((long)qTaxID)<<32)|rTaxID;
150 map.put(key, (float)ani);
151 }
152 if(saveRecord){
153 if(currentSet==null || currentSet.qID!=qTaxID){
154 old=currentSet;
155 currentSet=new RecordSet(qTaxID);
156 if(recordSets!=null){
157 recordSets.add(currentSet);
158 }
159 }
160 currentSet.records.add(new Record(this));
161 }
162 processed=true;
163 return old;
164 }
165
166 /*--------------------------------------------------------------*/
167
168 // final static int taxLevels=TaxTree.numTaxaNamesExtended;
169 final long[] levelCounts=new long[AnalyzeSketchResults.taxLevels];
170 final long[] levelCountsSSU=new long[AnalyzeSketchResults.taxLevels];
171
172 final double[] levelAniSums=new double[AnalyzeSketchResults.taxLevels];
173 final double[] levelSSUSums=new double[AnalyzeSketchResults.taxLevels];
174
175 final FloatList[] aniLists=new FloatList[AnalyzeSketchResults.taxLevels];
176 final FloatList[] ssuLists=new FloatList[AnalyzeSketchResults.taxLevels];
177
178 final ArrayList<RecordSet> recordSets;
179
180 final int mode;
181 final TaxTree tree;
182 final ByteStreamWriter bswBad;
183
184 int qTaxID=-1;
185 int rTaxID=-1;
186 long qBases;
187 long rBases;
188 long qSize;
189 long rSize;
190 double ani=-1;
191 double ssu=-1;
192 int taxLevelExtended=-1;
193 boolean processed=true;
194 RecordSet currentSet=null;
195 final boolean keepText;
196
197 byte[] text=null;
198
199 private static int qTaxIDColumn=7;
200 private static int rTaxIDColumn=8;
201 private static int qSizeColumn=3;
202 private static int rSizeColumn=4;
203 private static int qBasesColumn=5;
204 private static int rBasesColumn=6;
205 private static int aniColumn=2;
206 private static int ssuColumn=11;
207 private static int caLevelColumn=12;
208
209 static int MIN_HITS=3;
210
211 }