jpayne@68
|
1 package sketch;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.util.ArrayList;
|
jpayne@68
|
4 import java.util.HashMap;
|
jpayne@68
|
5
|
jpayne@68
|
6 import fileIO.ByteStreamWriter;
|
jpayne@68
|
7 import fileIO.ReadWrite;
|
jpayne@68
|
8 import shared.Parse;
|
jpayne@68
|
9 import shared.Tools;
|
jpayne@68
|
10 import structures.FloatList;
|
jpayne@68
|
11 import tax.TaxNode;
|
jpayne@68
|
12 import tax.TaxTree;
|
jpayne@68
|
13
|
jpayne@68
|
14 class ResultLineParser {
|
jpayne@68
|
15
|
jpayne@68
|
16 ResultLineParser(int mode_, TaxTree tree_, ByteStreamWriter bswBad_, ArrayList<RecordSet> recordSets_, boolean keepText_){
|
jpayne@68
|
17 mode=mode_;
|
jpayne@68
|
18 tree=tree_;
|
jpayne@68
|
19 bswBad=bswBad_;
|
jpayne@68
|
20 recordSets=recordSets_;
|
jpayne@68
|
21 keepText=keepText_ || bswBad!=null;
|
jpayne@68
|
22 for(int i=0; i<AnalyzeSketchResults.taxLevels; i++){
|
jpayne@68
|
23 aniLists[i]=new FloatList();
|
jpayne@68
|
24 ssuLists[i]=new FloatList();
|
jpayne@68
|
25 }
|
jpayne@68
|
26 }
|
jpayne@68
|
27
|
jpayne@68
|
28 void parse(byte[] line){
|
jpayne@68
|
29 if(keepText){text=line;}
|
jpayne@68
|
30 if(line[0]!='#'){
|
jpayne@68
|
31 if(mode==AnalyzeSketchResults.BBSKETCH_MODE){
|
jpayne@68
|
32 parseData(line);
|
jpayne@68
|
33 }else if(mode==AnalyzeSketchResults.MASH_MODE){
|
jpayne@68
|
34 parseDataMash(line);
|
jpayne@68
|
35 }else{
|
jpayne@68
|
36 assert(false) : "Bad mode: "+mode;
|
jpayne@68
|
37 }
|
jpayne@68
|
38 }else{
|
jpayne@68
|
39 parseHeader(line);
|
jpayne@68
|
40 if(bswBad!=null){bswBad.println(line);}
|
jpayne@68
|
41 }
|
jpayne@68
|
42 }
|
jpayne@68
|
43
|
jpayne@68
|
44 private synchronized void parseHeader(byte[] line){
|
jpayne@68
|
45 ArrayList<byte[]> split=Tools.split(line, 0, (byte)'\t');
|
jpayne@68
|
46 for(int col=0; col<split.size(); col++){
|
jpayne@68
|
47 byte[] array=split.get(col);
|
jpayne@68
|
48 if(Tools.equals(array, "ANI") || Tools.equals(array, "AAI")){
|
jpayne@68
|
49 aniColumn=col;
|
jpayne@68
|
50 }else if(Tools.equals(array, "QTaxID")){
|
jpayne@68
|
51 qTaxIDColumn=col;
|
jpayne@68
|
52 }else if(Tools.equals(array, "RTaxID")){
|
jpayne@68
|
53 rTaxIDColumn=col;
|
jpayne@68
|
54 }else if(Tools.equals(array, "SSU")){
|
jpayne@68
|
55 ssuColumn=col;
|
jpayne@68
|
56 }else if(Tools.equals(array, "CALevel")){
|
jpayne@68
|
57 caLevelColumn=col;
|
jpayne@68
|
58 }
|
jpayne@68
|
59
|
jpayne@68
|
60 else if(Tools.equals(array, "QSize")){
|
jpayne@68
|
61 qSizeColumn=col;
|
jpayne@68
|
62 }else if(Tools.equals(array, "RefSize") || Tools.equals(array, "RSize")){
|
jpayne@68
|
63 rSizeColumn=col;
|
jpayne@68
|
64 }else if(Tools.equals(array, "QBases")){
|
jpayne@68
|
65 qBasesColumn=col;
|
jpayne@68
|
66 }else if(Tools.equals(array, "RBases")){
|
jpayne@68
|
67 rBasesColumn=col;
|
jpayne@68
|
68 }
|
jpayne@68
|
69 }
|
jpayne@68
|
70 }
|
jpayne@68
|
71
|
jpayne@68
|
72 private void parseData(byte[] line){
|
jpayne@68
|
73 ArrayList<byte[]> split=Tools.split(line, 0, (byte)'\t');
|
jpayne@68
|
74 qTaxID=Parse.parseInt(split.get(qTaxIDColumn), 0);
|
jpayne@68
|
75 rTaxID=Parse.parseInt(split.get(rTaxIDColumn), 0);
|
jpayne@68
|
76 qBases=Parse.parseLong(split.get(qBasesColumn), 0);
|
jpayne@68
|
77 rBases=Parse.parseLong(split.get(rBasesColumn), 0);
|
jpayne@68
|
78 qSize=Parse.parseLong(split.get(qSizeColumn), 0);
|
jpayne@68
|
79 rSize=Parse.parseLong(split.get(rSizeColumn), 0);
|
jpayne@68
|
80 ani=Parse.parseDouble(split.get(aniColumn), 0);
|
jpayne@68
|
81 byte[] ssuArray=split.get(ssuColumn);
|
jpayne@68
|
82 ssu=ssuArray.length==1 && ssuArray[0]=='.' ? -1 : Parse.parseDouble(ssuArray, 0);
|
jpayne@68
|
83 taxLevelExtended=TaxTree.stringToLevelExtended(new String(split.get(caLevelColumn)));
|
jpayne@68
|
84 if(taxLevelExtended<0) {
|
jpayne@68
|
85 System.err.println(new String(split.get(caLevelColumn)));
|
jpayne@68
|
86 taxLevelExtended=0;
|
jpayne@68
|
87 }
|
jpayne@68
|
88 processed=false;
|
jpayne@68
|
89 }
|
jpayne@68
|
90
|
jpayne@68
|
91 private TaxNode getTaxNode(String fname){
|
jpayne@68
|
92 String name=ReadWrite.stripToCore(fname);
|
jpayne@68
|
93 if(name.startsWith("tid_")){
|
jpayne@68
|
94 int idx2=fname.indexOf('_', 4);
|
jpayne@68
|
95 int x=Parse.parseInt(fname, 4, idx2);
|
jpayne@68
|
96 return x>0 ? tree.getNode(x) : null;
|
jpayne@68
|
97 //name=name.substring(idx2+1); //This would allow fall-through to name parsing
|
jpayne@68
|
98 }
|
jpayne@68
|
99 try {
|
jpayne@68
|
100 return tree.getNodeByName(name);
|
jpayne@68
|
101 } catch (Throwable e) {
|
jpayne@68
|
102 return null;
|
jpayne@68
|
103 }
|
jpayne@68
|
104 }
|
jpayne@68
|
105
|
jpayne@68
|
106 private void parseDataMash(byte[] line){
|
jpayne@68
|
107 ///dev/shm/tid_123_Zymomonas_mobilis.fna.gz /dev/shm/tid_456_bacterium_endosymbiont_of_Bathymodiolus_sp._5_South.fna.gz 0.43859 0.00515848 1/20000
|
jpayne@68
|
108
|
jpayne@68
|
109 String[] split=new String(line).split("\t");
|
jpayne@68
|
110
|
jpayne@68
|
111 String fraction=split[split.length-1];
|
jpayne@68
|
112 int numerator=Integer.parseInt(fraction.split("/")[0]);
|
jpayne@68
|
113 if(numerator<MIN_HITS){return;}
|
jpayne@68
|
114 int denominator=Integer.parseInt(fraction.split("/")[1]);
|
jpayne@68
|
115
|
jpayne@68
|
116 //The default ordering is reversed since mash output is ordered first by ref, then query
|
jpayne@68
|
117 //The normal ordering (as below) requires a linux sort
|
jpayne@68
|
118 {
|
jpayne@68
|
119 TaxNode qNode=getTaxNode(split[0]);
|
jpayne@68
|
120 TaxNode rNode=getTaxNode(split[1]);
|
jpayne@68
|
121
|
jpayne@68
|
122 if(qNode==null || rNode==null){return;}
|
jpayne@68
|
123 qTaxID=qNode.id;
|
jpayne@68
|
124 rTaxID=rNode.id;
|
jpayne@68
|
125 TaxNode ancestor=tree.commonAncestor(qNode, rNode);
|
jpayne@68
|
126 taxLevelExtended=ancestor.levelExtended;
|
jpayne@68
|
127 }
|
jpayne@68
|
128
|
jpayne@68
|
129 ani=numerator/(float)denominator;
|
jpayne@68
|
130 ssu=-1;
|
jpayne@68
|
131 if(taxLevelExtended<0){taxLevelExtended=0;}
|
jpayne@68
|
132 processed=false;
|
jpayne@68
|
133 }
|
jpayne@68
|
134
|
jpayne@68
|
135 //Returns a complete set when a new set is started
|
jpayne@68
|
136 RecordSet processData(HashMap<Long, Float> map, boolean saveRecord){
|
jpayne@68
|
137 RecordSet old=null;
|
jpayne@68
|
138 if(processed){return null;}
|
jpayne@68
|
139 levelAniSums[taxLevelExtended]+=ani;
|
jpayne@68
|
140 levelCounts[taxLevelExtended]++;
|
jpayne@68
|
141 aniLists[taxLevelExtended].add((float)ani);
|
jpayne@68
|
142
|
jpayne@68
|
143 if(ssu>0){
|
jpayne@68
|
144 levelSSUSums[taxLevelExtended]+=ssu;
|
jpayne@68
|
145 levelCountsSSU[taxLevelExtended]++;
|
jpayne@68
|
146 ssuLists[taxLevelExtended].add((float)ssu);
|
jpayne@68
|
147 }
|
jpayne@68
|
148 if(map!=null){
|
jpayne@68
|
149 long key=(((long)qTaxID)<<32)|rTaxID;
|
jpayne@68
|
150 map.put(key, (float)ani);
|
jpayne@68
|
151 }
|
jpayne@68
|
152 if(saveRecord){
|
jpayne@68
|
153 if(currentSet==null || currentSet.qID!=qTaxID){
|
jpayne@68
|
154 old=currentSet;
|
jpayne@68
|
155 currentSet=new RecordSet(qTaxID);
|
jpayne@68
|
156 if(recordSets!=null){
|
jpayne@68
|
157 recordSets.add(currentSet);
|
jpayne@68
|
158 }
|
jpayne@68
|
159 }
|
jpayne@68
|
160 currentSet.records.add(new Record(this));
|
jpayne@68
|
161 }
|
jpayne@68
|
162 processed=true;
|
jpayne@68
|
163 return old;
|
jpayne@68
|
164 }
|
jpayne@68
|
165
|
jpayne@68
|
166 /*--------------------------------------------------------------*/
|
jpayne@68
|
167
|
jpayne@68
|
168 // final static int taxLevels=TaxTree.numTaxaNamesExtended;
|
jpayne@68
|
169 final long[] levelCounts=new long[AnalyzeSketchResults.taxLevels];
|
jpayne@68
|
170 final long[] levelCountsSSU=new long[AnalyzeSketchResults.taxLevels];
|
jpayne@68
|
171
|
jpayne@68
|
172 final double[] levelAniSums=new double[AnalyzeSketchResults.taxLevels];
|
jpayne@68
|
173 final double[] levelSSUSums=new double[AnalyzeSketchResults.taxLevels];
|
jpayne@68
|
174
|
jpayne@68
|
175 final FloatList[] aniLists=new FloatList[AnalyzeSketchResults.taxLevels];
|
jpayne@68
|
176 final FloatList[] ssuLists=new FloatList[AnalyzeSketchResults.taxLevels];
|
jpayne@68
|
177
|
jpayne@68
|
178 final ArrayList<RecordSet> recordSets;
|
jpayne@68
|
179
|
jpayne@68
|
180 final int mode;
|
jpayne@68
|
181 final TaxTree tree;
|
jpayne@68
|
182 final ByteStreamWriter bswBad;
|
jpayne@68
|
183
|
jpayne@68
|
184 int qTaxID=-1;
|
jpayne@68
|
185 int rTaxID=-1;
|
jpayne@68
|
186 long qBases;
|
jpayne@68
|
187 long rBases;
|
jpayne@68
|
188 long qSize;
|
jpayne@68
|
189 long rSize;
|
jpayne@68
|
190 double ani=-1;
|
jpayne@68
|
191 double ssu=-1;
|
jpayne@68
|
192 int taxLevelExtended=-1;
|
jpayne@68
|
193 boolean processed=true;
|
jpayne@68
|
194 RecordSet currentSet=null;
|
jpayne@68
|
195 final boolean keepText;
|
jpayne@68
|
196
|
jpayne@68
|
197 byte[] text=null;
|
jpayne@68
|
198
|
jpayne@68
|
199 private static int qTaxIDColumn=7;
|
jpayne@68
|
200 private static int rTaxIDColumn=8;
|
jpayne@68
|
201 private static int qSizeColumn=3;
|
jpayne@68
|
202 private static int rSizeColumn=4;
|
jpayne@68
|
203 private static int qBasesColumn=5;
|
jpayne@68
|
204 private static int rBasesColumn=6;
|
jpayne@68
|
205 private static int aniColumn=2;
|
jpayne@68
|
206 private static int ssuColumn=11;
|
jpayne@68
|
207 private static int caLevelColumn=12;
|
jpayne@68
|
208
|
jpayne@68
|
209 static int MIN_HITS=3;
|
jpayne@68
|
210
|
jpayne@68
|
211 } |