comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/gff/CompareGff.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package gff;
2
3 import java.io.PrintStream;
4 import java.util.ArrayList;
5 import java.util.HashMap;
6 import java.util.Locale;
7 import java.util.Map.Entry;
8
9 import fileIO.ByteFile;
10 import fileIO.FileFormat;
11 import fileIO.ReadWrite;
12 import prok.ProkObject;
13 import shared.Parse;
14 import shared.Parser;
15 import shared.PreParser;
16 import shared.Shared;
17 import shared.Timer;
18 import shared.Tools;
19 import structures.StringNum;
20
21 /**
22 * Compares gff files for the purpose of grading gene-calling.
23 * @author Brian Bushnell
24 * @date October 3, 2018
25 *
26 */
27 public class CompareGff {
28
29 /*--------------------------------------------------------------*/
30 /*---------------- Initialization ----------------*/
31 /*--------------------------------------------------------------*/
32
33 /**
34 * Code entrance from the command line.
35 * @param args Command line arguments
36 */
37 public static void main(String[] args){
38 //Start a timer immediately upon code entrance.
39 Timer t=new Timer();
40
41 //Create an instance of this class
42 CompareGff x=new CompareGff(args);
43
44 //Run the object
45 x.process(t);
46
47 //Close the print stream if it was redirected
48 Shared.closeStream(x.outstream);
49 }
50
51 /**
52 * Constructor.
53 * @param args Command line arguments
54 */
55 public CompareGff(String[] args){
56
57 {//Preparse block for help, config files, and outstream
58 PreParser pp=new PreParser(args, getClass(), false);
59 args=pp.args;
60 outstream=pp.outstream;
61 }
62
63 //Set shared static variables prior to parsing
64 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
65 ReadWrite.MAX_ZIP_THREADS=Shared.threads();
66
67 {//Parse the arguments
68 final Parser parser=parse(args);
69 overwrite=parser.overwrite;
70 append=parser.append;
71
72 in=parser.in1;
73 }
74
75 fixExtensions(); //Add or remove .gz or .bz2 as needed
76 checkFileExistence(); //Ensure files can be read and written
77 checkStatics(); //Adjust file-related static fields as needed for this program
78
79 ffin=FileFormat.testInput(in, FileFormat.GFF, null, true, true);
80 ffref=FileFormat.testInput(ref, FileFormat.GFF, null, true, true);
81 }
82
83 /*--------------------------------------------------------------*/
84 /*---------------- Initialization Helpers ----------------*/
85 /*--------------------------------------------------------------*/
86
87 /** Parse arguments from the command line */
88 private Parser parse(String[] args){
89
90 Parser parser=new Parser();
91 for(int i=0; i<args.length; i++){
92 String arg=args[i];
93 String[] split=arg.split("=");
94 String a=split[0].toLowerCase();
95 String b=split.length>1 ? split[1] : null;
96 if(b!=null && b.equalsIgnoreCase("null")){b=null;}
97
98 if(a.equals("ref")){
99 ref=b;
100 }else if(a.equals("lines")){
101 maxLines=Long.parseLong(b);
102 if(maxLines<0){maxLines=Long.MAX_VALUE;}
103 }else if(a.equals("verbose")){
104 verbose=Parse.parseBoolean(b);
105 // ByteFile1.verbose=verbose;
106 // ByteFile2.verbose=verbose;
107 // ReadWrite.verbose=verbose;
108 }else if(parser.parse(arg, a, b)){
109 //do nothing
110 }else if(i==0 && arg.indexOf('=')<0){
111 parser.in1=arg;
112 }else if(i==1 && arg.indexOf('=')<0 && ref==null){
113 ref=arg;
114 }else{
115 outstream.println("Unknown parameter "+args[i]);
116 assert(false) : "Unknown parameter "+args[i];
117 // throw new RuntimeException("Unknown parameter "+args[i]);
118 }
119 }
120
121 return parser;
122 }
123
124 /** Add or remove .gz or .bz2 as needed */
125 private void fixExtensions(){
126 in=Tools.fixExtension(in);
127 ref=Tools.fixExtension(ref);
128 if(in==null || ref==null){throw new RuntimeException("Error - at least two input files are required.");}
129 }
130
131 /** Ensure files can be read and written */
132 private void checkFileExistence(){
133
134 //Ensure input files can be read
135 if(!Tools.testInputFiles(true, true, in, ref)){
136 throw new RuntimeException("\nCan't read some input files.\n");
137 }
138 }
139
140 /** Adjust file-related static fields as needed for this program */
141 private static void checkStatics(){
142 //Adjust the number of threads for input file reading
143 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
144 ByteFile.FORCE_MODE_BF2=true;
145 }
146
147 // if(!ByteFile.FORCE_MODE_BF2){
148 // ByteFile.FORCE_MODE_BF2=false;
149 // ByteFile.FORCE_MODE_BF1=true;
150 // }
151 }
152
153 /*--------------------------------------------------------------*/
154 /*---------------- Outer Methods ----------------*/
155 /*--------------------------------------------------------------*/
156
157 void process(Timer t){
158
159 ByteFile bf=ByteFile.makeByteFile(ffin);
160
161 processInner(bf);
162
163 errorState|=bf.close();
164
165 t.stop();
166
167 outstream.println(Tools.timeLinesBytesProcessed(t, linesProcessed, bytesProcessed, 8));
168
169 outstream.println();
170 outstream.println("Ref count: \t"+refCount);
171 outstream.println("Query count: \t"+queryCount);
172
173 outstream.println();
174 outstream.println("Ref-relative counts:");
175 outstream.println("True Positive Start: \t"+truePositiveStart+"\t"+(String.format(Locale.ROOT, "%.3f%%", truePositiveStart*100.0/refCount)));
176 outstream.println("True Positive Stop: \t"+truePositiveStop+"\t"+(String.format(Locale.ROOT, "%.3f%%", truePositiveStop*100.0/refCount)));
177 // outstream.println("False Positive Start:\t"+falsePositiveStart+"\t"+(String.format(Locale.ROOT, "%.3f%%", falsePositiveStart*100.0/refCount)));
178 // outstream.println("False Positive Stop: \t"+falsePositiveStop+"\t"+(String.format(Locale.ROOT, "%.3f%%", falsePositiveStop*100.0/refCount)));
179 outstream.println("False Negative Start:\t"+falseNegativeStart+"\t"+(String.format(Locale.ROOT, "%.3f%%", falseNegativeStart*100.0/refCount)));
180 outstream.println("False Negative Stop: \t"+falseNegativeStop+"\t"+(String.format(Locale.ROOT, "%.3f%%", falseNegativeStop*100.0/refCount)));
181
182 outstream.println();
183 outstream.println("Query-relative counts:");
184 outstream.println("True Positive Start: \t"+truePositiveStart2+"\t"+(String.format(Locale.ROOT, "%.3f%%", truePositiveStart2*100.0/queryCount)));
185 outstream.println("True Positive Stop: \t"+truePositiveStop2+"\t"+(String.format(Locale.ROOT, "%.3f%%", truePositiveStop2*100.0/queryCount)));
186 outstream.println("False Positive Start:\t"+falsePositiveStart2+"\t"+(String.format(Locale.ROOT, "%.3f%%", falsePositiveStart2*100.0/queryCount)));
187 outstream.println("False Positive Stop: \t"+falsePositiveStop2+"\t"+(String.format(Locale.ROOT, "%.3f%%", falsePositiveStop2*100.0/queryCount)));
188
189 outstream.println();
190 outstream.println("SNR: \t"+String.format(Locale.ROOT, "%.4f", 10*Math.log10((truePositiveStart2+truePositiveStop2+0.1)/(falsePositiveStart2+falsePositiveStop2+0.1))));
191
192 if(errorState){
193 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
194 }
195 }
196
197 /*--------------------------------------------------------------*/
198 /*---------------- Inner Methods ----------------*/
199 /*--------------------------------------------------------------*/
200
201 @SuppressWarnings("unchecked")
202 private void processInner(ByteFile bf){
203 byte[] line=bf.nextLine();
204
205 {
206 ArrayList<GffLine> refLines=GffLine.loadGffFile(ffref, "CDS,rRNA,tRNA", true);
207
208 refCount=refLines.size();
209 lineMap=new HashMap<StringNum, GffLine>();
210 startCountMap=new HashMap<StringNum, Integer>();
211 stopCountMap=new HashMap<StringNum, Integer>();
212
213 for(GffLine gline : refLines){
214 final int stop=gline.trueStop();
215 StringNum sn=new StringNum(gline.seqid, stop);
216 lineMap.put(sn, gline);
217 startCountMap.put(sn, 0);
218 stopCountMap.put(sn, 0);
219 assert(lineMap.get(sn)==gline);
220 // assert(false) : "\n\nsn='"+sn+"'\n"+lineMap.containsKey(sn)+"\n"+lineMap.keySet();
221 }
222 if(verbose){
223 System.err.println(lineMap);
224 System.err.println(startCountMap);
225 System.err.println(stopCountMap);
226 }
227 }
228
229 while(line!=null){
230 if(line.length>0){
231 if(maxLines>0 && linesProcessed>=maxLines){break;}
232 linesProcessed++;
233 bytesProcessed+=(line.length+1);
234
235 final boolean valid=(line[0]!='#');
236 if(valid){
237 queryCount++;
238 GffLine gline=new GffLine(line);
239 processLine(gline);
240 }
241 }
242 line=bf.nextLine();
243 }
244
245 for(Entry<StringNum, Integer> e : startCountMap.entrySet()){
246 if(e.getValue()<1){
247 falseNegativeStart++;
248 }
249 }
250 for(Entry<StringNum, Integer> e : stopCountMap.entrySet()){
251 if(e.getValue()<1){
252 falseNegativeStop++;
253 }
254 }
255 }
256
257 private void processLine(GffLine gline){
258 // boolean cds=gline.type.equals("CDS");
259 // boolean trna=gline.type.equals("tRNA");
260 // boolean rrna=gline.type.equals("rRNA");
261 // if(!cds && !trna && !rrna){return;}
262 // if(cds && !ProkObject.callCDS){return;}
263 // if(trna && !ProkObject.calltRNA){return;}
264 // if(rrna){
265 // int type=gline.prokType();
266 // if(ProkObject.processType(type)){return;}
267 // }
268 int type=gline.prokType();
269 if(!ProkObject.processType(type)){return;}
270
271 final int stop=gline.trueStop();
272 final int start=gline.trueStart();
273
274 // System.err.println("Considering "+start+", "+stop);
275
276 StringNum sn=new StringNum(gline.seqid, stop);
277 GffLine refline=lineMap.get(sn);
278
279 boolean fail=(refline==null || refline.strand!=gline.strand || !refline.type.equals(gline.type));
280 if(fail){
281 if(verbose){
282 System.err.println("Can't find "+sn+"\n"+gline+"\n"+refline);
283 assert(false) : "\n\nsn='"+sn+"'\n"+lineMap.containsKey(sn)+"\n"+lineMap.keySet();
284 }
285 falsePositiveStart++;
286 falsePositiveStop++;
287 falsePositiveStart2++;
288 falsePositiveStop2++;
289 }else{
290 assert(stop==refline.trueStop());
291 truePositiveStop++;
292 truePositiveStop2++;
293 stopCountMap.put(sn, stopCountMap.get(sn)+1);
294 if(start==refline.trueStart()){
295 truePositiveStart++;
296 truePositiveStart2++;
297 startCountMap.put(sn, startCountMap.get(sn)+1);
298 }else{
299 falsePositiveStart++;
300 falsePositiveStart2++;
301 }
302 }
303 }
304
305 /*--------------------------------------------------------------*/
306 /*---------------- Fields ----------------*/
307 /*--------------------------------------------------------------*/
308
309 private String in=null;
310 private String ref=null;
311
312
313 /*--------------------------------------------------------------*/
314
315 private HashMap<StringNum, GffLine> lineMap;
316 private HashMap<StringNum, Integer> startCountMap;
317 private HashMap<StringNum, Integer> stopCountMap;
318
319 // private HashMap<Integer, ArrayList<GffLine>> map;
320 // private HashSet<Integer> stopSet;
321 // private HashSet<Integer> startSet;
322 // private HashSet<Integer> stopSetM;
323 // private HashSet<Integer> startSetM;
324
325 private long linesProcessed=0;
326 private long linesOut=0;
327 private long bytesProcessed=0;
328 private long bytesOut=0;
329
330 private long maxLines=Long.MAX_VALUE;
331
332 private long falsePositiveStart=0;
333 private long falsePositiveStop=0;
334 private long truePositiveStart=0;
335 private long truePositiveStop=0;
336 private long falseNegativeStart=0;
337 private long falseNegativeStop=0;
338
339 private long falsePositiveStart2=0;
340 private long falsePositiveStop2=0;
341 private long truePositiveStart2=0;
342 private long truePositiveStop2=0;
343
344 private long refCount=0;
345 private long queryCount=0;
346
347 /*--------------------------------------------------------------*/
348 /*---------------- Final Fields ----------------*/
349 /*--------------------------------------------------------------*/
350
351 private final FileFormat ffin;
352 private final FileFormat ffref;
353
354 /*--------------------------------------------------------------*/
355 /*---------------- Common Fields ----------------*/
356 /*--------------------------------------------------------------*/
357
358 private PrintStream outstream=System.err;
359 public static boolean verbose=false;
360 public boolean errorState=false;
361 private boolean overwrite=false;
362 private boolean append=false;
363
364 }