Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/gff/CutGff.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package gff; | |
2 | |
3 import java.io.File; | |
4 import java.io.PrintStream; | |
5 import java.util.ArrayList; | |
6 import java.util.Arrays; | |
7 import java.util.HashMap; | |
8 import java.util.concurrent.atomic.AtomicInteger; | |
9 import java.util.concurrent.atomic.AtomicLong; | |
10 | |
11 import aligner.Alignment; | |
12 import fileIO.ByteFile; | |
13 import fileIO.ByteStreamWriter; | |
14 import fileIO.FileFormat; | |
15 import fileIO.ReadWrite; | |
16 import prok.PGMTools; | |
17 import prok.ProkObject; | |
18 import shared.Parse; | |
19 import shared.Parser; | |
20 import shared.PreParser; | |
21 import shared.ReadStats; | |
22 import shared.Shared; | |
23 import shared.Timer; | |
24 import shared.Tools; | |
25 import stream.ConcurrentReadOutputStream; | |
26 import stream.Read; | |
27 import stream.ReadInputStream; | |
28 import structures.ByteBuilder; | |
29 import tax.GiToTaxid; | |
30 import tax.TaxClient; | |
31 import tax.TaxTree; | |
32 import template.Accumulator; | |
33 import template.ThreadWaiter; | |
34 | |
35 public class CutGff implements Accumulator<CutGff.ProcessThread> { | |
36 | |
37 /*--------------------------------------------------------------*/ | |
38 /*---------------- Initialization ----------------*/ | |
39 /*--------------------------------------------------------------*/ | |
40 | |
41 /** | |
42 * Code entrance from the command line. | |
43 * @param args Command line arguments | |
44 */ | |
45 public static void main(String[] args){ | |
46 //Start a timer immediately upon code entrance. | |
47 Timer t=new Timer(); | |
48 | |
49 //Create an instance of this class | |
50 CutGff x=new CutGff(args); | |
51 | |
52 //Run the object | |
53 x.process(t); | |
54 | |
55 //Close the print stream if it was redirected | |
56 Shared.closeStream(x.outstream); | |
57 } | |
58 | |
/**
 * Constructor. Pre-parses the arguments, sets shared static flags, parses the
 * arguments into fields, optionally loads consensus sequences, validates the
 * file lists, and creates the output FileFormat.
 * @param args Command line arguments
 */
public CutGff(String[] args){
	//Preparse for help, config files, and outstream
	final PreParser preParser=new PreParser(args, null/*getClass()*/, false);
	args=preParser.args;
	outstream=preParser.outstream;

	//Set shared static variables prior to parsing
	ReadWrite.USE_UNPIGZ=true;
	ReadWrite.USE_PIGZ=true;
	ReadWrite.MAX_ZIP_THREADS=Shared.threads();

	Shared.TRIM_RNAME=true;
	Shared.TRIM_READ_COMMENTS=true;
	Read.TO_UPPER_CASE=true;
	Read.VALIDATE_IN_CONSTRUCTOR=true;
	GffLine.parseAttributes=true;

	//Parse the arguments; parse() also fills the instance fields it recognizes
	final Parser parsed=parse(args);
	overwrite=parsed.overwrite;
	append=parsed.append;
	out=parsed.out1;

	//Consensus sequences are only needed when alignment was requested
	if(alignRibo){
		ProkObject.loadConsensusSequenceFromFile(false, false);
	}

	fixExtensions(); //Add or remove .gz or .bz2 as needed
	checkFileExistence(); //Ensure files can be read and written
	checkStatics(); //Adjust file-related static fields as needed for this program

	ffout=FileFormat.testOutput(out, FileFormat.PGM, null, true, overwrite, append, false);
}
98 | |
99 /*--------------------------------------------------------------*/ | |
100 /*---------------- Initialization Helpers ----------------*/ | |
101 /*--------------------------------------------------------------*/ | |
102 | |
103 /** Parse arguments from the command line */ | |
104 private Parser parse(String[] args){ | |
105 | |
106 Parser parser=new Parser(); | |
107 parser.overwrite=overwrite; | |
108 for(int i=0; i<args.length; i++){ | |
109 String arg=args[i]; | |
110 String[] split=arg.split("="); | |
111 String a=split[0].toLowerCase(); | |
112 String b=split.length>1 ? split[1] : null; | |
113 if(b!=null && b.equalsIgnoreCase("null")){b=null;} | |
114 | |
115 // outstream.println(arg+", "+a+", "+b); | |
116 if(PGMTools.parseStatic(arg, a, b)){ | |
117 //do nothing | |
118 }else if(a.equals("in") || a.equals("infna") || a.equals("fnain") || a.equals("fna") || a.equals("ref")){ | |
119 assert(b!=null); | |
120 Tools.addFiles(b, fnaList); | |
121 }else if(a.equals("gff") || a.equals("ingff") || a.equals("gffin")){ | |
122 assert(b!=null); | |
123 Tools.addFiles(b, gffList); | |
124 }else if(a.equals("verbose")){ | |
125 verbose=Parse.parseBoolean(b); | |
126 ReadWrite.verbose=verbose; | |
127 }else if(a.equals("alignribo") || a.equals("align")){ | |
128 alignRibo=Parse.parseBoolean(b); | |
129 }else if(a.equals("adjustendpoints")){ | |
130 adjustEndpoints=Parse.parseBoolean(b); | |
131 }else if(a.equalsIgnoreCase("slop16s") || a.equalsIgnoreCase("16sslop") || a.equalsIgnoreCase("ssuslop")){ | |
132 ssuSlop=Integer.parseInt(b); | |
133 }else if(a.equalsIgnoreCase("slop23s") || a.equalsIgnoreCase("23sslop") || a.equalsIgnoreCase("lsuslop")){ | |
134 lsuSlop=Integer.parseInt(b); | |
135 }else if(a.equalsIgnoreCase("maxns") || a.equalsIgnoreCase("maxundefined")){ | |
136 maxNs=Integer.parseInt(b); | |
137 }else if(a.equalsIgnoreCase("maxnrate") || a.equalsIgnoreCase("maxnfraction")){ | |
138 maxNFraction=Integer.parseInt(b); | |
139 }else if(a.equals("invert")){ | |
140 invert=Parse.parseBoolean(b); | |
141 }else if(a.equals("type") || a.equals("types")){ | |
142 types=b; | |
143 }else if(a.equals("attributes") || a.equals("requiredattributes")){ | |
144 requiredAttributes=b.split(","); | |
145 }else if(a.equals("banattributes") || a.equals("bannedattributes")){ | |
146 bannedAttributes=b.split(","); | |
147 }else if(a.equals("banpartial")){ | |
148 banPartial=Parse.parseBoolean(b); | |
149 } | |
150 | |
151 else if(a.equalsIgnoreCase("renameByTaxID")){ | |
152 renameByTaxID=Parse.parseBoolean(b); | |
153 }else if(a.equals("taxmode")){ | |
154 if("accession".equalsIgnoreCase(b)){ | |
155 taxMode=ACCESSION_MODE; | |
156 }else if("header".equalsIgnoreCase(b)){ | |
157 taxMode=HEADER_MODE; | |
158 }else if("gi".equalsIgnoreCase(b)){ | |
159 taxMode=GI_MODE; | |
160 }else if("taxid".equalsIgnoreCase(b)){ | |
161 taxMode=TAXID_MODE; | |
162 }else{ | |
163 assert(false) : "Bad tax mode: "+b; | |
164 } | |
165 }else if(a.equals("requirepresent")){ | |
166 requirePresent=Parse.parseBoolean(b); | |
167 }else if(a.equalsIgnoreCase("onePerFile")){ | |
168 onePerFile=Parse.parseBoolean(b); | |
169 }else if(a.equalsIgnoreCase("pickBest") || a.equalsIgnoreCase("findBest") || a.equalsIgnoreCase("keepBest")){ | |
170 pickBest=Parse.parseBoolean(b); | |
171 } | |
172 | |
173 else if(a.equals("minlen")){ | |
174 minLen=Integer.parseInt(b); | |
175 }else if(a.equals("maxlen")){ | |
176 maxLen=Integer.parseInt(b); | |
177 } | |
178 | |
179 else if(ProkObject.parse(arg, a, b)){ | |
180 //do nothing | |
181 }else if(parser.parse(arg, a, b)){ | |
182 //do nothing | |
183 }else if(arg.indexOf('=')<0 && new File(arg).exists() && FileFormat.isFastaFile(arg)){ | |
184 fnaList.add(arg); | |
185 }else{ | |
186 outstream.println("Unknown parameter "+args[i]); | |
187 assert(false) : "Unknown parameter "+args[i]; | |
188 // throw new RuntimeException("Unknown parameter "+args[i]); | |
189 } | |
190 } | |
191 | |
192 ArrayList<String> banned=new ArrayList<String>(); | |
193 if(banPartial){banned.add("partial=true");} | |
194 if(bannedAttributes!=null){ | |
195 for(String s : bannedAttributes){banned.add(s);} | |
196 } | |
197 bannedAttributes=banned.isEmpty() ? null : banned.toArray(new String[0]); | |
198 | |
199 if(gffList.isEmpty()){ | |
200 for(String s : fnaList){ | |
201 String prefix=ReadWrite.stripExtension(s); | |
202 String gff=prefix+".gff"; | |
203 File f=new File(gff); | |
204 if(!f.exists()){ | |
205 String gz=gff+".gz"; | |
206 f=new File(gz); | |
207 assert(f.exists() && f.canRead()) : "Can't read file "+gff; | |
208 gff=gz; | |
209 } | |
210 gffList.add(gff); | |
211 } | |
212 } | |
213 assert(gffList.size()==fnaList.size()) : "Number of fna and gff files do not match: "+fnaList.size()+", "+gffList.size(); | |
214 return parser; | |
215 } | |
216 | |
217 /** Add or remove .gz or .bz2 as needed */ | |
218 private void fixExtensions(){ | |
219 fnaList=Tools.fixExtension(fnaList); | |
220 gffList=Tools.fixExtension(gffList); | |
221 if(fnaList.isEmpty()){throw new RuntimeException("Error - at least one input file is required.");} | |
222 } | |
223 | |
224 /** Ensure files can be read and written */ | |
225 private void checkFileExistence(){ | |
226 //Ensure output files can be written | |
227 if(!Tools.testOutputFiles(overwrite, append, false, out)){ | |
228 outstream.println((out==null)+", "+out); | |
229 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+out+"\n"); | |
230 } | |
231 | |
232 //Ensure input files can be read | |
233 ArrayList<String> foo=new ArrayList<String>(); | |
234 foo.addAll(fnaList); | |
235 foo.addAll(gffList); | |
236 if(!Tools.testInputFiles(false, true, foo.toArray(new String[0]))){ | |
237 throw new RuntimeException("\nCan't read some input files.\n"); | |
238 } | |
239 | |
240 //Ensure that no file was specified multiple times | |
241 foo.add(out); | |
242 if(!Tools.testForDuplicateFiles(true, foo.toArray(new String[0]))){ | |
243 throw new RuntimeException("\nSome file names were specified multiple times.\n"); | |
244 } | |
245 } | |
246 | |
247 /** Adjust file-related static fields as needed for this program */ | |
248 private static void checkStatics(){ | |
249 //Adjust the number of threads for input file reading | |
250 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){ | |
251 ByteFile.FORCE_MODE_BF2=true; | |
252 } | |
253 } | |
254 | |
255 /*--------------------------------------------------------------*/ | |
256 /*---------------- Actual Code ----------------*/ | |
257 /*--------------------------------------------------------------*/ | |
258 | |
259 | |
260 | |
261 /*--------------------------------------------------------------*/ | |
262 | |
263 public void process(Timer t){ | |
264 if(Shared.threads()>2 && fnaList.size()>1){ | |
265 processMT(t); | |
266 }else{ | |
267 processST(t); | |
268 } | |
269 } | |
270 | |
271 public void processST(Timer t){ | |
272 ByteStreamWriter bsw=(ffout==null ? null : new ByteStreamWriter(ffout)); | |
273 if(bsw!=null){bsw.start();} | |
274 | |
275 for(int i=0; i<fnaList.size(); i++){ | |
276 processFileST(fnaList.get(i), gffList.get(i), types, bsw); | |
277 } | |
278 | |
279 if(bsw!=null){bsw.poisonAndWait();} | |
280 t.stop(); | |
281 if(ffout!=null){outstream.println("Wrote "+out);} | |
282 outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8)); | |
283 outstream.println(Tools.readsBasesOut(readsProcessed, basesProcessed, readsOut, basesOut, 8, false)); | |
284 if(alignRibo){ | |
285 outstream.println(Tools.number("Flipped: ", flipped.get(), 8)); | |
286 outstream.println(Tools.number("Failed Alignment: ", failed.get(), 8)); | |
287 } | |
288 } | |
289 | |
290 public void processMT(Timer t){ | |
291 | |
292 //Optionally create a read output stream | |
293 final ConcurrentReadOutputStream ros=makeCros(); | |
294 | |
295 //Reset counters | |
296 readsProcessed=readsOut=0; | |
297 basesProcessed=basesOut=0; | |
298 | |
299 //Process the reads in separate threads | |
300 spawnThreads(ros); | |
301 | |
302 if(verbose){outstream.println("Finished; closing streams.");} | |
303 | |
304 //Write anything that was accumulated by ReadStats | |
305 errorState|=ReadStats.writeAll(); | |
306 //Close the read streams | |
307 errorState|=ReadWrite.closeStream(ros); | |
308 | |
309 //Report timing and results | |
310 t.stop(); | |
311 if(ffout!=null){outstream.println("Wrote "+out);} | |
312 outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8)); | |
313 outstream.println(Tools.readsBasesOut(readsProcessed, basesProcessed, readsOut, basesOut, 8, false)); | |
314 if(alignRibo){ | |
315 outstream.println(Tools.number("Flipped: ", flipped.get(), 8)); | |
316 outstream.println(Tools.number("Failed Alignment: ", failed.get(), 8)); | |
317 } | |
318 | |
319 //Throw an exception of there was an error in a thread | |
320 if(errorState){ | |
321 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt."); | |
322 } | |
323 } | |
324 | |
325 /*--------------------------------------------------------------*/ | |
326 | |
327 private boolean hasAttributes(GffLine gline){ | |
328 int len=gline.length(); | |
329 if(len<minLen || len>maxLen){return false;} | |
330 if(hasAttributes(gline, bannedAttributes)){return false;} | |
331 return requiredAttributes==null || hasAttributes(gline, requiredAttributes); | |
332 } | |
333 | |
334 private static boolean hasAttributes(GffLine gline, String[] attributes){ | |
335 if(attributes==null){return false;} | |
336 for(String s : attributes){ | |
337 if(gline.attributes.contains(s)){ | |
338 return true; | |
339 } | |
340 } | |
341 return false; | |
342 } | |
343 | |
344 private void processFileST(String fna, String gff, String types, ByteStreamWriter bsw){ | |
345 ArrayList<Read> reads=processFile(fna, gff, types); | |
346 if(reads!=null){ | |
347 for(Read r : reads){ | |
348 if(bsw!=null){bsw.println(r);} | |
349 } | |
350 } | |
351 } | |
352 | |
353 private ArrayList<Read> processFile(String fna, String gff, String types){ | |
354 ArrayList<GffLine> lines=GffLine.loadGffFile(gff, types, false); | |
355 | |
356 ArrayList<Read> list=ReadInputStream.toReads(fna, FileFormat.FA, -1); | |
357 HashMap<String, Read> map=new HashMap<String, Read>(); | |
358 | |
359 for(Read r : list){ | |
360 readsProcessed++; | |
361 basesProcessed+=r.length(); | |
362 map.put(r.id, r); | |
363 } | |
364 | |
365 if(renameByTaxID){//Note this must be AFTER adding to the hashmap. | |
366 renameByTaxID(list); | |
367 } | |
368 | |
369 ArrayList<Read> outList=processLines(lines, map, invert); | |
370 | |
371 if(invert){ | |
372 for(Read r : list){ | |
373 readsOut++; | |
374 basesOut+=r.length(); | |
375 } | |
376 return list; | |
377 }else{ | |
378 if(outList!=null){ | |
379 for(Read r : outList){ | |
380 readsOut++; | |
381 basesOut+=r.length(); | |
382 } | |
383 } | |
384 return outList; | |
385 } | |
386 } | |
387 | |
388 private void renameByTaxID(ArrayList<Read> list){ | |
389 ByteBuilder bb=new ByteBuilder(); | |
390 for(Read r : list){ | |
391 if(bb.length>0){bb.comma();} | |
392 bb.append(taxMode==HEADER_MODE ? r.id : taxMode==ACCESSION_MODE ? parseAccession(r.id) : parseGi(r.id)); | |
393 } | |
394 final int[] ids; | |
395 if(taxMode==ACCESSION_MODE){ | |
396 ids=TaxClient.accessionToTaxidArray(bb.toString()); | |
397 }else if(taxMode==GI_MODE){ | |
398 ids=TaxClient.giToTaxidArray(bb.toString()); | |
399 }else if(taxMode==HEADER_MODE){ | |
400 ids=TaxClient.headerToTaxidArray(bb.toString()); | |
401 }else if(taxMode==TAXID_MODE){ | |
402 ids=parseTaxIds(list); | |
403 }else{ | |
404 throw new RuntimeException("Bad mode: "+TAXID_MODE); | |
405 } | |
406 assert(ids.length==list.size()) : ids.length+", "+list.size(); | |
407 for(int i=0; i<ids.length; i++){ | |
408 Read r=list.get(i); | |
409 int id=ids[i]; | |
410 | |
411 if(r.id!=null && r.id.startsWith("tid|")){ | |
412 id=TaxTree.parseHeaderStatic(r.id); | |
413 r.obj=id; | |
414 }else { | |
415 assert(id>=0 || !requirePresent) : "Can't find taxID for header: "+id+", "+r.name(); | |
416 | |
417 r.obj=id; | |
418 r.id="tid|"+id+"|"+id; | |
419 } | |
420 } | |
421 } | |
422 | |
423 private int[] parseTaxIds(ArrayList<Read> list){ | |
424 int[] ids=new int[list.size()]; | |
425 for(int i=0; i<list.size(); i++){ | |
426 Read r=list.get(i); | |
427 int x=GiToTaxid.parseTaxidNumber(r.id, '|'); | |
428 ids[i]=x; | |
429 } | |
430 return ids; | |
431 } | |
432 | |
433 private String parseAccession(String id){ | |
434 int dot=id.indexOf('.'); | |
435 int space=id.indexOf(' '); | |
436 // assert(dot>0 && space>0) : "Unexpected header format: "+id+"\nTry header mode instead of accession mode."; | |
437 int limit=Tools.min((dot<0 ? id.length() : dot), (space<0 ? id.length() : space)); | |
438 return id.substring(0, limit); | |
439 } | |
440 | |
441 private String parseGi(String id){ | |
442 assert(id.startsWith("gi|")); | |
443 String[] split=id.split("|"); | |
444 return split[1]; | |
445 } | |
446 | |
447 private ArrayList<Read> processLines(ArrayList<GffLine> lines, HashMap<String, Read> map, boolean invertSelection){ | |
448 ArrayList<Read> list=null; | |
449 for(GffLine gline : lines){ | |
450 if(hasAttributes(gline)){ | |
451 Read scaf=map.get(gline.seqid); | |
452 assert(scaf!=null) : "Can't find "+gline.seqid+" in "+map.keySet(); | |
453 | |
454 boolean pass=true; | |
455 Float identity=null; | |
456 if(alignRibo && gline.inbounds(scaf.length())){ | |
457 int type=gline.prokType(); | |
458 identity=align(gline, scaf.bases, type); | |
459 if(identity==null){pass=false;} | |
460 } | |
461 | |
462 if(pass){ | |
463 final int start=gline.start-1; | |
464 final int stop=gline.stop-1; | |
465 | |
466 if(invertSelection){ | |
467 byte[] bases=scaf.bases; | |
468 for(int i=start; i<=stop; i++){ | |
469 if(i>=0 && i<bases.length){ | |
470 bases[i]='N'; | |
471 } | |
472 } | |
473 }else{ | |
474 if(start>=0 && stop<scaf.length()){ | |
475 String id=gline.attributes; | |
476 if(renameByTaxID){ | |
477 id="tid|"+scaf.obj+"|"+id; | |
478 } | |
479 Read r=new Read(Arrays.copyOfRange(scaf.bases, start, stop+1), null, id, 1); | |
480 r.obj=identity; | |
481 | |
482 assert(!r.containsLowercase()) : r.toFasta()+"\n" | |
483 + "validated="+r.validated()+", scaf.validated="+scaf.validated()+", tuc="+Read.TO_UPPER_CASE+", vic="+Read.VALIDATE_IN_CONSTRUCTOR; | |
484 if(maxNs>=0 || maxNFraction>=0){ | |
485 long allowed=Tools.min(maxNs>=0 ? maxNs : r.length(), (long)(r.length()*(maxNFraction>=0 ? maxNFraction : 1))); | |
486 if(r.countUndefined()>allowed){r=null;} | |
487 } | |
488 | |
489 if(r!=null){ | |
490 if(gline.strand==1){r.reverseComplement();} | |
491 if(list==null){list=new ArrayList<Read>(8);} | |
492 list.add(r); | |
493 if(onePerFile && !pickBest){break;} | |
494 } | |
495 } | |
496 } | |
497 } | |
498 } | |
499 } | |
500 if(pickBest && list!=null && list.size()>1){ | |
501 Read best=null; | |
502 float bestID=0; | |
503 for(Read r : list){ | |
504 float identity=(r.obj==null ? 0.001f : (Float)r.obj); | |
505 if(identity>bestID){ | |
506 bestID=identity; | |
507 best=r; | |
508 } | |
509 } | |
510 assert(best!=null); | |
511 list.clear(); | |
512 list.add(best); | |
513 } | |
514 return list; | |
515 } | |
516 | |
517 private Float align(GffLine gline, byte[] scaf, int type){ | |
518 Read[] consensusReads=ProkObject.consensusReads(type); | |
519 if(consensusReads==null || consensusReads.length==0){ | |
520 assert(false) : type+"\n"+gline.toString(); | |
521 return null; | |
522 } | |
523 byte[] universal=consensusReads[0].bases; | |
524 float minIdentity=ProkObject.minID(type)*ID_MULT; | |
525 if(universal==null){assert(false); return 1F;} | |
526 | |
527 int start=gline.start-1; | |
528 int stop=gline.stop-1; | |
529 assert(start<=stop) : start+", "+stop+", "+scaf.length; | |
530 assert(start>=0 && start<scaf.length) : start+", "+stop+", "+scaf.length; | |
531 assert(stop>=0 && stop<scaf.length) : start+", "+stop+", "+scaf.length; | |
532 // final int a=Tools.max(0, start-(adjustEndpoints ? 100 : 20)); | |
533 // final int b=Tools.min(scaf.length-1, stop+(adjustEndpoints ? 100 : 20)); | |
534 final int a=Tools.max(0, start); | |
535 final int b=Tools.min(scaf.length-1, stop); | |
536 | |
537 byte[] ref=Arrays.copyOfRange(scaf, a, b+1); | |
538 Read r=new Read(ref, null, 0); | |
539 if(gline.strand==GffLine.MINUS){r.reverseComplement();} | |
540 | |
541 Alignment plus=new Alignment(r); | |
542 plus.align(universal); | |
543 // if(plus.id>=minIdentity){return true;} | |
544 | |
545 r.reverseComplement(); | |
546 Alignment minus=new Alignment(r); | |
547 minus.align(universal); | |
548 | |
549 Alignment best=null; | |
550 if(plus.id>=minus.id){ | |
551 best=plus; | |
552 // System.err.println("Kept: "+plus.id+" \t"+minus.id); | |
553 }else{ | |
554 best=minus; | |
555 if(minus.id>=minIdentity){ | |
556 if(verbose) {System.err.println("Flipped: "+plus.id+" \t"+minus.id+"");} | |
557 flipped.incrementAndGet(); | |
558 gline.strand=Shared.MINUS; | |
559 } | |
560 } | |
561 if(best.id>=minIdentity){ | |
562 return best.id; | |
563 }else{ | |
564 if(verbose) {System.err.println("Failed alignment: "+plus.id+" \t"+minus.id);} | |
565 failed.incrementAndGet(); | |
566 return null; | |
567 } | |
568 // | |
569 // int[] coords=KillSwitch.allocInt1D(2); | |
570 // float id1=align(universal, scaf, a, b, minIdentity, coords); | |
571 // final int rstart=coords[0], rstop=coords[1]; | |
572 // // assert(false) : rstart+", "+rstop+", "+(rstop-rstart+1)+", "+start+", "+stop; | |
573 // if(id1<minIdentity){ | |
574 // System.err.println("Low identity: "+String.format("%.2s", 100*id1)); | |
575 // return false; | |
576 // } | |
577 // if(adjustEndpoints){ | |
578 // int slop=(flag==4 ? AnalyzeGenes.lsuSlop : AnalyzeGenes.ssuSlop); | |
579 // if(Tools.absdif(start, rstart)>slop){ | |
580 // // System.err.println("rstart:\t"+start+" -> "+rstart); | |
581 // start=rstart; | |
582 // } | |
583 // if(Tools.absdif(stop, rstop)>slop){ | |
584 // // System.err.println("rstop: \t"+stop+" -> "+rstop); | |
585 // stop=rstop; | |
586 // } | |
587 // } | |
588 } | |
589 | |
590 /*--------------------------------------------------------------*/ | |
591 /*---------------- Thread Management ----------------*/ | |
592 /*--------------------------------------------------------------*/ | |
593 | |
594 private ConcurrentReadOutputStream makeCros(){ | |
595 if(ffout==null){return null;} | |
596 | |
597 //Select output buffer size based on whether it needs to be ordered | |
598 final int buff=(ordered ? Tools.mid(16, 128, (Shared.threads()*2)/3) : 8); | |
599 | |
600 final ConcurrentReadOutputStream ros=ConcurrentReadOutputStream.getStream(ffout, null, buff, null, false); | |
601 ros.start(); //Start the stream | |
602 return ros; | |
603 } | |
604 | |
605 /** Spawn process threads */ | |
606 private void spawnThreads(ConcurrentReadOutputStream ros){ | |
607 //Do anything necessary prior to processing | |
608 | |
609 //Determine how many threads may be used | |
610 final int threads=Tools.min(Shared.threads(), fnaList.size()); | |
611 | |
612 //Controls access to input files | |
613 AtomicInteger atom=new AtomicInteger(0); | |
614 | |
615 //Fill a list with ProcessThreads | |
616 ArrayList<ProcessThread> alpt=new ArrayList<ProcessThread>(threads); | |
617 for(int i=0; i<threads; i++){ | |
618 alpt.add(new ProcessThread(atom, ros, i)); | |
619 } | |
620 | |
621 //Start the threads and wait for them to finish | |
622 boolean success=ThreadWaiter.startAndWait(alpt, this); | |
623 errorState&=!success; | |
624 | |
625 //Do anything necessary after processing | |
626 | |
627 } | |
628 | |
629 @Override | |
630 public final void accumulate(ProcessThread pt){ | |
631 readsProcessed+=pt.readsProcessedT; | |
632 basesProcessed+=pt.basesProcessedT; | |
633 readsOut+=pt.readsOutT; | |
634 basesOut+=pt.basesOutT; | |
635 errorState|=(!pt.success); | |
636 } | |
637 | |
638 @Override | |
639 public final boolean success(){return !errorState;} | |
640 | |
641 /*--------------------------------------------------------------*/ | |
642 /*---------------- Inner Classes ----------------*/ | |
643 /*--------------------------------------------------------------*/ | |
644 | |
645 /** This class is static to prevent accidental writing to shared variables. | |
646 * It is safe to remove the static modifier. */ | |
647 class ProcessThread extends Thread { | |
648 | |
649 //Constructor | |
650 ProcessThread(final AtomicInteger atom_, ConcurrentReadOutputStream ros_, final int tid_){ | |
651 atom=atom_; | |
652 ros=ros_; | |
653 tid=tid_; | |
654 } | |
655 | |
656 //Called by start() | |
657 @Override | |
658 public void run(){ | |
659 //Do anything necessary prior to processing | |
660 | |
661 for(int fnum=atom.getAndIncrement(), lim=fnaList.size(); fnum<lim; fnum=atom.getAndIncrement()) { | |
662 String fna=fnaList.get(fnum); | |
663 String gff=gffList.get(fnum); | |
664 //Process the reads | |
665 ArrayList<Read> list=processFileT(fna, gff, types); | |
666 if(ros!=null){ | |
667 if(list==null){list=dummy;} | |
668 ros.add(list, fnum); | |
669 } | |
670 } | |
671 | |
672 //Do anything necessary after processing | |
673 | |
674 //Indicate successful exit status | |
675 success=true; | |
676 } | |
677 | |
678 //Duplicated | |
679 private ArrayList<Read> processFileT(String fna, String gff, String types){ | |
680 ArrayList<GffLine> lines=GffLine.loadGffFile(gff, types, false); | |
681 | |
682 ArrayList<Read> list=ReadInputStream.toReads(fna, FileFormat.FA, -1); | |
683 HashMap<String, Read> map=new HashMap<String, Read>(); | |
684 | |
685 for(Read r : list){ | |
686 readsProcessedT++; | |
687 basesProcessedT+=r.length(); | |
688 map.put(r.id, r); | |
689 } | |
690 | |
691 if(renameByTaxID){//Note this must be AFTER adding to the hashmap. | |
692 renameByTaxID(list); | |
693 } | |
694 // assert(false) : renameByTaxID+", "+list.size(); | |
695 | |
696 ArrayList<Read> outList=processLines(lines, map, invert); | |
697 | |
698 if(invert){ | |
699 for(Read r : list){ | |
700 readsOutT++; | |
701 basesOutT+=r.length(); | |
702 } | |
703 return list; | |
704 }else{ | |
705 if(outList!=null){ | |
706 for(Read r : outList){ | |
707 readsOutT++; | |
708 basesOutT+=r.length(); | |
709 } | |
710 } | |
711 return outList; | |
712 } | |
713 } | |
714 | |
715 /** Number of reads processed by this thread */ | |
716 protected long readsProcessedT=0; | |
717 /** Number of bases processed by this thread */ | |
718 protected long basesProcessedT=0; | |
719 | |
720 /** Number of reads retained by this thread */ | |
721 protected long readsOutT=0; | |
722 /** Number of bases retained by this thread */ | |
723 protected long basesOutT=0; | |
724 | |
725 /** True only if this thread has completed successfully */ | |
726 boolean success=false; | |
727 | |
728 /** Shared output stream */ | |
729 private final ConcurrentReadOutputStream ros; | |
730 /** Thread ID */ | |
731 final int tid; | |
732 /** Next file ID */ | |
733 final AtomicInteger atom; | |
734 } | |
735 | |
736 /*--------------------------------------------------------------*/ | |
737 /*---------------- Fields ----------------*/ | |
738 /*--------------------------------------------------------------*/ | |
739 | |
740 private ArrayList<String> fnaList=new ArrayList<String>(); | |
741 private ArrayList<String> gffList=new ArrayList<String>(); | |
742 private String out=null; | |
743 private String types="CDS"; | |
744 private boolean invert=false; | |
745 private boolean banPartial=true; | |
746 private int minLen=1; | |
747 private int maxLen=Integer.MAX_VALUE; | |
748 | |
749 private String[] requiredAttributes; | |
750 private String[] bannedAttributes; | |
751 | |
752 /*--------------------------------------------------------------*/ | |
753 | |
754 private long bytesOut=0; | |
755 private boolean renameByTaxID=false; | |
756 private int taxMode=ACCESSION_MODE; | |
757 private boolean requirePresent=false; | |
758 private boolean alignRibo=false; | |
759 private boolean adjustEndpoints=false; | |
760 private boolean onePerFile=false; | |
761 private boolean pickBest=false; | |
762 private int ssuSlop=999; | |
763 private int lsuSlop=999; | |
764 | |
765 private float ID_MULT=0.96f; | |
766 | |
767 private int maxNs=-1; | |
768 private double maxNFraction=-1; | |
769 | |
770 private static int ACCESSION_MODE=0, GI_MODE=1, HEADER_MODE=2, TAXID_MODE=3; | |
771 | |
772 /*--------------------------------------------------------------*/ | |
773 | |
774 /** Number of reads processed */ | |
775 protected long readsProcessed=0; | |
776 /** Number of bases processed */ | |
777 protected long basesProcessed=0; | |
778 | |
779 /** Number of reads retained */ | |
780 protected long readsOut=0; | |
781 /** Number of bases retained */ | |
782 protected long basesOut=0; | |
783 | |
784 protected AtomicLong flipped=new AtomicLong(0); | |
785 protected AtomicLong failed=new AtomicLong(0); | |
786 | |
787 /** Quit after processing this many input reads; -1 means no limit */ | |
788 private long maxReads=-1; | |
789 | |
790 /*--------------------------------------------------------------*/ | |
791 /*---------------- Final Fields ----------------*/ | |
792 /*--------------------------------------------------------------*/ | |
793 | |
794 private final FileFormat ffout; | |
795 private final ArrayList<Read> dummy=new ArrayList<Read>(); | |
796 | |
797 /*--------------------------------------------------------------*/ | |
798 /*---------------- Common Fields ----------------*/ | |
799 /*--------------------------------------------------------------*/ | |
800 | |
801 private PrintStream outstream=System.err; | |
802 public static boolean verbose=false; | |
803 public boolean errorState=false; | |
804 public boolean ordered=true; | |
805 private boolean overwrite=true; | |
806 private boolean append=false; | |
807 | |
808 } |