comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/gff/CutGff.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package gff;
2
3 import java.io.File;
4 import java.io.PrintStream;
5 import java.util.ArrayList;
6 import java.util.Arrays;
7 import java.util.HashMap;
8 import java.util.concurrent.atomic.AtomicInteger;
9 import java.util.concurrent.atomic.AtomicLong;
10
11 import aligner.Alignment;
12 import fileIO.ByteFile;
13 import fileIO.ByteStreamWriter;
14 import fileIO.FileFormat;
15 import fileIO.ReadWrite;
16 import prok.PGMTools;
17 import prok.ProkObject;
18 import shared.Parse;
19 import shared.Parser;
20 import shared.PreParser;
21 import shared.ReadStats;
22 import shared.Shared;
23 import shared.Timer;
24 import shared.Tools;
25 import stream.ConcurrentReadOutputStream;
26 import stream.Read;
27 import stream.ReadInputStream;
28 import structures.ByteBuilder;
29 import tax.GiToTaxid;
30 import tax.TaxClient;
31 import tax.TaxTree;
32 import template.Accumulator;
33 import template.ThreadWaiter;
34
35 public class CutGff implements Accumulator<CutGff.ProcessThread> {
36
37 /*--------------------------------------------------------------*/
38 /*---------------- Initialization ----------------*/
39 /*--------------------------------------------------------------*/
40
41 /**
42 * Code entrance from the command line.
43 * @param args Command line arguments
44 */
45 public static void main(String[] args){
46 //Start a timer immediately upon code entrance.
47 Timer t=new Timer();
48
49 //Create an instance of this class
50 CutGff x=new CutGff(args);
51
52 //Run the object
53 x.process(t);
54
55 //Close the print stream if it was redirected
56 Shared.closeStream(x.outstream);
57 }
58
/**
 * Constructor.
 * Preparses and parses the arguments, sets shared static flags, optionally loads
 * consensus sequences, validates the input and output paths, and creates the output FileFormat.
 * @param args Command line arguments
 */
public CutGff(String[] args){
	//Preparse step; it supplies the argument array to parse and the print stream to use
	final PreParser preparser=new PreParser(args, null/*getClass()*/, false);
	args=preparser.args;
	outstream=preparser.outstream;

	//Shared static settings are applied before the arguments are parsed
	ReadWrite.USE_UNPIGZ=true;
	ReadWrite.USE_PIGZ=true;
	ReadWrite.MAX_ZIP_THREADS=Shared.threads();

	Shared.TRIM_RNAME=true;
	Shared.TRIM_READ_COMMENTS=true;
	Read.TO_UPPER_CASE=true;
	Read.VALIDATE_IN_CONSTRUCTOR=true;
	GffLine.parseAttributes=true;

	//Parse the arguments and copy out the fields the shared Parser handled
	final Parser parser=parse(args);
	overwrite=parser.overwrite;
	append=parser.append;
	out=parser.out1;

	//Consensus sequences are only loaded when alignment was requested
	if(alignRibo){
		ProkObject.loadConsensusSequenceFromFile(false, false);
	}

	fixExtensions(); //Add or remove .gz or .bz2 as needed
	checkFileExistence(); //Ensure files can be read and written
	checkStatics(); //Adjust file-related static fields as needed for this program

	ffout=FileFormat.testOutput(out, FileFormat.PGM, null, true, overwrite, append, false);
}
98
99 /*--------------------------------------------------------------*/
100 /*---------------- Initialization Helpers ----------------*/
101 /*--------------------------------------------------------------*/
102
103 /** Parse arguments from the command line */
104 private Parser parse(String[] args){
105
106 Parser parser=new Parser();
107 parser.overwrite=overwrite;
108 for(int i=0; i<args.length; i++){
109 String arg=args[i];
110 String[] split=arg.split("=");
111 String a=split[0].toLowerCase();
112 String b=split.length>1 ? split[1] : null;
113 if(b!=null && b.equalsIgnoreCase("null")){b=null;}
114
115 // outstream.println(arg+", "+a+", "+b);
116 if(PGMTools.parseStatic(arg, a, b)){
117 //do nothing
118 }else if(a.equals("in") || a.equals("infna") || a.equals("fnain") || a.equals("fna") || a.equals("ref")){
119 assert(b!=null);
120 Tools.addFiles(b, fnaList);
121 }else if(a.equals("gff") || a.equals("ingff") || a.equals("gffin")){
122 assert(b!=null);
123 Tools.addFiles(b, gffList);
124 }else if(a.equals("verbose")){
125 verbose=Parse.parseBoolean(b);
126 ReadWrite.verbose=verbose;
127 }else if(a.equals("alignribo") || a.equals("align")){
128 alignRibo=Parse.parseBoolean(b);
129 }else if(a.equals("adjustendpoints")){
130 adjustEndpoints=Parse.parseBoolean(b);
131 }else if(a.equalsIgnoreCase("slop16s") || a.equalsIgnoreCase("16sslop") || a.equalsIgnoreCase("ssuslop")){
132 ssuSlop=Integer.parseInt(b);
133 }else if(a.equalsIgnoreCase("slop23s") || a.equalsIgnoreCase("23sslop") || a.equalsIgnoreCase("lsuslop")){
134 lsuSlop=Integer.parseInt(b);
135 }else if(a.equalsIgnoreCase("maxns") || a.equalsIgnoreCase("maxundefined")){
136 maxNs=Integer.parseInt(b);
137 }else if(a.equalsIgnoreCase("maxnrate") || a.equalsIgnoreCase("maxnfraction")){
138 maxNFraction=Integer.parseInt(b);
139 }else if(a.equals("invert")){
140 invert=Parse.parseBoolean(b);
141 }else if(a.equals("type") || a.equals("types")){
142 types=b;
143 }else if(a.equals("attributes") || a.equals("requiredattributes")){
144 requiredAttributes=b.split(",");
145 }else if(a.equals("banattributes") || a.equals("bannedattributes")){
146 bannedAttributes=b.split(",");
147 }else if(a.equals("banpartial")){
148 banPartial=Parse.parseBoolean(b);
149 }
150
151 else if(a.equalsIgnoreCase("renameByTaxID")){
152 renameByTaxID=Parse.parseBoolean(b);
153 }else if(a.equals("taxmode")){
154 if("accession".equalsIgnoreCase(b)){
155 taxMode=ACCESSION_MODE;
156 }else if("header".equalsIgnoreCase(b)){
157 taxMode=HEADER_MODE;
158 }else if("gi".equalsIgnoreCase(b)){
159 taxMode=GI_MODE;
160 }else if("taxid".equalsIgnoreCase(b)){
161 taxMode=TAXID_MODE;
162 }else{
163 assert(false) : "Bad tax mode: "+b;
164 }
165 }else if(a.equals("requirepresent")){
166 requirePresent=Parse.parseBoolean(b);
167 }else if(a.equalsIgnoreCase("onePerFile")){
168 onePerFile=Parse.parseBoolean(b);
169 }else if(a.equalsIgnoreCase("pickBest") || a.equalsIgnoreCase("findBest") || a.equalsIgnoreCase("keepBest")){
170 pickBest=Parse.parseBoolean(b);
171 }
172
173 else if(a.equals("minlen")){
174 minLen=Integer.parseInt(b);
175 }else if(a.equals("maxlen")){
176 maxLen=Integer.parseInt(b);
177 }
178
179 else if(ProkObject.parse(arg, a, b)){
180 //do nothing
181 }else if(parser.parse(arg, a, b)){
182 //do nothing
183 }else if(arg.indexOf('=')<0 && new File(arg).exists() && FileFormat.isFastaFile(arg)){
184 fnaList.add(arg);
185 }else{
186 outstream.println("Unknown parameter "+args[i]);
187 assert(false) : "Unknown parameter "+args[i];
188 // throw new RuntimeException("Unknown parameter "+args[i]);
189 }
190 }
191
192 ArrayList<String> banned=new ArrayList<String>();
193 if(banPartial){banned.add("partial=true");}
194 if(bannedAttributes!=null){
195 for(String s : bannedAttributes){banned.add(s);}
196 }
197 bannedAttributes=banned.isEmpty() ? null : banned.toArray(new String[0]);
198
199 if(gffList.isEmpty()){
200 for(String s : fnaList){
201 String prefix=ReadWrite.stripExtension(s);
202 String gff=prefix+".gff";
203 File f=new File(gff);
204 if(!f.exists()){
205 String gz=gff+".gz";
206 f=new File(gz);
207 assert(f.exists() && f.canRead()) : "Can't read file "+gff;
208 gff=gz;
209 }
210 gffList.add(gff);
211 }
212 }
213 assert(gffList.size()==fnaList.size()) : "Number of fna and gff files do not match: "+fnaList.size()+", "+gffList.size();
214 return parser;
215 }
216
217 /** Add or remove .gz or .bz2 as needed */
218 private void fixExtensions(){
219 fnaList=Tools.fixExtension(fnaList);
220 gffList=Tools.fixExtension(gffList);
221 if(fnaList.isEmpty()){throw new RuntimeException("Error - at least one input file is required.");}
222 }
223
224 /** Ensure files can be read and written */
225 private void checkFileExistence(){
226 //Ensure output files can be written
227 if(!Tools.testOutputFiles(overwrite, append, false, out)){
228 outstream.println((out==null)+", "+out);
229 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+out+"\n");
230 }
231
232 //Ensure input files can be read
233 ArrayList<String> foo=new ArrayList<String>();
234 foo.addAll(fnaList);
235 foo.addAll(gffList);
236 if(!Tools.testInputFiles(false, true, foo.toArray(new String[0]))){
237 throw new RuntimeException("\nCan't read some input files.\n");
238 }
239
240 //Ensure that no file was specified multiple times
241 foo.add(out);
242 if(!Tools.testForDuplicateFiles(true, foo.toArray(new String[0]))){
243 throw new RuntimeException("\nSome file names were specified multiple times.\n");
244 }
245 }
246
247 /** Adjust file-related static fields as needed for this program */
248 private static void checkStatics(){
249 //Adjust the number of threads for input file reading
250 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
251 ByteFile.FORCE_MODE_BF2=true;
252 }
253 }
254
255 /*--------------------------------------------------------------*/
256 /*---------------- Actual Code ----------------*/
257 /*--------------------------------------------------------------*/
258
259
260
261 /*--------------------------------------------------------------*/
262
263 public void process(Timer t){
264 if(Shared.threads()>2 && fnaList.size()>1){
265 processMT(t);
266 }else{
267 processST(t);
268 }
269 }
270
271 public void processST(Timer t){
272 ByteStreamWriter bsw=(ffout==null ? null : new ByteStreamWriter(ffout));
273 if(bsw!=null){bsw.start();}
274
275 for(int i=0; i<fnaList.size(); i++){
276 processFileST(fnaList.get(i), gffList.get(i), types, bsw);
277 }
278
279 if(bsw!=null){bsw.poisonAndWait();}
280 t.stop();
281 if(ffout!=null){outstream.println("Wrote "+out);}
282 outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8));
283 outstream.println(Tools.readsBasesOut(readsProcessed, basesProcessed, readsOut, basesOut, 8, false));
284 if(alignRibo){
285 outstream.println(Tools.number("Flipped: ", flipped.get(), 8));
286 outstream.println(Tools.number("Failed Alignment: ", failed.get(), 8));
287 }
288 }
289
290 public void processMT(Timer t){
291
292 //Optionally create a read output stream
293 final ConcurrentReadOutputStream ros=makeCros();
294
295 //Reset counters
296 readsProcessed=readsOut=0;
297 basesProcessed=basesOut=0;
298
299 //Process the reads in separate threads
300 spawnThreads(ros);
301
302 if(verbose){outstream.println("Finished; closing streams.");}
303
304 //Write anything that was accumulated by ReadStats
305 errorState|=ReadStats.writeAll();
306 //Close the read streams
307 errorState|=ReadWrite.closeStream(ros);
308
309 //Report timing and results
310 t.stop();
311 if(ffout!=null){outstream.println("Wrote "+out);}
312 outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8));
313 outstream.println(Tools.readsBasesOut(readsProcessed, basesProcessed, readsOut, basesOut, 8, false));
314 if(alignRibo){
315 outstream.println(Tools.number("Flipped: ", flipped.get(), 8));
316 outstream.println(Tools.number("Failed Alignment: ", failed.get(), 8));
317 }
318
319 //Throw an exception of there was an error in a thread
320 if(errorState){
321 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
322 }
323 }
324
325 /*--------------------------------------------------------------*/
326
327 private boolean hasAttributes(GffLine gline){
328 int len=gline.length();
329 if(len<minLen || len>maxLen){return false;}
330 if(hasAttributes(gline, bannedAttributes)){return false;}
331 return requiredAttributes==null || hasAttributes(gline, requiredAttributes);
332 }
333
334 private static boolean hasAttributes(GffLine gline, String[] attributes){
335 if(attributes==null){return false;}
336 for(String s : attributes){
337 if(gline.attributes.contains(s)){
338 return true;
339 }
340 }
341 return false;
342 }
343
344 private void processFileST(String fna, String gff, String types, ByteStreamWriter bsw){
345 ArrayList<Read> reads=processFile(fna, gff, types);
346 if(reads!=null){
347 for(Read r : reads){
348 if(bsw!=null){bsw.println(r);}
349 }
350 }
351 }
352
353 private ArrayList<Read> processFile(String fna, String gff, String types){
354 ArrayList<GffLine> lines=GffLine.loadGffFile(gff, types, false);
355
356 ArrayList<Read> list=ReadInputStream.toReads(fna, FileFormat.FA, -1);
357 HashMap<String, Read> map=new HashMap<String, Read>();
358
359 for(Read r : list){
360 readsProcessed++;
361 basesProcessed+=r.length();
362 map.put(r.id, r);
363 }
364
365 if(renameByTaxID){//Note this must be AFTER adding to the hashmap.
366 renameByTaxID(list);
367 }
368
369 ArrayList<Read> outList=processLines(lines, map, invert);
370
371 if(invert){
372 for(Read r : list){
373 readsOut++;
374 basesOut+=r.length();
375 }
376 return list;
377 }else{
378 if(outList!=null){
379 for(Read r : outList){
380 readsOut++;
381 basesOut+=r.length();
382 }
383 }
384 return outList;
385 }
386 }
387
388 private void renameByTaxID(ArrayList<Read> list){
389 ByteBuilder bb=new ByteBuilder();
390 for(Read r : list){
391 if(bb.length>0){bb.comma();}
392 bb.append(taxMode==HEADER_MODE ? r.id : taxMode==ACCESSION_MODE ? parseAccession(r.id) : parseGi(r.id));
393 }
394 final int[] ids;
395 if(taxMode==ACCESSION_MODE){
396 ids=TaxClient.accessionToTaxidArray(bb.toString());
397 }else if(taxMode==GI_MODE){
398 ids=TaxClient.giToTaxidArray(bb.toString());
399 }else if(taxMode==HEADER_MODE){
400 ids=TaxClient.headerToTaxidArray(bb.toString());
401 }else if(taxMode==TAXID_MODE){
402 ids=parseTaxIds(list);
403 }else{
404 throw new RuntimeException("Bad mode: "+TAXID_MODE);
405 }
406 assert(ids.length==list.size()) : ids.length+", "+list.size();
407 for(int i=0; i<ids.length; i++){
408 Read r=list.get(i);
409 int id=ids[i];
410
411 if(r.id!=null && r.id.startsWith("tid|")){
412 id=TaxTree.parseHeaderStatic(r.id);
413 r.obj=id;
414 }else {
415 assert(id>=0 || !requirePresent) : "Can't find taxID for header: "+id+", "+r.name();
416
417 r.obj=id;
418 r.id="tid|"+id+"|"+id;
419 }
420 }
421 }
422
423 private int[] parseTaxIds(ArrayList<Read> list){
424 int[] ids=new int[list.size()];
425 for(int i=0; i<list.size(); i++){
426 Read r=list.get(i);
427 int x=GiToTaxid.parseTaxidNumber(r.id, '|');
428 ids[i]=x;
429 }
430 return ids;
431 }
432
433 private String parseAccession(String id){
434 int dot=id.indexOf('.');
435 int space=id.indexOf(' ');
436 // assert(dot>0 && space>0) : "Unexpected header format: "+id+"\nTry header mode instead of accession mode.";
437 int limit=Tools.min((dot<0 ? id.length() : dot), (space<0 ? id.length() : space));
438 return id.substring(0, limit);
439 }
440
441 private String parseGi(String id){
442 assert(id.startsWith("gi|"));
443 String[] split=id.split("|");
444 return split[1];
445 }
446
447 private ArrayList<Read> processLines(ArrayList<GffLine> lines, HashMap<String, Read> map, boolean invertSelection){
448 ArrayList<Read> list=null;
449 for(GffLine gline : lines){
450 if(hasAttributes(gline)){
451 Read scaf=map.get(gline.seqid);
452 assert(scaf!=null) : "Can't find "+gline.seqid+" in "+map.keySet();
453
454 boolean pass=true;
455 Float identity=null;
456 if(alignRibo && gline.inbounds(scaf.length())){
457 int type=gline.prokType();
458 identity=align(gline, scaf.bases, type);
459 if(identity==null){pass=false;}
460 }
461
462 if(pass){
463 final int start=gline.start-1;
464 final int stop=gline.stop-1;
465
466 if(invertSelection){
467 byte[] bases=scaf.bases;
468 for(int i=start; i<=stop; i++){
469 if(i>=0 && i<bases.length){
470 bases[i]='N';
471 }
472 }
473 }else{
474 if(start>=0 && stop<scaf.length()){
475 String id=gline.attributes;
476 if(renameByTaxID){
477 id="tid|"+scaf.obj+"|"+id;
478 }
479 Read r=new Read(Arrays.copyOfRange(scaf.bases, start, stop+1), null, id, 1);
480 r.obj=identity;
481
482 assert(!r.containsLowercase()) : r.toFasta()+"\n"
483 + "validated="+r.validated()+", scaf.validated="+scaf.validated()+", tuc="+Read.TO_UPPER_CASE+", vic="+Read.VALIDATE_IN_CONSTRUCTOR;
484 if(maxNs>=0 || maxNFraction>=0){
485 long allowed=Tools.min(maxNs>=0 ? maxNs : r.length(), (long)(r.length()*(maxNFraction>=0 ? maxNFraction : 1)));
486 if(r.countUndefined()>allowed){r=null;}
487 }
488
489 if(r!=null){
490 if(gline.strand==1){r.reverseComplement();}
491 if(list==null){list=new ArrayList<Read>(8);}
492 list.add(r);
493 if(onePerFile && !pickBest){break;}
494 }
495 }
496 }
497 }
498 }
499 }
500 if(pickBest && list!=null && list.size()>1){
501 Read best=null;
502 float bestID=0;
503 for(Read r : list){
504 float identity=(r.obj==null ? 0.001f : (Float)r.obj);
505 if(identity>bestID){
506 bestID=identity;
507 best=r;
508 }
509 }
510 assert(best!=null);
511 list.clear();
512 list.add(best);
513 }
514 return list;
515 }
516
517 private Float align(GffLine gline, byte[] scaf, int type){
518 Read[] consensusReads=ProkObject.consensusReads(type);
519 if(consensusReads==null || consensusReads.length==0){
520 assert(false) : type+"\n"+gline.toString();
521 return null;
522 }
523 byte[] universal=consensusReads[0].bases;
524 float minIdentity=ProkObject.minID(type)*ID_MULT;
525 if(universal==null){assert(false); return 1F;}
526
527 int start=gline.start-1;
528 int stop=gline.stop-1;
529 assert(start<=stop) : start+", "+stop+", "+scaf.length;
530 assert(start>=0 && start<scaf.length) : start+", "+stop+", "+scaf.length;
531 assert(stop>=0 && stop<scaf.length) : start+", "+stop+", "+scaf.length;
532 // final int a=Tools.max(0, start-(adjustEndpoints ? 100 : 20));
533 // final int b=Tools.min(scaf.length-1, stop+(adjustEndpoints ? 100 : 20));
534 final int a=Tools.max(0, start);
535 final int b=Tools.min(scaf.length-1, stop);
536
537 byte[] ref=Arrays.copyOfRange(scaf, a, b+1);
538 Read r=new Read(ref, null, 0);
539 if(gline.strand==GffLine.MINUS){r.reverseComplement();}
540
541 Alignment plus=new Alignment(r);
542 plus.align(universal);
543 // if(plus.id>=minIdentity){return true;}
544
545 r.reverseComplement();
546 Alignment minus=new Alignment(r);
547 minus.align(universal);
548
549 Alignment best=null;
550 if(plus.id>=minus.id){
551 best=plus;
552 // System.err.println("Kept: "+plus.id+" \t"+minus.id);
553 }else{
554 best=minus;
555 if(minus.id>=minIdentity){
556 if(verbose) {System.err.println("Flipped: "+plus.id+" \t"+minus.id+"");}
557 flipped.incrementAndGet();
558 gline.strand=Shared.MINUS;
559 }
560 }
561 if(best.id>=minIdentity){
562 return best.id;
563 }else{
564 if(verbose) {System.err.println("Failed alignment: "+plus.id+" \t"+minus.id);}
565 failed.incrementAndGet();
566 return null;
567 }
568 //
569 // int[] coords=KillSwitch.allocInt1D(2);
570 // float id1=align(universal, scaf, a, b, minIdentity, coords);
571 // final int rstart=coords[0], rstop=coords[1];
572 // // assert(false) : rstart+", "+rstop+", "+(rstop-rstart+1)+", "+start+", "+stop;
573 // if(id1<minIdentity){
574 // System.err.println("Low identity: "+String.format("%.2s", 100*id1));
575 // return false;
576 // }
577 // if(adjustEndpoints){
578 // int slop=(flag==4 ? AnalyzeGenes.lsuSlop : AnalyzeGenes.ssuSlop);
579 // if(Tools.absdif(start, rstart)>slop){
580 // // System.err.println("rstart:\t"+start+" -> "+rstart);
581 // start=rstart;
582 // }
583 // if(Tools.absdif(stop, rstop)>slop){
584 // // System.err.println("rstop: \t"+stop+" -> "+rstop);
585 // stop=rstop;
586 // }
587 // }
588 }
589
590 /*--------------------------------------------------------------*/
591 /*---------------- Thread Management ----------------*/
592 /*--------------------------------------------------------------*/
593
594 private ConcurrentReadOutputStream makeCros(){
595 if(ffout==null){return null;}
596
597 //Select output buffer size based on whether it needs to be ordered
598 final int buff=(ordered ? Tools.mid(16, 128, (Shared.threads()*2)/3) : 8);
599
600 final ConcurrentReadOutputStream ros=ConcurrentReadOutputStream.getStream(ffout, null, buff, null, false);
601 ros.start(); //Start the stream
602 return ros;
603 }
604
605 /** Spawn process threads */
606 private void spawnThreads(ConcurrentReadOutputStream ros){
607 //Do anything necessary prior to processing
608
609 //Determine how many threads may be used
610 final int threads=Tools.min(Shared.threads(), fnaList.size());
611
612 //Controls access to input files
613 AtomicInteger atom=new AtomicInteger(0);
614
615 //Fill a list with ProcessThreads
616 ArrayList<ProcessThread> alpt=new ArrayList<ProcessThread>(threads);
617 for(int i=0; i<threads; i++){
618 alpt.add(new ProcessThread(atom, ros, i));
619 }
620
621 //Start the threads and wait for them to finish
622 boolean success=ThreadWaiter.startAndWait(alpt, this);
623 errorState&=!success;
624
625 //Do anything necessary after processing
626
627 }
628
629 @Override
630 public final void accumulate(ProcessThread pt){
631 readsProcessed+=pt.readsProcessedT;
632 basesProcessed+=pt.basesProcessedT;
633 readsOut+=pt.readsOutT;
634 basesOut+=pt.basesOutT;
635 errorState|=(!pt.success);
636 }
637
638 @Override
639 public final boolean success(){return !errorState;}
640
641 /*--------------------------------------------------------------*/
642 /*---------------- Inner Classes ----------------*/
643 /*--------------------------------------------------------------*/
644
645 /** This class is static to prevent accidental writing to shared variables.
646 * It is safe to remove the static modifier. */
647 class ProcessThread extends Thread {
648
649 //Constructor
650 ProcessThread(final AtomicInteger atom_, ConcurrentReadOutputStream ros_, final int tid_){
651 atom=atom_;
652 ros=ros_;
653 tid=tid_;
654 }
655
656 //Called by start()
657 @Override
658 public void run(){
659 //Do anything necessary prior to processing
660
661 for(int fnum=atom.getAndIncrement(), lim=fnaList.size(); fnum<lim; fnum=atom.getAndIncrement()) {
662 String fna=fnaList.get(fnum);
663 String gff=gffList.get(fnum);
664 //Process the reads
665 ArrayList<Read> list=processFileT(fna, gff, types);
666 if(ros!=null){
667 if(list==null){list=dummy;}
668 ros.add(list, fnum);
669 }
670 }
671
672 //Do anything necessary after processing
673
674 //Indicate successful exit status
675 success=true;
676 }
677
678 //Duplicated
679 private ArrayList<Read> processFileT(String fna, String gff, String types){
680 ArrayList<GffLine> lines=GffLine.loadGffFile(gff, types, false);
681
682 ArrayList<Read> list=ReadInputStream.toReads(fna, FileFormat.FA, -1);
683 HashMap<String, Read> map=new HashMap<String, Read>();
684
685 for(Read r : list){
686 readsProcessedT++;
687 basesProcessedT+=r.length();
688 map.put(r.id, r);
689 }
690
691 if(renameByTaxID){//Note this must be AFTER adding to the hashmap.
692 renameByTaxID(list);
693 }
694 // assert(false) : renameByTaxID+", "+list.size();
695
696 ArrayList<Read> outList=processLines(lines, map, invert);
697
698 if(invert){
699 for(Read r : list){
700 readsOutT++;
701 basesOutT+=r.length();
702 }
703 return list;
704 }else{
705 if(outList!=null){
706 for(Read r : outList){
707 readsOutT++;
708 basesOutT+=r.length();
709 }
710 }
711 return outList;
712 }
713 }
714
715 /** Number of reads processed by this thread */
716 protected long readsProcessedT=0;
717 /** Number of bases processed by this thread */
718 protected long basesProcessedT=0;
719
720 /** Number of reads retained by this thread */
721 protected long readsOutT=0;
722 /** Number of bases retained by this thread */
723 protected long basesOutT=0;
724
725 /** True only if this thread has completed successfully */
726 boolean success=false;
727
728 /** Shared output stream */
729 private final ConcurrentReadOutputStream ros;
730 /** Thread ID */
731 final int tid;
732 /** Next file ID */
733 final AtomicInteger atom;
734 }
735
736 /*--------------------------------------------------------------*/
737 /*---------------- Fields ----------------*/
738 /*--------------------------------------------------------------*/
739
740 private ArrayList<String> fnaList=new ArrayList<String>();
741 private ArrayList<String> gffList=new ArrayList<String>();
742 private String out=null;
743 private String types="CDS";
744 private boolean invert=false;
745 private boolean banPartial=true;
746 private int minLen=1;
747 private int maxLen=Integer.MAX_VALUE;
748
749 private String[] requiredAttributes;
750 private String[] bannedAttributes;
751
752 /*--------------------------------------------------------------*/
753
754 private long bytesOut=0;
755 private boolean renameByTaxID=false;
756 private int taxMode=ACCESSION_MODE;
757 private boolean requirePresent=false;
758 private boolean alignRibo=false;
759 private boolean adjustEndpoints=false;
760 private boolean onePerFile=false;
761 private boolean pickBest=false;
762 private int ssuSlop=999;
763 private int lsuSlop=999;
764
765 private float ID_MULT=0.96f;
766
767 private int maxNs=-1;
768 private double maxNFraction=-1;
769
770 private static int ACCESSION_MODE=0, GI_MODE=1, HEADER_MODE=2, TAXID_MODE=3;
771
772 /*--------------------------------------------------------------*/
773
774 /** Number of reads processed */
775 protected long readsProcessed=0;
776 /** Number of bases processed */
777 protected long basesProcessed=0;
778
779 /** Number of reads retained */
780 protected long readsOut=0;
781 /** Number of bases retained */
782 protected long basesOut=0;
783
784 protected AtomicLong flipped=new AtomicLong(0);
785 protected AtomicLong failed=new AtomicLong(0);
786
787 /** Quit after processing this many input reads; -1 means no limit */
788 private long maxReads=-1;
789
790 /*--------------------------------------------------------------*/
791 /*---------------- Final Fields ----------------*/
792 /*--------------------------------------------------------------*/
793
794 private final FileFormat ffout;
795 private final ArrayList<Read> dummy=new ArrayList<Read>();
796
797 /*--------------------------------------------------------------*/
798 /*---------------- Common Fields ----------------*/
799 /*--------------------------------------------------------------*/
800
801 private PrintStream outstream=System.err;
802 public static boolean verbose=false;
803 public boolean errorState=false;
804 public boolean ordered=true;
805 private boolean overwrite=true;
806 private boolean append=false;
807
808 }