annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/clump/PivotSet.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 package clump;
jpayne@68 2
jpayne@68 3 import java.io.File;
jpayne@68 4 import java.io.PrintStream;
jpayne@68 5 import java.util.ArrayList;
jpayne@68 6
jpayne@68 7 import bloom.KCountArray;
jpayne@68 8 import bloom.ReadCounter;
jpayne@68 9 import fileIO.ByteFile;
jpayne@68 10 import fileIO.FileFormat;
jpayne@68 11 import fileIO.ReadWrite;
jpayne@68 12 import jgi.BBMerge;
jpayne@68 13 import shared.Parse;
jpayne@68 14 import shared.Parser;
jpayne@68 15 import shared.PreParser;
jpayne@68 16 import shared.ReadStats;
jpayne@68 17 import shared.Shared;
jpayne@68 18 import shared.Timer;
jpayne@68 19 import shared.Tools;
jpayne@68 20 import stream.ConcurrentReadInputStream;
jpayne@68 21 import stream.FASTQ;
jpayne@68 22 import stream.FastaReadInputStream;
jpayne@68 23 import stream.Read;
jpayne@68 24 import structures.ListNum;
jpayne@68 25
jpayne@68 26 /**
jpayne@68 27 * Reduces reads to their feature kmer.
jpayne@68 28 * @author Brian Bushnell
jpayne@68 29 * @date August 19, 2016
jpayne@68 30 *
jpayne@68 31 */
jpayne@68 32 public class PivotSet {
jpayne@68 33
jpayne@68 34 /*--------------------------------------------------------------*/
jpayne@68 35 /*---------------- Static Methods ----------------*/
jpayne@68 36 /*--------------------------------------------------------------*/
jpayne@68 37
jpayne@68 38 /**
jpayne@68 39 * Code entrance from the command line.
jpayne@68 40 * @param args Command line arguments
jpayne@68 41 */
jpayne@68 42 public static void main(String[] args){
jpayne@68 43 makeSet(args);
jpayne@68 44 }
jpayne@68 45
jpayne@68 46 public static KCountArray makeSet(String[] args){
jpayne@68 47 final boolean pigz=ReadWrite.USE_PIGZ, unpigz=ReadWrite.USE_UNPIGZ;
jpayne@68 48 Timer t=new Timer();
jpayne@68 49 PivotSet x=new PivotSet(args);
jpayne@68 50 KCountArray kca=x.process(t, false);
jpayne@68 51 ReadWrite.USE_PIGZ=pigz;
jpayne@68 52 ReadWrite.USE_UNPIGZ=unpigz;
jpayne@68 53
jpayne@68 54 //Close the print stream if it was redirected
jpayne@68 55 Shared.closeStream(x.outstream);
jpayne@68 56
jpayne@68 57 return kca;
jpayne@68 58 }
jpayne@68 59
jpayne@68 60 /*--------------------------------------------------------------*/
jpayne@68 61 /*---------------- Initialization ----------------*/
jpayne@68 62 /*--------------------------------------------------------------*/
jpayne@68 63
jpayne@68 64 /**
jpayne@68 65 * Constructor.
jpayne@68 66 * @param args Command line arguments
jpayne@68 67 */
jpayne@68 68 public PivotSet(String[] args){
jpayne@68 69
jpayne@68 70 {//Preparse block for help, config files, and outstream
jpayne@68 71 PreParser pp=new PreParser(args, getClass(), false);
jpayne@68 72 args=pp.args;
jpayne@68 73 outstream=pp.outstream;
jpayne@68 74 }
jpayne@68 75
jpayne@68 76 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
jpayne@68 77 ReadWrite.MAX_ZIP_THREADS=Shared.threads();
jpayne@68 78
jpayne@68 79 Parser parser=new Parser();
jpayne@68 80 for(int i=0; i<args.length; i++){
jpayne@68 81 String arg=args[i];
jpayne@68 82 String[] split=arg.split("=");
jpayne@68 83 String a=split[0].toLowerCase();
jpayne@68 84 String b=split.length>1 ? split[1] : null;
jpayne@68 85
jpayne@68 86 if(parser.parse(arg, a, b)){
jpayne@68 87 //do nothing
jpayne@68 88 }else if(a.equals("verbose")){
jpayne@68 89 verbose=KmerComparator.verbose=Parse.parseBoolean(b);
jpayne@68 90 }else if(a.equals("parse_flag_goes_here")){
jpayne@68 91 //Set a variable here
jpayne@68 92 }else if(a.equals("k")){
jpayne@68 93 k=Integer.parseInt(b);
jpayne@68 94 assert(k>0 && k<32);
jpayne@68 95 }else if(a.equals("ecco")){
jpayne@68 96 ecco=Parse.parseBoolean(b);
jpayne@68 97 }else if(a.equals("rename") || a.equals("addname")){
jpayne@68 98 //do nothing
jpayne@68 99 }else if(a.equals("rcomp") || a.equals("reversecomplement")){
jpayne@68 100 //do nothing
jpayne@68 101 }else if(a.equals("condense") || a.equals("consensus")){
jpayne@68 102 //do nothing
jpayne@68 103 }else if(a.equals("mincount") || a.equals("consensus")){
jpayne@68 104 minCount=Integer.parseInt(b);
jpayne@68 105 }else if(a.equals("correct") || a.equals("ecc")){
jpayne@68 106 //do nothing
jpayne@68 107 }else if(a.equals("groups") || a.equals("g") || a.equals("sets") || a.equals("ways")){
jpayne@68 108 //do nothing
jpayne@68 109 }else if(a.equals("seed")){
jpayne@68 110 KmerComparator.defaultSeed=Long.parseLong(b);
jpayne@68 111 }else if(a.equals("hashes")){
jpayne@68 112 KmerComparator.setHashes(Integer.parseInt(b));
jpayne@68 113 }else{
jpayne@68 114 outstream.println("Unknown parameter "+args[i]);
jpayne@68 115 assert(false) : "Unknown parameter "+args[i];
jpayne@68 116 // throw new RuntimeException("Unknown parameter "+args[i]);
jpayne@68 117 }
jpayne@68 118 }
jpayne@68 119
jpayne@68 120 {//Process parser fields
jpayne@68 121 Parser.processQuality();
jpayne@68 122
jpayne@68 123 maxReads=parser.maxReads;
jpayne@68 124
jpayne@68 125 in1=parser.in1;
jpayne@68 126 in2=parser.in2;
jpayne@68 127
jpayne@68 128 extin=parser.extin;
jpayne@68 129 }
jpayne@68 130
jpayne@68 131 if(in1!=null && in2==null && in1.indexOf('#')>-1 && !new File(in1).exists()){
jpayne@68 132 in2=in1.replace("#", "2");
jpayne@68 133 in1=in1.replace("#", "1");
jpayne@68 134 }
jpayne@68 135 if(in2!=null){
jpayne@68 136 if(FASTQ.FORCE_INTERLEAVED){outstream.println("Reset INTERLEAVED to false because paired input files were specified.");}
jpayne@68 137 FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=false;
jpayne@68 138 }
jpayne@68 139
jpayne@68 140 assert(FastaReadInputStream.settingsOK());
jpayne@68 141
jpayne@68 142 if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}
jpayne@68 143 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
jpayne@68 144 ByteFile.FORCE_MODE_BF2=true;
jpayne@68 145 }
jpayne@68 146
jpayne@68 147 ffin1=FileFormat.testInput(in1, FileFormat.FASTQ, extin, true, true);
jpayne@68 148 ffin2=FileFormat.testInput(in2, FileFormat.FASTQ, extin, true, true);
jpayne@68 149 }
jpayne@68 150
jpayne@68 151
jpayne@68 152 /*--------------------------------------------------------------*/
jpayne@68 153 /*---------------- Outer Methods ----------------*/
jpayne@68 154 /*--------------------------------------------------------------*/
jpayne@68 155
jpayne@68 156 private static long getCells(double fraction, int cbits){
jpayne@68 157 final long memory=Runtime.getRuntime().maxMemory();
jpayne@68 158 final long usable=(long)Tools.max(((memory-96000000)*.73), memory*0.45);
jpayne@68 159 final double filterMem=usable*fraction;
jpayne@68 160 return (long)((filterMem*8)/cbits);
jpayne@68 161 }
jpayne@68 162
jpayne@68 163 /** Create read streams and process all data */
jpayne@68 164 public KCountArray process(Timer t, boolean amino){
jpayne@68 165 int cbits=2;
jpayne@68 166 while((1L<<cbits)<=minCount){cbits*=2;}
jpayne@68 167 int filterHashes=2;
jpayne@68 168 float fraction=0.1f;
jpayne@68 169 long cells=getCells(fraction, cbits);
jpayne@68 170 ReadCounter rc=new ReadCounter(k, true, ecco, false, amino);
jpayne@68 171 KCountArray kca=rc.makeKca(null, null, null, cbits, cells, filterHashes, 0, maxReads, 1, 1, 1, 1, null, 0);
jpayne@68 172
jpayne@68 173 final ConcurrentReadInputStream cris;
jpayne@68 174 {
jpayne@68 175 cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ffin1, ffin2, null, null);
jpayne@68 176 cris.start();
jpayne@68 177 if(verbose){outstream.println("Started cris");}
jpayne@68 178 }
jpayne@68 179
jpayne@68 180 readsProcessed=0;
jpayne@68 181 basesProcessed=0;
jpayne@68 182
jpayne@68 183 //Process the read stream
jpayne@68 184 processInner(cris, kca);
jpayne@68 185
jpayne@68 186 if(verbose){outstream.println("Finished; closing streams.");}
jpayne@68 187
jpayne@68 188 errorState|=ReadStats.writeAll();
jpayne@68 189 errorState|=ReadWrite.closeStreams(cris);
jpayne@68 190
jpayne@68 191 t.stop();
jpayne@68 192
jpayne@68 193 outstream.println("Made filter: \t"+kca.toShortString(filterHashes));
jpayne@68 194 outstream.println("Estimated pivots: \t"+(long)kca.estimateUniqueKmers(filterHashes));
jpayne@68 195 outstream.println("Estimated pivots >1x: \t"+(long)kca.estimateUniqueKmers(filterHashes, minCount));
jpayne@68 196
jpayne@68 197 outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8));
jpayne@68 198
jpayne@68 199 if(errorState){
jpayne@68 200 Clumpify.sharedErrorState=true;
jpayne@68 201 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
jpayne@68 202 }
jpayne@68 203 return kca;
jpayne@68 204 }
jpayne@68 205
jpayne@68 206 /** Manage threads */
jpayne@68 207 public static KCountArray makeKcaStatic(final ConcurrentReadInputStream cris, int k, int minCount, boolean amino){
jpayne@68 208
jpayne@68 209 KmerComparator kc=new KmerComparator(k, false, false);
jpayne@68 210 int cbits=2;
jpayne@68 211 while((1L<<cbits)<=minCount){cbits*=2;}
jpayne@68 212 int filterHashes=2;
jpayne@68 213 float fraction=0.1f;
jpayne@68 214 long cells=getCells(fraction, cbits);
jpayne@68 215 ReadCounter rc=new ReadCounter(k, true, false, false, amino);
jpayne@68 216 KCountArray kca=rc.makeKca(null, null, null, cbits, cells, filterHashes, 0, -1, 1, 1, 1, 1, null, 0);
jpayne@68 217
jpayne@68 218 if(verbose){System.err.println("Making hash threads.");}
jpayne@68 219 final int threads=Shared.threads();
jpayne@68 220 ArrayList<HashThread> alht=new ArrayList<HashThread>(threads);
jpayne@68 221 for(int i=0; i<threads; i++){alht.add(new HashThread(cris, kc, kca, false));}
jpayne@68 222
jpayne@68 223 if(verbose){System.err.println("Starting threads.");}
jpayne@68 224 for(HashThread ht : alht){ht.start();}
jpayne@68 225
jpayne@68 226 if(verbose){System.err.println("Waiting for threads.");}
jpayne@68 227 /* Wait for threads to die */
jpayne@68 228 for(HashThread ht : alht){
jpayne@68 229
jpayne@68 230 /* Wait for a thread to die */
jpayne@68 231 while(ht.getState()!=Thread.State.TERMINATED){
jpayne@68 232 try {
jpayne@68 233 ht.join();
jpayne@68 234 } catch (InterruptedException e) {
jpayne@68 235 e.printStackTrace();
jpayne@68 236 }
jpayne@68 237 }
jpayne@68 238 }
jpayne@68 239 kca.shutdown();
jpayne@68 240 return kca;
jpayne@68 241 }
jpayne@68 242
jpayne@68 243 /** Manage threads */
jpayne@68 244 public void processInner(final ConcurrentReadInputStream cris, KCountArray kca){
jpayne@68 245 if(verbose){outstream.println("Making comparator.");}
jpayne@68 246 KmerComparator kc=new KmerComparator(k, false, false);
jpayne@68 247
jpayne@68 248 if(verbose){outstream.println("Making hash threads.");}
jpayne@68 249 final int threads=Shared.threads();
jpayne@68 250 ArrayList<HashThread> alht=new ArrayList<HashThread>(threads);
jpayne@68 251 for(int i=0; i<threads; i++){alht.add(new HashThread(cris, kc, kca, ecco));}
jpayne@68 252
jpayne@68 253 if(verbose){outstream.println("Starting threads.");}
jpayne@68 254 for(HashThread ht : alht){ht.start();}
jpayne@68 255
jpayne@68 256 if(verbose){outstream.println("Waiting for threads.");}
jpayne@68 257 /* Wait for threads to die */
jpayne@68 258 for(HashThread ht : alht){
jpayne@68 259
jpayne@68 260 /* Wait for a thread to die */
jpayne@68 261 while(ht.getState()!=Thread.State.TERMINATED){
jpayne@68 262 try {
jpayne@68 263 ht.join();
jpayne@68 264 } catch (InterruptedException e) {
jpayne@68 265 e.printStackTrace();
jpayne@68 266 }
jpayne@68 267 }
jpayne@68 268 readsProcessed+=ht.readsProcessedT;
jpayne@68 269 basesProcessed+=ht.basesProcessedT;
jpayne@68 270 }
jpayne@68 271 kca.shutdown();
jpayne@68 272 }
jpayne@68 273
jpayne@68 274 /*--------------------------------------------------------------*/
jpayne@68 275 /*---------------- Inner Classes ----------------*/
jpayne@68 276 /*--------------------------------------------------------------*/
jpayne@68 277
jpayne@68 278 private static class HashThread extends Thread{
jpayne@68 279
jpayne@68 280 HashThread(ConcurrentReadInputStream cris_, KmerComparator kc_, KCountArray kca_, boolean ecco_){
jpayne@68 281 cris=cris_;
jpayne@68 282 kc=kc_;
jpayne@68 283 kca=kca_;
jpayne@68 284 ecco=ecco_;
jpayne@68 285 }
jpayne@68 286
jpayne@68 287 @Override
jpayne@68 288 public void run(){
jpayne@68 289
jpayne@68 290 ListNum<Read> ln=cris.nextList();
jpayne@68 291 ArrayList<Read> reads=(ln!=null ? ln.list : null);
jpayne@68 292
jpayne@68 293 while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
jpayne@68 294 for(Read r1 : reads){
jpayne@68 295 Read r2=r1.mate;
jpayne@68 296 readsProcessedT+=r1.pairCount();
jpayne@68 297 basesProcessedT+=r1.pairLength();
jpayne@68 298 if(ecco && r2!=null){
jpayne@68 299 if(r2!=null){BBMerge.findOverlapStrict(r1, r2, true);}
jpayne@68 300 }
jpayne@68 301 {
jpayne@68 302 final long kmer=kc.hash(r1, null, 0, false);
jpayne@68 303 if(kmer>=0){
jpayne@68 304 kca.increment(kmer);
jpayne@68 305 }
jpayne@68 306 }
jpayne@68 307 if(r2!=null){
jpayne@68 308 final long kmer=kc.hash(r2, null, 0, false);
jpayne@68 309 if(kmer>=0){
jpayne@68 310 kca.increment(kmer);
jpayne@68 311 }
jpayne@68 312 }
jpayne@68 313 }
jpayne@68 314 cris.returnList(ln);
jpayne@68 315 ln=cris.nextList();
jpayne@68 316 reads=(ln!=null ? ln.list : null);
jpayne@68 317 }
jpayne@68 318 if(ln!=null){
jpayne@68 319 cris.returnList(ln.id, ln.list==null || ln.list.isEmpty());
jpayne@68 320 }
jpayne@68 321 }
jpayne@68 322
jpayne@68 323 final ConcurrentReadInputStream cris;
jpayne@68 324 final KmerComparator kc;
jpayne@68 325 final KCountArray kca;
jpayne@68 326 final boolean ecco;
jpayne@68 327
jpayne@68 328 protected long readsProcessedT=0;
jpayne@68 329 protected long basesProcessedT=0;
jpayne@68 330 }
jpayne@68 331
jpayne@68 332 /*--------------------------------------------------------------*/
jpayne@68 333 /*---------------- Inner Methods ----------------*/
jpayne@68 334 /*--------------------------------------------------------------*/
jpayne@68 335
jpayne@68 336 /*--------------------------------------------------------------*/
jpayne@68 337 /*---------------- Fields ----------------*/
jpayne@68 338 /*--------------------------------------------------------------*/
jpayne@68 339
jpayne@68 340 private int k=31;
jpayne@68 341 private int minCount=2;
jpayne@68 342
jpayne@68 343 /*--------------------------------------------------------------*/
jpayne@68 344 /*---------------- I/O Fields ----------------*/
jpayne@68 345 /*--------------------------------------------------------------*/
jpayne@68 346
jpayne@68 347 private String in1=null;
jpayne@68 348 private String in2=null;
jpayne@68 349
jpayne@68 350 private String extin=null;
jpayne@68 351
jpayne@68 352 /*--------------------------------------------------------------*/
jpayne@68 353
jpayne@68 354 protected long readsProcessed=0;
jpayne@68 355 protected long basesProcessed=0;
jpayne@68 356
jpayne@68 357 private long maxReads=-1;
jpayne@68 358 private boolean ecco=false;
jpayne@68 359
jpayne@68 360 /*--------------------------------------------------------------*/
jpayne@68 361 /*---------------- Final Fields ----------------*/
jpayne@68 362 /*--------------------------------------------------------------*/
jpayne@68 363
jpayne@68 364 private final FileFormat ffin1;
jpayne@68 365 private final FileFormat ffin2;
jpayne@68 366
jpayne@68 367 /*--------------------------------------------------------------*/
jpayne@68 368 /*---------------- Common Fields ----------------*/
jpayne@68 369 /*--------------------------------------------------------------*/
jpayne@68 370
jpayne@68 371 private PrintStream outstream=System.err;
jpayne@68 372 public static boolean verbose=false;
jpayne@68 373 public boolean errorState=false;
jpayne@68 374
jpayne@68 375 }