annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/InvertKey.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 package sketch;
jpayne@68 2
jpayne@68 3 import java.io.File;
jpayne@68 4 import java.io.PrintStream;
jpayne@68 5 import java.util.ArrayList;
jpayne@68 6
jpayne@68 7 import dna.AminoAcid;
jpayne@68 8 import fileIO.ByteFile;
jpayne@68 9 import fileIO.ByteFile1;
jpayne@68 10 import fileIO.ByteFile2;
jpayne@68 11 import fileIO.ByteStreamWriter;
jpayne@68 12 import fileIO.FileFormat;
jpayne@68 13 import fileIO.ReadWrite;
jpayne@68 14 import shared.Parse;
jpayne@68 15 import shared.Parser;
jpayne@68 16 import shared.PreParser;
jpayne@68 17 import shared.ReadStats;
jpayne@68 18 import shared.Shared;
jpayne@68 19 import shared.Timer;
jpayne@68 20 import shared.Tools;
jpayne@68 21 import stream.ConcurrentGenericReadInputStream;
jpayne@68 22 import stream.ConcurrentReadInputStream;
jpayne@68 23 import stream.FASTQ;
jpayne@68 24 import stream.FastaReadInputStream;
jpayne@68 25 import stream.Read;
jpayne@68 26 import structures.ListNum;
jpayne@68 27 import structures.LongHashSet;
jpayne@68 28
jpayne@68 29 /**
jpayne@68 30 * @author Brian Bushnell
jpayne@68 31 * @date Oct 17, 2014
jpayne@68 32 *
jpayne@68 33 */
jpayne@68 34 public class InvertKey extends SketchObject {
jpayne@68 35
jpayne@68 36 public static void main(String[] args){
jpayne@68 37 Timer t=new Timer();
jpayne@68 38 InvertKey x=new InvertKey(args);
jpayne@68 39 x.process(t);
jpayne@68 40
jpayne@68 41 //Close the print stream if it was redirected
jpayne@68 42 Shared.closeStream(x.outstream);
jpayne@68 43 }
jpayne@68 44
jpayne@68 45 public InvertKey(String[] args){
jpayne@68 46
jpayne@68 47 {//Preparse block for help, config files, and outstream
jpayne@68 48 PreParser pp=new PreParser(args, getClass(), false);
jpayne@68 49 args=pp.args;
jpayne@68 50 outstream=pp.outstream;
jpayne@68 51 }
jpayne@68 52
jpayne@68 53 Shared.capBuffers(4);
jpayne@68 54 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
jpayne@68 55 ReadWrite.MAX_ZIP_THREADS=Shared.threads();
jpayne@68 56
jpayne@68 57 FASTQ.TEST_INTERLEAVED=FASTQ.FORCE_INTERLEAVED=false;
jpayne@68 58 int k_=32, k2_=0;
jpayne@68 59
jpayne@68 60 Parser parser=new Parser();
jpayne@68 61 for(int i=0; i<args.length; i++){
jpayne@68 62 String arg=args[i];
jpayne@68 63 String[] split=arg.split("=");
jpayne@68 64 String a=split[0].toLowerCase();
jpayne@68 65 String b=split.length>1 ? split[1] : null;
jpayne@68 66
jpayne@68 67 if(a.equals("verbose")){
jpayne@68 68 verbose=Parse.parseBoolean(b);
jpayne@68 69 ByteFile1.verbose=verbose;
jpayne@68 70 ByteFile2.verbose=verbose;
jpayne@68 71 stream.FastaReadInputStream.verbose=verbose;
jpayne@68 72 ConcurrentGenericReadInputStream.verbose=verbose;
jpayne@68 73 stream.FastqReadInputStream.verbose=verbose;
jpayne@68 74 ReadWrite.verbose=verbose;
jpayne@68 75 }else if(a.equals("key")){
jpayne@68 76 keyString=b;
jpayne@68 77 }else if(a.equals("out")){
jpayne@68 78 out1=b;
jpayne@68 79 }else if(a.equalsIgnoreCase("k")){
jpayne@68 80 assert(b!=null) : "Bad parameter: "+arg;
jpayne@68 81 if(b.indexOf(',')>=0){
jpayne@68 82 String[] bsplit=b.split(",");
jpayne@68 83 assert(bsplit.length==2) : "Bad argument "+arg;
jpayne@68 84 int x=Integer.parseInt(bsplit[0]);
jpayne@68 85 int y=Integer.parseInt(bsplit[1]);
jpayne@68 86 k_=Tools.max(x, y);
jpayne@68 87 k2_=Tools.min(x, y);
jpayne@68 88 if(k_==k2_){k2_=0;}
jpayne@68 89 }else{
jpayne@68 90 k_=Integer.parseInt(b);
jpayne@68 91 k2_=0;
jpayne@68 92 }
jpayne@68 93 }else if(a.equalsIgnoreCase("printonce")){
jpayne@68 94 printOnce=Parse.parseBoolean(b);
jpayne@68 95 }else if(parser.in1==null && i==0 && !arg.contains("=") && (arg.toLowerCase().startsWith("stdin") || new File(arg).exists())){
jpayne@68 96 parser.in1=arg;
jpayne@68 97 }else if(parser.out1==null && i==1 && !arg.contains("=")){
jpayne@68 98 out1=arg;
jpayne@68 99 }else if(parser.parse(arg, a, b)){
jpayne@68 100 //do nothing
jpayne@68 101 }else{
jpayne@68 102 outstream.println("Unknown parameter "+args[i]);
jpayne@68 103 assert(false) : "Unknown parameter "+args[i];
jpayne@68 104 // throw new RuntimeException("Unknown parameter "+args[i]);
jpayne@68 105 }
jpayne@68 106 }
jpayne@68 107
jpayne@68 108 k=k_;
jpayne@68 109 k2=k2_;
jpayne@68 110 shift=2*k;
jpayne@68 111 shift2=shift-2;
jpayne@68 112 mask=(shift>63 ? -1L : ~((-1L)<<shift)); //Conditional allows K=32
jpayne@68 113
jpayne@68 114 {//Process parser fields
jpayne@68 115 Parser.processQuality();
jpayne@68 116
jpayne@68 117 maxReads=parser.maxReads;
jpayne@68 118
jpayne@68 119 overwrite=ReadStats.overwrite=parser.overwrite;
jpayne@68 120 append=ReadStats.append=parser.append;
jpayne@68 121
jpayne@68 122 in1=parser.in1;
jpayne@68 123 }
jpayne@68 124
jpayne@68 125 assert(FastaReadInputStream.settingsOK());
jpayne@68 126
jpayne@68 127 if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}
jpayne@68 128 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2){
jpayne@68 129 ByteFile.FORCE_MODE_BF2=false;
jpayne@68 130 ByteFile.FORCE_MODE_BF1=true;
jpayne@68 131 }
jpayne@68 132
jpayne@68 133 if(out1!=null && out1.equalsIgnoreCase("null")){out1=null;}
jpayne@68 134
jpayne@68 135 if(!Tools.testOutputFiles(overwrite, append, false, out1)){
jpayne@68 136 outstream.println((out1==null)+", "+out1);
jpayne@68 137 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out1+"\n");
jpayne@68 138 }
jpayne@68 139
jpayne@68 140 ffout1=FileFormat.testOutput(out1, FileFormat.FASTA, null, true, overwrite, append, false);
jpayne@68 141
jpayne@68 142 ffin1=FileFormat.testInput(in1, FileFormat.FASTA, null, true, true);
jpayne@68 143
jpayne@68 144 SketchObject.postParse();
jpayne@68 145
jpayne@68 146 if(keyString.indexOf(',')>0){
jpayne@68 147 String[] split=keyString.split(",");
jpayne@68 148 set=new LongHashSet(split.length*2);
jpayne@68 149 for(String s : split){
jpayne@68 150 long x=Long.MAX_VALUE-Sketch.parseA48(s);
jpayne@68 151 set.add(x);
jpayne@68 152 // assert(set.contains(x)) : x+", "+set.size()+", "+set.toStringListView();
jpayne@68 153 }
jpayne@68 154 key0=-1;
jpayne@68 155 // System.err.println(set.toStringListView()+", "+set.size());
jpayne@68 156 assert(!set.isEmpty());
jpayne@68 157 }else if(keyString.endsWith(".sketch")){
jpayne@68 158 SketchTool tool=new SketchTool(10000, 0, false, false);
jpayne@68 159 Sketch sk=tool.loadSketchesFromFile(keyString, null, 0, 1000000, SketchObject.ONE_SKETCH, 1f, 0f, 0f, (byte)0, false).get(0);
jpayne@68 160 set=new LongHashSet(sk.length()*2);
jpayne@68 161 for(long x : sk.keys){set.add(Long.MAX_VALUE-x);}
jpayne@68 162 key0=-1;
jpayne@68 163 // System.err.println(set.toStringListView()+", "+set.size());
jpayne@68 164 assert(!set.isEmpty());
jpayne@68 165 }else{
jpayne@68 166 key0=Long.MAX_VALUE-Sketch.parseA48(keyString);
jpayne@68 167 set=null;
jpayne@68 168 // System.err.println(key0);
jpayne@68 169 }
jpayne@68 170 }
jpayne@68 171
jpayne@68 172 void process(Timer t){
jpayne@68 173
jpayne@68 174 final ConcurrentReadInputStream cris;
jpayne@68 175 {
jpayne@68 176 cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ffin1, null, null, null);
jpayne@68 177 cris.start();
jpayne@68 178 if(verbose){outstream.println("Started cris");}
jpayne@68 179 }
jpayne@68 180 boolean paired=cris.paired();
jpayne@68 181 // if(verbose){
jpayne@68 182 if(!ffin1.samOrBam()){outstream.println("Input is being processed as "+(paired ? "paired" : "unpaired"));}
jpayne@68 183 // }
jpayne@68 184
jpayne@68 185 final ByteStreamWriter bsw;
jpayne@68 186 if(out1!=null){
jpayne@68 187 fasta=ffout1.fasta() && !out1.endsWith(".txt");
jpayne@68 188 bsw=new ByteStreamWriter(ffout1);
jpayne@68 189 bsw.start();
jpayne@68 190 }else{bsw=null;}
jpayne@68 191
jpayne@68 192 long readsProcessed=0;
jpayne@68 193 long basesProcessed=0;
jpayne@68 194 boolean finished=false;
jpayne@68 195
jpayne@68 196 {
jpayne@68 197
jpayne@68 198 ListNum<Read> ln=cris.nextList();
jpayne@68 199 ArrayList<Read> reads=(ln!=null ? ln.list : null);
jpayne@68 200
jpayne@68 201 // outstream.println("Fetched "+reads);
jpayne@68 202
jpayne@68 203 if(reads!=null && !reads.isEmpty()){
jpayne@68 204 Read r=reads.get(0);
jpayne@68 205 assert((ffin1==null || ffin1.samOrBam()) || (r.mate!=null)==cris.paired());
jpayne@68 206 }
jpayne@68 207
jpayne@68 208 while(reads!=null && reads.size()>0 && !finished){
jpayne@68 209
jpayne@68 210 for(int idx=0; idx<reads.size() && !finished; idx++){
jpayne@68 211 final Read r1=reads.get(idx);
jpayne@68 212
jpayne@68 213 finished=invert(key0, r1, bsw);
jpayne@68 214
jpayne@68 215 final int initialLength1=r1.length();
jpayne@68 216
jpayne@68 217 readsProcessed++;
jpayne@68 218 basesProcessed+=initialLength1;
jpayne@68 219 }
jpayne@68 220
jpayne@68 221 cris.returnList(ln);
jpayne@68 222 ln=cris.nextList();
jpayne@68 223 reads=(ln!=null ? ln.list : null);
jpayne@68 224 }
jpayne@68 225 if(ln!=null){
jpayne@68 226 cris.returnList(ln.id, ln.list==null || ln.list.isEmpty());
jpayne@68 227 }
jpayne@68 228 }
jpayne@68 229
jpayne@68 230 errorState|=(ReadWrite.closeStream(cris));
jpayne@68 231 if(bsw!=null){errorState|=bsw.poisonAndWait();}
jpayne@68 232
jpayne@68 233 t.stop();
jpayne@68 234 outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8));
jpayne@68 235
jpayne@68 236 if(errorState && !finished && maxReads<1){
jpayne@68 237 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
jpayne@68 238 }
jpayne@68 239 }
jpayne@68 240
jpayne@68 241 private boolean invert(long key2, Read r, ByteStreamWriter bsw) {
jpayne@68 242 final byte[] bases=r.bases;
jpayne@68 243
jpayne@68 244 long kmer=0;
jpayne@68 245 long rkmer=0;
jpayne@68 246 int len=0;
jpayne@68 247
jpayne@68 248
jpayne@68 249 // System.err.println("Looking for "+key+"\t"+Sketch.toA48(key)+"\t"+Sketch.toA48(Long.MAX_VALUE-key));
jpayne@68 250
jpayne@68 251 for(int i=0; i<bases.length; i++){
jpayne@68 252 byte b=bases[i];
jpayne@68 253 long x=AminoAcid.baseToNumber[b];
jpayne@68 254 long x2=AminoAcid.baseToComplementNumber[b];
jpayne@68 255 kmer=((kmer<<2)|x)&mask;
jpayne@68 256 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
jpayne@68 257 if(x<0){len=0; rkmer=0;}else{len++;}
jpayne@68 258 if(len>=k){
jpayne@68 259 kmersProcessed++;
jpayne@68 260 final long hashcode=hash(kmer, rkmer);
jpayne@68 261 boolean found=(key0>=0 ? hashcode==key0 : set.contains(hashcode));
jpayne@68 262 if(found){
jpayne@68 263 if(fasta){bsw.println(">"+Sketch.toA48(Long.MAX_VALUE-hashcode)+" "+(i-k+1)+" "+r.id);}
jpayne@68 264 bsw.println(AminoAcid.kmerToString(Tools.min(kmer, rkmer), k));
jpayne@68 265 if(printOnce){
jpayne@68 266 if(key0>=0){return true;}
jpayne@68 267 else{
jpayne@68 268 set.remove(hashcode);
jpayne@68 269 return set.isEmpty();
jpayne@68 270 }
jpayne@68 271 }
jpayne@68 272 }
jpayne@68 273 }
jpayne@68 274 }
jpayne@68 275 return false;
jpayne@68 276 }
jpayne@68 277
jpayne@68 278 /*--------------------------------------------------------------*/
jpayne@68 279
jpayne@68 280 final long key0;
jpayne@68 281 final LongHashSet set;
jpayne@68 282
jpayne@68 283 final int shift;
jpayne@68 284 final int shift2;
jpayne@68 285 final long mask;
jpayne@68 286
jpayne@68 287 boolean printOnce=true;
jpayne@68 288 long kmersProcessed=0;
jpayne@68 289
jpayne@68 290 private String in1=null;
jpayne@68 291 boolean fasta;
jpayne@68 292 boolean sketch;
jpayne@68 293 private String keyString=null;
jpayne@68 294
jpayne@68 295 private String out1="stdout.fa";
jpayne@68 296
jpayne@68 297 /*--------------------------------------------------------------*/
jpayne@68 298
jpayne@68 299 private long maxReads=-1;
jpayne@68 300
jpayne@68 301 /*--------------------------------------------------------------*/
jpayne@68 302
jpayne@68 303 private final FileFormat ffin1;
jpayne@68 304
jpayne@68 305 private final FileFormat ffout1;
jpayne@68 306
jpayne@68 307
jpayne@68 308 /*--------------------------------------------------------------*/
jpayne@68 309
jpayne@68 310 private PrintStream outstream=System.err;
jpayne@68 311 public static boolean verbose=false;
jpayne@68 312 public boolean errorState=false;
jpayne@68 313 private boolean overwrite=false;
jpayne@68 314 private boolean append=false;
jpayne@68 315
jpayne@68 316 }