Mercurial > repos > rliterman > csp2
diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/InvertKey.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/InvertKey.java Tue Mar 18 16:23:26 2025 -0400 @@ -0,0 +1,316 @@ +package sketch; + +import java.io.File; +import java.io.PrintStream; +import java.util.ArrayList; + +import dna.AminoAcid; +import fileIO.ByteFile; +import fileIO.ByteFile1; +import fileIO.ByteFile2; +import fileIO.ByteStreamWriter; +import fileIO.FileFormat; +import fileIO.ReadWrite; +import shared.Parse; +import shared.Parser; +import shared.PreParser; +import shared.ReadStats; +import shared.Shared; +import shared.Timer; +import shared.Tools; +import stream.ConcurrentGenericReadInputStream; +import stream.ConcurrentReadInputStream; +import stream.FASTQ; +import stream.FastaReadInputStream; +import stream.Read; +import structures.ListNum; +import structures.LongHashSet; + +/** + * @author Brian Bushnell + * @date Oct 17, 2014 + * + */ +public class InvertKey extends SketchObject { + + public static void main(String[] args){ + Timer t=new Timer(); + InvertKey x=new InvertKey(args); + x.process(t); + + //Close the print stream if it was redirected + Shared.closeStream(x.outstream); + } + + public InvertKey(String[] args){ + + {//Preparse block for help, config files, and outstream + PreParser pp=new PreParser(args, getClass(), false); + args=pp.args; + outstream=pp.outstream; + } + + Shared.capBuffers(4); + ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true; + ReadWrite.MAX_ZIP_THREADS=Shared.threads(); + + FASTQ.TEST_INTERLEAVED=FASTQ.FORCE_INTERLEAVED=false; + int k_=32, k2_=0; + + Parser parser=new Parser(); + for(int i=0; i<args.length; i++){ + String arg=args[i]; + String[] split=arg.split("="); + String a=split[0].toLowerCase(); + String b=split.length>1 ? split[1] : null; + + if(a.equals("verbose")){ + verbose=Parse.parseBoolean(b); + ByteFile1.verbose=verbose; + ByteFile2.verbose=verbose; + stream.FastaReadInputStream.verbose=verbose; + ConcurrentGenericReadInputStream.verbose=verbose; + stream.FastqReadInputStream.verbose=verbose; + ReadWrite.verbose=verbose; + }else if(a.equals("key")){ + keyString=b; + }else if(a.equals("out")){ + out1=b; + }else if(a.equalsIgnoreCase("k")){ + assert(b!=null) : "Bad parameter: "+arg; + if(b.indexOf(',')>=0){ + String[] bsplit=b.split(","); + assert(bsplit.length==2) : "Bad argument "+arg; + int x=Integer.parseInt(bsplit[0]); + int y=Integer.parseInt(bsplit[1]); + k_=Tools.max(x, y); + k2_=Tools.min(x, y); + if(k_==k2_){k2_=0;} + }else{ + k_=Integer.parseInt(b); + k2_=0; + } + }else if(a.equalsIgnoreCase("printonce")){ + printOnce=Parse.parseBoolean(b); + }else if(parser.in1==null && i==0 && !arg.contains("=") && (arg.toLowerCase().startsWith("stdin") || new File(arg).exists())){ + parser.in1=arg; + }else if(parser.out1==null && i==1 && !arg.contains("=")){ + out1=arg; + }else if(parser.parse(arg, a, b)){ + //do nothing + }else{ + outstream.println("Unknown parameter "+args[i]); + assert(false) : "Unknown parameter "+args[i]; + // throw new RuntimeException("Unknown parameter "+args[i]); + } + } + + k=k_; + k2=k2_; + shift=2*k; + shift2=shift-2; + mask=(shift>63 ? -1L : ~((-1L)<<shift)); //Conditional allows K=32 + + {//Process parser fields + Parser.processQuality(); + + maxReads=parser.maxReads; + + overwrite=ReadStats.overwrite=parser.overwrite; + append=ReadStats.append=parser.append; + + in1=parser.in1; + } + + assert(FastaReadInputStream.settingsOK()); + + if(in1==null){throw new RuntimeException("Error - at least one input file is required.");} + if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2){ + ByteFile.FORCE_MODE_BF2=false; + ByteFile.FORCE_MODE_BF1=true; + } + + if(out1!=null && out1.equalsIgnoreCase("null")){out1=null;} + + if(!Tools.testOutputFiles(overwrite, append, false, out1)){ + outstream.println((out1==null)+", "+out1); + throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out1+"\n"); + } + + ffout1=FileFormat.testOutput(out1, FileFormat.FASTA, null, true, overwrite, append, false); + + ffin1=FileFormat.testInput(in1, FileFormat.FASTA, null, true, true); + + SketchObject.postParse(); + + if(keyString.indexOf(',')>0){ + String[] split=keyString.split(","); + set=new LongHashSet(split.length*2); + for(String s : split){ + long x=Long.MAX_VALUE-Sketch.parseA48(s); + set.add(x); +// assert(set.contains(x)) : x+", "+set.size()+", "+set.toStringListView(); + } + key0=-1; +// System.err.println(set.toStringListView()+", "+set.size()); + assert(!set.isEmpty()); + }else if(keyString.endsWith(".sketch")){ + SketchTool tool=new SketchTool(10000, 0, false, false); + Sketch sk=tool.loadSketchesFromFile(keyString, null, 0, 1000000, SketchObject.ONE_SKETCH, 1f, 0f, 0f, (byte)0, false).get(0); + set=new LongHashSet(sk.length()*2); + for(long x : sk.keys){set.add(Long.MAX_VALUE-x);} + key0=-1; +// System.err.println(set.toStringListView()+", "+set.size()); + assert(!set.isEmpty()); + }else{ + key0=Long.MAX_VALUE-Sketch.parseA48(keyString); + set=null; +// System.err.println(key0); + } + } + + void process(Timer t){ + + final ConcurrentReadInputStream cris; + { + cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ffin1, null, null, null); + cris.start(); + if(verbose){outstream.println("Started cris");} + } + boolean paired=cris.paired(); +// if(verbose){ + if(!ffin1.samOrBam()){outstream.println("Input is being processed as "+(paired ? "paired" : "unpaired"));} +// } + + final ByteStreamWriter bsw; + if(out1!=null){ + fasta=ffout1.fasta() && !out1.endsWith(".txt"); + bsw=new ByteStreamWriter(ffout1); + bsw.start(); + }else{bsw=null;} + + long readsProcessed=0; + long basesProcessed=0; + boolean finished=false; + + { + + ListNum<Read> ln=cris.nextList(); + ArrayList<Read> reads=(ln!=null ? ln.list : null); + +// outstream.println("Fetched "+reads); + + if(reads!=null && !reads.isEmpty()){ + Read r=reads.get(0); + assert((ffin1==null || ffin1.samOrBam()) || (r.mate!=null)==cris.paired()); + } + + while(reads!=null && reads.size()>0 && !finished){ + + for(int idx=0; idx<reads.size() && !finished; idx++){ + final Read r1=reads.get(idx); + + finished=invert(key0, r1, bsw); + + final int initialLength1=r1.length(); + + readsProcessed++; + basesProcessed+=initialLength1; + } + + cris.returnList(ln); + ln=cris.nextList(); + reads=(ln!=null ? ln.list : null); + } + if(ln!=null){ + cris.returnList(ln.id, ln.list==null || ln.list.isEmpty()); + } + } + + errorState|=(ReadWrite.closeStream(cris)); + if(bsw!=null){errorState|=bsw.poisonAndWait();} + + t.stop(); + outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8)); + + if(errorState && !finished && maxReads<1){ + throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt."); + } + } + + private boolean invert(long key2, Read r, ByteStreamWriter bsw) { + final byte[] bases=r.bases; + + long kmer=0; + long rkmer=0; + int len=0; + + +// System.err.println("Looking for "+key+"\t"+Sketch.toA48(key)+"\t"+Sketch.toA48(Long.MAX_VALUE-key)); + + for(int i=0; i<bases.length; i++){ + byte b=bases[i]; + long x=AminoAcid.baseToNumber[b]; + long x2=AminoAcid.baseToComplementNumber[b]; + kmer=((kmer<<2)|x)&mask; + rkmer=((rkmer>>>2)|(x2<<shift2))&mask; + if(x<0){len=0; rkmer=0;}else{len++;} + if(len>=k){ + kmersProcessed++; + final long hashcode=hash(kmer, rkmer); + boolean found=(key0>=0 ? hashcode==key0 : set.contains(hashcode)); + if(found){ + if(fasta){bsw.println(">"+Sketch.toA48(Long.MAX_VALUE-hashcode)+" "+(i-k+1)+" "+r.id);} + bsw.println(AminoAcid.kmerToString(Tools.min(kmer, rkmer), k)); + if(printOnce){ + if(key0>=0){return true;} + else{ + set.remove(hashcode); + return set.isEmpty(); + } + } + } + } + } + return false; + } + + /*--------------------------------------------------------------*/ + + final long key0; + final LongHashSet set; + + final int shift; + final int shift2; + final long mask; + + boolean printOnce=true; + long kmersProcessed=0; + + private String in1=null; + boolean fasta; + boolean sketch; + private String keyString=null; + + private String out1="stdout.fa"; + + /*--------------------------------------------------------------*/ + + private long maxReads=-1; + + /*--------------------------------------------------------------*/ + + private final FileFormat ffin1; + + private final FileFormat ffout1; + + + /*--------------------------------------------------------------*/ + + private PrintStream outstream=System.err; + public static boolean verbose=false; + public boolean errorState=false; + private boolean overwrite=false; + private boolean append=false; + +}