diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/bloom/TestLargeKmer.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/bloom/TestLargeKmer.java	Tue Mar 18 16:23:26 2025 -0400
@@ -0,0 +1,190 @@
+package bloom;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import dna.AminoAcid;
+import fileIO.ReadWrite;
+import shared.Timer;
+import stream.ConcurrentGenericReadInputStream;
+import stream.FastqReadInputStream;
+import stream.Read;
+import structures.ListNum;
+
+/**
+ * @author Brian Bushnell
+ * @date Jul 5, 2012
+ *
+ */
+public class TestLargeKmer {
+	
+	public static void main(String args[]){
+		Timer t=new Timer();
+		
+		String fname1=args[0];
+		String fname2=(args.length>4 || args[1].contains(".") ? args[1] : null);
+		int k=Integer.parseInt(args[args.length-3]);
+		int cbits=Integer.parseInt(args[args.length-2]);
+		int k2=Integer.parseInt(args[args.length-1]);
+		
+		KCountArray2 counts=KmerCount3.countFastq(fname1, fname2, k, cbits);
+		long[] counts2=countK2(fname1, fname2, k, counts, k2);
+		
+		t.stop();
+		System.out.println("Finished counting; time = "+t+"\n");
+		
+		for(int i=0; i<counts2.length; i++){
+			System.out.println(i+":\t"+counts2[i]);
+		}
+	}
+	
+	public static long[] countK2(String fname1, String fname2, int k, int cbits, int k2){
+		KCountArray2 counts=KmerCount3.countFastq(fname1, fname2, k, cbits);
+		return countK2(fname1, fname2, k, counts, k2);
+	}
+	
+	public static long[] countK2(String fname1, String fname2, int k, KCountArray2 counts1, int k2){
+		assert(k>=1 && k<20);
+		final int kbits=2*k;
+		final long mask=(kbits>63 ? -1L : ~((-1L)<<kbits));
+		FastqReadInputStream fris1=new FastqReadInputStream(fname1, false);
+		FastqReadInputStream fris2=(fname2==null ? null : new FastqReadInputStream(fname2, false));
+		ConcurrentGenericReadInputStream cris=new ConcurrentGenericReadInputStream(fris1, fris2, KmerCount3.maxReads);
+		
+		cris.start();
+		System.err.println("Started cris");
+		boolean paired=cris.paired();
+		
+		long kmer=0; //current kmer
+		int len=0;  //distance since last contig start or ambiguous base
+		
+		
+		final long[] upperBound=new long[BOUND_LEN]; //Lowest upper bound provable of kmer count
+		final int[] ring=new int[k2-k+1];
+		final int[] subcount=new int[BOUND_LEN];
+		final int maxValue=subcount.length-1;
+		
+		{
+			ListNum<Read> ln=cris.nextList();
+			ArrayList<Read> reads=(ln!=null ? ln.list : null);
+			
+			if(reads!=null && !reads.isEmpty()){
+				Read r=reads.get(0);
+				assert(paired==(r.mate!=null));
+			}
+			
+			while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
+				//System.err.println("reads.size()="+reads.size());
+				for(Read r : reads){
+					
+					len=0;
+					kmer=0;
+					Arrays.fill(subcount, 0);
+					
+					byte[] bases=r.bases;
+					byte[] quals=r.quality;
+					for(int i=0; i<bases.length; i++){
+						byte b=bases[i];
+						int x=AminoAcid.baseToNumber[b];
+						
+						int ringpos=i%ring.length;
+						int old=ring[ringpos];
+						int value=0;
+						
+						if(x<0 || quals[i]<KmerCount3.minQuality){
+							len=0;
+							kmer=0;
+						}else{
+							kmer=((kmer<<2)|x)&mask;
+							len++;
+							
+							if(len>=k){
+								value=counts1.read(kmer);
+							}
+						}
+						value=min(value, maxValue);
+						
+						ring[ringpos]=value;
+						subcount[value]++;
+						
+						if(i>=ring.length){
+							subcount[old]--;
+						}
+						
+						if(len>=k2){
+							int sub=0;
+							while(sub<subcount.length && subcount[sub]==0){sub++;}
+							assert(sub<subcount.length);
+							upperBound[sub]++;
+						}
+						
+					}
+					
+					if(r.mate!=null){
+						bases=r.mate.bases;
+						quals=r.mate.quality;
+
+						len=0;
+						kmer=0;
+						Arrays.fill(subcount, 0);
+						for(int i=0; i<bases.length; i++){
+							byte b=bases[i];
+							int x=AminoAcid.baseToNumber[b];
+							
+							int ringpos=i%ring.length;
+							int old=ring[ringpos];
+							int value=0;
+							
+							if(x<0 || quals[i]<KmerCount3.minQuality){
+								len=0;
+								kmer=0;
+							}else{
+								kmer=((kmer<<2)|x)&mask;
+								len++;
+								
+								if(len>=k){
+									value=counts1.read(kmer);
+								}
+							}
+							value=min(value, maxValue);
+							
+							ring[ringpos]=value;
+							subcount[value]++;
+							
+							if(i>=ring.length){
+								subcount[old]--;
+							}
+							
+							if(len>=k2){
+								int sub=0;
+								while(sub<subcount.length && subcount[sub]==0){sub++;}
+								assert(sub<subcount.length);
+								upperBound[sub]++;
+							}
+							
+						}
+					}
+					
+				}
+				//System.err.println("returning list");
+				cris.returnList(ln);
+				//System.err.println("fetching list");
+				ln=cris.nextList();
+				reads=(ln!=null ? ln.list : null);
+			}
+			System.err.println("Finished reading");
+			cris.returnList(ln);
+			System.err.println("Returned list");
+			ReadWrite.closeStreams(cris);
+			System.err.println("Closed stream");
+		}
+		
+		return upperBound;
+	}
+	
+	public static final int min(int x, int y){return x<y ? x : y;}
+	public static final int max(int x, int y){return x>y ? x : y;}
+	
+	public static final int BOUND_LEN=256;
+	
+}