annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/bloom/TestLargeKmer.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 package bloom;
jpayne@68 2
jpayne@68 3 import java.util.ArrayList;
jpayne@68 4 import java.util.Arrays;
jpayne@68 5
jpayne@68 6 import dna.AminoAcid;
jpayne@68 7 import fileIO.ReadWrite;
jpayne@68 8 import shared.Timer;
jpayne@68 9 import stream.ConcurrentGenericReadInputStream;
jpayne@68 10 import stream.FastqReadInputStream;
jpayne@68 11 import stream.Read;
jpayne@68 12 import structures.ListNum;
jpayne@68 13
jpayne@68 14 /**
jpayne@68 15 * @author Brian Bushnell
jpayne@68 16 * @date Jul 5, 2012
jpayne@68 17 *
jpayne@68 18 */
jpayne@68 19 public class TestLargeKmer {
jpayne@68 20
jpayne@68 21 public static void main(String args[]){
jpayne@68 22 Timer t=new Timer();
jpayne@68 23
jpayne@68 24 String fname1=args[0];
jpayne@68 25 String fname2=(args.length>4 || args[1].contains(".") ? args[1] : null);
jpayne@68 26 int k=Integer.parseInt(args[args.length-3]);
jpayne@68 27 int cbits=Integer.parseInt(args[args.length-2]);
jpayne@68 28 int k2=Integer.parseInt(args[args.length-1]);
jpayne@68 29
jpayne@68 30 KCountArray2 counts=KmerCount3.countFastq(fname1, fname2, k, cbits);
jpayne@68 31 long[] counts2=countK2(fname1, fname2, k, counts, k2);
jpayne@68 32
jpayne@68 33 t.stop();
jpayne@68 34 System.out.println("Finished counting; time = "+t+"\n");
jpayne@68 35
jpayne@68 36 for(int i=0; i<counts2.length; i++){
jpayne@68 37 System.out.println(i+":\t"+counts2[i]);
jpayne@68 38 }
jpayne@68 39 }
jpayne@68 40
jpayne@68 41 public static long[] countK2(String fname1, String fname2, int k, int cbits, int k2){
jpayne@68 42 KCountArray2 counts=KmerCount3.countFastq(fname1, fname2, k, cbits);
jpayne@68 43 return countK2(fname1, fname2, k, counts, k2);
jpayne@68 44 }
jpayne@68 45
jpayne@68 46 public static long[] countK2(String fname1, String fname2, int k, KCountArray2 counts1, int k2){
jpayne@68 47 assert(k>=1 && k<20);
jpayne@68 48 final int kbits=2*k;
jpayne@68 49 final long mask=(kbits>63 ? -1L : ~((-1L)<<kbits));
jpayne@68 50 FastqReadInputStream fris1=new FastqReadInputStream(fname1, false);
jpayne@68 51 FastqReadInputStream fris2=(fname2==null ? null : new FastqReadInputStream(fname2, false));
jpayne@68 52 ConcurrentGenericReadInputStream cris=new ConcurrentGenericReadInputStream(fris1, fris2, KmerCount3.maxReads);
jpayne@68 53
jpayne@68 54 cris.start();
jpayne@68 55 System.err.println("Started cris");
jpayne@68 56 boolean paired=cris.paired();
jpayne@68 57
jpayne@68 58 long kmer=0; //current kmer
jpayne@68 59 int len=0; //distance since last contig start or ambiguous base
jpayne@68 60
jpayne@68 61
jpayne@68 62 final long[] upperBound=new long[BOUND_LEN]; //Lowest upper bound provable of kmer count
jpayne@68 63 final int[] ring=new int[k2-k+1];
jpayne@68 64 final int[] subcount=new int[BOUND_LEN];
jpayne@68 65 final int maxValue=subcount.length-1;
jpayne@68 66
jpayne@68 67 {
jpayne@68 68 ListNum<Read> ln=cris.nextList();
jpayne@68 69 ArrayList<Read> reads=(ln!=null ? ln.list : null);
jpayne@68 70
jpayne@68 71 if(reads!=null && !reads.isEmpty()){
jpayne@68 72 Read r=reads.get(0);
jpayne@68 73 assert(paired==(r.mate!=null));
jpayne@68 74 }
jpayne@68 75
jpayne@68 76 while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
jpayne@68 77 //System.err.println("reads.size()="+reads.size());
jpayne@68 78 for(Read r : reads){
jpayne@68 79
jpayne@68 80 len=0;
jpayne@68 81 kmer=0;
jpayne@68 82 Arrays.fill(subcount, 0);
jpayne@68 83
jpayne@68 84 byte[] bases=r.bases;
jpayne@68 85 byte[] quals=r.quality;
jpayne@68 86 for(int i=0; i<bases.length; i++){
jpayne@68 87 byte b=bases[i];
jpayne@68 88 int x=AminoAcid.baseToNumber[b];
jpayne@68 89
jpayne@68 90 int ringpos=i%ring.length;
jpayne@68 91 int old=ring[ringpos];
jpayne@68 92 int value=0;
jpayne@68 93
jpayne@68 94 if(x<0 || quals[i]<KmerCount3.minQuality){
jpayne@68 95 len=0;
jpayne@68 96 kmer=0;
jpayne@68 97 }else{
jpayne@68 98 kmer=((kmer<<2)|x)&mask;
jpayne@68 99 len++;
jpayne@68 100
jpayne@68 101 if(len>=k){
jpayne@68 102 value=counts1.read(kmer);
jpayne@68 103 }
jpayne@68 104 }
jpayne@68 105 value=min(value, maxValue);
jpayne@68 106
jpayne@68 107 ring[ringpos]=value;
jpayne@68 108 subcount[value]++;
jpayne@68 109
jpayne@68 110 if(i>=ring.length){
jpayne@68 111 subcount[old]--;
jpayne@68 112 }
jpayne@68 113
jpayne@68 114 if(len>=k2){
jpayne@68 115 int sub=0;
jpayne@68 116 while(sub<subcount.length && subcount[sub]==0){sub++;}
jpayne@68 117 assert(sub<subcount.length);
jpayne@68 118 upperBound[sub]++;
jpayne@68 119 }
jpayne@68 120
jpayne@68 121 }
jpayne@68 122
jpayne@68 123 if(r.mate!=null){
jpayne@68 124 bases=r.mate.bases;
jpayne@68 125 quals=r.mate.quality;
jpayne@68 126
jpayne@68 127 len=0;
jpayne@68 128 kmer=0;
jpayne@68 129 Arrays.fill(subcount, 0);
jpayne@68 130 for(int i=0; i<bases.length; i++){
jpayne@68 131 byte b=bases[i];
jpayne@68 132 int x=AminoAcid.baseToNumber[b];
jpayne@68 133
jpayne@68 134 int ringpos=i%ring.length;
jpayne@68 135 int old=ring[ringpos];
jpayne@68 136 int value=0;
jpayne@68 137
jpayne@68 138 if(x<0 || quals[i]<KmerCount3.minQuality){
jpayne@68 139 len=0;
jpayne@68 140 kmer=0;
jpayne@68 141 }else{
jpayne@68 142 kmer=((kmer<<2)|x)&mask;
jpayne@68 143 len++;
jpayne@68 144
jpayne@68 145 if(len>=k){
jpayne@68 146 value=counts1.read(kmer);
jpayne@68 147 }
jpayne@68 148 }
jpayne@68 149 value=min(value, maxValue);
jpayne@68 150
jpayne@68 151 ring[ringpos]=value;
jpayne@68 152 subcount[value]++;
jpayne@68 153
jpayne@68 154 if(i>=ring.length){
jpayne@68 155 subcount[old]--;
jpayne@68 156 }
jpayne@68 157
jpayne@68 158 if(len>=k2){
jpayne@68 159 int sub=0;
jpayne@68 160 while(sub<subcount.length && subcount[sub]==0){sub++;}
jpayne@68 161 assert(sub<subcount.length);
jpayne@68 162 upperBound[sub]++;
jpayne@68 163 }
jpayne@68 164
jpayne@68 165 }
jpayne@68 166 }
jpayne@68 167
jpayne@68 168 }
jpayne@68 169 //System.err.println("returning list");
jpayne@68 170 cris.returnList(ln);
jpayne@68 171 //System.err.println("fetching list");
jpayne@68 172 ln=cris.nextList();
jpayne@68 173 reads=(ln!=null ? ln.list : null);
jpayne@68 174 }
jpayne@68 175 System.err.println("Finished reading");
jpayne@68 176 cris.returnList(ln);
jpayne@68 177 System.err.println("Returned list");
jpayne@68 178 ReadWrite.closeStreams(cris);
jpayne@68 179 System.err.println("Closed stream");
jpayne@68 180 }
jpayne@68 181
jpayne@68 182 return upperBound;
jpayne@68 183 }
jpayne@68 184
jpayne@68 185 public static final int min(int x, int y){return x<y ? x : y;}
jpayne@68 186 public static final int max(int x, int y){return x>y ? x : y;}
jpayne@68 187
jpayne@68 188 public static final int BOUND_LEN=256;
jpayne@68 189
jpayne@68 190 }