comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/bloom/TestLargeKmer.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package bloom;
2
3 import java.util.ArrayList;
4 import java.util.Arrays;
5
6 import dna.AminoAcid;
7 import fileIO.ReadWrite;
8 import shared.Timer;
9 import stream.ConcurrentGenericReadInputStream;
10 import stream.FastqReadInputStream;
11 import stream.Read;
12 import structures.ListNum;
13
14 /**
15 * @author Brian Bushnell
16 * @date Jul 5, 2012
17 *
18 */
19 public class TestLargeKmer {
20
21 public static void main(String args[]){
22 Timer t=new Timer();
23
24 String fname1=args[0];
25 String fname2=(args.length>4 || args[1].contains(".") ? args[1] : null);
26 int k=Integer.parseInt(args[args.length-3]);
27 int cbits=Integer.parseInt(args[args.length-2]);
28 int k2=Integer.parseInt(args[args.length-1]);
29
30 KCountArray2 counts=KmerCount3.countFastq(fname1, fname2, k, cbits);
31 long[] counts2=countK2(fname1, fname2, k, counts, k2);
32
33 t.stop();
34 System.out.println("Finished counting; time = "+t+"\n");
35
36 for(int i=0; i<counts2.length; i++){
37 System.out.println(i+":\t"+counts2[i]);
38 }
39 }
40
41 public static long[] countK2(String fname1, String fname2, int k, int cbits, int k2){
42 KCountArray2 counts=KmerCount3.countFastq(fname1, fname2, k, cbits);
43 return countK2(fname1, fname2, k, counts, k2);
44 }
45
46 public static long[] countK2(String fname1, String fname2, int k, KCountArray2 counts1, int k2){
47 assert(k>=1 && k<20);
48 final int kbits=2*k;
49 final long mask=(kbits>63 ? -1L : ~((-1L)<<kbits));
50 FastqReadInputStream fris1=new FastqReadInputStream(fname1, false);
51 FastqReadInputStream fris2=(fname2==null ? null : new FastqReadInputStream(fname2, false));
52 ConcurrentGenericReadInputStream cris=new ConcurrentGenericReadInputStream(fris1, fris2, KmerCount3.maxReads);
53
54 cris.start();
55 System.err.println("Started cris");
56 boolean paired=cris.paired();
57
58 long kmer=0; //current kmer
59 int len=0; //distance since last contig start or ambiguous base
60
61
62 final long[] upperBound=new long[BOUND_LEN]; //Lowest upper bound provable of kmer count
63 final int[] ring=new int[k2-k+1];
64 final int[] subcount=new int[BOUND_LEN];
65 final int maxValue=subcount.length-1;
66
67 {
68 ListNum<Read> ln=cris.nextList();
69 ArrayList<Read> reads=(ln!=null ? ln.list : null);
70
71 if(reads!=null && !reads.isEmpty()){
72 Read r=reads.get(0);
73 assert(paired==(r.mate!=null));
74 }
75
76 while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
77 //System.err.println("reads.size()="+reads.size());
78 for(Read r : reads){
79
80 len=0;
81 kmer=0;
82 Arrays.fill(subcount, 0);
83
84 byte[] bases=r.bases;
85 byte[] quals=r.quality;
86 for(int i=0; i<bases.length; i++){
87 byte b=bases[i];
88 int x=AminoAcid.baseToNumber[b];
89
90 int ringpos=i%ring.length;
91 int old=ring[ringpos];
92 int value=0;
93
94 if(x<0 || quals[i]<KmerCount3.minQuality){
95 len=0;
96 kmer=0;
97 }else{
98 kmer=((kmer<<2)|x)&mask;
99 len++;
100
101 if(len>=k){
102 value=counts1.read(kmer);
103 }
104 }
105 value=min(value, maxValue);
106
107 ring[ringpos]=value;
108 subcount[value]++;
109
110 if(i>=ring.length){
111 subcount[old]--;
112 }
113
114 if(len>=k2){
115 int sub=0;
116 while(sub<subcount.length && subcount[sub]==0){sub++;}
117 assert(sub<subcount.length);
118 upperBound[sub]++;
119 }
120
121 }
122
123 if(r.mate!=null){
124 bases=r.mate.bases;
125 quals=r.mate.quality;
126
127 len=0;
128 kmer=0;
129 Arrays.fill(subcount, 0);
130 for(int i=0; i<bases.length; i++){
131 byte b=bases[i];
132 int x=AminoAcid.baseToNumber[b];
133
134 int ringpos=i%ring.length;
135 int old=ring[ringpos];
136 int value=0;
137
138 if(x<0 || quals[i]<KmerCount3.minQuality){
139 len=0;
140 kmer=0;
141 }else{
142 kmer=((kmer<<2)|x)&mask;
143 len++;
144
145 if(len>=k){
146 value=counts1.read(kmer);
147 }
148 }
149 value=min(value, maxValue);
150
151 ring[ringpos]=value;
152 subcount[value]++;
153
154 if(i>=ring.length){
155 subcount[old]--;
156 }
157
158 if(len>=k2){
159 int sub=0;
160 while(sub<subcount.length && subcount[sub]==0){sub++;}
161 assert(sub<subcount.length);
162 upperBound[sub]++;
163 }
164
165 }
166 }
167
168 }
169 //System.err.println("returning list");
170 cris.returnList(ln);
171 //System.err.println("fetching list");
172 ln=cris.nextList();
173 reads=(ln!=null ? ln.list : null);
174 }
175 System.err.println("Finished reading");
176 cris.returnList(ln);
177 System.err.println("Returned list");
178 ReadWrite.closeStreams(cris);
179 System.err.println("Closed stream");
180 }
181
182 return upperBound;
183 }
184
185 public static final int min(int x, int y){return x<y ? x : y;}
186 public static final int max(int x, int y){return x>y ? x : y;}
187
188 public static final int BOUND_LEN=256;
189
190 }