Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/bloom/TestLargeKmer.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package bloom; | |
2 | |
3 import java.util.ArrayList; | |
4 import java.util.Arrays; | |
5 | |
6 import dna.AminoAcid; | |
7 import fileIO.ReadWrite; | |
8 import shared.Timer; | |
9 import stream.ConcurrentGenericReadInputStream; | |
10 import stream.FastqReadInputStream; | |
11 import stream.Read; | |
12 import structures.ListNum; | |
13 | |
14 /** | |
15 * @author Brian Bushnell | |
16 * @date Jul 5, 2012 | |
17 * | |
18 */ | |
19 public class TestLargeKmer { | |
20 | |
21 public static void main(String args[]){ | |
22 Timer t=new Timer(); | |
23 | |
24 String fname1=args[0]; | |
25 String fname2=(args.length>4 || args[1].contains(".") ? args[1] : null); | |
26 int k=Integer.parseInt(args[args.length-3]); | |
27 int cbits=Integer.parseInt(args[args.length-2]); | |
28 int k2=Integer.parseInt(args[args.length-1]); | |
29 | |
30 KCountArray2 counts=KmerCount3.countFastq(fname1, fname2, k, cbits); | |
31 long[] counts2=countK2(fname1, fname2, k, counts, k2); | |
32 | |
33 t.stop(); | |
34 System.out.println("Finished counting; time = "+t+"\n"); | |
35 | |
36 for(int i=0; i<counts2.length; i++){ | |
37 System.out.println(i+":\t"+counts2[i]); | |
38 } | |
39 } | |
40 | |
41 public static long[] countK2(String fname1, String fname2, int k, int cbits, int k2){ | |
42 KCountArray2 counts=KmerCount3.countFastq(fname1, fname2, k, cbits); | |
43 return countK2(fname1, fname2, k, counts, k2); | |
44 } | |
45 | |
46 public static long[] countK2(String fname1, String fname2, int k, KCountArray2 counts1, int k2){ | |
47 assert(k>=1 && k<20); | |
48 final int kbits=2*k; | |
49 final long mask=(kbits>63 ? -1L : ~((-1L)<<kbits)); | |
50 FastqReadInputStream fris1=new FastqReadInputStream(fname1, false); | |
51 FastqReadInputStream fris2=(fname2==null ? null : new FastqReadInputStream(fname2, false)); | |
52 ConcurrentGenericReadInputStream cris=new ConcurrentGenericReadInputStream(fris1, fris2, KmerCount3.maxReads); | |
53 | |
54 cris.start(); | |
55 System.err.println("Started cris"); | |
56 boolean paired=cris.paired(); | |
57 | |
58 long kmer=0; //current kmer | |
59 int len=0; //distance since last contig start or ambiguous base | |
60 | |
61 | |
62 final long[] upperBound=new long[BOUND_LEN]; //Lowest upper bound provable of kmer count | |
63 final int[] ring=new int[k2-k+1]; | |
64 final int[] subcount=new int[BOUND_LEN]; | |
65 final int maxValue=subcount.length-1; | |
66 | |
67 { | |
68 ListNum<Read> ln=cris.nextList(); | |
69 ArrayList<Read> reads=(ln!=null ? ln.list : null); | |
70 | |
71 if(reads!=null && !reads.isEmpty()){ | |
72 Read r=reads.get(0); | |
73 assert(paired==(r.mate!=null)); | |
74 } | |
75 | |
76 while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning | |
77 //System.err.println("reads.size()="+reads.size()); | |
78 for(Read r : reads){ | |
79 | |
80 len=0; | |
81 kmer=0; | |
82 Arrays.fill(subcount, 0); | |
83 | |
84 byte[] bases=r.bases; | |
85 byte[] quals=r.quality; | |
86 for(int i=0; i<bases.length; i++){ | |
87 byte b=bases[i]; | |
88 int x=AminoAcid.baseToNumber[b]; | |
89 | |
90 int ringpos=i%ring.length; | |
91 int old=ring[ringpos]; | |
92 int value=0; | |
93 | |
94 if(x<0 || quals[i]<KmerCount3.minQuality){ | |
95 len=0; | |
96 kmer=0; | |
97 }else{ | |
98 kmer=((kmer<<2)|x)&mask; | |
99 len++; | |
100 | |
101 if(len>=k){ | |
102 value=counts1.read(kmer); | |
103 } | |
104 } | |
105 value=min(value, maxValue); | |
106 | |
107 ring[ringpos]=value; | |
108 subcount[value]++; | |
109 | |
110 if(i>=ring.length){ | |
111 subcount[old]--; | |
112 } | |
113 | |
114 if(len>=k2){ | |
115 int sub=0; | |
116 while(sub<subcount.length && subcount[sub]==0){sub++;} | |
117 assert(sub<subcount.length); | |
118 upperBound[sub]++; | |
119 } | |
120 | |
121 } | |
122 | |
123 if(r.mate!=null){ | |
124 bases=r.mate.bases; | |
125 quals=r.mate.quality; | |
126 | |
127 len=0; | |
128 kmer=0; | |
129 Arrays.fill(subcount, 0); | |
130 for(int i=0; i<bases.length; i++){ | |
131 byte b=bases[i]; | |
132 int x=AminoAcid.baseToNumber[b]; | |
133 | |
134 int ringpos=i%ring.length; | |
135 int old=ring[ringpos]; | |
136 int value=0; | |
137 | |
138 if(x<0 || quals[i]<KmerCount3.minQuality){ | |
139 len=0; | |
140 kmer=0; | |
141 }else{ | |
142 kmer=((kmer<<2)|x)&mask; | |
143 len++; | |
144 | |
145 if(len>=k){ | |
146 value=counts1.read(kmer); | |
147 } | |
148 } | |
149 value=min(value, maxValue); | |
150 | |
151 ring[ringpos]=value; | |
152 subcount[value]++; | |
153 | |
154 if(i>=ring.length){ | |
155 subcount[old]--; | |
156 } | |
157 | |
158 if(len>=k2){ | |
159 int sub=0; | |
160 while(sub<subcount.length && subcount[sub]==0){sub++;} | |
161 assert(sub<subcount.length); | |
162 upperBound[sub]++; | |
163 } | |
164 | |
165 } | |
166 } | |
167 | |
168 } | |
169 //System.err.println("returning list"); | |
170 cris.returnList(ln); | |
171 //System.err.println("fetching list"); | |
172 ln=cris.nextList(); | |
173 reads=(ln!=null ? ln.list : null); | |
174 } | |
175 System.err.println("Finished reading"); | |
176 cris.returnList(ln); | |
177 System.err.println("Returned list"); | |
178 ReadWrite.closeStreams(cris); | |
179 System.err.println("Closed stream"); | |
180 } | |
181 | |
182 return upperBound; | |
183 } | |
184 | |
185 public static final int min(int x, int y){return x<y ? x : y;} | |
186 public static final int max(int x, int y){return x>y ? x : y;} | |
187 | |
188 public static final int BOUND_LEN=256; | |
189 | |
190 } |