jpayne@68
|
1 package bloom;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.util.ArrayList;
|
jpayne@68
|
4 import java.util.Arrays;
|
jpayne@68
|
5
|
jpayne@68
|
6 import dna.AminoAcid;
|
jpayne@68
|
7 import fileIO.ReadWrite;
|
jpayne@68
|
8 import shared.Timer;
|
jpayne@68
|
9 import stream.ConcurrentGenericReadInputStream;
|
jpayne@68
|
10 import stream.FastqReadInputStream;
|
jpayne@68
|
11 import stream.Read;
|
jpayne@68
|
12 import structures.ListNum;
|
jpayne@68
|
13
|
jpayne@68
|
14 /**
|
jpayne@68
|
15 * @author Brian Bushnell
|
jpayne@68
|
16 * @date Jul 5, 2012
|
jpayne@68
|
17 *
|
jpayne@68
|
18 */
|
jpayne@68
|
19 public class TestLargeKmer {
|
jpayne@68
|
20
|
jpayne@68
|
21 public static void main(String args[]){
|
jpayne@68
|
22 Timer t=new Timer();
|
jpayne@68
|
23
|
jpayne@68
|
24 String fname1=args[0];
|
jpayne@68
|
25 String fname2=(args.length>4 || args[1].contains(".") ? args[1] : null);
|
jpayne@68
|
26 int k=Integer.parseInt(args[args.length-3]);
|
jpayne@68
|
27 int cbits=Integer.parseInt(args[args.length-2]);
|
jpayne@68
|
28 int k2=Integer.parseInt(args[args.length-1]);
|
jpayne@68
|
29
|
jpayne@68
|
30 KCountArray2 counts=KmerCount3.countFastq(fname1, fname2, k, cbits);
|
jpayne@68
|
31 long[] counts2=countK2(fname1, fname2, k, counts, k2);
|
jpayne@68
|
32
|
jpayne@68
|
33 t.stop();
|
jpayne@68
|
34 System.out.println("Finished counting; time = "+t+"\n");
|
jpayne@68
|
35
|
jpayne@68
|
36 for(int i=0; i<counts2.length; i++){
|
jpayne@68
|
37 System.out.println(i+":\t"+counts2[i]);
|
jpayne@68
|
38 }
|
jpayne@68
|
39 }
|
jpayne@68
|
40
|
jpayne@68
|
41 public static long[] countK2(String fname1, String fname2, int k, int cbits, int k2){
|
jpayne@68
|
42 KCountArray2 counts=KmerCount3.countFastq(fname1, fname2, k, cbits);
|
jpayne@68
|
43 return countK2(fname1, fname2, k, counts, k2);
|
jpayne@68
|
44 }
|
jpayne@68
|
45
|
jpayne@68
|
46 public static long[] countK2(String fname1, String fname2, int k, KCountArray2 counts1, int k2){
|
jpayne@68
|
47 assert(k>=1 && k<20);
|
jpayne@68
|
48 final int kbits=2*k;
|
jpayne@68
|
49 final long mask=(kbits>63 ? -1L : ~((-1L)<<kbits));
|
jpayne@68
|
50 FastqReadInputStream fris1=new FastqReadInputStream(fname1, false);
|
jpayne@68
|
51 FastqReadInputStream fris2=(fname2==null ? null : new FastqReadInputStream(fname2, false));
|
jpayne@68
|
52 ConcurrentGenericReadInputStream cris=new ConcurrentGenericReadInputStream(fris1, fris2, KmerCount3.maxReads);
|
jpayne@68
|
53
|
jpayne@68
|
54 cris.start();
|
jpayne@68
|
55 System.err.println("Started cris");
|
jpayne@68
|
56 boolean paired=cris.paired();
|
jpayne@68
|
57
|
jpayne@68
|
58 long kmer=0; //current kmer
|
jpayne@68
|
59 int len=0; //distance since last contig start or ambiguous base
|
jpayne@68
|
60
|
jpayne@68
|
61
|
jpayne@68
|
62 final long[] upperBound=new long[BOUND_LEN]; //Lowest upper bound provable of kmer count
|
jpayne@68
|
63 final int[] ring=new int[k2-k+1];
|
jpayne@68
|
64 final int[] subcount=new int[BOUND_LEN];
|
jpayne@68
|
65 final int maxValue=subcount.length-1;
|
jpayne@68
|
66
|
jpayne@68
|
67 {
|
jpayne@68
|
68 ListNum<Read> ln=cris.nextList();
|
jpayne@68
|
69 ArrayList<Read> reads=(ln!=null ? ln.list : null);
|
jpayne@68
|
70
|
jpayne@68
|
71 if(reads!=null && !reads.isEmpty()){
|
jpayne@68
|
72 Read r=reads.get(0);
|
jpayne@68
|
73 assert(paired==(r.mate!=null));
|
jpayne@68
|
74 }
|
jpayne@68
|
75
|
jpayne@68
|
76 while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
|
jpayne@68
|
77 //System.err.println("reads.size()="+reads.size());
|
jpayne@68
|
78 for(Read r : reads){
|
jpayne@68
|
79
|
jpayne@68
|
80 len=0;
|
jpayne@68
|
81 kmer=0;
|
jpayne@68
|
82 Arrays.fill(subcount, 0);
|
jpayne@68
|
83
|
jpayne@68
|
84 byte[] bases=r.bases;
|
jpayne@68
|
85 byte[] quals=r.quality;
|
jpayne@68
|
86 for(int i=0; i<bases.length; i++){
|
jpayne@68
|
87 byte b=bases[i];
|
jpayne@68
|
88 int x=AminoAcid.baseToNumber[b];
|
jpayne@68
|
89
|
jpayne@68
|
90 int ringpos=i%ring.length;
|
jpayne@68
|
91 int old=ring[ringpos];
|
jpayne@68
|
92 int value=0;
|
jpayne@68
|
93
|
jpayne@68
|
94 if(x<0 || quals[i]<KmerCount3.minQuality){
|
jpayne@68
|
95 len=0;
|
jpayne@68
|
96 kmer=0;
|
jpayne@68
|
97 }else{
|
jpayne@68
|
98 kmer=((kmer<<2)|x)&mask;
|
jpayne@68
|
99 len++;
|
jpayne@68
|
100
|
jpayne@68
|
101 if(len>=k){
|
jpayne@68
|
102 value=counts1.read(kmer);
|
jpayne@68
|
103 }
|
jpayne@68
|
104 }
|
jpayne@68
|
105 value=min(value, maxValue);
|
jpayne@68
|
106
|
jpayne@68
|
107 ring[ringpos]=value;
|
jpayne@68
|
108 subcount[value]++;
|
jpayne@68
|
109
|
jpayne@68
|
110 if(i>=ring.length){
|
jpayne@68
|
111 subcount[old]--;
|
jpayne@68
|
112 }
|
jpayne@68
|
113
|
jpayne@68
|
114 if(len>=k2){
|
jpayne@68
|
115 int sub=0;
|
jpayne@68
|
116 while(sub<subcount.length && subcount[sub]==0){sub++;}
|
jpayne@68
|
117 assert(sub<subcount.length);
|
jpayne@68
|
118 upperBound[sub]++;
|
jpayne@68
|
119 }
|
jpayne@68
|
120
|
jpayne@68
|
121 }
|
jpayne@68
|
122
|
jpayne@68
|
123 if(r.mate!=null){
|
jpayne@68
|
124 bases=r.mate.bases;
|
jpayne@68
|
125 quals=r.mate.quality;
|
jpayne@68
|
126
|
jpayne@68
|
127 len=0;
|
jpayne@68
|
128 kmer=0;
|
jpayne@68
|
129 Arrays.fill(subcount, 0);
|
jpayne@68
|
130 for(int i=0; i<bases.length; i++){
|
jpayne@68
|
131 byte b=bases[i];
|
jpayne@68
|
132 int x=AminoAcid.baseToNumber[b];
|
jpayne@68
|
133
|
jpayne@68
|
134 int ringpos=i%ring.length;
|
jpayne@68
|
135 int old=ring[ringpos];
|
jpayne@68
|
136 int value=0;
|
jpayne@68
|
137
|
jpayne@68
|
138 if(x<0 || quals[i]<KmerCount3.minQuality){
|
jpayne@68
|
139 len=0;
|
jpayne@68
|
140 kmer=0;
|
jpayne@68
|
141 }else{
|
jpayne@68
|
142 kmer=((kmer<<2)|x)&mask;
|
jpayne@68
|
143 len++;
|
jpayne@68
|
144
|
jpayne@68
|
145 if(len>=k){
|
jpayne@68
|
146 value=counts1.read(kmer);
|
jpayne@68
|
147 }
|
jpayne@68
|
148 }
|
jpayne@68
|
149 value=min(value, maxValue);
|
jpayne@68
|
150
|
jpayne@68
|
151 ring[ringpos]=value;
|
jpayne@68
|
152 subcount[value]++;
|
jpayne@68
|
153
|
jpayne@68
|
154 if(i>=ring.length){
|
jpayne@68
|
155 subcount[old]--;
|
jpayne@68
|
156 }
|
jpayne@68
|
157
|
jpayne@68
|
158 if(len>=k2){
|
jpayne@68
|
159 int sub=0;
|
jpayne@68
|
160 while(sub<subcount.length && subcount[sub]==0){sub++;}
|
jpayne@68
|
161 assert(sub<subcount.length);
|
jpayne@68
|
162 upperBound[sub]++;
|
jpayne@68
|
163 }
|
jpayne@68
|
164
|
jpayne@68
|
165 }
|
jpayne@68
|
166 }
|
jpayne@68
|
167
|
jpayne@68
|
168 }
|
jpayne@68
|
169 //System.err.println("returning list");
|
jpayne@68
|
170 cris.returnList(ln);
|
jpayne@68
|
171 //System.err.println("fetching list");
|
jpayne@68
|
172 ln=cris.nextList();
|
jpayne@68
|
173 reads=(ln!=null ? ln.list : null);
|
jpayne@68
|
174 }
|
jpayne@68
|
175 System.err.println("Finished reading");
|
jpayne@68
|
176 cris.returnList(ln);
|
jpayne@68
|
177 System.err.println("Returned list");
|
jpayne@68
|
178 ReadWrite.closeStreams(cris);
|
jpayne@68
|
179 System.err.println("Closed stream");
|
jpayne@68
|
180 }
|
jpayne@68
|
181
|
jpayne@68
|
182 return upperBound;
|
jpayne@68
|
183 }
|
jpayne@68
|
184
|
jpayne@68
|
185 public static final int min(int x, int y){return x<y ? x : y;}
|
jpayne@68
|
186 public static final int max(int x, int y){return x>y ? x : y;}
|
jpayne@68
|
187
|
jpayne@68
|
188 public static final int BOUND_LEN=256;
|
jpayne@68
|
189
|
jpayne@68
|
190 }
|