Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/bloom/KmerCount3.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package bloom; | |
2 | |
3 import java.util.ArrayList; | |
4 import java.util.Locale; | |
5 | |
6 import dna.AminoAcid; | |
7 import fileIO.FileFormat; | |
8 import fileIO.ReadWrite; | |
9 import shared.Timer; | |
10 import stream.ConcurrentReadInputStream; | |
11 import stream.FastaReadInputStream; | |
12 import stream.Read; | |
13 import structures.ListNum; | |
14 | |
15 /** | |
16 * @author Brian Bushnell | |
17 * @date Jul 5, 2012 | |
18 * | |
19 */ | |
20 public class KmerCount3 extends KmerCountAbstract { | |
21 | |
22 public static void main(String[] args){ | |
23 | |
24 Timer t=new Timer(); | |
25 | |
26 String fname1=args[0]; | |
27 String fname2=(args.length>3 || args[1].contains(".") ? args[1] : null); | |
28 int k=Integer.parseInt(args[args.length-2]); | |
29 int cbits=Integer.parseInt(args[args.length-1]); | |
30 | |
31 KCountArray2 count=null; | |
32 | |
33 if(fileIO.FileFormat.hasFastaExtension(fname1)){ | |
34 FastaReadInputStream.MIN_READ_LEN=k; | |
35 } | |
36 count=countFastq(fname1, fname2, k, cbits); | |
37 | |
38 | |
39 t.stop(); | |
40 System.out.println("Finished counting; time = "+t); | |
41 | |
42 long[] freq=count.transformToFrequency(); | |
43 | |
44 // System.out.println(count+"\n"); | |
45 // System.out.println(Arrays.toString(freq)+"\n"); | |
46 | |
47 long sum=sum(freq); | |
48 System.out.println("Kmer fraction:"); | |
49 int lim1=8, lim2=16; | |
50 for(int i=0; i<lim1; i++){ | |
51 String prefix=i+""; | |
52 while(prefix.length()<8){prefix=prefix+" ";} | |
53 System.out.println(prefix+"\t"+String.format(Locale.ROOT, "%.3f%% ",(100l*freq[i]/(double)sum))+"\t"+freq[i]); | |
54 } | |
55 while(lim1<=freq.length){ | |
56 int x=0; | |
57 for(int i=lim1; i<lim2; i++){ | |
58 x+=freq[i]; | |
59 } | |
60 String prefix=lim1+"-"+(lim2-1); | |
61 if(lim2>=freq.length){prefix=lim1+"+";} | |
62 while(prefix.length()<8){prefix=prefix+" ";} | |
63 System.out.println(prefix+"\t"+String.format(Locale.ROOT, "%.3f%% ",(100l*x/(double)sum))+"\t"+x); | |
64 lim1*=2; | |
65 lim2=min(lim2*2, freq.length); | |
66 } | |
67 } | |
68 | |
69 public static KCountArray2 countFastq(String reads1, String reads2, int k, int cbits){ | |
70 assert(k>=1 && k<20); | |
71 final int kbits=2*k; | |
72 final long mask=(kbits>63 ? -1L : ~((-1L)<<kbits)); | |
73 final long cells=mask+1; | |
74 if(verbose){System.err.println("k="+k+", kbits="+kbits+", cells="+cells+", mask="+Long.toHexString(mask));} | |
75 final KCountArray2 count=new KCountArray2(cells, cbits); | |
76 | |
77 final ConcurrentReadInputStream cris; | |
78 { | |
79 FileFormat ff1=FileFormat.testInput(reads1, FileFormat.FASTQ, null, true, true); | |
80 FileFormat ff2=FileFormat.testInput(reads2, FileFormat.FASTQ, null, true, true); | |
81 cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ff1, ff2); | |
82 cris.start(); //4567 | |
83 } | |
84 | |
85 assert(cris!=null) : reads1; | |
86 System.err.println("Started cris"); | |
87 boolean paired=cris.paired(); | |
88 System.err.println("Paired: "+paired); | |
89 | |
90 long kmer=0; //current kmer | |
91 int len=0; //distance since last contig start or ambiguous base | |
92 | |
93 { | |
94 ListNum<Read> ln=cris.nextList(); | |
95 ArrayList<Read> reads=(ln!=null ? ln.list : null); | |
96 | |
97 if(reads!=null && !reads.isEmpty()){ | |
98 Read r=reads.get(0); | |
99 assert(paired==(r.mate!=null)); | |
100 } | |
101 | |
102 while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning | |
103 //System.err.println("reads.size()="+reads.size()); | |
104 for(Read r : reads){ | |
105 readsProcessed++; | |
106 | |
107 len=0; | |
108 kmer=0; | |
109 byte[] bases=r.bases; | |
110 byte[] quals=r.quality; | |
111 for(int i=0; i<bases.length; i++){ | |
112 byte b=bases[i]; | |
113 int x=AminoAcid.baseToNumber[b]; | |
114 if(x<0 || quals[i]<minQuality){ | |
115 len=0; | |
116 kmer=0; | |
117 }else{ | |
118 kmer=((kmer<<2)|x)&mask; | |
119 len++; | |
120 if(len>=k){ | |
121 // System.out.print("Incrementing "+Long.toHexString(kmer)+": "+count.read(kmer)); | |
122 count.increment(kmer, 1); | |
123 // System.out.println(" -> "+count.read(kmer)); | |
124 // System.out.print("Incrementing array for "+Long.toHexString(kmer)+": "+array[(int)kmer]); | |
125 // array[(int)kmer]++; | |
126 // System.out.println(" -> "+array[(int)kmer]+"\n"); | |
127 // assert(array[(int)kmer]==count.read(kmer) || array[(int)kmer]>3); | |
128 } | |
129 } | |
130 } | |
131 | |
132 if(r.mate!=null){ | |
133 len=0; | |
134 kmer=0; | |
135 bases=r.mate.bases; | |
136 quals=r.mate.quality; | |
137 for(int i=0; i<bases.length; i++){ | |
138 byte b=bases[i]; | |
139 int x=AminoAcid.baseToNumber[b]; | |
140 if(x<0 || quals[i]<minQuality){ | |
141 len=0; | |
142 kmer=0; | |
143 }else{ | |
144 kmer=((kmer<<2)|x)&mask; | |
145 len++; | |
146 if(len>=k){ | |
147 count.increment(kmer, 1); | |
148 } | |
149 } | |
150 } | |
151 } | |
152 | |
153 } | |
154 //System.err.println("returning list"); | |
155 cris.returnList(ln); | |
156 //System.err.println("fetching list"); | |
157 ln=cris.nextList(); | |
158 reads=(ln!=null ? ln.list : null); | |
159 } | |
160 System.err.println("Finished reading"); | |
161 cris.returnList(ln); | |
162 System.err.println("Returned list"); | |
163 ReadWrite.closeStream(cris); | |
164 System.err.println("Closed stream"); | |
165 System.err.println("Processed "+readsProcessed+" reads."); | |
166 } | |
167 | |
168 return count; | |
169 } | |
170 | |
171 } |