annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/bloom/KmerCount3.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 package bloom;
jpayne@68 2
jpayne@68 3 import java.util.ArrayList;
jpayne@68 4 import java.util.Locale;
jpayne@68 5
jpayne@68 6 import dna.AminoAcid;
jpayne@68 7 import fileIO.FileFormat;
jpayne@68 8 import fileIO.ReadWrite;
jpayne@68 9 import shared.Timer;
jpayne@68 10 import stream.ConcurrentReadInputStream;
jpayne@68 11 import stream.FastaReadInputStream;
jpayne@68 12 import stream.Read;
jpayne@68 13 import structures.ListNum;
jpayne@68 14
jpayne@68 15 /**
jpayne@68 16 * @author Brian Bushnell
jpayne@68 17 * @date Jul 5, 2012
jpayne@68 18 *
jpayne@68 19 */
jpayne@68 20 public class KmerCount3 extends KmerCountAbstract {
jpayne@68 21
jpayne@68 22 public static void main(String[] args){
jpayne@68 23
jpayne@68 24 Timer t=new Timer();
jpayne@68 25
jpayne@68 26 String fname1=args[0];
jpayne@68 27 String fname2=(args.length>3 || args[1].contains(".") ? args[1] : null);
jpayne@68 28 int k=Integer.parseInt(args[args.length-2]);
jpayne@68 29 int cbits=Integer.parseInt(args[args.length-1]);
jpayne@68 30
jpayne@68 31 KCountArray2 count=null;
jpayne@68 32
jpayne@68 33 if(fileIO.FileFormat.hasFastaExtension(fname1)){
jpayne@68 34 FastaReadInputStream.MIN_READ_LEN=k;
jpayne@68 35 }
jpayne@68 36 count=countFastq(fname1, fname2, k, cbits);
jpayne@68 37
jpayne@68 38
jpayne@68 39 t.stop();
jpayne@68 40 System.out.println("Finished counting; time = "+t);
jpayne@68 41
jpayne@68 42 long[] freq=count.transformToFrequency();
jpayne@68 43
jpayne@68 44 // System.out.println(count+"\n");
jpayne@68 45 // System.out.println(Arrays.toString(freq)+"\n");
jpayne@68 46
jpayne@68 47 long sum=sum(freq);
jpayne@68 48 System.out.println("Kmer fraction:");
jpayne@68 49 int lim1=8, lim2=16;
jpayne@68 50 for(int i=0; i<lim1; i++){
jpayne@68 51 String prefix=i+"";
jpayne@68 52 while(prefix.length()<8){prefix=prefix+" ";}
jpayne@68 53 System.out.println(prefix+"\t"+String.format(Locale.ROOT, "%.3f%% ",(100l*freq[i]/(double)sum))+"\t"+freq[i]);
jpayne@68 54 }
jpayne@68 55 while(lim1<=freq.length){
jpayne@68 56 int x=0;
jpayne@68 57 for(int i=lim1; i<lim2; i++){
jpayne@68 58 x+=freq[i];
jpayne@68 59 }
jpayne@68 60 String prefix=lim1+"-"+(lim2-1);
jpayne@68 61 if(lim2>=freq.length){prefix=lim1+"+";}
jpayne@68 62 while(prefix.length()<8){prefix=prefix+" ";}
jpayne@68 63 System.out.println(prefix+"\t"+String.format(Locale.ROOT, "%.3f%% ",(100l*x/(double)sum))+"\t"+x);
jpayne@68 64 lim1*=2;
jpayne@68 65 lim2=min(lim2*2, freq.length);
jpayne@68 66 }
jpayne@68 67 }
jpayne@68 68
jpayne@68 69 public static KCountArray2 countFastq(String reads1, String reads2, int k, int cbits){
jpayne@68 70 assert(k>=1 && k<20);
jpayne@68 71 final int kbits=2*k;
jpayne@68 72 final long mask=(kbits>63 ? -1L : ~((-1L)<<kbits));
jpayne@68 73 final long cells=mask+1;
jpayne@68 74 if(verbose){System.err.println("k="+k+", kbits="+kbits+", cells="+cells+", mask="+Long.toHexString(mask));}
jpayne@68 75 final KCountArray2 count=new KCountArray2(cells, cbits);
jpayne@68 76
jpayne@68 77 final ConcurrentReadInputStream cris;
jpayne@68 78 {
jpayne@68 79 FileFormat ff1=FileFormat.testInput(reads1, FileFormat.FASTQ, null, true, true);
jpayne@68 80 FileFormat ff2=FileFormat.testInput(reads2, FileFormat.FASTQ, null, true, true);
jpayne@68 81 cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ff1, ff2);
jpayne@68 82 cris.start(); //4567
jpayne@68 83 }
jpayne@68 84
jpayne@68 85 assert(cris!=null) : reads1;
jpayne@68 86 System.err.println("Started cris");
jpayne@68 87 boolean paired=cris.paired();
jpayne@68 88 System.err.println("Paired: "+paired);
jpayne@68 89
jpayne@68 90 long kmer=0; //current kmer
jpayne@68 91 int len=0; //distance since last contig start or ambiguous base
jpayne@68 92
jpayne@68 93 {
jpayne@68 94 ListNum<Read> ln=cris.nextList();
jpayne@68 95 ArrayList<Read> reads=(ln!=null ? ln.list : null);
jpayne@68 96
jpayne@68 97 if(reads!=null && !reads.isEmpty()){
jpayne@68 98 Read r=reads.get(0);
jpayne@68 99 assert(paired==(r.mate!=null));
jpayne@68 100 }
jpayne@68 101
jpayne@68 102 while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
jpayne@68 103 //System.err.println("reads.size()="+reads.size());
jpayne@68 104 for(Read r : reads){
jpayne@68 105 readsProcessed++;
jpayne@68 106
jpayne@68 107 len=0;
jpayne@68 108 kmer=0;
jpayne@68 109 byte[] bases=r.bases;
jpayne@68 110 byte[] quals=r.quality;
jpayne@68 111 for(int i=0; i<bases.length; i++){
jpayne@68 112 byte b=bases[i];
jpayne@68 113 int x=AminoAcid.baseToNumber[b];
jpayne@68 114 if(x<0 || quals[i]<minQuality){
jpayne@68 115 len=0;
jpayne@68 116 kmer=0;
jpayne@68 117 }else{
jpayne@68 118 kmer=((kmer<<2)|x)&mask;
jpayne@68 119 len++;
jpayne@68 120 if(len>=k){
jpayne@68 121 // System.out.print("Incrementing "+Long.toHexString(kmer)+": "+count.read(kmer));
jpayne@68 122 count.increment(kmer, 1);
jpayne@68 123 // System.out.println(" -> "+count.read(kmer));
jpayne@68 124 // System.out.print("Incrementing array for "+Long.toHexString(kmer)+": "+array[(int)kmer]);
jpayne@68 125 // array[(int)kmer]++;
jpayne@68 126 // System.out.println(" -> "+array[(int)kmer]+"\n");
jpayne@68 127 // assert(array[(int)kmer]==count.read(kmer) || array[(int)kmer]>3);
jpayne@68 128 }
jpayne@68 129 }
jpayne@68 130 }
jpayne@68 131
jpayne@68 132 if(r.mate!=null){
jpayne@68 133 len=0;
jpayne@68 134 kmer=0;
jpayne@68 135 bases=r.mate.bases;
jpayne@68 136 quals=r.mate.quality;
jpayne@68 137 for(int i=0; i<bases.length; i++){
jpayne@68 138 byte b=bases[i];
jpayne@68 139 int x=AminoAcid.baseToNumber[b];
jpayne@68 140 if(x<0 || quals[i]<minQuality){
jpayne@68 141 len=0;
jpayne@68 142 kmer=0;
jpayne@68 143 }else{
jpayne@68 144 kmer=((kmer<<2)|x)&mask;
jpayne@68 145 len++;
jpayne@68 146 if(len>=k){
jpayne@68 147 count.increment(kmer, 1);
jpayne@68 148 }
jpayne@68 149 }
jpayne@68 150 }
jpayne@68 151 }
jpayne@68 152
jpayne@68 153 }
jpayne@68 154 //System.err.println("returning list");
jpayne@68 155 cris.returnList(ln);
jpayne@68 156 //System.err.println("fetching list");
jpayne@68 157 ln=cris.nextList();
jpayne@68 158 reads=(ln!=null ? ln.list : null);
jpayne@68 159 }
jpayne@68 160 System.err.println("Finished reading");
jpayne@68 161 cris.returnList(ln);
jpayne@68 162 System.err.println("Returned list");
jpayne@68 163 ReadWrite.closeStream(cris);
jpayne@68 164 System.err.println("Closed stream");
jpayne@68 165 System.err.println("Processed "+readsProcessed+" reads.");
jpayne@68 166 }
jpayne@68 167
jpayne@68 168 return count;
jpayne@68 169 }
jpayne@68 170
jpayne@68 171 }