jpayne@68: package prok; jpayne@68: jpayne@68: import java.io.PrintStream; jpayne@68: import java.util.ArrayList; jpayne@68: import java.util.BitSet; jpayne@68: import java.util.HashMap; jpayne@68: jpayne@68: import aligner.SingleStateAlignerFlat2; jpayne@68: import dna.AminoAcid; jpayne@68: import fileIO.FileFormat; jpayne@68: import gff.GffLine; jpayne@68: import shared.KillSwitch; jpayne@68: import shared.Shared; jpayne@68: import shared.Tools; jpayne@68: import stream.Read; jpayne@68: import stream.ReadInputStream; jpayne@68: import structures.ByteBuilder; jpayne@68: import structures.IntList; jpayne@68: jpayne@68: /** jpayne@68: * This class is designed to store kmer frequencies related to gene jpayne@68: * starts, stops, and interiors. It can be loaded from a pgm file. jpayne@68: * jpayne@68: * It's possible to use multiple GeneModels; for example, one for jpayne@68: * each of several GC ranges or clades. jpayne@68: * @author Brian Bushnell jpayne@68: * @date Sep 24, 2018 jpayne@68: * jpayne@68: */ jpayne@68: public class GeneModel extends ProkObject { jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Initialization ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: public GeneModel(boolean fill){ jpayne@68: if(fill){ jpayne@68: fillContainers(); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: void fillContainers(){ jpayne@68: statsCDS.setInner(kInnerCDS, 3); jpayne@68: statsCDS.setStart(kStartCDS, startFrames, startLeftOffset); jpayne@68: statsCDS.setStop(kStopCDS, stopFrames, stopLeftOffset); jpayne@68: jpayne@68: for(int i=0; i scafList; jpayne@68: {//Scoped to save memory jpayne@68: ArrayList reads=ReadInputStream.toReads(fnaFile, maxReads); jpayne@68: readsProcessed+=reads.size(); jpayne@68: scafList=new ArrayList(reads.size()); jpayne@68: for(Read r : reads){ jpayne@68: basesProcessed+=r.length(); jpayne@68: scafList.add(new ScafData(r)); jpayne@68: } jpayne@68: } jpayne@68: {//Scoped to save memory jpayne@68: ArrayList[] allGffLines=GffLine.loadGffFileByType(gffFile, "CDS,rRNA,tRNA", true); jpayne@68: ArrayList cds=allGffLines[0]; jpayne@68: ArrayList rrna=allGffLines[1]; jpayne@68: ArrayList trna=allGffLines[2]; jpayne@68: genesProcessed+=cds.size(); jpayne@68: genesProcessed+=(rrna==null ? 0 : rrna.size()); jpayne@68: genesProcessed+=(trna==null ? 0 : trna.size()); jpayne@68: jpayne@68: HashMap scafMap=makeScafMap(scafList); jpayne@68: fillScafDataCDS(cds, scafMap); jpayne@68: fillScafDataRNA(rrna, scafMap); jpayne@68: fillScafDataRNA(trna, scafMap); jpayne@68: } jpayne@68: jpayne@68: countBases(scafList); jpayne@68: if(PROCESS_PLUS_STRAND){ jpayne@68: processStrand(scafList, Shared.PLUS); jpayne@68: } jpayne@68: if(PROCESS_MINUS_STRAND){ jpayne@68: for(ScafData sd : scafList){ jpayne@68: sd.clear(); jpayne@68: sd.reverseComplement(); jpayne@68: } jpayne@68: processStrand(scafList, Shared.MINUS); jpayne@68: for(ScafData sd : scafList){ jpayne@68: sd.clear(); jpayne@68: sd.reverseComplement(); jpayne@68: } jpayne@68: } jpayne@68: return false; jpayne@68: } jpayne@68: jpayne@68: public void add(GeneModel pgm){ jpayne@68: for(int i=0; i makeScafMap(ArrayList scafList){ jpayne@68: HashMap scafMap=new HashMap(scafList.size()*3); jpayne@68: for(ScafData sd : scafList){scafMap.put(sd.name, sd);} jpayne@68: for(ScafData sd : scafList){ jpayne@68: String name=sd.name; jpayne@68: int idx=name.indexOf(' '); jpayne@68: if(idx>=0){ jpayne@68: String prefix=name.substring(0, idx); jpayne@68: if(scafMap.containsKey(prefix)){ jpayne@68: assert(false) : "Duplicate degenerate name: '"+name+"', '"+prefix+"'"; jpayne@68: }else{ jpayne@68: scafMap.put(prefix, sd); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: return scafMap; jpayne@68: } jpayne@68: jpayne@68: public void fillScafDataCDS(ArrayList cdsLines, HashMap scafMap){ jpayne@68: if(!callCDS){return;} jpayne@68: for(GffLine gline : cdsLines){ jpayne@68: ScafData sd=scafMap.get(gline.seqid); jpayne@68: assert(sd!=null) : "Can't find scaffold for GffLine "+gline.seqid; jpayne@68: sd.addCDS(gline); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: public void fillScafDataRNA(ArrayList rnaLines, HashMap scafMap){ jpayne@68: for(GffLine gline : rnaLines){ jpayne@68: ScafData sd=scafMap.get(gline.seqid); jpayne@68: assert(sd!=null) : "Can't find scaffold for GffLine "+gline.seqid; jpayne@68: if(processType(gline.prokType())){ jpayne@68: sd.addRNA(gline); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: public void processStrand(ArrayList scafList, int strand){ jpayne@68: for(ScafData sd : scafList){ jpayne@68: processCDS(sd, strand); jpayne@68: processRNA(sd, strand); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Inner Methods ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private void countBases(ArrayList scafList){ jpayne@68: for(ScafData sd : scafList){ jpayne@68: countBases(sd.bases); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: private void countBases(byte[] bases){ jpayne@68: for(byte b : bases){ jpayne@68: int x=AminoAcid.baseToNumberACGTother[b]; jpayne@68: baseCounts[x]++; jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Finding Codons ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private static void findStopCodons(byte[] bases, IntList list, BitSet valid){ jpayne@68: final int k=3; jpayne@68: final int mask=~((-1)<<(2*k)); jpayne@68: int kmer=0; jpayne@68: int len=0; jpayne@68: jpayne@68: for(int i=0; i=0){ jpayne@68: len++; jpayne@68: if(len>=k){ jpayne@68: int point=i;//End of the stop codon jpayne@68: if(isStopCodon(kmer) && !valid.get(point)){ jpayne@68: list.add(point); jpayne@68: valid.set(point); jpayne@68: } jpayne@68: } jpayne@68: }else{len=0;} jpayne@68: } jpayne@68: jpayne@68: for(int i=50; i=0){ jpayne@68: len++; jpayne@68: if(len>=k){ jpayne@68: int point=i-k+1;//Start of the start codon jpayne@68: if(isStartCodon(kmer) && !valid.get(point)){ jpayne@68: list.add(point); jpayne@68: valid.set(point); jpayne@68: } jpayne@68: } jpayne@68: }else{len=0;} jpayne@68: } jpayne@68: jpayne@68: for(int i=50; i=sd.length()){return;} jpayne@68: // assert(start=0) : gline.toString()+"\n"+sd.length()+"\n"+sd.name; jpayne@68: markFrames(start, stop, frames, kInnerCDS); jpayne@68: sd.starts.add(start); jpayne@68: sd.stops.add(stop); jpayne@68: // assert(gline.start!=337) : gline+"\n"+start+", "+stop; jpayne@68: } jpayne@68: jpayne@68: private boolean processRnaLine(final GffLine gline, final ScafData sd, final int type){ jpayne@68: final int strand=gline.strand; jpayne@68: assert(strand==sd.strand()); jpayne@68: final byte[] frames=sd.frames; jpayne@68: int start=gline.start-1, stop=gline.stop-1; jpayne@68: if(start<0 || stop>=sd.length()){return false;} jpayne@68: assert(start<=stop); jpayne@68: if(strand==Shared.MINUS){ jpayne@68: int x=sd.length()-start-1; jpayne@68: int y=sd.length()-stop-1; jpayne@68: start=y; jpayne@68: stop=x; jpayne@68: } jpayne@68: jpayne@68: if(AnalyzeGenes.alignRibo){ jpayne@68: // byte[] seq=sd.fetch(start, stop); jpayne@68: Read[] consensusReads=ProkObject.consensusReads(type); jpayne@68: byte[] universal=(consensusReads!=null && consensusReads.length>0 ? consensusReads[0].bases : null); jpayne@68: float minIdentity=ProkObject.minID(type); jpayne@68: if(universal!=null){ jpayne@68: int[] coords=KillSwitch.allocInt1D(2); jpayne@68: final int a=Tools.max(0, start-(AnalyzeGenes.adjustEndpoints ? 200 : 50)); jpayne@68: final int b=Tools.min(sd.bases.length-1, stop+(AnalyzeGenes.adjustEndpoints ? 200 : 50)); jpayne@68: float id1=align(universal, sd.bases, a, b, minIdentity, coords); jpayne@68: final int rstart=coords[0], rstop=coords[1]; jpayne@68: // assert(false) : rstart+", "+rstop+", "+(rstop-rstart+1)+", "+start+", "+stop; jpayne@68: if(id1startSlop){ jpayne@68: // System.err.println("rstart:\t"+start+" -> "+rstart); jpayne@68: start=rstart; jpayne@68: } jpayne@68: if(Tools.absdif(stop, rstop)>stopSlop){ jpayne@68: // System.err.println("rstop: \t"+stop+" -> "+rstop); jpayne@68: stop=rstop; jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: StatsContainer sc=allContainers[type]; jpayne@68: sc.start.processPoint(sd.bases, start, 1); jpayne@68: sc.stop.processPoint(sd.bases, stop, 1); jpayne@68: assert(sc!=statsCDS); jpayne@68: jpayne@68: assert(start>=0) : gline.toString()+"\n"+sd.length()+"\n"+sd.name; jpayne@68: final byte flag=typeToFlag(type); jpayne@68: for(int i=start+kInnerRNA-1; i<=stop; i++){ jpayne@68: frames[i]|=flag; jpayne@68: } jpayne@68: return true; jpayne@68: } jpayne@68: jpayne@68: private float align(byte[] query, byte[] ref, int start, int stop, float minIdentity, int[] coords){ jpayne@68: // final int a=0, b=ref.length-1; jpayne@68: SingleStateAlignerFlat2 ssa=GeneCaller.getSSA(); jpayne@68: final int minScore=ssa.minScoreByIdentity(query.length, minIdentity); jpayne@68: int[] max=ssa.fillUnlimited(query, ref, start, stop, minScore); jpayne@68: if(max==null){return 0;} jpayne@68: jpayne@68: final int rows=max[0]; jpayne@68: final int maxCol=max[1]; jpayne@68: final int maxState=max[2]; jpayne@68: // final int maxScore=max[3]; jpayne@68: // final int maxStart=max[4]; jpayne@68: jpayne@68: //returns {score, bestRefStart, bestRefStop} jpayne@68: //padded: {score, bestRefStart, bestRefStop, padLeft, padRight}; jpayne@68: final int[] score=ssa.score(query, ref, start, stop, rows, maxCol, maxState); jpayne@68: final int rstart=Tools.max(score[1], 0); jpayne@68: final int rstop=Tools.min(score[2], ref.length-1); jpayne@68: if(coords!=null){ jpayne@68: coords[0]=rstart; jpayne@68: coords[1]=rstop; jpayne@68: } jpayne@68: final float id=ssa.tracebackIdentity(query, ref, start, stop, rows, maxCol, maxState, null); jpayne@68: return id; jpayne@68: } jpayne@68: jpayne@68: /** jpayne@68: * Each frame byte has a bit marked for valid coding frames. jpayne@68: * For example, if frames[23]=0b100, then base 23 is the last base in a kmer starting at the 3rd base in a codon. jpayne@68: * If frames[23]=0, then no coding kmer end at that location on this strand. jpayne@68: * @param start jpayne@68: * @param stop jpayne@68: * @param frames jpayne@68: * @param k jpayne@68: */ jpayne@68: private static void markFrames(int start, int stop, byte[] frames, int k){ jpayne@68: assert(start<=stop) : start+", "+stop; jpayne@68: for(int i=start+k-1, frameBit=(1<<((k-1)%3)), max=Tools.min(stop-3, frames.length-1); i<=max; i++){ jpayne@68: frames[i]=(byte)(frames[i]|frameBit); jpayne@68: frameBit<<=1; jpayne@68: if(frameBit>4){frameBit=1;} jpayne@68: } jpayne@68: // assert(false) : Arrays.toString(Arrays.copyOfRange(frames, start, start+20))+"\n"+start; //This is correct jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Counting Kmers ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private void processCDS(ScafData sd, int strand){ jpayne@68: if(!callCDS){return;} jpayne@68: ArrayList glines=sd.cdsLines[strand]; jpayne@68: for(GffLine gline : glines){ jpayne@68: assert(gline.strand==strand); jpayne@68: processGene(gline, sd); jpayne@68: statsCDS.addLength(gline.length()); jpayne@68: } jpayne@68: jpayne@68: statsCDS.inner.processCDSFrames(sd.bases, sd.frames); jpayne@68: BitSet startSet=processEnds(sd.bases, statsCDS.start, sd.starts, 1); jpayne@68: BitSet stopSet=processEnds(sd.bases, statsCDS.stop, sd.stops, 1); jpayne@68: // outstream.println("Processed "+sd.starts.size+" valid starts and "+sd.stops.size+" stops."); jpayne@68: sd.clear(); jpayne@68: findStartCodons(sd.bases, sd.starts, startSet); jpayne@68: findStopCodons(sd.bases, sd.stops, stopSet); jpayne@68: // outstream.println("Found "+sd.starts.size+" invalid starts and "+sd.stops.size+" stops."); jpayne@68: processEnds(sd.bases, statsCDS.start, sd.starts, 0); jpayne@68: processEnds(sd.bases, statsCDS.stop, sd.stops, 0); jpayne@68: } jpayne@68: jpayne@68: private static int getGlineType(GffLine gline, ScafData sd){ jpayne@68: if(!gline.inbounds(sd.length()) || gline.partial()){return -1;} jpayne@68: jpayne@68: final int length=gline.length(); jpayne@68: final int type=gline.prokType(); jpayne@68: if(type<0){ jpayne@68: return type; jpayne@68: }else if(type==CDS){ jpayne@68: return type; jpayne@68: }else if(type==tRNA && length>=40 && length<=120){ jpayne@68: return type; jpayne@68: }else if(type==r16S && length>=1440 && length<=1620){ jpayne@68: return type; jpayne@68: }else if(type==r23S && length>=2720 && length<=3170){ jpayne@68: return type; jpayne@68: }else if(type==r5S && length>=90 && length<=150){ jpayne@68: return type; jpayne@68: }else if(type==r18S && length>=1400 && length<=2000){ //TODO: Check length range jpayne@68: return type; jpayne@68: } jpayne@68: return -1; jpayne@68: } jpayne@68: jpayne@68: private void processRNA(ScafData sd, int strand){ jpayne@68: sd.clear(); jpayne@68: ArrayList lines=sd.rnaLines[strand]; jpayne@68: for(GffLine gline : lines){ jpayne@68: assert(gline.strand==strand); jpayne@68: final int type=getGlineType(gline, sd); jpayne@68: if(type>0){ jpayne@68: StatsContainer sc=allContainers[type]; jpayne@68: sc.addLength(gline.length()); jpayne@68: processRnaLine(gline, sd, type); jpayne@68: } jpayne@68: } jpayne@68: processRnaInner(sd); jpayne@68: processRnaEnds(sd); jpayne@68: } jpayne@68: jpayne@68: void processRnaInner(ScafData sd){ jpayne@68: byte[] bases=sd.bases; jpayne@68: byte[] frames=sd.frames; jpayne@68: final int k=kInnerRNA;//TODO: Note! This is linked to a single static variable for all RNAs. jpayne@68: final int mask=~((-1)<<(2*k)); jpayne@68: int kmer=0; jpayne@68: int len=0; jpayne@68: jpayne@68: for(int i=0; i=0){ jpayne@68: len++; jpayne@68: if(len>=k){ jpayne@68: int vf=frames[i]; jpayne@68: for(int type=0; type<5; type++){ jpayne@68: int valid=vf&1; jpayne@68: rnaContainers[type].inner.add(kmer, 0, valid); jpayne@68: vf=(vf>>1); jpayne@68: } jpayne@68: } jpayne@68: }else{len=0;} jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: void processRnaEnds(ScafData sd){ jpayne@68: byte[] bases=sd.bases; jpayne@68: jpayne@68: final int k=stats16S.start.k; jpayne@68: final int kMax=stats16S.start.kMax; jpayne@68: final int mask=stats16S.start.mask; jpayne@68: final long[] counts=new long[kMax];//TODO: Slow jpayne@68: jpayne@68: int kmer=0; jpayne@68: int len=0; jpayne@68: jpayne@68: for(int i=0; i=0){ jpayne@68: len++; jpayne@68: if(len>=k){ jpayne@68: counts[kmer]++; jpayne@68: } jpayne@68: }else{len=0;} jpayne@68: } jpayne@68: for(StatsContainer sc : rnaContainers){ jpayne@68: FrameStats fs=sc.start; jpayne@68: for(long[] array : fs.countsFalse){ jpayne@68: Tools.add(array, counts); jpayne@68: } jpayne@68: fs=sc.stop; jpayne@68: for(long[] array : fs.countsFalse){ jpayne@68: Tools.add(array, counts); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: private static BitSet processEnds(byte[] bases, FrameStats stats, IntList list, int valid){ jpayne@68: BitSet points=new BitSet(bases.length); jpayne@68: for(int i=0; i0){f=(f+0.0005f*ss);} //Does not seem to help; needs more study. jpayne@68: // return f; jpayne@68: // } jpayne@68: // jpayne@68: // //Assumes bases are in the correct strand jpayne@68: // public float calcStopScore(int stop, byte[] bases){ jpayne@68: // float f=scorePoint(stop, bases, stopStats); jpayne@68: // return f; jpayne@68: // } jpayne@68: // jpayne@68: // //Assumes bases are in the correct strand jpayne@68: // public float calcRnaStartScore(int start, int stop, byte[] bases){ jpayne@68: // float f=scorePoint(start, bases, rrnaStartStats); jpayne@68: // return f; jpayne@68: // } jpayne@68: // jpayne@68: // //Assumes bases are in the correct strand jpayne@68: // public float calcRnaStopScore(int stop, byte[] bases){ jpayne@68: // float f=scorePoint(stop, bases, rrnaStopStats); jpayne@68: // return f; jpayne@68: // } jpayne@68: jpayne@68: // public static float calcKmerScore(int start, int stop, int startFrame, byte[] bases, FrameStats stats){ jpayne@68: // jpayne@68: // assert(stats.frames==3); jpayne@68: // final int k=stats.k; jpayne@68: // final int mask=~((-1)<<(2*k)); jpayne@68: // jpayne@68: // int kmer=0; jpayne@68: // int len=0; jpayne@68: // float score=0; jpayne@68: // int numKmers=0; jpayne@68: // jpayne@68: // for(int pos=start, currentFrame=startFrame; pos=0){ jpayne@68: // kmer=((kmer<<2)|x)&mask; jpayne@68: // len++; jpayne@68: // if(len>=k){ jpayne@68: // float prob=stats.probs[currentFrame][kmer]; jpayne@68: // float dif=prob-0.99f;//Prob above 1 is more likely than average jpayne@68: // score+=dif; jpayne@68: // numKmers++; jpayne@68: // } jpayne@68: // }else{ jpayne@68: // len=0; jpayne@68: // kmer=0; jpayne@68: // } jpayne@68: // jpayne@68: // currentFrame++; jpayne@68: // if(currentFrame>2){currentFrame=0;} jpayne@68: // } jpayne@68: // return score/Tools.max(1f, numKmers); jpayne@68: // } jpayne@68: // jpayne@68: // /** jpayne@68: // * TODO jpayne@68: // * Evaluate the relative difference between left and right frequencies. jpayne@68: // * The purpose is to find locations where the left side looks noncoding and the right side looks coding. jpayne@68: // * Does not currently yield useful results. jpayne@68: // */ jpayne@68: // public static float scoreStart2(int point, byte[] bases, int stop, FrameStats stats){ jpayne@68: // final int k=stats.k; jpayne@68: // jpayne@68: // int start=point-45; jpayne@68: // if(start<0 || stop>bases.length){return 0.5f;} jpayne@68: // jpayne@68: // float left=calcKmerScore(start, Tools.min(point+k-2, bases.length), 0, bases, stats); jpayne@68: // float right=calcKmerScore(point, stop-3, 0, bases, stats); jpayne@68: // return right-left; //High numbers are likely to be starts; non-starts should be near 0. jpayne@68: // } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- toString ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: @Override jpayne@68: public String toString(){ jpayne@68: return appendTo(new ByteBuilder()).toString(); jpayne@68: } jpayne@68: jpayne@68: public ByteBuilder appendTo(ByteBuilder bb){ jpayne@68: jpayne@68: // Collections.sort(fnames); jpayne@68: taxIds.sort(); jpayne@68: jpayne@68: bb.append("#BBMap "+Shared.BBMAP_VERSION_STRING+" Prokaryotic Gene Model\n"); jpayne@68: bb.append("#files"); jpayne@68: bb.tab().append(numFiles); jpayne@68: // if(fnames.size()>5){ jpayne@68: // bb.tab().append(fnames.size()); jpayne@68: // }else{ jpayne@68: // for(String fname : fnames){ jpayne@68: // bb.tab().append(fname); jpayne@68: // } jpayne@68: // } jpayne@68: bb.nl(); jpayne@68: bb.append("#taxIDs"); jpayne@68: for(int i=0; i5) : allContainers.length; jpayne@68: jpayne@68: return bb; jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Stats ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: public final StatsContainer statsCDS=new StatsContainer(CDS); jpayne@68: public final StatsContainer statstRNA=new StatsContainer(tRNA); jpayne@68: public final StatsContainer stats16S=new StatsContainer(r16S); jpayne@68: public final StatsContainer stats23S=new StatsContainer(r23S); jpayne@68: public final StatsContainer stats5S=new StatsContainer(r5S); jpayne@68: public final StatsContainer stats18S=new StatsContainer(r18S); jpayne@68: jpayne@68: final StatsContainer[] rnaContainers=new StatsContainer[] {statstRNA, stats16S, stats23S, stats5S, stats18S}; jpayne@68: final StatsContainer[] allContainers=new StatsContainer[] {statsCDS, statstRNA, stats16S, stats23S, stats5S, stats18S}; jpayne@68: //public static int CDS=0, tRNA=1, r16S=2, r23S=3, r5S=4, r18S=5, r28S=6, RNA=7; jpayne@68: jpayne@68: // public final FrameStats innerKmerStats=new FrameStats("innerKmerStats", innerKmerLength, 3, 0); jpayne@68: // public final FrameStats startStats=new FrameStats("startStats", endKmerLength, startFrames, startLeftOffset); jpayne@68: // public final FrameStats stopStats=new FrameStats("stopStats", endKmerLength, stopFrames, stopLeftOffset); jpayne@68: // jpayne@68: // public final FrameStats rrnaStartStats=new FrameStats("rrnaStart", 2, 16, 8); jpayne@68: // public final FrameStats rrnaStopStats=new FrameStats("rrnaStop", 2, 16, 8); jpayne@68: // jpayne@68: // public final FrameStats trnaStats=new FrameStats("tRNA", rnaKmerLength, 1, 0); jpayne@68: // public final FrameStats rrna16Sstats=new FrameStats("16S", rnaKmerLength, 1, 0); jpayne@68: // public final FrameStats rrna23Sstats=new FrameStats("23S", rnaKmerLength, 1, 0); jpayne@68: // public final FrameStats rrna5Sstats=new FrameStats("5S", rnaKmerLength, 1, 0); jpayne@68: // public final FrameStats[] rnaKmerStats=new FrameStats[] {trnaStats, rrna16Sstats, rrna23Sstats, rrna5Sstats}; jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: // public ArrayList fnames=new ArrayList(); jpayne@68: public int numFiles=0; jpayne@68: public IntList taxIds=new IntList(); jpayne@68: jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Fields ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private long maxReads=-1; jpayne@68: jpayne@68: long readsProcessed=0; jpayne@68: long basesProcessed=0; jpayne@68: long genesProcessed=0; jpayne@68: long filesProcessed=0; jpayne@68: long[] baseCounts=new long[5]; jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Static Setters ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: public synchronized void setStatics(){ jpayne@68: // assert(!setStatics); jpayne@68: kInnerCDS=statsCDS.inner.k; jpayne@68: kStartCDS=statsCDS.start.k; jpayne@68: kStopCDS=statsCDS.stop.k; jpayne@68: jpayne@68: setStartLeftOffset(statsCDS.start.leftOffset); jpayne@68: setStartRightOffset(statsCDS.start.rightOffset()); jpayne@68: jpayne@68: setStopLeftOffset(statsCDS.stop.leftOffset); jpayne@68: setStopRightOffset(statsCDS.stop.rightOffset()); jpayne@68: jpayne@68: kInnerRNA=stats16S.inner.k;//TODO: Why is 16S used here? jpayne@68: kStartRNA=stats16S.start.k; jpayne@68: kStopRNA=stats16S.stop.k; jpayne@68: setStatics=true; jpayne@68: } jpayne@68: jpayne@68: public static void setInnerK(int k){ jpayne@68: kInnerCDS=k; jpayne@68: } jpayne@68: jpayne@68: public static void setStartK(int k){ jpayne@68: kStartCDS=k; jpayne@68: } jpayne@68: jpayne@68: public static void setStopK(int k){ jpayne@68: kStopCDS=k; jpayne@68: } jpayne@68: jpayne@68: public static void setStartLeftOffset(int x){ jpayne@68: startLeftOffset=x; jpayne@68: startFrames=startLeftOffset+startRightOffset+1; jpayne@68: // System.err.println("startLeftOffset="+startLeftOffset+", startRightOffset="+startRightOffset+", frames="+startFrames); jpayne@68: } jpayne@68: jpayne@68: public static void setStartRightOffset(int x){ jpayne@68: startRightOffset=x; jpayne@68: startFrames=startLeftOffset+startRightOffset+1; jpayne@68: // System.err.println("startLeftOffset="+startLeftOffset+", startRightOffset="+startRightOffset+", frames="+startFrames); jpayne@68: // assert(false) : endLeftOffset+", "+endRightOffset+", "+endFrames; jpayne@68: } jpayne@68: jpayne@68: public static void setStopLeftOffset(int x){ jpayne@68: stopLeftOffset=x; jpayne@68: stopFrames=stopLeftOffset+stopRightOffset+1; jpayne@68: // System.err.println("stopLeftOffset="+stopLeftOffset+", stopRightOffset="+stopRightOffset+", frames="+stopFrames); jpayne@68: } jpayne@68: jpayne@68: public static void setStopRightOffset(int x){ jpayne@68: stopRightOffset=x; jpayne@68: stopFrames=stopLeftOffset+stopRightOffset+1; jpayne@68: // System.err.println("stopLeftOffset="+stopLeftOffset+", stopRightOffset="+stopRightOffset+", frames="+stopFrames); jpayne@68: // assert(false) : endLeftOffset+", "+endRightOffset+", "+endFrames; jpayne@68: } jpayne@68: jpayne@68: public static final boolean isStartCodon(int code){ jpayne@68: return code>=0 && code<=63 && isStartCodon[code]; jpayne@68: } jpayne@68: public static final boolean isStopCodon(int code){ jpayne@68: return code>=0 && code<=63 && isStopCodon[code]; jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Class Init ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private static boolean[] makeIsCodon(String[] codons){ jpayne@68: boolean[] array=new boolean[64]; jpayne@68: for(String s : codons){ jpayne@68: int x=AminoAcid.toNumber(s); jpayne@68: array[x]=true; jpayne@68: } jpayne@68: return array; jpayne@68: } jpayne@68: jpayne@68: public static int kInnerCDS=6; jpayne@68: public static int kStartCDS=3; jpayne@68: public static int kStopCDS=3; jpayne@68: jpayne@68: static int startLeftOffset(){return startLeftOffset;} jpayne@68: static int startRightOffset(){return startRightOffset;} jpayne@68: static int startFrames(){return startFrames;} jpayne@68: jpayne@68: private static int startLeftOffset=21; //21 works well for k=4 jpayne@68: private static int startRightOffset=8; //10 works well for k=4 jpayne@68: private static int startFrames=startLeftOffset+startRightOffset+1; jpayne@68: jpayne@68: private static int stopLeftOffset=9; jpayne@68: private static int stopRightOffset=12; jpayne@68: private static int stopFrames=stopLeftOffset+stopRightOffset+1; jpayne@68: jpayne@68: private static boolean setStatics=false; jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- More Statics ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: //E. coli uses 83% AUG (3542/4284), 14% (612) GUG, 3% (103) UUG[7] and one or two others (e.g., an AUU and possibly a CUG).[8][9] jpayne@68: public static String[] startCodons=new String[] {"ATG", "GTG", "TTG"}; jpayne@68: public static String[] extendedStartCodons=new String[] {"ATG", "GTG", "TTG", "ATT", "CTG", "ATA"}; jpayne@68: public static String[] stopCodons=new String[] {"TAG", "TAA", "TGA"}; jpayne@68: public static boolean[] isStartCodon=makeIsCodon(startCodons); jpayne@68: public static boolean[] isStopCodon=makeIsCodon(stopCodons); jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private static PrintStream outstream=System.err; jpayne@68: public static boolean verbose=false; jpayne@68: public static boolean errorState=false; jpayne@68: jpayne@68: }