Mercurial > repos > rliterman > csp2
view CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/prok/ProkObject.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
line wrap: on
line source
package prok; import java.io.File; import dna.AminoAcid; import dna.Data; import fileIO.FileFormat; import fileIO.ReadWrite; import shared.Parse; import shared.Tools; import stream.ConcurrentReadInputStream; import stream.Read; import stream.ReadInputStream; import structures.ListNum; import structures.LongHashSet; /** Contains a lot of statics and static methods for gene-calling */ public abstract class ProkObject { public static boolean parse(String arg, String a, String b){ if(a.equalsIgnoreCase("16sstartslop") || a.equalsIgnoreCase("ssustartslop")){ ssuStartSlop=Integer.parseInt(b); }else if(a.equalsIgnoreCase("23sstartslop") || a.equalsIgnoreCase("lsustartslop")){ lsuStartSlop=Integer.parseInt(b); }else if(a.equalsIgnoreCase("5sstartslop")){ r5SStartSlop=Integer.parseInt(b); }else if(a.equalsIgnoreCase("16sstopslop") || a.equalsIgnoreCase("ssustopslop")){ ssuStopSlop=Integer.parseInt(b); }else if(a.equalsIgnoreCase("23sstopslop") || a.equalsIgnoreCase("lsustopslop")){ lsuStopSlop=Integer.parseInt(b); }else if(a.equalsIgnoreCase("5sstopslop")){ r5SStopSlop=Integer.parseInt(b); }else if(a.equals("plus")){ PROCESS_PLUS_STRAND=Parse.parseBoolean(b); }else if(a.equals("minus")){ PROCESS_MINUS_STRAND=Parse.parseBoolean(b); } else if(a.equalsIgnoreCase("min16SIdentity") || a.equalsIgnoreCase("min16SId")) { min16SIdentity=Float.parseFloat(b); }else if(a.equalsIgnoreCase("min18SIdentity") || a.equalsIgnoreCase("min18SId")) { min18SIdentity=Float.parseFloat(b); }else if(a.equalsIgnoreCase("min23SIdentity") || a.equalsIgnoreCase("min23SId")) { min23SIdentity=Float.parseFloat(b); }else if(a.equalsIgnoreCase("min5SIdentity") || a.equalsIgnoreCase("min5SId")) { min5SIdentity=Float.parseFloat(b); } else if(a.equalsIgnoreCase("align16s") || a.equalsIgnoreCase("load16SSequence")){ load16SSequence=Parse.parseBoolean(b); }else if(a.equalsIgnoreCase("align23s") || a.equalsIgnoreCase("load23SSequence")){ load23SSequence=Parse.parseBoolean(b); }else if(a.equalsIgnoreCase("align18s") || a.equalsIgnoreCase("load18SSequence")){ load18SSequence=Parse.parseBoolean(b); }else if(a.equalsIgnoreCase("align5s") || a.equalsIgnoreCase("load5SSequence")){ load5SSequence=Parse.parseBoolean(b); } else if(a.equalsIgnoreCase("load16skmers") || a.equalsIgnoreCase("load18skmers") || a.equalsIgnoreCase("loadssukmers")){ loadSSUkmers=Parse.parseBoolean(b); }else if(a.equalsIgnoreCase("load23skmers") || a.equalsIgnoreCase("load28skmers") || a.equalsIgnoreCase("loadlsukmers")){ loadLSUkmers=Parse.parseBoolean(b); }else if(a.equalsIgnoreCase("load5skmers")){ load5Skmers=Parse.parseBoolean(b); }else if(a.equalsIgnoreCase("loadtrnakmers")){ loadtRNAkmers=Parse.parseBoolean(b); }else if(a.equalsIgnoreCase("klongtrna")){ kLongTRna=Integer.parseInt(b); }else if(a.equalsIgnoreCase("longkmers")){ loadSSUkmers=loadLSUkmers=load5Skmers=loadtRNAkmers=Parse.parseBoolean(b); }else if(a.equalsIgnoreCase("klong5s")){ kLong5S=Integer.parseInt(b); }else if(a.equalsIgnoreCase("klong16s") || a.equalsIgnoreCase("klong18s") || a.equalsIgnoreCase("klongssu")){ kLongSSU=Integer.parseInt(b); }else if(a.equalsIgnoreCase("klong23s") || a.equalsIgnoreCase("klong28s") || a.equalsIgnoreCase("klonglsu")){ kLongLSU=Integer.parseInt(b); }else if(a.equalsIgnoreCase("klongtrna")){ kLongTRna=Integer.parseInt(b); } else{ return false; } return true; } /*--------------------------------------------------------------*/ public static boolean processType(int type){ return (type==CDS ? callCDS : type==r16S ? call16S : type==r23S ? call23S : type==r18S ? call18S : type==r5S ? call5S : type==tRNA ? calltRNA : true); } public static int startSlop(int type) { int slop=(type==r16S ? ssuStartSlop : type==r23S ? lsuStartSlop : type==r18S ? ssuStartSlop : type==r5S ? r5SStartSlop : 9999); return slop; } public static int stopSlop(int type) { int slop=(type==r16S ? ssuStopSlop : type==r23S ? lsuStopSlop : type==r18S ? ssuStopSlop : type==r5S ? r5SStopSlop : 9999); return slop; } public static float minID(int type) { float minIdentity=(type==r16S ? min16SIdentity : type==r23S ? min23SIdentity : type==r18S ? min18SIdentity : type==r5S ? min5SIdentity : 0); return minIdentity; } public static Read[] consensusReads(int type) { Read[] consensusReads=(type==r16S ? r16SSequence : type==r23S ? r23SSequence : type==r18S ? r18SSequence : type==r5S ? r5SSequence : null); return consensusReads; } public static LongHashSet kmerSet(int type) { LongHashSet set=(type==tRNA ? trnaKmers : type==r16S ? ssuKmers : type==r23S ? lsuKmers : type==r5S ? r5SKmers : type==r18S ? ssuKmers : null); return set; } public static int kLongLen(int type) { int kLongLen=(type==tRNA ? kLongTRna : type==r16S ? kLongSSU : type==r23S ? kLongLSU : type==r5S ? kLong5S : type==r18S ? kLongSSU : -1); return kLongLen; } public static int flagToType(int flag) { return Integer.numberOfTrailingZeros(flag)+1; } public static byte typeToFlag(int type) { assert(type<=6); return (byte)(1<<(type-1)); } public static boolean callType(int type){//TODO: Turn these functions into array lookups if(type==CDS){return callCDS;} else if(type==tRNA){return calltRNA;} else if(type==r16S){return call16S;} else if(type==r23S){return call23S;} else if(type==r5S){return call5S;} else if(type==r18S){return call18S;} assert(false) : type; return false; } /*--------------------------------------------------------------*/ /*---------------- Long Kmers ----------------*/ /*--------------------------------------------------------------*/ public static synchronized void loadLongKmers(){ // assert(ssuKmers==null); // assert(false) : load5Skmers+", "+kLong5s; if(loadedLongKmers){return;} if(loadSSUkmers){ssuKmers=loadLongKmersByType(kLongSSU, "ssu");} if(loadLSUkmers){lsuKmers=loadLongKmersByType(kLongLSU, "lsu");} if(load5Skmers){r5SKmers=loadLongKmersByType(kLong5S, "5S");} if(loadtRNAkmers){trnaKmers=loadLongKmersByType(kLongTRna, "tRNA");} loadedLongKmers=true; } // private static LongHashSet loadLongKmers(StatsContainer sc, int k, String prefix){ // String fname=Data.findPath("?"+prefix+"_"+k+"mers.fa"); // if(!new File(fname).exists()){ // fname=fname+".gz"; // if(!new File(fname).exists()){ // System.err.println("Can't find "+fname); // return null; // } // } // LongHashSet set=loadLongKmers(fname, k); // sc.kmerSet=set; // sc.kLongLen=k; // return set; // } private static LongHashSet loadLongKmersByType(int k, String prefix){ String fname=Data.findPath("?"+prefix+"_"+k+"mers.fa", true); if(!new File(fname).exists()){ fname=fname+".gz"; if(!new File(fname).exists()){ System.err.println("Can't find "+fname); return null; } } LongHashSet set=loadLongKmers(fname, k); return set; } private static LongHashSet loadLongKmers(String fname, int k){//TODO: Consider making this a LongHashSet. No reason not to... FileFormat ff=FileFormat.testInput(fname, FileFormat.FA, null, false, false); ConcurrentReadInputStream cris=ConcurrentReadInputStream.getReadInputStream(-1, false, ff, null); cris.start(); //Start the stream // if(verbose){outstream.println("Started cris");} LongHashSet set=new LongHashSet(1000); ListNum<Read> ln=cris.nextList(); while(ln!=null && ln.size()>0){ processList(ln, set, k); cris.returnList(ln); ln=cris.nextList(); } if(ln!=null){cris.returnList(ln.id, ln.list==null || ln.list.isEmpty());} ReadWrite.closeStream(cris); return set; } private static LongHashSet processList(ListNum<Read> ln, LongHashSet set, int k){ final long mask=~((-1L)<<(2*k)); for(Read r : ln){ final byte[] bases=r.bases; long kmer=0; int len=0; for(byte b : bases){ final int num=AminoAcid.baseToNumber[b]; if(num>=0){ len++; kmer=((kmer<<2)|num)&mask; if(len>=k){ set.add(kmer); } }else{ len=0; } } } return set; } /*--------------------------------------------------------------*/ /*---------------- Consensus Sequence ----------------*/ /*--------------------------------------------------------------*/ public static synchronized void loadConsensusSequenceFromFile(boolean removeMito, boolean removeChloro){ if(loadedConsensusSequence){return;} // assert(r16SSequence==null); if(load16SSequence){r16SSequence=loadConsensusSequenceType("16S", removeMito, removeChloro);} if(load18SSequence){r18SSequence=loadConsensusSequenceType("18S", removeMito, removeChloro);} if(load23SSequence){r23SSequence=loadConsensusSequenceType("23S", removeMito, removeChloro);} if(load5SSequence){r5SSequence=loadConsensusSequenceType("5S", removeMito, removeChloro);} if(loadtRNASequence){trnaSequence=loadConsensusSequenceType("tRNA", removeMito, removeChloro);} loadedConsensusSequence=true; } public static Read[] loadConsensusSequenceType(String prefix, boolean removeMito, boolean removeChloro){ String fname=null; fname=Data.findPath("?"+prefix+"_consensus_sequence.fq", false); if(fname!=null && (fname.endsWith(".jar") || new File(fname).exists())){ fname=Tools.fixExtension(fname); }else{ fname=Data.findPath("?"+prefix+"_consensus_sequence.fa", true); fname=Tools.fixExtension(fname); if(!fname.endsWith(".jar") && !new File(fname).exists()){ System.err.println("Can't find "+fname); return null; } } Read[] array=loadConsensusSequence(fname); if(removeMito){array=stripOrganelle(array, "mito");} if(removeChloro){array=stripOrganelle(array, "plastid");} return array; } private static Read[] loadConsensusSequence(String fname){ FileFormat ff=FileFormat.testInput(fname, FileFormat.FA, null, false, false); Read[] array=ReadInputStream.toReadArray(ff, -1); return array; } private static Read[] stripOrganelle(Read[] array, String key){ int removed=0; for(int j=0; j<array.length; j++){ if(array[j].id.toLowerCase().startsWith(key)) { array[j]=null; removed++; } } if(removed>0){array=Tools.condenseStrict(array);} return array; } /*--------------------------------------------------------------*/ public static final int CDS=0, tRNA=1, r16S=2, r23S=3, r5S=4, r18S=5, r28S=6, RNA=7; public static String[] typeStrings=new String[] {"CDS", "tRNA", "16S", "23S", "5S", "18S", "28S", "RNA"}; public static String[] typeStrings2=new String[] {"CDS", "tRNA", "rRNA", "rRNA", "rRNA", "rRNA", "rRNA", "RNA"}; public static String[] specialTypeStrings=new String[] {null, "tRNA", "16S", "23S", "5S", "18S", "28S", null}; public static boolean isSpecialType(String type){ if(type==null){return false;} for(String s : specialTypeStrings){ if(type.equalsIgnoreCase(s)){return true;} } return false; } public static int kInnerRNA=6; public static int kStartRNA=3; public static int kStopRNA=3; public static int kLongSSU=15; public static int kLongLSU=15; public static int kLong5S=15; public static int kLongTRna=15; public static float min16SIdentity=0.62f; public static float min23SIdentity=0.60f; public static float min5SIdentity=0.60f; public static float min18SIdentity=0.60f; static int ssuStartSlop=200; static int ssuStopSlop=0; static int lsuStartSlop=220; static int lsuStopSlop=0; static int r5SStartSlop=50; static int r5SStopSlop=50; public static boolean callCDS=true; public static boolean calltRNA=true; public static boolean call16S=true; public static boolean call23S=true; public static boolean call5S=true; public static boolean call18S=false; public static LongHashSet ssuKmers=null; public static LongHashSet lsuKmers=null; public static LongHashSet r5SKmers=null; public static LongHashSet trnaKmers=null; public static Read[] trnaSequence=null; public static Read[] r16SSequence=null; public static Read[] r23SSequence=null; public static Read[] r5SSequence=null; public static Read[] r18SSequence=null; public static boolean PROCESS_PLUS_STRAND=true; public static boolean PROCESS_MINUS_STRAND=true; public static boolean loadSSUkmers=true; public static boolean loadLSUkmers=true; public static boolean load5Skmers=true; public static boolean loadtRNAkmers=true; private static boolean loadedLongKmers=false; public static boolean loadtRNASequence=false; public static boolean load16SSequence=true; public static boolean load23SSequence=true; public static boolean load5SSequence=true; public static boolean load18SSequence=true; private static boolean loadedConsensusSequence=false; }