jpayne@68: package gff; jpayne@68: jpayne@68: import java.io.PrintStream; jpayne@68: import java.util.ArrayList; jpayne@68: import java.util.HashMap; jpayne@68: import java.util.Locale; jpayne@68: import java.util.Map.Entry; jpayne@68: jpayne@68: import fileIO.ByteFile; jpayne@68: import fileIO.FileFormat; jpayne@68: import fileIO.ReadWrite; jpayne@68: import prok.ProkObject; jpayne@68: import shared.Parse; jpayne@68: import shared.Parser; jpayne@68: import shared.PreParser; jpayne@68: import shared.Shared; jpayne@68: import shared.Timer; jpayne@68: import shared.Tools; jpayne@68: import structures.StringNum; jpayne@68: jpayne@68: /** jpayne@68: * Compares gff files for the purpose of grading gene-calling. jpayne@68: * @author Brian Bushnell jpayne@68: * @date October 3, 2018 jpayne@68: * jpayne@68: */ jpayne@68: public class CompareGff { jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Initialization ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** jpayne@68: * Code entrance from the command line. jpayne@68: * @param args Command line arguments jpayne@68: */ jpayne@68: public static void main(String[] args){ jpayne@68: //Start a timer immediately upon code entrance. jpayne@68: Timer t=new Timer(); jpayne@68: jpayne@68: //Create an instance of this class jpayne@68: CompareGff x=new CompareGff(args); jpayne@68: jpayne@68: //Run the object jpayne@68: x.process(t); jpayne@68: jpayne@68: //Close the print stream if it was redirected jpayne@68: Shared.closeStream(x.outstream); jpayne@68: } jpayne@68: jpayne@68: /** jpayne@68: * Constructor. jpayne@68: * @param args Command line arguments jpayne@68: */ jpayne@68: public CompareGff(String[] args){ jpayne@68: jpayne@68: {//Preparse block for help, config files, and outstream jpayne@68: PreParser pp=new PreParser(args, getClass(), false); jpayne@68: args=pp.args; jpayne@68: outstream=pp.outstream; jpayne@68: } jpayne@68: jpayne@68: //Set shared static variables prior to parsing jpayne@68: ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true; jpayne@68: ReadWrite.MAX_ZIP_THREADS=Shared.threads(); jpayne@68: jpayne@68: {//Parse the arguments jpayne@68: final Parser parser=parse(args); jpayne@68: overwrite=parser.overwrite; jpayne@68: append=parser.append; jpayne@68: jpayne@68: in=parser.in1; jpayne@68: } jpayne@68: jpayne@68: fixExtensions(); //Add or remove .gz or .bz2 as needed jpayne@68: checkFileExistence(); //Ensure files can be read and written jpayne@68: checkStatics(); //Adjust file-related static fields as needed for this program jpayne@68: jpayne@68: ffin=FileFormat.testInput(in, FileFormat.GFF, null, true, true); jpayne@68: ffref=FileFormat.testInput(ref, FileFormat.GFF, null, true, true); jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Initialization Helpers ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** Parse arguments from the command line */ jpayne@68: private Parser parse(String[] args){ jpayne@68: jpayne@68: Parser parser=new Parser(); jpayne@68: for(int i=0; i1 ? split[1] : null; jpayne@68: if(b!=null && b.equalsIgnoreCase("null")){b=null;} jpayne@68: jpayne@68: if(a.equals("ref")){ jpayne@68: ref=b; jpayne@68: }else if(a.equals("lines")){ jpayne@68: maxLines=Long.parseLong(b); jpayne@68: if(maxLines<0){maxLines=Long.MAX_VALUE;} jpayne@68: }else if(a.equals("verbose")){ jpayne@68: verbose=Parse.parseBoolean(b); jpayne@68: // ByteFile1.verbose=verbose; jpayne@68: // ByteFile2.verbose=verbose; jpayne@68: // ReadWrite.verbose=verbose; jpayne@68: }else if(parser.parse(arg, a, b)){ jpayne@68: //do nothing jpayne@68: }else if(i==0 && arg.indexOf('=')<0){ jpayne@68: parser.in1=arg; jpayne@68: }else if(i==1 && arg.indexOf('=')<0 && ref==null){ jpayne@68: ref=arg; jpayne@68: }else{ jpayne@68: outstream.println("Unknown parameter "+args[i]); jpayne@68: assert(false) : "Unknown parameter "+args[i]; jpayne@68: // throw new RuntimeException("Unknown parameter "+args[i]); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: return parser; jpayne@68: } jpayne@68: jpayne@68: /** Add or remove .gz or .bz2 as needed */ jpayne@68: private void fixExtensions(){ jpayne@68: in=Tools.fixExtension(in); jpayne@68: ref=Tools.fixExtension(ref); jpayne@68: if(in==null || ref==null){throw new RuntimeException("Error - at least two input files are required.");} jpayne@68: } jpayne@68: jpayne@68: /** Ensure files can be read and written */ jpayne@68: private void checkFileExistence(){ jpayne@68: jpayne@68: //Ensure input files can be read jpayne@68: if(!Tools.testInputFiles(true, true, in, ref)){ jpayne@68: throw new RuntimeException("\nCan't read some input files.\n"); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: /** Adjust file-related static fields as needed for this program */ jpayne@68: private static void checkStatics(){ jpayne@68: //Adjust the number of threads for input file reading jpayne@68: if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){ jpayne@68: ByteFile.FORCE_MODE_BF2=true; jpayne@68: } jpayne@68: jpayne@68: // if(!ByteFile.FORCE_MODE_BF2){ jpayne@68: // ByteFile.FORCE_MODE_BF2=false; jpayne@68: // ByteFile.FORCE_MODE_BF1=true; jpayne@68: // } jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Outer Methods ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: void process(Timer t){ jpayne@68: jpayne@68: ByteFile bf=ByteFile.makeByteFile(ffin); jpayne@68: jpayne@68: processInner(bf); jpayne@68: jpayne@68: errorState|=bf.close(); jpayne@68: jpayne@68: t.stop(); jpayne@68: jpayne@68: outstream.println(Tools.timeLinesBytesProcessed(t, linesProcessed, bytesProcessed, 8)); jpayne@68: jpayne@68: outstream.println(); jpayne@68: outstream.println("Ref count: \t"+refCount); jpayne@68: outstream.println("Query count: \t"+queryCount); jpayne@68: jpayne@68: outstream.println(); jpayne@68: outstream.println("Ref-relative counts:"); jpayne@68: outstream.println("True Positive Start: \t"+truePositiveStart+"\t"+(String.format(Locale.ROOT, "%.3f%%", truePositiveStart*100.0/refCount))); jpayne@68: outstream.println("True Positive Stop: \t"+truePositiveStop+"\t"+(String.format(Locale.ROOT, "%.3f%%", truePositiveStop*100.0/refCount))); jpayne@68: // outstream.println("False Positive Start:\t"+falsePositiveStart+"\t"+(String.format(Locale.ROOT, "%.3f%%", falsePositiveStart*100.0/refCount))); jpayne@68: // outstream.println("False Positive Stop: \t"+falsePositiveStop+"\t"+(String.format(Locale.ROOT, "%.3f%%", falsePositiveStop*100.0/refCount))); jpayne@68: outstream.println("False Negative Start:\t"+falseNegativeStart+"\t"+(String.format(Locale.ROOT, "%.3f%%", falseNegativeStart*100.0/refCount))); jpayne@68: outstream.println("False Negative Stop: \t"+falseNegativeStop+"\t"+(String.format(Locale.ROOT, "%.3f%%", falseNegativeStop*100.0/refCount))); jpayne@68: jpayne@68: outstream.println(); jpayne@68: outstream.println("Query-relative counts:"); jpayne@68: outstream.println("True Positive Start: \t"+truePositiveStart2+"\t"+(String.format(Locale.ROOT, "%.3f%%", truePositiveStart2*100.0/queryCount))); jpayne@68: outstream.println("True Positive Stop: \t"+truePositiveStop2+"\t"+(String.format(Locale.ROOT, "%.3f%%", truePositiveStop2*100.0/queryCount))); jpayne@68: outstream.println("False Positive Start:\t"+falsePositiveStart2+"\t"+(String.format(Locale.ROOT, "%.3f%%", falsePositiveStart2*100.0/queryCount))); jpayne@68: outstream.println("False Positive Stop: \t"+falsePositiveStop2+"\t"+(String.format(Locale.ROOT, "%.3f%%", falsePositiveStop2*100.0/queryCount))); jpayne@68: jpayne@68: outstream.println(); jpayne@68: outstream.println("SNR: \t"+String.format(Locale.ROOT, "%.4f", 10*Math.log10((truePositiveStart2+truePositiveStop2+0.1)/(falsePositiveStart2+falsePositiveStop2+0.1)))); jpayne@68: jpayne@68: if(errorState){ jpayne@68: throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt."); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Inner Methods ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: @SuppressWarnings("unchecked") jpayne@68: private void processInner(ByteFile bf){ jpayne@68: byte[] line=bf.nextLine(); jpayne@68: jpayne@68: { jpayne@68: ArrayList refLines=GffLine.loadGffFile(ffref, "CDS,rRNA,tRNA", true); jpayne@68: jpayne@68: refCount=refLines.size(); jpayne@68: lineMap=new HashMap(); jpayne@68: startCountMap=new HashMap(); jpayne@68: stopCountMap=new HashMap(); jpayne@68: jpayne@68: for(GffLine gline : refLines){ jpayne@68: final int stop=gline.trueStop(); jpayne@68: StringNum sn=new StringNum(gline.seqid, stop); jpayne@68: lineMap.put(sn, gline); jpayne@68: startCountMap.put(sn, 0); jpayne@68: stopCountMap.put(sn, 0); jpayne@68: assert(lineMap.get(sn)==gline); jpayne@68: // assert(false) : "\n\nsn='"+sn+"'\n"+lineMap.containsKey(sn)+"\n"+lineMap.keySet(); jpayne@68: } jpayne@68: if(verbose){ jpayne@68: System.err.println(lineMap); jpayne@68: System.err.println(startCountMap); jpayne@68: System.err.println(stopCountMap); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: while(line!=null){ jpayne@68: if(line.length>0){ jpayne@68: if(maxLines>0 && linesProcessed>=maxLines){break;} jpayne@68: linesProcessed++; jpayne@68: bytesProcessed+=(line.length+1); jpayne@68: jpayne@68: final boolean valid=(line[0]!='#'); jpayne@68: if(valid){ jpayne@68: queryCount++; jpayne@68: GffLine gline=new GffLine(line); jpayne@68: processLine(gline); jpayne@68: } jpayne@68: } jpayne@68: line=bf.nextLine(); jpayne@68: } jpayne@68: jpayne@68: for(Entry e : startCountMap.entrySet()){ jpayne@68: if(e.getValue()<1){ jpayne@68: falseNegativeStart++; jpayne@68: } jpayne@68: } jpayne@68: for(Entry e : stopCountMap.entrySet()){ jpayne@68: if(e.getValue()<1){ jpayne@68: falseNegativeStop++; jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: private void processLine(GffLine gline){ jpayne@68: // boolean cds=gline.type.equals("CDS"); jpayne@68: // boolean trna=gline.type.equals("tRNA"); jpayne@68: // boolean rrna=gline.type.equals("rRNA"); jpayne@68: // if(!cds && !trna && !rrna){return;} jpayne@68: // if(cds && !ProkObject.callCDS){return;} jpayne@68: // if(trna && !ProkObject.calltRNA){return;} jpayne@68: // if(rrna){ jpayne@68: // int type=gline.prokType(); jpayne@68: // if(ProkObject.processType(type)){return;} jpayne@68: // } jpayne@68: int type=gline.prokType(); jpayne@68: if(!ProkObject.processType(type)){return;} jpayne@68: jpayne@68: final int stop=gline.trueStop(); jpayne@68: final int start=gline.trueStart(); jpayne@68: jpayne@68: // System.err.println("Considering "+start+", "+stop); jpayne@68: jpayne@68: StringNum sn=new StringNum(gline.seqid, stop); jpayne@68: GffLine refline=lineMap.get(sn); jpayne@68: jpayne@68: boolean fail=(refline==null || refline.strand!=gline.strand || !refline.type.equals(gline.type)); jpayne@68: if(fail){ jpayne@68: if(verbose){ jpayne@68: System.err.println("Can't find "+sn+"\n"+gline+"\n"+refline); jpayne@68: assert(false) : "\n\nsn='"+sn+"'\n"+lineMap.containsKey(sn)+"\n"+lineMap.keySet(); jpayne@68: } jpayne@68: falsePositiveStart++; jpayne@68: falsePositiveStop++; jpayne@68: falsePositiveStart2++; jpayne@68: falsePositiveStop2++; jpayne@68: }else{ jpayne@68: assert(stop==refline.trueStop()); jpayne@68: truePositiveStop++; jpayne@68: truePositiveStop2++; jpayne@68: stopCountMap.put(sn, stopCountMap.get(sn)+1); jpayne@68: if(start==refline.trueStart()){ jpayne@68: truePositiveStart++; jpayne@68: truePositiveStart2++; jpayne@68: startCountMap.put(sn, startCountMap.get(sn)+1); jpayne@68: }else{ jpayne@68: falsePositiveStart++; jpayne@68: falsePositiveStart2++; jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Fields ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private String in=null; jpayne@68: private String ref=null; jpayne@68: jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private HashMap lineMap; jpayne@68: private HashMap startCountMap; jpayne@68: private HashMap stopCountMap; jpayne@68: jpayne@68: // private HashMap> map; jpayne@68: // private HashSet stopSet; jpayne@68: // private HashSet startSet; jpayne@68: // private HashSet stopSetM; jpayne@68: // private HashSet startSetM; jpayne@68: jpayne@68: private long linesProcessed=0; jpayne@68: private long linesOut=0; jpayne@68: private long bytesProcessed=0; jpayne@68: private long bytesOut=0; jpayne@68: jpayne@68: private long maxLines=Long.MAX_VALUE; jpayne@68: jpayne@68: private long falsePositiveStart=0; jpayne@68: private long falsePositiveStop=0; jpayne@68: private long truePositiveStart=0; jpayne@68: private long truePositiveStop=0; jpayne@68: private long falseNegativeStart=0; jpayne@68: private long falseNegativeStop=0; jpayne@68: jpayne@68: private long falsePositiveStart2=0; jpayne@68: private long falsePositiveStop2=0; jpayne@68: private long truePositiveStart2=0; jpayne@68: private long truePositiveStop2=0; jpayne@68: jpayne@68: private long refCount=0; jpayne@68: private long queryCount=0; jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Final Fields ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private final FileFormat ffin; jpayne@68: private final FileFormat ffref; jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Common Fields ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private PrintStream outstream=System.err; jpayne@68: public static boolean verbose=false; jpayne@68: public boolean errorState=false; jpayne@68: private boolean overwrite=false; jpayne@68: private boolean append=false; jpayne@68: jpayne@68: }