Mercurial > repos > rliterman > csp2
diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/gff/GbffLocus.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
line wrap: on
line diff
package gff;

import java.util.ArrayList;

import fileIO.ByteStreamWriter;
import shared.Tools;

/**
 * Parses the lines of a single locus record into an accession, organism,
 * species, and a list of {@link GbffFeature}s, and can write the result as GFF.
 *
 * The record is processed block by block: a block starts on a line whose first
 * byte is not a space, and continues over following lines that are empty or
 * begin with a space. Parsing happens entirely in the constructor; {@link #num}
 * is the shared cursor used by every parse method.
 */
public class GbffLocus {

	/**
	 * Parses every block in {@code lines}.
	 * @param lines Raw lines of one locus record, one byte[] per line.
	 */
	public GbffLocus(ArrayList<byte[]> lines) {
		while(num<lines.size()){
			parseBlock(lines);
		}
	}

	/**
	 * Dispatches on the block keyword at the current line and consumes that block.
	 * Every branch advances {@link #num} by at least one line, so the constructor
	 * loop always terminates.
	 * @param lines Raw lines of the record.
	 * @return The new value of {@link #num}.
	 */
	int parseBlock(ArrayList<byte[]> lines){
		byte[] line=lines.get(num);
		if(line==null || line.length==0){
			//Blank lines carry no block keyword; skip them, as advanceBlock does.
			num++;
		}else if(Tools.startsWith(line, " ")){
			//A continuation line should never be seen at block level.
			assert(false) : new String(line);
			num++;
		}else if(Tools.startsWith(line, "LOCUS ")){
			parseLocus(lines);
		}else if(Tools.startsWith(line, "DEFINITION ")){
			parseDefinition(lines);
		}else if(Tools.startsWith(line, "ACCESSION ")){
			parseAccession(lines);
		}else if(Tools.startsWith(line, "VERSION ")){
			parseVersion(lines);
		}else if(Tools.startsWith(line, "DBLINK ")){
			parseDBLink(lines);
		}else if(Tools.startsWith(line, "KEYWORDS ")){
			parseKeywords(lines);
		}else if(Tools.startsWith(line, "SOURCE ")){
			parseSource(lines);
		}else if(Tools.startsWith(line, "REFERENCE ")){
			parseReference(lines);
		}else if(Tools.startsWith(line, "COMMENT ")){
			parseComment(lines);
		}else if(Tools.startsWith(line, "FEATURES ")){
			parseFeatures(lines);
		}else if(Tools.startsWith(line, "CONTIG ")){
			parseContig(lines);
		}else if(Tools.startsWith(line, "ORIGIN ")){
			parseOrigin(lines);
		}else if(Tools.startsWith(line, "PRIMARY ")){
			parsePrimary(lines);
		}else{
			assert(false) : "Unhandled block type: "+new String(line);
			//With assertions disabled, skip the unknown block instead of spinning forever.
			advanceBlock(lines);
		}
		return num;
	}

	/**
	 * Advances {@link #num} until it reaches a non-null, non-empty line or the
	 * last index of {@code lines}, whichever comes first.
	 * @return The line at the new position. This is null if {@link #num} was
	 * already at (or past) the last index, and may be null or empty if the last
	 * line was reached without finding content.
	 */
	private byte[] nextLine(ArrayList<byte[]> lines){
		byte[] line=null;
		for(final int lim=lines.size()-1; num<lim && (line==null || line.length==0); ){
			num++;
			line=lines.get(num);
		}
		return line;
	}

	/** @return The line at {@link #num}, or null if {@link #num} is past the end. */
	private byte[] getLine(ArrayList<byte[]> lines){
		return num>=lines.size() ? null : lines.get(num);
	}

	/**
	 * Moves the pointer to the next block start: the next non-empty line whose
	 * first byte is not a space, or the end of {@code lines}.
	 * @return The new value of {@link #num}.
	 */
	private int advanceBlock(ArrayList<byte[]> lines){
		for(num++; num<lines.size(); num++){
			byte[] line=lines.get(num);
			if(line!=null && line.length>0 && line[0]!=' '){break;}
		}
		return num;
	}

	/**
	 * Moves the pointer to the next feature or block start, or the end of {@code lines}.
	 * @return The new value of {@link #num}.
	 */
	private int advanceFeature(ArrayList<byte[]> lines){
		for(num++; num<lines.size(); num++){
			if(startsFeatureOrBlock(lines.get(num))){break;}
		}
		return num;
	}

	/**
	 * A line starts a new feature or block when it is non-empty and either does
	 * not begin with a space or has a non-space byte at index 5. Lines too short
	 * to have an index 5 are treated as continuation lines rather than read out
	 * of bounds.
	 */
	private static boolean startsFeatureOrBlock(byte[] line){
		if(line==null || line.length==0){return false;}
		return line[0]!=' ' || (line.length>5 && line[5]!=' ');
	}

	/**
	 * Strips the 12-byte keyword column from a block's first line.
	 * @return The text from index 12 onward, or an empty string if the line is
	 * no longer than the keyword column.
	 */
	private String trimBlockName(byte[] line){
		assert(line.length>=12 && line[11]==' ') : new String(line);
		if(line.length<=12){return "";}
		return new String(line, 12, line.length-12);
	}

	/**
	 * Extracts the feature key that begins at index 5 of a feature's first line
	 * and ends at the first space, looking no further than index 20.
	 * @return The feature key, or an empty string if the line has no index 5.
	 */
	private String toFeatureType(byte[] line){
		assert(line.length>20 && line[4]==' ' && line[5]!=' ' && line[20]==' ') : new String(line);
		final int start=5;
		if(line.length<=start){return "";}
		//Never scan past the end of a short line.
		final int lim=Math.min(21, line.length);
		int stop=start+1;
		while(stop<lim && line[stop]!=' '){stop++;}
		return new String(line, start, stop-start);
	}

	/** Takes the first whitespace-delimited token as the accession, unless one is already set. */
	private int parseLocus(ArrayList<byte[]> lines){
		byte[] line=lines.get(num);
		if(accession==null){
			String s=trimBlockName(line);
			String[] split=Tools.whitespacePlus.split(s);
			accession=split.length>0 ? split[0] : null;
		}
		return advanceBlock(lines);
	}

	/** Takes the text before the first Tools.commaPattern match as the organism, unless one is already set. */
	private int parseDefinition(ArrayList<byte[]> lines){
		byte[] line=lines.get(num);
		if(organism==null){
			String s=trimBlockName(line);
			String[] split=Tools.commaPattern.split(s);
			organism=split.length>0 ? split[0] : null;
		}
		return advanceBlock(lines);
	}

	/** Takes the first whitespace-delimited token as the accession, unless one is already set. */
	private int parseAccession(ArrayList<byte[]> lines){
		byte[] line=lines.get(num);
		if(accession==null){
			String s=trimBlockName(line);
			String[] split=Tools.whitespacePlus.split(s);
			accession=split.length>0 ? split[0] : null;
		}
		return advanceBlock(lines);
	}

	/**
	 * Replaces the accession with the first token of the VERSION line whenever
	 * that token is longer than one character, or when no accession is set yet.
	 */
	private int parseVersion(ArrayList<byte[]> lines){
		byte[] line=lines.get(num);
		String s=trimBlockName(line);
		String[] split=Tools.whitespacePlus.split(s);
		s=split.length>0 ? split[0] : null;
		if(accession==null || (s!=null && s.length()>1)){
			accession=s;
		}
		return advanceBlock(lines);
	}

	/** Skips the block without reading anything from it. */
	private int parseDBLink(ArrayList<byte[]> lines){
		return advanceBlock(lines);
	}

	/** Skips the block without reading anything from it. */
	private int parseKeywords(ArrayList<byte[]> lines){
		return advanceBlock(lines);
	}

	/** Takes the remainder of the SOURCE line as the species, unless one is already set. */
	private int parseSource(ArrayList<byte[]> lines){
		byte[] line=lines.get(num);
		if(species==null){
			species=trimBlockName(line);
		}
		return advanceBlock(lines);
	}

	/** Skips the block without reading anything from it. */
	private int parseReference(ArrayList<byte[]> lines){
		return advanceBlock(lines);
	}

	/** Skips the block without reading anything from it. */
	private int parseComment(ArrayList<byte[]> lines){
		return advanceBlock(lines);
	}

	/**
	 * Consumes the FEATURES block. Features whose key appears in
	 * {@link #featureTypes} are parsed; all others are skipped.
	 * @return The new value of {@link #num}, which is always past the FEATURES line.
	 */
	private int parseFeatures(ArrayList<byte[]> lines){
		final int headerLine=num;
		//The length check guards against the empty line nextLine can return at the end of input.
		for(byte[] line=nextLine(lines); line!=null && line.length>0 && line[0]==' '; line=getLine(lines)){
			String type=toFeatureType(line);
			int idx=Tools.find(type, featureTypes);
			if(idx>=0){
				parseFeature(lines, type);
			}else{
				advanceFeature(lines);
			}
		}
		//nextLine does not move when FEATURES is the final line; step past it so the caller makes progress.
		if(num==headerLine){num++;}
		return num;
	}

	/**
	 * Collects the current line plus its continuation lines, builds a
	 * {@link GbffFeature} from them, and keeps it unless it reports an error.
	 * Leaves the pointer at the next feature or block start.
	 * @param type Feature key of the feature starting at {@link #num}.
	 * @return The new value of {@link #num}.
	 */
	private int parseFeature(ArrayList<byte[]> lines, String type){
		ArrayList<byte[]> flist=new ArrayList<byte[]>();
		flist.add(lines.get(num));
		for(num++; num<lines.size(); num++){
			byte[] line=lines.get(num);
			if(startsFeatureOrBlock(line)){break;}
			flist.add(line);
		}
		GbffFeature f=new GbffFeature(flist, type, accession);
		if(!f.error){
			features.add(f);
		}
		return num;
	}

	/** Skips the block without reading anything from it. */
	private int parseContig(ArrayList<byte[]> lines){
		return advanceBlock(lines);
	}

	/** Skips the block without reading anything from it. */
	private int parseOrigin(ArrayList<byte[]> lines){
		return advanceBlock(lines);
	}

	/** Skips the block without reading anything from it. */
	private int parsePrimary(ArrayList<byte[]> lines){
		return advanceBlock(lines);
	}

	/**
	 * Writes a "##sequence-region" line for this locus, followed by every CDS,
	 * tRNA, and rRNA feature that is neither pseudo nor in error.
	 * The start and stop coordinates are appended only when both are positive.
	 * @param bsw Destination writer.
	 * @throws NullPointerException if no accession was parsed for this locus.
	 */
	public void toGff(ByteStreamWriter bsw) {
		final byte[] accessionB=accession.getBytes();
		bsw.print(seqRegB);
		bsw.print(accessionB);
		if(start>0 && stop>0){
			bsw.print(' ').print(start).print(' ').print(stop);
		}
		bsw.println();
		for(GbffFeature f : features){
			final boolean wanted=(f.type==GbffFeature.CDS || f.type==GbffFeature.tRNA || f.type==GbffFeature.rRNA);
			if(wanted && !f.pseudo && !f.error){
				f.toGff(bsw);
			}
		}
	}


	/** Line number */
	int num=0;

	/** Not read anywhere in this class. */
	boolean printGene=false;
	/** Not read anywhere in this class. */
	boolean printRepeat=false;

	/** Feature keys that parseFeatures will parse rather than skip. */
	public static String[] featureTypes=GbffFeature.typeStrings;
	private static final byte[] seqRegB="##sequence-region ".getBytes();

	/** Set from the LOCUS or ACCESSION line, and overridden by VERSION when present. */
	String accession;
	/** Text of the DEFINITION line up to the first Tools.commaPattern match. */
	String organism;
	/** Remainder of the first SOURCE line. */
	String species;
	/** Not assigned anywhere in this class; printed by toGff only when positive. */
	int start;
	/** Not assigned anywhere in this class; printed by toGff only when positive. */
	int stop;
	/** Successfully parsed features, in file order. */
	ArrayList<GbffFeature> features=new ArrayList<GbffFeature>();
}