Mercurial > repos > rliterman > csp2
view CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/gff/GbffFeature.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
line wrap: on
line source
package gff; import java.util.ArrayList; import java.util.Arrays; import fileIO.ByteStreamWriter; import shared.Shared; import shared.Tools; import structures.ByteBuilder; public class GbffFeature { public GbffFeature(final ArrayList<byte[]> lines0, final String typeString, final String accessionString){ accession=accessionString; setType(typeString); parseSlow(lines0); if(type==rRNA){ setSubtype(); } if(stop<start){error=true;} } private void parseSlow(final ArrayList<byte[]> lines0){ ArrayList<byte[]> lines=fixLines(lines0); parseStartStop(lines.get(0)); for(int i=1; i<lines.size(); i++){ byte[] line=lines.get(i); if(Tools.startsWith(line, "product=")){ product=parseLine(line); }else if(Tools.startsWith(line, "locus_tag=")){ locus_tag=parseLine(line); }else if(Tools.equals(line, "pseudo")){ pseudo=true; } // else if(Tools.startsWith(line, "ID=")){ // id=parseLine(line); // }else if(Tools.startsWith(line, "Name=")){ // name=parseLine(line); // } } // System.err.println("\nvvvvv"); // for(byte[] line : lines0){ // System.err.println("'"+new String(line)+"'"); // } // for(byte[] line : lines){ // System.err.println("'"+new String(line)+"'"); // } // System.err.println("^^^^^"); } ArrayList<byte[]> fixLines(ArrayList<byte[]> lines){ ArrayList<byte[]> fixed=new ArrayList<byte[]>(); ByteBuilder bb=new ByteBuilder(); for(byte[] line : lines){ if(bb.length()>0 && line[21]=='/'){ fixed.add(bb.toBytes()); bb.clear(); } append(bb, line); } if(bb.length()>0){ fixed.add(bb.toBytes()); bb.clear(); } return fixed; } void append(ByteBuilder bb, byte[] line){ assert(line[20]==' '); assert(line.length>21); // assert(line[21]!=' ') : "'"+new String(line)+"'"; if(line[21]=='/'){ bb.append(line, 22, line.length-22); }else{ // System.err.println(line.length+", "+21+", "+(line.length-21+1)+"\n'"+new String(line)+"'"); if(bb.length>0){bb.append(' ');} bb.append(line, 21, line.length-21); } } void setType(String typeString){ int x=Tools.find(typeString, typeStrings); assert(x>=0) : x+", "+typeString; type=x; } void parseStartStop(final byte[] line0){ byte[] line=line0; if(line[0]=='c'){ assert(Tools.startsWith(line, "complement(")); line=Arrays.copyOfRange(line, 11, line.length-1); strand=Shared.MINUS; } if(line[0]=='j'){ assert(Tools.startsWith(line, "join(")); line=Arrays.copyOfRange(line, 5, line.length-1); strand=Shared.MINUS; } int i=0; for(start=0; i<line.length; i++){ int x=line[i]; if(x=='.'){break;} else if(x!='<'){ if(Tools.isDigit(x)){ start=start*10+(x-'0'); }else{ //if(!error){System.err.println(new String(line0)+"\n"+new String(line));} error=true; } } } // while(line[i]=='.'){i++;} //Not needed for(stop=0; i<line.length; i++){ int x=line[i]; if(x=='.' || x==','){ stop=0; }else if(x==' '){ //do nothing; line wrap }else if(x!='>'){ if(Tools.isDigit(x)){ stop=stop*10+(x-'0'); }else{ //if(!error){System.err.println(new String(line0)+"\n"+new String(line));} error=true; } } } } String parseLine(byte[] line){ String[] split=Tools.equalsPattern.split(new String(line)); String s=split[1]; return s.substring(1, s.length()-1); } void setSubtype(){ subtype=-1; if(product==null){return;} String[] split=Tools.spacePattern.split(product); subtype=Tools.find(split[0], typeStrings); // assert(false) : type+", "+subtype+", "+split[0]+", "+this.toString()+"\n"+product; } public void toGff(ByteStreamWriter bsw) { ByteBuilder bb=bsw.getBuffer(); appendGff(bb); bb.nl(); bsw.flushBuffer(false); } public ByteBuilder appendGff(ByteBuilder bb) { // bsw.print("#seqid source type start end score strand phase attributes\n".getBytes()); bb.append(accession).tab(); bb.append('.').tab(); bb.append((pseudo && type==GENE) ? "pseudogene" : typeStringsGff[type]).tab(); bb.append(start).tab(); bb.append(stop).tab(); bb.append('.').tab(); bb.append(Shared.strandCodes2[strand]).tab(); bb.append('.').tab(); boolean attributes=false; // if(id!=null){ // bb.append("ID=").append(id); // attributes=true; // } // if(name!=null){ // if(attributes){bb.append(';');} // bb.append("Name=").append(name); // attributes=true; // } if(product!=null){ if(attributes){bb.append(';');} bb.append("product=").append(product); attributes=true; } if(locus_tag!=null){ if(attributes){bb.append(';');} bb.append("locus_tag=").append(locus_tag); attributes=true; } if(subtype>-1){ if(attributes){bb.append(';');} bb.append("subtype=").append(typeStringsGff[subtype]); attributes=true; } if(!attributes){bb.append('.');} return bb; } @Override public String toString(){ return appendGff(new ByteBuilder()).toString(); } public int type=-1; public int subtype=-1; //TODO: could have coding amino, for tRNA public String product; public String locus_tag; // public String id; // public String name; public int start; public int stop; public byte strand=Shared.PLUS; public String accession; public boolean pseudo=false; public boolean error=false; public static final String[] typeStrings={"gene", "CDS", "rRNA", "tRNA", "ncRNA", "repeat_region", "5'UTR", "3'UTR", "intron", "exon", "5S", "16S", "23S"}; public static final String[] typeStringsGff={"gene", "CDS", "rRNA", "tRNA", "ncRNA", "repeat_region", "five_prime_UTR", "three_prime_UTR", "intron", "exon", "5S", "16S", "23S"}; //types public static final int GENE=0, CDS=1, rRNA=2, tRNA=3, ncRNA=4, repeat_region=5, UTR5=6, UTR3=7, intron=8, exon=9; //subtypes public static final int r5S=10, r16S=11, r23S=12; }