annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/gff/GbffFeature.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 package gff;
jpayne@68 2
jpayne@68 3 import java.util.ArrayList;
jpayne@68 4 import java.util.Arrays;
jpayne@68 5
jpayne@68 6 import fileIO.ByteStreamWriter;
jpayne@68 7 import shared.Shared;
jpayne@68 8 import shared.Tools;
jpayne@68 9 import structures.ByteBuilder;
jpayne@68 10
jpayne@68 11 public class GbffFeature {
jpayne@68 12
jpayne@68 13 public GbffFeature(final ArrayList<byte[]> lines0, final String typeString, final String accessionString){
jpayne@68 14 accession=accessionString;
jpayne@68 15 setType(typeString);
jpayne@68 16 parseSlow(lines0);
jpayne@68 17 if(type==rRNA){
jpayne@68 18 setSubtype();
jpayne@68 19 }
jpayne@68 20 if(stop<start){error=true;}
jpayne@68 21 }
jpayne@68 22
jpayne@68 23 private void parseSlow(final ArrayList<byte[]> lines0){
jpayne@68 24 ArrayList<byte[]> lines=fixLines(lines0);
jpayne@68 25 parseStartStop(lines.get(0));
jpayne@68 26 for(int i=1; i<lines.size(); i++){
jpayne@68 27 byte[] line=lines.get(i);
jpayne@68 28 if(Tools.startsWith(line, "product=")){
jpayne@68 29 product=parseLine(line);
jpayne@68 30 }else if(Tools.startsWith(line, "locus_tag=")){
jpayne@68 31 locus_tag=parseLine(line);
jpayne@68 32 }else if(Tools.equals(line, "pseudo")){
jpayne@68 33 pseudo=true;
jpayne@68 34 }
jpayne@68 35
jpayne@68 36 // else if(Tools.startsWith(line, "ID=")){
jpayne@68 37 // id=parseLine(line);
jpayne@68 38 // }else if(Tools.startsWith(line, "Name=")){
jpayne@68 39 // name=parseLine(line);
jpayne@68 40 // }
jpayne@68 41 }
jpayne@68 42 // System.err.println("\nvvvvv");
jpayne@68 43 // for(byte[] line : lines0){
jpayne@68 44 // System.err.println("'"+new String(line)+"'");
jpayne@68 45 // }
jpayne@68 46 // for(byte[] line : lines){
jpayne@68 47 // System.err.println("'"+new String(line)+"'");
jpayne@68 48 // }
jpayne@68 49 // System.err.println("^^^^^");
jpayne@68 50 }
jpayne@68 51
jpayne@68 52 ArrayList<byte[]> fixLines(ArrayList<byte[]> lines){
jpayne@68 53 ArrayList<byte[]> fixed=new ArrayList<byte[]>();
jpayne@68 54 ByteBuilder bb=new ByteBuilder();
jpayne@68 55 for(byte[] line : lines){
jpayne@68 56 if(bb.length()>0 && line[21]=='/'){
jpayne@68 57 fixed.add(bb.toBytes());
jpayne@68 58 bb.clear();
jpayne@68 59 }
jpayne@68 60 append(bb, line);
jpayne@68 61 }
jpayne@68 62 if(bb.length()>0){
jpayne@68 63 fixed.add(bb.toBytes());
jpayne@68 64 bb.clear();
jpayne@68 65 }
jpayne@68 66 return fixed;
jpayne@68 67 }
jpayne@68 68
jpayne@68 69 void append(ByteBuilder bb, byte[] line){
jpayne@68 70 assert(line[20]==' ');
jpayne@68 71 assert(line.length>21);
jpayne@68 72 // assert(line[21]!=' ') : "'"+new String(line)+"'";
jpayne@68 73 if(line[21]=='/'){
jpayne@68 74 bb.append(line, 22, line.length-22);
jpayne@68 75 }else{
jpayne@68 76 // System.err.println(line.length+", "+21+", "+(line.length-21+1)+"\n'"+new String(line)+"'");
jpayne@68 77 if(bb.length>0){bb.append(' ');}
jpayne@68 78 bb.append(line, 21, line.length-21);
jpayne@68 79 }
jpayne@68 80 }
jpayne@68 81
jpayne@68 82 void setType(String typeString){
jpayne@68 83 int x=Tools.find(typeString, typeStrings);
jpayne@68 84 assert(x>=0) : x+", "+typeString;
jpayne@68 85 type=x;
jpayne@68 86 }
jpayne@68 87
jpayne@68 88 void parseStartStop(final byte[] line0){
jpayne@68 89 byte[] line=line0;
jpayne@68 90
jpayne@68 91 if(line[0]=='c'){
jpayne@68 92 assert(Tools.startsWith(line, "complement("));
jpayne@68 93 line=Arrays.copyOfRange(line, 11, line.length-1);
jpayne@68 94 strand=Shared.MINUS;
jpayne@68 95 }
jpayne@68 96 if(line[0]=='j'){
jpayne@68 97 assert(Tools.startsWith(line, "join("));
jpayne@68 98 line=Arrays.copyOfRange(line, 5, line.length-1);
jpayne@68 99 strand=Shared.MINUS;
jpayne@68 100 }
jpayne@68 101
jpayne@68 102 int i=0;
jpayne@68 103 for(start=0; i<line.length; i++){
jpayne@68 104 int x=line[i];
jpayne@68 105 if(x=='.'){break;}
jpayne@68 106 else if(x!='<'){
jpayne@68 107 if(Tools.isDigit(x)){
jpayne@68 108 start=start*10+(x-'0');
jpayne@68 109 }else{
jpayne@68 110 //if(!error){System.err.println(new String(line0)+"\n"+new String(line));}
jpayne@68 111 error=true;
jpayne@68 112 }
jpayne@68 113 }
jpayne@68 114 }
jpayne@68 115 // while(line[i]=='.'){i++;} //Not needed
jpayne@68 116 for(stop=0; i<line.length; i++){
jpayne@68 117 int x=line[i];
jpayne@68 118 if(x=='.' || x==','){
jpayne@68 119 stop=0;
jpayne@68 120 }else if(x==' '){
jpayne@68 121 //do nothing; line wrap
jpayne@68 122 }else if(x!='>'){
jpayne@68 123 if(Tools.isDigit(x)){
jpayne@68 124 stop=stop*10+(x-'0');
jpayne@68 125 }else{
jpayne@68 126 //if(!error){System.err.println(new String(line0)+"\n"+new String(line));}
jpayne@68 127 error=true;
jpayne@68 128 }
jpayne@68 129 }
jpayne@68 130 }
jpayne@68 131 }
jpayne@68 132
jpayne@68 133 String parseLine(byte[] line){
jpayne@68 134 String[] split=Tools.equalsPattern.split(new String(line));
jpayne@68 135 String s=split[1];
jpayne@68 136 return s.substring(1, s.length()-1);
jpayne@68 137 }
jpayne@68 138
jpayne@68 139 void setSubtype(){
jpayne@68 140 subtype=-1;
jpayne@68 141 if(product==null){return;}
jpayne@68 142 String[] split=Tools.spacePattern.split(product);
jpayne@68 143 subtype=Tools.find(split[0], typeStrings);
jpayne@68 144 // assert(false) : type+", "+subtype+", "+split[0]+", "+this.toString()+"\n"+product;
jpayne@68 145 }
jpayne@68 146
jpayne@68 147 public void toGff(ByteStreamWriter bsw) {
jpayne@68 148 ByteBuilder bb=bsw.getBuffer();
jpayne@68 149 appendGff(bb);
jpayne@68 150 bb.nl();
jpayne@68 151 bsw.flushBuffer(false);
jpayne@68 152 }
jpayne@68 153
jpayne@68 154 public ByteBuilder appendGff(ByteBuilder bb) {
jpayne@68 155 // bsw.print("#seqid source type start end score strand phase attributes\n".getBytes());
jpayne@68 156 bb.append(accession).tab();
jpayne@68 157 bb.append('.').tab();
jpayne@68 158 bb.append((pseudo && type==GENE) ? "pseudogene" : typeStringsGff[type]).tab();
jpayne@68 159 bb.append(start).tab();
jpayne@68 160 bb.append(stop).tab();
jpayne@68 161 bb.append('.').tab();
jpayne@68 162 bb.append(Shared.strandCodes2[strand]).tab();
jpayne@68 163 bb.append('.').tab();
jpayne@68 164
jpayne@68 165 boolean attributes=false;
jpayne@68 166 // if(id!=null){
jpayne@68 167 // bb.append("ID=").append(id);
jpayne@68 168 // attributes=true;
jpayne@68 169 // }
jpayne@68 170 // if(name!=null){
jpayne@68 171 // if(attributes){bb.append(';');}
jpayne@68 172 // bb.append("Name=").append(name);
jpayne@68 173 // attributes=true;
jpayne@68 174 // }
jpayne@68 175 if(product!=null){
jpayne@68 176 if(attributes){bb.append(';');}
jpayne@68 177 bb.append("product=").append(product);
jpayne@68 178 attributes=true;
jpayne@68 179 }
jpayne@68 180 if(locus_tag!=null){
jpayne@68 181 if(attributes){bb.append(';');}
jpayne@68 182 bb.append("locus_tag=").append(locus_tag);
jpayne@68 183 attributes=true;
jpayne@68 184 }
jpayne@68 185 if(subtype>-1){
jpayne@68 186 if(attributes){bb.append(';');}
jpayne@68 187 bb.append("subtype=").append(typeStringsGff[subtype]);
jpayne@68 188 attributes=true;
jpayne@68 189 }
jpayne@68 190 if(!attributes){bb.append('.');}
jpayne@68 191 return bb;
jpayne@68 192 }
jpayne@68 193
jpayne@68 194
jpayne@68 195 @Override
jpayne@68 196 public String toString(){
jpayne@68 197 return appendGff(new ByteBuilder()).toString();
jpayne@68 198 }
jpayne@68 199
jpayne@68 200 public int type=-1;
jpayne@68 201 public int subtype=-1;
jpayne@68 202 //TODO: could have coding amino, for tRNA
jpayne@68 203 public String product;
jpayne@68 204 public String locus_tag;
jpayne@68 205 // public String id;
jpayne@68 206 // public String name;
jpayne@68 207
jpayne@68 208 public int start;
jpayne@68 209 public int stop;
jpayne@68 210 public byte strand=Shared.PLUS;
jpayne@68 211 public String accession;
jpayne@68 212 public boolean pseudo=false;
jpayne@68 213 public boolean error=false;
jpayne@68 214
jpayne@68 215 public static final String[] typeStrings={"gene", "CDS", "rRNA", "tRNA", "ncRNA", "repeat_region",
jpayne@68 216 "5'UTR", "3'UTR", "intron", "exon", "5S", "16S", "23S"};
jpayne@68 217 public static final String[] typeStringsGff={"gene", "CDS", "rRNA", "tRNA", "ncRNA", "repeat_region",
jpayne@68 218 "five_prime_UTR", "three_prime_UTR", "intron", "exon", "5S", "16S", "23S"};
jpayne@68 219
jpayne@68 220 //types
jpayne@68 221 public static final int GENE=0, CDS=1, rRNA=2, tRNA=3, ncRNA=4, repeat_region=5, UTR5=6, UTR3=7, intron=8, exon=9;
jpayne@68 222 //subtypes
jpayne@68 223 public static final int r5S=10, r16S=11, r23S=12;
jpayne@68 224
jpayne@68 225 }