comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/gff/GbffFeature.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package gff;
2
3 import java.util.ArrayList;
4 import java.util.Arrays;
5
6 import fileIO.ByteStreamWriter;
7 import shared.Shared;
8 import shared.Tools;
9 import structures.ByteBuilder;
10
11 public class GbffFeature {
12
13 public GbffFeature(final ArrayList<byte[]> lines0, final String typeString, final String accessionString){
14 accession=accessionString;
15 setType(typeString);
16 parseSlow(lines0);
17 if(type==rRNA){
18 setSubtype();
19 }
20 if(stop<start){error=true;}
21 }
22
23 private void parseSlow(final ArrayList<byte[]> lines0){
24 ArrayList<byte[]> lines=fixLines(lines0);
25 parseStartStop(lines.get(0));
26 for(int i=1; i<lines.size(); i++){
27 byte[] line=lines.get(i);
28 if(Tools.startsWith(line, "product=")){
29 product=parseLine(line);
30 }else if(Tools.startsWith(line, "locus_tag=")){
31 locus_tag=parseLine(line);
32 }else if(Tools.equals(line, "pseudo")){
33 pseudo=true;
34 }
35
36 // else if(Tools.startsWith(line, "ID=")){
37 // id=parseLine(line);
38 // }else if(Tools.startsWith(line, "Name=")){
39 // name=parseLine(line);
40 // }
41 }
42 // System.err.println("\nvvvvv");
43 // for(byte[] line : lines0){
44 // System.err.println("'"+new String(line)+"'");
45 // }
46 // for(byte[] line : lines){
47 // System.err.println("'"+new String(line)+"'");
48 // }
49 // System.err.println("^^^^^");
50 }
51
52 ArrayList<byte[]> fixLines(ArrayList<byte[]> lines){
53 ArrayList<byte[]> fixed=new ArrayList<byte[]>();
54 ByteBuilder bb=new ByteBuilder();
55 for(byte[] line : lines){
56 if(bb.length()>0 && line[21]=='/'){
57 fixed.add(bb.toBytes());
58 bb.clear();
59 }
60 append(bb, line);
61 }
62 if(bb.length()>0){
63 fixed.add(bb.toBytes());
64 bb.clear();
65 }
66 return fixed;
67 }
68
69 void append(ByteBuilder bb, byte[] line){
70 assert(line[20]==' ');
71 assert(line.length>21);
72 // assert(line[21]!=' ') : "'"+new String(line)+"'";
73 if(line[21]=='/'){
74 bb.append(line, 22, line.length-22);
75 }else{
76 // System.err.println(line.length+", "+21+", "+(line.length-21+1)+"\n'"+new String(line)+"'");
77 if(bb.length>0){bb.append(' ');}
78 bb.append(line, 21, line.length-21);
79 }
80 }
81
82 void setType(String typeString){
83 int x=Tools.find(typeString, typeStrings);
84 assert(x>=0) : x+", "+typeString;
85 type=x;
86 }
87
88 void parseStartStop(final byte[] line0){
89 byte[] line=line0;
90
91 if(line[0]=='c'){
92 assert(Tools.startsWith(line, "complement("));
93 line=Arrays.copyOfRange(line, 11, line.length-1);
94 strand=Shared.MINUS;
95 }
96 if(line[0]=='j'){
97 assert(Tools.startsWith(line, "join("));
98 line=Arrays.copyOfRange(line, 5, line.length-1);
99 strand=Shared.MINUS;
100 }
101
102 int i=0;
103 for(start=0; i<line.length; i++){
104 int x=line[i];
105 if(x=='.'){break;}
106 else if(x!='<'){
107 if(Tools.isDigit(x)){
108 start=start*10+(x-'0');
109 }else{
110 //if(!error){System.err.println(new String(line0)+"\n"+new String(line));}
111 error=true;
112 }
113 }
114 }
115 // while(line[i]=='.'){i++;} //Not needed
116 for(stop=0; i<line.length; i++){
117 int x=line[i];
118 if(x=='.' || x==','){
119 stop=0;
120 }else if(x==' '){
121 //do nothing; line wrap
122 }else if(x!='>'){
123 if(Tools.isDigit(x)){
124 stop=stop*10+(x-'0');
125 }else{
126 //if(!error){System.err.println(new String(line0)+"\n"+new String(line));}
127 error=true;
128 }
129 }
130 }
131 }
132
133 String parseLine(byte[] line){
134 String[] split=Tools.equalsPattern.split(new String(line));
135 String s=split[1];
136 return s.substring(1, s.length()-1);
137 }
138
139 void setSubtype(){
140 subtype=-1;
141 if(product==null){return;}
142 String[] split=Tools.spacePattern.split(product);
143 subtype=Tools.find(split[0], typeStrings);
144 // assert(false) : type+", "+subtype+", "+split[0]+", "+this.toString()+"\n"+product;
145 }
146
147 public void toGff(ByteStreamWriter bsw) {
148 ByteBuilder bb=bsw.getBuffer();
149 appendGff(bb);
150 bb.nl();
151 bsw.flushBuffer(false);
152 }
153
154 public ByteBuilder appendGff(ByteBuilder bb) {
155 // bsw.print("#seqid source type start end score strand phase attributes\n".getBytes());
156 bb.append(accession).tab();
157 bb.append('.').tab();
158 bb.append((pseudo && type==GENE) ? "pseudogene" : typeStringsGff[type]).tab();
159 bb.append(start).tab();
160 bb.append(stop).tab();
161 bb.append('.').tab();
162 bb.append(Shared.strandCodes2[strand]).tab();
163 bb.append('.').tab();
164
165 boolean attributes=false;
166 // if(id!=null){
167 // bb.append("ID=").append(id);
168 // attributes=true;
169 // }
170 // if(name!=null){
171 // if(attributes){bb.append(';');}
172 // bb.append("Name=").append(name);
173 // attributes=true;
174 // }
175 if(product!=null){
176 if(attributes){bb.append(';');}
177 bb.append("product=").append(product);
178 attributes=true;
179 }
180 if(locus_tag!=null){
181 if(attributes){bb.append(';');}
182 bb.append("locus_tag=").append(locus_tag);
183 attributes=true;
184 }
185 if(subtype>-1){
186 if(attributes){bb.append(';');}
187 bb.append("subtype=").append(typeStringsGff[subtype]);
188 attributes=true;
189 }
190 if(!attributes){bb.append('.');}
191 return bb;
192 }
193
194
195 @Override
196 public String toString(){
197 return appendGff(new ByteBuilder()).toString();
198 }
199
200 public int type=-1;
201 public int subtype=-1;
202 //TODO: could have coding amino, for tRNA
203 public String product;
204 public String locus_tag;
205 // public String id;
206 // public String name;
207
208 public int start;
209 public int stop;
210 public byte strand=Shared.PLUS;
211 public String accession;
212 public boolean pseudo=false;
213 public boolean error=false;
214
215 public static final String[] typeStrings={"gene", "CDS", "rRNA", "tRNA", "ncRNA", "repeat_region",
216 "5'UTR", "3'UTR", "intron", "exon", "5S", "16S", "23S"};
217 public static final String[] typeStringsGff={"gene", "CDS", "rRNA", "tRNA", "ncRNA", "repeat_region",
218 "five_prime_UTR", "three_prime_UTR", "intron", "exon", "5S", "16S", "23S"};
219
220 //types
221 public static final int GENE=0, CDS=1, rRNA=2, tRNA=3, ncRNA=4, repeat_region=5, UTR5=6, UTR3=7, intron=8, exon=9;
222 //subtypes
223 public static final int r5S=10, r16S=11, r23S=12;
224
225 }