Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/gff/GbffFeature.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package gff; | |
2 | |
3 import java.util.ArrayList; | |
4 import java.util.Arrays; | |
5 | |
6 import fileIO.ByteStreamWriter; | |
7 import shared.Shared; | |
8 import shared.Tools; | |
9 import structures.ByteBuilder; | |
10 | |
11 public class GbffFeature { | |
12 | |
13 public GbffFeature(final ArrayList<byte[]> lines0, final String typeString, final String accessionString){ | |
14 accession=accessionString; | |
15 setType(typeString); | |
16 parseSlow(lines0); | |
17 if(type==rRNA){ | |
18 setSubtype(); | |
19 } | |
20 if(stop<start){error=true;} | |
21 } | |
22 | |
23 private void parseSlow(final ArrayList<byte[]> lines0){ | |
24 ArrayList<byte[]> lines=fixLines(lines0); | |
25 parseStartStop(lines.get(0)); | |
26 for(int i=1; i<lines.size(); i++){ | |
27 byte[] line=lines.get(i); | |
28 if(Tools.startsWith(line, "product=")){ | |
29 product=parseLine(line); | |
30 }else if(Tools.startsWith(line, "locus_tag=")){ | |
31 locus_tag=parseLine(line); | |
32 }else if(Tools.equals(line, "pseudo")){ | |
33 pseudo=true; | |
34 } | |
35 | |
36 // else if(Tools.startsWith(line, "ID=")){ | |
37 // id=parseLine(line); | |
38 // }else if(Tools.startsWith(line, "Name=")){ | |
39 // name=parseLine(line); | |
40 // } | |
41 } | |
42 // System.err.println("\nvvvvv"); | |
43 // for(byte[] line : lines0){ | |
44 // System.err.println("'"+new String(line)+"'"); | |
45 // } | |
46 // for(byte[] line : lines){ | |
47 // System.err.println("'"+new String(line)+"'"); | |
48 // } | |
49 // System.err.println("^^^^^"); | |
50 } | |
51 | |
52 ArrayList<byte[]> fixLines(ArrayList<byte[]> lines){ | |
53 ArrayList<byte[]> fixed=new ArrayList<byte[]>(); | |
54 ByteBuilder bb=new ByteBuilder(); | |
55 for(byte[] line : lines){ | |
56 if(bb.length()>0 && line[21]=='/'){ | |
57 fixed.add(bb.toBytes()); | |
58 bb.clear(); | |
59 } | |
60 append(bb, line); | |
61 } | |
62 if(bb.length()>0){ | |
63 fixed.add(bb.toBytes()); | |
64 bb.clear(); | |
65 } | |
66 return fixed; | |
67 } | |
68 | |
69 void append(ByteBuilder bb, byte[] line){ | |
70 assert(line[20]==' '); | |
71 assert(line.length>21); | |
72 // assert(line[21]!=' ') : "'"+new String(line)+"'"; | |
73 if(line[21]=='/'){ | |
74 bb.append(line, 22, line.length-22); | |
75 }else{ | |
76 // System.err.println(line.length+", "+21+", "+(line.length-21+1)+"\n'"+new String(line)+"'"); | |
77 if(bb.length>0){bb.append(' ');} | |
78 bb.append(line, 21, line.length-21); | |
79 } | |
80 } | |
81 | |
82 void setType(String typeString){ | |
83 int x=Tools.find(typeString, typeStrings); | |
84 assert(x>=0) : x+", "+typeString; | |
85 type=x; | |
86 } | |
87 | |
88 void parseStartStop(final byte[] line0){ | |
89 byte[] line=line0; | |
90 | |
91 if(line[0]=='c'){ | |
92 assert(Tools.startsWith(line, "complement(")); | |
93 line=Arrays.copyOfRange(line, 11, line.length-1); | |
94 strand=Shared.MINUS; | |
95 } | |
96 if(line[0]=='j'){ | |
97 assert(Tools.startsWith(line, "join(")); | |
98 line=Arrays.copyOfRange(line, 5, line.length-1); | |
99 strand=Shared.MINUS; | |
100 } | |
101 | |
102 int i=0; | |
103 for(start=0; i<line.length; i++){ | |
104 int x=line[i]; | |
105 if(x=='.'){break;} | |
106 else if(x!='<'){ | |
107 if(Tools.isDigit(x)){ | |
108 start=start*10+(x-'0'); | |
109 }else{ | |
110 //if(!error){System.err.println(new String(line0)+"\n"+new String(line));} | |
111 error=true; | |
112 } | |
113 } | |
114 } | |
115 // while(line[i]=='.'){i++;} //Not needed | |
116 for(stop=0; i<line.length; i++){ | |
117 int x=line[i]; | |
118 if(x=='.' || x==','){ | |
119 stop=0; | |
120 }else if(x==' '){ | |
121 //do nothing; line wrap | |
122 }else if(x!='>'){ | |
123 if(Tools.isDigit(x)){ | |
124 stop=stop*10+(x-'0'); | |
125 }else{ | |
126 //if(!error){System.err.println(new String(line0)+"\n"+new String(line));} | |
127 error=true; | |
128 } | |
129 } | |
130 } | |
131 } | |
132 | |
133 String parseLine(byte[] line){ | |
134 String[] split=Tools.equalsPattern.split(new String(line)); | |
135 String s=split[1]; | |
136 return s.substring(1, s.length()-1); | |
137 } | |
138 | |
139 void setSubtype(){ | |
140 subtype=-1; | |
141 if(product==null){return;} | |
142 String[] split=Tools.spacePattern.split(product); | |
143 subtype=Tools.find(split[0], typeStrings); | |
144 // assert(false) : type+", "+subtype+", "+split[0]+", "+this.toString()+"\n"+product; | |
145 } | |
146 | |
147 public void toGff(ByteStreamWriter bsw) { | |
148 ByteBuilder bb=bsw.getBuffer(); | |
149 appendGff(bb); | |
150 bb.nl(); | |
151 bsw.flushBuffer(false); | |
152 } | |
153 | |
154 public ByteBuilder appendGff(ByteBuilder bb) { | |
155 // bsw.print("#seqid source type start end score strand phase attributes\n".getBytes()); | |
156 bb.append(accession).tab(); | |
157 bb.append('.').tab(); | |
158 bb.append((pseudo && type==GENE) ? "pseudogene" : typeStringsGff[type]).tab(); | |
159 bb.append(start).tab(); | |
160 bb.append(stop).tab(); | |
161 bb.append('.').tab(); | |
162 bb.append(Shared.strandCodes2[strand]).tab(); | |
163 bb.append('.').tab(); | |
164 | |
165 boolean attributes=false; | |
166 // if(id!=null){ | |
167 // bb.append("ID=").append(id); | |
168 // attributes=true; | |
169 // } | |
170 // if(name!=null){ | |
171 // if(attributes){bb.append(';');} | |
172 // bb.append("Name=").append(name); | |
173 // attributes=true; | |
174 // } | |
175 if(product!=null){ | |
176 if(attributes){bb.append(';');} | |
177 bb.append("product=").append(product); | |
178 attributes=true; | |
179 } | |
180 if(locus_tag!=null){ | |
181 if(attributes){bb.append(';');} | |
182 bb.append("locus_tag=").append(locus_tag); | |
183 attributes=true; | |
184 } | |
185 if(subtype>-1){ | |
186 if(attributes){bb.append(';');} | |
187 bb.append("subtype=").append(typeStringsGff[subtype]); | |
188 attributes=true; | |
189 } | |
190 if(!attributes){bb.append('.');} | |
191 return bb; | |
192 } | |
193 | |
194 | |
195 @Override | |
196 public String toString(){ | |
197 return appendGff(new ByteBuilder()).toString(); | |
198 } | |
199 | |
200 public int type=-1; | |
201 public int subtype=-1; | |
202 //TODO: could have coding amino, for tRNA | |
203 public String product; | |
204 public String locus_tag; | |
205 // public String id; | |
206 // public String name; | |
207 | |
208 public int start; | |
209 public int stop; | |
210 public byte strand=Shared.PLUS; | |
211 public String accession; | |
212 public boolean pseudo=false; | |
213 public boolean error=false; | |
214 | |
215 public static final String[] typeStrings={"gene", "CDS", "rRNA", "tRNA", "ncRNA", "repeat_region", | |
216 "5'UTR", "3'UTR", "intron", "exon", "5S", "16S", "23S"}; | |
217 public static final String[] typeStringsGff={"gene", "CDS", "rRNA", "tRNA", "ncRNA", "repeat_region", | |
218 "five_prime_UTR", "three_prime_UTR", "intron", "exon", "5S", "16S", "23S"}; | |
219 | |
220 //types | |
221 public static final int GENE=0, CDS=1, rRNA=2, tRNA=3, ncRNA=4, repeat_region=5, UTR5=6, UTR3=7, intron=8, exon=9; | |
222 //subtypes | |
223 public static final int r5S=10, r16S=11, r23S=12; | |
224 | |
225 } |