jpayne@68
|
1 package gff;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.util.ArrayList;
|
jpayne@68
|
4
|
jpayne@68
|
5 import fileIO.ByteStreamWriter;
|
jpayne@68
|
6 import shared.Tools;
|
jpayne@68
|
7
|
jpayne@68
|
8 public class GbffLocus {
|
jpayne@68
|
9
|
jpayne@68
|
10 public GbffLocus(ArrayList<byte[]> lines) {
|
jpayne@68
|
11 while(num<lines.size()){
|
jpayne@68
|
12 parseBlock(lines);
|
jpayne@68
|
13 }
|
jpayne@68
|
14 }
|
jpayne@68
|
15
|
jpayne@68
|
16 int parseBlock(ArrayList<byte[]> lines){
|
jpayne@68
|
17 byte[] line=lines.get(num);
|
jpayne@68
|
18 if(Tools.startsWith(line, " ")){
|
jpayne@68
|
19 assert(false) : line;
|
jpayne@68
|
20 num++;
|
jpayne@68
|
21 }else if(Tools.startsWith(line, "LOCUS ")){
|
jpayne@68
|
22 parseLocus(lines);
|
jpayne@68
|
23 }else if(Tools.startsWith(line, "DEFINITION ")){
|
jpayne@68
|
24 parseDefinition(lines);
|
jpayne@68
|
25 }else if(Tools.startsWith(line, "ACCESSION ")){
|
jpayne@68
|
26 parseAccession(lines);
|
jpayne@68
|
27 }else if(Tools.startsWith(line, "VERSION ")){
|
jpayne@68
|
28 parseVersion(lines);
|
jpayne@68
|
29 }else if(Tools.startsWith(line, "DBLINK ")){
|
jpayne@68
|
30 parseDBLink(lines);
|
jpayne@68
|
31 }else if(Tools.startsWith(line, "KEYWORDS ")){
|
jpayne@68
|
32 parseKeywords(lines);
|
jpayne@68
|
33 }else if(Tools.startsWith(line, "SOURCE ")){
|
jpayne@68
|
34 parseSource(lines);
|
jpayne@68
|
35 }else if(Tools.startsWith(line, "REFERENCE ")){
|
jpayne@68
|
36 parseReference(lines);
|
jpayne@68
|
37 }else if(Tools.startsWith(line, "COMMENT ")){
|
jpayne@68
|
38 parseComment(lines);
|
jpayne@68
|
39 }else if(Tools.startsWith(line, "FEATURES ")){
|
jpayne@68
|
40 parseFeatures(lines);
|
jpayne@68
|
41 }else if(Tools.startsWith(line, "CONTIG ")){
|
jpayne@68
|
42 parseContig(lines);
|
jpayne@68
|
43 }else if(Tools.startsWith(line, "ORIGIN ")){
|
jpayne@68
|
44 parseOrigin(lines);
|
jpayne@68
|
45 }else if(Tools.startsWith(line, "PRIMARY ")){
|
jpayne@68
|
46 parsePrimary(lines);
|
jpayne@68
|
47 }else{
|
jpayne@68
|
48 assert(false) : "Unhandled block type: "+new String(line);
|
jpayne@68
|
49 }
|
jpayne@68
|
50 return num;
|
jpayne@68
|
51 }
|
jpayne@68
|
52
|
jpayne@68
|
53 private byte[] nextLine(ArrayList<byte[]> lines){
|
jpayne@68
|
54 byte[] line=null;
|
jpayne@68
|
55 for(final int lim=lines.size()-1; num<lim && (line==null || line.length==0); ){
|
jpayne@68
|
56 // System.err.println(num+", "+lim);
|
jpayne@68
|
57 num++;
|
jpayne@68
|
58 line=lines.get(num);
|
jpayne@68
|
59 }
|
jpayne@68
|
60 // System.err.println(line);
|
jpayne@68
|
61 // assert(line!=null);
|
jpayne@68
|
62 return line;
|
jpayne@68
|
63 }
|
jpayne@68
|
64
|
jpayne@68
|
65 private byte[] getLine(ArrayList<byte[]> lines){
|
jpayne@68
|
66 return num>=lines.size() ? null : lines.get(num);
|
jpayne@68
|
67 }
|
jpayne@68
|
68
|
jpayne@68
|
69 /** Move pointer to next block start */
|
jpayne@68
|
70 private int advanceBlock(ArrayList<byte[]> lines){
|
jpayne@68
|
71 for(num++; num<lines.size(); num++){
|
jpayne@68
|
72 byte[] line=lines.get(num);
|
jpayne@68
|
73 if(line!=null && line.length>0 && line[0]!=' '){break;}
|
jpayne@68
|
74 }
|
jpayne@68
|
75 return num;
|
jpayne@68
|
76 }
|
jpayne@68
|
77
|
jpayne@68
|
78 /** Move pointer to next block start */
|
jpayne@68
|
79 private int advanceFeature(ArrayList<byte[]> lines){
|
jpayne@68
|
80 for(num++; num<lines.size(); num++){
|
jpayne@68
|
81 byte[] line=lines.get(num);
|
jpayne@68
|
82 if(line!=null && line.length>0 && (line[0]!=' ' || line[5]!=' ')){break;}
|
jpayne@68
|
83 }
|
jpayne@68
|
84 return num;
|
jpayne@68
|
85 }
|
jpayne@68
|
86
|
jpayne@68
|
87 private String trimBlockName(byte[] line){
|
jpayne@68
|
88 assert(line.length>=12 && line[11]==' ') : new String(line);
|
jpayne@68
|
89 return new String(line, 12, line.length-12);
|
jpayne@68
|
90 }
|
jpayne@68
|
91
|
jpayne@68
|
92 private String toFeatureType(byte[] line){
|
jpayne@68
|
93 assert(line[4]==' ');
|
jpayne@68
|
94 assert(line[5]!=' ');
|
jpayne@68
|
95 assert(line[20]==' ');
|
jpayne@68
|
96 int start=5, stop=6;
|
jpayne@68
|
97 for(; stop<21 && line[stop]!=' '; stop++){}
|
jpayne@68
|
98 return new String(line, start, stop-start);
|
jpayne@68
|
99 }
|
jpayne@68
|
100
|
jpayne@68
|
101 private int parseLocus(ArrayList<byte[]> lines){
|
jpayne@68
|
102 byte[] line=lines.get(num);
|
jpayne@68
|
103 // assert(Tools.startsWith(line, "LOCUS")) : new String(line);
|
jpayne@68
|
104 if(accession==null){
|
jpayne@68
|
105 String s=trimBlockName(line);
|
jpayne@68
|
106 String[] split=Tools.whitespacePlus.split(s);
|
jpayne@68
|
107 accession=split.length>0 ? split[0] : null;
|
jpayne@68
|
108 }
|
jpayne@68
|
109 return advanceBlock(lines);
|
jpayne@68
|
110 }
|
jpayne@68
|
111
|
jpayne@68
|
112 private int parseDefinition(ArrayList<byte[]> lines){
|
jpayne@68
|
113 byte[] line=lines.get(num);
|
jpayne@68
|
114 if(organism==null){
|
jpayne@68
|
115 String s=trimBlockName(line);
|
jpayne@68
|
116 String[] split=Tools.commaPattern.split(s);
|
jpayne@68
|
117 organism=split.length>0 ? split[0] : null;
|
jpayne@68
|
118 }
|
jpayne@68
|
119 return advanceBlock(lines);
|
jpayne@68
|
120 }
|
jpayne@68
|
121
|
jpayne@68
|
122 private int parseAccession(ArrayList<byte[]> lines){
|
jpayne@68
|
123 byte[] line=lines.get(num);
|
jpayne@68
|
124 if(accession==null){
|
jpayne@68
|
125 String s=trimBlockName(line);
|
jpayne@68
|
126 String[] split=Tools.whitespacePlus.split(s);
|
jpayne@68
|
127 accession=split.length>0 ? split[0] : null;
|
jpayne@68
|
128 }
|
jpayne@68
|
129 return advanceBlock(lines);
|
jpayne@68
|
130 }
|
jpayne@68
|
131
|
jpayne@68
|
132 private int parseVersion(ArrayList<byte[]> lines){
|
jpayne@68
|
133 byte[] line=lines.get(num);
|
jpayne@68
|
134 String s=trimBlockName(line);
|
jpayne@68
|
135 String[] split=Tools.whitespacePlus.split(s);
|
jpayne@68
|
136 s=split.length>0 ? split[0] : null;
|
jpayne@68
|
137 if(accession==null || (s!=null && s.length()>1)){
|
jpayne@68
|
138 accession=s;
|
jpayne@68
|
139 }
|
jpayne@68
|
140 return advanceBlock(lines);
|
jpayne@68
|
141 }
|
jpayne@68
|
142
|
jpayne@68
|
143 private int parseDBLink(ArrayList<byte[]> lines){
|
jpayne@68
|
144 return advanceBlock(lines);
|
jpayne@68
|
145 }
|
jpayne@68
|
146
|
jpayne@68
|
147 private int parseKeywords(ArrayList<byte[]> lines){
|
jpayne@68
|
148 return advanceBlock(lines);
|
jpayne@68
|
149 }
|
jpayne@68
|
150
|
jpayne@68
|
151 private int parseSource(ArrayList<byte[]> lines){
|
jpayne@68
|
152 byte[] line=lines.get(num);
|
jpayne@68
|
153 if(species==null){
|
jpayne@68
|
154 species=trimBlockName(line);
|
jpayne@68
|
155 }
|
jpayne@68
|
156 return advanceBlock(lines);
|
jpayne@68
|
157 }
|
jpayne@68
|
158
|
jpayne@68
|
159 private int parseReference(ArrayList<byte[]> lines){
|
jpayne@68
|
160 return advanceBlock(lines);
|
jpayne@68
|
161 }
|
jpayne@68
|
162
|
jpayne@68
|
163 private int parseComment(ArrayList<byte[]> lines){
|
jpayne@68
|
164 return advanceBlock(lines);
|
jpayne@68
|
165 }
|
jpayne@68
|
166
|
jpayne@68
|
167 private int parseFeatures(ArrayList<byte[]> lines){
|
jpayne@68
|
168 for(byte[] line=nextLine(lines); line!=null && line[0]==' '; line=getLine(lines)){
|
jpayne@68
|
169 // System.err.println(num+": "+new String(line));
|
jpayne@68
|
170 String type=toFeatureType(line);
|
jpayne@68
|
171 int idx=Tools.find(type, featureTypes);
|
jpayne@68
|
172 // System.err.println("idx="+idx+" for '"+type+"'");
|
jpayne@68
|
173 if(idx>=0){
|
jpayne@68
|
174 // System.err.println("parseFeature");
|
jpayne@68
|
175 parseFeature(lines, type);
|
jpayne@68
|
176 // System.err.println(features.get(features.size()-1));
|
jpayne@68
|
177 }else{
|
jpayne@68
|
178 // System.err.println("advanceFeature");
|
jpayne@68
|
179 advanceFeature(lines);
|
jpayne@68
|
180 }
|
jpayne@68
|
181 }
|
jpayne@68
|
182 return num;
|
jpayne@68
|
183 }
|
jpayne@68
|
184
|
jpayne@68
|
185 /** Move pointer to next block start */
|
jpayne@68
|
186 private int parseFeature(ArrayList<byte[]> lines, String type){
|
jpayne@68
|
187 ArrayList<byte[]> flist=new ArrayList<byte[]>();
|
jpayne@68
|
188 flist.add(lines.get(num));
|
jpayne@68
|
189 for(num++; num<lines.size(); num++){
|
jpayne@68
|
190 byte[] line=lines.get(num);
|
jpayne@68
|
191 if(line!=null && line.length>0 && (line[0]!=' ' || line[5]!=' ')){
|
jpayne@68
|
192 // assert(false) : Character.toString(line[0])+", "+Character.toString(line[5])+", "+Character.toString(line[6])+"\n"+new String(line);
|
jpayne@68
|
193 break;
|
jpayne@68
|
194 }
|
jpayne@68
|
195 flist.add(line);
|
jpayne@68
|
196 }
|
jpayne@68
|
197 GbffFeature f=new GbffFeature(flist, type, accession);
|
jpayne@68
|
198 if(!f.error){
|
jpayne@68
|
199 features.add(f);
|
jpayne@68
|
200 }else{
|
jpayne@68
|
201 // System.err.println("Failed to parse feature "+f);
|
jpayne@68
|
202 }
|
jpayne@68
|
203 return num;
|
jpayne@68
|
204 }
|
jpayne@68
|
205
|
jpayne@68
|
206 private int parseContig(ArrayList<byte[]> lines){
|
jpayne@68
|
207 return advanceBlock(lines);
|
jpayne@68
|
208 }
|
jpayne@68
|
209
|
jpayne@68
|
210 private int parseOrigin(ArrayList<byte[]> lines){
|
jpayne@68
|
211 return advanceBlock(lines);
|
jpayne@68
|
212 }
|
jpayne@68
|
213
|
jpayne@68
|
214 private int parsePrimary(ArrayList<byte[]> lines){
|
jpayne@68
|
215 return advanceBlock(lines);
|
jpayne@68
|
216 }
|
jpayne@68
|
217
|
jpayne@68
|
218 public void toGff(ByteStreamWriter bsw) {
|
jpayne@68
|
219 final byte[] accessionB=accession.getBytes();
|
jpayne@68
|
220 bsw.print(seqRegB);
|
jpayne@68
|
221 bsw.print(accessionB);
|
jpayne@68
|
222 if(start>0 && stop>0){
|
jpayne@68
|
223 bsw.print(' ').print(start).print(' ').print(stop);
|
jpayne@68
|
224 }
|
jpayne@68
|
225 bsw.println();
|
jpayne@68
|
226 for(GbffFeature f : features){
|
jpayne@68
|
227 if(f.type==GbffFeature.CDS || f.type==GbffFeature.tRNA || f.type==GbffFeature.rRNA){
|
jpayne@68
|
228 if(!f.pseudo && !f.error){
|
jpayne@68
|
229 f.toGff(bsw);
|
jpayne@68
|
230 }
|
jpayne@68
|
231 }
|
jpayne@68
|
232 }
|
jpayne@68
|
233 }
|
jpayne@68
|
234
|
jpayne@68
|
235
|
jpayne@68
|
236 /** Line number */
|
jpayne@68
|
237 int num=0;
|
jpayne@68
|
238
|
jpayne@68
|
239 boolean printGene=false;
|
jpayne@68
|
240 boolean printRepeat=false;
|
jpayne@68
|
241
|
jpayne@68
|
242 public static String[] featureTypes=GbffFeature.typeStrings;
|
jpayne@68
|
243 private static final byte[] seqRegB="##sequence-region ".getBytes();
|
jpayne@68
|
244
|
jpayne@68
|
245 String accession;
|
jpayne@68
|
246 String organism;
|
jpayne@68
|
247 String species;
|
jpayne@68
|
248 int start;
|
jpayne@68
|
249 int stop;
|
jpayne@68
|
250 ArrayList<GbffFeature> features=new ArrayList<GbffFeature>();
|
jpayne@68
|
251 }
|