Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/gff/GbffLocus.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package gff; | |
2 | |
3 import java.util.ArrayList; | |
4 | |
5 import fileIO.ByteStreamWriter; | |
6 import shared.Tools; | |
7 | |
8 public class GbffLocus { | |
9 | |
10 public GbffLocus(ArrayList<byte[]> lines) { | |
11 while(num<lines.size()){ | |
12 parseBlock(lines); | |
13 } | |
14 } | |
15 | |
16 int parseBlock(ArrayList<byte[]> lines){ | |
17 byte[] line=lines.get(num); | |
18 if(Tools.startsWith(line, " ")){ | |
19 assert(false) : line; | |
20 num++; | |
21 }else if(Tools.startsWith(line, "LOCUS ")){ | |
22 parseLocus(lines); | |
23 }else if(Tools.startsWith(line, "DEFINITION ")){ | |
24 parseDefinition(lines); | |
25 }else if(Tools.startsWith(line, "ACCESSION ")){ | |
26 parseAccession(lines); | |
27 }else if(Tools.startsWith(line, "VERSION ")){ | |
28 parseVersion(lines); | |
29 }else if(Tools.startsWith(line, "DBLINK ")){ | |
30 parseDBLink(lines); | |
31 }else if(Tools.startsWith(line, "KEYWORDS ")){ | |
32 parseKeywords(lines); | |
33 }else if(Tools.startsWith(line, "SOURCE ")){ | |
34 parseSource(lines); | |
35 }else if(Tools.startsWith(line, "REFERENCE ")){ | |
36 parseReference(lines); | |
37 }else if(Tools.startsWith(line, "COMMENT ")){ | |
38 parseComment(lines); | |
39 }else if(Tools.startsWith(line, "FEATURES ")){ | |
40 parseFeatures(lines); | |
41 }else if(Tools.startsWith(line, "CONTIG ")){ | |
42 parseContig(lines); | |
43 }else if(Tools.startsWith(line, "ORIGIN ")){ | |
44 parseOrigin(lines); | |
45 }else if(Tools.startsWith(line, "PRIMARY ")){ | |
46 parsePrimary(lines); | |
47 }else{ | |
48 assert(false) : "Unhandled block type: "+new String(line); | |
49 } | |
50 return num; | |
51 } | |
52 | |
53 private byte[] nextLine(ArrayList<byte[]> lines){ | |
54 byte[] line=null; | |
55 for(final int lim=lines.size()-1; num<lim && (line==null || line.length==0); ){ | |
56 // System.err.println(num+", "+lim); | |
57 num++; | |
58 line=lines.get(num); | |
59 } | |
60 // System.err.println(line); | |
61 // assert(line!=null); | |
62 return line; | |
63 } | |
64 | |
65 private byte[] getLine(ArrayList<byte[]> lines){ | |
66 return num>=lines.size() ? null : lines.get(num); | |
67 } | |
68 | |
69 /** Move pointer to next block start */ | |
70 private int advanceBlock(ArrayList<byte[]> lines){ | |
71 for(num++; num<lines.size(); num++){ | |
72 byte[] line=lines.get(num); | |
73 if(line!=null && line.length>0 && line[0]!=' '){break;} | |
74 } | |
75 return num; | |
76 } | |
77 | |
78 /** Move pointer to next block start */ | |
79 private int advanceFeature(ArrayList<byte[]> lines){ | |
80 for(num++; num<lines.size(); num++){ | |
81 byte[] line=lines.get(num); | |
82 if(line!=null && line.length>0 && (line[0]!=' ' || line[5]!=' ')){break;} | |
83 } | |
84 return num; | |
85 } | |
86 | |
87 private String trimBlockName(byte[] line){ | |
88 assert(line.length>=12 && line[11]==' ') : new String(line); | |
89 return new String(line, 12, line.length-12); | |
90 } | |
91 | |
92 private String toFeatureType(byte[] line){ | |
93 assert(line[4]==' '); | |
94 assert(line[5]!=' '); | |
95 assert(line[20]==' '); | |
96 int start=5, stop=6; | |
97 for(; stop<21 && line[stop]!=' '; stop++){} | |
98 return new String(line, start, stop-start); | |
99 } | |
100 | |
101 private int parseLocus(ArrayList<byte[]> lines){ | |
102 byte[] line=lines.get(num); | |
103 // assert(Tools.startsWith(line, "LOCUS")) : new String(line); | |
104 if(accession==null){ | |
105 String s=trimBlockName(line); | |
106 String[] split=Tools.whitespacePlus.split(s); | |
107 accession=split.length>0 ? split[0] : null; | |
108 } | |
109 return advanceBlock(lines); | |
110 } | |
111 | |
112 private int parseDefinition(ArrayList<byte[]> lines){ | |
113 byte[] line=lines.get(num); | |
114 if(organism==null){ | |
115 String s=trimBlockName(line); | |
116 String[] split=Tools.commaPattern.split(s); | |
117 organism=split.length>0 ? split[0] : null; | |
118 } | |
119 return advanceBlock(lines); | |
120 } | |
121 | |
122 private int parseAccession(ArrayList<byte[]> lines){ | |
123 byte[] line=lines.get(num); | |
124 if(accession==null){ | |
125 String s=trimBlockName(line); | |
126 String[] split=Tools.whitespacePlus.split(s); | |
127 accession=split.length>0 ? split[0] : null; | |
128 } | |
129 return advanceBlock(lines); | |
130 } | |
131 | |
132 private int parseVersion(ArrayList<byte[]> lines){ | |
133 byte[] line=lines.get(num); | |
134 String s=trimBlockName(line); | |
135 String[] split=Tools.whitespacePlus.split(s); | |
136 s=split.length>0 ? split[0] : null; | |
137 if(accession==null || (s!=null && s.length()>1)){ | |
138 accession=s; | |
139 } | |
140 return advanceBlock(lines); | |
141 } | |
142 | |
143 private int parseDBLink(ArrayList<byte[]> lines){ | |
144 return advanceBlock(lines); | |
145 } | |
146 | |
147 private int parseKeywords(ArrayList<byte[]> lines){ | |
148 return advanceBlock(lines); | |
149 } | |
150 | |
151 private int parseSource(ArrayList<byte[]> lines){ | |
152 byte[] line=lines.get(num); | |
153 if(species==null){ | |
154 species=trimBlockName(line); | |
155 } | |
156 return advanceBlock(lines); | |
157 } | |
158 | |
159 private int parseReference(ArrayList<byte[]> lines){ | |
160 return advanceBlock(lines); | |
161 } | |
162 | |
163 private int parseComment(ArrayList<byte[]> lines){ | |
164 return advanceBlock(lines); | |
165 } | |
166 | |
167 private int parseFeatures(ArrayList<byte[]> lines){ | |
168 for(byte[] line=nextLine(lines); line!=null && line[0]==' '; line=getLine(lines)){ | |
169 // System.err.println(num+": "+new String(line)); | |
170 String type=toFeatureType(line); | |
171 int idx=Tools.find(type, featureTypes); | |
172 // System.err.println("idx="+idx+" for '"+type+"'"); | |
173 if(idx>=0){ | |
174 // System.err.println("parseFeature"); | |
175 parseFeature(lines, type); | |
176 // System.err.println(features.get(features.size()-1)); | |
177 }else{ | |
178 // System.err.println("advanceFeature"); | |
179 advanceFeature(lines); | |
180 } | |
181 } | |
182 return num; | |
183 } | |
184 | |
185 /** Move pointer to next block start */ | |
186 private int parseFeature(ArrayList<byte[]> lines, String type){ | |
187 ArrayList<byte[]> flist=new ArrayList<byte[]>(); | |
188 flist.add(lines.get(num)); | |
189 for(num++; num<lines.size(); num++){ | |
190 byte[] line=lines.get(num); | |
191 if(line!=null && line.length>0 && (line[0]!=' ' || line[5]!=' ')){ | |
192 // assert(false) : Character.toString(line[0])+", "+Character.toString(line[5])+", "+Character.toString(line[6])+"\n"+new String(line); | |
193 break; | |
194 } | |
195 flist.add(line); | |
196 } | |
197 GbffFeature f=new GbffFeature(flist, type, accession); | |
198 if(!f.error){ | |
199 features.add(f); | |
200 }else{ | |
201 // System.err.println("Failed to parse feature "+f); | |
202 } | |
203 return num; | |
204 } | |
205 | |
206 private int parseContig(ArrayList<byte[]> lines){ | |
207 return advanceBlock(lines); | |
208 } | |
209 | |
210 private int parseOrigin(ArrayList<byte[]> lines){ | |
211 return advanceBlock(lines); | |
212 } | |
213 | |
214 private int parsePrimary(ArrayList<byte[]> lines){ | |
215 return advanceBlock(lines); | |
216 } | |
217 | |
218 public void toGff(ByteStreamWriter bsw) { | |
219 final byte[] accessionB=accession.getBytes(); | |
220 bsw.print(seqRegB); | |
221 bsw.print(accessionB); | |
222 if(start>0 && stop>0){ | |
223 bsw.print(' ').print(start).print(' ').print(stop); | |
224 } | |
225 bsw.println(); | |
226 for(GbffFeature f : features){ | |
227 if(f.type==GbffFeature.CDS || f.type==GbffFeature.tRNA || f.type==GbffFeature.rRNA){ | |
228 if(!f.pseudo && !f.error){ | |
229 f.toGff(bsw); | |
230 } | |
231 } | |
232 } | |
233 } | |
234 | |
235 | |
236 /** Line number */ | |
237 int num=0; | |
238 | |
239 boolean printGene=false; | |
240 boolean printRepeat=false; | |
241 | |
242 public static String[] featureTypes=GbffFeature.typeStrings; | |
243 private static final byte[] seqRegB="##sequence-region ".getBytes(); | |
244 | |
245 String accession; | |
246 String organism; | |
247 String species; | |
248 int start; | |
249 int stop; | |
250 ArrayList<GbffFeature> features=new ArrayList<GbffFeature>(); | |
251 } |