comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/gff/GbffLocus.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package gff;
2
3 import java.util.ArrayList;
4
5 import fileIO.ByteStreamWriter;
6 import shared.Tools;
7
8 public class GbffLocus {
9
10 public GbffLocus(ArrayList<byte[]> lines) {
11 while(num<lines.size()){
12 parseBlock(lines);
13 }
14 }
15
16 int parseBlock(ArrayList<byte[]> lines){
17 byte[] line=lines.get(num);
18 if(Tools.startsWith(line, " ")){
19 assert(false) : line;
20 num++;
21 }else if(Tools.startsWith(line, "LOCUS ")){
22 parseLocus(lines);
23 }else if(Tools.startsWith(line, "DEFINITION ")){
24 parseDefinition(lines);
25 }else if(Tools.startsWith(line, "ACCESSION ")){
26 parseAccession(lines);
27 }else if(Tools.startsWith(line, "VERSION ")){
28 parseVersion(lines);
29 }else if(Tools.startsWith(line, "DBLINK ")){
30 parseDBLink(lines);
31 }else if(Tools.startsWith(line, "KEYWORDS ")){
32 parseKeywords(lines);
33 }else if(Tools.startsWith(line, "SOURCE ")){
34 parseSource(lines);
35 }else if(Tools.startsWith(line, "REFERENCE ")){
36 parseReference(lines);
37 }else if(Tools.startsWith(line, "COMMENT ")){
38 parseComment(lines);
39 }else if(Tools.startsWith(line, "FEATURES ")){
40 parseFeatures(lines);
41 }else if(Tools.startsWith(line, "CONTIG ")){
42 parseContig(lines);
43 }else if(Tools.startsWith(line, "ORIGIN ")){
44 parseOrigin(lines);
45 }else if(Tools.startsWith(line, "PRIMARY ")){
46 parsePrimary(lines);
47 }else{
48 assert(false) : "Unhandled block type: "+new String(line);
49 }
50 return num;
51 }
52
53 private byte[] nextLine(ArrayList<byte[]> lines){
54 byte[] line=null;
55 for(final int lim=lines.size()-1; num<lim && (line==null || line.length==0); ){
56 // System.err.println(num+", "+lim);
57 num++;
58 line=lines.get(num);
59 }
60 // System.err.println(line);
61 // assert(line!=null);
62 return line;
63 }
64
65 private byte[] getLine(ArrayList<byte[]> lines){
66 return num>=lines.size() ? null : lines.get(num);
67 }
68
69 /** Move pointer to next block start */
70 private int advanceBlock(ArrayList<byte[]> lines){
71 for(num++; num<lines.size(); num++){
72 byte[] line=lines.get(num);
73 if(line!=null && line.length>0 && line[0]!=' '){break;}
74 }
75 return num;
76 }
77
78 /** Move pointer to next block start */
79 private int advanceFeature(ArrayList<byte[]> lines){
80 for(num++; num<lines.size(); num++){
81 byte[] line=lines.get(num);
82 if(line!=null && line.length>0 && (line[0]!=' ' || line[5]!=' ')){break;}
83 }
84 return num;
85 }
86
87 private String trimBlockName(byte[] line){
88 assert(line.length>=12 && line[11]==' ') : new String(line);
89 return new String(line, 12, line.length-12);
90 }
91
92 private String toFeatureType(byte[] line){
93 assert(line[4]==' ');
94 assert(line[5]!=' ');
95 assert(line[20]==' ');
96 int start=5, stop=6;
97 for(; stop<21 && line[stop]!=' '; stop++){}
98 return new String(line, start, stop-start);
99 }
100
101 private int parseLocus(ArrayList<byte[]> lines){
102 byte[] line=lines.get(num);
103 // assert(Tools.startsWith(line, "LOCUS")) : new String(line);
104 if(accession==null){
105 String s=trimBlockName(line);
106 String[] split=Tools.whitespacePlus.split(s);
107 accession=split.length>0 ? split[0] : null;
108 }
109 return advanceBlock(lines);
110 }
111
112 private int parseDefinition(ArrayList<byte[]> lines){
113 byte[] line=lines.get(num);
114 if(organism==null){
115 String s=trimBlockName(line);
116 String[] split=Tools.commaPattern.split(s);
117 organism=split.length>0 ? split[0] : null;
118 }
119 return advanceBlock(lines);
120 }
121
122 private int parseAccession(ArrayList<byte[]> lines){
123 byte[] line=lines.get(num);
124 if(accession==null){
125 String s=trimBlockName(line);
126 String[] split=Tools.whitespacePlus.split(s);
127 accession=split.length>0 ? split[0] : null;
128 }
129 return advanceBlock(lines);
130 }
131
132 private int parseVersion(ArrayList<byte[]> lines){
133 byte[] line=lines.get(num);
134 String s=trimBlockName(line);
135 String[] split=Tools.whitespacePlus.split(s);
136 s=split.length>0 ? split[0] : null;
137 if(accession==null || (s!=null && s.length()>1)){
138 accession=s;
139 }
140 return advanceBlock(lines);
141 }
142
143 private int parseDBLink(ArrayList<byte[]> lines){
144 return advanceBlock(lines);
145 }
146
147 private int parseKeywords(ArrayList<byte[]> lines){
148 return advanceBlock(lines);
149 }
150
151 private int parseSource(ArrayList<byte[]> lines){
152 byte[] line=lines.get(num);
153 if(species==null){
154 species=trimBlockName(line);
155 }
156 return advanceBlock(lines);
157 }
158
159 private int parseReference(ArrayList<byte[]> lines){
160 return advanceBlock(lines);
161 }
162
163 private int parseComment(ArrayList<byte[]> lines){
164 return advanceBlock(lines);
165 }
166
167 private int parseFeatures(ArrayList<byte[]> lines){
168 for(byte[] line=nextLine(lines); line!=null && line[0]==' '; line=getLine(lines)){
169 // System.err.println(num+": "+new String(line));
170 String type=toFeatureType(line);
171 int idx=Tools.find(type, featureTypes);
172 // System.err.println("idx="+idx+" for '"+type+"'");
173 if(idx>=0){
174 // System.err.println("parseFeature");
175 parseFeature(lines, type);
176 // System.err.println(features.get(features.size()-1));
177 }else{
178 // System.err.println("advanceFeature");
179 advanceFeature(lines);
180 }
181 }
182 return num;
183 }
184
185 /** Move pointer to next block start */
186 private int parseFeature(ArrayList<byte[]> lines, String type){
187 ArrayList<byte[]> flist=new ArrayList<byte[]>();
188 flist.add(lines.get(num));
189 for(num++; num<lines.size(); num++){
190 byte[] line=lines.get(num);
191 if(line!=null && line.length>0 && (line[0]!=' ' || line[5]!=' ')){
192 // assert(false) : Character.toString(line[0])+", "+Character.toString(line[5])+", "+Character.toString(line[6])+"\n"+new String(line);
193 break;
194 }
195 flist.add(line);
196 }
197 GbffFeature f=new GbffFeature(flist, type, accession);
198 if(!f.error){
199 features.add(f);
200 }else{
201 // System.err.println("Failed to parse feature "+f);
202 }
203 return num;
204 }
205
206 private int parseContig(ArrayList<byte[]> lines){
207 return advanceBlock(lines);
208 }
209
210 private int parseOrigin(ArrayList<byte[]> lines){
211 return advanceBlock(lines);
212 }
213
214 private int parsePrimary(ArrayList<byte[]> lines){
215 return advanceBlock(lines);
216 }
217
218 public void toGff(ByteStreamWriter bsw) {
219 final byte[] accessionB=accession.getBytes();
220 bsw.print(seqRegB);
221 bsw.print(accessionB);
222 if(start>0 && stop>0){
223 bsw.print(' ').print(start).print(' ').print(stop);
224 }
225 bsw.println();
226 for(GbffFeature f : features){
227 if(f.type==GbffFeature.CDS || f.type==GbffFeature.tRNA || f.type==GbffFeature.rRNA){
228 if(!f.pseudo && !f.error){
229 f.toGff(bsw);
230 }
231 }
232 }
233 }
234
235
236 /** Line number */
237 int num=0;
238
239 boolean printGene=false;
240 boolean printRepeat=false;
241
242 public static String[] featureTypes=GbffFeature.typeStrings;
243 private static final byte[] seqRegB="##sequence-region ".getBytes();
244
245 String accession;
246 String organism;
247 String species;
248 int start;
249 int stop;
250 ArrayList<GbffFeature> features=new ArrayList<GbffFeature>();
251 }