Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/tax/ShrinkAccession.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package tax; | |
2 | |
3 import java.io.File; | |
4 import java.io.PrintStream; | |
5 | |
6 import dna.Data; | |
7 import fileIO.ByteFile; | |
8 import fileIO.ByteFile1; | |
9 import fileIO.ByteFile2; | |
10 import fileIO.ByteStreamWriter; | |
11 import fileIO.FileFormat; | |
12 import fileIO.ReadWrite; | |
13 import shared.Parse; | |
14 import shared.Parser; | |
15 import shared.PreParser; | |
16 import shared.ReadStats; | |
17 import shared.Shared; | |
18 import shared.Timer; | |
19 import shared.Tools; | |
20 import stream.FastaReadInputStream; | |
21 import structures.ByteBuilder; | |
22 | |
23 /** | |
24 * @author Brian Bushnell | |
25 * @date April 4, 2017 | |
26 * | |
27 */ | |
28 public class ShrinkAccession { | |
29 | |
30 public static void main(String[] args){ | |
31 Timer t=new Timer(); | |
32 ShrinkAccession x=new ShrinkAccession(args); | |
33 x.process(t); | |
34 | |
35 //Close the print stream if it was redirected | |
36 Shared.closeStream(x.outstream); | |
37 } | |
38 | |
39 public ShrinkAccession(String[] args){ | |
40 | |
41 {//Preparse block for help, config files, and outstream | |
42 PreParser pp=new PreParser(args, getClass(), false); | |
43 args=pp.args; | |
44 outstream=pp.outstream; | |
45 } | |
46 | |
47 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true; | |
48 ReadWrite.MAX_ZIP_THREADS=Shared.threads(); | |
49 if(Data.PIGZ()){ | |
50 ReadWrite.ZIPLEVEL=Tools.max(ReadWrite.ZIPLEVEL, 6); | |
51 } | |
52 | |
53 Parser parser=new Parser(); | |
54 for(int i=0; i<args.length; i++){ | |
55 String arg=args[i]; | |
56 String[] split=arg.split("="); | |
57 String a=split[0].toLowerCase(); | |
58 String b=split.length>1 ? split[1] : null; | |
59 | |
60 if(parser.parse(arg, a, b)){ | |
61 //do nothing | |
62 }else if(a.equals("verbose")){ | |
63 verbose=Parse.parseBoolean(b); | |
64 ByteFile1.verbose=verbose; | |
65 ByteFile2.verbose=verbose; | |
66 ReadWrite.verbose=verbose; | |
67 }else if(a.equals("gi")){ | |
68 KEEP_GI_NUMBERS=Parse.parseBoolean(b); | |
69 }else if(a.equals("outgi") || a.equals("giout") || a.equals("gi")){ | |
70 giOut=b; | |
71 }else if(parser.in1==null && i==0 && !arg.contains("=") && (arg.toLowerCase().startsWith("stdin") || new File(arg).exists())){ | |
72 parser.in1=arg; | |
73 }else if(parser.out1==null && i==1 && !arg.contains("=")){ | |
74 parser.out1=arg; | |
75 }else{ | |
76 outstream.println("Unknown parameter "+args[i]); | |
77 assert(false) : "Unknown parameter "+args[i]; | |
78 // throw new RuntimeException("Unknown parameter "+args[i]); | |
79 } | |
80 } | |
81 | |
82 {//Process parser fields | |
83 Parser.processQuality(); | |
84 | |
85 overwrite=ReadStats.overwrite=parser.overwrite; | |
86 append=ReadStats.append=parser.append; | |
87 | |
88 in=parser.in1; | |
89 | |
90 out=parser.out1; | |
91 } | |
92 | |
93 assert(FastaReadInputStream.settingsOK()); | |
94 | |
95 if(in==null){throw new RuntimeException("Error - at least one input file is required.");} | |
96 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2){ | |
97 ByteFile.FORCE_MODE_BF2=false; | |
98 ByteFile.FORCE_MODE_BF1=true; | |
99 } | |
100 | |
101 if(out!=null && out.equalsIgnoreCase("null")){out=null;} | |
102 | |
103 if(!Tools.testOutputFiles(overwrite, append, false, out)){ | |
104 outstream.println((out==null)+", "+out); | |
105 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out+"\n"); | |
106 } | |
107 | |
108 ffout=FileFormat.testOutput(out, FileFormat.TXT, null, true, overwrite, append, false); | |
109 ffoutGi=FileFormat.testOutput(giOut, FileFormat.TXT, null, true, overwrite, append, false); | |
110 ffin=FileFormat.testInput(in, FileFormat.TXT, null, true, true); | |
111 | |
112 } | |
113 | |
114 void process(Timer t){ | |
115 | |
116 ByteFile bf=ByteFile.makeByteFile(ffin); | |
117 ByteStreamWriter bsw=new ByteStreamWriter(ffout); | |
118 bsw.start(); | |
119 | |
120 long linesProcessed=0; | |
121 long charsProcessed=0; | |
122 long badLines=0; | |
123 | |
124 byte[] line=bf.nextLine(); | |
125 ByteBuilder bb=new ByteBuilder(10000); | |
126 int columns=4; | |
127 while(line!=null){ | |
128 if(Tools.startsWith(line, "accession\t")){ | |
129 bb.append(line); | |
130 bb.nl(); | |
131 }else if(Tools.startsWith(line, "accession.version\ttaxid")){ | |
132 columns=2; | |
133 bb.append("accession\t\ttaxid\t");//dummy header | |
134 bb.nl(); | |
135 }else{ | |
136 charsProcessed+=line.length+1; | |
137 linesProcessed++; | |
138 | |
139 final int tid=(columns==4 ? AccessionToTaxid.parseLineToTaxid(line, (byte)'\t') : | |
140 AccessionToTaxid.parseLineToTaxid_2col(line, (byte)'\t')); | |
141 if(tid<1){ | |
142 badLines++; | |
143 }else{ | |
144 int i=0; | |
145 | |
146 while(i<line.length){//Accession | |
147 byte b=line[i]; | |
148 bb.append(b); | |
149 i++; | |
150 if(b=='\t'){break;} | |
151 } | |
152 | |
153 if(columns==4){ | |
154 while(i<line.length){//Accession with decimal | |
155 byte b=line[i]; | |
156 // bb.append(b); | |
157 i++; | |
158 if(b=='\t'){break;} | |
159 } | |
160 } | |
161 bb.append('\t'); | |
162 | |
163 while(i<line.length){//Taxid | |
164 byte b=line[i]; | |
165 bb.append(b); | |
166 i++; | |
167 if(b=='\t'){break;} | |
168 } | |
169 | |
170 if(KEEP_GI_NUMBERS){ | |
171 if(line.length>i && Tools.isDigit(line[i])){//GI number or "na" | |
172 while(i<line.length){ | |
173 byte b=line[i]; | |
174 bb.append(b); | |
175 i++; | |
176 // if(b=='\t'){break;} | |
177 } | |
178 } | |
179 } | |
180 bb.nl(); | |
181 } | |
182 | |
183 // String[] split=new String(line).split("\t"); | |
184 // bb.append(split[0]); | |
185 // bb.tab(); | |
186 // bb.tab(); | |
187 // bb.append(split[2]); | |
188 // bb.tab(); | |
189 // bb.nl(); | |
190 } | |
191 if(bb.length()>8000){ | |
192 bsw.print(bb); | |
193 bb.clear(); | |
194 } | |
195 line=bf.nextLine(); | |
196 } | |
197 if(bb.length()>0){ | |
198 bsw.print(bb); | |
199 bb.clear(); | |
200 } | |
201 | |
202 errorState|=bf.close(); | |
203 if(bsw!=null){errorState|=bsw.poisonAndWait();} | |
204 | |
205 t.stop(); | |
206 outstream.println("Discarded "+badLines+" lines.\n"); | |
207 outstream.println(Tools.timeLinesBytesProcessed(t, linesProcessed, charsProcessed, 8)); | |
208 | |
209 if(errorState){ | |
210 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt."); | |
211 } | |
212 } | |
213 | |
214 /*--------------------------------------------------------------*/ | |
215 | |
216 | |
217 /*--------------------------------------------------------------*/ | |
218 | |
219 private String in=null; | |
220 private String out=null; | |
221 private String giOut=null; | |
222 | |
223 /*--------------------------------------------------------------*/ | |
224 | |
225 private final FileFormat ffin; | |
226 private final FileFormat ffout; | |
227 private final FileFormat ffoutGi; | |
228 | |
229 /*--------------------------------------------------------------*/ | |
230 | |
231 private PrintStream outstream=System.err; | |
232 public static boolean verbose=false; | |
233 public static boolean KEEP_GI_NUMBERS=true; | |
234 public boolean errorState=false; | |
235 private boolean overwrite=false; | |
236 private boolean append=false; | |
237 | |
238 } |