comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/tax/ShrinkAccession.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package tax;
2
3 import java.io.File;
4 import java.io.PrintStream;
5
6 import dna.Data;
7 import fileIO.ByteFile;
8 import fileIO.ByteFile1;
9 import fileIO.ByteFile2;
10 import fileIO.ByteStreamWriter;
11 import fileIO.FileFormat;
12 import fileIO.ReadWrite;
13 import shared.Parse;
14 import shared.Parser;
15 import shared.PreParser;
16 import shared.ReadStats;
17 import shared.Shared;
18 import shared.Timer;
19 import shared.Tools;
20 import stream.FastaReadInputStream;
21 import structures.ByteBuilder;
22
23 /**
24 * @author Brian Bushnell
25 * @date April 4, 2017
26 *
27 */
28 public class ShrinkAccession {
29
30 public static void main(String[] args){
31 Timer t=new Timer();
32 ShrinkAccession x=new ShrinkAccession(args);
33 x.process(t);
34
35 //Close the print stream if it was redirected
36 Shared.closeStream(x.outstream);
37 }
38
39 public ShrinkAccession(String[] args){
40
41 {//Preparse block for help, config files, and outstream
42 PreParser pp=new PreParser(args, getClass(), false);
43 args=pp.args;
44 outstream=pp.outstream;
45 }
46
47 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
48 ReadWrite.MAX_ZIP_THREADS=Shared.threads();
49 if(Data.PIGZ()){
50 ReadWrite.ZIPLEVEL=Tools.max(ReadWrite.ZIPLEVEL, 6);
51 }
52
53 Parser parser=new Parser();
54 for(int i=0; i<args.length; i++){
55 String arg=args[i];
56 String[] split=arg.split("=");
57 String a=split[0].toLowerCase();
58 String b=split.length>1 ? split[1] : null;
59
60 if(parser.parse(arg, a, b)){
61 //do nothing
62 }else if(a.equals("verbose")){
63 verbose=Parse.parseBoolean(b);
64 ByteFile1.verbose=verbose;
65 ByteFile2.verbose=verbose;
66 ReadWrite.verbose=verbose;
67 }else if(a.equals("gi")){
68 KEEP_GI_NUMBERS=Parse.parseBoolean(b);
69 }else if(a.equals("outgi") || a.equals("giout") || a.equals("gi")){
70 giOut=b;
71 }else if(parser.in1==null && i==0 && !arg.contains("=") && (arg.toLowerCase().startsWith("stdin") || new File(arg).exists())){
72 parser.in1=arg;
73 }else if(parser.out1==null && i==1 && !arg.contains("=")){
74 parser.out1=arg;
75 }else{
76 outstream.println("Unknown parameter "+args[i]);
77 assert(false) : "Unknown parameter "+args[i];
78 // throw new RuntimeException("Unknown parameter "+args[i]);
79 }
80 }
81
82 {//Process parser fields
83 Parser.processQuality();
84
85 overwrite=ReadStats.overwrite=parser.overwrite;
86 append=ReadStats.append=parser.append;
87
88 in=parser.in1;
89
90 out=parser.out1;
91 }
92
93 assert(FastaReadInputStream.settingsOK());
94
95 if(in==null){throw new RuntimeException("Error - at least one input file is required.");}
96 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2){
97 ByteFile.FORCE_MODE_BF2=false;
98 ByteFile.FORCE_MODE_BF1=true;
99 }
100
101 if(out!=null && out.equalsIgnoreCase("null")){out=null;}
102
103 if(!Tools.testOutputFiles(overwrite, append, false, out)){
104 outstream.println((out==null)+", "+out);
105 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out+"\n");
106 }
107
108 ffout=FileFormat.testOutput(out, FileFormat.TXT, null, true, overwrite, append, false);
109 ffoutGi=FileFormat.testOutput(giOut, FileFormat.TXT, null, true, overwrite, append, false);
110 ffin=FileFormat.testInput(in, FileFormat.TXT, null, true, true);
111
112 }
113
114 void process(Timer t){
115
116 ByteFile bf=ByteFile.makeByteFile(ffin);
117 ByteStreamWriter bsw=new ByteStreamWriter(ffout);
118 bsw.start();
119
120 long linesProcessed=0;
121 long charsProcessed=0;
122 long badLines=0;
123
124 byte[] line=bf.nextLine();
125 ByteBuilder bb=new ByteBuilder(10000);
126 int columns=4;
127 while(line!=null){
128 if(Tools.startsWith(line, "accession\t")){
129 bb.append(line);
130 bb.nl();
131 }else if(Tools.startsWith(line, "accession.version\ttaxid")){
132 columns=2;
133 bb.append("accession\t\ttaxid\t");//dummy header
134 bb.nl();
135 }else{
136 charsProcessed+=line.length+1;
137 linesProcessed++;
138
139 final int tid=(columns==4 ? AccessionToTaxid.parseLineToTaxid(line, (byte)'\t') :
140 AccessionToTaxid.parseLineToTaxid_2col(line, (byte)'\t'));
141 if(tid<1){
142 badLines++;
143 }else{
144 int i=0;
145
146 while(i<line.length){//Accession
147 byte b=line[i];
148 bb.append(b);
149 i++;
150 if(b=='\t'){break;}
151 }
152
153 if(columns==4){
154 while(i<line.length){//Accession with decimal
155 byte b=line[i];
156 // bb.append(b);
157 i++;
158 if(b=='\t'){break;}
159 }
160 }
161 bb.append('\t');
162
163 while(i<line.length){//Taxid
164 byte b=line[i];
165 bb.append(b);
166 i++;
167 if(b=='\t'){break;}
168 }
169
170 if(KEEP_GI_NUMBERS){
171 if(line.length>i && Tools.isDigit(line[i])){//GI number or "na"
172 while(i<line.length){
173 byte b=line[i];
174 bb.append(b);
175 i++;
176 // if(b=='\t'){break;}
177 }
178 }
179 }
180 bb.nl();
181 }
182
183 // String[] split=new String(line).split("\t");
184 // bb.append(split[0]);
185 // bb.tab();
186 // bb.tab();
187 // bb.append(split[2]);
188 // bb.tab();
189 // bb.nl();
190 }
191 if(bb.length()>8000){
192 bsw.print(bb);
193 bb.clear();
194 }
195 line=bf.nextLine();
196 }
197 if(bb.length()>0){
198 bsw.print(bb);
199 bb.clear();
200 }
201
202 errorState|=bf.close();
203 if(bsw!=null){errorState|=bsw.poisonAndWait();}
204
205 t.stop();
206 outstream.println("Discarded "+badLines+" lines.\n");
207 outstream.println(Tools.timeLinesBytesProcessed(t, linesProcessed, charsProcessed, 8));
208
209 if(errorState){
210 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
211 }
212 }
213
214 /*--------------------------------------------------------------*/
215
216
217 /*--------------------------------------------------------------*/
218
219 private String in=null;
220 private String out=null;
221 private String giOut=null;
222
223 /*--------------------------------------------------------------*/
224
225 private final FileFormat ffin;
226 private final FileFormat ffout;
227 private final FileFormat ffoutGi;
228
229 /*--------------------------------------------------------------*/
230
231 private PrintStream outstream=System.err;
232 public static boolean verbose=false;
233 public static boolean KEEP_GI_NUMBERS=true;
234 public boolean errorState=false;
235 private boolean overwrite=false;
236 private boolean append=false;
237
238 }