comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/tax/RenameIMG.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package tax;
2
3 import java.io.File;
4 import java.io.PrintStream;
5
6 import fileIO.ByteFile;
7 import fileIO.ByteFile1;
8 import fileIO.ByteFile2;
9 import fileIO.ByteStreamWriter;
10 import fileIO.FileFormat;
11 import fileIO.ReadWrite;
12 import shared.Parse;
13 import shared.Parser;
14 import shared.PreParser;
15 import shared.Shared;
16 import shared.Timer;
17 import shared.Tools;
18 import stream.ConcurrentGenericReadInputStream;
19 import stream.FastaReadInputStream;
20 import structures.ByteBuilder;
21 import structures.IntHashSet;
22
23 /**
24 * @author Brian Bushnell
25 * @date May 9, 2016
26 *
27 */
28 public class RenameIMG {
29
30 public static void main(String[] args){
31 Timer t=new Timer();
32 RenameIMG x=new RenameIMG(args);
33 x.process(t);
34
35 //Close the print stream if it was redirected
36 Shared.closeStream(x.outstream);
37 }
38
39 public RenameIMG(String[] args){
40
41 {//Preparse block for help, config files, and outstream
42 PreParser pp=new PreParser(args, getClass(), false);
43 args=pp.args;
44 outstream=pp.outstream;
45 }
46
47 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
48 ReadWrite.MAX_ZIP_THREADS=Shared.threads();
49
50 Parser parser=new Parser();
51 for(int i=0; i<args.length; i++){
52 String arg=args[i];
53 String[] split=arg.split("=");
54 String a=split[0].toLowerCase();
55 String b=split.length>1 ? split[1] : null;
56
57 if(a.equals("lines")){
58 maxLines=Long.parseLong(b);
59 if(maxLines<0){maxLines=Long.MAX_VALUE;}
60 }else if(a.equals("verbose")){
61 verbose=Parse.parseBoolean(b);
62 ByteFile1.verbose=verbose;
63 ByteFile2.verbose=verbose;
64 stream.FastaReadInputStream.verbose=verbose;
65 ConcurrentGenericReadInputStream.verbose=verbose;
66 stream.FastqReadInputStream.verbose=verbose;
67 ReadWrite.verbose=verbose;
68 }else if(a.equals("img")){
69 imgFile=b;
70 }else if(parser.parse(arg, a, b)){
71 //do nothing
72 }else{
73 outstream.println("Unknown parameter "+args[i]);
74 assert(false) : "Unknown parameter "+args[i];
75 // throw new RuntimeException("Unknown parameter "+args[i]);
76 }
77 }
78
79 {//Process parser fields
80 overwrite=parser.overwrite;
81 append=parser.append;
82
83 in1=parser.in1;
84
85 out1=parser.out1;
86 }
87
88 assert(FastaReadInputStream.settingsOK());
89
90 if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}
91 if("auto".equalsIgnoreCase(imgFile)){imgFile=TaxTree.defaultImgFile();}//TODO: why are these set to the same default?
92 if("auto".equalsIgnoreCase(in1)){in1=TaxTree.defaultImgFile();}
93
94 if(!ByteFile.FORCE_MODE_BF2){
95 ByteFile.FORCE_MODE_BF2=false;
96 ByteFile.FORCE_MODE_BF1=true;
97 }
98
99 if(out1!=null && out1.equalsIgnoreCase("null")){out1=null;}
100
101 if(!Tools.testOutputFiles(overwrite, append, false, out1)){
102 outstream.println((out1==null)+", "+out1);
103 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out1+"\n");
104 }
105
106 ffout1=FileFormat.testOutput(out1, FileFormat.FA, null, true, overwrite, append, false);
107 }
108
109 void copyFiles(ImgRecord[] array){
110 if(useSet){set=new IntHashSet(10000);}
111 ByteStreamWriter bsw=new ByteStreamWriter(ffout1);
112 bsw.start();
113 for(ImgRecord ir : array){
114 if(ir.taxID>0){set.add(ir.taxID);}
115 else{unknownTaxid++;}
116 FileFormat ffin=FileFormat.testInput(ir.path(), FileFormat.FA, null, true, true);
117 process_inner(ffin, bsw, ir.imgID);
118 }
119 knownTaxid=set.size();
120 set=null;
121 if(bsw!=null){errorState|=bsw.poisonAndWait();}
122 }
123
124 void process(Timer t){
125 ImgRecord[] array=ImgRecord.toArray(in1, TaxTree.IMG_HQ);
126 if(imgFile==null){
127 TaxTree.loadIMG(array);
128 }else{
129 ImgRecord[] array2=ImgRecord.toArray(imgFile, TaxTree.IMG_HQ);
130 TaxTree.loadIMG(array2);
131 }
132
133 copyFiles(array);
134
135 t.stop();
136
137 final int spaces=8;
138 String fpstring=""+filesProcessed;
139 String cpstring=Tools.padKM(sequencesProcessed, spaces);
140 String bapstring=Tools.padKM(basesProcessed, spaces);
141 String tpstring=""+knownTaxid;
142
143 outstream.println("Time: \t"+t);
144 outstream.println("Files Processed: "+fpstring);
145 outstream.println("Contigs Processed: "+cpstring);
146 outstream.println("Bases Processed: "+bapstring);
147 if(useSet){outstream.println("TaxIDs Processed: "+tpstring+" \t"+"("+unknownTaxid+" unknown)");}
148 outstream.println(Tools.linesBytesProcessed(t.elapsed, linesProcessed, bytesProcessed, spaces));
149
150 outstream.println();
151 outstream.println("Valid Files: \t"+filesValid);
152 outstream.println("Invalid Files: \t"+(filesProcessed-filesValid));
153 outstream.println("Valid Lines: \t"+linesValid);
154 outstream.println("Invalid Lines: \t"+(linesProcessed-linesValid));
155
156 if(errorState){
157 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
158 }
159 }
160
161 void process_inner(final FileFormat ffin, final ByteStreamWriter bsw, final long img){
162
163 filesProcessed++;
164 {
165 File f=new File(ffin.name());
166 if(!f.exists() || !f.canRead()){
167 System.err.println("Can't find "+f);
168 errorState=true;
169 return;
170 }
171 }
172 final int tid=TaxTree.imgToTaxid(img);
173 ByteFile bf=ByteFile.makeByteFile(ffin);
174
175 byte[] line=bf.nextLine();
176 ByteBuilder bb=new ByteBuilder();
177
178 while(line!=null){
179 if(line.length>0){
180 if(maxLines>0 && linesProcessed>=maxLines){break;}
181 linesProcessed++;
182 bytesProcessed+=line.length;
183
184 linesValid++;
185 if(line[0]=='>'){
186 sequencesProcessed++;
187 bb.append('>');
188 if(tid>=0){
189 bb.append("tid|");
190 bb.append(tid);
191 bb.append('|');
192 }
193 bb.append("img|");
194 bb.append(img);
195 bb.append(' ');
196 for(int i=1; i<line.length; i++){
197 bb.append(line[i]);
198 }
199 }else{
200 basesProcessed+=line.length;
201 bb.append(line);
202 }
203 bb.nl();
204 bsw.print(bb.toBytes());
205 bb.clear();
206 }
207 line=bf.nextLine();
208 }
209
210 filesValid++;
211 errorState|=bf.close();
212 }
213
214 /*--------------------------------------------------------------*/
215
216
217 /*--------------------------------------------------------------*/
218
219 private String in1=null;
220 private String out1=null;
221 private String imgFile=null;
222
223 /*--------------------------------------------------------------*/
224
225 private IntHashSet set=null;
226 private int knownTaxid=0;
227 private int unknownTaxid=0;
228 private boolean useSet=true;
229
230 private long linesProcessed=0;
231 private long linesValid=0;
232 private long bytesProcessed=0;
233
234 private long basesProcessed=0;
235 private long sequencesProcessed=0;
236 private long filesProcessed=0;
237 private long filesValid=0;
238
239 private long maxLines=Long.MAX_VALUE;
240
241 /*--------------------------------------------------------------*/
242
243 private final FileFormat ffout1;
244
245
246 /*--------------------------------------------------------------*/
247
248 private PrintStream outstream=System.err;
249 public static boolean verbose=false;
250 public boolean errorState=false;
251 private boolean overwrite=false;
252 private boolean append=false;
253
254 }