jpayne@68
|
1 package tax;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.io.File;
|
jpayne@68
|
4 import java.io.PrintStream;
|
jpayne@68
|
5
|
jpayne@68
|
6 import fileIO.ByteFile;
|
jpayne@68
|
7 import fileIO.ByteFile1;
|
jpayne@68
|
8 import fileIO.ByteFile2;
|
jpayne@68
|
9 import fileIO.ByteStreamWriter;
|
jpayne@68
|
10 import fileIO.FileFormat;
|
jpayne@68
|
11 import fileIO.ReadWrite;
|
jpayne@68
|
12 import shared.Parse;
|
jpayne@68
|
13 import shared.Parser;
|
jpayne@68
|
14 import shared.PreParser;
|
jpayne@68
|
15 import shared.Shared;
|
jpayne@68
|
16 import shared.Timer;
|
jpayne@68
|
17 import shared.Tools;
|
jpayne@68
|
18 import stream.ConcurrentGenericReadInputStream;
|
jpayne@68
|
19 import stream.FastaReadInputStream;
|
jpayne@68
|
20 import structures.ByteBuilder;
|
jpayne@68
|
21 import structures.IntHashSet;
|
jpayne@68
|
22
|
jpayne@68
|
23 /**
|
jpayne@68
|
24 * @author Brian Bushnell
|
jpayne@68
|
25 * @date May 9, 2016
|
jpayne@68
|
26 *
|
jpayne@68
|
27 */
|
jpayne@68
|
28 public class RenameIMG {
|
jpayne@68
|
29
|
jpayne@68
|
30 public static void main(String[] args){
|
jpayne@68
|
31 Timer t=new Timer();
|
jpayne@68
|
32 RenameIMG x=new RenameIMG(args);
|
jpayne@68
|
33 x.process(t);
|
jpayne@68
|
34
|
jpayne@68
|
35 //Close the print stream if it was redirected
|
jpayne@68
|
36 Shared.closeStream(x.outstream);
|
jpayne@68
|
37 }
|
jpayne@68
|
38
|
jpayne@68
|
39 public RenameIMG(String[] args){
|
jpayne@68
|
40
|
jpayne@68
|
41 {//Preparse block for help, config files, and outstream
|
jpayne@68
|
42 PreParser pp=new PreParser(args, getClass(), false);
|
jpayne@68
|
43 args=pp.args;
|
jpayne@68
|
44 outstream=pp.outstream;
|
jpayne@68
|
45 }
|
jpayne@68
|
46
|
jpayne@68
|
47 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
|
jpayne@68
|
48 ReadWrite.MAX_ZIP_THREADS=Shared.threads();
|
jpayne@68
|
49
|
jpayne@68
|
50 Parser parser=new Parser();
|
jpayne@68
|
51 for(int i=0; i<args.length; i++){
|
jpayne@68
|
52 String arg=args[i];
|
jpayne@68
|
53 String[] split=arg.split("=");
|
jpayne@68
|
54 String a=split[0].toLowerCase();
|
jpayne@68
|
55 String b=split.length>1 ? split[1] : null;
|
jpayne@68
|
56
|
jpayne@68
|
57 if(a.equals("lines")){
|
jpayne@68
|
58 maxLines=Long.parseLong(b);
|
jpayne@68
|
59 if(maxLines<0){maxLines=Long.MAX_VALUE;}
|
jpayne@68
|
60 }else if(a.equals("verbose")){
|
jpayne@68
|
61 verbose=Parse.parseBoolean(b);
|
jpayne@68
|
62 ByteFile1.verbose=verbose;
|
jpayne@68
|
63 ByteFile2.verbose=verbose;
|
jpayne@68
|
64 stream.FastaReadInputStream.verbose=verbose;
|
jpayne@68
|
65 ConcurrentGenericReadInputStream.verbose=verbose;
|
jpayne@68
|
66 stream.FastqReadInputStream.verbose=verbose;
|
jpayne@68
|
67 ReadWrite.verbose=verbose;
|
jpayne@68
|
68 }else if(a.equals("img")){
|
jpayne@68
|
69 imgFile=b;
|
jpayne@68
|
70 }else if(parser.parse(arg, a, b)){
|
jpayne@68
|
71 //do nothing
|
jpayne@68
|
72 }else{
|
jpayne@68
|
73 outstream.println("Unknown parameter "+args[i]);
|
jpayne@68
|
74 assert(false) : "Unknown parameter "+args[i];
|
jpayne@68
|
75 // throw new RuntimeException("Unknown parameter "+args[i]);
|
jpayne@68
|
76 }
|
jpayne@68
|
77 }
|
jpayne@68
|
78
|
jpayne@68
|
79 {//Process parser fields
|
jpayne@68
|
80 overwrite=parser.overwrite;
|
jpayne@68
|
81 append=parser.append;
|
jpayne@68
|
82
|
jpayne@68
|
83 in1=parser.in1;
|
jpayne@68
|
84
|
jpayne@68
|
85 out1=parser.out1;
|
jpayne@68
|
86 }
|
jpayne@68
|
87
|
jpayne@68
|
88 assert(FastaReadInputStream.settingsOK());
|
jpayne@68
|
89
|
jpayne@68
|
90 if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}
|
jpayne@68
|
91 if("auto".equalsIgnoreCase(imgFile)){imgFile=TaxTree.defaultImgFile();}//TODO: why are these set to the same default?
|
jpayne@68
|
92 if("auto".equalsIgnoreCase(in1)){in1=TaxTree.defaultImgFile();}
|
jpayne@68
|
93
|
jpayne@68
|
94 if(!ByteFile.FORCE_MODE_BF2){
|
jpayne@68
|
95 ByteFile.FORCE_MODE_BF2=false;
|
jpayne@68
|
96 ByteFile.FORCE_MODE_BF1=true;
|
jpayne@68
|
97 }
|
jpayne@68
|
98
|
jpayne@68
|
99 if(out1!=null && out1.equalsIgnoreCase("null")){out1=null;}
|
jpayne@68
|
100
|
jpayne@68
|
101 if(!Tools.testOutputFiles(overwrite, append, false, out1)){
|
jpayne@68
|
102 outstream.println((out1==null)+", "+out1);
|
jpayne@68
|
103 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out1+"\n");
|
jpayne@68
|
104 }
|
jpayne@68
|
105
|
jpayne@68
|
106 ffout1=FileFormat.testOutput(out1, FileFormat.FA, null, true, overwrite, append, false);
|
jpayne@68
|
107 }
|
jpayne@68
|
108
|
jpayne@68
|
109 void copyFiles(ImgRecord[] array){
|
jpayne@68
|
110 if(useSet){set=new IntHashSet(10000);}
|
jpayne@68
|
111 ByteStreamWriter bsw=new ByteStreamWriter(ffout1);
|
jpayne@68
|
112 bsw.start();
|
jpayne@68
|
113 for(ImgRecord ir : array){
|
jpayne@68
|
114 if(ir.taxID>0){set.add(ir.taxID);}
|
jpayne@68
|
115 else{unknownTaxid++;}
|
jpayne@68
|
116 FileFormat ffin=FileFormat.testInput(ir.path(), FileFormat.FA, null, true, true);
|
jpayne@68
|
117 process_inner(ffin, bsw, ir.imgID);
|
jpayne@68
|
118 }
|
jpayne@68
|
119 knownTaxid=set.size();
|
jpayne@68
|
120 set=null;
|
jpayne@68
|
121 if(bsw!=null){errorState|=bsw.poisonAndWait();}
|
jpayne@68
|
122 }
|
jpayne@68
|
123
|
jpayne@68
|
124 void process(Timer t){
|
jpayne@68
|
125 ImgRecord[] array=ImgRecord.toArray(in1, TaxTree.IMG_HQ);
|
jpayne@68
|
126 if(imgFile==null){
|
jpayne@68
|
127 TaxTree.loadIMG(array);
|
jpayne@68
|
128 }else{
|
jpayne@68
|
129 ImgRecord[] array2=ImgRecord.toArray(imgFile, TaxTree.IMG_HQ);
|
jpayne@68
|
130 TaxTree.loadIMG(array2);
|
jpayne@68
|
131 }
|
jpayne@68
|
132
|
jpayne@68
|
133 copyFiles(array);
|
jpayne@68
|
134
|
jpayne@68
|
135 t.stop();
|
jpayne@68
|
136
|
jpayne@68
|
137 final int spaces=8;
|
jpayne@68
|
138 String fpstring=""+filesProcessed;
|
jpayne@68
|
139 String cpstring=Tools.padKM(sequencesProcessed, spaces);
|
jpayne@68
|
140 String bapstring=Tools.padKM(basesProcessed, spaces);
|
jpayne@68
|
141 String tpstring=""+knownTaxid;
|
jpayne@68
|
142
|
jpayne@68
|
143 outstream.println("Time: \t"+t);
|
jpayne@68
|
144 outstream.println("Files Processed: "+fpstring);
|
jpayne@68
|
145 outstream.println("Contigs Processed: "+cpstring);
|
jpayne@68
|
146 outstream.println("Bases Processed: "+bapstring);
|
jpayne@68
|
147 if(useSet){outstream.println("TaxIDs Processed: "+tpstring+" \t"+"("+unknownTaxid+" unknown)");}
|
jpayne@68
|
148 outstream.println(Tools.linesBytesProcessed(t.elapsed, linesProcessed, bytesProcessed, spaces));
|
jpayne@68
|
149
|
jpayne@68
|
150 outstream.println();
|
jpayne@68
|
151 outstream.println("Valid Files: \t"+filesValid);
|
jpayne@68
|
152 outstream.println("Invalid Files: \t"+(filesProcessed-filesValid));
|
jpayne@68
|
153 outstream.println("Valid Lines: \t"+linesValid);
|
jpayne@68
|
154 outstream.println("Invalid Lines: \t"+(linesProcessed-linesValid));
|
jpayne@68
|
155
|
jpayne@68
|
156 if(errorState){
|
jpayne@68
|
157 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
|
jpayne@68
|
158 }
|
jpayne@68
|
159 }
|
jpayne@68
|
160
|
jpayne@68
|
161 void process_inner(final FileFormat ffin, final ByteStreamWriter bsw, final long img){
|
jpayne@68
|
162
|
jpayne@68
|
163 filesProcessed++;
|
jpayne@68
|
164 {
|
jpayne@68
|
165 File f=new File(ffin.name());
|
jpayne@68
|
166 if(!f.exists() || !f.canRead()){
|
jpayne@68
|
167 System.err.println("Can't find "+f);
|
jpayne@68
|
168 errorState=true;
|
jpayne@68
|
169 return;
|
jpayne@68
|
170 }
|
jpayne@68
|
171 }
|
jpayne@68
|
172 final int tid=TaxTree.imgToTaxid(img);
|
jpayne@68
|
173 ByteFile bf=ByteFile.makeByteFile(ffin);
|
jpayne@68
|
174
|
jpayne@68
|
175 byte[] line=bf.nextLine();
|
jpayne@68
|
176 ByteBuilder bb=new ByteBuilder();
|
jpayne@68
|
177
|
jpayne@68
|
178 while(line!=null){
|
jpayne@68
|
179 if(line.length>0){
|
jpayne@68
|
180 if(maxLines>0 && linesProcessed>=maxLines){break;}
|
jpayne@68
|
181 linesProcessed++;
|
jpayne@68
|
182 bytesProcessed+=line.length;
|
jpayne@68
|
183
|
jpayne@68
|
184 linesValid++;
|
jpayne@68
|
185 if(line[0]=='>'){
|
jpayne@68
|
186 sequencesProcessed++;
|
jpayne@68
|
187 bb.append('>');
|
jpayne@68
|
188 if(tid>=0){
|
jpayne@68
|
189 bb.append("tid|");
|
jpayne@68
|
190 bb.append(tid);
|
jpayne@68
|
191 bb.append('|');
|
jpayne@68
|
192 }
|
jpayne@68
|
193 bb.append("img|");
|
jpayne@68
|
194 bb.append(img);
|
jpayne@68
|
195 bb.append(' ');
|
jpayne@68
|
196 for(int i=1; i<line.length; i++){
|
jpayne@68
|
197 bb.append(line[i]);
|
jpayne@68
|
198 }
|
jpayne@68
|
199 }else{
|
jpayne@68
|
200 basesProcessed+=line.length;
|
jpayne@68
|
201 bb.append(line);
|
jpayne@68
|
202 }
|
jpayne@68
|
203 bb.nl();
|
jpayne@68
|
204 bsw.print(bb.toBytes());
|
jpayne@68
|
205 bb.clear();
|
jpayne@68
|
206 }
|
jpayne@68
|
207 line=bf.nextLine();
|
jpayne@68
|
208 }
|
jpayne@68
|
209
|
jpayne@68
|
210 filesValid++;
|
jpayne@68
|
211 errorState|=bf.close();
|
jpayne@68
|
212 }
|
jpayne@68
|
213
|
jpayne@68
|
214 /*--------------------------------------------------------------*/
|
jpayne@68
|
215
|
jpayne@68
|
216
|
jpayne@68
|
217 /*--------------------------------------------------------------*/
|
jpayne@68
|
218
|
jpayne@68
|
219 private String in1=null;
|
jpayne@68
|
220 private String out1=null;
|
jpayne@68
|
221 private String imgFile=null;
|
jpayne@68
|
222
|
jpayne@68
|
223 /*--------------------------------------------------------------*/
|
jpayne@68
|
224
|
jpayne@68
|
225 private IntHashSet set=null;
|
jpayne@68
|
226 private int knownTaxid=0;
|
jpayne@68
|
227 private int unknownTaxid=0;
|
jpayne@68
|
228 private boolean useSet=true;
|
jpayne@68
|
229
|
jpayne@68
|
230 private long linesProcessed=0;
|
jpayne@68
|
231 private long linesValid=0;
|
jpayne@68
|
232 private long bytesProcessed=0;
|
jpayne@68
|
233
|
jpayne@68
|
234 private long basesProcessed=0;
|
jpayne@68
|
235 private long sequencesProcessed=0;
|
jpayne@68
|
236 private long filesProcessed=0;
|
jpayne@68
|
237 private long filesValid=0;
|
jpayne@68
|
238
|
jpayne@68
|
239 private long maxLines=Long.MAX_VALUE;
|
jpayne@68
|
240
|
jpayne@68
|
241 /*--------------------------------------------------------------*/
|
jpayne@68
|
242
|
jpayne@68
|
243 private final FileFormat ffout1;
|
jpayne@68
|
244
|
jpayne@68
|
245
|
jpayne@68
|
246 /*--------------------------------------------------------------*/
|
jpayne@68
|
247
|
jpayne@68
|
248 private PrintStream outstream=System.err;
|
jpayne@68
|
249 public static boolean verbose=false;
|
jpayne@68
|
250 public boolean errorState=false;
|
jpayne@68
|
251 private boolean overwrite=false;
|
jpayne@68
|
252 private boolean append=false;
|
jpayne@68
|
253
|
jpayne@68
|
254 }
|