annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/tax/RenameIMG.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 package tax;
jpayne@68 2
jpayne@68 3 import java.io.File;
jpayne@68 4 import java.io.PrintStream;
jpayne@68 5
jpayne@68 6 import fileIO.ByteFile;
jpayne@68 7 import fileIO.ByteFile1;
jpayne@68 8 import fileIO.ByteFile2;
jpayne@68 9 import fileIO.ByteStreamWriter;
jpayne@68 10 import fileIO.FileFormat;
jpayne@68 11 import fileIO.ReadWrite;
jpayne@68 12 import shared.Parse;
jpayne@68 13 import shared.Parser;
jpayne@68 14 import shared.PreParser;
jpayne@68 15 import shared.Shared;
jpayne@68 16 import shared.Timer;
jpayne@68 17 import shared.Tools;
jpayne@68 18 import stream.ConcurrentGenericReadInputStream;
jpayne@68 19 import stream.FastaReadInputStream;
jpayne@68 20 import structures.ByteBuilder;
jpayne@68 21 import structures.IntHashSet;
jpayne@68 22
jpayne@68 23 /**
jpayne@68 24 * @author Brian Bushnell
jpayne@68 25 * @date May 9, 2016
jpayne@68 26 *
jpayne@68 27 */
jpayne@68 28 public class RenameIMG {
jpayne@68 29
jpayne@68 30 public static void main(String[] args){
jpayne@68 31 Timer t=new Timer();
jpayne@68 32 RenameIMG x=new RenameIMG(args);
jpayne@68 33 x.process(t);
jpayne@68 34
jpayne@68 35 //Close the print stream if it was redirected
jpayne@68 36 Shared.closeStream(x.outstream);
jpayne@68 37 }
jpayne@68 38
jpayne@68 39 public RenameIMG(String[] args){
jpayne@68 40
jpayne@68 41 {//Preparse block for help, config files, and outstream
jpayne@68 42 PreParser pp=new PreParser(args, getClass(), false);
jpayne@68 43 args=pp.args;
jpayne@68 44 outstream=pp.outstream;
jpayne@68 45 }
jpayne@68 46
jpayne@68 47 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
jpayne@68 48 ReadWrite.MAX_ZIP_THREADS=Shared.threads();
jpayne@68 49
jpayne@68 50 Parser parser=new Parser();
jpayne@68 51 for(int i=0; i<args.length; i++){
jpayne@68 52 String arg=args[i];
jpayne@68 53 String[] split=arg.split("=");
jpayne@68 54 String a=split[0].toLowerCase();
jpayne@68 55 String b=split.length>1 ? split[1] : null;
jpayne@68 56
jpayne@68 57 if(a.equals("lines")){
jpayne@68 58 maxLines=Long.parseLong(b);
jpayne@68 59 if(maxLines<0){maxLines=Long.MAX_VALUE;}
jpayne@68 60 }else if(a.equals("verbose")){
jpayne@68 61 verbose=Parse.parseBoolean(b);
jpayne@68 62 ByteFile1.verbose=verbose;
jpayne@68 63 ByteFile2.verbose=verbose;
jpayne@68 64 stream.FastaReadInputStream.verbose=verbose;
jpayne@68 65 ConcurrentGenericReadInputStream.verbose=verbose;
jpayne@68 66 stream.FastqReadInputStream.verbose=verbose;
jpayne@68 67 ReadWrite.verbose=verbose;
jpayne@68 68 }else if(a.equals("img")){
jpayne@68 69 imgFile=b;
jpayne@68 70 }else if(parser.parse(arg, a, b)){
jpayne@68 71 //do nothing
jpayne@68 72 }else{
jpayne@68 73 outstream.println("Unknown parameter "+args[i]);
jpayne@68 74 assert(false) : "Unknown parameter "+args[i];
jpayne@68 75 // throw new RuntimeException("Unknown parameter "+args[i]);
jpayne@68 76 }
jpayne@68 77 }
jpayne@68 78
jpayne@68 79 {//Process parser fields
jpayne@68 80 overwrite=parser.overwrite;
jpayne@68 81 append=parser.append;
jpayne@68 82
jpayne@68 83 in1=parser.in1;
jpayne@68 84
jpayne@68 85 out1=parser.out1;
jpayne@68 86 }
jpayne@68 87
jpayne@68 88 assert(FastaReadInputStream.settingsOK());
jpayne@68 89
jpayne@68 90 if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}
jpayne@68 91 if("auto".equalsIgnoreCase(imgFile)){imgFile=TaxTree.defaultImgFile();}//TODO: why are these set to the same default?
jpayne@68 92 if("auto".equalsIgnoreCase(in1)){in1=TaxTree.defaultImgFile();}
jpayne@68 93
jpayne@68 94 if(!ByteFile.FORCE_MODE_BF2){
jpayne@68 95 ByteFile.FORCE_MODE_BF2=false;
jpayne@68 96 ByteFile.FORCE_MODE_BF1=true;
jpayne@68 97 }
jpayne@68 98
jpayne@68 99 if(out1!=null && out1.equalsIgnoreCase("null")){out1=null;}
jpayne@68 100
jpayne@68 101 if(!Tools.testOutputFiles(overwrite, append, false, out1)){
jpayne@68 102 outstream.println((out1==null)+", "+out1);
jpayne@68 103 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out1+"\n");
jpayne@68 104 }
jpayne@68 105
jpayne@68 106 ffout1=FileFormat.testOutput(out1, FileFormat.FA, null, true, overwrite, append, false);
jpayne@68 107 }
jpayne@68 108
jpayne@68 109 void copyFiles(ImgRecord[] array){
jpayne@68 110 if(useSet){set=new IntHashSet(10000);}
jpayne@68 111 ByteStreamWriter bsw=new ByteStreamWriter(ffout1);
jpayne@68 112 bsw.start();
jpayne@68 113 for(ImgRecord ir : array){
jpayne@68 114 if(ir.taxID>0){set.add(ir.taxID);}
jpayne@68 115 else{unknownTaxid++;}
jpayne@68 116 FileFormat ffin=FileFormat.testInput(ir.path(), FileFormat.FA, null, true, true);
jpayne@68 117 process_inner(ffin, bsw, ir.imgID);
jpayne@68 118 }
jpayne@68 119 knownTaxid=set.size();
jpayne@68 120 set=null;
jpayne@68 121 if(bsw!=null){errorState|=bsw.poisonAndWait();}
jpayne@68 122 }
jpayne@68 123
jpayne@68 124 void process(Timer t){
jpayne@68 125 ImgRecord[] array=ImgRecord.toArray(in1, TaxTree.IMG_HQ);
jpayne@68 126 if(imgFile==null){
jpayne@68 127 TaxTree.loadIMG(array);
jpayne@68 128 }else{
jpayne@68 129 ImgRecord[] array2=ImgRecord.toArray(imgFile, TaxTree.IMG_HQ);
jpayne@68 130 TaxTree.loadIMG(array2);
jpayne@68 131 }
jpayne@68 132
jpayne@68 133 copyFiles(array);
jpayne@68 134
jpayne@68 135 t.stop();
jpayne@68 136
jpayne@68 137 final int spaces=8;
jpayne@68 138 String fpstring=""+filesProcessed;
jpayne@68 139 String cpstring=Tools.padKM(sequencesProcessed, spaces);
jpayne@68 140 String bapstring=Tools.padKM(basesProcessed, spaces);
jpayne@68 141 String tpstring=""+knownTaxid;
jpayne@68 142
jpayne@68 143 outstream.println("Time: \t"+t);
jpayne@68 144 outstream.println("Files Processed: "+fpstring);
jpayne@68 145 outstream.println("Contigs Processed: "+cpstring);
jpayne@68 146 outstream.println("Bases Processed: "+bapstring);
jpayne@68 147 if(useSet){outstream.println("TaxIDs Processed: "+tpstring+" \t"+"("+unknownTaxid+" unknown)");}
jpayne@68 148 outstream.println(Tools.linesBytesProcessed(t.elapsed, linesProcessed, bytesProcessed, spaces));
jpayne@68 149
jpayne@68 150 outstream.println();
jpayne@68 151 outstream.println("Valid Files: \t"+filesValid);
jpayne@68 152 outstream.println("Invalid Files: \t"+(filesProcessed-filesValid));
jpayne@68 153 outstream.println("Valid Lines: \t"+linesValid);
jpayne@68 154 outstream.println("Invalid Lines: \t"+(linesProcessed-linesValid));
jpayne@68 155
jpayne@68 156 if(errorState){
jpayne@68 157 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
jpayne@68 158 }
jpayne@68 159 }
jpayne@68 160
jpayne@68 161 void process_inner(final FileFormat ffin, final ByteStreamWriter bsw, final long img){
jpayne@68 162
jpayne@68 163 filesProcessed++;
jpayne@68 164 {
jpayne@68 165 File f=new File(ffin.name());
jpayne@68 166 if(!f.exists() || !f.canRead()){
jpayne@68 167 System.err.println("Can't find "+f);
jpayne@68 168 errorState=true;
jpayne@68 169 return;
jpayne@68 170 }
jpayne@68 171 }
jpayne@68 172 final int tid=TaxTree.imgToTaxid(img);
jpayne@68 173 ByteFile bf=ByteFile.makeByteFile(ffin);
jpayne@68 174
jpayne@68 175 byte[] line=bf.nextLine();
jpayne@68 176 ByteBuilder bb=new ByteBuilder();
jpayne@68 177
jpayne@68 178 while(line!=null){
jpayne@68 179 if(line.length>0){
jpayne@68 180 if(maxLines>0 && linesProcessed>=maxLines){break;}
jpayne@68 181 linesProcessed++;
jpayne@68 182 bytesProcessed+=line.length;
jpayne@68 183
jpayne@68 184 linesValid++;
jpayne@68 185 if(line[0]=='>'){
jpayne@68 186 sequencesProcessed++;
jpayne@68 187 bb.append('>');
jpayne@68 188 if(tid>=0){
jpayne@68 189 bb.append("tid|");
jpayne@68 190 bb.append(tid);
jpayne@68 191 bb.append('|');
jpayne@68 192 }
jpayne@68 193 bb.append("img|");
jpayne@68 194 bb.append(img);
jpayne@68 195 bb.append(' ');
jpayne@68 196 for(int i=1; i<line.length; i++){
jpayne@68 197 bb.append(line[i]);
jpayne@68 198 }
jpayne@68 199 }else{
jpayne@68 200 basesProcessed+=line.length;
jpayne@68 201 bb.append(line);
jpayne@68 202 }
jpayne@68 203 bb.nl();
jpayne@68 204 bsw.print(bb.toBytes());
jpayne@68 205 bb.clear();
jpayne@68 206 }
jpayne@68 207 line=bf.nextLine();
jpayne@68 208 }
jpayne@68 209
jpayne@68 210 filesValid++;
jpayne@68 211 errorState|=bf.close();
jpayne@68 212 }
jpayne@68 213
jpayne@68 214 /*--------------------------------------------------------------*/
jpayne@68 215
jpayne@68 216
jpayne@68 217 /*--------------------------------------------------------------*/
jpayne@68 218
jpayne@68 219 private String in1=null;
jpayne@68 220 private String out1=null;
jpayne@68 221 private String imgFile=null;
jpayne@68 222
jpayne@68 223 /*--------------------------------------------------------------*/
jpayne@68 224
jpayne@68 225 private IntHashSet set=null;
jpayne@68 226 private int knownTaxid=0;
jpayne@68 227 private int unknownTaxid=0;
jpayne@68 228 private boolean useSet=true;
jpayne@68 229
jpayne@68 230 private long linesProcessed=0;
jpayne@68 231 private long linesValid=0;
jpayne@68 232 private long bytesProcessed=0;
jpayne@68 233
jpayne@68 234 private long basesProcessed=0;
jpayne@68 235 private long sequencesProcessed=0;
jpayne@68 236 private long filesProcessed=0;
jpayne@68 237 private long filesValid=0;
jpayne@68 238
jpayne@68 239 private long maxLines=Long.MAX_VALUE;
jpayne@68 240
jpayne@68 241 /*--------------------------------------------------------------*/
jpayne@68 242
jpayne@68 243 private final FileFormat ffout1;
jpayne@68 244
jpayne@68 245
jpayne@68 246 /*--------------------------------------------------------------*/
jpayne@68 247
jpayne@68 248 private PrintStream outstream=System.err;
jpayne@68 249 public static boolean verbose=false;
jpayne@68 250 public boolean errorState=false;
jpayne@68 251 private boolean overwrite=false;
jpayne@68 252 private boolean append=false;
jpayne@68 253
jpayne@68 254 }