annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/tax/ShrinkAccession.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 package tax;
jpayne@68 2
jpayne@68 3 import java.io.File;
jpayne@68 4 import java.io.PrintStream;
jpayne@68 5
jpayne@68 6 import dna.Data;
jpayne@68 7 import fileIO.ByteFile;
jpayne@68 8 import fileIO.ByteFile1;
jpayne@68 9 import fileIO.ByteFile2;
jpayne@68 10 import fileIO.ByteStreamWriter;
jpayne@68 11 import fileIO.FileFormat;
jpayne@68 12 import fileIO.ReadWrite;
jpayne@68 13 import shared.Parse;
jpayne@68 14 import shared.Parser;
jpayne@68 15 import shared.PreParser;
jpayne@68 16 import shared.ReadStats;
jpayne@68 17 import shared.Shared;
jpayne@68 18 import shared.Timer;
jpayne@68 19 import shared.Tools;
jpayne@68 20 import stream.FastaReadInputStream;
jpayne@68 21 import structures.ByteBuilder;
jpayne@68 22
jpayne@68 23 /**
jpayne@68 24 * @author Brian Bushnell
jpayne@68 25 * @date April 4, 2017
jpayne@68 26 *
jpayne@68 27 */
jpayne@68 28 public class ShrinkAccession {
jpayne@68 29
jpayne@68 30 public static void main(String[] args){
jpayne@68 31 Timer t=new Timer();
jpayne@68 32 ShrinkAccession x=new ShrinkAccession(args);
jpayne@68 33 x.process(t);
jpayne@68 34
jpayne@68 35 //Close the print stream if it was redirected
jpayne@68 36 Shared.closeStream(x.outstream);
jpayne@68 37 }
jpayne@68 38
jpayne@68 39 public ShrinkAccession(String[] args){
jpayne@68 40
jpayne@68 41 {//Preparse block for help, config files, and outstream
jpayne@68 42 PreParser pp=new PreParser(args, getClass(), false);
jpayne@68 43 args=pp.args;
jpayne@68 44 outstream=pp.outstream;
jpayne@68 45 }
jpayne@68 46
jpayne@68 47 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
jpayne@68 48 ReadWrite.MAX_ZIP_THREADS=Shared.threads();
jpayne@68 49 if(Data.PIGZ()){
jpayne@68 50 ReadWrite.ZIPLEVEL=Tools.max(ReadWrite.ZIPLEVEL, 6);
jpayne@68 51 }
jpayne@68 52
jpayne@68 53 Parser parser=new Parser();
jpayne@68 54 for(int i=0; i<args.length; i++){
jpayne@68 55 String arg=args[i];
jpayne@68 56 String[] split=arg.split("=");
jpayne@68 57 String a=split[0].toLowerCase();
jpayne@68 58 String b=split.length>1 ? split[1] : null;
jpayne@68 59
jpayne@68 60 if(parser.parse(arg, a, b)){
jpayne@68 61 //do nothing
jpayne@68 62 }else if(a.equals("verbose")){
jpayne@68 63 verbose=Parse.parseBoolean(b);
jpayne@68 64 ByteFile1.verbose=verbose;
jpayne@68 65 ByteFile2.verbose=verbose;
jpayne@68 66 ReadWrite.verbose=verbose;
jpayne@68 67 }else if(a.equals("gi")){
jpayne@68 68 KEEP_GI_NUMBERS=Parse.parseBoolean(b);
jpayne@68 69 }else if(a.equals("outgi") || a.equals("giout") || a.equals("gi")){
jpayne@68 70 giOut=b;
jpayne@68 71 }else if(parser.in1==null && i==0 && !arg.contains("=") && (arg.toLowerCase().startsWith("stdin") || new File(arg).exists())){
jpayne@68 72 parser.in1=arg;
jpayne@68 73 }else if(parser.out1==null && i==1 && !arg.contains("=")){
jpayne@68 74 parser.out1=arg;
jpayne@68 75 }else{
jpayne@68 76 outstream.println("Unknown parameter "+args[i]);
jpayne@68 77 assert(false) : "Unknown parameter "+args[i];
jpayne@68 78 // throw new RuntimeException("Unknown parameter "+args[i]);
jpayne@68 79 }
jpayne@68 80 }
jpayne@68 81
jpayne@68 82 {//Process parser fields
jpayne@68 83 Parser.processQuality();
jpayne@68 84
jpayne@68 85 overwrite=ReadStats.overwrite=parser.overwrite;
jpayne@68 86 append=ReadStats.append=parser.append;
jpayne@68 87
jpayne@68 88 in=parser.in1;
jpayne@68 89
jpayne@68 90 out=parser.out1;
jpayne@68 91 }
jpayne@68 92
jpayne@68 93 assert(FastaReadInputStream.settingsOK());
jpayne@68 94
jpayne@68 95 if(in==null){throw new RuntimeException("Error - at least one input file is required.");}
jpayne@68 96 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2){
jpayne@68 97 ByteFile.FORCE_MODE_BF2=false;
jpayne@68 98 ByteFile.FORCE_MODE_BF1=true;
jpayne@68 99 }
jpayne@68 100
jpayne@68 101 if(out!=null && out.equalsIgnoreCase("null")){out=null;}
jpayne@68 102
jpayne@68 103 if(!Tools.testOutputFiles(overwrite, append, false, out)){
jpayne@68 104 outstream.println((out==null)+", "+out);
jpayne@68 105 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out+"\n");
jpayne@68 106 }
jpayne@68 107
jpayne@68 108 ffout=FileFormat.testOutput(out, FileFormat.TXT, null, true, overwrite, append, false);
jpayne@68 109 ffoutGi=FileFormat.testOutput(giOut, FileFormat.TXT, null, true, overwrite, append, false);
jpayne@68 110 ffin=FileFormat.testInput(in, FileFormat.TXT, null, true, true);
jpayne@68 111
jpayne@68 112 }
jpayne@68 113
jpayne@68 114 void process(Timer t){
jpayne@68 115
jpayne@68 116 ByteFile bf=ByteFile.makeByteFile(ffin);
jpayne@68 117 ByteStreamWriter bsw=new ByteStreamWriter(ffout);
jpayne@68 118 bsw.start();
jpayne@68 119
jpayne@68 120 long linesProcessed=0;
jpayne@68 121 long charsProcessed=0;
jpayne@68 122 long badLines=0;
jpayne@68 123
jpayne@68 124 byte[] line=bf.nextLine();
jpayne@68 125 ByteBuilder bb=new ByteBuilder(10000);
jpayne@68 126 int columns=4;
jpayne@68 127 while(line!=null){
jpayne@68 128 if(Tools.startsWith(line, "accession\t")){
jpayne@68 129 bb.append(line);
jpayne@68 130 bb.nl();
jpayne@68 131 }else if(Tools.startsWith(line, "accession.version\ttaxid")){
jpayne@68 132 columns=2;
jpayne@68 133 bb.append("accession\t\ttaxid\t");//dummy header
jpayne@68 134 bb.nl();
jpayne@68 135 }else{
jpayne@68 136 charsProcessed+=line.length+1;
jpayne@68 137 linesProcessed++;
jpayne@68 138
jpayne@68 139 final int tid=(columns==4 ? AccessionToTaxid.parseLineToTaxid(line, (byte)'\t') :
jpayne@68 140 AccessionToTaxid.parseLineToTaxid_2col(line, (byte)'\t'));
jpayne@68 141 if(tid<1){
jpayne@68 142 badLines++;
jpayne@68 143 }else{
jpayne@68 144 int i=0;
jpayne@68 145
jpayne@68 146 while(i<line.length){//Accession
jpayne@68 147 byte b=line[i];
jpayne@68 148 bb.append(b);
jpayne@68 149 i++;
jpayne@68 150 if(b=='\t'){break;}
jpayne@68 151 }
jpayne@68 152
jpayne@68 153 if(columns==4){
jpayne@68 154 while(i<line.length){//Accession with decimal
jpayne@68 155 byte b=line[i];
jpayne@68 156 // bb.append(b);
jpayne@68 157 i++;
jpayne@68 158 if(b=='\t'){break;}
jpayne@68 159 }
jpayne@68 160 }
jpayne@68 161 bb.append('\t');
jpayne@68 162
jpayne@68 163 while(i<line.length){//Taxid
jpayne@68 164 byte b=line[i];
jpayne@68 165 bb.append(b);
jpayne@68 166 i++;
jpayne@68 167 if(b=='\t'){break;}
jpayne@68 168 }
jpayne@68 169
jpayne@68 170 if(KEEP_GI_NUMBERS){
jpayne@68 171 if(line.length>i && Tools.isDigit(line[i])){//GI number or "na"
jpayne@68 172 while(i<line.length){
jpayne@68 173 byte b=line[i];
jpayne@68 174 bb.append(b);
jpayne@68 175 i++;
jpayne@68 176 // if(b=='\t'){break;}
jpayne@68 177 }
jpayne@68 178 }
jpayne@68 179 }
jpayne@68 180 bb.nl();
jpayne@68 181 }
jpayne@68 182
jpayne@68 183 // String[] split=new String(line).split("\t");
jpayne@68 184 // bb.append(split[0]);
jpayne@68 185 // bb.tab();
jpayne@68 186 // bb.tab();
jpayne@68 187 // bb.append(split[2]);
jpayne@68 188 // bb.tab();
jpayne@68 189 // bb.nl();
jpayne@68 190 }
jpayne@68 191 if(bb.length()>8000){
jpayne@68 192 bsw.print(bb);
jpayne@68 193 bb.clear();
jpayne@68 194 }
jpayne@68 195 line=bf.nextLine();
jpayne@68 196 }
jpayne@68 197 if(bb.length()>0){
jpayne@68 198 bsw.print(bb);
jpayne@68 199 bb.clear();
jpayne@68 200 }
jpayne@68 201
jpayne@68 202 errorState|=bf.close();
jpayne@68 203 if(bsw!=null){errorState|=bsw.poisonAndWait();}
jpayne@68 204
jpayne@68 205 t.stop();
jpayne@68 206 outstream.println("Discarded "+badLines+" lines.\n");
jpayne@68 207 outstream.println(Tools.timeLinesBytesProcessed(t, linesProcessed, charsProcessed, 8));
jpayne@68 208
jpayne@68 209 if(errorState){
jpayne@68 210 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
jpayne@68 211 }
jpayne@68 212 }
jpayne@68 213
jpayne@68 214 /*--------------------------------------------------------------*/
jpayne@68 215
jpayne@68 216
jpayne@68 217 /*--------------------------------------------------------------*/
jpayne@68 218
jpayne@68 219 private String in=null;
jpayne@68 220 private String out=null;
jpayne@68 221 private String giOut=null;
jpayne@68 222
jpayne@68 223 /*--------------------------------------------------------------*/
jpayne@68 224
jpayne@68 225 private final FileFormat ffin;
jpayne@68 226 private final FileFormat ffout;
jpayne@68 227 private final FileFormat ffoutGi;
jpayne@68 228
jpayne@68 229 /*--------------------------------------------------------------*/
jpayne@68 230
jpayne@68 231 private PrintStream outstream=System.err;
jpayne@68 232 public static boolean verbose=false;
jpayne@68 233 public static boolean KEEP_GI_NUMBERS=true;
jpayne@68 234 public boolean errorState=false;
jpayne@68 235 private boolean overwrite=false;
jpayne@68 236 private boolean append=false;
jpayne@68 237
jpayne@68 238 }