jpayne@68
|
1 package gff;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.io.PrintStream;
|
jpayne@68
|
4
|
jpayne@68
|
5 import fileIO.ByteFile;
|
jpayne@68
|
6 import fileIO.ByteStreamWriter;
|
jpayne@68
|
7 import fileIO.FileFormat;
|
jpayne@68
|
8 import shared.Parser;
|
jpayne@68
|
9 import shared.PreParser;
|
jpayne@68
|
10 import shared.ReadStats;
|
jpayne@68
|
11 import shared.Timer;
|
jpayne@68
|
12 import shared.Tools;
|
jpayne@68
|
13 import structures.ByteBuilder;
|
jpayne@68
|
14 import var2.VCFLine;
|
jpayne@68
|
15
|
jpayne@68
|
16 /**
|
jpayne@68
|
17 * Stripped out of GffLine into independent class.
|
jpayne@68
|
18 * @author Brian Bushnell
|
jpayne@68
|
19 * @date Sep 12, 2018
|
jpayne@68
|
20 *
|
jpayne@68
|
21 */
|
jpayne@68
|
22 public class VcfToGff {
|
jpayne@68
|
23
|
jpayne@68
|
24 /** Translates VCF to GFF */
|
jpayne@68
|
25 public static void main(String[] args){
|
jpayne@68
|
26 Timer t=new Timer();
|
jpayne@68
|
27 PrintStream outstream=System.err;
|
jpayne@68
|
28 {//Preparse block for help, config files, and outstream
|
jpayne@68
|
29 PreParser pp=new PreParser(args, new Object() { }.getClass().getEnclosingClass(), false);
|
jpayne@68
|
30 args=pp.args;
|
jpayne@68
|
31 outstream=pp.outstream;
|
jpayne@68
|
32 t.outstream=outstream;
|
jpayne@68
|
33 }
|
jpayne@68
|
34
|
jpayne@68
|
35 Parser parser=new Parser();
|
jpayne@68
|
36 String in=null;
|
jpayne@68
|
37 String out=null;
|
jpayne@68
|
38 boolean overwrite=true, append=false;
|
jpayne@68
|
39
|
jpayne@68
|
40 //Parse each argument
|
jpayne@68
|
41 for(int i=0; i<args.length; i++){
|
jpayne@68
|
42 String arg=args[i];
|
jpayne@68
|
43
|
jpayne@68
|
44 //Break arguments into their constituent parts, in the form of "a=b"
|
jpayne@68
|
45 String[] split=arg.split("=");
|
jpayne@68
|
46 String a=split[0].toLowerCase();
|
jpayne@68
|
47 String b=split.length>1 ? split[1] : null;
|
jpayne@68
|
48
|
jpayne@68
|
49 if(a.equals("in") || a.equals("vcf")){
|
jpayne@68
|
50 in=b;
|
jpayne@68
|
51 }else if(a.equals("out") || a.equals("gff")){
|
jpayne@68
|
52 out=b;
|
jpayne@68
|
53 }else if(parser.parse(arg, a, b)){
|
jpayne@68
|
54 //do nothing
|
jpayne@68
|
55 }else if(in==null && b==null && i==0 && Tools.canRead(arg)){
|
jpayne@68
|
56 in=arg;
|
jpayne@68
|
57 }else if(in==null && b==null && i==1){
|
jpayne@68
|
58 out=arg;
|
jpayne@68
|
59 }else{
|
jpayne@68
|
60 outstream.println("Unknown parameter "+args[i]);
|
jpayne@68
|
61 assert(false) : "Unknown parameter "+args[i];
|
jpayne@68
|
62 }
|
jpayne@68
|
63 }
|
jpayne@68
|
64
|
jpayne@68
|
65 {//Process parser fields
|
jpayne@68
|
66 overwrite=ReadStats.overwrite=parser.overwrite;
|
jpayne@68
|
67 append=ReadStats.append=parser.append;
|
jpayne@68
|
68 }
|
jpayne@68
|
69
|
jpayne@68
|
70 //Ensure output files can be written
|
jpayne@68
|
71 if(!Tools.testOutputFiles(overwrite, append, false, out)){
|
jpayne@68
|
72 outstream.println((out==null)+", "+out);
|
jpayne@68
|
73 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out+"\n");
|
jpayne@68
|
74 }
|
jpayne@68
|
75
|
jpayne@68
|
76 //Ensure input files can be read
|
jpayne@68
|
77 if(!Tools.testInputFiles(false, true, in)){
|
jpayne@68
|
78 throw new RuntimeException("\nCan't read some input files.\n");
|
jpayne@68
|
79 }
|
jpayne@68
|
80
|
jpayne@68
|
81 //Ensure that no file was specified multiple times
|
jpayne@68
|
82 if(!Tools.testForDuplicateFiles(true, in, out)){
|
jpayne@68
|
83 throw new RuntimeException("\nSome file names were specified multiple times.\n");
|
jpayne@68
|
84 }
|
jpayne@68
|
85
|
jpayne@68
|
86 translate(in, out, overwrite, append);
|
jpayne@68
|
87 t.stop("Time: \t");
|
jpayne@68
|
88 }
|
jpayne@68
|
89
|
jpayne@68
|
90 /** Translates VCF to GFF */
|
jpayne@68
|
91 private static void translate(String in, String out, boolean overwrite, boolean append){
|
jpayne@68
|
92 //Create output FileFormat objects
|
jpayne@68
|
93 FileFormat ffout=FileFormat.testOutput(out, FileFormat.GFF, "gff", true, overwrite, append, false);
|
jpayne@68
|
94
|
jpayne@68
|
95 //Create input FileFormat objects
|
jpayne@68
|
96 FileFormat ffin=FileFormat.testInput(in, FileFormat.VCF, "vcf", true, true);
|
jpayne@68
|
97
|
jpayne@68
|
98 ByteFile bf=ByteFile.makeByteFile(ffin);
|
jpayne@68
|
99 ByteStreamWriter bsw=null;
|
jpayne@68
|
100 if(ffout!=null){
|
jpayne@68
|
101 bsw=new ByteStreamWriter(ffout);
|
jpayne@68
|
102 bsw.start();
|
jpayne@68
|
103 }
|
jpayne@68
|
104
|
jpayne@68
|
105 ByteBuilder bb=new ByteBuilder(17000);
|
jpayne@68
|
106 bb.append("##gff-version 3\n");
|
jpayne@68
|
107 String header="#seqid source type start end score strand phase attributes";
|
jpayne@68
|
108 for(byte[] line=bf.nextLine(); line!=null; line=bf.nextLine()){
|
jpayne@68
|
109 if(line.length>1){
|
jpayne@68
|
110 if(line[0]=='#'){
|
jpayne@68
|
111 if(Tools.startsWith(line, "##fileformat") || Tools.startsWith(line, "##FORMAT") ||
|
jpayne@68
|
112 Tools.startsWith(line, "##INFO") || Tools.startsWith(line, "#CHROM POS")){
|
jpayne@68
|
113 //skip
|
jpayne@68
|
114 }else{
|
jpayne@68
|
115 int i=1;
|
jpayne@68
|
116 while(i<line.length && line[i]=='#'){i++;}
|
jpayne@68
|
117 i--;
|
jpayne@68
|
118 bb.append(line, i, line.length-i);
|
jpayne@68
|
119 bb.nl();
|
jpayne@68
|
120 }
|
jpayne@68
|
121 }else{
|
jpayne@68
|
122 if(header!=null){
|
jpayne@68
|
123 bb.append(header).append('\n');
|
jpayne@68
|
124 header=null;
|
jpayne@68
|
125 }
|
jpayne@68
|
126 VCFLine vline=new VCFLine(line);
|
jpayne@68
|
127 GffLine gline=new GffLine(vline);
|
jpayne@68
|
128 gline.appendTo(bb);
|
jpayne@68
|
129 bb.nl();
|
jpayne@68
|
130 }
|
jpayne@68
|
131 }
|
jpayne@68
|
132 if(bb.length()>=16384){
|
jpayne@68
|
133 if(bsw!=null){
|
jpayne@68
|
134 bsw.print(bb);
|
jpayne@68
|
135 }
|
jpayne@68
|
136 bb.clear();
|
jpayne@68
|
137 }
|
jpayne@68
|
138 }
|
jpayne@68
|
139 if(bb.length()>0){
|
jpayne@68
|
140 if(bsw!=null){
|
jpayne@68
|
141 bsw.print(bb);
|
jpayne@68
|
142 }
|
jpayne@68
|
143 bb.clear();
|
jpayne@68
|
144 }
|
jpayne@68
|
145 bf.close();
|
jpayne@68
|
146 if(bsw!=null){bsw.poisonAndWait();}
|
jpayne@68
|
147 }
|
jpayne@68
|
148
|
jpayne@68
|
149 }
|