comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/fileIO/ChainBlock.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package fileIO;
2
3 import java.util.ArrayList;
4 import java.util.Arrays;
5 import java.util.List;
6
7 import dna.Gene;
8 import shared.Shared;
9
10 /** For loading UCSC .chain files that convert one genome build to another. */
11 public class ChainBlock implements Comparable<ChainBlock>{
12
13
14 public static void main(String args[]){
15 ChainLine[][] lines=loadChainLines(args[0]);
16 for(int i=1; i<=22; i++){
17 for(ChainLine line : lines[i]){
18 System.out.println(line);
19 }
20 System.out.println();
21 }
22 }
23
24
25 public ChainBlock(List<String[]> list){
26
27 String[] head=list.get(0);
28 assert("chain".equals(head[0]));
29
30 score=Long.parseLong(head[1]);
31
32 tName=head[2];
33 tChrom=toChromosome(head[2]);
34 tSize=Integer.parseInt(head[3]);
35 tStrand=Gene.toStrand(head[4]);
36 tStart=Integer.parseInt(head[5]);
37 tStop=Integer.parseInt(head[6]);
38
39 qName=head[7];
40 qChrom=toChromosome(head[7]);
41 qSize=Integer.parseInt(head[8]);
42 qStrand=Gene.toStrand(head[9]);
43 qStart=Integer.parseInt(head[10]);
44 qStop=Integer.parseInt(head[11]);
45
46 chainID=Integer.parseInt(head[12]);
47
48 chunks=new int[list.size()-1][];
49 for(int i=1; i<list.size(); i++){
50 String[] line=list.get(i);
51 assert((i==list.size()-1) == (line.length==1));
52 assert((i!=list.size()-1) == (line.length==3));
53 chunks[i-1]=new int[line.length];
54 for(int j=0; j<line.length; j++){
55 chunks[i-1][j]=Integer.parseInt(line[j]);
56 }
57 }
58
59 }
60
61 private static int toChromosome(String s){
62 int result;
63 try{
64 result=Gene.toChromosome(s);
65 }catch(Exception e){
66 result=Gene.toChromosome("U");
67 }
68 return result;
69 }
70
71 public ChainLine[] toLines(){
72 ChainLine[] out=new ChainLine[chunks.length];
73
74 if(qStrand==Shared.PLUS){
75
76 int tloc=tStart, qloc=qStart;
77 for(int i=0; i<chunks.length; i++){
78 int[] chunk=chunks[i];
79 int tloc2=tloc+chunk[0]-1, qloc2=qloc+chunk[0]-1;
80 out[i]=new ChainLine(tChrom, tStrand, tloc, tloc2, qChrom, qStrand, qloc, qloc2);
81 if(chunk.length>1){
82 tloc=tloc2+chunk[1]+1;
83 qloc=qloc2+chunk[2]+1;
84 }
85 }
86 }else{
87
88 int tloc=tStart, qloc=qStop-1;
89 for(int i=0; i<chunks.length; i++){
90 int[] chunk=chunks[i];
91 int tloc2=tloc+chunk[0]-1, qloc2=qloc-chunk[0]+1;
92 out[i]=new ChainLine(tChrom, tStrand, tloc, tloc2, qChrom, qStrand, qloc, qloc2);
93 if(chunk.length>1){
94 tloc=tloc2+chunk[1]+1;
95 qloc=qloc2-chunk[2]-1;
96 }
97 }
98 }
99
100 return out;
101 }
102
103
104 public static ChainLine[][] loadChainLines(String fname){
105 ArrayList<ChainBlock> list=loadChainBlocks(fname);
106 ChainBlock[][] blocks=splitChain(list);
107 ChainLine[][] out=new ChainLine[blocks.length][];
108 ArrayList<ChainLine> temp=new ArrayList<ChainLine>();
109 for(int chrom=0; chrom<blocks.length; chrom++){
110 temp.clear();
111 ChainBlock[] cblocks=blocks[chrom];
112 if(cblocks.length>0){
113 for(ChainBlock block : cblocks){
114 ChainLine[] blines=block.toLines();
115 for(ChainLine line : blines){
116 temp.add(line);
117 }
118 }
119 }
120 if(temp.size()>0){
121 out[chrom]=temp.toArray(new ChainLine[temp.size()]);
122 Arrays.sort(out[chrom]);
123 }
124 }
125 return out;
126 }
127
128
129 public static ArrayList<ChainBlock> loadChainBlocks(String fname){
130 TextFile tf=new TextFile(fname, false);
131 String[] lines=tf.toStringLines();
132 tf.close();
133 String[][] text=TextFile.doublesplitWhitespace(lines, true);
134
135 ArrayList<ChainBlock> out=new ArrayList<ChainBlock>();
136 ArrayList<String[]> current=new ArrayList<String[]>(40);
137 for(int i=0; i<text.length; i++){
138 String[] line=text[i];
139 current.add(line);
140 if(line.length==1){
141 out.add(new ChainBlock(current));
142 current.clear();
143 }
144 }
145 Shared.sort(out);
146 return out;
147 }
148
149
150 public static ChainBlock[][] splitChain(ArrayList<ChainBlock> list){
151 int[] size=new int[Gene.chromCodes.length];
152
153 for(ChainBlock cb : list){size[cb.tChrom]++;}
154
155 ChainBlock[][] out=new ChainBlock[size.length][];
156 for(int i=0; i<out.length; i++){out[i]=new ChainBlock[size[i]];}
157
158 Arrays.fill(size, 0);
159 for(ChainBlock cb : list){
160 out[cb.tChrom][size[cb.tChrom]]=cb;
161 size[cb.tChrom]++;
162 }
163
164 return out;
165 }
166
167
168 @Override
169 public int compareTo(ChainBlock other) {
170 int temp;
171
172 temp=tChrom-other.tChrom;
173 if(temp!=0){return temp;}
174
175 temp=tName.compareTo(other.tName);
176 if(temp!=0){return temp;}
177
178 assert(tStrand==other.tStrand);
179
180 temp=tStart-other.tStart;
181 if(temp!=0){return temp;}
182
183 temp=tStop-other.tStop;
184 return temp;
185 }
186
187
188 public long score;
189 public String tName;
190 public int tChrom;
191 public int tSize;
192 public byte tStrand;
193 public int tStart;
194 public int tStop;
195
196 public String qName;
197 public int qChrom;
198 public int qSize;
199 public byte qStrand;
200 public int qStart;
201 public int qStop;
202
203 public int chainID;
204
205 public int[][] chunks;
206
207 //chain 3303 chr1 247249719 + 13192499 13192587 chr1 249250621 - 236203315 236203403 109
208
209 // * score -- chain score
210 // * tName -- chromosome (reference sequence)
211 // * tSize -- chromosome size (reference sequence)
212 // * tStrand -- strand (reference sequence)
213 // * tStart -- alignment start position (reference sequence)
214 // * tEnd -- alignment end position (reference sequence)
215 // * qName -- chromosome (query sequence)
216 // * qSize -- chromosome size (query sequence)
217 // * qStrand -- strand (query sequence)
218 // * qStart -- alignment start position (query sequence)
219 // * qEnd -- alignment end position (query sequence)
220 // * id -- chain ID
221
222 }