Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/fileIO/ChainBlock.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package fileIO; | |
2 | |
3 import java.util.ArrayList; | |
4 import java.util.Arrays; | |
5 import java.util.List; | |
6 | |
7 import dna.Gene; | |
8 import shared.Shared; | |
9 | |
10 /** For loading UCSC .chain files that convert one genome build to another. */ | |
11 public class ChainBlock implements Comparable<ChainBlock>{ | |
12 | |
13 | |
14 public static void main(String args[]){ | |
15 ChainLine[][] lines=loadChainLines(args[0]); | |
16 for(int i=1; i<=22; i++){ | |
17 for(ChainLine line : lines[i]){ | |
18 System.out.println(line); | |
19 } | |
20 System.out.println(); | |
21 } | |
22 } | |
23 | |
24 | |
25 public ChainBlock(List<String[]> list){ | |
26 | |
27 String[] head=list.get(0); | |
28 assert("chain".equals(head[0])); | |
29 | |
30 score=Long.parseLong(head[1]); | |
31 | |
32 tName=head[2]; | |
33 tChrom=toChromosome(head[2]); | |
34 tSize=Integer.parseInt(head[3]); | |
35 tStrand=Gene.toStrand(head[4]); | |
36 tStart=Integer.parseInt(head[5]); | |
37 tStop=Integer.parseInt(head[6]); | |
38 | |
39 qName=head[7]; | |
40 qChrom=toChromosome(head[7]); | |
41 qSize=Integer.parseInt(head[8]); | |
42 qStrand=Gene.toStrand(head[9]); | |
43 qStart=Integer.parseInt(head[10]); | |
44 qStop=Integer.parseInt(head[11]); | |
45 | |
46 chainID=Integer.parseInt(head[12]); | |
47 | |
48 chunks=new int[list.size()-1][]; | |
49 for(int i=1; i<list.size(); i++){ | |
50 String[] line=list.get(i); | |
51 assert((i==list.size()-1) == (line.length==1)); | |
52 assert((i!=list.size()-1) == (line.length==3)); | |
53 chunks[i-1]=new int[line.length]; | |
54 for(int j=0; j<line.length; j++){ | |
55 chunks[i-1][j]=Integer.parseInt(line[j]); | |
56 } | |
57 } | |
58 | |
59 } | |
60 | |
61 private static int toChromosome(String s){ | |
62 int result; | |
63 try{ | |
64 result=Gene.toChromosome(s); | |
65 }catch(Exception e){ | |
66 result=Gene.toChromosome("U"); | |
67 } | |
68 return result; | |
69 } | |
70 | |
71 public ChainLine[] toLines(){ | |
72 ChainLine[] out=new ChainLine[chunks.length]; | |
73 | |
74 if(qStrand==Shared.PLUS){ | |
75 | |
76 int tloc=tStart, qloc=qStart; | |
77 for(int i=0; i<chunks.length; i++){ | |
78 int[] chunk=chunks[i]; | |
79 int tloc2=tloc+chunk[0]-1, qloc2=qloc+chunk[0]-1; | |
80 out[i]=new ChainLine(tChrom, tStrand, tloc, tloc2, qChrom, qStrand, qloc, qloc2); | |
81 if(chunk.length>1){ | |
82 tloc=tloc2+chunk[1]+1; | |
83 qloc=qloc2+chunk[2]+1; | |
84 } | |
85 } | |
86 }else{ | |
87 | |
88 int tloc=tStart, qloc=qStop-1; | |
89 for(int i=0; i<chunks.length; i++){ | |
90 int[] chunk=chunks[i]; | |
91 int tloc2=tloc+chunk[0]-1, qloc2=qloc-chunk[0]+1; | |
92 out[i]=new ChainLine(tChrom, tStrand, tloc, tloc2, qChrom, qStrand, qloc, qloc2); | |
93 if(chunk.length>1){ | |
94 tloc=tloc2+chunk[1]+1; | |
95 qloc=qloc2-chunk[2]-1; | |
96 } | |
97 } | |
98 } | |
99 | |
100 return out; | |
101 } | |
102 | |
103 | |
104 public static ChainLine[][] loadChainLines(String fname){ | |
105 ArrayList<ChainBlock> list=loadChainBlocks(fname); | |
106 ChainBlock[][] blocks=splitChain(list); | |
107 ChainLine[][] out=new ChainLine[blocks.length][]; | |
108 ArrayList<ChainLine> temp=new ArrayList<ChainLine>(); | |
109 for(int chrom=0; chrom<blocks.length; chrom++){ | |
110 temp.clear(); | |
111 ChainBlock[] cblocks=blocks[chrom]; | |
112 if(cblocks.length>0){ | |
113 for(ChainBlock block : cblocks){ | |
114 ChainLine[] blines=block.toLines(); | |
115 for(ChainLine line : blines){ | |
116 temp.add(line); | |
117 } | |
118 } | |
119 } | |
120 if(temp.size()>0){ | |
121 out[chrom]=temp.toArray(new ChainLine[temp.size()]); | |
122 Arrays.sort(out[chrom]); | |
123 } | |
124 } | |
125 return out; | |
126 } | |
127 | |
128 | |
129 public static ArrayList<ChainBlock> loadChainBlocks(String fname){ | |
130 TextFile tf=new TextFile(fname, false); | |
131 String[] lines=tf.toStringLines(); | |
132 tf.close(); | |
133 String[][] text=TextFile.doublesplitWhitespace(lines, true); | |
134 | |
135 ArrayList<ChainBlock> out=new ArrayList<ChainBlock>(); | |
136 ArrayList<String[]> current=new ArrayList<String[]>(40); | |
137 for(int i=0; i<text.length; i++){ | |
138 String[] line=text[i]; | |
139 current.add(line); | |
140 if(line.length==1){ | |
141 out.add(new ChainBlock(current)); | |
142 current.clear(); | |
143 } | |
144 } | |
145 Shared.sort(out); | |
146 return out; | |
147 } | |
148 | |
149 | |
150 public static ChainBlock[][] splitChain(ArrayList<ChainBlock> list){ | |
151 int[] size=new int[Gene.chromCodes.length]; | |
152 | |
153 for(ChainBlock cb : list){size[cb.tChrom]++;} | |
154 | |
155 ChainBlock[][] out=new ChainBlock[size.length][]; | |
156 for(int i=0; i<out.length; i++){out[i]=new ChainBlock[size[i]];} | |
157 | |
158 Arrays.fill(size, 0); | |
159 for(ChainBlock cb : list){ | |
160 out[cb.tChrom][size[cb.tChrom]]=cb; | |
161 size[cb.tChrom]++; | |
162 } | |
163 | |
164 return out; | |
165 } | |
166 | |
167 | |
168 @Override | |
169 public int compareTo(ChainBlock other) { | |
170 int temp; | |
171 | |
172 temp=tChrom-other.tChrom; | |
173 if(temp!=0){return temp;} | |
174 | |
175 temp=tName.compareTo(other.tName); | |
176 if(temp!=0){return temp;} | |
177 | |
178 assert(tStrand==other.tStrand); | |
179 | |
180 temp=tStart-other.tStart; | |
181 if(temp!=0){return temp;} | |
182 | |
183 temp=tStop-other.tStop; | |
184 return temp; | |
185 } | |
186 | |
187 | |
188 public long score; | |
189 public String tName; | |
190 public int tChrom; | |
191 public int tSize; | |
192 public byte tStrand; | |
193 public int tStart; | |
194 public int tStop; | |
195 | |
196 public String qName; | |
197 public int qChrom; | |
198 public int qSize; | |
199 public byte qStrand; | |
200 public int qStart; | |
201 public int qStop; | |
202 | |
203 public int chainID; | |
204 | |
205 public int[][] chunks; | |
206 | |
207 //chain 3303 chr1 247249719 + 13192499 13192587 chr1 249250621 - 236203315 236203403 109 | |
208 | |
209 // * score -- chain score | |
210 // * tName -- chromosome (reference sequence) | |
211 // * tSize -- chromosome size (reference sequence) | |
212 // * tStrand -- strand (reference sequence) | |
213 // * tStart -- alignment start position (reference sequence) | |
214 // * tEnd -- alignment end position (reference sequence) | |
215 // * qName -- chromosome (query sequence) | |
216 // * qSize -- chromosome size (query sequence) | |
217 // * qStrand -- strand (query sequence) | |
218 // * qStart -- alignment start position (query sequence) | |
219 // * qEnd -- alignment end position (query sequence) | |
220 // * id -- chain ID | |
221 | |
222 } |