Mercurial > repos > rliterman > csp2
view CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/fileIO/ChainBlock.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
line wrap: on
line source
package fileIO;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import dna.Gene;
import shared.Shared;

/**
 * For loading UCSC .chain files that convert one genome build to another.
 *
 * <p>One {@code ChainBlock} holds a single chain: the 13-token header line
 * (starting with the literal {@code chain}) plus the numeric data lines that
 * follow it. Example header:</p>
 *
 * <pre>
 * chain 3303 chr1 247249719 + 13192499 13192587 chr1 249250621 - 236203315 236203403 109
 * </pre>
 *
 * <p>Blocks are ordered by target chromosome code, then target name, then
 * target start, then target stop (see {@link #compareTo(ChainBlock)}).</p>
 */
public class ChainBlock implements Comparable<ChainBlock>{

    /**
     * Loads the chain file named by {@code args[0]} and prints the chain lines
     * of chromosome indices 1 through 22, with a blank line after each
     * chromosome.
     *
     * @param args command-line arguments; {@code args[0]} is the chain file path
     */
    public static void main(String args[]){
        ChainLine[][] lines=loadChainLines(args[0]);
        // loadChainLines leaves a null entry for every chromosome that has no
        // chain lines, so guard against both a null row and a short array.
        for(int i=1; i<=22 && i<lines.length; i++){
            ChainLine[] chromLines=lines[i];
            if(chromLines!=null){
                for(ChainLine line : chromLines){
                    System.out.println(line);
                }
            }
            System.out.println();
        }
    }

    /**
     * Builds a block from the whitespace-split lines of one chain.
     *
     * @param list element 0 is the header
     *        ({@code chain score tName tSize tStrand tStart tEnd qName qSize qStrand qStart qEnd id});
     *        every following element is a data line. With assertions enabled,
     *        the last data line must have exactly 1 token and all earlier data
     *        lines exactly 3.
     * @throws NumberFormatException if a numeric token cannot be parsed
     */
    public ChainBlock(List<String[]> list){
        String[] head=list.get(0);
        assert("chain".equals(head[0]));

        score=Long.parseLong(head[1]);

        tName=head[2];
        tChrom=toChromosome(head[2]);
        tSize=Integer.parseInt(head[3]);
        tStrand=Gene.toStrand(head[4]);
        tStart=Integer.parseInt(head[5]);
        tStop=Integer.parseInt(head[6]);

        qName=head[7];
        qChrom=toChromosome(head[7]);
        qSize=Integer.parseInt(head[8]);
        qStrand=Gene.toStrand(head[9]);
        qStart=Integer.parseInt(head[10]);
        qStop=Integer.parseInt(head[11]);

        chainID=Integer.parseInt(head[12]);

        // Everything after the header is a data line; store each one as an int row.
        chunks=new int[list.size()-1][];
        for(int i=1; i<list.size(); i++){
            String[] line=list.get(i);
            assert((i==list.size()-1) == (line.length==1));
            assert((i!=list.size()-1) == (line.length==3));
            chunks[i-1]=new int[line.length];
            for(int j=0; j<line.length; j++){
                chunks[i-1][j]=Integer.parseInt(line[j]);
            }
        }
    }

    /**
     * Converts a sequence name to a chromosome code via {@link Gene#toChromosome}.
     * If that call throws, falls back to the code for {@code "U"}
     * (presumably the "unknown" chromosome - confirm against Gene).
     */
    private static int toChromosome(String s){
        int result;
        try{
            result=Gene.toChromosome(s);
        }catch(Exception e){
            // Deliberate best-effort: unrecognized names map to the "U" code.
            result=Gene.toChromosome("U");
        }
        return result;
    }

    /**
     * Expands this block into one {@link ChainLine} per data row.
     *
     * <p>Each row's first value is the length of an aligned segment; when a row
     * has further values, the second is added to the target position and the
     * third to the query position before the next segment starts. Target
     * coordinates always ascend from {@code tStart}. Query coordinates ascend
     * from {@code qStart} when {@code qStrand==Shared.PLUS}; otherwise they
     * descend from {@code qStop-1}.</p>
     *
     * @return an array with the same length as {@link #chunks}
     */
    public ChainLine[] toLines(){
        ChainLine[] out=new ChainLine[chunks.length];

        final boolean plus=(qStrand==Shared.PLUS);
        // Direction in which query coordinates advance.
        final int qStep=(plus ? 1 : -1);

        int tloc=tStart;
        int qloc=(plus ? qStart : qStop-1);

        for(int i=0; i<chunks.length; i++){
            int[] chunk=chunks[i];
            // Inclusive end coordinates of this segment.
            int tloc2=tloc+chunk[0]-1;
            int qloc2=qloc+qStep*(chunk[0]-1);
            out[i]=new ChainLine(tChrom, tStrand, tloc, tloc2, qChrom, qStrand, qloc, qloc2);
            if(chunk.length>1){
                // Skip the gaps to reach the start of the next segment.
                tloc=tloc2+chunk[1]+1;
                qloc=qloc2+qStep*(chunk[2]+1);
            }
        }
        return out;
    }

    /**
     * Loads a chain file and returns its chain lines grouped by target
     * chromosome code, each group sorted.
     *
     * @param fname path of the chain file
     * @return an array indexed by chromosome code; an entry is {@code null}
     *         when that chromosome has no chain lines
     */
    public static ChainLine[][] loadChainLines(String fname){
        ArrayList<ChainBlock> list=loadChainBlocks(fname);
        ChainBlock[][] blocks=splitChain(list);
        ChainLine[][] out=new ChainLine[blocks.length][];
        ArrayList<ChainLine> temp=new ArrayList<ChainLine>();
        for(int chrom=0; chrom<blocks.length; chrom++){
            temp.clear();
            for(ChainBlock block : blocks[chrom]){
                temp.addAll(Arrays.asList(block.toLines()));
            }
            // Entries for chromosomes without lines intentionally stay null.
            if(temp.size()>0){
                out[chrom]=temp.toArray(new ChainLine[temp.size()]);
                Arrays.sort(out[chrom]);
            }
        }
        return out;
    }

    /**
     * Reads a chain file and returns its blocks in sorted order.
     *
     * <p>Lines are accumulated until one with exactly one token is seen; that
     * line closes the current block. Lines left over after the last
     * single-token line are discarded.</p>
     *
     * @param fname path of the chain file
     * @return all blocks found, sorted with {@code Shared.sort}
     */
    public static ArrayList<ChainBlock> loadChainBlocks(String fname){
        TextFile tf=new TextFile(fname, false);
        String[] lines;
        try{
            lines=tf.toStringLines();
        }finally{
            // Release the file even when reading fails.
            tf.close();
        }
        String[][] text=TextFile.doublesplitWhitespace(lines, true);

        ArrayList<ChainBlock> out=new ArrayList<ChainBlock>();
        ArrayList<String[]> current=new ArrayList<String[]>(40);
        for(int i=0; i<text.length; i++){
            String[] line=text[i];
            current.add(line);
            if(line.length==1){
                // A single-token line terminates the block.
                out.add(new ChainBlock(current));
                current.clear();
            }
        }
        Shared.sort(out);
        return out;
    }

    /**
     * Groups blocks by target chromosome code, preserving list order within
     * each group.
     *
     * @param list blocks to split
     * @return an array of length {@code Gene.chromCodes.length}; entry
     *         {@code c} holds the blocks whose {@code tChrom==c} (possibly
     *         empty, never null)
     */
    public static ChainBlock[][] splitChain(ArrayList<ChainBlock> list){
        // First pass: count blocks per chromosome so the arrays can be sized exactly.
        int[] size=new int[Gene.chromCodes.length];
        for(ChainBlock cb : list){
            size[cb.tChrom]++;
        }

        ChainBlock[][] out=new ChainBlock[size.length][];
        for(int i=0; i<out.length; i++){
            out[i]=new ChainBlock[size[i]];
        }

        // Second pass: reuse the counters as per-chromosome insertion cursors.
        Arrays.fill(size, 0);
        for(ChainBlock cb : list){
            out[cb.tChrom][size[cb.tChrom]]=cb;
            size[cb.tChrom]++;
        }
        return out;
    }

    /**
     * Orders blocks by target chromosome code, then target name, then target
     * start, then target stop.
     */
    @Override
    public int compareTo(ChainBlock other) {
        // Integer.compare avoids the overflow that subtraction-based ordering can hit.
        int temp=Integer.compare(tChrom, other.tChrom);
        if(temp!=0){return temp;}

        temp=tName.compareTo(other.tName);
        if(temp!=0){return temp;}

        assert(tStrand==other.tStrand);

        temp=Integer.compare(tStart, other.tStart);
        if(temp!=0){return temp;}

        return Integer.compare(tStop, other.tStop);
    }

    //chain 3303 chr1 247249719 + 13192499 13192587 chr1 249250621 - 236203315 236203403 109

    /** score -- chain score */
    public long score;

    /** tName -- chromosome (reference sequence) */
    public String tName;
    /** Chromosome code derived from {@link #tName} */
    public int tChrom;
    /** tSize -- chromosome size (reference sequence) */
    public int tSize;
    /** tStrand -- strand (reference sequence) */
    public byte tStrand;
    /** tStart -- alignment start position (reference sequence) */
    public int tStart;
    /** tEnd -- alignment end position (reference sequence) */
    public int tStop;

    /** qName -- chromosome (query sequence) */
    public String qName;
    /** Chromosome code derived from {@link #qName} */
    public int qChrom;
    /** qSize -- chromosome size (query sequence) */
    public int qSize;
    /** qStrand -- strand (query sequence) */
    public byte qStrand;
    /** qStart -- alignment start position (query sequence) */
    public int qStart;
    /** qEnd -- alignment end position (query sequence) */
    public int qStop;

    /** id -- chain ID */
    public int chainID;

    /** Parsed data lines that follow the header, one int row per line */
    public int[][] chunks;

}