annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/icecream/ZMW.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 package icecream;
jpayne@68 2
jpayne@68 3 import java.util.ArrayList;
jpayne@68 4
jpayne@68 5 import shared.Tools;
jpayne@68 6 import stream.Read;
jpayne@68 7 import stream.SamLine;
jpayne@68 8 import structures.IntList;
jpayne@68 9
jpayne@68 10 /**
jpayne@68 11 * Container for the list of reads from a single
jpayne@68 12 * PacBio ZMW.
jpayne@68 13 * @author Brian Bushnell
jpayne@68 14 * @date June 5, 2020
jpayne@68 15 */
jpayne@68 16 public class ZMW extends ArrayList<Read> {
jpayne@68 17
jpayne@68 18 /**
jpayne@68 19 * For serialization.
jpayne@68 20 */
jpayne@68 21 private static final long serialVersionUID = -2580124131008824113L;
jpayne@68 22
jpayne@68 23 public ZMW(){super();}
jpayne@68 24
jpayne@68 25 public ZMW(int initialSize){super(initialSize);}
jpayne@68 26
jpayne@68 27 public long countBases(){
jpayne@68 28 long x=0;
jpayne@68 29 for(Read r : this){
jpayne@68 30 x+=r.length();
jpayne@68 31 }
jpayne@68 32 return x;
jpayne@68 33 }
jpayne@68 34
jpayne@68 35 public int medianLength(boolean includeDiscarded){
jpayne@68 36 if(size()<3){return -1;}
jpayne@68 37 IntList lengths=new IntList(size()-2);
jpayne@68 38
jpayne@68 39 for(int i=1; i<size()-1; i++){
jpayne@68 40 Read r=get(i);
jpayne@68 41 if(includeDiscarded || !r.discarded()){
jpayne@68 42 lengths.add(get(i).length());
jpayne@68 43 }
jpayne@68 44 }
jpayne@68 45 lengths.sort();
jpayne@68 46 int median=lengths.get(lengths.size/2);
jpayne@68 47 return median;
jpayne@68 48 }
jpayne@68 49
jpayne@68 50 public int longestLength(boolean includeDiscarded){
jpayne@68 51 int max=0;
jpayne@68 52 for(Read r : this){
jpayne@68 53 if(includeDiscarded || !r.discarded()){
jpayne@68 54 max=Tools.max(max, r.length());
jpayne@68 55 }
jpayne@68 56 }
jpayne@68 57 return max;
jpayne@68 58 }
jpayne@68 59
jpayne@68 60 public Read medianRead(boolean includeDiscarded){
jpayne@68 61 int len=medianLength(includeDiscarded);
jpayne@68 62 if(len<0){return longestRead(includeDiscarded);}
jpayne@68 63 for(int i=1; i<size()-1; i++){
jpayne@68 64 Read r=get(i);
jpayne@68 65 if((includeDiscarded || !r.discarded()) && r.length()==len){
jpayne@68 66 return r;
jpayne@68 67 }
jpayne@68 68 }
jpayne@68 69 return null;
jpayne@68 70 }
jpayne@68 71
jpayne@68 72 public Read longestRead(boolean includeDiscarded){
jpayne@68 73 Read max=null;
jpayne@68 74 for(Read r : this){
jpayne@68 75 if((includeDiscarded || !r.discarded()) && (max==null || r.length()>max.length())){max=r;}
jpayne@68 76 }
jpayne@68 77 return max;
jpayne@68 78 }
jpayne@68 79
jpayne@68 80 public int zid(){
jpayne@68 81 if(zid==-1){parseZID();}
jpayne@68 82 return zid;
jpayne@68 83 }
jpayne@68 84
jpayne@68 85 private int parseZID(){
jpayne@68 86 return (size()<1 ? -1 : PBHeader.parseZMW(get(0).id));
jpayne@68 87 }
jpayne@68 88
jpayne@68 89 public static void fixReadHeader(Read r, int leftTrim, int rightTrim){
jpayne@68 90 leftTrim=Tools.max(0, leftTrim);
jpayne@68 91 rightTrim=Tools.max(0, rightTrim);
jpayne@68 92 if(leftTrim<1 && rightTrim<1){return;}
jpayne@68 93 final int idx=r.id.lastIndexOf('/');
jpayne@68 94 if(idx>0 && idx<r.id.length()-3){
jpayne@68 95 String prefix=r.id.substring(0, idx+1);
jpayne@68 96 String suffix=r.id.substring(idx+1);
jpayne@68 97 if(suffix.indexOf('_')>0){
jpayne@68 98 String coords=suffix, comment="";
jpayne@68 99 int tab=suffix.indexOf('\t');
jpayne@68 100 if(tab<0){tab=suffix.indexOf(' ');}
jpayne@68 101 if(tab>0){
jpayne@68 102 coords=coords.substring(0, tab);
jpayne@68 103 comment=coords.substring(tab);
jpayne@68 104 }
jpayne@68 105 String[] split=Tools.underscorePattern.split(coords);
jpayne@68 106 int left=Integer.parseInt(split[0]);
jpayne@68 107 int right=Integer.parseInt(split[1]);
jpayne@68 108 left+=leftTrim;
jpayne@68 109 right-=rightTrim;
jpayne@68 110 if(left>right){left=right;}
jpayne@68 111
jpayne@68 112 if(right-left!=r.length()){right=left+r.length();}
jpayne@68 113 // System.err.println(r.length()+", "+(right-left));
jpayne@68 114
jpayne@68 115 r.id=prefix+left+"_"+right+comment;
jpayne@68 116 final SamLine sl=r.samline;
jpayne@68 117 if(sl!=null){
jpayne@68 118 sl.qname=r.id;
jpayne@68 119 if(sl.optional!=null){
jpayne@68 120 for(int i=0; i<sl.optional.size(); i++){
jpayne@68 121 String s=sl.optional.get(i);
jpayne@68 122 if(s.startsWith("qe:i:")){
jpayne@68 123 s="qe:i:"+right;
jpayne@68 124 sl.optional.set(i, s);
jpayne@68 125 }else if(s.startsWith("qs:i:")){
jpayne@68 126 s="qs:i:"+left;
jpayne@68 127 sl.optional.set(i, s);
jpayne@68 128 }
jpayne@68 129 }
jpayne@68 130 }
jpayne@68 131 }
jpayne@68 132 }
jpayne@68 133 }
jpayne@68 134 }
jpayne@68 135
jpayne@68 136 public void setDiscarded(boolean b){
jpayne@68 137 for(Read r : this){
jpayne@68 138 r.setDiscarded(b);
jpayne@68 139 }
jpayne@68 140 }
jpayne@68 141
jpayne@68 142 public int[] lengths() {
jpayne@68 143 final int size=size();
jpayne@68 144 int[] array=new int[size];
jpayne@68 145 for(int i=0; i<size; i++){
jpayne@68 146 Read r=get(i);
jpayne@68 147 array[i]=r==null ? -1 : r.length();
jpayne@68 148 }
jpayne@68 149 return array;
jpayne@68 150 }
jpayne@68 151
jpayne@68 152 public float estimatePasses(){
jpayne@68 153 final int size=size();
jpayne@68 154 if(size<1){return 0;}
jpayne@68 155 else if(size==1){return 0.25f;}
jpayne@68 156 else if(size==2){return 0.5f;}
jpayne@68 157
jpayne@68 158 int median=medianLength(true);
jpayne@68 159 int first=first().length();
jpayne@68 160 int last=last().length();
jpayne@68 161
jpayne@68 162 return size-2+estimatePasses(first, median)+estimatePasses(last, median);
jpayne@68 163 }
jpayne@68 164
jpayne@68 165 private float estimatePasses(int len, int median){
jpayne@68 166 float ratio=len/(float)median;
jpayne@68 167 //TODO: I want this to be more asymptotic
jpayne@68 168 return Tools.min(0.99f, ratio/(1+0.05f*ratio));
jpayne@68 169 }
jpayne@68 170
jpayne@68 171 public boolean discarded() {
jpayne@68 172 for(Read r : this){
jpayne@68 173 if(!r.discarded()){return false;}
jpayne@68 174 }
jpayne@68 175 return true;
jpayne@68 176 }
jpayne@68 177
jpayne@68 178 /**
jpayne@68 179 * Identifier assigned by streamer, not by PacBio.
jpayne@68 180 * First identifier is 0, then 1, etc.
jpayne@68 181 */
jpayne@68 182 public long id;
jpayne@68 183
jpayne@68 184 /**
jpayne@68 185 * ZMW ID assigned by PacBio.
jpayne@68 186 */
jpayne@68 187 private int zid=-1;
jpayne@68 188
jpayne@68 189 public Read first(){return get(0);}
jpayne@68 190 public Read last(){return get(size()-1);}
jpayne@68 191
jpayne@68 192 }