comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/icecream/ZMW.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package icecream;
2
3 import java.util.ArrayList;
4
5 import shared.Tools;
6 import stream.Read;
7 import stream.SamLine;
8 import structures.IntList;
9
10 /**
11 * Container for the list of reads from a single
12 * PacBio ZMW.
13 * @author Brian Bushnell
14 * @date June 5, 2020
15 */
16 public class ZMW extends ArrayList<Read> {
17
18 /**
19 * For serialization.
20 */
21 private static final long serialVersionUID = -2580124131008824113L;
22
23 public ZMW(){super();}
24
25 public ZMW(int initialSize){super(initialSize);}
26
27 public long countBases(){
28 long x=0;
29 for(Read r : this){
30 x+=r.length();
31 }
32 return x;
33 }
34
35 public int medianLength(boolean includeDiscarded){
36 if(size()<3){return -1;}
37 IntList lengths=new IntList(size()-2);
38
39 for(int i=1; i<size()-1; i++){
40 Read r=get(i);
41 if(includeDiscarded || !r.discarded()){
42 lengths.add(get(i).length());
43 }
44 }
45 lengths.sort();
46 int median=lengths.get(lengths.size/2);
47 return median;
48 }
49
50 public int longestLength(boolean includeDiscarded){
51 int max=0;
52 for(Read r : this){
53 if(includeDiscarded || !r.discarded()){
54 max=Tools.max(max, r.length());
55 }
56 }
57 return max;
58 }
59
60 public Read medianRead(boolean includeDiscarded){
61 int len=medianLength(includeDiscarded);
62 if(len<0){return longestRead(includeDiscarded);}
63 for(int i=1; i<size()-1; i++){
64 Read r=get(i);
65 if((includeDiscarded || !r.discarded()) && r.length()==len){
66 return r;
67 }
68 }
69 return null;
70 }
71
72 public Read longestRead(boolean includeDiscarded){
73 Read max=null;
74 for(Read r : this){
75 if((includeDiscarded || !r.discarded()) && (max==null || r.length()>max.length())){max=r;}
76 }
77 return max;
78 }
79
80 public int zid(){
81 if(zid==-1){parseZID();}
82 return zid;
83 }
84
85 private int parseZID(){
86 return (size()<1 ? -1 : PBHeader.parseZMW(get(0).id));
87 }
88
89 public static void fixReadHeader(Read r, int leftTrim, int rightTrim){
90 leftTrim=Tools.max(0, leftTrim);
91 rightTrim=Tools.max(0, rightTrim);
92 if(leftTrim<1 && rightTrim<1){return;}
93 final int idx=r.id.lastIndexOf('/');
94 if(idx>0 && idx<r.id.length()-3){
95 String prefix=r.id.substring(0, idx+1);
96 String suffix=r.id.substring(idx+1);
97 if(suffix.indexOf('_')>0){
98 String coords=suffix, comment="";
99 int tab=suffix.indexOf('\t');
100 if(tab<0){tab=suffix.indexOf(' ');}
101 if(tab>0){
102 coords=coords.substring(0, tab);
103 comment=coords.substring(tab);
104 }
105 String[] split=Tools.underscorePattern.split(coords);
106 int left=Integer.parseInt(split[0]);
107 int right=Integer.parseInt(split[1]);
108 left+=leftTrim;
109 right-=rightTrim;
110 if(left>right){left=right;}
111
112 if(right-left!=r.length()){right=left+r.length();}
113 // System.err.println(r.length()+", "+(right-left));
114
115 r.id=prefix+left+"_"+right+comment;
116 final SamLine sl=r.samline;
117 if(sl!=null){
118 sl.qname=r.id;
119 if(sl.optional!=null){
120 for(int i=0; i<sl.optional.size(); i++){
121 String s=sl.optional.get(i);
122 if(s.startsWith("qe:i:")){
123 s="qe:i:"+right;
124 sl.optional.set(i, s);
125 }else if(s.startsWith("qs:i:")){
126 s="qs:i:"+left;
127 sl.optional.set(i, s);
128 }
129 }
130 }
131 }
132 }
133 }
134 }
135
136 public void setDiscarded(boolean b){
137 for(Read r : this){
138 r.setDiscarded(b);
139 }
140 }
141
142 public int[] lengths() {
143 final int size=size();
144 int[] array=new int[size];
145 for(int i=0; i<size; i++){
146 Read r=get(i);
147 array[i]=r==null ? -1 : r.length();
148 }
149 return array;
150 }
151
152 public float estimatePasses(){
153 final int size=size();
154 if(size<1){return 0;}
155 else if(size==1){return 0.25f;}
156 else if(size==2){return 0.5f;}
157
158 int median=medianLength(true);
159 int first=first().length();
160 int last=last().length();
161
162 return size-2+estimatePasses(first, median)+estimatePasses(last, median);
163 }
164
165 private float estimatePasses(int len, int median){
166 float ratio=len/(float)median;
167 //TODO: I want this to be more asymptotic
168 return Tools.min(0.99f, ratio/(1+0.05f*ratio));
169 }
170
171 public boolean discarded() {
172 for(Read r : this){
173 if(!r.discarded()){return false;}
174 }
175 return true;
176 }
177
178 /**
179 * Identifier assigned by streamer, not by PacBio.
180 * First identifier is 0, then 1, etc.
181 */
182 public long id;
183
184 /**
185 * ZMW ID assigned by PacBio.
186 */
187 private int zid=-1;
188
189 public Read first(){return get(0);}
190 public Read last(){return get(size()-1);}
191
192 }