Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/icecream/ZMW.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package icecream; | |
2 | |
3 import java.util.ArrayList; | |
4 | |
5 import shared.Tools; | |
6 import stream.Read; | |
7 import stream.SamLine; | |
8 import structures.IntList; | |
9 | |
10 /** | |
11 * Container for the list of reads from a single | |
12 * PacBio ZMW. | |
13 * @author Brian Bushnell | |
14 * @date June 5, 2020 | |
15 */ | |
16 public class ZMW extends ArrayList<Read> { | |
17 | |
18 /** | |
19 * For serialization. | |
20 */ | |
21 private static final long serialVersionUID = -2580124131008824113L; | |
22 | |
23 public ZMW(){super();} | |
24 | |
25 public ZMW(int initialSize){super(initialSize);} | |
26 | |
27 public long countBases(){ | |
28 long x=0; | |
29 for(Read r : this){ | |
30 x+=r.length(); | |
31 } | |
32 return x; | |
33 } | |
34 | |
35 public int medianLength(boolean includeDiscarded){ | |
36 if(size()<3){return -1;} | |
37 IntList lengths=new IntList(size()-2); | |
38 | |
39 for(int i=1; i<size()-1; i++){ | |
40 Read r=get(i); | |
41 if(includeDiscarded || !r.discarded()){ | |
42 lengths.add(get(i).length()); | |
43 } | |
44 } | |
45 lengths.sort(); | |
46 int median=lengths.get(lengths.size/2); | |
47 return median; | |
48 } | |
49 | |
50 public int longestLength(boolean includeDiscarded){ | |
51 int max=0; | |
52 for(Read r : this){ | |
53 if(includeDiscarded || !r.discarded()){ | |
54 max=Tools.max(max, r.length()); | |
55 } | |
56 } | |
57 return max; | |
58 } | |
59 | |
60 public Read medianRead(boolean includeDiscarded){ | |
61 int len=medianLength(includeDiscarded); | |
62 if(len<0){return longestRead(includeDiscarded);} | |
63 for(int i=1; i<size()-1; i++){ | |
64 Read r=get(i); | |
65 if((includeDiscarded || !r.discarded()) && r.length()==len){ | |
66 return r; | |
67 } | |
68 } | |
69 return null; | |
70 } | |
71 | |
72 public Read longestRead(boolean includeDiscarded){ | |
73 Read max=null; | |
74 for(Read r : this){ | |
75 if((includeDiscarded || !r.discarded()) && (max==null || r.length()>max.length())){max=r;} | |
76 } | |
77 return max; | |
78 } | |
79 | |
80 public int zid(){ | |
81 if(zid==-1){parseZID();} | |
82 return zid; | |
83 } | |
84 | |
85 private int parseZID(){ | |
86 return (size()<1 ? -1 : PBHeader.parseZMW(get(0).id)); | |
87 } | |
88 | |
89 public static void fixReadHeader(Read r, int leftTrim, int rightTrim){ | |
90 leftTrim=Tools.max(0, leftTrim); | |
91 rightTrim=Tools.max(0, rightTrim); | |
92 if(leftTrim<1 && rightTrim<1){return;} | |
93 final int idx=r.id.lastIndexOf('/'); | |
94 if(idx>0 && idx<r.id.length()-3){ | |
95 String prefix=r.id.substring(0, idx+1); | |
96 String suffix=r.id.substring(idx+1); | |
97 if(suffix.indexOf('_')>0){ | |
98 String coords=suffix, comment=""; | |
99 int tab=suffix.indexOf('\t'); | |
100 if(tab<0){tab=suffix.indexOf(' ');} | |
101 if(tab>0){ | |
102 coords=coords.substring(0, tab); | |
103 comment=coords.substring(tab); | |
104 } | |
105 String[] split=Tools.underscorePattern.split(coords); | |
106 int left=Integer.parseInt(split[0]); | |
107 int right=Integer.parseInt(split[1]); | |
108 left+=leftTrim; | |
109 right-=rightTrim; | |
110 if(left>right){left=right;} | |
111 | |
112 if(right-left!=r.length()){right=left+r.length();} | |
113 // System.err.println(r.length()+", "+(right-left)); | |
114 | |
115 r.id=prefix+left+"_"+right+comment; | |
116 final SamLine sl=r.samline; | |
117 if(sl!=null){ | |
118 sl.qname=r.id; | |
119 if(sl.optional!=null){ | |
120 for(int i=0; i<sl.optional.size(); i++){ | |
121 String s=sl.optional.get(i); | |
122 if(s.startsWith("qe:i:")){ | |
123 s="qe:i:"+right; | |
124 sl.optional.set(i, s); | |
125 }else if(s.startsWith("qs:i:")){ | |
126 s="qs:i:"+left; | |
127 sl.optional.set(i, s); | |
128 } | |
129 } | |
130 } | |
131 } | |
132 } | |
133 } | |
134 } | |
135 | |
136 public void setDiscarded(boolean b){ | |
137 for(Read r : this){ | |
138 r.setDiscarded(b); | |
139 } | |
140 } | |
141 | |
142 public int[] lengths() { | |
143 final int size=size(); | |
144 int[] array=new int[size]; | |
145 for(int i=0; i<size; i++){ | |
146 Read r=get(i); | |
147 array[i]=r==null ? -1 : r.length(); | |
148 } | |
149 return array; | |
150 } | |
151 | |
152 public float estimatePasses(){ | |
153 final int size=size(); | |
154 if(size<1){return 0;} | |
155 else if(size==1){return 0.25f;} | |
156 else if(size==2){return 0.5f;} | |
157 | |
158 int median=medianLength(true); | |
159 int first=first().length(); | |
160 int last=last().length(); | |
161 | |
162 return size-2+estimatePasses(first, median)+estimatePasses(last, median); | |
163 } | |
164 | |
165 private float estimatePasses(int len, int median){ | |
166 float ratio=len/(float)median; | |
167 //TODO: I want this to be more asymptotic | |
168 return Tools.min(0.99f, ratio/(1+0.05f*ratio)); | |
169 } | |
170 | |
171 public boolean discarded() { | |
172 for(Read r : this){ | |
173 if(!r.discarded()){return false;} | |
174 } | |
175 return true; | |
176 } | |
177 | |
178 /** | |
179 * Identifier assigned by streamer, not by PacBio. | |
180 * First identifier is 0, then 1, etc. | |
181 */ | |
182 public long id; | |
183 | |
184 /** | |
185 * ZMW ID assigned by PacBio. | |
186 */ | |
187 private int zid=-1; | |
188 | |
189 public Read first(){return get(0);} | |
190 public Read last(){return get(size()-1);} | |
191 | |
192 } |