jpayne@68
|
1 package icecream;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.util.ArrayList;
|
jpayne@68
|
4
|
jpayne@68
|
5 import shared.Tools;
|
jpayne@68
|
6 import stream.Read;
|
jpayne@68
|
7 import stream.SamLine;
|
jpayne@68
|
8 import structures.IntList;
|
jpayne@68
|
9
|
jpayne@68
|
10 /**
|
jpayne@68
|
11 * Container for the list of reads from a single
|
jpayne@68
|
12 * PacBio ZMW.
|
jpayne@68
|
13 * @author Brian Bushnell
|
jpayne@68
|
14 * @date June 5, 2020
|
jpayne@68
|
15 */
|
jpayne@68
|
16 public class ZMW extends ArrayList<Read> {
|
jpayne@68
|
17
|
jpayne@68
|
18 /**
|
jpayne@68
|
19 * For serialization.
|
jpayne@68
|
20 */
|
jpayne@68
|
21 private static final long serialVersionUID = -2580124131008824113L;
|
jpayne@68
|
22
|
jpayne@68
|
23 public ZMW(){super();}
|
jpayne@68
|
24
|
jpayne@68
|
25 public ZMW(int initialSize){super(initialSize);}
|
jpayne@68
|
26
|
jpayne@68
|
27 public long countBases(){
|
jpayne@68
|
28 long x=0;
|
jpayne@68
|
29 for(Read r : this){
|
jpayne@68
|
30 x+=r.length();
|
jpayne@68
|
31 }
|
jpayne@68
|
32 return x;
|
jpayne@68
|
33 }
|
jpayne@68
|
34
|
jpayne@68
|
35 public int medianLength(boolean includeDiscarded){
|
jpayne@68
|
36 if(size()<3){return -1;}
|
jpayne@68
|
37 IntList lengths=new IntList(size()-2);
|
jpayne@68
|
38
|
jpayne@68
|
39 for(int i=1; i<size()-1; i++){
|
jpayne@68
|
40 Read r=get(i);
|
jpayne@68
|
41 if(includeDiscarded || !r.discarded()){
|
jpayne@68
|
42 lengths.add(get(i).length());
|
jpayne@68
|
43 }
|
jpayne@68
|
44 }
|
jpayne@68
|
45 lengths.sort();
|
jpayne@68
|
46 int median=lengths.get(lengths.size/2);
|
jpayne@68
|
47 return median;
|
jpayne@68
|
48 }
|
jpayne@68
|
49
|
jpayne@68
|
50 public int longestLength(boolean includeDiscarded){
|
jpayne@68
|
51 int max=0;
|
jpayne@68
|
52 for(Read r : this){
|
jpayne@68
|
53 if(includeDiscarded || !r.discarded()){
|
jpayne@68
|
54 max=Tools.max(max, r.length());
|
jpayne@68
|
55 }
|
jpayne@68
|
56 }
|
jpayne@68
|
57 return max;
|
jpayne@68
|
58 }
|
jpayne@68
|
59
|
jpayne@68
|
60 public Read medianRead(boolean includeDiscarded){
|
jpayne@68
|
61 int len=medianLength(includeDiscarded);
|
jpayne@68
|
62 if(len<0){return longestRead(includeDiscarded);}
|
jpayne@68
|
63 for(int i=1; i<size()-1; i++){
|
jpayne@68
|
64 Read r=get(i);
|
jpayne@68
|
65 if((includeDiscarded || !r.discarded()) && r.length()==len){
|
jpayne@68
|
66 return r;
|
jpayne@68
|
67 }
|
jpayne@68
|
68 }
|
jpayne@68
|
69 return null;
|
jpayne@68
|
70 }
|
jpayne@68
|
71
|
jpayne@68
|
72 public Read longestRead(boolean includeDiscarded){
|
jpayne@68
|
73 Read max=null;
|
jpayne@68
|
74 for(Read r : this){
|
jpayne@68
|
75 if((includeDiscarded || !r.discarded()) && (max==null || r.length()>max.length())){max=r;}
|
jpayne@68
|
76 }
|
jpayne@68
|
77 return max;
|
jpayne@68
|
78 }
|
jpayne@68
|
79
|
jpayne@68
|
80 public int zid(){
|
jpayne@68
|
81 if(zid==-1){parseZID();}
|
jpayne@68
|
82 return zid;
|
jpayne@68
|
83 }
|
jpayne@68
|
84
|
jpayne@68
|
85 private int parseZID(){
|
jpayne@68
|
86 return (size()<1 ? -1 : PBHeader.parseZMW(get(0).id));
|
jpayne@68
|
87 }
|
jpayne@68
|
88
|
jpayne@68
|
89 public static void fixReadHeader(Read r, int leftTrim, int rightTrim){
|
jpayne@68
|
90 leftTrim=Tools.max(0, leftTrim);
|
jpayne@68
|
91 rightTrim=Tools.max(0, rightTrim);
|
jpayne@68
|
92 if(leftTrim<1 && rightTrim<1){return;}
|
jpayne@68
|
93 final int idx=r.id.lastIndexOf('/');
|
jpayne@68
|
94 if(idx>0 && idx<r.id.length()-3){
|
jpayne@68
|
95 String prefix=r.id.substring(0, idx+1);
|
jpayne@68
|
96 String suffix=r.id.substring(idx+1);
|
jpayne@68
|
97 if(suffix.indexOf('_')>0){
|
jpayne@68
|
98 String coords=suffix, comment="";
|
jpayne@68
|
99 int tab=suffix.indexOf('\t');
|
jpayne@68
|
100 if(tab<0){tab=suffix.indexOf(' ');}
|
jpayne@68
|
101 if(tab>0){
|
jpayne@68
|
102 coords=coords.substring(0, tab);
|
jpayne@68
|
103 comment=coords.substring(tab);
|
jpayne@68
|
104 }
|
jpayne@68
|
105 String[] split=Tools.underscorePattern.split(coords);
|
jpayne@68
|
106 int left=Integer.parseInt(split[0]);
|
jpayne@68
|
107 int right=Integer.parseInt(split[1]);
|
jpayne@68
|
108 left+=leftTrim;
|
jpayne@68
|
109 right-=rightTrim;
|
jpayne@68
|
110 if(left>right){left=right;}
|
jpayne@68
|
111
|
jpayne@68
|
112 if(right-left!=r.length()){right=left+r.length();}
|
jpayne@68
|
113 // System.err.println(r.length()+", "+(right-left));
|
jpayne@68
|
114
|
jpayne@68
|
115 r.id=prefix+left+"_"+right+comment;
|
jpayne@68
|
116 final SamLine sl=r.samline;
|
jpayne@68
|
117 if(sl!=null){
|
jpayne@68
|
118 sl.qname=r.id;
|
jpayne@68
|
119 if(sl.optional!=null){
|
jpayne@68
|
120 for(int i=0; i<sl.optional.size(); i++){
|
jpayne@68
|
121 String s=sl.optional.get(i);
|
jpayne@68
|
122 if(s.startsWith("qe:i:")){
|
jpayne@68
|
123 s="qe:i:"+right;
|
jpayne@68
|
124 sl.optional.set(i, s);
|
jpayne@68
|
125 }else if(s.startsWith("qs:i:")){
|
jpayne@68
|
126 s="qs:i:"+left;
|
jpayne@68
|
127 sl.optional.set(i, s);
|
jpayne@68
|
128 }
|
jpayne@68
|
129 }
|
jpayne@68
|
130 }
|
jpayne@68
|
131 }
|
jpayne@68
|
132 }
|
jpayne@68
|
133 }
|
jpayne@68
|
134 }
|
jpayne@68
|
135
|
jpayne@68
|
136 public void setDiscarded(boolean b){
|
jpayne@68
|
137 for(Read r : this){
|
jpayne@68
|
138 r.setDiscarded(b);
|
jpayne@68
|
139 }
|
jpayne@68
|
140 }
|
jpayne@68
|
141
|
jpayne@68
|
142 public int[] lengths() {
|
jpayne@68
|
143 final int size=size();
|
jpayne@68
|
144 int[] array=new int[size];
|
jpayne@68
|
145 for(int i=0; i<size; i++){
|
jpayne@68
|
146 Read r=get(i);
|
jpayne@68
|
147 array[i]=r==null ? -1 : r.length();
|
jpayne@68
|
148 }
|
jpayne@68
|
149 return array;
|
jpayne@68
|
150 }
|
jpayne@68
|
151
|
jpayne@68
|
152 public float estimatePasses(){
|
jpayne@68
|
153 final int size=size();
|
jpayne@68
|
154 if(size<1){return 0;}
|
jpayne@68
|
155 else if(size==1){return 0.25f;}
|
jpayne@68
|
156 else if(size==2){return 0.5f;}
|
jpayne@68
|
157
|
jpayne@68
|
158 int median=medianLength(true);
|
jpayne@68
|
159 int first=first().length();
|
jpayne@68
|
160 int last=last().length();
|
jpayne@68
|
161
|
jpayne@68
|
162 return size-2+estimatePasses(first, median)+estimatePasses(last, median);
|
jpayne@68
|
163 }
|
jpayne@68
|
164
|
jpayne@68
|
165 private float estimatePasses(int len, int median){
|
jpayne@68
|
166 float ratio=len/(float)median;
|
jpayne@68
|
167 //TODO: I want this to be more asymptotic
|
jpayne@68
|
168 return Tools.min(0.99f, ratio/(1+0.05f*ratio));
|
jpayne@68
|
169 }
|
jpayne@68
|
170
|
jpayne@68
|
171 public boolean discarded() {
|
jpayne@68
|
172 for(Read r : this){
|
jpayne@68
|
173 if(!r.discarded()){return false;}
|
jpayne@68
|
174 }
|
jpayne@68
|
175 return true;
|
jpayne@68
|
176 }
|
jpayne@68
|
177
|
jpayne@68
|
178 /**
|
jpayne@68
|
179 * Identifier assigned by streamer, not by PacBio.
|
jpayne@68
|
180 * First identifier is 0, then 1, etc.
|
jpayne@68
|
181 */
|
jpayne@68
|
182 public long id;
|
jpayne@68
|
183
|
jpayne@68
|
184 /**
|
jpayne@68
|
185 * ZMW ID assigned by PacBio.
|
jpayne@68
|
186 */
|
jpayne@68
|
187 private int zid=-1;
|
jpayne@68
|
188
|
jpayne@68
|
189 public Read first(){return get(0);}
|
jpayne@68
|
190 public Read last(){return get(size()-1);}
|
jpayne@68
|
191
|
jpayne@68
|
192 }
|