csp2: CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/pybedtools/featurefuncs.pyx annotate

annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/pybedtools/featurefuncs.pyx @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d

author	jpayne
date	Tue, 18 Mar 2025 16:23:26 -0400
parents
children

rev	line source
jpayne@68	1 # cython: language_level=2
jpayne@68	2 # distutils: language = c++
jpayne@68	3 from cbedtools cimport Interval
jpayne@68	4 from cbedtools import create_interval_from_list
jpayne@68	5
jpayne@68	6
jpayne@68	7 cpdef extend_fields(Interval feature, int n):
jpayne@68	8 """
jpayne@68	9 Pads the fields of the feature with "." to a total length of `n` fields,
jpayne@68	10 """
jpayne@68	11 fields = feature.fields[:]
jpayne@68	12 while len(fields) < n:
jpayne@68	13 fields.append('.')
jpayne@68	14 i = create_interval_from_list(fields)
jpayne@68	15
jpayne@68	16 if n > 4 and (i[4] == '.'):
jpayne@68	17 i[4] = '0'
jpayne@68	18 if n > 6 and (i[6] == '.'):
jpayne@68	19 i[6] = str(i.start)
jpayne@68	20 if n > 7 and (i[7] == '.'):
jpayne@68	21 i[7] = str(i.stop)
jpayne@68	22 if n > 8 and (i[8] == '.'):
jpayne@68	23 i[8] = '0,0,0'
jpayne@68	24 return i
jpayne@68	25
jpayne@68	26
jpayne@68	27
jpayne@68	28 cpdef center(Interval feature, int width=100):
jpayne@68	29 """
jpayne@68	30 Return the width bp from the center of a feature. If a feature is
jpayne@68	31 smaller than width, then return the entire feature.
jpayne@68	32 """
jpayne@68	33 if len(feature) < width:
jpayne@68	34 return feature
jpayne@68	35 cdef int start = feature.start
jpayne@68	36 cdef int stop = feature.stop
jpayne@68	37 cdef int center = start + (stop - start) / 2
jpayne@68	38 halfwidth = width / 2
jpayne@68	39 feature.start = center - halfwidth
jpayne@68	40 if feature.start < 1:
jpayne@68	41 feature.start = 1
jpayne@68	42 if halfwidth == 0:
jpayne@68	43 halfwidth = 1
jpayne@68	44 feature.stop = center + halfwidth
jpayne@68	45 return feature
jpayne@68	46
jpayne@68	47
jpayne@68	48 cpdef midpoint(Interval feature):
jpayne@68	49 """
jpayne@68	50 Specialized version of `center()` that just returns the single-bp midpoint
jpayne@68	51 """
jpayne@68	52 start = feature.start + (feature.stop - feature.start) / 2
jpayne@68	53 stop = start + 1
jpayne@68	54 feature.start = start
jpayne@68	55 feature.stop = stop
jpayne@68	56 return feature
jpayne@68	57
jpayne@68	58
jpayne@68	59 cpdef greater_than(Interval feature, int size=100):
jpayne@68	60 """
jpayne@68	61 Return True if feature length > size
jpayne@68	62 """
jpayne@68	63 return len(feature) > size
jpayne@68	64
jpayne@68	65
jpayne@68	66 cpdef less_than(Interval feature, int size=100):
jpayne@68	67 """
jpayne@68	68 Return True if feature length < size
jpayne@68	69 """
jpayne@68	70 return len(feature) < size
jpayne@68	71
jpayne@68	72
jpayne@68	73 cpdef normalized_to_length(Interval feature, int idx=4, float scalar=0.001):
jpayne@68	74 """
jpayne@68	75 Normalizes the value at feature[idx] to the feature's length, in kb.
jpayne@68	76
jpayne@68	77 idx, by default, is the score field for a BED file, but specify any
jpayne@68	78 integer.
jpayne@68	79
jpayne@68	80 The value at idx will be replaced with its scaled value.
jpayne@68	81
jpayne@68	82 scalar will be multiplied by the value at idx, by default this is
jpayne@68	83 0.001, or per kb.
jpayne@68	84
jpayne@68	85 Useful for calculating RPKM after running intersect with counts
jpayne@68	86 """
jpayne@68	87 feature[idx] = str(float(feature[idx]) * scalar / len(feature))
jpayne@68	88 return feature
jpayne@68	89
jpayne@68	90
jpayne@68	91 cpdef rename(Interval feature, str name):
jpayne@68	92 """
jpayne@68	93 Forces a rename of all features, e.g., for renaming everything in a file
jpayne@68	94 'exon'
jpayne@68	95 """
jpayne@68	96 feature.name = name
jpayne@68	97 return feature
jpayne@68	98
jpayne@68	99
jpayne@68	100 cpdef bedgraph_scale(Interval feature, float scalar):
jpayne@68	101 feature[3] = str(float(feature[3]) * scalar)
jpayne@68	102 return feature
jpayne@68	103
jpayne@68	104
jpayne@68	105 cpdef TSS(Interval feature, int upstream=500, int downstream=500, add_to_name=None, genome=None):
jpayne@68	106 """
jpayne@68	107 Alias for five_prime.
jpayne@68	108 """
jpayne@68	109 return star_prime(feature, upstream, downstream, prime=5,
jpayne@68	110 add_to_name=add_to_name, genome=genome)
jpayne@68	111
jpayne@68	112
jpayne@68	113 cdef star_prime(Interval feature, int upstream=500, int downstream=500, int prime=5,
jpayne@68	114 add_to_name=None, genome=None):
jpayne@68	115
jpayne@68	116 if prime == 5:
jpayne@68	117 if feature.strand == '-':
jpayne@68	118 start = feature.stop - downstream
jpayne@68	119 stop = feature.stop + upstream
jpayne@68	120 else:
jpayne@68	121 start = feature.start - upstream
jpayne@68	122 stop = feature.start + downstream
jpayne@68	123 elif prime == 3:
jpayne@68	124 if feature.strand == '-':
jpayne@68	125 start = feature.start - downstream
jpayne@68	126 stop = feature.start + upstream
jpayne@68	127 else:
jpayne@68	128 start = feature.stop - upstream
jpayne@68	129 stop = feature.stop + downstream
jpayne@68	130 if add_to_name:
jpayne@68	131 try:
jpayne@68	132 feature.name += add_to_name
jpayne@68	133 except AttributeError:
jpayne@68	134 pass
jpayne@68	135 if genome is not None:
jpayne@68	136 gstart, gstop = genome[feature.chrom]
jpayne@68	137 stop = min(stop, gstop)
jpayne@68	138 start = max(start, gstart)
jpayne@68	139 if start < 0:
jpayne@68	140 start = 0
jpayne@68	141 if start > stop:
jpayne@68	142 start = stop
jpayne@68	143 feature.start = start
jpayne@68	144 feature.stop = stop
jpayne@68	145 return feature
jpayne@68	146
jpayne@68	147 cpdef five_prime(Interval feature, int upstream=500, int downstream=500,
jpayne@68	148 add_to_name=None, genome=None):
jpayne@68	149 """
jpayne@68	150 Returns the 5'-most coordinate, plus `upstream` and `downstream` bp; adds
jpayne@68	151 the string `add_to_name` to the feature's name if provided (e.g., "_TSS")
jpayne@68	152
jpayne@68	153 Parameters
jpayne@68	154 ----------
jpayne@68	155 feature : pybedtools.Interval instance
jpayne@68	156
jpayne@68	157 upstream, downstream : int
jpayne@68	158 Number of bp upstream or downstream of the strand-specific start
jpayne@68	159 position of the feature to include. Default is 500 for both upstream
jpayne@68	160 and downstream so that the returned feature is 1kb centered on the 5'
jpayne@68	161 end of the feature. Unstranded features (where strand=".") are treated
jpayne@68	162 as plus-strand features.
jpayne@68	163
jpayne@68	164 add_to_name : str or None
jpayne@68	165 If not None, append the string suffix to the name field of the feature (for
jpayne@68	166 example "_TSS").
jpayne@68	167
jpayne@68	168 genome : dict or None
jpayne@68	169 If not None, then ensure that the start/stop positions are within the
jpayne@68	170 boundaries of the chromosome.
jpayne@68	171 """
jpayne@68	172 return star_prime(feature, upstream, downstream, prime=5,
jpayne@68	173 add_to_name=add_to_name, genome=genome)
jpayne@68	174
jpayne@68	175
jpayne@68	176 cpdef three_prime(Interval feature, int upstream=500, int downstream=500,
jpayne@68	177 add_to_name=None, genome=None):
jpayne@68	178 """
jpayne@68	179 Returns the 3'-most coordinate, plus `upstream` and `downstream` bp; adds
jpayne@68	180 the string `add_to_name` to the feature's name if provided (e.g.,
jpayne@68	181 "_polyA_site")
jpayne@68	182
jpayne@68	183 Parameters
jpayne@68	184 ----------
jpayne@68	185 feature : pybedtools.Interval instance
jpayne@68	186
jpayne@68	187 upstream, downstrea : int
jpayne@68	188 Number of bp upstream or downstream of the strand-specific stop
jpayne@68	189 position of the feature to include. Default is 500 for both upstream
jpayne@68	190 and downstream so that the returned feature is 1kb centered on the 5'
jpayne@68	191 end of the feature. Unstranded features (where strand=".") are treated
jpayne@68	192 as plus-strand features.
jpayne@68	193
jpayne@68	194 add_to_name : str or None
jpayne@68	195 If not None, append the string suffix to the name field of the feature (for
jpayne@68	196 example "_TSS").
jpayne@68	197
jpayne@68	198 genome : dict or None
jpayne@68	199 If not None, then ensure that the start/stop positions are within the
jpayne@68	200 boundaries of the chromosome.
jpayne@68	201
jpayne@68	202
jpayne@68	203 """
jpayne@68	204 return star_prime(feature, upstream, downstream, prime=3,
jpayne@68	205 add_to_name=add_to_name, genome=genome)
jpayne@68	206
jpayne@68	207 cpdef add_color(Interval feature, cmap, norm):
jpayne@68	208 """
jpayne@68	209 Signature:
jpayne@68	210
jpayne@68	211 add_color(feature, cmap, norm)
jpayne@68	212
jpayne@68	213 Given the matplotlib colormap `cmap` and the matplotlib Normalize instance
jpayne@68	214 `norm`, return a new 9-field feature (extended out if needed) with the RGB
jpayne@68	215 tuple set according to the score.
jpayne@68	216 """
jpayne@68	217 if len(feature.fields) < 9:
jpayne@68	218 feature = extend_fields(feature, 9)
jpayne@68	219 feature[6] = str(feature.start)
jpayne@68	220 feature[7] = str(feature.stop)
jpayne@68	221
jpayne@68	222 rgb_float = cmap(norm(float(feature.score)))
jpayne@68	223 feature[8] = ','.join([str(int(i * 255)) for i in rgb_float[:3]])
jpayne@68	224 return feature
jpayne@68	225
jpayne@68	226
jpayne@68	227 cpdef gff2bed(Interval feature, name_field=None):
jpayne@68	228 """
jpayne@68	229 Signature:
jpayne@68	230
jpayne@68	231 gff2bed(feature, name_field=None)
jpayne@68	232
jpayne@68	233 Converts a GFF feature into a BED6 feature. By default, the name of the
jpayne@68	234 new BED will be feature.name, but if `name_field` is provided then the name
jpayne@68	235 of the new BED will be feature.attrs[name_field].
jpayne@68	236
jpayne@68	237 `name_field` can also be an integer to index into the fields of the object,
jpayne@68	238 so if you want the BED name to be the GFF featuretype, then you can use
jpayne@68	239 `name_field=2`.
jpayne@68	240
jpayne@68	241 If the specified field does not exist, then "." will be used for the name.
jpayne@68	242 """
jpayne@68	243 if name_field is None:
jpayne@68	244 name = feature.name
jpayne@68	245 else:
jpayne@68	246 try:
jpayne@68	247 if isinstance(name_field, basestring):
jpayne@68	248 name = feature.attrs[name_field]
jpayne@68	249 if isinstance(name_field, int):
jpayne@68	250 name = feature[name_field]
jpayne@68	251 except (NameError, KeyError):
jpayne@68	252 name = "."
jpayne@68	253 return create_interval_from_list([
jpayne@68	254 str(feature.chrom),
jpayne@68	255 str(feature.start),
jpayne@68	256 str(feature.stop),
jpayne@68	257 name,
jpayne@68	258 feature.score,
jpayne@68	259 feature.strand])
jpayne@68	260
jpayne@68	261
jpayne@68	262 cpdef bed2gff(Interval feature):
jpayne@68	263 """
jpayne@68	264 Signature:
jpayne@68	265
jpayne@68	266 bed2gff(feature)
jpayne@68	267
jpayne@68	268 Converts a BED feature (BED3 through BED12) into a GFF format.
jpayne@68	269
jpayne@68	270 Chrom, start, stop, score, and strand are put directly into the
jpayne@68	271 corresponding GFF fields. Other BED fields are put into the GFF attributes
jpayne@68	272 field, named according to the UCSC BED format definition.
jpayne@68	273
jpayne@68	274 If there are more than 12 BED fields, the additional fields will be added
jpayne@68	275 to the GFF attributes using the 0-based index (so starting at "12") as the
jpayne@68	276 key.
jpayne@68	277
jpayne@68	278 GFF fields that do not have a direct mapping to BED format (feature type,
jpayne@68	279 source, phase) are set to ".".
jpayne@68	280
jpayne@68	281 1 bp is added to the start position to finish the conversion to GFF.
jpayne@68	282 """
jpayne@68	283
jpayne@68	284 # Note that Interval.score, .strand, and .name have a default of ".", so no
jpayne@68	285 # need to do the extra try/except IndexError for those fields.
jpayne@68	286 mapping = (
jpayne@68	287 (6, "thickStart"),
jpayne@68	288 (7, "thickEnd"),
jpayne@68	289 (8, "itemRgb"),
jpayne@68	290 (9, "blockCount"),
jpayne@68	291 (10, "blockSizes"),
jpayne@68	292 (11, "blockStarts")
jpayne@68	293 )
jpayne@68	294
jpayne@68	295 # Add any standard BED fields we might have
jpayne@68	296 attributes = ['Name="%s"' % feature.name]
jpayne@68	297 for k, v in mapping:
jpayne@68	298 try:
jpayne@68	299 attributes.append('%s="%s"' % (v, feature.fields[k]))
jpayne@68	300 except IndexError:
jpayne@68	301 break
jpayne@68	302
jpayne@68	303 # Add any additional fields, keyed by their index
jpayne@68	304 if len(feature.fields) > 12:
jpayne@68	305 for i in range(12, len(feature.fields)):
jpayne@68	306 attributes.append('%s="%s"' % (i, feature.fields[i]))
jpayne@68	307
jpayne@68	308 attributes = '; '.join(attributes) + ';'
jpayne@68	309
jpayne@68	310 return create_interval_from_list([
jpayne@68	311 str(feature.chrom),
jpayne@68	312 '.',
jpayne@68	313 '.',
jpayne@68	314 str(feature.start + 1),
jpayne@68	315 str(feature.stop),
jpayne@68	316 feature.score,
jpayne@68	317 feature.strand,
jpayne@68	318 '.',
jpayne@68	319 attributes])
jpayne@68	320
jpayne@68	321
jpayne@68	322 class UniqueID(object):
jpayne@68	323 def __init__(self, pattern="%d", first=0):
jpayne@68	324 """
jpayne@68	325 Class to help create uniquely-named features.
jpayne@68	326
jpayne@68	327 Example usage:
jpayne@68	328
jpayne@68	329 >>> a = pybedtools.example_bedtool('a.bed')
jpayne@68	330 >>> uid = UniqueID("f_%d")
jpayne@68	331 >>> print(a.each(uid)) # doctest: +NORMALIZE_WHITESPACE
jpayne@68	332 chr1 1 100 f_0 0 +
jpayne@68	333 chr1 100 200 f_1 0 +
jpayne@68	334 chr1 150 500 f_2 0 -
jpayne@68	335 chr1 900 950 f_3 0 +
jpayne@68	336
jpayne@68	337 Parameters
jpayne@68	338 ----------
jpayne@68	339 pattern : str
jpayne@68	340
jpayne@68	341 Pattern will be filled in using `% self.count`
jpayne@68	342
jpayne@68	343 first : int
jpayne@68	344 `self.count` will be initialzed to this value.
jpayne@68	345
jpayne@68	346 """
jpayne@68	347 self.pattern = pattern
jpayne@68	348 self.first = first
jpayne@68	349 self.count = first
jpayne@68	350
jpayne@68	351 def __call__(self, feature):
jpayne@68	352 feature.name = self.pattern % self.count
jpayne@68	353 self.count += 1
jpayne@68	354 return feature
jpayne@68	355

Mercurial > repos > rliterman > csp2

annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/pybedtools/featurefuncs.pyx @ 68:5028fdace37b