Mercurial > repos > jpayne > bioproject_to_srr_2
comparison idna/intranges.py @ 7:5eb2d5e3bf22
planemo upload for repository https://toolrepo.galaxytrakr.org/view/jpayne/bioproject_to_srr_2/556cac4fb538
author | jpayne |
---|---|
date | Sun, 05 May 2024 23:32:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
6:b2745907b1eb | 7:5eb2d5e3bf22 |
---|---|
1 """ | |
2 Given a list of integers, made up of (hopefully) a small number of long runs | |
3 of consecutive integers, compute a representation of the form | |
4 ((start1, end1), (start2, end2) ...). Then answer the question "was x present | |
5 in the original list?" in time O(log(# runs)). | |
6 """ | |
7 | |
8 import bisect | |
9 from typing import List, Tuple | |
10 | |
11 def intranges_from_list(list_: List[int]) -> Tuple[int, ...]: | |
12 """Represent a list of integers as a sequence of ranges: | |
13 ((start_0, end_0), (start_1, end_1), ...), such that the original | |
14 integers are exactly those x such that start_i <= x < end_i for some i. | |
15 | |
16 Ranges are encoded as single integers (start << 32 | end), not as tuples. | |
17 """ | |
18 | |
19 sorted_list = sorted(list_) | |
20 ranges = [] | |
21 last_write = -1 | |
22 for i in range(len(sorted_list)): | |
23 if i+1 < len(sorted_list): | |
24 if sorted_list[i] == sorted_list[i+1]-1: | |
25 continue | |
26 current_range = sorted_list[last_write+1:i+1] | |
27 ranges.append(_encode_range(current_range[0], current_range[-1] + 1)) | |
28 last_write = i | |
29 | |
30 return tuple(ranges) | |
31 | |
32 def _encode_range(start: int, end: int) -> int: | |
33 return (start << 32) | end | |
34 | |
35 def _decode_range(r: int) -> Tuple[int, int]: | |
36 return (r >> 32), (r & ((1 << 32) - 1)) | |
37 | |
38 | |
39 def intranges_contain(int_: int, ranges: Tuple[int, ...]) -> bool: | |
40 """Determine if `int_` falls into one of the ranges in `ranges`.""" | |
41 tuple_ = _encode_range(int_, 0) | |
42 pos = bisect.bisect_left(ranges, tuple_) | |
43 # we could be immediately ahead of a tuple (start, end) | |
44 # with start < int_ <= end | |
45 if pos > 0: | |
46 left, right = _decode_range(ranges[pos-1]) | |
47 if left <= int_ < right: | |
48 return True | |
49 # or we could be immediately behind a tuple (int_, end) | |
50 if pos < len(ranges): | |
51 left, _ = _decode_range(ranges[pos]) | |
52 if left == int_: | |
53 return True | |
54 return False |