comparison idna/intranges.py @ 7:5eb2d5e3bf22

planemo upload for repository https://toolrepo.galaxytrakr.org/view/jpayne/bioproject_to_srr_2/556cac4fb538
author jpayne
date Sun, 05 May 2024 23:32:17 -0400
parents
children
comparison
equal deleted inserted replaced
6:b2745907b1eb 7:5eb2d5e3bf22
1 """
2 Given a list of integers, made up of (hopefully) a small number of long runs
3 of consecutive integers, compute a representation of the form
4 ((start1, end1), (start2, end2) ...). Then answer the question "was x present
5 in the original list?" in time O(log(# runs)).
6 """
7
8 import bisect
9 from typing import List, Tuple
10
def intranges_from_list(list_: List[int]) -> Tuple[int, ...]:
    """Represent a list of integers as a sorted tuple of encoded ranges.

    Each maximal run of consecutive integers in ``list_`` becomes one
    half-open range ``[start, end)``, so the original integers are exactly
    those ``x`` such that ``start_i <= x < end_i`` for some ``i``.

    Ranges are encoded as single integers (``start << 32 | end``), not as
    tuples; see ``_encode_range``.

    Duplicate values in ``list_`` are ignored, so the resulting ranges never
    overlap. An empty input yields an empty tuple.
    """
    # De-duplicate before sorting: a repeated value would otherwise split a
    # run in two and produce redundant, overlapping ranges.
    values = sorted(set(list_))
    ranges = []
    run_start = 0  # index in `values` where the current run begins
    for i, value in enumerate(values):
        # Still inside a run while the next value is the direct successor.
        if i + 1 < len(values) and values[i + 1] == value + 1:
            continue
        # `value` closes the run; the stored end bound is exclusive.
        ranges.append(_encode_range(values[run_start], value + 1))
        run_start = i + 1

    return tuple(ranges)
31
def _encode_range(start: int, end: int) -> int:
    """Pack a ``(start, end)`` pair into one integer.

    ``start`` is shifted into the bits above the low 32 and ``end`` is OR-ed
    into the result, so encoded ranges order by ``start`` first when compared
    as plain integers.
    """
    high_bits = start << 32
    return high_bits | end
34
def _decode_range(r: int) -> Tuple[int, int]:
    """Split an encoded range back into its ``(start, end)`` pair.

    ``start`` is everything above the low 32 bits of ``r``; ``end`` is the
    low 32 bits themselves.
    """
    low_32_mask = (1 << 32) - 1
    start = r >> 32
    end = r & low_32_mask
    return start, end
37
38
def intranges_contain(int_: int, ranges: Tuple[int, ...]) -> bool:
    """Return True if `int_` lies inside one of the encoded `ranges`.

    `ranges` is searched with a binary search, so it is expected to be sorted
    in ascending order (as produced by `intranges_from_list`).
    """
    # An encoded (int_, 0) sorts at or before any stored range starting at int_.
    probe = _encode_range(int_, 0)
    idx = bisect.bisect_left(ranges, probe)

    # The range just before the insertion point may cover int_:
    # start <= int_ < end.
    if idx > 0:
        start, end = _decode_range(ranges[idx - 1])
        if start <= int_ < end:
            return True

    # Nothing at or after the insertion point: no range starts at int_.
    if idx >= len(ranges):
        return False

    # Otherwise the range at the insertion point matches only if it starts
    # exactly at int_.
    start, _ = _decode_range(ranges[idx])
    return start == int_