comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/pybedtools/helpers.py @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 import sys
2 import os
3 import gzip
4 import tempfile
5 import subprocess
6 import glob
7 import struct
8 import atexit
9 import re
10 import urllib
11 import urllib.error
12 import urllib.request
13
14 try: # Use genomepy to determine chrom sizes if it is installed
15 import genomepy
16 except ImportError:
17 pass
18
19 from . import settings
20 from . import filenames
21 from . import genome_registry
22 from .logger import logger
23 from .cbedtools import create_interval_from_list
24
25 BUFSIZE = -1
26
27 _tags = {}
28
29
30 def set_bedtools_path(path=""):
31 """
32 Explicitly set path to `BEDTools` installation dir.
33
34 If BEDTools is not available on your system path, specify the path to the
35 dir containing the BEDTools executables (intersectBed, subtractBed, etc)
36 with this function.
37
38 To reset and use the default system path, call this function with no
39 arguments or use path="".
40 """
41 from . import paths
42
43 paths._set_bedtools_path(path)
44
45
46 def get_bedtools_path():
47 """
48 Returns the currently-set path to bedtools
49 """
50 from . import paths
51
52 return paths._get_bedtools_path()
53
54
55 def set_R_path(path=""):
56 """
57 Explicitly set path to `R` installation dir.
58
59 If R is not available on the path, then it can be explicitly
60 specified here.
61
62 Use path="" to reset to default system path.
63 """
64 from . import paths
65
66 paths._set_R_path(path)
67
68
69 def _check_for_bedtools(force_check=False, verbose=False, override=None):
70 """
71 Checks installation as well as version (based on whether or not "bedtools
72 intersect" works, or just "intersectBed")
73 """
74 if settings._bedtools_installed and not force_check:
75 return True
76
77 if (len(settings.bedtools_version) == 0) or force_check:
78 try:
79 v = (
80 subprocess.check_output(
81 [os.path.join(settings._bedtools_path, "bedtools"), "--version"]
82 )
83 .decode("utf-8")
84 .rstrip()
85 )
86
87 if verbose:
88 print("Found bedtools version '%s'" % v)
89
90 settings._bedtools_installed = True
91
92 # Override, used for testing
93 if override is not None:
94 v = override
95
96 # To allow more complicated versions as found in Linux distributions, e.g.:
97 # bedtools v2.26.0
98 # bedtools debian/2.28.0+dfsg-2-dirty
99 m = re.search("^bedtools [^0-9]*([0-9][0-9.]*)", v)
100 if not m:
101 raise ValueError('Cannot identify version number from "{}"'.format(v))
102 vv = m.group(1)
103
104 settings.bedtools_version = [int(i) for i in vv.split(".")]
105
106 settings._v_2_27_plus = (
107 settings.bedtools_version[0] >= 2 and settings.bedtools_version[1] >= 27
108 )
109
110 settings._v_2_15_plus = (
111 settings.bedtools_version[0] >= 2 and settings.bedtools_version[1] >= 15
112 )
113
114 except subprocess.CalledProcessError:
115 if settings._bedtools_path:
116 add_msg = "(tried path '%s')" % settings._bedtools_path
117 else:
118 add_msg = ""
119 raise OSError(
120 "Please make sure you have installed BEDTools"
121 "(https://github.com/arq5x/bedtools) and that "
122 "it's on the path. %s" % add_msg
123 )
124
125
126 def _check_for_R():
127 try:
128 p = subprocess.Popen(
129 [os.path.join(settings._R_path, "R"), "--version"],
130 stdout=subprocess.PIPE,
131 stderr=subprocess.PIPE,
132 )
133 settings._R_installed = True
134 except OSError:
135 if settings._R_path:
136 add_msg = "(tried path '%s')" % settings._R_path
137 else:
138 add_msg = ""
139 raise ValueError("Please install R and ensure it is on your path %s" % add_msg)
140
141
142 class Error(Exception):
143 """Base class for this module's exceptions"""
144
145 pass
146
147
148 class pybedtoolsError(Error):
149 pass
150
151
152 class BEDToolsError(Error):
153 def __init__(self, cmd, msg):
154 self.cmd = str(cmd)
155 self.msg = str(msg)
156
157 def __str__(self):
158 m = (
159 "\nCommand was:\n\n\t"
160 + self.cmd
161 + "\n"
162 + "\nError message was:\n"
163 + self.msg
164 )
165 return m
166
167
168 def isGZIP(fn):
169 with open(fn, "rb") as f:
170 start = f.read(3)
171 if start == b"\x1f\x8b\x08":
172 return True
173 return False
174
175
176 def isBGZIP(fn):
177 """
178 Reads a filename to see if it's a BGZIPed file or not.
179 """
180 with open(fn, "rb") as fh:
181 header_str = fh.read(15)
182
183 if len(header_str) < 15:
184 return False
185
186 header = struct.unpack_from("BBBBiBBHBBB", header_str)
187
188 id1, id2, cm, flg, mtime, xfl, os_, xlen, si1, si2, slen = header
189 if (
190 (id1 == 31)
191 and (id2 == 139)
192 and (cm == 8)
193 and (flg == 4)
194 and (si1 == 66)
195 and (si2 == 67)
196 and (slen == 2)
197 ):
198 return True
199 return False
200
201
202 def isBAM(fn):
203 """
204 Returns True if the file is both BGZIPed and the compressed contents have
205 start with the magic number `BAM\\x01`, or if the file is CRAM format (see
206 isCRAM()).
207 """
208 # Note: previously we were catching ValueError when trying to open
209 # a non-BAM with pysam.Samfile. That started segfaulting, so now do it the
210 # right way with magic number.
211 with gzip.open(fn, "rb") as in_:
212 if isBGZIP(fn) and (in_.read(4).decode() == "BAM\x01"):
213 return True
214 if isCRAM(fn):
215 return True
216
217
218 def isCRAM(fn):
219 """
220 Returns True if the file starts with the bytes for the characters "CRAM".
221 """
222 with open(fn, "rb") as in_:
223 if in_.read(4).decode(errors="ignore") == "CRAM":
224 return True
225
226
227 def find_tagged(tag):
228 """
229 Returns the bedtool object with tagged with *tag*. Useful for tracking
230 down bedtools you made previously.
231 """
232 for key, item in _tags.items():
233 try:
234 if item._tag == tag:
235 return item
236 except AttributeError:
237 pass
238 raise ValueError('tag "%s" not found' % tag)
239
240
241 def _flatten_list(x):
242 nested = True
243 while nested:
244 check_again = False
245 flattened = []
246
247 for element in x:
248 if isinstance(element, list):
249 flattened.extend(element)
250 check_again = True
251 else:
252 flattened.append(element)
253 nested = check_again
254 x = flattened[:]
255 return x
256
257
258 def set_tempdir(tempdir):
259 """
260 Set the directory for temp files.
261
262 Useful for clusters that use a /scratch partition rather than a /tmp dir.
263 Convenience function to simply set tempfile.tempdir.
264 """
265 if not os.path.exists(tempdir):
266 errstr = "The tempdir you specified, %s, does not exist" % tempdir
267 raise FileNotFoundError(errstr)
268 tempfile.tempdir = tempdir
269
270
271 def get_tempdir():
272 """
273 Gets the current tempdir for the module.
274 """
275 return tempfile.gettempdir()
276
277
278 def cleanup(verbose=False, remove_all=False):
279 """
280 Deletes all temp files from the current session (or optionally *all* \
281 sessions)
282
283 If *verbose*, reports what it's doing
284
285 If *remove_all*, then ALL files matching "pybedtools.*.tmp" in the temp dir
286 will be deleted.
287 """
288 if settings.KEEP_TEMPFILES:
289 return
290 for fn in filenames.TEMPFILES:
291 if verbose:
292 print("removing", fn)
293 if os.path.exists(fn):
294 os.unlink(fn)
295 if remove_all:
296 fns = glob.glob(os.path.join(get_tempdir(), "pybedtools.*.tmp"))
297 for fn in fns:
298 os.unlink(fn)
299
300
301 def _version_2_15_plus_names(prog_name):
302 if not settings._bedtools_installed:
303 _check_for_bedtools()
304 if not settings._v_2_15_plus:
305 return [prog_name]
306 try:
307 prog_name = settings._prog_names[prog_name]
308 except KeyError:
309 if prog_name in settings._new_names:
310 pass
311 raise BEDToolsError(prog_name, prog_name + "not a recognized BEDTools program")
312 return [os.path.join(settings._bedtools_path, "bedtools"), prog_name]
313
314
315 def call_bedtools(
316 cmds,
317 tmpfn=None,
318 stdin=None,
319 check_stderr=None,
320 decode_output=True,
321 encode_input=True,
322 ):
323 """
324 Use subprocess.Popen to call BEDTools and catch any errors.
325
326 Output goes to *tmpfn*, or, if None, output stays in subprocess.PIPE and
327 can be iterated over.
328
329 *stdin* is an optional file-like object that will be sent to
330 subprocess.Popen.
331
332 Prints some useful help upon getting common errors.
333
334 *check_stderr* is a function that takes the stderr string as input and
335 returns True if it's OK (that is, it's not really an error). This is
336 needed, e.g., for calling fastaFromBed which will report that it has to
337 make a .fai for a fasta file.
338
339 *decode_output* should be set to False when you are iterating over a BAM
340 file, where the data represent binary rather than text data.
341 """
342 input_is_stream = stdin is not None
343 output_is_stream = tmpfn is None
344
345 _orig_cmds = cmds[:]
346 cmds = []
347 cmds.extend(_version_2_15_plus_names(_orig_cmds[0]))
348 cmds.extend(_orig_cmds[1:])
349
350 try:
351 # coming from an iterator, sending as iterator
352 if input_is_stream and output_is_stream:
353 logger.debug(
354 "helpers.call_bedtools(): input is stream, output is " "stream"
355 )
356 logger.debug("helpers.call_bedtools(): cmds=%s", " ".join(cmds))
357 p = subprocess.Popen(
358 cmds,
359 stdout=subprocess.PIPE,
360 stderr=subprocess.PIPE,
361 stdin=subprocess.PIPE,
362 bufsize=BUFSIZE,
363 )
364 if encode_input:
365 for line in stdin:
366 p.stdin.write(line.encode())
367 else:
368 for line in stdin:
369 p.stdin.write(line)
370
371 # This is important to prevent deadlocks
372 p.stdin.close()
373
374 if decode_output:
375 output = (i.decode("UTF-8") for i in p.stdout)
376 else:
377 output = (i for i in p.stdout)
378
379 stderr = None
380
381 # coming from an iterator, writing to file
382 if input_is_stream and not output_is_stream:
383 logger.debug("helpers.call_bedtools(): input is stream, output is file")
384 logger.debug("helpers.call_bedtools(): cmds=%s", " ".join(cmds))
385 outfile = open(tmpfn, "wb")
386 p = subprocess.Popen(
387 cmds,
388 stdout=outfile,
389 stderr=subprocess.PIPE,
390 stdin=subprocess.PIPE,
391 bufsize=BUFSIZE,
392 )
393 if hasattr(stdin, "read"):
394 stdout, stderr = p.communicate(stdin.read())
395 else:
396 for item in stdin:
397 p.stdin.write(item.encode())
398 stdout, stderr = p.communicate()
399 output = tmpfn
400 outfile.close()
401
402 # coming from a file, sending as iterator
403 if not input_is_stream and output_is_stream:
404 logger.debug(
405 "helpers.call_bedtools(): input is filename, " "output is stream"
406 )
407 logger.debug("helpers.call_bedtools(): cmds=%s", " ".join(cmds))
408 p = subprocess.Popen(
409 cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=BUFSIZE
410 )
411 if decode_output:
412 output = (i.decode("UTF-8") for i in p.stdout)
413 else:
414 output = (i for i in p.stdout)
415 stderr = None
416
417 # file-to-file
418 if not input_is_stream and not output_is_stream:
419 logger.debug(
420 "helpers.call_bedtools(): input is filename, output "
421 "is filename (%s)",
422 tmpfn,
423 )
424 cmds = list(map(str, cmds))
425 logger.debug("helpers.call_bedtools(): cmds=%s", " ".join(cmds))
426 outfile = open(tmpfn, "wb")
427 p = subprocess.Popen(
428 cmds, stdout=outfile, stderr=subprocess.PIPE, bufsize=BUFSIZE
429 )
430 stdout, stderr = p.communicate()
431 output = tmpfn
432 outfile.close()
433
434 # Check if it's OK using a provided function to check stderr. If it's
435 # OK, dump it to sys.stderr so it's printed, and reset it to None so we
436 # don't raise an exception
437 if check_stderr is not None:
438 if isinstance(stderr, bytes):
439 stderr = stderr.decode("UTF_8")
440 if check_stderr(stderr):
441 sys.stderr.write(stderr)
442 stderr = None
443
444 if stderr:
445 # Fix for issue #147. In general, we consider warnings to not be
446 # fatal, so just show 'em and continue on.
447 #
448 # bedtools source has several different ways of showing a warning,
449 # but they seem to all have "WARNING" in the first 20 or so
450 # characters
451 if isinstance(stderr, bytes):
452 stderr = stderr.decode("UTF_8")
453 if len(stderr) > 20 and "WARNING" in stderr[:20].upper():
454 sys.stderr.write(stderr)
455 else:
456 raise BEDToolsError(subprocess.list2cmdline(cmds), stderr)
457
458 except (OSError, IOError) as err:
459 print("%s: %s" % (type(err), os.strerror(err.errno)))
460 print("The command was:\n\n\t%s\n" % subprocess.list2cmdline(cmds))
461
462 problems = {
463 2: (
464 "* Did you spell the command correctly?",
465 "* Do you have BEDTools installed and on the path?",
466 ),
467 13: (
468 "* Do you have permission to write "
469 'to the output file ("%s")?' % tmpfn,
470 ),
471 24: (
472 "* Too many files open -- please submit "
473 "a bug report so that this can be fixed",
474 ),
475 32: (
476 "* Broken pipe -- if you passed a BedTool object "
477 "that was created using a generator function, "
478 "please try saving it to disk first using the "
479 ".saveas() method before calling this bedtools "
480 "command. See issue #49 for more.",
481 ),
482 }
483
484 print("Things to check:")
485 print("\n\t" + "\n\t".join(problems[err.errno]))
486 raise OSError("See above for commands that gave the error")
487
488 return output
489
490
491 def _check_sequence_stderr(x):
492 """
493 If stderr created by fastaFromBed starts with 'index file', then don't
494 consider it an error.
495 """
496 if isinstance(x, bytes):
497 x = x.decode("UTF-8")
498 if x.startswith("index file"):
499 return True
500 if x.startswith("WARNING"):
501 return True
502 return False
503
504
505 def _call_randomintersect(
506 _self,
507 other,
508 iterations,
509 intersect_kwargs,
510 shuffle_kwargs,
511 report_iterations,
512 debug,
513 _orig_processes,
514 ):
515 """
516 Helper function that list-ifies the output from randomintersection, s.t.
517 it can be pickled across a multiprocess Pool.
518 """
519 return list(
520 _self.randomintersection(
521 other,
522 iterations,
523 intersect_kwargs=intersect_kwargs,
524 shuffle_kwargs=shuffle_kwargs,
525 report_iterations=report_iterations,
526 debug=False,
527 processes=None,
528 _orig_processes=_orig_processes,
529 )
530 )
531
532
533 def close_or_delete(*args):
534 """
535 Single function that can be used to get rid of a BedTool, whether it's a
536 streaming or file-based version.
537 """
538 for x in args:
539 if isinstance(x.fn, str):
540 os.unlink(x.fn)
541 elif hasattr(x.fn, "close"):
542 x.fn.close()
543 if hasattr(x.fn, "throw"):
544 x.fn.throw(StopIteration)
545
546
547 def n_open_fds():
548 pid = os.getpid()
549 procs = subprocess.check_output(["lsof", "-w", "-Ff", "-p", str(pid)])
550 nprocs = 0
551 for i in procs.splitlines():
552 if i[1:].isdigit() and i[0] == "f":
553 nprocs += 1
554 return nprocs
555
556
557 import re
558
559 coord_re = re.compile(
560 r"""
561 (?P<chrom>.+):
562 (?P<start>\d+)-
563 (?P<stop>\d+)
564 (?:\[(?P<strand>.)\])?""",
565 re.VERBOSE,
566 )
567
568
569 def string_to_interval(s):
570 """
571 Convert string of the form "chrom:start-stop" or "chrom:start-stop[strand]"
572 to an interval.
573
574 Assumes zero-based coords.
575
576 If it's already an interval, then return it as-is.
577 """
578 if isinstance(s, str):
579 m = coord_re.search(s)
580 if m.group("strand"):
581 return create_interval_from_list(
582 [
583 m.group("chrom"),
584 m.group("start"),
585 m.group("stop"),
586 ".",
587 "0",
588 m.group("strand"),
589 ]
590 )
591 else:
592 return create_interval_from_list(
593 [m.group("chrom"), m.group("start"), m.group("stop")]
594 )
595 return s
596
597
598 class FisherOutput(object):
599 def __init__(self, s, **kwargs):
600 """
601 fisher returns text results like::
602
603 # Contingency Table
604 #_________________________________________
605 # | not in -b | in -b |
606 # not in -a | 3137160615 | 503 |
607 # in -a | 100 | 46 |
608 #_________________________________________
609 # p-values for fisher's exact test
610 left right two-tail ratio
611 1.00000 0.00000 0.00000 2868973.922
612
613 """
614 if isinstance(s, str):
615 s = open(s).read()
616 if hasattr(s, "next"):
617 s = "".join(i for i in s)
618 table = {
619 "not in -a": {"not in -b": None, "in -b": None},
620 "in -a": {"not in -b": None, "in -b": None},
621 }
622 self.text = s
623 lines = s.splitlines()
624 for i in lines:
625 if "not in -a" in i:
626 _, in_b, not_in_b, _ = i.strip().split("|")
627 table["not in -a"]["not in -b"] = int(not_in_b)
628 table["not in -a"]["in -b"] = int(in_b)
629
630 if " in -a" in i:
631 _, in_b, not_in_b, _ = i.strip().split("|")
632 table["in -a"]["not in -b"] = int(not_in_b)
633 table["in -a"]["in -b"] = int(in_b)
634 self.table = table
635 left, right, two_tail, ratio = lines[-1].split()
636 self.left_tail = float(left)
637 self.right_tail = float(right)
638 self.two_tail = float(two_tail)
639 self.ratio = float(ratio)
640
641 def __str__(self):
642 return self.text
643
644 def __repr__(self):
645 return "<%s at %s>\n%s" % (self.__class__.__name__, id(self), self.text)
646
647
648 class SplitOutput(object):
649 def __init__(self, output, **kwargs):
650 """
651 Handles output from bedtools split, which sends a report of files to
652 stdout. This class parses that list into something more convenient to
653 use within pybedtools.
654
655 Most useful is probably the .bedtools attribute, which is a list of
656 BedTool objects.
657 """
658 from .bedtool import BedTool
659
660 if isinstance(output, str):
661 output = open(output).read()
662 if hasattr(output, "next"):
663 output = "".join(i for i in output)
664
665 #: store a copy of the output
666 self.text = output
667
668 #: BedTool objects created from output
669 self.bedtools = []
670
671 #: Filenames that were created from the split
672 self.files = []
673
674 #: number of bases in each file
675 self.nbases = []
676
677 #: number of features in each file
678 self.counts = []
679
680 for line in output.splitlines():
681 toks = line.split()
682 self.files.append(toks[0])
683 self.nbases.append(int(toks[1]))
684 self.counts.append(int(toks[2]))
685 self.bedtools.append(BedTool(toks[0]))
686
687
688 def internet_on(timeout=1):
689 try:
690 response = urllib.request.urlopen("http://genome.ucsc.edu", timeout=timeout)
691 return True
692 except urllib.error.URLError as err:
693 pass
694 return False
695
696
697 def get_chromsizes_from_ucsc(
698 genome,
699 saveas=None,
700 mysql="mysql",
701 fetchchromsizes="fetchChromSizes",
702 timeout=None,
703 host_url="genome-mysql.cse.ucsc.edu",
704 ):
705 """
706 Download chrom size info for *genome* from UCSC and returns the dictionary.
707
708 Parameters
709 ----------
710
711 genome : str
712 Name of the genome assembly (e.g., "hg38")
713
714 saveas : str
715 Filename to save output to. Dictionary will still be returned.
716
717 mysql, fetchchromsizes : str
718 Paths to MySQL and fetchChromSizes.
719
720 timeout : float
721 How long to wait for a response; mostly used for testing.
722
723 host_url : str
724 URL of UCSC mirror MySQL server.
725 """
726 if not internet_on(timeout=timeout):
727 raise ValueError(
728 "It appears you don't have an internet connection "
729 "-- unable to get chromsizes from UCSC"
730 )
731 cmds = [
732 mysql,
733 "--user=genome",
734 "--host=" + host_url,
735 "-A",
736 "-e",
737 "select chrom, size from %s.chromInfo" % genome,
738 ]
739 failures = []
740 d = {}
741 try:
742 p = subprocess.Popen(
743 cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=BUFSIZE
744 )
745 stdout, stderr = p.communicate()
746 if stderr:
747 print(stderr)
748 print("Commands were:\n")
749 print((subprocess.list2cmdline(cmds)))
750
751 lines = stdout.splitlines()[1:]
752 for line in lines:
753 if isinstance(line, bytes):
754 line = line.decode("UTF-8")
755 chrom, size = line.split()
756 d[chrom] = (0, int(size))
757
758 if saveas is not None:
759 chromsizes_to_file(d, saveas)
760
761 except OSError as err:
762 if err.errno == 2:
763 failures.append("Can't find mysql at path {0}".format(mysql))
764 else:
765 raise
766 try:
767 cmds = [fetchchromsizes, genome]
768 p = subprocess.Popen(
769 cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=BUFSIZE
770 )
771 stdout, stderr = p.communicate()
772 if stderr:
773 if "INFO: trying WGET" not in str(stderr):
774 print(stderr)
775 print("Commands were:\n")
776 print((subprocess.list2cmdline(cmds)))
777
778 lines = stdout.splitlines()
779 for line in lines:
780 if isinstance(line, bytes):
781 line = line.decode("UTF-8")
782 chrom, size = line.split()
783 d[chrom] = (0, int(size))
784
785 if saveas is not None:
786 chromsizes_to_file(d, saveas)
787
788 except OSError as err:
789 if err.errno == 2:
790 failures.append("Can't find path to fetchChromsizes")
791
792 if not d:
793 raise OSError(failures)
794 return d
795
796
797 def chromsizes_to_file(chrom_sizes, fn=None):
798 """
799 Converts a *chromsizes* dictionary to a file. If *fn* is None, then a
800 tempfile is created (which can be deleted with pybedtools.cleanup()).
801
802 Returns the filename.
803 """
804 if fn is None:
805 tmpfn = tempfile.NamedTemporaryFile(
806 prefix="pybedtools.", suffix=".tmp", delete=False
807 )
808 tmpfn = tmpfn.name
809 filenames.TEMPFILES.append(tmpfn)
810 fn = tmpfn
811 if isinstance(chrom_sizes, str):
812 chrom_sizes = chromsizes(chrom_sizes)
813 fout = open(fn, "wt")
814 for chrom, bounds in chrom_sizes.items():
815 line = chrom + "\t" + str(bounds[1]) + "\n"
816 fout.write(line)
817 fout.close()
818 return fn
819
820
821 def get_chromsizes_from_genomepy(
822 genome, saveas=None,
823 ):
824 """
825 Get chrom size info for *genome* from genomepy, if genomepy is installed.
826
827 Parameters
828 ----------
829
830 genome : str
831 Name of the genome assembly (e.g., "hg38")
832
833 saveas : str
834 Filename to save output to. Dictionary will still be returned.
835 """
836 if "genomepy" not in sys.modules:
837 return None
838
839 d = {}
840 try:
841 g = genomepy.Genome(genome)
842 # Fail silently if the sizes file cannot be accessed
843 if not hasattr(g, "sizes_file"):
844 return None
845 for line in open(g.sizes_file):
846 chrom, size = line.split()
847 d[chrom] = (0, int(size))
848
849 if saveas is not None:
850 chromsizes_to_file(d, saveas)
851 except FileNotFoundError:
852 return None
853
854 return d
855
856
857 def chromsizes(genome):
858 """
859 Looks for a *genome* already included in the genome registry; if not found
860 it first tries to look it up via genomepy. If genomepy is not installed, or
861 if this lookup fails then it looks it up on UCSC. Returns the dictionary of
862 chromsize tuples where each tuple has (start,stop).
863
864 Chromsizes are described as (start, stop) tuples to allow randomization
865 within specified regions; e. g., you can make a chromsizes dictionary that
866 represents the extent of a tiling array.
867
868 Example usage:
869
870 >>> dm3_chromsizes = chromsizes('dm3')
871 >>> for i in sorted(dm3_chromsizes.items()):
872 ... print(i)
873 ('chr2L', (0, 23011544))
874 ('chr2LHet', (0, 368872))
875 ('chr2R', (0, 21146708))
876 ('chr2RHet', (0, 3288761))
877 ('chr3L', (0, 24543557))
878 ('chr3LHet', (0, 2555491))
879 ('chr3R', (0, 27905053))
880 ('chr3RHet', (0, 2517507))
881 ('chr4', (0, 1351857))
882 ('chrM', (0, 19517))
883 ('chrU', (0, 10049037))
884 ('chrUextra', (0, 29004656))
885 ('chrX', (0, 22422827))
886 ('chrXHet', (0, 204112))
887 ('chrYHet', (0, 347038))
888
889
890 """
891 try:
892 return getattr(genome_registry, genome)
893 except AttributeError:
894 chromsizes = get_chromsizes_from_genomepy(genome)
895 if chromsizes is None:
896 return get_chromsizes_from_ucsc(genome)
897 else:
898 return chromsizes
899
900
901 def get_includes():
902 """
903 Returns a list of include directories with BEDTools headers
904 """
905 dirname = os.path.abspath(os.path.join(os.path.dirname(__file__)))
906 return [dirname, os.path.join(dirname, "include")]
907
908
909 atexit.register(cleanup)