Source code for genopandas.core.tree

"""Tree-related functions/classes."""

import collections
import itertools
import operator

# pylint: disable=unused-import
from intervaltree import IntervalTree, Interval

# pylint: enable=unused-import


[docs]class GenomicIntervalTree(collections.MutableMapping): """Datastructure for efficiently accessing genomic objects by position.""" def __init__(self, *args, **kwargs): self._trees = dict() self.update(dict(*args, **kwargs)) def __getitem__(self, key): return self._trees[key] def __setitem__(self, key, value): self._trees[key] = value def __delitem__(self, key): del self._trees[key] def __iter__(self): return iter(self._trees) def __len__(self): return len(self._trees) def __repr__(self): args = ', '.join('{!r}: {}'.format(k, v) for k, v in self.items()) return 'GenomicIntervalTree(**{' + args + '})' # pylint: disable=too-many-arguments
[docs] def search(self, chromosome, start, end=None, strict_left=False, strict_right=False): """Searches the tree for objects within given range.""" overlap = self[chromosome].search(start, end) if strict_left: overlap = (int_ for int_ in overlap if int_.begin >= start) if strict_right: overlap = (int_ for int_ in overlap if int_.end <= end)
return set(overlap) # pylint: enable=too-many-arguments
[docs] def is_empty(self): """Returns True if tree is empty.""" if len(self) == 0: return True
return all(tree.isempty() for tree in self._trees.values())
[docs] @classmethod def from_tuples(cls, tuples): """Builds an instance from tuples. Assumes tuples are sorted by chromosome. """ # Group by chromosome. tuples = sorted(tuples, key=operator.itemgetter(0)) groups = itertools.groupby(tuples, key=operator.itemgetter(0)) # Build trees. trees = {} for chrom, group in groups: trees[chrom] = IntervalTree.from_tuples(tup[1:] for tup in group)
return cls(trees)
[docs] def intersection(self, other): """Returns a new tree of all intervals common to both self and other.""" shared = set(self.keys()) & set(other.keys()) return self.__class__( {k: self[k].intersection(other[k])
for k in shared})
[docs] def union(self, other): """Returns a new tree, comprising all intervals from self and other.""" merged = {chrom: tree.copy() for chrom, tree in self._trees.items()} for chrom, tree in other.items(): if chrom in merged: merged[chrom] = merged[chrom].union(tree) else: merged[chrom] = tree.copy()
return self.__class__(merged)
[docs] def difference(self, other): """Returns a new tree, comprising all intervals in self but not in other.""" diff = {} for chrom, tree in self.items(): if chrom in other: diff[chrom] = tree.difference(other[chrom]) else: diff[chrom] = tree.copy()
return self.__class__(diff) def __or__(self, other): return self.union(other) def __and__(self, other): return self.intersection(other) def __sub__(self, other):
return self.difference(other)