Source code for deepcpg.data.stats

"""Functions for computing statistic about binary CpG matrix.

CpG matrix x assumed to have shape
    * [sites, cells] for per CpG statistics
    * [sites, cells, context] for window-based statistics
"""

from __future__ import division
from __future__ import print_function

import numpy as np

from ..utils import EPS, get_from_module


[docs]def mean(x): """Mean methylation rate.""" if x.ndim > 2: x = x.mean(axis=2) return np.mean(x, 1)
[docs]def mode(x): """Mode of methylation rate.""" if x.ndim > 2: x = x.mean(axis=2) return x.mean(axis=1).round().astype(np.int8)
[docs]def var(x, *args, **kwargs): """Variance between cells.""" if x.ndim > 2: x = x.mean(axis=2) return x.var(axis=1)
[docs]def cat_var(x, nb_bin=3, *args, **kwargs): """Categorical variance between cells. Discretizes variance from :func:`var` into `nb_bin` equally-spaced bins. """ v = var(x, *args, **kwargs) bins = np.linspace(-EPS, 0.25, nb_bin + 1) cv = np.digitize(v, bins, right=True) - 1 return np.ma.masked_array(cv, v.mask)
[docs]def cat2_var(*args, **kwargs): """Binary variance between cells.""" cv = cat_var(*args, **kwargs) cv[cv > 0] = 1 return cv
[docs]def entropy(x): """Entropy of single CpG sites between cells.""" if x.ndim > 2: x = x.mean(axis=2) p1 = x.mean(axis=1) p1 = np.minimum(1 - EPS, np.maximum(EPS, p1)) p0 = 1 - p1 return -(p1 * np.log(p1) + p0 * np.log(p0))
[docs]def diff(x): """Test if CpG site is differentially methylated.""" if x.ndim > 2: x = x.mean(axis=2) return x.min(axis=1) != x.max(axis=1).astype(np.int8)
[docs]def get(name): """Return object from module by its name.""" return get_from_module(name, globals())