diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index fe3db7dd3..2f94df77a 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -36,7 +36,6 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install flake8 pytest - pip install taichi-nightly -i https://pypi.taichi.graphics/simple/ if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi pip uninstall brainpy -y python setup.py install @@ -103,7 +102,6 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install flake8 pytest - pip install taichi-nightly -i https://pypi.taichi.graphics/simple/ if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi pip uninstall brainpy -y python setup.py install diff --git a/brainpy/__init__.py b/brainpy/__init__.py index c52358720..c8f834c6d 100644 --- a/brainpy/__init__.py +++ b/brainpy/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -__version__ = "2.4.6.post4" +__version__ = "2.4.6.post5" # fundamental supporting modules from brainpy import errors, check, tools diff --git a/brainpy/_src/dnn/conv.py b/brainpy/_src/dnn/conv.py index 75b6373c5..deead1f3b 100644 --- a/brainpy/_src/dnn/conv.py +++ b/brainpy/_src/dnn/conv.py @@ -4,10 +4,10 @@ from jax import lax -from brainpy import math as bm, tools, check +from brainpy import math as bm, tools +from brainpy._src.dnn.base import Layer from brainpy._src.initialize import Initializer, XavierNormal, ZeroInit, parameter from brainpy.types import ArrayType -from brainpy._src.dnn.base import Layer __all__ = [ 'Conv1d', 'Conv2d', 'Conv3d', @@ -488,9 +488,7 @@ def __init__( mode: bm.Mode = None, name: str = None, ): - super(_GeneralConvTranspose, self).__init__(name=name, mode=mode) - - assert self.mode.is_parent_of(bm.TrainingMode, bm.BatchingMode, bm.NonBatchingMode) + super().__init__(name=name, mode=mode) self.num_spatial_dims = num_spatial_dims self.in_channels = in_channels @@ -586,22 +584,17 @@ def __init__( """Initializes the 
module. Args: - output_channels: Number of output channels. - kernel_shape: The shape of the kernel. Either an integer or a sequence of + in_channels: Number of input channels. + out_channels: Number of output channels. + kernel_size: The shape of the kernel. Either an integer or a sequence of length 1. stride: Optional stride for the kernel. Either an integer or a sequence of length 1. Defaults to 1. - output_shape: Output shape of the spatial dimensions of a transpose - convolution. Can be either an integer or an iterable of integers. If a - `None` value is given, a default shape is automatically calculated. padding: Optional padding algorithm. Either ``VALID`` or ``SAME``. Defaults to ``SAME``. See: https://www.tensorflow.org/xla/operation_semantics#conv_convolution. - with_bias: Whether to add a bias. By default, true. - w_init: Optional weight initialization. By default, truncated normal. - b_init: Optional bias initialization. By default, zeros. - data_format: The data format of the input. Either ``NWC`` or ``NCW``. By - default, ``NWC``. + w_initializer: Optional weight initialization. By default, truncated normal. + b_initializer: Optional bias initialization. By default, zeros. mask: Optional mask of the weights. name: The name of the module. """ @@ -648,6 +641,7 @@ def __init__( """Initializes the module. Args: + in_channels: Number of input channels. out_channels: Number of output channels. kernel_size: The shape of the kernel. Either an integer or a sequence of length 2. @@ -704,22 +698,17 @@ def __init__( """Initializes the module. Args: - output_channels: Number of output channels. - kernel_shape: The shape of the kernel. Either an integer or a sequence of + in_channels: Number of input channels. + out_channels: Number of output channels. + kernel_size: The shape of the kernel. Either an integer or a sequence of length 3. stride: Optional stride for the kernel. Either an integer or a sequence of length 3. Defaults to 1. 
- output_shape: Output shape of the spatial dimensions of a transpose - convolution. Can be either an integer or an iterable of integers. If a - `None` value is given, a default shape is automatically calculated. padding: Optional padding algorithm. Either ``VALID`` or ``SAME``. Defaults to ``SAME``. See: https://www.tensorflow.org/xla/operation_semantics#conv_convolution. - with_bias: Whether to add a bias. By default, true. - w_init: Optional weight initialization. By default, truncated normal. - b_init: Optional bias initialization. By default, zeros. - data_format: The data format of the input. Either ``NDHWC`` or ``NCDHW``. - By default, ``NDHWC``. + w_initializer: Optional weight initialization. By default, truncated normal. + b_initializer: Optional bias initialization. By default, zeros. mask: Optional mask of the weights. name: The name of the module. """ diff --git a/brainpy/_src/initialize/random_inits.py b/brainpy/_src/initialize/random_inits.py index 893ed06b1..871b8129e 100644 --- a/brainpy/_src/initialize/random_inits.py +++ b/brainpy/_src/initialize/random_inits.py @@ -227,7 +227,7 @@ def __call__(self, shape, dtype=None): variance = (self.scale / denominator).astype(dtype) if self.distribution == "truncated_normal": stddev = (jnp.sqrt(variance) / .87962566103423978).astype(dtype) - res = self.rng.truncated_normal(-2, 2, shape, dtype) * stddev + res = self.rng.truncated_normal(-2, 2, shape).astype(dtype) * stddev elif self.distribution == "normal": res = self.rng.randn(*shape) * jnp.sqrt(variance).astype(dtype) elif self.distribution == "uniform": diff --git a/brainpy/_src/math/random.py b/brainpy/_src/math/random.py index 84d65740a..b5366999d 100644 --- a/brainpy/_src/math/random.py +++ b/brainpy/_src/math/random.py @@ -9,11 +9,11 @@ import jax import numpy as np from jax import lax, jit, vmap, numpy as jnp, random as jr, core, dtypes +from jax._src.array import ArrayImpl from jax.experimental.host_callback import call from jax.tree_util import 
register_pytree_node_class -from jax._src.array import ArrayImpl -from brainpy.check import jit_error +from brainpy.check import jit_error_checking, jit_error_checking_no_args from .compat_numpy import shape from .environment import get_int from .ndarray import Array, _return @@ -60,7 +60,7 @@ def _size2shape(size): elif isinstance(size, (tuple, list)): return tuple(size) else: - return (size, ) + return (size,) def _check_shape(name, shape, *param_shapes): @@ -791,32 +791,64 @@ def uniform(self, low=0.0, high=1.0, size=None, key=None): r = jr.uniform(key, shape=_size2shape(size), minval=low, maxval=high) return _return(r) - def truncated_normal(self, lower, upper, size=None, scale=None, key=None): - lower = _as_jax_array(lower) - lower = _check_py_seq(lower) - upper = _as_jax_array(upper) - upper = _check_py_seq(upper) - scale = _as_jax_array(scale) - scale = _check_py_seq(scale) + def __norm_cdf(self, x, sqrt2, dtype): + # Computes standard normal cumulative distribution function + return (np.asarray(1., dtype) + lax.erf(x / sqrt2)) / np.asarray(2., dtype) + + def truncated_normal(self, lower, upper, size=None, loc=0., scale=1., dtype=float, key=None): + lower = _check_py_seq(_as_jax_array(lower)) + upper = _check_py_seq(_as_jax_array(upper)) + loc = _check_py_seq(_as_jax_array(loc)) + scale = _check_py_seq(_as_jax_array(scale)) + + lower = lax.convert_element_type(lower, dtype) + upper = lax.convert_element_type(upper, dtype) + loc = lax.convert_element_type(loc, dtype) + scale = lax.convert_element_type(scale, dtype) + + jit_error_checking_no_args( + jnp.any(jnp.logical_or(loc < lower - 2 * scale, loc > upper + 2 * scale)), + ValueError("mean is more than 2 std from [lower, upper] in truncated_normal. 
" + "The distribution of values may be incorrect.") + ) + if size is None: size = lax.broadcast_shapes(jnp.shape(lower), jnp.shape(upper), + jnp.shape(loc), jnp.shape(scale)) + + # Values are generated by using a truncated uniform distribution and + # then using the inverse CDF for the normal distribution. + # Get upper and lower cdf values + sqrt2 = np.array(np.sqrt(2), dtype) + l = self.__norm_cdf((lower - loc) / scale, sqrt2, dtype) + u = self.__norm_cdf((upper - loc) / scale, sqrt2, dtype) + + # Uniformly fill tensor with values from [l, u], then translate to + # [2l-1, 2u-1]. key = self.split_key() if key is None else _formalize_key(key) - rands = jr.truncated_normal(key, - lower=lower, - upper=upper, - shape=_size2shape(size)) - if scale is not None: - rands = rands * scale - return _return(rands) + out = jr.uniform(key, size, dtype, minval=2 * l - 1, maxval=2 * u - 1) + + # Use inverse cdf transform for normal distribution to get truncated + # standard normal + out = lax.erf_inv(out) + + # Transform to proper mean, std + out = out * scale * sqrt2 + loc + + # Clamp to ensure it's in the proper range + out = jnp.clip(out, + lax.nextafter(lax.stop_gradient(lower), np.array(np.inf, dtype=dtype)), + lax.nextafter(lax.stop_gradient(upper), np.array(-np.inf, dtype=dtype))) + return _return(out) def _check_p(self, p): raise ValueError(f'Parameter p should be within [0, 1], but we got {p}') def bernoulli(self, p, size=None, key=None): p = _check_py_seq(_as_jax_array(p)) - jit_error(jnp.any(jnp.logical_and(p < 0, p > 1)), self._check_p, p) + jit_error_checking(jnp.any(jnp.logical_and(p < 0, p > 1)), self._check_p, p) if size is None: size = jnp.shape(p) key = self.split_key() if key is None else _formalize_key(key) @@ -838,7 +870,7 @@ def lognormal(self, mean=None, sigma=None, size=None, key=None): def binomial(self, n, p, size=None, key=None): n = _check_py_seq(n.value if isinstance(n, Array) else n) p = _check_py_seq(p.value if isinstance(p, Array) else p) - 
jit_error(jnp.any(jnp.logical_and(p < 0, p > 1)), self._check_p, p) + jit_error_checking(jnp.any(jnp.logical_and(p < 0, p > 1)), self._check_p, p) if size is None: size = jnp.broadcast_shapes(jnp.shape(n), jnp.shape(p)) key = self.split_key() if key is None else _formalize_key(key) @@ -882,7 +914,7 @@ def multinomial(self, n, pvals, size=None, key=None): key = self.split_key() if key is None else _formalize_key(key) n = _check_py_seq(_as_jax_array(n)) pvals = _check_py_seq(_as_jax_array(pvals)) - jit_error(jnp.sum(pvals[:-1]) > 1., self._check_p2, pvals) + jit_error_checking(jnp.sum(pvals[:-1]) > 1., self._check_p2, pvals) if isinstance(n, jax.core.Tracer): raise ValueError("The total count parameter `n` should not be a jax abstract array.") size = _size2shape(size) @@ -1248,6 +1280,9 @@ def randint_like(self, input, low=0, high=None, *, dtype=None, key=None): def split_key(): + """Create a new seed from the current seed. + + This function is useful for the consistency with JAX's random paradigm.""" return DEFAULT.split_key() @@ -1554,7 +1589,7 @@ def randn(*dn, key=None): def random(size=None, key=None): - """ + r""" Return random floats in the half-open interval [0.0, 1.0). Alias for `random_sample` to ease forward-porting to the new random API. """ @@ -1613,9 +1648,9 @@ def random_sample(size=None, key=None): def ranf(size=None, key=None): - """ + r""" This is an alias of `random_sample`. See `random_sample` for the complete - documentation. + documentation. """ return DEFAULT.ranf(size, key=key) @@ -1623,7 +1658,7 @@ def ranf(size=None, key=None): def sample(size=None, key=None): """ This is an alias of `random_sample`. See `random_sample` for the complete - documentation. + documentation. 
""" return DEFAULT.sample(size, key=key) @@ -1840,285 +1875,2787 @@ def beta(a, b, size=None, key=None): return DEFAULT.beta(a, b, size=size, key=key) -# @wraps(np.random.exponential) def exponential(scale=None, size=None, key=None): - return DEFAULT.exponential(scale, size, key=key) - - -# @wraps(np.random.gamma) -def gamma(shape, scale=None, size=None, key=None): - return DEFAULT.gamma(shape, scale, size=size, key=key) + r""" + Draw samples from an exponential distribution. + Its probability density function is -# @wraps(np.random.gumbel) -def gumbel(loc=None, scale=None, size=None, key=None): - return DEFAULT.gumbel(loc, scale, size=size, key=key) + .. math:: f(x; \frac{1}{\beta}) = \frac{1}{\beta} \exp(-\frac{x}{\beta}), + for ``x > 0`` and 0 elsewhere. :math:`\beta` is the scale parameter, + which is the inverse of the rate parameter :math:`\lambda = 1/\beta`. + The rate parameter is an alternative, widely used parameterization + of the exponential distribution [3]_. -# @wraps(np.random.laplace) -def laplace(loc=None, scale=None, size=None, key=None): - return DEFAULT.laplace(loc, scale, size, key=key) + The exponential distribution is a continuous analogue of the + geometric distribution. It describes many common situations, such as + the size of raindrops measured over many rainstorms [1]_, or the time + between page requests to Wikipedia [2]_. + .. note:: + New code should use the `~numpy.random.Generator.exponential` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. -# @wraps(np.random.logistic) -def logistic(loc=None, scale=None, size=None, key=None): - return DEFAULT.logistic(loc, scale, size, key=key) + Parameters + ---------- + scale : float or array_like of floats + The scale parameter, :math:`\beta = 1/\lambda`. Must be + non-negative. + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. 
If size is ``None`` (default), + a single value is returned if ``scale`` is a scalar. Otherwise, + ``np.array(scale).size`` samples are drawn. + Returns + ------- + out : ndarray or scalar + Drawn samples from the parameterized exponential distribution. -# @wraps(np.random.normal) -def normal(loc=None, scale=None, size=None, key=None): - return DEFAULT.normal(loc, scale, size, key=key) + See Also + -------- + random.Generator.exponential: which should be used for new code. + References + ---------- + .. [1] Peyton Z. Peebles Jr., "Probability, Random Variables and + Random Signal Principles", 4th ed, 2001, p. 57. + .. [2] Wikipedia, "Poisson process", + https://en.wikipedia.org/wiki/Poisson_process + .. [3] Wikipedia, "Exponential distribution", + https://en.wikipedia.org/wiki/Exponential_distribution + """ + return DEFAULT.exponential(scale, size, key=key) -# @wraps(np.random.pareto) -def pareto(a, size=None, key=None): - return DEFAULT.pareto(a, size, key=key) +def gamma(shape, scale=None, size=None, key=None): + r""" + Draw samples from a Gamma distribution. -# @wraps(np.random.poisson) -def poisson(lam=1.0, size=None, key=None): - return DEFAULT.poisson(lam, size, key=key) + Samples are drawn from a Gamma distribution with specified parameters, + `shape` (sometimes designated "k") and `scale` (sometimes designated + "theta"), where both parameters are > 0. + .. note:: + New code should use the `~numpy.random.Generator.gamma` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. -# @wraps(np.random.standard_cauchy) -def standard_cauchy(size=None, key=None): - return DEFAULT.standard_cauchy(size, key=key) + Parameters + ---------- + shape : float or array_like of floats + The shape of the gamma distribution. Must be non-negative. + scale : float or array_like of floats, optional + The scale of the gamma distribution. Must be non-negative. + Default is equal to 1. 
+ size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a single value is returned if ``shape`` and ``scale`` are both scalars. + Otherwise, ``np.broadcast(shape, scale).size`` samples are drawn. + Returns + ------- + out : ndarray or scalar + Drawn samples from the parameterized gamma distribution. -# @wraps(np.random.standard_exponential) -def standard_exponential(size=None, key=None): - return DEFAULT.standard_exponential(size, key=key) + See Also + -------- + scipy.stats.gamma : probability density function, distribution or + cumulative density function, etc. + random.Generator.gamma: which should be used for new code. + Notes + ----- + The probability density for the Gamma distribution is -# @wraps(np.random.standard_gamma) -def standard_gamma(shape, size=None, key=None): - return DEFAULT.standard_gamma(shape, size, key=key) + .. math:: p(x) = x^{k-1}\frac{e^{-x/\theta}}{\theta^k\Gamma(k)}, + where :math:`k` is the shape and :math:`\theta` the scale, + and :math:`\Gamma` is the Gamma function. -# @wraps(np.random.standard_normal) -def standard_normal(size=None, key=None): - return DEFAULT.standard_normal(size, key=key) + The Gamma distribution is often used to model the times to failure of + electronic components, and arises naturally in processes for which the + waiting times between Poisson distributed events are relevant. + References + ---------- + .. [1] Weisstein, Eric W. "Gamma Distribution." From MathWorld--A + Wolfram Web Resource. + http://mathworld.wolfram.com/GammaDistribution.html + .. 
[2] Wikipedia, "Gamma distribution", + https://en.wikipedia.org/wiki/Gamma_distribution -# @wraps(np.random.standard_t) -def standard_t(df, size=None, key=None): - return DEFAULT.standard_t(df, size, key=key) + """ + return DEFAULT.gamma(shape, scale, size=size, key=key) -# @wraps(np.random.uniform) -def uniform(low=0.0, high=1.0, size=None, key=None): - return DEFAULT.uniform(low, high, size, key=key) +def gumbel(loc=None, scale=None, size=None, key=None): + r""" + Draw samples from a Gumbel distribution. + Draw samples from a Gumbel distribution with specified location and + scale. For more information on the Gumbel distribution, see + Notes and References below. -def truncated_normal(lower, upper, size=None, scale=None, key=None): - """Sample truncated standard normal random values with given shape and dtype. + .. note:: + New code should use the `~numpy.random.Generator.gumbel` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. Parameters ---------- - lower : float, ndarray - A float or array of floats representing the lower bound for - truncation. Must be broadcast-compatible with ``upper``. - upper : float, ndarray - A float or array of floats representing the upper bound for - truncation. Must be broadcast-compatible with ``lower``. - size : optional, list of int, tuple of int - A tuple of nonnegative integers specifying the result - shape. Must be broadcast-compatible with ``lower`` and ``upper``. The - default (None) produces a result shape by broadcasting ``lower`` and - ``upper``. - scale : float, ndarray - Standard deviation (spread or "width") of the distribution. Must be - non-negative. + loc : float or array_like of floats, optional + The location of the mode of the distribution. Default is 0. + scale : float or array_like of floats, optional + The scale parameter of the distribution. Default is 1. Must be non- + negative. + size : int or tuple of ints, optional + Output shape. 
If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a single value is returned if ``loc`` and ``scale`` are both scalars. + Otherwise, ``np.broadcast(loc, scale).size`` samples are drawn. Returns ------- - out : Array - A random array with the specified dtype and shape given by ``shape`` if - ``shape`` is not None, or else by broadcasting ``lower`` and ``upper``. - Returns values in the open interval ``(lower, upper)``. - """ - return DEFAULT.truncated_normal(lower, upper, size, scale, key=key) + out : ndarray or scalar + Drawn samples from the parameterized Gumbel distribution. + Notes + ----- + The Gumbel (or Smallest Extreme Value (SEV) or the Smallest Extreme + Value Type I) distribution is one of a class of Generalized Extreme + Value (GEV) distributions used in modeling extreme value problems. + The Gumbel is a special case of the Extreme Value Type I distribution + for maximums from distributions with "exponential-like" tails. -def bernoulli(p=0.5, size=None, key=None): - """Sample Bernoulli random values with given shape and mean. + The probability density for the Gumbel distribution is - Parameters - ---------- - p: float, array_like, optional - A float or array of floats for the mean of the random - variables. Must be broadcast-compatible with ``shape`` and the values - should be within [0, 1]. Default 0.5. - size: optional, tuple of int, int - A tuple of nonnegative integers representing the result - shape. Must be broadcast-compatible with ``p.shape``. The default (None) - produces a result shape equal to ``p.shape``. + .. math:: p(x) = \frac{e^{-(x - \mu)/ \beta}}{\beta} e^{ -e^{-(x - \mu)/ + \beta}}, - Returns - ------- - out: array_like - A random array with boolean dtype and shape given by ``shape`` if ``shape`` - is not None, or else ``p.shape``. 
- """ - return DEFAULT.bernoulli(p, size, key=key) + where :math:`\mu` is the mode, a location parameter, and + :math:`\beta` is the scale parameter. + The Gumbel (named for German mathematician Emil Julius Gumbel) was used + very early in the hydrology literature, for modeling the occurrence of + flood events. It is also used for modeling maximum wind speed and + rainfall rates. It is a "fat-tailed" distribution - the probability of + an event in the tail of the distribution is larger than if one used a + Gaussian, hence the surprisingly frequent occurrence of 100-year + floods. Floods were initially modeled as a Gaussian process, which + underestimated the frequency of extreme events. -# @wraps(np.random.lognormal) -def lognormal(mean=None, sigma=None, size=None, key=None): - return DEFAULT.lognormal(mean, sigma, size, key=key) + It is one of a class of extreme value distributions, the Generalized + Extreme Value (GEV) distributions, which also includes the Weibull and + Frechet. + The function has a mean of :math:`\mu + 0.57721\beta` and a variance + of :math:`\frac{\pi^2}{6}\beta^2`. -# @wraps(np.random.binomial) -def binomial(n, p, size=None, key=None): - return DEFAULT.binomial(n, p, size, key=key) + References + ---------- + .. [1] Gumbel, E. J., "Statistics of Extremes," + New York: Columbia University Press, 1958. + .. [2] Reiss, R.-D. and Thomas, M., "Statistical Analysis of Extreme + Values from Insurance, Finance, Hydrology and Other Fields," + Basel: Birkhauser Verlag, 2001. + """ + return DEFAULT.gumbel(loc, scale, size=size, key=key) -# @wraps(np.random.chisquare) -def chisquare(df, size=None, key=None): - return DEFAULT.chisquare(df, size, key=key) +def laplace(loc=None, scale=None, size=None, key=None): + r""" + Draw samples from the Laplace or double exponential distribution with + specified location (or mean) and scale (decay). 
+ The Laplace distribution is similar to the Gaussian/normal distribution, + but is sharper at the peak and has fatter tails. It represents the + difference between two independent, identically distributed exponential + random variables. -# @wraps(np.random.dirichlet) -def dirichlet(alpha, size=None, key=None): - return DEFAULT.dirichlet(alpha, size, key=key) + .. note:: + New code should use the `~numpy.random.Generator.laplace` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. + Parameters + ---------- + loc : float or array_like of floats, optional + The position, :math:`\mu`, of the distribution peak. Default is 0. + scale : float or array_like of floats, optional + :math:`\lambda`, the exponential decay. Default is 1. Must be non- + negative. + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a single value is returned if ``loc`` and ``scale`` are both scalars. + Otherwise, ``np.broadcast(loc, scale).size`` samples are drawn. -# @wraps(np.random.geometric) -def geometric(p, size=None, key=None): - return DEFAULT.geometric(p, size, key=key) + Returns + ------- + out : ndarray or scalar + Drawn samples from the parameterized Laplace distribution. + See Also + -------- + random.Generator.laplace: which should be used for new code. -# @wraps(np.random.f) -def f(dfnum, dfden, size=None, key=None): - return DEFAULT.f(dfnum, dfden, size, key=key) + Notes + ----- + It has the probability density function + .. math:: f(x; \mu, \lambda) = \frac{1}{2\lambda} + \exp\left(-\frac{|x - \mu|}{\lambda}\right). 
-# @wraps(np.random.hypergeometric) -def hypergeometric(ngood, nbad, nsample, size=None, key=None): - return DEFAULT.hypergeometric(ngood, nbad, nsample, size, key=key) + The first law of Laplace, from 1774, states that the frequency + of an error can be expressed as an exponential function of the + absolute magnitude of the error, which leads to the Laplace + distribution. For many problems in economics and health + sciences, this distribution seems to model the data better + than the standard Gaussian distribution. + References + ---------- + .. [1] Abramowitz, M. and Stegun, I. A. (Eds.). "Handbook of + Mathematical Functions with Formulas, Graphs, and Mathematical + Tables, 9th printing," New York: Dover, 1972. + .. [2] Kotz, Samuel, et. al. "The Laplace Distribution and + Generalizations, " Birkhauser, 2001. + .. [3] Weisstein, Eric W. "Laplace Distribution." + From MathWorld--A Wolfram Web Resource. + http://mathworld.wolfram.com/LaplaceDistribution.html + .. [4] Wikipedia, "Laplace distribution", + https://en.wikipedia.org/wiki/Laplace_distribution -# @wraps(np.random.logseries) -def logseries(p, size=None, key=None): - return DEFAULT.logseries(p, size, key=key) + Examples + -------- + Draw samples from the distribution + >>> loc, scale = 0., 1. 
+ >>> s = bm.random.laplace(loc, scale, 1000) -# @wraps(np.random.multinomial) -def multinomial(n, pvals, size=None, key=None): - return DEFAULT.multinomial(n, pvals, size, key=key) + Display the histogram of the samples, along with + the probability density function: + >>> import matplotlib.pyplot as plt + >>> count, bins, ignored = plt.hist(s, 30, density=True) + >>> x = np.arange(-8., 8., .01) + >>> pdf = np.exp(-abs(x-loc)/scale)/(2.*scale) + >>> plt.plot(x, pdf) -# @wraps(np.random.multivariate_normal) -def multivariate_normal(mean, cov, size=None, method: str = 'cholesky', key=None): - return DEFAULT.multivariate_normal(mean, cov, size, method, key=key) + Plot Gaussian for comparison: + >>> g = (1/(scale * np.sqrt(2 * np.pi)) * + ... np.exp(-(x - loc)**2 / (2 * scale**2))) + >>> plt.plot(x,g) + """ + return DEFAULT.laplace(loc, scale, size, key=key) -# @wraps(np.random.negative_binomial) -def negative_binomial(n, p, size=None, key=None): - return DEFAULT.negative_binomial(n, p, size, key=key) +def logistic(loc=None, scale=None, size=None, key=None): + r""" + Draw samples from a logistic distribution. -# @wraps(np.random.noncentral_chisquare) -def noncentral_chisquare(df, nonc, size=None, key=None): - return DEFAULT.noncentral_chisquare(df, nonc, size, key=key) + Samples are drawn from a logistic distribution with specified + parameters, loc (location or mean, also median), and scale (>0). + .. note:: + New code should use the `~numpy.random.Generator.logistic` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. -# @wraps(np.random.noncentral_f) -def noncentral_f(dfnum, dfden, nonc, size=None, key=None): - return DEFAULT.noncentral_f(dfnum, dfden, nonc, size, key=key) + Parameters + ---------- + loc : float or array_like of floats, optional + Parameter of the distribution. Default is 0. + scale : float or array_like of floats, optional + Parameter of the distribution. Must be non-negative. + Default is 1. 
+ size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a single value is returned if ``loc`` and ``scale`` are both scalars. + Otherwise, ``np.broadcast(loc, scale).size`` samples are drawn. + Returns + ------- + out : ndarray or scalar + Drawn samples from the parameterized logistic distribution. -# @wraps(np.random.power) -def power(a, size=None, key=None): - return DEFAULT.power(a, size, key=key) + See Also + -------- + scipy.stats.logistic : probability density function, distribution or + cumulative density function, etc. + random.Generator.logistic: which should be used for new code. + Notes + ----- + The probability density for the Logistic distribution is -# @wraps(np.random.rayleigh) -def rayleigh(scale=1.0, size=None, key=None): - return DEFAULT.rayleigh(scale, size, key=key) + .. math:: P(x) = P(x) = \frac{e^{-(x-\mu)/s}}{s(1+e^{-(x-\mu)/s})^2}, + where :math:`\mu` = location and :math:`s` = scale. -# @wraps(np.random.triangular) -def triangular(size=None, key=None): - return DEFAULT.triangular(size, key=key) + The Logistic distribution is used in Extreme Value problems where it + can act as a mixture of Gumbel distributions, in Epidemiology, and by + the World Chess Federation (FIDE) where it is used in the Elo ranking + system, assuming the performance of each player is a logistically + distributed random variable. + References + ---------- + .. [1] Reiss, R.-D. and Thomas M. (2001), "Statistical Analysis of + Extreme Values, from Insurance, Finance, Hydrology and Other + Fields," Birkhauser Verlag, Basel, pp 132-133. + .. [2] Weisstein, Eric W. "Logistic Distribution." From + MathWorld--A Wolfram Web Resource. + http://mathworld.wolfram.com/LogisticDistribution.html + .. 
[3] Wikipedia, "Logistic-distribution", + https://en.wikipedia.org/wiki/Logistic_distribution -# @wraps(np.random.vonmises) -def vonmises(mu, kappa, size=None, key=None): - return DEFAULT.vonmises(mu, kappa, size, key=key) + Examples + -------- + Draw samples from the distribution: + >>> loc, scale = 10, 1 + >>> s = bm.random.logistic(loc, scale, 10000) + >>> import matplotlib.pyplot as plt + >>> count, bins, ignored = plt.hist(s, bins=50) -# @wraps(np.random.wald) -def wald(mean, scale, size=None, key=None): - return DEFAULT.wald(mean, scale, size, key=key) + # plot against distribution + >>> def logist(x, loc, scale): + ... return np.exp((loc-x)/scale)/(scale*(1+np.exp((loc-x)/scale))**2) + >>> lgst_val = logist(bins, loc, scale) + >>> plt.plot(bins, lgst_val * count.max() / lgst_val.max()) + >>> plt.show() + """ + return DEFAULT.logistic(loc, scale, size, key=key) -def weibull(a, size=None, key=None): - r""" - Draw samples from a Weibull distribution. - - Draw samples from a 1-parameter Weibull distribution with the given - shape parameter `a`. - .. math:: X = (-ln(U))^{1/a} +def normal(loc=None, scale=None, size=None, key=None): + r""" + Draw random samples from a normal (Gaussian) distribution. - Here, U is drawn from the uniform distribution over (0,1]. + The probability density function of the normal distribution, first + derived by De Moivre and 200 years later by both Gauss and Laplace + independently [2]_, is often called the bell curve because of + its characteristic shape (see the example below). - The more common 2-parameter Weibull, including a scale parameter - :math:`\lambda` is just :math:`X = \lambda(-ln(U))^{1/a}`. + The normal distributions occurs often in nature. For example, it + describes the commonly occurring distribution of samples influenced + by a large number of tiny, random disturbances, each with its own + unique distribution [2]_. .. 
note:: - New code should use the ``weibull`` method of a ``default_rng()`` - instance instead; please see the :ref:`random-quick-start`. + New code should use the `~numpy.random.Generator.normal` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. Parameters ---------- - a : float or array_like of floats - Shape parameter of the distribution. Must be nonnegative. + loc : float or array_like of floats + Mean ("centre") of the distribution. + scale : float or array_like of floats + Standard deviation (spread or "width") of the distribution. Must be + non-negative. size : int or tuple of ints, optional Output shape. If the given shape is, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn. If size is ``None`` (default), - a single value is returned if ``a`` is a scalar. Otherwise, - ``np.array(a).size`` samples are drawn. + a single value is returned if ``loc`` and ``scale`` are both scalars. + Otherwise, ``np.broadcast(loc, scale).size`` samples are drawn. Returns ------- out : ndarray or scalar - Drawn samples from the parameterized Weibull distribution. + Drawn samples from the parameterized normal distribution. See Also -------- - scipy.stats.weibull_max - scipy.stats.weibull_min - scipy.stats.genextreme - gumbel - random.Generator.weibull: which should be used for new code. + scipy.stats.norm : probability density function, distribution or + cumulative density function, etc. + random.Generator.normal: which should be used for new code. Notes ----- - The Weibull (or Type III asymptotic extreme value distribution - for smallest values, SEV Type III, or Rosin-Rammler - distribution) is one of a class of Generalized Extreme Value - (GEV) distributions used in modeling extreme value problems. - This class includes the Gumbel and Frechet distributions. - - The probability density for the Weibull distribution is - - .. 
math:: p(x) = \frac{a} - {\lambda}(\frac{x}{\lambda})^{a-1}e^{-(x/\lambda)^a}, + The probability density for the Gaussian distribution is - where :math:`a` is the shape and :math:`\lambda` the scale. + .. math:: p(x) = \frac{1}{\sqrt{ 2 \pi \sigma^2 }} + e^{ - \frac{ (x - \mu)^2 } {2 \sigma^2} }, - The function has its peak (the mode) at - :math:`\lambda(\frac{a-1}{a})^{1/a}`. + where :math:`\mu` is the mean and :math:`\sigma` the standard + deviation. The square of the standard deviation, :math:`\sigma^2`, + is called the variance. - When ``a = 1``, the Weibull distribution reduces to the exponential - distribution. + The function has its peak at the mean, and its "spread" increases with + the standard deviation (the function reaches 0.607 times its maximum at + :math:`x + \sigma` and :math:`x - \sigma` [2]_). This implies that + normal is more likely to return samples lying close to the mean, rather + than those far away. References ---------- - .. [1] Waloddi Weibull, Royal Technical University, Stockholm, - 1939 "A Statistical Theory Of The Strength Of Materials", + .. [1] Wikipedia, "Normal distribution", + https://en.wikipedia.org/wiki/Normal_distribution + .. [2] P. R. Peebles Jr., "Central Limit Theorem" in "Probability, + Random Variables and Random Signal Principles", 4th ed., 2001, + pp. 51, 51, 125. + + Examples + -------- + Draw samples from the distribution: + + >>> mu, sigma = 0, 0.1 # mean and standard deviation + >>> s = bm.random.normal(mu, sigma, 1000) + + Verify the mean and the variance: + + >>> abs(mu - np.mean(s)) + 0.0 # may vary + + >>> abs(sigma - np.std(s, ddof=1)) + 0.1 # may vary + + Display the histogram of the samples, along with + the probability density function: + + >>> import matplotlib.pyplot as plt + >>> count, bins, ignored = plt.hist(s, 30, density=True) + >>> plt.plot(bins, 1/(sigma * np.sqrt(2 * np.pi)) * + ... np.exp( - (bins - mu)**2 / (2 * sigma**2) ), + ... 
linewidth=2, color='r') + >>> plt.show() + + Two-by-four array of samples from the normal distribution with + mean 3 and standard deviation 2.5: + + >>> bm.random.normal(3, 2.5, size=(2, 4)) + array([[-4.49401501, 4.00950034, -1.81814867, 7.29718677], # random + [ 0.39924804, 4.68456316, 4.99394529, 4.84057254]]) # random + """ + return DEFAULT.normal(loc, scale, size, key=key) + + +def pareto(a, size=None, key=None): + r""" + Draw samples from a Pareto II or Lomax distribution with + specified shape. + + The Lomax or Pareto II distribution is a shifted Pareto + distribution. The classical Pareto distribution can be + obtained from the Lomax distribution by adding 1 and + multiplying by the scale parameter ``m`` (see Notes). The + smallest value of the Lomax distribution is zero while for the + classical Pareto distribution it is ``mu``, where the standard + Pareto distribution has location ``mu = 1``. Lomax can also + be considered as a simplified version of the Generalized + Pareto distribution (available in SciPy), with the scale set + to one and the location set to zero. + + The Pareto distribution must be greater than zero, and is + unbounded above. It is also known as the "80-20 rule". In + this distribution, 80 percent of the weights are in the lowest + 20 percent of the range, while the other 20 percent fill the + remaining 80 percent of the range. + + .. note:: + New code should use the `~numpy.random.Generator.pareto` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. + + Parameters + ---------- + a : float or array_like of floats + Shape of the distribution. Must be positive. + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a single value is returned if ``a`` is a scalar. Otherwise, + ``np.array(a).size`` samples are drawn. 
+ + Returns + ------- + out : ndarray or scalar + Drawn samples from the parameterized Pareto distribution. + + See Also + -------- + scipy.stats.lomax : probability density function, distribution or + cumulative density function, etc. + scipy.stats.genpareto : probability density function, distribution or + cumulative density function, etc. + random.Generator.pareto: which should be used for new code. + + Notes + ----- + The probability density for the Pareto distribution is + + .. math:: p(x) = \frac{am^a}{x^{a+1}} + + where :math:`a` is the shape and :math:`m` the scale. + + The Pareto distribution, named after the Italian economist + Vilfredo Pareto, is a power law probability distribution + useful in many real world problems. Outside the field of + economics it is generally referred to as the Bradford + distribution. Pareto developed the distribution to describe + the distribution of wealth in an economy. It has also found + use in insurance, web page access statistics, oil field sizes, + and many other problems, including the download frequency for + projects in Sourceforge [1]_. It is one of the so-called + "fat-tailed" distributions. + + References + ---------- + .. [1] Francis Hunt and Paul Johnson, On the Pareto Distribution of + Sourceforge projects. + .. [2] Pareto, V. (1896). Course of Political Economy. Lausanne. + .. [3] Reiss, R.D., Thomas, M.(2001), Statistical Analysis of Extreme + Values, Birkhauser Verlag, Basel, pp 23-30. + .. [4] Wikipedia, "Pareto distribution", + https://en.wikipedia.org/wiki/Pareto_distribution + + Examples + -------- + Draw samples from the distribution: + + >>> a, m = 3., 2. 
# shape and mode + >>> s = (bm.random.pareto(a, 1000) + 1) * m + + Display the histogram of the samples, along with the probability + density function: + + >>> import matplotlib.pyplot as plt + >>> count, bins, _ = plt.hist(s, 100, density=True) + >>> fit = a*m**a / bins**(a+1) + >>> plt.plot(bins, max(count)*fit/max(fit), linewidth=2, color='r') + >>> plt.show() + """ + return DEFAULT.pareto(a, size, key=key) + + +def poisson(lam=1.0, size=None, key=None): + r""" + Draw samples from a Poisson distribution. + + The Poisson distribution is the limit of the binomial distribution + for large N. + + .. note:: + New code should use the `~numpy.random.Generator.poisson` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. + + Parameters + ---------- + lam : float or array_like of floats + Expected number of events occurring in a fixed-time interval, + must be >= 0. A sequence must be broadcastable over the requested + size. + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a single value is returned if ``lam`` is a scalar. Otherwise, + ``np.array(lam).size`` samples are drawn. + + Returns + ------- + out : ndarray or scalar + Drawn samples from the parameterized Poisson distribution. + + See Also + -------- + random.Generator.poisson: which should be used for new code. + + Notes + ----- + The Poisson distribution + + .. math:: f(k; \lambda)=\frac{\lambda^k e^{-\lambda}}{k!} + + For events with an expected separation :math:`\lambda` the Poisson + distribution :math:`f(k; \lambda)` describes the probability of + :math:`k` events occurring within the observed + interval :math:`\lambda`. + + Because the output is limited to the range of the C int64 type, a + ValueError is raised when `lam` is within 10 sigma of the maximum + representable value. + + References + ---------- + .. [1] Weisstein, Eric W. 
"Poisson Distribution." + From MathWorld--A Wolfram Web Resource. + http://mathworld.wolfram.com/PoissonDistribution.html + .. [2] Wikipedia, "Poisson distribution", + https://en.wikipedia.org/wiki/Poisson_distribution + + Examples + -------- + Draw samples from the distribution: + + >>> import numpy as np + >>> s = bm.random.poisson(5, 10000) + + Display histogram of the sample: + + >>> import matplotlib.pyplot as plt + >>> count, bins, ignored = plt.hist(s, 14, density=True) + >>> plt.show() + + Draw each 100 values for lambda 100 and 500: + + >>> s = bm.random.poisson(lam=(100., 500.), size=(100, 2)) + """ + return DEFAULT.poisson(lam, size, key=key) + + +def standard_cauchy(size=None, key=None): + r""" + Draw samples from a standard Cauchy distribution with mode = 0. + + Also known as the Lorentz distribution. + + .. note:: + New code should use the + `~numpy.random.Generator.standard_cauchy` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. + + Parameters + ---------- + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. Default is None, in which case a + single value is returned. + + Returns + ------- + samples : ndarray or scalar + The drawn samples. + + See Also + -------- + random.Generator.standard_cauchy: which should be used for new code. + + Notes + ----- + The probability density function for the full Cauchy distribution is + + .. math:: P(x; x_0, \gamma) = \frac{1}{\pi \gamma \bigl[ 1+ + (\frac{x-x_0}{\gamma})^2 \bigr] } + + and the Standard Cauchy distribution just sets :math:`x_0=0` and + :math:`\gamma=1` + + The Cauchy distribution arises in the solution to the driven harmonic + oscillator problem, and also describes spectral line broadening. It + also describes the distribution of values at which a line tilted at + a random angle will cut the x axis. 
+ + When studying hypothesis tests that assume normality, seeing how the + tests perform on data from a Cauchy distribution is a good indicator of + their sensitivity to a heavy-tailed distribution, since the Cauchy looks + very much like a Gaussian distribution, but with heavier tails. + + References + ---------- + .. [1] NIST/SEMATECH e-Handbook of Statistical Methods, "Cauchy + Distribution", + https://www.itl.nist.gov/div898/handbook/eda/section3/eda3663.htm + .. [2] Weisstein, Eric W. "Cauchy Distribution." From MathWorld--A + Wolfram Web Resource. + http://mathworld.wolfram.com/CauchyDistribution.html + .. [3] Wikipedia, "Cauchy distribution" + https://en.wikipedia.org/wiki/Cauchy_distribution + + Examples + -------- + Draw samples and plot the distribution: + + >>> import matplotlib.pyplot as plt + >>> s = bm.random.standard_cauchy(1000000) + >>> s = s[(s>-25) & (s<25)] # truncate distribution so it plots well + >>> plt.hist(s, bins=100) + >>> plt.show() + """ + return DEFAULT.standard_cauchy(size, key=key) + + +def standard_exponential(size=None, key=None): + r""" + Draw samples from the standard exponential distribution. + + `standard_exponential` is identical to the exponential distribution + with a scale parameter of 1. + + .. note:: + New code should use the + `~numpy.random.Generator.standard_exponential` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. + + Parameters + ---------- + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. Default is None, in which case a + single value is returned. + + Returns + ------- + out : float or ndarray + Drawn samples. + + See Also + -------- + random.Generator.standard_exponential: which should be used for new code. 
+ + Examples + -------- + Output a 3x8000 array: + + >>> n = bm.random.standard_exponential((3, 8000)) + """ + return DEFAULT.standard_exponential(size, key=key) + + +def standard_gamma(shape, size=None, key=None): + r""" + Draw samples from a standard Gamma distribution. + + Samples are drawn from a Gamma distribution with specified parameters, + shape (sometimes designated "k") and scale=1. + + .. note:: + New code should use the + `~numpy.random.Generator.standard_gamma` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. + + Parameters + ---------- + shape : float or array_like of floats + Parameter, must be non-negative. + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a single value is returned if ``shape`` is a scalar. Otherwise, + ``np.array(shape).size`` samples are drawn. + + Returns + ------- + out : ndarray or scalar + Drawn samples from the parameterized standard gamma distribution. + + See Also + -------- + scipy.stats.gamma : probability density function, distribution or + cumulative density function, etc. + random.Generator.standard_gamma: which should be used for new code. + + Notes + ----- + The probability density for the Gamma distribution is + + .. math:: p(x) = x^{k-1}\frac{e^{-x/\theta}}{\theta^k\Gamma(k)}, + + where :math:`k` is the shape and :math:`\theta` the scale, + and :math:`\Gamma` is the Gamma function. + + The Gamma distribution is often used to model the times to failure of + electronic components, and arises naturally in processes for which the + waiting times between Poisson distributed events are relevant. + + References + ---------- + .. [1] Weisstein, Eric W. "Gamma Distribution." From MathWorld--A + Wolfram Web Resource. + http://mathworld.wolfram.com/GammaDistribution.html + .. 
[2] Wikipedia, "Gamma distribution", + https://en.wikipedia.org/wiki/Gamma_distribution + + Examples + -------- + Draw samples from the distribution: + + >>> shape, scale = 2., 1. # mean and width + >>> s = bm.random.standard_gamma(shape, 1000000) + + Display the histogram of the samples, along with + the probability density function: + + >>> import matplotlib.pyplot as plt + >>> import scipy.special as sps # doctest: +SKIP + >>> count, bins, ignored = plt.hist(s, 50, density=True) + >>> y = bins**(shape-1) * ((np.exp(-bins/scale))/ # doctest: +SKIP + ... (sps.gamma(shape) * scale**shape)) + >>> plt.plot(bins, y, linewidth=2, color='r') # doctest: +SKIP + >>> plt.show() + """ + return DEFAULT.standard_gamma(shape, size, key=key) + + +def standard_normal(size=None, key=None): + r""" + Draw samples from a standard Normal distribution (mean=0, stdev=1). + + .. note:: + New code should use the + `~numpy.random.Generator.standard_normal` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. + + Parameters + ---------- + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. Default is None, in which case a + single value is returned. + + Returns + ------- + out : float or ndarray + A floating-point array of shape ``size`` of drawn samples, or a + single sample if ``size`` was not specified. + + See Also + -------- + normal : + Equivalent function with additional ``loc`` and ``scale`` arguments + for setting the mean and standard deviation. + random.Generator.standard_normal: which should be used for new code. + + Notes + ----- + For random samples from the normal distribution with mean ``mu`` and + standard deviation ``sigma``, use one of:: + + mu + sigma * bm.random.standard_normal(size=...) + bm.random.normal(mu, sigma, size=...) 
+ + Examples + -------- + >>> bm.random.standard_normal() + 2.1923875335537315 #random + + >>> s = bm.random.standard_normal(8000) + >>> s + array([ 0.6888893 , 0.78096262, -0.89086505, ..., 0.49876311, # random + -0.38672696, -0.4685006 ]) # random + >>> s.shape + (8000,) + >>> s = bm.random.standard_normal(size=(3, 4, 2)) + >>> s.shape + (3, 4, 2) + + Two-by-four array of samples from the normal distribution with + mean 3 and standard deviation 2.5: + + >>> 3 + 2.5 * bm.random.standard_normal(size=(2, 4)) + array([[-4.49401501, 4.00950034, -1.81814867, 7.29718677], # random + [ 0.39924804, 4.68456316, 4.99394529, 4.84057254]]) # random + """ + return DEFAULT.standard_normal(size, key=key) + + +def standard_t(df, size=None, key=None): + r""" + Draw samples from a standard Student's t distribution with `df` degrees + of freedom. + + A special case of the hyperbolic distribution. As `df` gets + large, the result resembles that of the standard normal + distribution (`standard_normal`). + + .. note:: + New code should use the `~numpy.random.Generator.standard_t` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. + + Parameters + ---------- + df : float or array_like of floats + Degrees of freedom, must be > 0. + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a single value is returned if ``df`` is a scalar. Otherwise, + ``np.array(df).size`` samples are drawn. + + Returns + ------- + out : ndarray or scalar + Drawn samples from the parameterized standard Student's t distribution. + + See Also + -------- + random.Generator.standard_t: which should be used for new code. + + Notes + ----- + The probability density function for the t distribution is + + .. 
math:: P(x, df) = \frac{\Gamma(\frac{df+1}{2})}{\sqrt{\pi df} + \Gamma(\frac{df}{2})}\Bigl( 1+\frac{x^2}{df} \Bigr)^{-(df+1)/2} + + The t test is based on an assumption that the data come from a + Normal distribution. The t test provides a way to test whether + the sample mean (that is the mean calculated from the data) is + a good estimate of the true mean. + + The derivation of the t-distribution was first published in + 1908 by William Gosset while working for the Guinness Brewery + in Dublin. Due to proprietary issues, he had to publish under + a pseudonym, and so he used the name Student. + + References + ---------- + .. [1] Dalgaard, Peter, "Introductory Statistics With R", + Springer, 2002. + .. [2] Wikipedia, "Student's t-distribution" + https://en.wikipedia.org/wiki/Student's_t-distribution + + Examples + -------- + From Dalgaard page 83 [1]_, suppose the daily energy intake for 11 + women in kilojoules (kJ) is: + + >>> intake = np.array([5260., 5470, 5640, 6180, 6390, 6515, 6805, 7515, \ + ... 7515, 8230, 8770]) + + Does their energy intake deviate systematically from the recommended + value of 7725 kJ? Our null hypothesis will be the absence of deviation, + and the alternate hypothesis will be the presence of an effect that could be + either positive or negative, hence making our test 2-tailed. + + Because we are estimating the mean and we have N=11 values in our sample, + we have N-1=10 degrees of freedom. We set our significance level to 95% and + compute the t statistic using the empirical mean and empirical standard + deviation of our intake. We use a ddof of 1 to base the computation of our + empirical standard deviation on an unbiased estimate of the variance (note: + the final estimate is not unbiased due to the concave nature of the square + root). 
+ + >>> np.mean(intake) + 6753.636363636364 + >>> intake.std(ddof=1) + 1142.1232221373727 + >>> t = (np.mean(intake)-7725)/(intake.std(ddof=1)/np.sqrt(len(intake))) + >>> t + -2.8207540608310198 + + We draw 1000000 samples from Student's t distribution with the adequate + degrees of freedom. + + >>> import matplotlib.pyplot as plt + >>> s = bm.random.standard_t(10, size=1000000) + >>> h = plt.hist(s, bins=100, density=True) + + Does our t statistic land in one of the two critical regions found at + both tails of the distribution? + + >>> np.sum(np.abs(t) < np.abs(s)) / float(len(s)) + 0.018318 #random < 0.05, statistic is in critical region + + The probability value for this 2-tailed test is about 1.83%, which is + lower than the 5% pre-determined significance threshold. + + Therefore, the probability of observing values as extreme as our intake + conditionally on the null hypothesis being true is too low, and we reject + the null hypothesis of no deviation. + """ + return DEFAULT.standard_t(df, size, key=key) + + +def uniform(low=0.0, high=1.0, size=None, key=None): + r""" + Draw samples from a uniform distribution. + + Samples are uniformly distributed over the half-open interval + ``[low, high)`` (includes low, but excludes high). In other words, + any value within the given interval is equally likely to be drawn + by `uniform`. + + .. note:: + New code should use the `~numpy.random.Generator.uniform` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. + + Parameters + ---------- + low : float or array_like of floats, optional + Lower boundary of the output interval. All values generated will be + greater than or equal to low. The default value is 0. + high : float or array_like of floats + Upper boundary of the output interval. All values generated will be + less than or equal to high. 
The high limit may be included in the + returned array of floats due to floating-point rounding in the + equation ``low + (high-low) * random_sample()``. The default value + is 1.0. + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a single value is returned if ``low`` and ``high`` are both scalars. + Otherwise, ``np.broadcast(low, high).size`` samples are drawn. + + Returns + ------- + out : ndarray or scalar + Drawn samples from the parameterized uniform distribution. + + See Also + -------- + randint : Discrete uniform distribution, yielding integers. + random_integers : Discrete uniform distribution over the closed + interval ``[low, high]``. + random_sample : Floats uniformly distributed over ``[0, 1)``. + random : Alias for `random_sample`. + rand : Convenience function that accepts dimensions as input, e.g., + ``rand(2,2)`` would generate a 2-by-2 array of floats, + uniformly distributed over ``[0, 1)``. + random.Generator.uniform: which should be used for new code. + + Notes + ----- + The probability density function of the uniform distribution is + + .. math:: p(x) = \frac{1}{b - a} + + anywhere within the interval ``[a, b)``, and zero elsewhere. + + When ``high`` == ``low``, values of ``low`` will be returned. + If ``high`` < ``low``, the results are officially undefined + and may eventually raise an error, i.e. do not rely on this + function to behave when passed arguments satisfying that + inequality condition. The ``high`` limit may be included in the + returned array of floats due to floating-point rounding in the + equation ``low + (high-low) * random_sample()``. 
For example: + + >>> x = np.float32(5*0.99999999) + >>> x + 5.0 + + + Examples + -------- + Draw samples from the distribution: + + >>> s = bm.random.uniform(-1,0,1000) + + All values are within the given interval: + + >>> np.all(s >= -1) + True + >>> np.all(s < 0) + True + + Display the histogram of the samples, along with the + probability density function: + + >>> import matplotlib.pyplot as plt + >>> count, bins, ignored = plt.hist(s, 15, density=True) + >>> plt.plot(bins, np.ones_like(bins), linewidth=2, color='r') + >>> plt.show() + """ + return DEFAULT.uniform(low, high, size, key=key) + + +def truncated_normal(lower, upper, size=None, loc=0., scale=1., dtype=float, key=None): + r"""Sample truncated standard normal random values with given shape and dtype. + + Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf + + + Notes + ----- + This distribution is the normal distribution centered on ``loc`` (default + 0), with standard deviation ``scale`` (default 1), and clipped at ``a``, + ``b`` standard deviations to the left, right (respectively) from ``loc``. + If ``myclip_a`` and ``myclip_b`` are clip values in the sample space (as + opposed to the number of standard deviations) then they can be converted + to the required form according to:: + + a, b = (myclip_a - loc) / scale, (myclip_b - loc) / scale + + + Parameters + ---------- + lower : float, ndarray + A float or array of floats representing the lower bound for + truncation. Must be broadcast-compatible with ``upper``. + upper : float, ndarray + A float or array of floats representing the upper bound for + truncation. Must be broadcast-compatible with ``lower``. + size : optional, list of int, tuple of int + A tuple of nonnegative integers specifying the result + shape. Must be broadcast-compatible with ``lower`` and ``upper``. The + default (None) produces a result shape by broadcasting ``lower`` and + ``upper``. 
+ loc: optional, float, ndarray + A float or array of floats representing the mean of the + distribution. Default is 0. + scale : float, ndarray + Standard deviation (spread or "width") of the distribution. Must be + non-negative. Default is 1. + dtype: optional + The float dtype for the returned values (default float64 if + jax_enable_x64 is true, otherwise float32). + key: jax.Array + The key for random generator. Consistent with the jax's random + paradigm. + + Returns + ------- + out : Array + A random array with the specified dtype and shape given by ``shape`` if + ``shape`` is not None, or else by broadcasting ``lower`` and ``upper``. + Returns values in the open interval ``(lower, upper)``. + """ + return DEFAULT.truncated_normal(lower, upper, size, loc, scale, dtype=dtype, key=key) + + +RandomState.truncated_normal.__doc__ = truncated_normal.__doc__ + + +def bernoulli(p=0.5, size=None, key=None): + r"""Sample Bernoulli random values with given shape and mean. + + Parameters + ---------- + p: float, array_like, optional + A float or array of floats for the mean of the random + variables. Must be broadcast-compatible with ``shape`` and the values + should be within [0, 1]. Default 0.5. + size: optional, tuple of int, int + A tuple of nonnegative integers representing the result + shape. Must be broadcast-compatible with ``p.shape``. The default (None) + produces a result shape equal to ``p.shape``. + + Returns + ------- + out: array_like + A random array with boolean dtype and shape given by ``shape`` if ``shape`` + is not None, or else ``p.shape``. + """ + return DEFAULT.bernoulli(p, size, key=key) + + +def lognormal(mean=None, sigma=None, size=None, key=None): + r""" + Draw samples from a log-normal distribution. + + Draw samples from a log-normal distribution with specified mean, + standard deviation, and array shape. 
Note that the mean and standard + deviation are not the values for the distribution itself, but of the + underlying normal distribution it is derived from. + + .. note:: + New code should use the `~numpy.random.Generator.lognormal` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. + + Parameters + ---------- + mean : float or array_like of floats, optional + Mean value of the underlying normal distribution. Default is 0. + sigma : float or array_like of floats, optional + Standard deviation of the underlying normal distribution. Must be + non-negative. Default is 1. + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a single value is returned if ``mean`` and ``sigma`` are both scalars. + Otherwise, ``np.broadcast(mean, sigma).size`` samples are drawn. + + Returns + ------- + out : ndarray or scalar + Drawn samples from the parameterized log-normal distribution. + + See Also + -------- + scipy.stats.lognorm : probability density function, distribution, + cumulative density function, etc. + random.Generator.lognormal: which should be used for new code. + + Notes + ----- + A variable `x` has a log-normal distribution if `log(x)` is normally + distributed. The probability density function for the log-normal + distribution is: + + .. math:: p(x) = \frac{1}{\sigma x \sqrt{2\pi}} + e^{(-\frac{(ln(x)-\mu)^2}{2\sigma^2})} + + where :math:`\mu` is the mean and :math:`\sigma` is the standard + deviation of the normally distributed logarithm of the variable. + A log-normal distribution results if a random variable is the *product* + of a large number of independent, identically-distributed variables in + the same way that a normal distribution results if the variable is the + *sum* of a large number of independent, identically-distributed + variables. + + References + ---------- + .. 
[1] Limpert, E., Stahel, W. A., and Abbt, M., "Log-normal + Distributions across the Sciences: Keys and Clues," + BioScience, Vol. 51, No. 5, May, 2001. + https://stat.ethz.ch/~stahel/lognormal/bioscience.pdf + .. [2] Reiss, R.D. and Thomas, M., "Statistical Analysis of Extreme + Values," Basel: Birkhauser Verlag, 2001, pp. 31-32. + + Examples + -------- + Draw samples from the distribution: + + >>> mu, sigma = 3., 1. # mean and standard deviation + >>> s = bm.random.lognormal(mu, sigma, 1000) + + Display the histogram of the samples, along with + the probability density function: + + >>> import matplotlib.pyplot as plt + >>> count, bins, ignored = plt.hist(s, 100, density=True, align='mid') + + >>> x = np.linspace(min(bins), max(bins), 10000) + >>> pdf = (np.exp(-(np.log(x) - mu)**2 / (2 * sigma**2)) + ... / (x * sigma * np.sqrt(2 * np.pi))) + + >>> plt.plot(x, pdf, linewidth=2, color='r') + >>> plt.axis('tight') + >>> plt.show() + + Demonstrate that taking the products of random samples from a uniform + distribution can be fit well by a log-normal probability density + function. + + >>> # Generate a thousand samples: each is the product of 100 random + >>> # values, drawn from a normal distribution. + >>> b = [] + >>> for i in range(1000): + ... a = 10. + bm.random.standard_normal(100) + ... b.append(np.product(a)) + + >>> b = np.array(b) / np.min(b) # scale values to be positive + >>> count, bins, ignored = plt.hist(b, 100, density=True, align='mid') + >>> sigma = np.std(np.log(b)) + >>> mu = np.mean(np.log(b)) + + >>> x = np.linspace(min(bins), max(bins), 10000) + >>> pdf = (np.exp(-(np.log(x) - mu)**2 / (2 * sigma**2)) + ... / (x * sigma * np.sqrt(2 * np.pi))) + + >>> plt.plot(x, pdf, color='r', linewidth=2) + >>> plt.show() + """ + return DEFAULT.lognormal(mean, sigma, size, key=key) + + +def binomial(n, p, size=None, key=None): + r""" + Draw samples from a binomial distribution. 
+ + Samples are drawn from a binomial distribution with specified + parameters, n trials and p probability of success where + n an integer >= 0 and p is in the interval [0,1]. (n may be + input as a float, but it is truncated to an integer in use) + + .. note:: + New code should use the `~numpy.random.Generator.binomial` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. + + Parameters + ---------- + n : int or array_like of ints + Parameter of the distribution, >= 0. Floats are also accepted, + but they will be truncated to integers. + p : float or array_like of floats + Parameter of the distribution, >= 0 and <=1. + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a single value is returned if ``n`` and ``p`` are both scalars. + Otherwise, ``np.broadcast(n, p).size`` samples are drawn. + + Returns + ------- + out : ndarray or scalar + Drawn samples from the parameterized binomial distribution, where + each sample is equal to the number of successes over the n trials. + + See Also + -------- + scipy.stats.binom : probability density function, distribution or + cumulative density function, etc. + random.Generator.binomial: which should be used for new code. + + Notes + ----- + The probability density for the binomial distribution is + + .. math:: P(N) = \binom{n}{N}p^N(1-p)^{n-N}, + + where :math:`n` is the number of trials, :math:`p` is the probability + of success, and :math:`N` is the number of successes. + + When estimating the standard error of a proportion in a population by + using a random sample, the normal distribution works well unless the + product p*n <=5, where p = population proportion estimate, and n = + number of samples, in which case the binomial distribution is used + instead. For example, a sample of 15 people shows 4 who are left + handed, and 11 who are right handed. 
Then p = 4/15 = 27%. 0.27*15 = 4, + so the binomial distribution should be used in this case. + + References + ---------- + .. [1] Dalgaard, Peter, "Introductory Statistics with R", + Springer-Verlag, 2002. + .. [2] Glantz, Stanton A. "Primer of Biostatistics.", McGraw-Hill, + Fifth Edition, 2002. + .. [3] Lentner, Marvin, "Elementary Applied Statistics", Bogden + and Quigley, 1972. + .. [4] Weisstein, Eric W. "Binomial Distribution." From MathWorld--A + Wolfram Web Resource. + http://mathworld.wolfram.com/BinomialDistribution.html + .. [5] Wikipedia, "Binomial distribution", + https://en.wikipedia.org/wiki/Binomial_distribution + + Examples + -------- + Draw samples from the distribution: + + >>> n, p = 10, .5 # number of trials, probability of each trial + >>> s = bm.random.binomial(n, p, 1000) + # result of flipping a coin 10 times, tested 1000 times. + + A real world example. A company drills 9 wild-cat oil exploration + wells, each with an estimated probability of success of 0.1. All nine + wells fail. What is the probability of that happening? + + Let's do 20,000 trials of the model, and count the number that + generate zero positive results. + + >>> sum(bm.random.binomial(9, 0.1, 20000) == 0)/20000. + # answer = 0.38885, or 38%. + """ + return DEFAULT.binomial(n, p, size, key=key) + + +def chisquare(df, size=None, key=None): + r""" + Draw samples from a chi-square distribution. + + When `df` independent random variables, each with standard normal + distributions (mean 0, variance 1), are squared and summed, the + resulting distribution is chi-square (see Notes). This distribution + is often used in hypothesis testing. + + .. note:: + New code should use the `~numpy.random.Generator.chisquare` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. + + Parameters + ---------- + df : float or array_like of floats + Number of degrees of freedom, must be > 0. + size : int or tuple of ints, optional + Output shape. 
If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a single value is returned if ``df`` is a scalar. Otherwise, + ``np.array(df).size`` samples are drawn. + + Returns + ------- + out : ndarray or scalar + Drawn samples from the parameterized chi-square distribution. + + Raises + ------ + ValueError + When `df` <= 0 or when an inappropriate `size` (e.g. ``size=-1``) + is given. + + See Also + -------- + random.Generator.chisquare: which should be used for new code. + + Notes + ----- + The variable obtained by summing the squares of `df` independent, + standard normally distributed random variables: + + .. math:: Q = \sum_{i=0}^{\mathtt{df}} X^2_i + + is chi-square distributed, denoted + + .. math:: Q \sim \chi^2_k. + + The probability density function of the chi-squared distribution is + + .. math:: p(x) = \frac{(1/2)^{k/2}}{\Gamma(k/2)} + x^{k/2 - 1} e^{-x/2}, + + where :math:`\Gamma` is the gamma function, + + .. math:: \Gamma(x) = \int_0^{-\infty} t^{x - 1} e^{-t} dt. + + References + ---------- + .. [1] NIST "Engineering Statistics Handbook" + https://www.itl.nist.gov/div898/handbook/eda/section3/eda3666.htm + + Examples + -------- + >>> bm.random.chisquare(2,4) + array([ 1.89920014, 9.00867716, 3.13710533, 5.62318272]) # random + """ + return DEFAULT.chisquare(df, size, key=key) + + +def dirichlet(alpha, size=None, key=None): + r""" + Draw samples from the Dirichlet distribution. + + Draw `size` samples of dimension k from a Dirichlet distribution. A + Dirichlet-distributed random variable can be seen as a multivariate + generalization of a Beta distribution. The Dirichlet distribution + is a conjugate prior of a multinomial distribution in Bayesian + inference. + + .. note:: + New code should use the `~numpy.random.Generator.dirichlet` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. 
+ + Parameters + ---------- + alpha : sequence of floats, length k + Parameter of the distribution (length ``k`` for sample of + length ``k``). + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n)``, then + ``m * n * k`` samples are drawn. Default is None, in which case a + vector of length ``k`` is returned. + + Returns + ------- + samples : ndarray, + The drawn samples, of shape ``(size, k)``. + + Raises + ------ + ValueError + If any value in ``alpha`` is less than or equal to zero + + See Also + -------- + random.Generator.dirichlet: which should be used for new code. + + Notes + ----- + The Dirichlet distribution is a distribution over vectors + :math:`x` that fulfil the conditions :math:`x_i>0` and + :math:`\sum_{i=1}^k x_i = 1`. + + The probability density function :math:`p` of a + Dirichlet-distributed random vector :math:`X` is + proportional to + + .. math:: p(x) \propto \prod_{i=1}^{k}{x^{\alpha_i-1}_i}, + + where :math:`\alpha` is a vector containing the positive + concentration parameters. + + The method uses the following property for computation: let :math:`Y` + be a random vector which has components that follow a standard gamma + distribution, then :math:`X = \frac{1}{\sum_{i=1}^k{Y_i}} Y` + is Dirichlet-distributed + + References + ---------- + .. [1] David McKay, "Information Theory, Inference and Learning + Algorithms," chapter 23, + http://www.inference.org.uk/mackay/itila/ + .. [2] Wikipedia, "Dirichlet distribution", + https://en.wikipedia.org/wiki/Dirichlet_distribution + + Examples + -------- + Taking an example cited in Wikipedia, this distribution can be used if + one wanted to cut strings (each of initial length 1.0) into K pieces + with different lengths, where each piece had, on average, a designated + average length, but allowing some variation in the relative sizes of + the pieces. 
+ + >>> s = bm.random.dirichlet((10, 5, 3), 20).transpose() + + >>> import matplotlib.pyplot as plt + >>> plt.barh(range(20), s[0]) + >>> plt.barh(range(20), s[1], left=s[0], color='g') + >>> plt.barh(range(20), s[2], left=s[0]+s[1], color='r') + >>> plt.title("Lengths of Strings") + """ + return DEFAULT.dirichlet(alpha, size, key=key) + + +def geometric(p, size=None, key=None): + r""" + Draw samples from the geometric distribution. + + Bernoulli trials are experiments with one of two outcomes: + success or failure (an example of such an experiment is flipping + a coin). The geometric distribution models the number of trials + that must be run in order to achieve success. It is therefore + supported on the positive integers, ``k = 1, 2, ...``. + + The probability mass function of the geometric distribution is + + .. math:: f(k) = (1 - p)^{k - 1} p + + where `p` is the probability of success of an individual trial. + + .. note:: + New code should use the `~numpy.random.Generator.geometric` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. + + Parameters + ---------- + p : float or array_like of floats + The probability of success of an individual trial. + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a single value is returned if ``p`` is a scalar. Otherwise, + ``np.array(p).size`` samples are drawn. + + Returns + ------- + out : ndarray or scalar + Drawn samples from the parameterized geometric distribution. + + See Also + -------- + random.Generator.geometric: which should be used for new code. + + Examples + -------- + Draw ten thousand values from the geometric distribution, + with the probability of an individual success equal to 0.35: + + >>> z = bm.random.geometric(p=0.35, size=10000) + + How many trials succeeded after a single run? + + >>> (z == 1).sum() / 10000. 
+ 0.34889999999999999 #random + """ + return DEFAULT.geometric(p, size, key=key) + + +def f(dfnum, dfden, size=None, key=None): + r""" + Draw samples from an F distribution. + + Samples are drawn from an F distribution with specified parameters, + `dfnum` (degrees of freedom in numerator) and `dfden` (degrees of + freedom in denominator), where both parameters must be greater than + zero. + + The random variate of the F distribution (also known as the + Fisher distribution) is a continuous probability distribution + that arises in ANOVA tests, and is the ratio of two chi-square + variates. + + .. note:: + New code should use the `~numpy.random.Generator.f` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. + + Parameters + ---------- + dfnum : float or array_like of floats + Degrees of freedom in numerator, must be > 0. + dfden : float or array_like of float + Degrees of freedom in denominator, must be > 0. + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a single value is returned if ``dfnum`` and ``dfden`` are both scalars. + Otherwise, ``np.broadcast(dfnum, dfden).size`` samples are drawn. + + Returns + ------- + out : ndarray or scalar + Drawn samples from the parameterized Fisher distribution. + + See Also + -------- + scipy.stats.f : probability density function, distribution or + cumulative density function, etc. + random.Generator.f: which should be used for new code. + + Notes + ----- + The F statistic is used to compare in-group variances to between-group + variances. Calculating the distribution depends on the sampling, and + so it is a function of the respective degrees of freedom in the + problem. 
The variable `dfnum` is the number of samples minus one, the + between-groups degrees of freedom, while `dfden` is the within-groups + degrees of freedom, the sum of the number of samples in each group + minus the number of groups. + + References + ---------- + .. [1] Glantz, Stanton A. "Primer of Biostatistics.", McGraw-Hill, + Fifth Edition, 2002. + .. [2] Wikipedia, "F-distribution", + https://en.wikipedia.org/wiki/F-distribution + + Examples + -------- + An example from Glantz[1], pp 47-40: + + Two groups, children of diabetics (25 people) and children from people + without diabetes (25 controls). Fasting blood glucose was measured, + case group had a mean value of 86.1, controls had a mean value of + 82.2. Standard deviations were 2.09 and 2.49 respectively. Are these + data consistent with the null hypothesis that the parents diabetic + status does not affect their children's blood glucose levels? + Calculating the F statistic from the data gives a value of 36.01. + + Draw samples from the distribution: + + >>> dfnum = 1. # between group degrees of freedom + >>> dfden = 48. # within groups degrees of freedom + >>> s = bm.random.f(dfnum, dfden, 1000) + + The lower bound for the top 1% of the samples is : + + >>> np.sort(s)[-10] + 7.61988120985 # random + + So there is about a 1% chance that the F statistic will exceed 7.62, + the measured value is 36, so the null hypothesis is rejected at the 1% + level. + """ + return DEFAULT.f(dfnum, dfden, size, key=key) + + +def hypergeometric(ngood, nbad, nsample, size=None, key=None): + r""" + Draw samples from a Hypergeometric distribution. + + Samples are drawn from a hypergeometric distribution with specified + parameters, `ngood` (ways to make a good selection), `nbad` (ways to make + a bad selection), and `nsample` (number of items sampled, which is less + than or equal to the sum ``ngood + nbad``). + + .. 
note:: + New code should use the + `~numpy.random.Generator.hypergeometric` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. + + Parameters + ---------- + ngood : int or array_like of ints + Number of ways to make a good selection. Must be nonnegative. + nbad : int or array_like of ints + Number of ways to make a bad selection. Must be nonnegative. + nsample : int or array_like of ints + Number of items sampled. Must be at least 1 and at most + ``ngood + nbad``. + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a single value is returned if `ngood`, `nbad`, and `nsample` + are all scalars. Otherwise, ``np.broadcast(ngood, nbad, nsample).size`` + samples are drawn. + + Returns + ------- + out : ndarray or scalar + Drawn samples from the parameterized hypergeometric distribution. Each + sample is the number of good items within a randomly selected subset of + size `nsample` taken from a set of `ngood` good items and `nbad` bad items. + + See Also + -------- + scipy.stats.hypergeom : probability density function, distribution or + cumulative density function, etc. + random.Generator.hypergeometric: which should be used for new code. + + Notes + ----- + The probability density for the Hypergeometric distribution is + + .. math:: P(x) = \frac{\binom{g}{x}\binom{b}{n-x}}{\binom{g+b}{n}}, + + where :math:`0 \le x \le n` and :math:`n-b \le x \le g` + + for P(x) the probability of ``x`` good results in the drawn sample, + g = `ngood`, b = `nbad`, and n = `nsample`. + + Consider an urn with black and white marbles in it, `ngood` of them + are black and `nbad` are white. If you draw `nsample` balls without + replacement, then the hypergeometric distribution describes the + distribution of black balls in the drawn sample. 
+ + Note that this distribution is very similar to the binomial + distribution, except that in this case, samples are drawn without + replacement, whereas in the Binomial case samples are drawn with + replacement (or the sample space is infinite). As the sample space + becomes large, this distribution approaches the binomial. + + References + ---------- + .. [1] Lentner, Marvin, "Elementary Applied Statistics", Bogden + and Quigley, 1972. + .. [2] Weisstein, Eric W. "Hypergeometric Distribution." From + MathWorld--A Wolfram Web Resource. + http://mathworld.wolfram.com/HypergeometricDistribution.html + .. [3] Wikipedia, "Hypergeometric distribution", + https://en.wikipedia.org/wiki/Hypergeometric_distribution + + Examples + -------- + Draw samples from the distribution: + + >>> ngood, nbad, nsamp = 100, 2, 10 + # number of good, number of bad, and number of samples + >>> s = bm.random.hypergeometric(ngood, nbad, nsamp, 1000) + >>> from matplotlib.pyplot import hist + >>> hist(s) + # note that it is very unlikely to grab both bad items + + Suppose you have an urn with 15 white and 15 black marbles. + If you pull 15 marbles at random, how likely is it that + 12 or more of them are one color? + + >>> s = bm.random.hypergeometric(15, 15, 15, 100000) + >>> sum(s>=12)/100000. + sum(s<=3)/100000. + # answer = 0.003 ... pretty unlikely! + """ + return DEFAULT.hypergeometric(ngood, nbad, nsample, size, key=key) + + +def logseries(p, size=None, key=None): + r""" + Draw samples from a logarithmic series distribution. + + Samples are drawn from a log series distribution with specified + shape parameter, 0 <= ``p`` < 1. + + .. note:: + New code should use the `~numpy.random.Generator.logseries` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. + + Parameters + ---------- + p : float or array_like of floats + Shape parameter for the distribution. Must be in the range [0, 1). 
+ size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a single value is returned if ``p`` is a scalar. Otherwise, + ``np.array(p).size`` samples are drawn. + + Returns + ------- + out : ndarray or scalar + Drawn samples from the parameterized logarithmic series distribution. + + See Also + -------- + scipy.stats.logser : probability density function, distribution or + cumulative density function, etc. + random.Generator.logseries: which should be used for new code. + + Notes + ----- + The probability density for the Log Series distribution is + + .. math:: P(k) = \frac{-p^k}{k \ln(1-p)}, + + where p = probability. + + The log series distribution is frequently used to represent species + richness and occurrence, first proposed by Fisher, Corbet, and + Williams in 1943 [2]. It may also be used to model the numbers of + occupants seen in cars [3]. + + References + ---------- + .. [1] Buzas, Martin A.; Culver, Stephen J., Understanding regional + species diversity through the log series distribution of + occurrences: BIODIVERSITY RESEARCH Diversity & Distributions, + Volume 5, Number 5, September 1999 , pp. 187-195(9). + .. [2] Fisher, R.A,, A.S. Corbet, and C.B. Williams. 1943. The + relation between the number of species and the number of + individuals in a random sample of an animal population. + Journal of Animal Ecology, 12:42-58. + .. [3] D. J. Hand, F. Daly, D. Lunn, E. Ostrowski, A Handbook of Small + Data Sets, CRC Press, 1994. + .. [4] Wikipedia, "Logarithmic distribution", + https://en.wikipedia.org/wiki/Logarithmic_distribution + + Examples + -------- + Draw samples from the distribution: + + >>> a = .6 + >>> s = bm.random.logseries(a, 10000) + >>> import matplotlib.pyplot as plt + >>> count, bins, ignored = plt.hist(s) + + # plot against distribution + + >>> def logseries(k, p): + ... 
return -p**k/(k*np.log(1-p)) + >>> plt.plot(bins, logseries(bins, a)*count.max()/ + ... logseries(bins, a).max(), 'r') + >>> plt.show() + """ + return DEFAULT.logseries(p, size, key=key) + + +def multinomial(n, pvals, size=None, key=None): + r""" + Draw samples from a multinomial distribution. + + The multinomial distribution is a multivariate generalization of the + binomial distribution. Take an experiment with one of ``p`` + possible outcomes. An example of such an experiment is throwing a dice, + where the outcome can be 1 through 6. Each sample drawn from the + distribution represents `n` such experiments. Its values, + ``X_i = [X_0, X_1, ..., X_p]``, represent the number of times the + outcome was ``i``. + + .. note:: + New code should use the `~numpy.random.Generator.multinomial` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. + + Parameters + ---------- + n : int + Number of experiments. + pvals : sequence of floats, length p + Probabilities of each of the ``p`` different outcomes. These + must sum to 1 (however, the last element is always assumed to + account for the remaining probability, as long as + ``sum(pvals[:-1]) <= 1)``. + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. Default is None, in which case a + single value is returned. + + Returns + ------- + out : ndarray + The drawn samples, of shape *size*, if that was provided. If not, + the shape is ``(N,)``. + + In other words, each entry ``out[i,j,...,:]`` is an N-dimensional + value drawn from the distribution. + + See Also + -------- + random.Generator.multinomial: which should be used for new code. + + Examples + -------- + Throw a dice 20 times: + + >>> bm.random.multinomial(20, [1/6.]*6, size=1) + array([[4, 1, 7, 5, 2, 1]]) # random + + It landed 4 times on 1, once on 2, etc. 
+ + Now, throw the dice 20 times, and 20 times again: + + >>> bm.random.multinomial(20, [1/6.]*6, size=2) + array([[3, 4, 3, 3, 4, 3], # random + [2, 4, 3, 4, 0, 7]]) + + For the first run, we threw 3 times 1, 4 times 2, etc. For the second, + we threw 2 times 1, 4 times 2, etc. + + A loaded die is more likely to land on number 6: + + >>> bm.random.multinomial(100, [1/7.]*5 + [2/7.]) + array([11, 16, 14, 17, 16, 26]) # random + + The probability inputs should be normalized. As an implementation + detail, the value of the last entry is ignored and assumed to take + up any leftover probability mass, but this should not be relied on. + A biased coin which has twice as much weight on one side as on the + other should be sampled like so: + + >>> bm.random.multinomial(100, [1.0 / 3, 2.0 / 3]) # RIGHT + array([38, 62]) # random + + not like: + + >>> bm.random.multinomial(100, [1.0, 2.0]) # WRONG + Traceback (most recent call last): + ValueError: pvals < 0, pvals > 1 or pvals contains NaNs + """ + return DEFAULT.multinomial(n, pvals, size, key=key) + + +def multivariate_normal(mean, cov, size=None, method: str = 'cholesky', key=None): + r""" + Draw random samples from a multivariate normal distribution. + + The multivariate normal, multinormal or Gaussian distribution is a + generalization of the one-dimensional normal distribution to higher + dimensions. Such a distribution is specified by its mean and + covariance matrix. These parameters are analogous to the mean + (average or "center") and variance (standard deviation, or "width," + squared) of the one-dimensional normal distribution. + + .. note:: + New code should use the + `~numpy.random.Generator.multivariate_normal` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. + + Parameters + ---------- + mean : 1-D array_like, of length N + Mean of the N-dimensional distribution. + cov : 2-D array_like, of shape (N, N) + Covariance matrix of the distribution. 
It must be symmetric and + positive-semidefinite for proper sampling. + size : int or tuple of ints, optional + Given a shape of, for example, ``(m,n,k)``, ``m*n*k`` samples are + generated, and packed in an `m`-by-`n`-by-`k` arrangement. Because + each sample is `N`-dimensional, the output shape is ``(m,n,k,N)``. + If no shape is specified, a single (`N`-D) sample is returned. + check_valid : { 'warn', 'raise', 'ignore' }, optional + Behavior when the covariance matrix is not positive semidefinite. + tol : float, optional + Tolerance when checking the singular values in covariance matrix. + cov is cast to double before the check. + + Returns + ------- + out : ndarray + The drawn samples, of shape *size*, if that was provided. If not, + the shape is ``(N,)``. + + In other words, each entry ``out[i,j,...,:]`` is an N-dimensional + value drawn from the distribution. + + See Also + -------- + random.Generator.multivariate_normal: which should be used for new code. + + Notes + ----- + The mean is a coordinate in N-dimensional space, which represents the + location where samples are most likely to be generated. This is + analogous to the peak of the bell curve for the one-dimensional or + univariate normal distribution. + + Covariance indicates the level to which two variables vary together. + From the multivariate normal distribution, we draw N-dimensional + samples, :math:`X = [x_1, x_2, ... x_N]`. The covariance matrix + element :math:`C_{ij}` is the covariance of :math:`x_i` and :math:`x_j`. + The element :math:`C_{ii}` is the variance of :math:`x_i` (i.e. its + "spread"). 
+ + Instead of specifying the full covariance matrix, popular + approximations include: + + - Spherical covariance (`cov` is a multiple of the identity matrix) + - Diagonal covariance (`cov` has non-negative elements, and only on + the diagonal) + + This geometrical property can be seen in two dimensions by plotting + generated data-points: + + >>> mean = [0, 0] + >>> cov = [[1, 0], [0, 100]] # diagonal covariance + + Diagonal covariance means that points are oriented along x or y-axis: + + >>> import matplotlib.pyplot as plt + >>> x, y = bm.random.multivariate_normal(mean, cov, 5000).T + >>> plt.plot(x, y, 'x') + >>> plt.axis('equal') + >>> plt.show() + + Note that the covariance matrix must be positive semidefinite (a.k.a. + nonnegative-definite). Otherwise, the behavior of this method is + undefined and backwards compatibility is not guaranteed. + + References + ---------- + .. [1] Papoulis, A., "Probability, Random Variables, and Stochastic + Processes," 3rd ed., New York: McGraw-Hill, 1991. + .. [2] Duda, R. O., Hart, P. E., and Stork, D. G., "Pattern + Classification," 2nd ed., New York: Wiley, 2001. + + Examples + -------- + >>> mean = (1, 2) + >>> cov = [[1, 0], [0, 1]] + >>> x = bm.random.multivariate_normal(mean, cov, (3, 3)) + >>> x.shape + (3, 3, 2) + + Here we generate 800 samples from the bivariate normal distribution + with mean [0, 0] and covariance matrix [[6, -3], [-3, 3.5]]. The + expected variances of the first and second components of the sample + are 6 and 3.5, respectively, and the expected correlation + coefficient is -3/sqrt(6*3.5) ≈ -0.65465. 
+ + >>> cov = np.array([[6, -3], [-3, 3.5]]) + >>> pts = bm.random.multivariate_normal([0, 0], cov, size=800) + + Check that the mean, covariance, and correlation coefficient of the + sample are close to the expected values: + + >>> pts.mean(axis=0) + array([ 0.0326911 , -0.01280782]) # may vary + >>> np.cov(pts.T) + array([[ 5.96202397, -2.85602287], + [-2.85602287, 3.47613949]]) # may vary + >>> np.corrcoef(pts.T)[0, 1] + -0.6273591314603949 # may vary + + We can visualize this data with a scatter plot. The orientation + of the point cloud illustrates the negative correlation of the + components of this sample. + + >>> import matplotlib.pyplot as plt + >>> plt.plot(pts[:, 0], pts[:, 1], '.', alpha=0.5) + >>> plt.axis('equal') + >>> plt.grid() + >>> plt.show() + """ + return DEFAULT.multivariate_normal(mean, cov, size, method, key=key) + + +def negative_binomial(n, p, size=None, key=None): + r""" + Draw samples from a negative binomial distribution. + + Samples are drawn from a negative binomial distribution with specified + parameters, `n` successes and `p` probability of success where `n` + is > 0 and `p` is in the interval [0, 1]. + + .. note:: + New code should use the + `~numpy.random.Generator.negative_binomial` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. + + Parameters + ---------- + n : float or array_like of floats + Parameter of the distribution, > 0. + p : float or array_like of floats + Parameter of the distribution, >= 0 and <=1. + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a single value is returned if ``n`` and ``p`` are both scalars. + Otherwise, ``np.broadcast(n, p).size`` samples are drawn. 
+ + Returns + ------- + out : ndarray or scalar + Drawn samples from the parameterized negative binomial distribution, + where each sample is equal to N, the number of failures that + occurred before a total of n successes was reached. + + See Also + -------- + random.Generator.negative_binomial: which should be used for new code. + + Notes + ----- + The probability mass function of the negative binomial distribution is + + .. math:: P(N;n,p) = \frac{\Gamma(N+n)}{N!\Gamma(n)}p^{n}(1-p)^{N}, + + where :math:`n` is the number of successes, :math:`p` is the + probability of success, :math:`N+n` is the number of trials, and + :math:`\Gamma` is the gamma function. When :math:`n` is an integer, + :math:`\frac{\Gamma(N+n)}{N!\Gamma(n)} = \binom{N+n-1}{N}`, which is + the more common form of this term in the pmf. The negative + binomial distribution gives the probability of N failures given n + successes, with a success on the last trial. + + If one throws a die repeatedly until the third time a "1" appears, + then the probability distribution of the number of non-"1"s that + appear before the third "1" is a negative binomial distribution. + + References + ---------- + .. [1] Weisstein, Eric W. "Negative Binomial Distribution." From + MathWorld--A Wolfram Web Resource. + http://mathworld.wolfram.com/NegativeBinomialDistribution.html + .. [2] Wikipedia, "Negative binomial distribution", + https://en.wikipedia.org/wiki/Negative_binomial_distribution + + Examples + -------- + Draw samples from the distribution: + + A real world example. A company drills wild-cat oil + exploration wells, each with an estimated probability of + success of 0.1. What is the probability of having one success + for each successive well, that is what is the probability of a + single success after drilling 5 wells, after 6 wells, etc.? + + >>> s = bm.random.negative_binomial(1, 0.1, 100000) + >>> for i in range(1, 11): # doctest: +SKIP + ... probability = sum(s 0. + + .. 
versionchanged:: 1.10.0 + Earlier NumPy versions required dfnum > 1. + nonc : float or array_like of floats + Non-centrality, must be non-negative. + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a single value is returned if ``df`` and ``nonc`` are both scalars. + Otherwise, ``np.broadcast(df, nonc).size`` samples are drawn. + + Returns + ------- + out : ndarray or scalar + Drawn samples from the parameterized noncentral chi-square distribution. + + See Also + -------- + random.Generator.noncentral_chisquare: which should be used for new code. + + Notes + ----- + The probability density function for the noncentral Chi-square + distribution is + + .. math:: P(x;df,nonc) = \sum^{\infty}_{i=0} + \frac{e^{-nonc/2}(nonc/2)^{i}}{i!} + P_{Y_{df+2i}}(x), + + where :math:`Y_{q}` is the Chi-square with q degrees of freedom. + + References + ---------- + .. [1] Wikipedia, "Noncentral chi-squared distribution" + https://en.wikipedia.org/wiki/Noncentral_chi-squared_distribution + + Examples + -------- + Draw values from the distribution and plot the histogram + + >>> import matplotlib.pyplot as plt + >>> values = plt.hist(bm.random.noncentral_chisquare(3, 20, 100000), + ... bins=200, density=True) + >>> plt.show() + + Draw values from a noncentral chisquare with very small noncentrality, + and compare to a chisquare. + + >>> plt.figure() + >>> values = plt.hist(bm.random.noncentral_chisquare(3, .0000001, 100000), + ... bins=np.arange(0., 25, .1), density=True) + >>> values2 = plt.hist(bm.random.chisquare(3, 100000), + ... bins=np.arange(0., 25, .1), density=True) + >>> plt.plot(values[1][0:-1], values[0]-values2[0], 'ob') + >>> plt.show() + + Demonstrate how large values of non-centrality lead to a more symmetric + distribution. + + >>> plt.figure() + >>> values = plt.hist(bm.random.noncentral_chisquare(3, 20, 100000), + ... 
bins=200, density=True) + >>> plt.show() + """ + return DEFAULT.noncentral_chisquare(df, nonc, size, key=key) + + +def noncentral_f(dfnum, dfden, nonc, size=None, key=None): + r""" + Draw samples from the noncentral F distribution. + + Samples are drawn from an F distribution with specified parameters, + `dfnum` (degrees of freedom in numerator) and `dfden` (degrees of + freedom in denominator), where both parameters > 1. + `nonc` is the non-centrality parameter. + + .. note:: + New code should use the + `~numpy.random.Generator.noncentral_f` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. + + Parameters + ---------- + dfnum : float or array_like of floats + Numerator degrees of freedom, must be > 0. + + .. versionchanged:: 1.14.0 + Earlier NumPy versions required dfnum > 1. + dfden : float or array_like of floats + Denominator degrees of freedom, must be > 0. + nonc : float or array_like of floats + Non-centrality parameter, the sum of the squares of the numerator + means, must be >= 0. + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a single value is returned if ``dfnum``, ``dfden``, and ``nonc`` + are all scalars. Otherwise, ``np.broadcast(dfnum, dfden, nonc).size`` + samples are drawn. + + Returns + ------- + out : ndarray or scalar + Drawn samples from the parameterized noncentral Fisher distribution. + + See Also + -------- + random.Generator.noncentral_f: which should be used for new code. + + Notes + ----- + When calculating the power of an experiment (power = probability of + rejecting the null hypothesis when a specific alternative is true) the + non-central F statistic becomes important. When the null hypothesis is + true, the F statistic follows a central F distribution. When the null + hypothesis is not true, then it follows a non-central F statistic. 
+ + References + ---------- + .. [1] Weisstein, Eric W. "Noncentral F-Distribution." + From MathWorld--A Wolfram Web Resource. + http://mathworld.wolfram.com/NoncentralF-Distribution.html + .. [2] Wikipedia, "Noncentral F-distribution", + https://en.wikipedia.org/wiki/Noncentral_F-distribution + + Examples + -------- + In a study, testing for a specific alternative to the null hypothesis + requires use of the Noncentral F distribution. We need to calculate the + area in the tail of the distribution that exceeds the value of the F + distribution for the null hypothesis. We'll plot the two probability + distributions for comparison. + + >>> dfnum = 3 # between group deg of freedom + >>> dfden = 20 # within groups degrees of freedom + >>> nonc = 3.0 + >>> nc_vals = bm.random.noncentral_f(dfnum, dfden, nonc, 1000000) + >>> NF = np.histogram(nc_vals, bins=50, density=True) + >>> c_vals = bm.random.f(dfnum, dfden, 1000000) + >>> F = np.histogram(c_vals, bins=50, density=True) + >>> import matplotlib.pyplot as plt + >>> plt.plot(F[1][1:], F[0]) + >>> plt.plot(NF[1][1:], NF[0]) + >>> plt.show() + """ + return DEFAULT.noncentral_f(dfnum, dfden, nonc, size, key=key) + + +def power(a, size=None, key=None): + r""" + Draws samples in [0, 1] from a power distribution with positive + exponent a - 1. + + Also known as the power function distribution. + + .. note:: + New code should use the `~numpy.random.Generator.power` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. + + Parameters + ---------- + a : float or array_like of floats + Parameter of the distribution. Must be non-negative. + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a single value is returned if ``a`` is a scalar. Otherwise, + ``np.array(a).size`` samples are drawn. 
+ + Returns + ------- + out : ndarray or scalar + Drawn samples from the parameterized power distribution. + + Raises + ------ + ValueError + If a <= 0. + + See Also + -------- + random.Generator.power: which should be used for new code. + + Notes + ----- + The probability density function is + + .. math:: P(x; a) = ax^{a-1}, 0 \le x \le 1, a>0. + + The power function distribution is just the inverse of the Pareto + distribution. It may also be seen as a special case of the Beta + distribution. + + It is used, for example, in modeling the over-reporting of insurance + claims. + + References + ---------- + .. [1] Christian Kleiber, Samuel Kotz, "Statistical size distributions + in economics and actuarial sciences", Wiley, 2003. + .. [2] Heckert, N. A. and Filliben, James J. "NIST Handbook 148: + Dataplot Reference Manual, Volume 2: Let Subcommands and Library + Functions", National Institute of Standards and Technology + Handbook Series, June 2003. + https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/powpdf.pdf + + Examples + -------- + Draw samples from the distribution: + + >>> a = 5. # shape + >>> samples = 1000 + >>> s = bm.random.power(a, samples) + + Display the histogram of the samples, along with + the probability density function: + + >>> import matplotlib.pyplot as plt + >>> count, bins, ignored = plt.hist(s, bins=30) + >>> x = np.linspace(0, 1, 100) + >>> y = a*x**(a-1.) + >>> normed_y = samples*np.diff(bins)[0]*y + >>> plt.plot(x, normed_y) + >>> plt.show() + + Compare the power function distribution to the inverse of the Pareto. 
+ + >>> from scipy import stats # doctest: +SKIP + >>> rvs = bm.random.power(5, 1000000) + >>> rvsp = bm.random.pareto(5, 1000000) + >>> xx = np.linspace(0,1,100) + >>> powpdf = stats.powerlaw.pdf(xx,5) # doctest: +SKIP + + >>> plt.figure() + >>> plt.hist(rvs, bins=50, density=True) + >>> plt.plot(xx,powpdf,'r-') # doctest: +SKIP + >>> plt.title('bm.random.power(5)') + + >>> plt.figure() + >>> plt.hist(1./(1.+rvsp), bins=50, density=True) + >>> plt.plot(xx,powpdf,'r-') # doctest: +SKIP + >>> plt.title('inverse of 1 + bm.random.pareto(5)') + + >>> plt.figure() + >>> plt.hist(1./(1.+rvsp), bins=50, density=True) + >>> plt.plot(xx,powpdf,'r-') # doctest: +SKIP + >>> plt.title('inverse of stats.pareto(5)') + """ + return DEFAULT.power(a, size, key=key) + + +def rayleigh(scale=1.0, size=None, key=None): + r""" + Draw samples from a Rayleigh distribution. + + The :math:`\chi` and Weibull distributions are generalizations of the + Rayleigh. + + .. note:: + New code should use the `~numpy.random.Generator.rayleigh` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. + + Parameters + ---------- + scale : float or array_like of floats, optional + Scale, also equals the mode. Must be non-negative. Default is 1. + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a single value is returned if ``scale`` is a scalar. Otherwise, + ``np.array(scale).size`` samples are drawn. + + Returns + ------- + out : ndarray or scalar + Drawn samples from the parameterized Rayleigh distribution. + + See Also + -------- + random.Generator.rayleigh: which should be used for new code. + + Notes + ----- + The probability density function for the Rayleigh distribution is + + .. 
math:: P(x;scale) = \frac{x}{scale^2}e^{\frac{-x^2}{2 \cdotp scale^2}} + + The Rayleigh distribution would arise, for example, if the East + and North components of the wind velocity had identical zero-mean + Gaussian distributions. Then the wind speed would have a Rayleigh + distribution. + + References + ---------- + .. [1] Brighton Webs Ltd., "Rayleigh Distribution," + https://web.archive.org/web/20090514091424/http://brighton-webs.co.uk:80/distributions/rayleigh.asp + .. [2] Wikipedia, "Rayleigh distribution" + https://en.wikipedia.org/wiki/Rayleigh_distribution + + Examples + -------- + Draw values from the distribution and plot the histogram + + >>> from matplotlib.pyplot import hist + >>> values = hist(bm.random.rayleigh(3, 100000), bins=200, density=True) + + Wave heights tend to follow a Rayleigh distribution. If the mean wave + height is 1 meter, what fraction of waves are likely to be larger than 3 + meters? + + >>> meanvalue = 1 + >>> modevalue = np.sqrt(2 / np.pi) * meanvalue + >>> s = bm.random.rayleigh(modevalue, 1000000) + + The percentage of waves larger than 3 meters is: + + >>> 100.*sum(s>3)/1000000. + 0.087300000000000003 # random + """ + return DEFAULT.rayleigh(scale, size, key=key) + + +def triangular(left, mode, right, size=None, key=None): + r""" + Draw samples from the triangular distribution over the + interval ``[left, right]``. + + The triangular distribution is a continuous probability + distribution with lower limit left, peak at mode, and upper + limit right. Unlike the other distributions, these parameters + directly define the shape of the pdf. + + .. note:: + New code should use the `~numpy.random.Generator.triangular` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. + + Parameters + ---------- + left : float or array_like of floats + Lower limit. + mode : float or array_like of floats + The value where the peak of the distribution occurs. 
+ The value must fulfill the condition ``left <= mode <= right``. + right : float or array_like of floats + Upper limit, must be larger than `left`. + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a single value is returned if ``left``, ``mode``, and ``right`` + are all scalars. Otherwise, ``np.broadcast(left, mode, right).size`` + samples are drawn. + + Returns + ------- + out : ndarray or scalar + Drawn samples from the parameterized triangular distribution. + + See Also + -------- + random.Generator.triangular: which should be used for new code. + + Notes + ----- + The probability density function for the triangular distribution is + + .. math:: P(x;l, m, r) = \begin{cases} + \frac{2(x-l)}{(r-l)(m-l)}& \text{for $l \leq x \leq m$},\\ + \frac{2(r-x)}{(r-l)(r-m)}& \text{for $m \leq x \leq r$},\\ + 0& \text{otherwise}. + \end{cases} + + The triangular distribution is often used in ill-defined + problems where the underlying distribution is not known, but + some knowledge of the limits and mode exists. Often it is used + in simulations. + + References + ---------- + .. [1] Wikipedia, "Triangular distribution" + https://en.wikipedia.org/wiki/Triangular_distribution + + Examples + -------- + Draw values from the distribution and plot the histogram: + + >>> import matplotlib.pyplot as plt + >>> h = plt.hist(bm.random.triangular(-3, 0, 8, 100000), bins=200, + ... density=True) + >>> plt.show() + """ + return DEFAULT.triangular(left, mode, right, size, key=key) + + +def vonmises(mu, kappa, size=None, key=None): + r""" + Draw samples from a von Mises distribution. + + Samples are drawn from a von Mises distribution with specified mode + (mu) and dispersion (kappa), on the interval [-pi, pi]. + + The von Mises distribution (also known as the circular normal + distribution) is a continuous probability distribution on the unit + circle. 
It may be thought of as the circular analogue of the normal + distribution. + + .. note:: + New code should use the `~numpy.random.Generator.vonmises` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. + + Parameters + ---------- + mu : float or array_like of floats + Mode ("center") of the distribution. + kappa : float or array_like of floats + Dispersion of the distribution, has to be >=0. + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a single value is returned if ``mu`` and ``kappa`` are both scalars. + Otherwise, ``np.broadcast(mu, kappa).size`` samples are drawn. + + Returns + ------- + out : ndarray or scalar + Drawn samples from the parameterized von Mises distribution. + + See Also + -------- + scipy.stats.vonmises : probability density function, distribution, or + cumulative density function, etc. + random.Generator.vonmises: which should be used for new code. + + Notes + ----- + The probability density for the von Mises distribution is + + .. math:: p(x) = \frac{e^{\kappa cos(x-\mu)}}{2\pi I_0(\kappa)}, + + where :math:`\mu` is the mode and :math:`\kappa` the dispersion, + and :math:`I_0(\kappa)` is the modified Bessel function of order 0. + + The von Mises is named for Richard Edler von Mises, who was born in + Austria-Hungary, in what is now the Ukraine. He fled to the United + States in 1939 and became a professor at Harvard. He worked in + probability theory, aerodynamics, fluid mechanics, and philosophy of + science. + + References + ---------- + .. [1] Abramowitz, M. and Stegun, I. A. (Eds.). "Handbook of + Mathematical Functions with Formulas, Graphs, and Mathematical + Tables, 9th printing," New York: Dover, 1972. + .. [2] von Mises, R., "Mathematical Theory of Probability + and Statistics", New York: Academic Press, 1964. 
+ + Examples + -------- + Draw samples from the distribution: + + >>> mu, kappa = 0.0, 4.0 # mean and dispersion + >>> s = bm.random.vonmises(mu, kappa, 1000) + + Display the histogram of the samples, along with + the probability density function: + + >>> import matplotlib.pyplot as plt + >>> from scipy.special import i0 # doctest: +SKIP + >>> plt.hist(s, 50, density=True) + >>> x = np.linspace(-np.pi, np.pi, num=51) + >>> y = np.exp(kappa*np.cos(x-mu))/(2*np.pi*i0(kappa)) # doctest: +SKIP + >>> plt.plot(x, y, linewidth=2, color='r') # doctest: +SKIP + >>> plt.show() + """ + return DEFAULT.vonmises(mu, kappa, size, key=key) + + +def wald(mean, scale, size=None, key=None): + r""" + Draw samples from a Wald, or inverse Gaussian, distribution. + + As the scale approaches infinity, the distribution becomes more like a + Gaussian. Some references claim that the Wald is an inverse Gaussian + with mean equal to 1, but this is by no means universal. + + The inverse Gaussian distribution was first studied in relationship to + Brownian motion. In 1956 M.C.K. Tweedie used the name inverse Gaussian + because there is an inverse relationship between the time to cover a + unit distance and distance covered in unit time. + + .. note:: + New code should use the `~numpy.random.Generator.wald` + method of a `~numpy.random.Generator` instance instead; + please see the :ref:`random-quick-start`. + + Parameters + ---------- + mean : float or array_like of floats + Distribution mean, must be > 0. + scale : float or array_like of floats + Scale parameter, must be > 0. + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a single value is returned if ``mean`` and ``scale`` are both scalars. + Otherwise, ``np.broadcast(mean, scale).size`` samples are drawn. + + Returns + ------- + out : ndarray or scalar + Drawn samples from the parameterized Wald distribution. 
+ + See Also + -------- + random.Generator.wald: which should be used for new code. + + Notes + ----- + The probability density function for the Wald distribution is + + .. math:: P(x;mean,scale) = \sqrt{\frac{scale}{2\pi x^3}}e^ + \frac{-scale(x-mean)^2}{2\cdotp mean^2x} + + As noted above the inverse Gaussian distribution first arise + from attempts to model Brownian motion. It is also a + competitor to the Weibull for use in reliability modeling and + modeling stock returns and interest rate processes. + + References + ---------- + .. [1] Brighton Webs Ltd., Wald Distribution, + https://web.archive.org/web/20090423014010/http://www.brighton-webs.co.uk:80/distributions/wald.asp + .. [2] Chhikara, Raj S., and Folks, J. Leroy, "The Inverse Gaussian + Distribution: Theory : Methodology, and Applications", CRC Press, + 1988. + .. [3] Wikipedia, "Inverse Gaussian distribution" + https://en.wikipedia.org/wiki/Inverse_Gaussian_distribution + + Examples + -------- + Draw values from the distribution and plot the histogram: + + >>> import matplotlib.pyplot as plt + >>> h = plt.hist(bm.random.wald(3, 2, 100000), bins=200, density=True) + >>> plt.show() + """ + return DEFAULT.wald(mean, scale, size, key=key) + + +def weibull(a, size=None, key=None): + r""" + Draw samples from a Weibull distribution. + + Draw samples from a 1-parameter Weibull distribution with the given + shape parameter `a`. + + .. math:: X = (-ln(U))^{1/a} + + Here, U is drawn from the uniform distribution over (0,1]. + + The more common 2-parameter Weibull, including a scale parameter + :math:`\lambda` is just :math:`X = \lambda(-ln(U))^{1/a}`. + + .. note:: + New code should use the ``weibull`` method of a ``default_rng()`` + instance instead; please see the :ref:`random-quick-start`. + + Parameters + ---------- + a : float or array_like of floats + Shape parameter of the distribution. Must be nonnegative. + size : int or tuple of ints, optional + Output shape. 
If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a single value is returned if ``a`` is a scalar. Otherwise, + ``np.array(a).size`` samples are drawn. + + Returns + ------- + out : ndarray or scalar + Drawn samples from the parameterized Weibull distribution. + + Notes + ----- + The Weibull (or Type III asymptotic extreme value distribution + for smallest values, SEV Type III, or Rosin-Rammler + distribution) is one of a class of Generalized Extreme Value + (GEV) distributions used in modeling extreme value problems. + This class includes the Gumbel and Frechet distributions. + + The probability density for the Weibull distribution is + + .. math:: p(x) = \frac{a} + {\lambda}(\frac{x}{\lambda})^{a-1}e^{-(x/\lambda)^a}, + + where :math:`a` is the shape and :math:`\lambda` the scale. + + The function has its peak (the mode) at + :math:`\lambda(\frac{a-1}{a})^{1/a}`. + + When ``a = 1``, the Weibull distribution reduces to the exponential + distribution. + + References + ---------- + .. [1] Waloddi Weibull, Royal Technical University, Stockholm, + 1939 "A Statistical Theory Of The Strength Of Materials", Ingeniorsvetenskapsakademiens Handlingar Nr 151, 1939, Generalstabens Litografiska Anstalts Forlag, Stockholm. .. [2] Waloddi Weibull, "A Statistical Distribution Function of @@ -2352,8 +4889,8 @@ def categorical(logits, axis: int = -1, size=None, key=None): def rand_like(input, *, dtype=None, key=None): - """Similar to ``rand_like`` in torch. - + """Similar to ``rand_like`` in torch. + Returns a tensor with the same size as input that is filled with random numbers from a uniform distribution on the interval ``[0, 1)``. @@ -2369,8 +4906,8 @@ def rand_like(input, *, dtype=None, key=None): def randn_like(input, *, dtype=None, key=None): - """Similar to ``randn_like`` in torch. - + """Similar to ``randn_like`` in torch. 
+ Returns a tensor with the same size as ``input`` that is filled with random numbers from a normal distribution with mean 0 and variance 1. @@ -2386,8 +4923,8 @@ def randn_like(input, *, dtype=None, key=None): def randint_like(input, low=0, high=None, *, dtype=None, key=None): - """Similar to ``randint_like`` in torch. - + """Similar to ``randint_like`` in torch. + Returns a tensor with the same shape as Tensor ``input`` filled with random integers generated uniformly between ``low`` (inclusive) and ``high`` (exclusive). @@ -2410,4 +4947,3 @@ def randint_like(input, low=0, high=None, *, dtype=None, key=None): __r = globals().get(__k, None) if __r is not None and callable(__r): __t.__doc__ = __r.__doc__ - diff --git a/brainpy/_src/math/sparse/tests/test_csrmv_taichi.py b/brainpy/_src/math/sparse/tests/test_csrmv_taichi.py index 2ee940d44..1c603da01 100644 --- a/brainpy/_src/math/sparse/tests/test_csrmv_taichi.py +++ b/brainpy/_src/math/sparse/tests/test_csrmv_taichi.py @@ -402,7 +402,7 @@ def test_homo_grad(self, transpose, shape, homo_data): @parameterized.product( transpose=[True, False], - shape=[(200, 200), (200, 100), (10, 1000), (2, 2000)], + shape=[(200, 200), (200, 100), (2, 2000)], ) def test_heter(self, transpose, shape): print(f'test_homo: transpose = {transpose} shape = {shape}') diff --git a/brainpy/check.py b/brainpy/check.py index a1c780106..fafc0551d 100644 --- a/brainpy/check.py +++ b/brainpy/check.py @@ -41,7 +41,7 @@ 'is_all_objs', 'jit_error', 'jit_error_checking', - 'jit_error2', + 'jit_error_checking_no_args', 'serialize_kwargs', ] @@ -349,13 +349,13 @@ def is_float( if not isinstance(value, (float, np.floating)): raise ValueError(f'{name} must be a float, but got {type(value)}') if min_bound is not None: - jit_error2(value < min_bound, - ValueError(f"{name} must be a float bigger than {min_bound}, " + jit_error_checking_no_args(value < min_bound, + ValueError(f"{name} must be a float bigger than {min_bound}, " f"while we got {value}")) if 
max_bound is not None: - jit_error2(value > max_bound, - ValueError(f"{name} must be a float smaller than {max_bound}, " + jit_error_checking_no_args(value > max_bound, + ValueError(f"{name} must be a float smaller than {max_bound}, " f"while we got {value}")) return value @@ -387,12 +387,12 @@ def is_integer(value: int, name=None, min_bound=None, max_bound=None, allow_none else: raise ValueError(f'{name} must be an int, but got {value}') if min_bound is not None: - jit_error2(jnp.any(value < min_bound), - ValueError(f"{name} must be an int bigger than {min_bound}, " + jit_error_checking_no_args(jnp.any(value < min_bound), + ValueError(f"{name} must be an int bigger than {min_bound}, " f"while we got {value}")) if max_bound is not None: - jit_error2(jnp.any(value > max_bound), - ValueError(f"{name} must be an int smaller than {max_bound}, " + jit_error_checking_no_args(jnp.any(value > max_bound), + ValueError(f"{name} must be an int smaller than {max_bound}, " f"while we got {value}")) return value @@ -596,7 +596,7 @@ def jit_error(pred, err_fun, err_arg=None): Parameters ---------- - pred: bool + pred: bool, Array The boolean prediction. err_fun: callable The error function, which raise errors. @@ -610,7 +610,7 @@ def jit_error(pred, err_fun, err_arg=None): jit_error_checking = jit_error -def jit_error2(pred: bool, err: Exception): +def jit_error_checking_no_args(pred: bool, err: Exception): """Check errors in a jit function. Parameters diff --git a/docs/apis/brainpy.math.random.rst b/docs/apis/brainpy.math.random.rst index e52a3450b..5a0af2fa1 100644 --- a/docs/apis/brainpy.math.random.rst +++ b/docs/apis/brainpy.math.random.rst @@ -4,10 +4,15 @@ .. currentmodule:: brainpy.math.random .. automodule:: brainpy.math.random + + +Random Sampling Functions +------------------------- + + .. 
autosummary:: :toctree: generated/ :nosignatures: - :template: classtemplate.rst seed split_key @@ -70,6 +75,17 @@ rand_like randint_like randn_like + + +Random Generator +------------------------- + + +.. autosummary:: + :toctree: generated/ + :nosignatures: + :template: classtemplate.rst + RandomState Generator DEFAULT diff --git a/requirements-dev.txt b/requirements-dev.txt index 068c38546..51f41a414 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -7,6 +7,7 @@ matplotlib msgpack tqdm pathos +taichi # test requirements pytest