You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by ha...@apache.org on 2020/03/12 08:49:25 UTC

[incubator-mxnet] branch master updated: [numpy] add op random.f (#17586)

This is an automated email from the ASF dual-hosted git repository.

haoj pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new bd6e917  [numpy] add op random.f (#17586)
bd6e917 is described below

commit bd6e917611e825c95160d4bcb9204d86be44198f
Author: Yiyan66 <57...@users.noreply.github.com>
AuthorDate: Thu Mar 12 16:47:53 2020 +0800

    [numpy] add op random.f (#17586)
    
    * F
    
    * f
---
 python/mxnet/ndarray/numpy/random.py   | 66 +++++++++++++++++++++++++++++++++-
 python/mxnet/numpy/random.py           | 64 ++++++++++++++++++++++++++++++++-
 python/mxnet/symbol/numpy/random.py    | 39 +++++++++++++++++++-
 tests/python/unittest/test_numpy_op.py | 31 ++++++++++++++++
 4 files changed, 197 insertions(+), 3 deletions(-)

diff --git a/python/mxnet/ndarray/numpy/random.py b/python/mxnet/ndarray/numpy/random.py
index 76cf3a5..ff1f7fd 100644
--- a/python/mxnet/ndarray/numpy/random.py
+++ b/python/mxnet/ndarray/numpy/random.py
@@ -23,7 +23,7 @@ from ..ndarray import NDArray
 
 
 __all__ = ['randint', 'uniform', 'normal', "choice", "rand", "multinomial", "multivariate_normal",
-           'logistic', 'gumbel', "rayleigh",
+           'logistic', 'gumbel', "rayleigh", 'f',
            "shuffle", 'gamma', 'beta', 'chisquare', 'exponential', 'lognormal', 'weibull', 'pareto', 'power']
 
 
@@ -865,6 +865,70 @@ def beta(a, b, size=None, dtype=None, ctx=None):
     return out.astype(dtype)
 
 
+def f(dfnum, dfden, size=None, ctx=None):
+    r"""Draw samples from an F distribution.
+
+    Samples are drawn from an F distribution with specified parameters,
+    `dfnum` (degrees of freedom in numerator) and `dfden` (degrees of
+    freedom in denominator), where both parameters must be greater than
+    zero.
+
+    The random variate of the F distribution (also known as the
+    Fisher distribution) is a continuous probability distribution
+    that arises in ANOVA tests, and is the ratio of two chi-square
+    variates.
+
+    Parameters
+    ----------
+    dfnum : float or ndarray of floats
+        Degrees of freedom in numerator, must be > 0.
+    dfden : float or ndarray of floats
+        Degrees of freedom in denominator, must be > 0.
+    size : int or tuple of ints, optional
+        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
+        ``m * n * k`` samples are drawn.  If size is ``None`` (default),
+        a single value is returned if ``dfnum`` and ``dfden`` are both scalars.
+        Otherwise, ``np.broadcast(dfnum, dfden).size`` samples are drawn.
+    ctx : Context, optional
+        Device context of output. Default is current context.
+
+    Returns
+    -------
+    out : ndarray or scalar
+        Drawn samples from the parameterized Fisher distribution.
+
+    Examples
+    --------
+    An example from Glantz[1], pp 47-40:
+
+    Two groups, children of diabetics (25 people) and children from people
+    without diabetes (25 controls). Fasting blood glucose was measured,
+    case group had a mean value of 86.1, controls had a mean value of
+    82.2. Standard deviations were 2.09 and 2.49 respectively. Are these
+    data consistent with the null hypothesis that the parents' diabetic
+    status does not affect their children's blood glucose levels?
+    Calculating the F statistic from the data gives a value of 36.01.
+
+    Draw samples from the distribution:
+
+    >>> dfnum = 1. # between group degrees of freedom
+    >>> dfden = 48. # within groups degrees of freedom
+    >>> s = np.random.f(dfnum, dfden, 1000)
+
+    The lower bound for the top 1% of the samples is:
+
+    >>> np.sort(s)[-10]
+    7.61988120985 # random
+
+    So there is about a 1% chance that the F statistic will exceed 7.62,
+    the measured value is 36, so the null hypothesis is rejected at the 1%
+    level.
+    """
+    X = chisquare(df=dfnum, size=size, ctx=ctx)
+    Y = chisquare(df=dfden, size=size, ctx=ctx)
+    return (X * dfden) / (Y * dfnum)
+
+
 def chisquare(df, size=None, dtype=None, ctx=None):
     r"""
     chisquare(df, size=None, dtype=None, ctx=None)
diff --git a/python/mxnet/numpy/random.py b/python/mxnet/numpy/random.py
index f272ecb..b07cc4a 100644
--- a/python/mxnet/numpy/random.py
+++ b/python/mxnet/numpy/random.py
@@ -21,7 +21,7 @@ from ..ndarray import numpy as _mx_nd_np
 
 
 __all__ = ["randint", "uniform", "normal", "choice", "rand", "multinomial", "multivariate_normal",
-           "logistic", "gumbel",
+           "logistic", "gumbel", "f",
            "shuffle", "randn", "gamma", "beta", "chisquare", "exponential", "lognormal",
            "weibull", "pareto", "power", "rayleigh"]
 
@@ -873,6 +873,68 @@ def beta(a, b, size=None, dtype=None, ctx=None):
     return _mx_nd_np.random.beta(a, b, size=size, dtype=dtype, ctx=ctx)
 
 
+def f(dfnum, dfden, size=None, ctx=None):
+    r"""Draw samples from an F distribution.
+
+    Samples are drawn from an F distribution with specified parameters,
+    `dfnum` (degrees of freedom in numerator) and `dfden` (degrees of
+    freedom in denominator), where both parameters must be greater than
+    zero.
+
+    The random variate of the F distribution (also known as the
+    Fisher distribution) is a continuous probability distribution
+    that arises in ANOVA tests, and is the ratio of two chi-square
+    variates.
+
+    Parameters
+    ----------
+    dfnum : float or ndarray of floats
+        Degrees of freedom in numerator, must be > 0.
+    dfden : float or ndarray of floats
+        Degrees of freedom in denominator, must be > 0.
+    size : int or tuple of ints, optional
+        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
+        ``m * n * k`` samples are drawn.  If size is ``None`` (default),
+        a single value is returned if ``dfnum`` and ``dfden`` are both scalars.
+        Otherwise, ``np.broadcast(dfnum, dfden).size`` samples are drawn.
+    ctx : Context, optional
+        Device context of output. Default is current context.
+
+    Returns
+    -------
+    out : ndarray or scalar
+        Drawn samples from the parameterized Fisher distribution.
+
+    Examples
+    --------
+    An example from Glantz[1], pp 47-40:
+
+    Two groups, children of diabetics (25 people) and children from people
+    without diabetes (25 controls). Fasting blood glucose was measured,
+    case group had a mean value of 86.1, controls had a mean value of
+    82.2. Standard deviations were 2.09 and 2.49 respectively. Are these
+    data consistent with the null hypothesis that the parents' diabetic
+    status does not affect their children's blood glucose levels?
+    Calculating the F statistic from the data gives a value of 36.01.
+
+    Draw samples from the distribution:
+
+    >>> dfnum = 1. # between group degrees of freedom
+    >>> dfden = 48. # within groups degrees of freedom
+    >>> s = np.random.f(dfnum, dfden, 1000)
+
+    The lower bound for the top 1% of the samples is:
+
+    >>> np.sort(s)[-10]
+    7.61988120985 # random
+
+    So there is about a 1% chance that the F statistic will exceed 7.62,
+    the measured value is 36, so the null hypothesis is rejected at the 1%
+    level.
+    """
+    return _mx_nd_np.random.f(dfnum, dfden, size=size, ctx=ctx)
+
+
 def chisquare(df, size=None, dtype=None, ctx=None):
     r"""
     chisquare(df, size=None, dtype=None, ctx=None)
diff --git a/python/mxnet/symbol/numpy/random.py b/python/mxnet/symbol/numpy/random.py
index 7afe8a9..46be069 100644
--- a/python/mxnet/symbol/numpy/random.py
+++ b/python/mxnet/symbol/numpy/random.py
@@ -22,7 +22,7 @@ from . import _internal as _npi
 
 
 __all__ = ['randint', 'uniform', 'normal', 'multivariate_normal',
-           'logistic', 'gumbel', 'rayleigh',
+           'logistic', 'gumbel', 'rayleigh', 'f',
            'rand', 'shuffle', 'gamma', 'beta', 'chisquare', 'exponential', 'lognormal',
            'weibull', 'pareto', 'power']
 
@@ -568,6 +568,43 @@ def beta(a, b, size=None, dtype=None, ctx=None):
     return out.astype(dtype)
 
 
+def f(dfnum, dfden, size=None, ctx=None):
+    r"""Draw samples from an F distribution.
+
+    Samples are drawn from an F distribution with specified parameters,
+    `dfnum` (degrees of freedom in numerator) and `dfden` (degrees of
+    freedom in denominator), where both parameters must be greater than
+    zero.
+
+    The random variate of the F distribution (also known as the
+    Fisher distribution) is a continuous probability distribution
+    that arises in ANOVA tests, and is the ratio of two chi-square
+    variates.
+
+    Parameters
+    ----------
+    dfnum : float or _Symbol of floats
+        Degrees of freedom in numerator, must be > 0.
+    dfden : float or _Symbol of floats
+        Degrees of freedom in denominator, must be > 0.
+    size : int or tuple of ints, optional
+        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
+        ``m * n * k`` samples are drawn.  If size is ``None`` (default),
+        a single value is returned if ``dfnum`` and ``dfden`` are both scalars.
+        Otherwise, ``np.broadcast(dfnum, dfden).size`` samples are drawn.
+    ctx : Context, optional
+        Device context of output. Default is current context.
+
+    Returns
+    -------
+    out : _Symbol
+        Drawn samples from the parameterized Fisher distribution.
+    """
+    X = chisquare(df=dfnum, size=size, ctx=ctx)
+    Y = chisquare(df=dfden, size=size, ctx=ctx)
+    return (X * dfden) / (Y * dfnum)
+
+
 def chisquare(df, size=None, dtype=None, ctx=None):
     r"""
     chisquare(df, size=None, dtype=None, ctx=None)
diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py
index b0ee3e5..b8e9343 100644
--- a/tests/python/unittest/test_numpy_op.py
+++ b/tests/python/unittest/test_numpy_op.py
@@ -3906,6 +3906,37 @@ def test_np_random_beta():
 
 @with_seed()
 @use_np
+def test_np_random_f():
+    class TestRandomF(HybridBlock):
+        def __init__(self, size=None):
+            super(TestRandomF, self).__init__()
+            self._size = size
+
+        def hybrid_forward(self, F, dfnum, dfden):
+            return F.np.random.f(dfnum, dfden, size=self._size)
+
+    shape_list = [(), (1,), (2, 3), (4, 0, 5), 6, (7, 8), None]
+    hybridize_list = [False, True]
+    df = np.array([1])
+    for [param_shape, hybridize] in itertools.product(shape_list,
+         hybridize_list):
+        if sys.version_info.major < 3 and param_shape == ():
+            continue
+        mx_df = df
+        np_df = mx_df.asnumpy()
+        test_random_f = TestRandomF(size=param_shape)
+        if hybridize:
+            test_random_f.hybridize()
+        np_out = _np.random.f(np_df, np_df, size=param_shape)
+        mx_out = test_random_f(mx_df, mx_df)
+        mx_out_imperative = mx.np.random.f(mx_df, mx_df, size=param_shape)
+
+        assert_almost_equal(np_out.shape, mx_out.shape)
+        assert_almost_equal(np_out.shape, mx_out_imperative.shape)
+
+
+@with_seed()
+@use_np
 def test_np_random_chisquare():
     class TestRandomChisquare(HybridBlock):
         def __init__(self, size=None, dtype=None, ctx=None):