You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/12/07 07:49:19 UTC
[GitHub] anirudh2290 closed pull request #13498: Fix flaky test test_random:test_randint_generator

anirudh2290 closed pull request #13498: Fix flaky test test_random:test_randint_generator
URL: https://github.com/apache/incubator-mxnet/pull/13498
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/python/mxnet/test_utils.py b/python/mxnet/test_utils.py
index 26f7762ca9b..0a4d17dc266 100644
--- a/python/mxnet/test_utils.py
+++ b/python/mxnet/test_utils.py
@@ -1849,12 +1849,12 @@ def chi_square_check(generator, buckets, probs, nsamples=1000000):
 
     If the generator is continuous, the buckets should contain tuples of (range_min, range_max) \
     and the probs should be the corresponding ideal probability within the specific ranges. \
-    Otherwise, the buckets should be the possible output of the discrete distribution and the \
+    Otherwise, the buckets should contain all the possible values generated over the discrete distribution and the \
     probs should be groud-truth probability.
 
     Usually the user is required to specify the probs parameter.
 
-    After obtatining the p value, we could further use the standard p > 0.05 threshold to get \
+    After obtaining the p value, we could further use the standard p > 0.05 (alpha) threshold to get \
     the final result.
 
     Examples::
@@ -1906,7 +1906,6 @@ def chi_square_check(generator, buckets, probs, nsamples=1000000):
             buckets_npy[i * 2 + 1] = buckets[i][1]
     else:
         continuous_dist = False
-        buckets_npy = np.array(buckets)
     expected_freq = (nsamples * np.array(probs, dtype=np.float32)).astype(np.int32)
     if continuous_dist:
         sample_bucket_ids = np.searchsorted(buckets_npy, samples, side='right')
@@ -1923,7 +1922,7 @@ def chi_square_check(generator, buckets, probs, nsamples=1000000):
     _, p = ss.chisquare(f_obs=obs_freq, f_exp=expected_freq)
     return p, obs_freq, expected_freq
 
-def verify_generator(generator, buckets, probs, nsamples=1000000, nrepeat=5, success_rate=0.15):
+def verify_generator(generator, buckets, probs, nsamples=1000000, nrepeat=5, success_rate=0.25, alpha=0.05):
     """Verify whether the generator is correct using chi-square testing.
 
     The test is repeated for "nrepeat" times and we check if the success rate is
@@ -1946,6 +1945,8 @@ def verify_generator(generator, buckets, probs, nsamples=1000000, nrepeat=5, suc
         The times to repeat the test
     success_rate: float
         The desired success rate
+    alpha: float
+        The desired threshold for type-I error i.e. when a true null hypothesis is rejected
 
     Returns
     -------
@@ -1961,7 +1962,7 @@ def verify_generator(generator, buckets, probs, nsamples=1000000, nrepeat=5, suc
         cs_ret_l.append(cs_ret)
         obs_freq_l.append(obs_freq)
         expected_freq_l.append(expected_freq)
-    success_num = (np.array(cs_ret_l) > 0.05).sum()
+    success_num = (np.array(cs_ret_l) > alpha).sum()
     if success_num < nrepeat * success_rate:
         raise AssertionError("Generator test fails, Chi-square p=%s, obs_freq=%s, expected_freq=%s."
                              "\nbuckets=%s, probs=%s"
diff --git a/tests/python/unittest/test_random.py b/tests/python/unittest/test_random.py
index 3026d31c0f9..405602f073b 100644
--- a/tests/python/unittest/test_random.py
+++ b/tests/python/unittest/test_random.py
@@ -860,25 +860,26 @@ def test_randint_extremes():
     assert a>=50000000 and a<=50000010
 
 @with_seed()
-@unittest.skip("Flaky test: https://github.com/apache/incubator-mxnet/issues/13446")
 def test_randint_generator():
     ctx = mx.context.current_context()
     for dtype in ['int32', 'int64']:
-        for low, high in [(50000000, 50001000),(-50000000,-9900),(-500,199),(-2147483647,2147483647)]:
+        for low, high in [(50000000, 50001000),(-50000100,-50000000),(-500,199)]:
             scale = high - low
             buckets, probs = gen_buckets_probs_with_ppf(lambda x: ss.uniform.ppf(x, loc=low, scale=scale), 5)
             # Quantize bucket boundaries to reflect the actual dtype and adjust probs accordingly
             buckets = np.array(buckets, dtype=dtype).tolist()
             probs = [(buckets[i][1] - buckets[i][0]) / float(scale) for i in range(5)]
             generator_mx = lambda x: mx.nd.random.randint(low, high, shape=x, ctx=ctx, dtype=dtype).asnumpy()
-            verify_generator(generator=generator_mx, buckets=buckets, probs=probs)
+            verify_generator(generator=generator_mx, buckets=buckets, probs=probs, nrepeat=100)
+            # Scipy uses alpha = 0.01 for testing discrete distribution generator but we are using default alpha=0.05 (higher threshold ensures robustness)
+            # Refer - https://github.com/scipy/scipy/blob/9f12af697763fb5f9767d5cb1280ce62456a3974/scipy/stats/tests/test_discrete_basic.py#L45
             generator_mx_same_seed = \
                 lambda x: np.concatenate(
                     [mx.nd.random.randint(low, high, shape=x // 10, ctx=ctx, dtype=dtype).asnumpy()
                         for _ in range(10)])
-            verify_generator(generator=generator_mx_same_seed, buckets=buckets, probs=probs)
+            verify_generator(generator=generator_mx_same_seed, buckets=buckets, probs=probs, nrepeat=100)
 
-with_seed()
+@with_seed()
 def test_randint_without_dtype():
     a = mx.nd.random.randint(low=50000000, high=50000010, ctx=mx.context.current_context())
     assert(a.dtype, 'int32')


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services