You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ignite.apache.org by ch...@apache.org on 2019/02/18 00:35:05 UTC

[ignite] branch master updated: IGNITE-11261: [ML] Flaky test(testNaiveBaggingLogRegression)

This is an automated email from the ASF dual-hosted git repository.

chief pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ignite.git


The following commit(s) were added to refs/heads/master by this push:
     new 00701db  IGNITE-11261: [ML] Flaky test(testNaiveBaggingLogRegression)
00701db is described below

commit 00701db89e95b4052d8b28a77829417627e99e04
Author: Artem Malykh <am...@gmail.com>
AuthorDate: Mon Feb 18 03:34:57 2019 +0300

    IGNITE-11261: [ML] Flaky test(testNaiveBaggingLogRegression)
    
    This closes #6069
---
 .../java/org/apache/ignite/ml/nn/MLPTrainer.java   | 16 +++++++---
 .../logistic/LogisticRegressionSGDTrainer.java     |  5 ++--
 .../ml/trainers/AdaptableDatasetTrainer.java       | 16 +++++-----
 .../test/java/org/apache/ignite/ml/TestUtils.java  | 28 ++++++++++++++++-
 .../ml/composition/CompositionTestSuite.java       |  1 +
 .../ml/composition/{ => bagging}/BaggingTest.java  | 35 ++++++++++++++++++----
 6 files changed, 81 insertions(+), 20 deletions(-)

diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/nn/MLPTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/nn/MLPTrainer.java
index 08fb07f..dec0fb7 100644
--- a/modules/ml/src/main/java/org/apache/ignite/ml/nn/MLPTrainer.java
+++ b/modules/ml/src/main/java/org/apache/ignite/ml/nn/MLPTrainer.java
@@ -17,10 +17,6 @@
 
 package org.apache.ignite.ml.nn;
 
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Random;
 import org.apache.ignite.ml.composition.CompositionUtils;
 import org.apache.ignite.ml.dataset.Dataset;
 import org.apache.ignite.ml.dataset.DatasetBuilder;
@@ -28,6 +24,7 @@ import org.apache.ignite.ml.dataset.primitive.builder.context.EmptyContextBuilde
 import org.apache.ignite.ml.dataset.primitive.builder.data.SimpleLabeledDatasetDataBuilder;
 import org.apache.ignite.ml.dataset.primitive.context.EmptyContext;
 import org.apache.ignite.ml.dataset.primitive.data.SimpleLabeledDatasetData;
+import org.apache.ignite.ml.environment.LearningEnvironmentBuilder;
 import org.apache.ignite.ml.math.functions.IgniteDifferentiableVectorToDoubleFunction;
 import org.apache.ignite.ml.math.functions.IgniteFunction;
 import org.apache.ignite.ml.math.primitives.matrix.Matrix;
@@ -40,6 +37,11 @@ import org.apache.ignite.ml.trainers.FeatureLabelExtractor;
 import org.apache.ignite.ml.trainers.MultiLabelDatasetTrainer;
 import org.apache.ignite.ml.util.Utils;
 
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
 /**
  * Multilayer perceptron trainer based on partition based {@link Dataset}.
  *
@@ -378,4 +380,10 @@ public class MLPTrainer<P extends Serializable> extends MultiLabelDatasetTrainer
 
         return res;
     }
+
+    /** {@inheritDoc} Overridden to narrow the return type so callers keep the concrete trainer type. */
+    @Override public MLPTrainer<P> withEnvironmentBuilder(
+        LearningEnvironmentBuilder envBuilder) {
+        return (MLPTrainer<P>)super.withEnvironmentBuilder(envBuilder);
+    }
 }
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/regressions/logistic/LogisticRegressionSGDTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/regressions/logistic/LogisticRegressionSGDTrainer.java
index 4fcef08..345a885 100644
--- a/modules/ml/src/main/java/org/apache/ignite/ml/regressions/logistic/LogisticRegressionSGDTrainer.java
+++ b/modules/ml/src/main/java/org/apache/ignite/ml/regressions/logistic/LogisticRegressionSGDTrainer.java
@@ -17,7 +17,6 @@
 
 package org.apache.ignite.ml.regressions.logistic;
 
-import java.util.Arrays;
 import org.apache.ignite.ml.composition.CompositionUtils;
 import org.apache.ignite.ml.dataset.Dataset;
 import org.apache.ignite.ml.dataset.DatasetBuilder;
@@ -39,6 +38,8 @@ import org.apache.ignite.ml.trainers.FeatureLabelExtractor;
 import org.apache.ignite.ml.trainers.SingleLabelDatasetTrainer;
 import org.jetbrains.annotations.NotNull;
 
+import java.util.Arrays;
+
 /**
  * Trainer of the logistic regression model based on stochastic gradient descent algorithm.
  */
@@ -103,7 +104,7 @@ public class LogisticRegressionSGDTrainer extends SingleLabelDatasetTrainer<Logi
             batchSize,
             locIterations,
             seed
-        );
+        ).withEnvironmentBuilder(envBuilder);
 
         IgniteBiFunction<K, V, double[]> lbExtractorWrapper = (k, v) -> new double[] {lbExtractor.apply(k, v)};
         MultilayerPerceptron mlp;
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/trainers/AdaptableDatasetTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/trainers/AdaptableDatasetTrainer.java
index 3e48a4a..6e8fd1e 100644
--- a/modules/ml/src/main/java/org/apache/ignite/ml/trainers/AdaptableDatasetTrainer.java
+++ b/modules/ml/src/main/java/org/apache/ignite/ml/trainers/AdaptableDatasetTrainer.java
@@ -96,9 +96,10 @@ public class AdaptableDatasetTrainer<I, O, IW, OW, M extends IgniteModel<IW, OW>
     /** {@inheritDoc} */
     @Override public <K, V> AdaptableDatasetModel<I, O, IW, OW, M> fit(DatasetBuilder<K, V> datasetBuilder,
         FeatureLabelExtractor<K, V, L> extractor) {
-        M fit = wrapped.fit(
-            datasetBuilder.withUpstreamTransformer(upstreamTransformerBuilder),
-            extractor.andThen(afterExtractor));
+        // Hand this trainer's environment builder to the wrapped trainer before fitting.
+        M fit = wrapped.withEnvironmentBuilder(envBuilder)
+            .fit(datasetBuilder.withUpstreamTransformer(upstreamTransformerBuilder),
+                extractor.andThen(afterExtractor));
 
         return new AdaptableDatasetModel<>(before, fit, after);
     }
@@ -112,10 +113,11 @@ public class AdaptableDatasetTrainer<I, O, IW, OW, M extends IgniteModel<IW, OW>
     @Override protected <K, V> AdaptableDatasetModel<I, O, IW, OW, M> updateModel(
         AdaptableDatasetModel<I, O, IW, OW, M> mdl, DatasetBuilder<K, V> datasetBuilder,
         FeatureLabelExtractor<K, V, L> extractor) {
-        M updated = wrapped.updateModel(
-            mdl.innerModel(),
-            datasetBuilder.withUpstreamTransformer(upstreamTransformerBuilder),
-            extractor.andThen(afterExtractor));
+        // Hand this trainer's environment builder to the wrapped trainer before updating the inner model.
+        M updated = wrapped.withEnvironmentBuilder(envBuilder)
+            .updateModel(mdl.innerModel(),
+                datasetBuilder.withUpstreamTransformer(upstreamTransformerBuilder),
+                extractor.andThen(afterExtractor));
 
         return mdl.withInnerModel(updated);
     }
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/TestUtils.java b/modules/ml/src/test/java/org/apache/ignite/ml/TestUtils.java
index 06e9ef2..052fc96 100644
--- a/modules/ml/src/test/java/org/apache/ignite/ml/TestUtils.java
+++ b/modules/ml/src/test/java/org/apache/ignite/ml/TestUtils.java
@@ -17,7 +17,6 @@
 
 package org.apache.ignite.ml;
 
-import java.util.stream.IntStream;
 import org.apache.ignite.ml.dataset.DatasetBuilder;
 import org.apache.ignite.ml.environment.LearningEnvironmentBuilder;
 import org.apache.ignite.ml.math.primitives.matrix.Matrix;
@@ -26,6 +25,8 @@ import org.apache.ignite.ml.trainers.DatasetTrainer;
 import org.apache.ignite.ml.trainers.FeatureLabelExtractor;
 import org.junit.Assert;
 
+import java.util.stream.IntStream;
+
 import static org.junit.Assert.assertTrue;
 
 /** */
@@ -171,6 +172,31 @@ public class TestUtils {
     }
 
     /**
+     * Verifies that two vectors have the same size and are element-wise equal within a tolerance.
+     *
+     * @param exp Expected vector.
+     * @param observed Actual vector.
+     * @param eps Maximum allowed absolute difference between corresponding elements.
+     */
+    public static void assertEquals(Vector exp, Vector observed, double eps) {
+        Assert.assertNotNull("Observed should not be null", observed);
+
+        if (exp.size() != observed.size()) {
+            // Report both sizes so that the failure shows what was expected, not just what was seen.
+            Assert.fail("Observed has incorrect dimensions." +
+                "\nobserved is " + observed.size() +
+                " but expected " + exp.size());
+        }
+
+        for (int i = 0; i < exp.size(); ++i) {
+            double expected = exp.getX(i);
+            double actual = observed.getX(i);
+
+            Assert.assertEquals("Vectors differ at index " + i, expected, actual, eps);
+        }
+    }
+
+    /**
      * Verifies that two double arrays are close (sup norm).
      *
      * @param msg The identifying message for the assertion error.
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/composition/CompositionTestSuite.java b/modules/ml/src/test/java/org/apache/ignite/ml/composition/CompositionTestSuite.java
index 87d56cd..bf0943c 100644
--- a/modules/ml/src/test/java/org/apache/ignite/ml/composition/CompositionTestSuite.java
+++ b/modules/ml/src/test/java/org/apache/ignite/ml/composition/CompositionTestSuite.java
@@ -17,6 +17,7 @@
 
 package org.apache.ignite.ml.composition;
 
+import org.apache.ignite.ml.composition.bagging.BaggingTest;
 import org.apache.ignite.ml.composition.boosting.GDBTrainerTest;
 import org.apache.ignite.ml.composition.predictionsaggregator.MeanValuePredictionsAggregatorTest;
 import org.apache.ignite.ml.composition.predictionsaggregator.OnMajorityPredictionsAggregatorTest;
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/composition/BaggingTest.java b/modules/ml/src/test/java/org/apache/ignite/ml/composition/bagging/BaggingTest.java
similarity index 85%
rename from modules/ml/src/test/java/org/apache/ignite/ml/composition/BaggingTest.java
rename to modules/ml/src/test/java/org/apache/ignite/ml/composition/bagging/BaggingTest.java
index 7a84b64..1fc218c 100644
--- a/modules/ml/src/test/java/org/apache/ignite/ml/composition/BaggingTest.java
+++ b/modules/ml/src/test/java/org/apache/ignite/ml/composition/bagging/BaggingTest.java
@@ -15,15 +15,12 @@
  * limitations under the License.
  */
 
-package org.apache.ignite.ml.composition;
+package org.apache.ignite.ml.composition.bagging;
 
-import java.util.Arrays;
-import java.util.Map;
 import org.apache.ignite.ml.IgniteModel;
 import org.apache.ignite.ml.TestUtils;
 import org.apache.ignite.ml.common.TrainerTest;
-import org.apache.ignite.ml.composition.bagging.BaggedModel;
-import org.apache.ignite.ml.composition.bagging.BaggedTrainer;
+import org.apache.ignite.ml.composition.combinators.parallel.ModelsParallelComposition;
 import org.apache.ignite.ml.composition.predictionsaggregator.MeanValuePredictionsAggregator;
 import org.apache.ignite.ml.composition.predictionsaggregator.OnMajorityPredictionsAggregator;
 import org.apache.ignite.ml.dataset.Dataset;
@@ -38,16 +35,38 @@ import org.apache.ignite.ml.optimization.updatecalculators.SimpleGDParameterUpda
 import org.apache.ignite.ml.optimization.updatecalculators.SimpleGDUpdateCalculator;
 import org.apache.ignite.ml.regressions.logistic.LogisticRegressionModel;
 import org.apache.ignite.ml.regressions.logistic.LogisticRegressionSGDTrainer;
+import org.apache.ignite.ml.trainers.AdaptableDatasetModel;
 import org.apache.ignite.ml.trainers.DatasetTrainer;
 import org.apache.ignite.ml.trainers.FeatureLabelExtractor;
 import org.apache.ignite.ml.trainers.TrainerTransformers;
 import org.junit.Test;
 
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
 /**
  * Tests for bagging algorithm.
  */
 public class BaggingTest extends TrainerTest {
     /**
+     * Expected weights of the first model in the ensemble trained by
+     * {@link BaggingTest#testNaiveBaggingLogRegression()}, keyed by the {@code parts} value of the test run.
+     * Compared with zero tolerance to ensure the training result is fully determined by the provided seeds.
+     */
+    private static final Map<Integer, Vector> firstModelWeights = new HashMap<>();
+
+    static {
+        // Key: value of {@code parts}; value: reference weights of the first submodel for that value.
+        // NOTE(review): presumably recorded from a run with the seeds used in the test - confirm.
+        firstModelWeights.put(1, VectorUtils.of(-0.14721735583126058, 4.366377931980097));
+        firstModelWeights.put(2, VectorUtils.of(-1.0092940937477968, 1.2950461550870134));
+        firstModelWeights.put(3, VectorUtils.of(-5.5345231104301655, -0.7554216668724918));
+        firstModelWeights.put(4, VectorUtils.of(0.136489632011201, 1.0937407007786915));
+        firstModelWeights.put(13, VectorUtils.of(-0.27321382073998685, 1.1199411864901687));
+    }
+
+    /**
      * Test that count of entries in context is equal to initial dataset size * subsampleRatio.
      */
     @Test
@@ -81,7 +100,7 @@ public class BaggingTest extends TrainerTest {
 
         BaggedTrainer<Double> baggedTrainer = TrainerTransformers.makeBagged(
             trainer,
-            10,
+            7,
             0.7,
             2,
             2,
@@ -95,6 +114,10 @@ public class BaggingTest extends TrainerTest {
             (k, v) -> v[0]
         );
 
+        Vector weights = ((LogisticRegressionModel)((AdaptableDatasetModel)((ModelsParallelComposition)((AdaptableDatasetModel)mdl
+            .model()).innerModel()).submodels().get(0)).innerModel()).weights();
+
+        TestUtils.assertEquals(firstModelWeights.get(parts), weights, 0.0);
         TestUtils.assertEquals(0, mdl.predict(VectorUtils.of(100, 10)), PRECISION);
         TestUtils.assertEquals(1, mdl.predict(VectorUtils.of(10, 100)), PRECISION);
     }