You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ignite.apache.org by ch...@apache.org on 2019/02/18 00:35:05 UTC
[ignite] branch master updated: IGNITE-11261: [ML] Flaky
test(testNaiveBaggingLogRegression)
This is an automated email from the ASF dual-hosted git repository.
chief pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ignite.git
The following commit(s) were added to refs/heads/master by this push:
new 00701db IGNITE-11261: [ML] Flaky test(testNaiveBaggingLogRegression)
00701db is described below
commit 00701db89e95b4052d8b28a77829417627e99e04
Author: Artem Malykh <am...@gmail.com>
AuthorDate: Mon Feb 18 03:34:57 2019 +0300
IGNITE-11261: [ML] Flaky test(testNaiveBaggingLogRegression)
This closes #6069
---
.../java/org/apache/ignite/ml/nn/MLPTrainer.java | 16 +++++++---
.../logistic/LogisticRegressionSGDTrainer.java | 5 ++--
.../ml/trainers/AdaptableDatasetTrainer.java | 16 +++++-----
.../test/java/org/apache/ignite/ml/TestUtils.java | 28 ++++++++++++++++-
.../ml/composition/CompositionTestSuite.java | 1 +
.../ml/composition/{ => bagging}/BaggingTest.java | 35 ++++++++++++++++++----
6 files changed, 81 insertions(+), 20 deletions(-)
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/nn/MLPTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/nn/MLPTrainer.java
index 08fb07f..dec0fb7 100644
--- a/modules/ml/src/main/java/org/apache/ignite/ml/nn/MLPTrainer.java
+++ b/modules/ml/src/main/java/org/apache/ignite/ml/nn/MLPTrainer.java
@@ -17,10 +17,6 @@
package org.apache.ignite.ml.nn;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Random;
import org.apache.ignite.ml.composition.CompositionUtils;
import org.apache.ignite.ml.dataset.Dataset;
import org.apache.ignite.ml.dataset.DatasetBuilder;
@@ -28,6 +24,7 @@ import org.apache.ignite.ml.dataset.primitive.builder.context.EmptyContextBuilde
import org.apache.ignite.ml.dataset.primitive.builder.data.SimpleLabeledDatasetDataBuilder;
import org.apache.ignite.ml.dataset.primitive.context.EmptyContext;
import org.apache.ignite.ml.dataset.primitive.data.SimpleLabeledDatasetData;
+import org.apache.ignite.ml.environment.LearningEnvironmentBuilder;
import org.apache.ignite.ml.math.functions.IgniteDifferentiableVectorToDoubleFunction;
import org.apache.ignite.ml.math.functions.IgniteFunction;
import org.apache.ignite.ml.math.primitives.matrix.Matrix;
@@ -40,6 +37,11 @@ import org.apache.ignite.ml.trainers.FeatureLabelExtractor;
import org.apache.ignite.ml.trainers.MultiLabelDatasetTrainer;
import org.apache.ignite.ml.util.Utils;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
/**
* Multilayer perceptron trainer based on partition based {@link Dataset}.
*
@@ -378,4 +380,10 @@ public class MLPTrainer<P extends Serializable> extends MultiLabelDatasetTrainer
return res;
}
+
+ /** {@inheritDoc} */
+ @Override public MLPTrainer<P> withEnvironmentBuilder(
+ LearningEnvironmentBuilder envBuilder) {
+ return (MLPTrainer<P>)super.withEnvironmentBuilder(envBuilder);
+ }
}
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/regressions/logistic/LogisticRegressionSGDTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/regressions/logistic/LogisticRegressionSGDTrainer.java
index 4fcef08..345a885 100644
--- a/modules/ml/src/main/java/org/apache/ignite/ml/regressions/logistic/LogisticRegressionSGDTrainer.java
+++ b/modules/ml/src/main/java/org/apache/ignite/ml/regressions/logistic/LogisticRegressionSGDTrainer.java
@@ -17,7 +17,6 @@
package org.apache.ignite.ml.regressions.logistic;
-import java.util.Arrays;
import org.apache.ignite.ml.composition.CompositionUtils;
import org.apache.ignite.ml.dataset.Dataset;
import org.apache.ignite.ml.dataset.DatasetBuilder;
@@ -39,6 +38,8 @@ import org.apache.ignite.ml.trainers.FeatureLabelExtractor;
import org.apache.ignite.ml.trainers.SingleLabelDatasetTrainer;
import org.jetbrains.annotations.NotNull;
+import java.util.Arrays;
+
/**
* Trainer of the logistic regression model based on stochastic gradient descent algorithm.
*/
@@ -103,7 +104,7 @@ public class LogisticRegressionSGDTrainer extends SingleLabelDatasetTrainer<Logi
batchSize,
locIterations,
seed
- );
+ ).withEnvironmentBuilder(envBuilder);
IgniteBiFunction<K, V, double[]> lbExtractorWrapper = (k, v) -> new double[] {lbExtractor.apply(k, v)};
MultilayerPerceptron mlp;
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/trainers/AdaptableDatasetTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/trainers/AdaptableDatasetTrainer.java
index 3e48a4a..6e8fd1e 100644
--- a/modules/ml/src/main/java/org/apache/ignite/ml/trainers/AdaptableDatasetTrainer.java
+++ b/modules/ml/src/main/java/org/apache/ignite/ml/trainers/AdaptableDatasetTrainer.java
@@ -96,9 +96,10 @@ public class AdaptableDatasetTrainer<I, O, IW, OW, M extends IgniteModel<IW, OW>
/** {@inheritDoc} */
@Override public <K, V> AdaptableDatasetModel<I, O, IW, OW, M> fit(DatasetBuilder<K, V> datasetBuilder,
FeatureLabelExtractor<K, V, L> extractor) {
- M fit = wrapped.fit(
- datasetBuilder.withUpstreamTransformer(upstreamTransformerBuilder),
- extractor.andThen(afterExtractor));
+ M fit = wrapped.
+ withEnvironmentBuilder(envBuilder)
+ .fit(datasetBuilder.withUpstreamTransformer(upstreamTransformerBuilder),
+ extractor.andThen(afterExtractor));
return new AdaptableDatasetModel<>(before, fit, after);
}
@@ -112,10 +113,11 @@ public class AdaptableDatasetTrainer<I, O, IW, OW, M extends IgniteModel<IW, OW>
@Override protected <K, V> AdaptableDatasetModel<I, O, IW, OW, M> updateModel(
AdaptableDatasetModel<I, O, IW, OW, M> mdl, DatasetBuilder<K, V> datasetBuilder,
FeatureLabelExtractor<K, V, L> extractor) {
- M updated = wrapped.updateModel(
- mdl.innerModel(),
- datasetBuilder.withUpstreamTransformer(upstreamTransformerBuilder),
- extractor.andThen(afterExtractor));
+ M updated = wrapped.withEnvironmentBuilder(envBuilder)
+ .updateModel(
+ mdl.innerModel(),
+ datasetBuilder.withUpstreamTransformer(upstreamTransformerBuilder),
+ extractor.andThen(afterExtractor));
return mdl.withInnerModel(updated);
}
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/TestUtils.java b/modules/ml/src/test/java/org/apache/ignite/ml/TestUtils.java
index 06e9ef2..052fc96 100644
--- a/modules/ml/src/test/java/org/apache/ignite/ml/TestUtils.java
+++ b/modules/ml/src/test/java/org/apache/ignite/ml/TestUtils.java
@@ -17,7 +17,6 @@
package org.apache.ignite.ml;
-import java.util.stream.IntStream;
import org.apache.ignite.ml.dataset.DatasetBuilder;
import org.apache.ignite.ml.environment.LearningEnvironmentBuilder;
import org.apache.ignite.ml.math.primitives.matrix.Matrix;
@@ -26,6 +25,8 @@ import org.apache.ignite.ml.trainers.DatasetTrainer;
import org.apache.ignite.ml.trainers.FeatureLabelExtractor;
import org.junit.Assert;
+import java.util.stream.IntStream;
+
import static org.junit.Assert.assertTrue;
/** */
@@ -171,6 +172,31 @@ public class TestUtils {
}
/**
+ * Verifies that two vectors are equal.
+ *
+ * @param exp Expected vector.
+ * @param observed Actual vector.
+ */
+ public static void assertEquals(Vector exp, Vector observed, double eps) {
+ Assert.assertNotNull("Observed should not be null", observed);
+
+ if (exp.size() != observed.size()) {
+ String msgBuff = "Observed has incorrect dimensions." +
+ "\nobserved is " + observed.size() +
+ " x " + observed.size();
+
+ Assert.fail(msgBuff);
+ }
+
+ for (int i = 0; i < exp.size(); ++i) {
+ double eij = exp.getX(i);
+ double aij = observed.getX(i);
+
+ Assert.assertEquals(eij, aij, eps);
+ }
+ }
+
+ /**
* Verifies that two double arrays are close (sup norm).
*
* @param msg The identifying message for the assertion error.
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/composition/CompositionTestSuite.java b/modules/ml/src/test/java/org/apache/ignite/ml/composition/CompositionTestSuite.java
index 87d56cd..bf0943c 100644
--- a/modules/ml/src/test/java/org/apache/ignite/ml/composition/CompositionTestSuite.java
+++ b/modules/ml/src/test/java/org/apache/ignite/ml/composition/CompositionTestSuite.java
@@ -17,6 +17,7 @@
package org.apache.ignite.ml.composition;
+import org.apache.ignite.ml.composition.bagging.BaggingTest;
import org.apache.ignite.ml.composition.boosting.GDBTrainerTest;
import org.apache.ignite.ml.composition.predictionsaggregator.MeanValuePredictionsAggregatorTest;
import org.apache.ignite.ml.composition.predictionsaggregator.OnMajorityPredictionsAggregatorTest;
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/composition/BaggingTest.java b/modules/ml/src/test/java/org/apache/ignite/ml/composition/bagging/BaggingTest.java
similarity index 85%
rename from modules/ml/src/test/java/org/apache/ignite/ml/composition/BaggingTest.java
rename to modules/ml/src/test/java/org/apache/ignite/ml/composition/bagging/BaggingTest.java
index 7a84b64..1fc218c 100644
--- a/modules/ml/src/test/java/org/apache/ignite/ml/composition/BaggingTest.java
+++ b/modules/ml/src/test/java/org/apache/ignite/ml/composition/bagging/BaggingTest.java
@@ -15,15 +15,12 @@
* limitations under the License.
*/
-package org.apache.ignite.ml.composition;
+package org.apache.ignite.ml.composition.bagging;
-import java.util.Arrays;
-import java.util.Map;
import org.apache.ignite.ml.IgniteModel;
import org.apache.ignite.ml.TestUtils;
import org.apache.ignite.ml.common.TrainerTest;
-import org.apache.ignite.ml.composition.bagging.BaggedModel;
-import org.apache.ignite.ml.composition.bagging.BaggedTrainer;
+import org.apache.ignite.ml.composition.combinators.parallel.ModelsParallelComposition;
import org.apache.ignite.ml.composition.predictionsaggregator.MeanValuePredictionsAggregator;
import org.apache.ignite.ml.composition.predictionsaggregator.OnMajorityPredictionsAggregator;
import org.apache.ignite.ml.dataset.Dataset;
@@ -38,16 +35,38 @@ import org.apache.ignite.ml.optimization.updatecalculators.SimpleGDParameterUpda
import org.apache.ignite.ml.optimization.updatecalculators.SimpleGDUpdateCalculator;
import org.apache.ignite.ml.regressions.logistic.LogisticRegressionModel;
import org.apache.ignite.ml.regressions.logistic.LogisticRegressionSGDTrainer;
+import org.apache.ignite.ml.trainers.AdaptableDatasetModel;
import org.apache.ignite.ml.trainers.DatasetTrainer;
import org.apache.ignite.ml.trainers.FeatureLabelExtractor;
import org.apache.ignite.ml.trainers.TrainerTransformers;
import org.junit.Test;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
/**
* Tests for bagging algorithm.
*/
public class BaggingTest extends TrainerTest {
/**
+ * Dependency of weights of first model in ensemble after training in
+ * {@link BaggingTest#testNaiveBaggingLogRegression()}. This dependency is tested to ensure that it is
+ * fully determined by provided seeds.
+ */
+ private static Map<Integer, Vector> firstModelWeights;
+
+ static {
+ firstModelWeights = new HashMap<>();
+
+ firstModelWeights.put(1, VectorUtils.of(-0.14721735583126058, 4.366377931980097));
+ firstModelWeights.put(2, VectorUtils.of(-1.0092940937477968, 1.2950461550870134));
+ firstModelWeights.put(3, VectorUtils.of(-5.5345231104301655, -0.7554216668724918));
+ firstModelWeights.put(4, VectorUtils.of(0.136489632011201, 1.0937407007786915));
+ firstModelWeights.put(13, VectorUtils.of(-0.27321382073998685, 1.1199411864901687));
+ }
+
+ /**
* Test that count of entries in context is equal to initial dataset size * subsampleRatio.
*/
@Test
@@ -81,7 +100,7 @@ public class BaggingTest extends TrainerTest {
BaggedTrainer<Double> baggedTrainer = TrainerTransformers.makeBagged(
trainer,
- 10,
+ 7,
0.7,
2,
2,
@@ -95,6 +114,10 @@ public class BaggingTest extends TrainerTest {
(k, v) -> v[0]
);
+ Vector weights = ((LogisticRegressionModel)((AdaptableDatasetModel)((ModelsParallelComposition)((AdaptableDatasetModel)mdl
+ .model()).innerModel()).submodels().get(0)).innerModel()).weights();
+
+ TestUtils.assertEquals(firstModelWeights.get(parts), weights, 0.0);
TestUtils.assertEquals(0, mdl.predict(VectorUtils.of(100, 10)), PRECISION);
TestUtils.assertEquals(1, mdl.predict(VectorUtils.of(10, 100)), PRECISION);
}