Posted to commits@ignite.apache.org by sb...@apache.org on 2017/12/25 11:46:46 UTC

[14/20] ignite git commit: IGNITE-7174: Local MLP

http://git-wip-us.apache.org/repos/asf/ignite/blob/e4f19215/modules/ml/src/test/java/org/apache/ignite/ml/nn/SimpleMLPLocalBatchTrainerInput.java
----------------------------------------------------------------------
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/nn/SimpleMLPLocalBatchTrainerInput.java b/modules/ml/src/test/java/org/apache/ignite/ml/nn/SimpleMLPLocalBatchTrainerInput.java
new file mode 100644
index 0000000..ca5fe07
--- /dev/null
+++ b/modules/ml/src/test/java/org/apache/ignite/ml/nn/SimpleMLPLocalBatchTrainerInput.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.nn;
+
+import java.util.Random;
+import org.apache.ignite.lang.IgniteBiTuple;
+import org.apache.ignite.ml.math.Matrix;
+import org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix;
+import org.apache.ignite.ml.nn.architecture.MLPArchitecture;
+import org.apache.ignite.ml.nn.initializers.RandomInitializer;
+import org.apache.ignite.ml.util.Utils;
+
+/**
+ * Class for local batch training of {@link MultilayerPerceptron}.
+ *
+ * It is constructed from two matrices: one containing inputs of the function to approximate and the other containing
+ * the ground truth values of this function for the corresponding inputs.
+ *
+ * The size of the batch returned on each step is fixed to a constant value.
+ */
+public class SimpleMLPLocalBatchTrainerInput implements LocalBatchTrainerInput<MultilayerPerceptron> {
+    /**
+     * Multilayer perceptron to be trained.
+     */
+    private final MultilayerPerceptron mlp;
+
+    /**
+     * Inputs stored as columns.
+     */
+    private final Matrix inputs;
+
+    /**
+     * Ground truths stored as columns.
+     */
+    private final Matrix groundTruth;
+
+    /**
+     * Size of batch returned on each step.
+     */
+    private final int batchSize;
+
+    /**
+     * Construct instance of this class.
+     *
+     * @param arch Architecture of multilayer perceptron.
+     * @param rnd Random numbers generator.
+     * @param inputs Inputs stored as columns.
+     * @param groundTruth Ground truth stored as columns.
+     * @param batchSize Size of batch returned on each step.
+     */
+    public SimpleMLPLocalBatchTrainerInput(MLPArchitecture arch, Random rnd, Matrix inputs, Matrix groundTruth, int batchSize) {
+        this.mlp = new MultilayerPerceptron(arch, new RandomInitializer(rnd));
+        this.inputs = inputs;
+        this.groundTruth = groundTruth;
+        this.batchSize = batchSize;
+    }
+
+    /** {@inheritDoc} */
+    @Override public IgniteBiTuple<Matrix, Matrix> getBatch() {
+        int inputRowSize = inputs.rowSize();
+        int outputRowSize = groundTruth.rowSize();
+
+        Matrix vectors = new DenseLocalOnHeapMatrix(inputRowSize, batchSize);
+        Matrix labels = new DenseLocalOnHeapMatrix(outputRowSize, batchSize);
+
+        int[] samples = Utils.selectKDistinct(inputs.columnSize(), batchSize);
+
+        for (int i = 0; i < batchSize; i++) {
+            vectors.assignColumn(i, inputs.getCol(samples[i]));
+            labels.assignColumn(i, groundTruth.getCol(samples[i]));
+        }
+
+        return new IgniteBiTuple<>(vectors, labels);
+    }
+
+    /** {@inheritDoc} */
+    @Override public MultilayerPerceptron mdl() {
+        return mlp;
+    }
+}
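
For context, a minimal usage sketch (not part of the patch) showing how this input class plugs into the local batch trainer added by this commit. The constructor and trainer calls mirror the Mnist benchmark below; the XOR-style toy matrices, layer sizes and the class name XorMlpSketch are illustrative assumptions only.

    import java.util.Random;
    import org.apache.ignite.ml.math.Matrix;
    import org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix;
    import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
    import org.apache.ignite.ml.nn.Activators;
    import org.apache.ignite.ml.nn.LossFunctions;
    import org.apache.ignite.ml.nn.MultilayerPerceptron;
    import org.apache.ignite.ml.nn.SimpleMLPLocalBatchTrainerInput;
    import org.apache.ignite.ml.nn.architecture.MLPArchitecture;
    import org.apache.ignite.ml.nn.trainers.local.MLPLocalBatchTrainer;
    import org.apache.ignite.ml.nn.updaters.RPropUpdater;

    /** Illustrative sketch only: trains a small MLP on XOR via SimpleMLPLocalBatchTrainerInput. */
    public class XorMlpSketch {
        public static void main(String[] args) {
            // Four XOR samples stored as columns: inputs are 2x4, ground truth is 1x4.
            Matrix inputs = new DenseLocalOnHeapMatrix(2, 4);
            Matrix groundTruth = new DenseLocalOnHeapMatrix(1, 4);

            inputs.assignColumn(0, new DenseLocalOnHeapVector(new double[] {0.0, 0.0}));
            inputs.assignColumn(1, new DenseLocalOnHeapVector(new double[] {0.0, 1.0}));
            inputs.assignColumn(2, new DenseLocalOnHeapVector(new double[] {1.0, 0.0}));
            inputs.assignColumn(3, new DenseLocalOnHeapVector(new double[] {1.0, 1.0}));

            groundTruth.assignColumn(0, new DenseLocalOnHeapVector(new double[] {0.0}));
            groundTruth.assignColumn(1, new DenseLocalOnHeapVector(new double[] {1.0}));
            groundTruth.assignColumn(2, new DenseLocalOnHeapVector(new double[] {1.0}));
            groundTruth.assignColumn(3, new DenseLocalOnHeapVector(new double[] {0.0}));

            // 2 inputs -> 10 hidden sigmoid neurons (with bias) -> 1 sigmoid output.
            MLPArchitecture arch = new MLPArchitecture(2).
                withAddedLayer(10, true, Activators.SIGMOID).
                withAddedLayer(1, false, Activators.SIGMOID);

            // Batch size 4 means every training step sees the whole toy dataset.
            SimpleMLPLocalBatchTrainerInput input =
                new SimpleMLPLocalBatchTrainerInput(arch, new Random(), inputs, groundTruth, 4);

            // Same trainer configuration as in the Mnist benchmark below.
            MultilayerPerceptron mdl = new MLPLocalBatchTrainer<>(LossFunctions.MSE,
                () -> new RPropUpdater(0.1, 1.2, 0.5),
                1E-7,
                200).
                train(input);

            // Predictions for the four training columns; they should tend toward {0, 1, 1, 0}.
            Matrix predictions = mdl.apply(inputs);
        }
    }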

http://git-wip-us.apache.org/repos/asf/ignite/blob/e4f19215/modules/ml/src/test/java/org/apache/ignite/ml/nn/performance/Mnist.java
----------------------------------------------------------------------
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/nn/performance/Mnist.java b/modules/ml/src/test/java/org/apache/ignite/ml/nn/performance/Mnist.java
new file mode 100644
index 0000000..cf959a5
--- /dev/null
+++ b/modules/ml/src/test/java/org/apache/ignite/ml/nn/performance/Mnist.java
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.nn.performance;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.List;
+import java.util.Properties;
+import java.util.Random;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import org.apache.ignite.internal.util.typedef.X;
+import org.apache.ignite.lang.IgniteBiTuple;
+import org.apache.ignite.ml.math.Matrix;
+import org.apache.ignite.ml.math.Tracer;
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.VectorUtils;
+import org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix;
+import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
+import org.apache.ignite.ml.nn.Activators;
+import org.apache.ignite.ml.nn.LossFunctions;
+import org.apache.ignite.ml.nn.MultilayerPerceptron;
+import org.apache.ignite.ml.nn.SimpleMLPLocalBatchTrainerInput;
+import org.apache.ignite.ml.nn.architecture.MLPArchitecture;
+import org.apache.ignite.ml.nn.trainers.local.MLPLocalBatchTrainer;
+import org.apache.ignite.ml.nn.updaters.RPropUpdater;
+import org.apache.ignite.ml.trees.performance.ColumnDecisionTreeTrainerBenchmark;
+import org.apache.ignite.ml.util.MnistUtils;
+import org.junit.Test;
+
+import static org.apache.ignite.ml.math.VectorUtils.num2Vec;
+
+/**
+ * Various benchmarks intended for manual runs.
+ */
+public class Mnist {
+    /** Name of the property specifying the path to the training set images. */
+    private static final String PROP_TRAINING_IMAGES = "mnist.training.images";
+
+    /** Name of the property specifying the path to the training set labels. */
+    private static final String PROP_TRAINING_LABELS = "mnist.training.labels";
+
+    /** Name of the property specifying the path to the test set images. */
+    private static final String PROP_TEST_IMAGES = "mnist.test.images";
+
+    /** Name of the property specifying the path to the test set labels. */
+    private static final String PROP_TEST_LABELS = "mnist.test.labels";
+
+    /**
+     * Run multilayer perceptron training on the MNIST dataset using the local batch trainer.
+     * To run this test, rename this method so that it starts with 'test'.
+     *
+     * @throws IOException In case of errors while loading the MNIST dataset.
+     */
+    @Test
+    public void tstMNIST() throws IOException {
+        int samplesCnt = 60_000;
+        int featCnt = 28 * 28;
+        int hiddenNeuronsCnt = 100;
+
+        Properties props = loadMNISTProperties();
+
+        Stream<DenseLocalOnHeapVector> trainingMnistStream = MnistUtils.mnist(props.getProperty(PROP_TRAINING_IMAGES),
+            props.getProperty(PROP_TRAINING_LABELS), new Random(123L), samplesCnt);
+
+        Stream<DenseLocalOnHeapVector> testMnistStream = MnistUtils.mnist(props.getProperty(PROP_TEST_IMAGES),
+            props.getProperty(PROP_TEST_LABELS), new Random(123L), 10_000);
+
+        IgniteBiTuple<Matrix, Matrix> ds = createDataset(trainingMnistStream, samplesCnt, featCnt);
+        IgniteBiTuple<Matrix, Matrix> testDs = createDataset(testMnistStream, 10_000, featCnt);
+
+        MLPArchitecture conf = new MLPArchitecture(featCnt).
+            withAddedLayer(hiddenNeuronsCnt, true, Activators.SIGMOID).
+            withAddedLayer(10, false, Activators.SIGMOID);
+
+        SimpleMLPLocalBatchTrainerInput input = new SimpleMLPLocalBatchTrainerInput(conf,
+            new Random(),
+            ds.get1(),
+            ds.get2(),
+            2000);
+
+        X.println("Training started");
+        long before = System.currentTimeMillis();
+
+        MultilayerPerceptron mdl = new MLPLocalBatchTrainer<>(LossFunctions.MSE,
+            () -> new RPropUpdater(0.1, 1.2, 0.5),
+            1E-7,
+            200).
+            train(input);
+
+        X.println("Training finished in " + (System.currentTimeMillis() - before) + " ms");
+
+        Vector predicted = mdl.apply(testDs.get1()).foldColumns(VectorUtils::vec2Num);
+        Vector truth = testDs.get2().foldColumns(VectorUtils::vec2Num);
+
+        Tracer.showAscii(truth);
+        Tracer.showAscii(predicted);
+    }
+
+    /** Build feature and label matrices (one sample per column) from a stream of MNIST vectors. */
+    private IgniteBiTuple<Matrix, Matrix> createDataset(Stream<DenseLocalOnHeapVector> s, int samplesCnt, int featCnt) {
+        Matrix vectors = new DenseLocalOnHeapMatrix(featCnt, samplesCnt);
+        Matrix labels = new DenseLocalOnHeapMatrix(10, samplesCnt);
+        List<DenseLocalOnHeapVector> sc = s.collect(Collectors.toList());
+
+        for (int i = 0; i < samplesCnt; i++) {
+            DenseLocalOnHeapVector v = sc.get(i);
+            vectors.assignColumn(i, v.viewPart(0, featCnt));
+            labels.assignColumn(i, num2Vec((int)v.getX(featCnt), 10));
+        }
+
+        return new IgniteBiTuple<>(vectors, labels);
+    }
+
+    /** Load properties for MNIST tests. */
+    private static Properties loadMNISTProperties() throws IOException {
+        Properties res = new Properties();
+
+        InputStream is = ColumnDecisionTreeTrainerBenchmark.class.getClassLoader().getResourceAsStream("manualrun/trees/columntrees.manualrun.properties");
+
+        res.load(is);
+
+        return res;
+    }
+}
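
A possible follow-up for manual runs (not part of the patch): compute a single accuracy figure from the 'predicted' and 'truth' vectors produced at the end of tstMNIST() above, instead of only dumping them with Tracer.showAscii. The snippet below is a sketch assuming Vector.size() and getX(int) as used elsewhere in this module.

    // Fraction of test samples where the predicted digit matches the label.
    int hits = 0;

    for (int i = 0; i < truth.size(); i++) {
        if ((int)predicted.getX(i) == (int)truth.getX(i))
            hits++;
    }

    X.println("Accuracy: " + (100.0 * hits / truth.size()) + "%");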

http://git-wip-us.apache.org/repos/asf/ignite/blob/e4f19215/modules/ml/src/test/java/org/apache/ignite/ml/regressions/OLSMultipleLinearRegressionModelTest.java
----------------------------------------------------------------------
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/regressions/OLSMultipleLinearRegressionModelTest.java b/modules/ml/src/test/java/org/apache/ignite/ml/regressions/OLSMultipleLinearRegressionModelTest.java
index 37c972c..74d5524 100644
--- a/modules/ml/src/test/java/org/apache/ignite/ml/regressions/OLSMultipleLinearRegressionModelTest.java
+++ b/modules/ml/src/test/java/org/apache/ignite/ml/regressions/OLSMultipleLinearRegressionModelTest.java
@@ -48,6 +48,6 @@ public class OLSMultipleLinearRegressionModelTest {
         OLSMultipleLinearRegressionModel mdl = trainer.train(data);
 
         TestUtils.assertEquals(new double[] {0d, 0d, 0d, 0d, 0d, 0d},
-            val.minus(mdl.predict(val)).getStorage().data(), 1e-13);
+            val.minus(mdl.apply(val)).getStorage().data(), 1e-13);
     }
 }

http://git-wip-us.apache.org/repos/asf/ignite/blob/e4f19215/modules/ml/src/test/java/org/apache/ignite/ml/trees/ColumnDecisionTreeTrainerTest.java
----------------------------------------------------------------------
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/trees/ColumnDecisionTreeTrainerTest.java b/modules/ml/src/test/java/org/apache/ignite/ml/trees/ColumnDecisionTreeTrainerTest.java
index 9e81bea..b090f43 100644
--- a/modules/ml/src/test/java/org/apache/ignite/ml/trees/ColumnDecisionTreeTrainerTest.java
+++ b/modules/ml/src/test/java/org/apache/ignite/ml/trees/ColumnDecisionTreeTrainerTest.java
@@ -184,8 +184,8 @@ public class ColumnDecisionTreeTrainerTest extends BaseDecisionTreeTest {
         byRegion.keySet().forEach(k -> {
             LabeledVectorDouble sp = byRegion.get(k).get(0);
             Tracer.showAscii(sp.features());
-            X.println("Actual and predicted vectors [act=" + sp.label() + " " + ", pred=" + mdl.predict(sp.features()) + "]");
-            assert mdl.predict(sp.features()) == sp.doubleLabel();
+            X.println("Actual and predicted vectors [act=" + sp.label() + " " + ", pred=" + mdl.apply(sp.features()) + "]");
+            assert mdl.apply(sp.features()) == sp.doubleLabel();
         });
     }
 }

http://git-wip-us.apache.org/repos/asf/ignite/blob/e4f19215/modules/ml/src/test/java/org/apache/ignite/ml/trees/performance/ColumnDecisionTreeTrainerBenchmark.java
----------------------------------------------------------------------
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/trees/performance/ColumnDecisionTreeTrainerBenchmark.java b/modules/ml/src/test/java/org/apache/ignite/ml/trees/performance/ColumnDecisionTreeTrainerBenchmark.java
index 524a8ad..a72dec2 100644
--- a/modules/ml/src/test/java/org/apache/ignite/ml/trees/performance/ColumnDecisionTreeTrainerBenchmark.java
+++ b/modules/ml/src/test/java/org/apache/ignite/ml/trees/performance/ColumnDecisionTreeTrainerBenchmark.java
@@ -274,8 +274,8 @@ public class ColumnDecisionTreeTrainerBenchmark extends BaseDecisionTreeTest {
         byRegion.keySet().forEach(k -> {
             LabeledVectorDouble sp = byRegion.get(k).get(0);
             Tracer.showAscii(sp.features());
-            X.println("Predicted value and label [pred=" + mdl.predict(sp.features()) + ", label=" + sp.doubleLabel() + "]");
-            assert mdl.predict(sp.features()) == sp.doubleLabel();
+            X.println("Predicted value and label [pred=" + mdl.apply(sp.features()) + ", label=" + sp.doubleLabel() + "]");
+            assert mdl.apply(sp.features()) == sp.doubleLabel();
         });
     }
 

http://git-wip-us.apache.org/repos/asf/ignite/blob/e4f19215/modules/ml/src/test/resources/manualrun/trees/columntrees.manualrun.properties
----------------------------------------------------------------------
diff --git a/modules/ml/src/test/resources/manualrun/trees/columntrees.manualrun.properties b/modules/ml/src/test/resources/manualrun/trees/columntrees.manualrun.properties
index d9574f3..2fd77ed 100644
--- a/modules/ml/src/test/resources/manualrun/trees/columntrees.manualrun.properties
+++ b/modules/ml/src/test/resources/manualrun/trees/columntrees.manualrun.properties
@@ -15,7 +15,7 @@
 # limitations under the License.
 #
 # Paths to mnist dataset parts.
-mnist.training.images=/path/to/train-images-idx3-ubyte
-mnist.training.labels=/path/to/train-labels-idx1-ubyte
-mnist.test.images=/path/to/t10k-images-idx3-ubyte
-mnist.test.labels=/path/to/t10k-labels-idx1-ubyte
+mnist.training.images=/path/to/mnist/train-images-idx3-ubyte
+mnist.training.labels=/path/to/mnist/train-labels-idx1-ubyte
+mnist.test.images=/path/to/mnist/t10k-images-idx3-ubyte
+mnist.test.labels=/path/to/mnist/t10k-labels-idx1-ubyte

http://git-wip-us.apache.org/repos/asf/ignite/blob/e4f19215/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/trees/SplitDataGenerator.java
----------------------------------------------------------------------
diff --git a/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/trees/SplitDataGenerator.java b/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/trees/SplitDataGenerator.java
index f530300..f9117f4 100644
--- a/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/trees/SplitDataGenerator.java
+++ b/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/trees/SplitDataGenerator.java
@@ -143,7 +143,7 @@ class SplitDataGenerator<V extends Vector> {
 
         DecisionTreeModel mdl = trainer.train(new MatrixColumnDecisionTreeTrainerInput(m, catFeaturesInfo));
 
-        byRegion.keySet().forEach(k -> mdl.predict(byRegion.get(k).get(0).features()));
+        byRegion.keySet().forEach(k -> mdl.apply(byRegion.get(k).get(0).features()));
     }
 
     /**