You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ignite.apache.org by ch...@apache.org on 2019/02/01 13:54:08 UTC
[ignite] branch master updated: IGNITE-10793: [ML] Create
comprehensive example for dataset generators
This is an automated email from the ASF dual-hosted git repository.
chief pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ignite.git
The following commit(s) were added to refs/heads/master by this push:
new 9a2fb4c IGNITE-10793: [ML] Create comprehensive example for dataset generators
9a2fb4c is described below
commit 9a2fb4c18ea2a94baeb335ffebeaa109691dd199
Author: Alexey Platonov <ap...@gmail.com>
AuthorDate: Fri Feb 1 16:53:48 2019 +0300
IGNITE-10793: [ML] Create comprehensive example for dataset generators
This closes #5999
---
.../ParametricVectorGeneratorExample.java | 65 ++++++++++
.../util/generators/StandardGeneratorsExample.java | 75 +++++++++++
.../generators/VectorGeneratorFamilyExample.java | 65 ++++++++++
.../VectorGeneratorPrimitivesExample.java | 97 ++++++++++++++
.../examples/ml/util/generators/package-info.java | 22 ++++
.../java/org/apache/ignite/ml/math/Tracer.java | 141 ++++++++++++++++++++-
.../apache/ignite/ml/math/d3-dataset-template.html | 112 ++++++++++++++++
.../org/apache/ignite/ml/IgniteMLTestSuite.java | 2 +
.../generators/DataStreamGeneratorTestSuite.java | 49 +++++++
9 files changed, 622 insertions(+), 6 deletions(-)
diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/util/generators/ParametricVectorGeneratorExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/util/generators/ParametricVectorGeneratorExample.java
new file mode 100644
index 0000000..217bb36
--- /dev/null
+++ b/examples/src/main/java/org/apache/ignite/examples/ml/util/generators/ParametricVectorGeneratorExample.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.examples.ml.util.generators;
+
+import java.io.IOException;
+import org.apache.ignite.ml.math.Tracer;
+import org.apache.ignite.ml.util.generators.DataStreamGenerator;
+import org.apache.ignite.ml.util.generators.primitives.scalar.UniformRandomProducer;
+import org.apache.ignite.ml.util.generators.primitives.vector.ParametricVectorGenerator;
+
+/**
+ * Examples of using {@link ParametricVectorGenerator} for generating two dimensional data.
+ * {@link ParametricVectorGenerator} allows to create surfaces in N-dimensional spaces where each
+ * dimension depends on one parameter 't'. In such generator just one random producer is used, it
+ * defines a set of values for parameter 't'.
+ */
+public class ParametricVectorGeneratorExample {
+ /**
+ * Run example.
+ *
+ * @param args Args.
+ */
+ public static void main(String... args) throws IOException {
+ // Example of Archimedean spiral.
+ DataStreamGenerator spiral = new ParametricVectorGenerator(
+ new UniformRandomProducer(-50, 50), //'t' will be in [-50, 50] range
+ t -> Math.cos(Math.abs(t)) * Math.abs(t),
+ t -> Math.sin(Math.abs(t)) * Math.abs(t)
+ ).asDataStream();
+
+ Tracer.showClassificationDatasetHtml("Spiral", spiral, 20000, 0, 1, false);
+
+ // Example of heart shape.
+ DataStreamGenerator heart = new ParametricVectorGenerator(new UniformRandomProducer(-50, 50),
+ t -> 16 * Math.pow(Math.sin(t), 3),
+ t -> 13 * Math.cos(t) - 5 * Math.cos(2 * t) - 2 * Math.cos(3 * t) - Math.cos(4 * t)
+ ).asDataStream();
+
+ Tracer.showClassificationDatasetHtml("Heart", heart, 2000, 0, 1, false);
+
+ // Example of butterfly-like shape.
+ DataStreamGenerator butterfly = new ParametricVectorGenerator(
+ new UniformRandomProducer(-100, 100), //'t' will be in [-100, 100] range
+ t -> 10 * Math.sin(t) * (Math.exp(Math.cos(t)) - 2 * Math.cos(4 * t) - Math.pow(Math.sin(t / 12), 5)),
+ t -> 10 * Math.cos(t) * (Math.exp(Math.cos(t)) - 2 * Math.cos(4 * t) - Math.pow(Math.sin(t / 12), 5))
+ ).asDataStream();
+
+ Tracer.showClassificationDatasetHtml("Butterfly", butterfly, 2000, 0, 1, false);
+ }
+}
diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/util/generators/StandardGeneratorsExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/util/generators/StandardGeneratorsExample.java
new file mode 100644
index 0000000..65ff98c
--- /dev/null
+++ b/examples/src/main/java/org/apache/ignite/examples/ml/util/generators/StandardGeneratorsExample.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.examples.ml.util.generators;
+
+import java.io.IOException;
+import org.apache.ignite.ml.math.Tracer;
+import org.apache.ignite.ml.math.primitives.vector.VectorUtils;
+import org.apache.ignite.ml.util.generators.standard.GaussianMixtureDataStream;
+import org.apache.ignite.ml.util.generators.standard.RegressionDataStream;
+import org.apache.ignite.ml.util.generators.standard.RingsDataStream;
+import org.apache.ignite.ml.util.generators.standard.TwoSeparableClassesDataStream;
+
+/**
+ * Examples of using standard dataset generators. Standard dataset generators represent
+ * toy datasets that can be used for algorithm testing.
+ */
+public class StandardGeneratorsExample {
+ /**
+ * Run example.
+ *
+ * @param args Args.
+ */
+ public static void main(String... args) throws IOException {
+ // Constructs a set of gaussians with different mean and variance values where each gaussian represents
+ // a unique class.
+ GaussianMixtureDataStream gaussianMixture = new GaussianMixtureDataStream.Builder()
+ // Variance vector should be two dimensional because there are two dimensions.
+ .add(VectorUtils.of(0., 0.), VectorUtils.of(1, 0.1))
+ .add(VectorUtils.of(0., -10.), VectorUtils.of(2, 0.1))
+ .add(VectorUtils.of(0., -20.), VectorUtils.of(4, 0.1))
+ .add(VectorUtils.of(0., 10.), VectorUtils.of(0.05, 0.1))
+ .add(VectorUtils.of(0., 20.), VectorUtils.of(0.025, 0.1))
+ .add(VectorUtils.of(-10., 0.), VectorUtils.of(0.1, 2))
+ .add(VectorUtils.of(-20., 0.), VectorUtils.of(0.1, 4))
+ .add(VectorUtils.of(10., 0.), VectorUtils.of(0.1, 0.05))
+ .add(VectorUtils.of(20., 0.), VectorUtils.of(0.1, 0.025))
+ .build();
+
+ Tracer.showClassificationDatasetHtml("Gaussian mixture", gaussianMixture, 2500, 0, 1, true);
+
+ // A set of nested rings where each ring represents a class.
+ RingsDataStream ringsDataStream = new RingsDataStream(7, 5.0, 5.0);
+ Tracer.showClassificationDatasetHtml("Rings", ringsDataStream, 1500, 0, 1, true);
+
+ // Examples of linearly separable classes, a set of uniformly distributed points on plane that can be split
+ // into two classes by diagonal hyperplane. Each example represents a different margin - distance between
+ // points and diagonal hyperplane. If margin < 0 then points of different classes are mixed.
+ TwoSeparableClassesDataStream linearySeparableClasses1 = new TwoSeparableClassesDataStream(0., 20.);
+ TwoSeparableClassesDataStream linearySeparableClasses2 = new TwoSeparableClassesDataStream(5., 20.);
+ TwoSeparableClassesDataStream linearySeparableClasses3 = new TwoSeparableClassesDataStream(-5., 20.);
+ Tracer.showClassificationDatasetHtml("Two separable classes (margin = 0.0)", linearySeparableClasses1, 1500, 0, 1, true);
+ Tracer.showClassificationDatasetHtml("Two separable classes (margin = 5.0)", linearySeparableClasses2, 1500, 0, 1, true);
+ Tracer.showClassificationDatasetHtml("Two separable classes (margin = -5.0)", linearySeparableClasses3, 1500, 0, 1, true);
+
+ // Example of regression dataset with base function y(x) = |x^2 - 10|.
+ RegressionDataStream regression = RegressionDataStream.twoDimensional(
+ x -> Math.abs(x * x - 10), -10, 10);
+ Tracer.showRegressionDatasetInHtml("|x^2 - 10|", regression, 1000, 0);
+ }
+}
diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/util/generators/VectorGeneratorFamilyExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/util/generators/VectorGeneratorFamilyExample.java
new file mode 100644
index 0000000..6bd5d30
--- /dev/null
+++ b/examples/src/main/java/org/apache/ignite/examples/ml/util/generators/VectorGeneratorFamilyExample.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.examples.ml.util.generators;
+
+import java.io.IOException;
+import org.apache.ignite.ml.math.Tracer;
+import org.apache.ignite.ml.util.generators.primitives.vector.VectorGenerator;
+import org.apache.ignite.ml.util.generators.primitives.vector.VectorGeneratorPrimitives;
+import org.apache.ignite.ml.util.generators.primitives.vector.VectorGeneratorsFamily;
+
+/**
+ * Example of using distribution families. Each distribution from family represents a class. Distribution family
+ * is a distribution hence such family can be used as element of high-level family where this distribution will
+ * represent one class. Such families help to construct distributions with complex shape.
+ */
+public class VectorGeneratorFamilyExample {
+ /**
+ * Run example.
+ *
+ * @param args Args.
+ */
+ public static void main(String[] args) throws IOException {
+ // Family of ring sectors.
+ VectorGenerator family1 = new VectorGeneratorsFamily.Builder()
+ .add(VectorGeneratorPrimitives.ring(5., 0, 2 * Math.PI))
+ .add(VectorGeneratorPrimitives.ring(10., 0, Math.PI))
+ .add(VectorGeneratorPrimitives.ring(15., Math.PI, 2 * Math.PI))
+ .add(VectorGeneratorPrimitives.ring(20., 0, Math.PI / 2))
+ .add(VectorGeneratorPrimitives.ring(25., Math.PI / 2, Math.PI))
+ .add(VectorGeneratorPrimitives.ring(30., Math.PI, 3 * Math.PI / 2))
+ .add(VectorGeneratorPrimitives.ring(35., 3 * Math.PI / 2, 2 * Math.PI))
+ .build();
+
+ // Family that is constructed from previous family by 45 degree rotation and scaling by 1.5.
+ VectorGenerator family2 = family1.rotate(Math.PI/ 4).map(v -> v.times(1.5));
+
+ Tracer.showClassificationDatasetHtml("Family of ring sectors [first family]", family1.asDataStream(),
+ 2000, 0, 1, true);
+ Tracer.showClassificationDatasetHtml("Family of ring sectors [second family]", family2.asDataStream(),
+ 2000, 0, 1, true);
+
+ // Combination of families where first family represents a complex distribution for first class and
+ // second family for second class.
+ VectorGenerator family = new VectorGeneratorsFamily.Builder()
+ .add(family1).add(family2).build();
+
+ Tracer.showClassificationDatasetHtml("Family of ring sectors [both families as two calsses]", family.asDataStream(),
+ 2000, 0, 1, true);
+ }
+}
diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/util/generators/VectorGeneratorPrimitivesExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/util/generators/VectorGeneratorPrimitivesExample.java
new file mode 100644
index 0000000..cc2a7af
--- /dev/null
+++ b/examples/src/main/java/org/apache/ignite/examples/ml/util/generators/VectorGeneratorPrimitivesExample.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.examples.ml.util.generators;
+
+import java.io.IOException;
+import org.apache.ignite.ml.math.Tracer;
+import org.apache.ignite.ml.math.primitives.vector.VectorUtils;
+import org.apache.ignite.ml.util.generators.DataStreamGenerator;
+import org.apache.ignite.ml.util.generators.primitives.scalar.DiscreteRandomProducer;
+import org.apache.ignite.ml.util.generators.primitives.vector.VectorGenerator;
+import org.apache.ignite.ml.util.generators.primitives.vector.VectorGeneratorPrimitives;
+
+/**
+ * Example of using primitive generators and combiners for generators.
+ */
+public class VectorGeneratorPrimitivesExample {
+ /**
+ * Run example.
+ *
+ * @param args Args.
+ */
+ public static void main(String... args) throws IOException {
+ // Vectors from ring-like distribution.
+ VectorGenerator fullRing = VectorGeneratorPrimitives.ring(10, 0, 2 * Math.PI);
+ // Vectors from ring's sector distribution.
+ VectorGenerator partOfRing = VectorGeneratorPrimitives.ring(15, -Math.PI / 2, Math.PI);
+ // Vectors from distribution having filled circle shape.
+ VectorGenerator circle = VectorGeneratorPrimitives.circle(14.5);
+ // Vectors from uniform distribution in n-dimensional space.
+ VectorGenerator parallelogram = VectorGeneratorPrimitives.parallelogram(VectorUtils.of(10, 15));
+ // Vectors from gaussian.
+ VectorGenerator gauss = VectorGeneratorPrimitives.gauss(VectorUtils.of(0.0, 0.0), VectorUtils.of(10., 15.));
+
+ Tracer.showClassificationDatasetHtml("Full ring", fullRing.asDataStream(), 1500, 0, 1, false);
+ Tracer.showClassificationDatasetHtml("Sector", partOfRing.asDataStream(), 1500, 0, 1, false);
+ Tracer.showClassificationDatasetHtml("Circle", circle.asDataStream(), 1500, 0, 1, false);
+ Tracer.showClassificationDatasetHtml("Paralellogram", parallelogram.asDataStream(), 1500, 0, 1, false);
+ Tracer.showClassificationDatasetHtml("Gauss", gauss.asDataStream(), 1500, 0, 1, false);
+
+ // Using of rotate for generator.
+ VectorGenerator rotatedParallelogram = parallelogram.rotate(-Math.PI / 8);
+ Tracer.showClassificationDatasetHtml("Rotated paralellogram", rotatedParallelogram.asDataStream(), 1500, 0, 1, false);
+
+ // Sum of generators where vectors from first generator are summed with corresponding vectors from second generator.
+ VectorGenerator gaussPlusRing = gauss.plus(fullRing);
+ Tracer.showClassificationDatasetHtml("Gauss plus ring", gaussPlusRing.asDataStream(), 1500, 0, 1, false);
+
+ // Example of vector generator filtering.
+ VectorGenerator filteredCircle = circle.filter(v -> Math.abs(v.get(0)) > 5);
+ Tracer.showClassificationDatasetHtml("Filtered circle", filteredCircle.asDataStream(), 1500, 0, 1, false);
+
+ // Example of using map function for vector generator.
+ VectorGenerator mappedCircle = circle.map(v -> v.get(1) < 0 ? v : v.times(VectorUtils.of(2, 4)));
+ Tracer.showClassificationDatasetHtml("Mapped circle", mappedCircle.asDataStream(), 1500, 0, 1, false);
+
+ // Example of generators concatenation where each vector of first generator is concatenated with corresponding
+ // vector from second generator.
+ DataStreamGenerator ringAndGauss = fullRing.concat(gauss).asDataStream();
+ Tracer.showClassificationDatasetHtml("Ring and gauss [x1, x2]", ringAndGauss, 1500, 0, 1, false);
+ Tracer.showClassificationDatasetHtml("Ring and gauss [x2, x3]", ringAndGauss, 1500, 1, 2, false);
+ Tracer.showClassificationDatasetHtml("Ring and gauss [x3, x4]", ringAndGauss, 1500, 2, 3, false);
+ Tracer.showClassificationDatasetHtml("Ring and gauss [x4, x1]", ringAndGauss, 1500, 3, 0, false);
+
+ // Example of vector generator noisify function.
+ VectorGenerator noisifiedRing = fullRing.noisify(new DiscreteRandomProducer(0.1, 0.2, 0.3, 0.4));
+ Tracer.showClassificationDatasetHtml("Noisified ring", noisifiedRing.asDataStream(), 1500, 0, 1, false);
+
+ // Example of complex distribution with "axe" shape.
+ VectorGenerator axeBlade = circle.filter(v -> Math.abs(v.get(1)) > 5.)
+ .rotate(Math.PI / 4).filter(v -> Math.abs(v.get(0)) > 1.5)
+ .rotate(-Math.PI / 2).filter(v -> Math.abs(v.get(0)) > 1.5)
+ .rotate(Math.PI / 4).filter(v -> Math.sqrt(v.getLengthSquared()) > 10)
+ .map(v -> {
+ if (Math.abs(v.get(0)) > 8 && Math.abs(v.get(1)) < 9)
+ return v.times(0.5);
+ else
+ return v;
+ }).rotate(Math.PI / 2);
+
+ Tracer.showClassificationDatasetHtml("Axe blade", axeBlade.asDataStream(), 1500, 0, 1, false);
+ }
+}
diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/util/generators/package-info.java b/examples/src/main/java/org/apache/ignite/examples/ml/util/generators/package-info.java
new file mode 100644
index 0000000..e62b15e
--- /dev/null
+++ b/examples/src/main/java/org/apache/ignite/examples/ml/util/generators/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * <!-- Package description. -->
+ * Examples for data stream generators.
+ */
+package org.apache.ignite.examples.ml.util.generators;
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/math/Tracer.java b/modules/ml/src/main/java/org/apache/ignite/ml/math/Tracer.java
index c4703f4..da5e53c 100644
--- a/modules/ml/src/main/java/org/apache/ignite/ml/math/Tracer.java
+++ b/modules/ml/src/main/java/org/apache/ignite/ml/math/Tracer.java
@@ -29,13 +29,20 @@ import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
+import java.util.HashMap;
+import java.util.List;
import java.util.Locale;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.ignite.IgniteLogger;
import org.apache.ignite.lang.IgniteUuid;
import org.apache.ignite.ml.math.primitives.matrix.Matrix;
import org.apache.ignite.ml.math.primitives.vector.Vector;
+import org.apache.ignite.ml.structures.LabeledVector;
+import org.apache.ignite.ml.util.generators.DataStreamGenerator;
/**
* Utility methods to support output of {@link Vector} and {@link Matrix} instances to plain text or HTML.
@@ -70,8 +77,7 @@ public class Tracer {
}
/**
- * Default vector color mapper implementation that map given double value
- * to continuous red-blue (R_B) specter.
+ * Default vector color mapper implementation that maps given double value to continuous red-blue (R_B) spectrum.
*
* @param vec Vector to map.
* @return {@link ColorMapper} for the given vector.
@@ -81,8 +87,7 @@ public class Tracer {
}
/**
- * Default matrix color mapper implementation that map given double value
- * to continuous red-blue (R_B) specter.
+ * Default matrix color mapper implementation that maps given double value to continuous red-blue (R_B) spectrum.
*
* @param mtx Matrix to be mapped.
* @return Color mapper for given matrix.
@@ -129,7 +134,7 @@ public class Tracer {
String cls = vec.getClass().getSimpleName();
String vectorStr = mkString(vec, fmt);
- if(showMeta)
+ if (showMeta)
return String.format(LOCALE, "%s(%d) [%s]", cls, vec.size(), vectorStr);
else
return String.format(LOCALE, "[%s]", vectorStr);
@@ -177,7 +182,6 @@ public class Tracer {
System.out.println(asAscii(mtx, fmt));
}
-
/**
* @param mtx {@link Matrix} object to show as a plain text.
* @param fmt Format string for matrix rows.
@@ -329,6 +333,131 @@ public class Tracer {
}
/**
+ * Opens browser and shows given dataset generator's data on two dimensional plane. Label of vectors
+ * is used for ordinate representation.
+ *
+ * @param name Name of dataset for showing.
+ * @param generator Datastream generator.
+ * @param limit Count of points that should be taken from dataset.
+ * @param xIdx Index of dimension for abscissa representation.
+ * @throws IOException
+ */
+ public static void showRegressionDatasetInHtml(String name, DataStreamGenerator generator, int limit,
+ int xIdx) throws IOException {
+
+ List<LabeledVector<Double>> values = generator.labeled().limit(limit).collect(Collectors.toList());
+ showVectorsHtml(name, values, xIdx, LabeledVector::label, v -> Color.BLUE);
+ }
+
+ /**
+ * Opens browser and shows given dataset generator's data on two dimensional plane. Label of vectors
+ * is used for ordinate representation.
+ *
+ * @param generator Datastream generator.
+ * @param limit Count of points that should be taken from dataset.
+ * @param xIdx Index of dimension for abscissa representation.
+ * @throws IOException
+ */
+ public static void showRegressionDatasetInHtml(DataStreamGenerator generator, int limit,
+ int xIdx) throws IOException {
+
+ showRegressionDatasetInHtml("Regression dataset", generator, limit, xIdx);
+ }
+
+ /**
+ * Opens browser and shows given dataset generator's data on two dimensional plane.
+ *
+ * @param generator Datastream generator.
+ * @param limit Count of points that should be taken from dataset.
+ * @param xIdx Index of dimension for abscissa representation.
+ * @param yIdx Index of dimension for ordinate representation.
+ * @param isLabeled If {@code true} then colors will be used to separate different classes on plane.
+ * @throws IOException
+ */
+ public static void showClassificationDatasetHtml(DataStreamGenerator generator, int limit,
+ int xIdx, int yIdx, boolean isLabeled) throws IOException {
+
+ showClassificationDatasetHtml("Classification dataset", generator, limit, xIdx, yIdx, isLabeled);
+ }
+
+ /**
+ * Opens browser and shows given dataset generator's data on two dimensional plane.
+ *
+ * @param name Name of dataset for showing.
+ * @param generator Datastream generator.
+ * @param limit Count of points that should be taken from dataset.
+ * @param xIdx Index of dimension for abscissa representation.
+ * @param yIdx Index of dimension for ordinate representation.
+ * @param isLabeled If {@code true} then colors will be used to separate different classes on plane.
+ * @throws IOException
+ */
+ public static void showClassificationDatasetHtml(String name, DataStreamGenerator generator, int limit,
+ int xIdx, int yIdx, boolean isLabeled) throws IOException {
+
+ List<LabeledVector<Double>> values = generator.labeled().limit(limit).collect(Collectors.toList());
+ Map<Integer, Color> labelsMapping = new HashMap<>();
+ if (isLabeled) {
+ Set<Double> lbls = values.stream().map(LabeledVector::label).collect(Collectors.toSet());
+ Random rnd = new Random(0);
+ for (Double label : lbls)
+ labelsMapping.put(label.intValue(), new Color(rnd.nextInt()));
+ }
+
+ showVectorsHtml(name, values, xIdx,
+ v -> v.features().get(yIdx),
+ v -> isLabeled ? labelsMapping.get(v.label().intValue()) : Color.BLUE
+ );
+ }
+
+ /**
+ * Fills "d3-dataset-template.html" with given points and opens result; no-op if browsing is unsupported.
+ *
+ * @param name Dataset name for showing.
+ * @param values List of vectors taken from dataset generator.
+ * @param xIndex Index of abscissa in vector.
+ * @param yGetter Getter of ordinate value from vector.
+ * @param colorGetter Getter of color for showing.
+ * @throws IOException If an I/O helper used for rendering fails.
+ */
+ private static void showVectorsHtml(String name, List<LabeledVector<Double>> values,
+ int xIndex, Function<LabeledVector<Double>, Double> yGetter,
+ Function<LabeledVector<Double>, Color> colorGetter) throws IOException {
+
+ if (!isBrowseSupported())
+ return;
+
+ String tmpl = fileToString("d3-dataset-template.html");
+
+ String data = values.stream()
+ .map(v -> dataColorJson(v.features().get(xIndex), yGetter.apply(v), colorGetter.apply(v)))
+ .collect(Collectors.joining(",", "[", "]"));
+
+ // Escape name for JS string literal; quote replacements so '$' and '\' are not treated as regex group refs.
+ String jsName = name.replace("\\", "\\\\").replace("\"", "\\\"");
+ openHtmlFile(tmpl.
+ replaceAll("/\\*@NAME@\\*/.*\n", java.util.regex.Matcher.quoteReplacement("var name = \"" + jsName + "\";\n")).
+ replaceAll("/\\*@DATA@\\*/.*\n", java.util.regex.Matcher.quoteReplacement("var data = " + data + ";\n"))
+ );
+ }
+
+ /**
+ * Serialize xy-pair with color to JSON representation.
+ *
+ * @param x X-value.
+ * @param y Y-value.
+ * @param clr Color.
+ */
+ private static String dataColorJson(double x, double y, Color clr) {
+ return "{" +
+ "x: " + String.format(LOCALE, "%4f", x) +
+ ", y: " + String.format(LOCALE, "%4f", y) +
+ ", r: " + clr.getRed() +
+ ", g: " + clr.getGreen() +
+ ", b: " + clr.getBlue() +
+ "}";
+ }
+
+ /**
* @param d Value of {@link Matrix} or {@link Vector} element.
* @param clr {@link Color} to paint.
* @return JSON representation for given value and color.
diff --git a/modules/ml/src/main/resources/org/apache/ignite/ml/math/d3-dataset-template.html b/modules/ml/src/main/resources/org/apache/ignite/ml/math/d3-dataset-template.html
new file mode 100644
index 0000000..03e9c89
--- /dev/null
+++ b/modules/ml/src/main/resources/org/apache/ignite/ml/math/d3-dataset-template.html
@@ -0,0 +1,112 @@
+<!DOCTYPE html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<meta charset="utf-8">
+<title>IgniteML</title>
+<style>
+ body {
+ margin: 0 15px;
+ }
+
+ p {
+ margin: 10px 0 !important;
+ }
+
+ .name {
+ font-size: 20px;
+ font-weight: 400;
+ font-family: monospace;
+ }
+
+ .swatch {
+ display: inline-block;
+ width: 25px;
+ height: 25px;
+ margin-left: 5px;
+ vertical-align: bottom;
+ }
+
+</style>
+<body>
+<img style="margin-top: 15px" width="100px" src="https://ignite.apache.org/images/logo3.png">
+<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/4.5.0/d3.min.js"></script>
+<script>
+ /*@DATA@*/var data = [{r: 0, g: 0, b: 0, x: 0, y: -10}, {r: 0, g: 0, b: 255, x: -10, y: 0}, {r: 255, g: 0, b: 0, x: 10, y: 0}, {r: 0, g: 255, b: 0, x: 0, y: 10}];
+ /*@NAME@*/var name = "Graph";
+
+ var minX = Number.MAX_SAFE_INTEGER, minY = Number.MAX_SAFE_INTEGER;
+ var maxX = Number.MIN_SAFE_INTEGER, maxY = Number.MIN_SAFE_INTEGER;
+ // Track min and max independently: an "else if" would leave max unset for a single point or descending data.
+ for (var row = 0; row < data.length; row++) {
+ var x = data[row].x;
+ var y = data[row].y;
+ if (x < minX)
+ minX = x;
+ if (x > maxX)
+ maxX = x;
+
+ if (y < minY)
+ minY = y;
+ if (y > maxY)
+ maxY = y;
+ }
+
+ var rh = 5; // Point marker height.
+ var rw = 5; // Point marker width.
+
+ var W = (maxX - minX + 1) * rw;
+ var H = (maxY - minY + 1) * rh;
+
+ d3.selectAll("body")
+ .append("p")
+ .text(name + " (size: " + data.length + ")")
+ .attr("class", "name");
+
+ var vis = d3.select("body").append("svg")
+ .attr("width", W)
+ .attr("height", H);
+
+ var scale1 = d3.scaleLinear()
+ .domain([minX, maxX])
+ .range([0, W]);
+
+ var scale2 = d3.scaleLinear()
+ .domain([minY, maxY])
+ .range([H, 0]);
+
+ // define the y axis
+ var yAxis = d3.axisRight().scale(scale2);
+
+ // define the x axis
+ var xAxis = d3.axisBottom().scale(scale1);
+
+ // draw y axis with labels
+ vis.append("g").call(yAxis);
+
+ // draw x axis with labels
+ vis.append("g").call(xAxis);
+ // Rect y is flipped (SVG y grows downwards) so that points agree with the y axis scale (range [H, 0]).
+ vis.selectAll("rect")
+ .data(data)
+ .enter()
+ .append("rect")
+ .attr("x", function (d, i) { return (d.x - minX) * rw; })
+ .attr("y", function (d, i) { return H - (d.y - minY + 1) * rh; })
+ .attr("fill", function (d) { return "rgb(" + d.r + ", " + d.g + ", " + d.b + ")"; })
+ .attr("width", rw)
+ .attr("height", rh);
+</script>
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/IgniteMLTestSuite.java b/modules/ml/src/test/java/org/apache/ignite/ml/IgniteMLTestSuite.java
index e6be10b..076a81d 100644
--- a/modules/ml/src/test/java/org/apache/ignite/ml/IgniteMLTestSuite.java
+++ b/modules/ml/src/test/java/org/apache/ignite/ml/IgniteMLTestSuite.java
@@ -35,6 +35,7 @@ import org.apache.ignite.ml.selection.SelectionTestSuite;
import org.apache.ignite.ml.structures.StructuresTestSuite;
import org.apache.ignite.ml.svm.SVMTestSuite;
import org.apache.ignite.ml.tree.DecisionTreeTestSuite;
+import org.apache.ignite.ml.util.generators.DataStreamGeneratorTestSuite;
import org.junit.runner.RunWith;
import org.junit.runners.Suite;
@@ -57,6 +58,7 @@ import org.junit.runners.Suite;
StructuresTestSuite.class,
CommonTestSuite.class,
MultiClassTestSuite.class,
+ DataStreamGeneratorTestSuite.class,
/** JUnit 3 tests. */
DecisionTreeTestSuite.class,
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/util/generators/DataStreamGeneratorTestSuite.java b/modules/ml/src/test/java/org/apache/ignite/ml/util/generators/DataStreamGeneratorTestSuite.java
new file mode 100644
index 0000000..640edb9
--- /dev/null
+++ b/modules/ml/src/test/java/org/apache/ignite/ml/util/generators/DataStreamGeneratorTestSuite.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.util.generators;
+
+import org.apache.ignite.ml.util.generators.primitives.scalar.DiscreteRandomProducerTest;
+import org.apache.ignite.ml.util.generators.primitives.scalar.GaussRandomProducerTest;
+import org.apache.ignite.ml.util.generators.primitives.scalar.RandomProducerTest;
+import org.apache.ignite.ml.util.generators.primitives.scalar.UniformRandomProducerTest;
+import org.apache.ignite.ml.util.generators.primitives.vector.ParametricVectorGeneratorTest;
+import org.apache.ignite.ml.util.generators.primitives.vector.VectorGeneratorPrimitivesTest;
+import org.apache.ignite.ml.util.generators.primitives.vector.VectorGeneratorTest;
+import org.apache.ignite.ml.util.generators.primitives.vector.VectorGeneratorsFamilyTest;
+import org.junit.runner.RunWith;
+import org.junit.runners.Suite;
+
+/**
+ * Test suite for all tests located in {@link org.apache.ignite.ml.util.generators} package.
+ */
+@RunWith(Suite.class)
+@Suite.SuiteClasses({
+ DiscreteRandomProducerTest.class,
+ GaussRandomProducerTest.class,
+ RandomProducerTest.class,
+ UniformRandomProducerTest.class,
+ ParametricVectorGeneratorTest.class,
+ VectorGeneratorPrimitivesTest.class,
+ VectorGeneratorsFamilyTest.class,
+ VectorGeneratorTest.class,
+ DataStreamGeneratorTest.class
+})
+public class DataStreamGeneratorTestSuite {
+}
+
+