You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ignite.apache.org by sb...@apache.org on 2017/12/15 14:10:02 UTC

[32/50] [abbrv] ignite git commit: IGNITE-6495: performance measurement of decision trees algorithms

IGNITE-6495: performance measurement of decision trees algorithms

this closes #3213


Project: http://git-wip-us.apache.org/repos/asf/ignite/repo
Commit: http://git-wip-us.apache.org/repos/asf/ignite/commit/a0516f9e
Tree: http://git-wip-us.apache.org/repos/asf/ignite/tree/a0516f9e
Diff: http://git-wip-us.apache.org/repos/asf/ignite/diff/a0516f9e

Branch: refs/heads/ignite-zk-ce
Commit: a0516f9e8ccb39125de67e5196b943ea6d55f7df
Parents: 47e7daf
Author: Oleg Ignatenko <oi...@gridgain.com>
Authored: Wed Dec 13 20:10:29 2017 +0300
Committer: Yury Babak <yb...@gridgain.com>
Committed: Wed Dec 13 20:10:29 2017 +0300

----------------------------------------------------------------------
 .../yardstick/config/benchmark-ml.properties    |   2 +
 modules/yardstick/pom-standalone.xml            |  12 +
 modules/yardstick/pom.xml                       |  12 +
 .../IgniteColumnDecisionTreeGiniBenchmark.java  |  70 +++
 ...niteColumnDecisionTreeVarianceBenchmark.java |  71 ++++
 .../yardstick/ml/trees/SplitDataGenerator.java  | 426 +++++++++++++++++++
 .../ignite/yardstick/ml/trees/package-info.java |  22 +
 7 files changed, 615 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ignite/blob/a0516f9e/modules/yardstick/config/benchmark-ml.properties
----------------------------------------------------------------------
diff --git a/modules/yardstick/config/benchmark-ml.properties b/modules/yardstick/config/benchmark-ml.properties
index dbf90dd..4867c0f 100644
--- a/modules/yardstick/config/benchmark-ml.properties
+++ b/modules/yardstick/config/benchmark-ml.properties
@@ -85,4 +85,6 @@ CONFIGS="\
 -cfg ${SCRIPT_DIR}/../config/ignite-localhost-config.xml -nn ${nodesNum} -b ${b} -w ${w} -d ${d} -t ${t} -sm ${sm} -dn IgniteDenseLocalOffHeapMatrixMulBenchmark -sn IgniteNode -ds ${ver}dense-local-offheap-matrix-mul-${b}-backup,\
 -cfg ${SCRIPT_DIR}/../config/ignite-localhost-config.xml -nn ${nodesNum} -b ${b} -w ${w} -d ${d} -t ${t} -sm ${sm} -dn IgniteSparseLocalMatrixMulBenchmark -sn IgniteNode -ds ${ver}sparse-local-matrix-mul-${b}-backup,\
 -cfg ${SCRIPT_DIR}/../config/ignite-localhost-config.xml -nn ${nodesNum} -b ${b} -w ${w} -d ${d} -t ${t} -sm ${sm} -dn IgniteSparseBlockDistributedMatrixMulBenchmark -sn IgniteNode -ds ${ver}sparse-block-distributed-matrix-mul-${b}-backup,\
+-cfg ${SCRIPT_DIR}/../config/ignite-localhost-config.xml -nn ${nodesNum} -b ${b} -w ${w} -d ${d} -t ${t} -sm ${sm} -dn IgniteColumnDecisionTreeVarianceBenchmark -sn IgniteNode -ds ${ver}column-decision-tree-variance-${b}-backup,\
+-cfg ${SCRIPT_DIR}/../config/ignite-localhost-config.xml -nn ${nodesNum} -b ${b} -w ${w} -d ${d} -t ${t} -sm ${sm} -dn IgniteColumnDecisionTreeGiniBenchmark -sn IgniteNode -ds ${ver}column-decision-tree-gini-${b}-backup,\
 "

http://git-wip-us.apache.org/repos/asf/ignite/blob/a0516f9e/modules/yardstick/pom-standalone.xml
----------------------------------------------------------------------
diff --git a/modules/yardstick/pom-standalone.xml b/modules/yardstick/pom-standalone.xml
index 78569a3..58920b9 100644
--- a/modules/yardstick/pom-standalone.xml
+++ b/modules/yardstick/pom-standalone.xml
@@ -123,6 +123,18 @@
                     <version>${project.version}</version>
                 </dependency>
             </dependencies>
+
+            <build>
+                <plugins>
+                    <plugin>
+                        <artifactId>maven-compiler-plugin</artifactId>
+                        <configuration>
+                            <source>1.8</source>
+                            <target>1.8</target>
+                        </configuration>
+                    </plugin>
+                </plugins>
+            </build>
         </profile>
     </profiles>
 

http://git-wip-us.apache.org/repos/asf/ignite/blob/a0516f9e/modules/yardstick/pom.xml
----------------------------------------------------------------------
diff --git a/modules/yardstick/pom.xml b/modules/yardstick/pom.xml
index 6a98584..8dda6a7 100644
--- a/modules/yardstick/pom.xml
+++ b/modules/yardstick/pom.xml
@@ -143,6 +143,18 @@
                     <version>${project.version}</version>
                 </dependency>
             </dependencies>
+
+            <build>
+                <plugins>
+                    <plugin>
+                        <artifactId>maven-compiler-plugin</artifactId>
+                        <configuration>
+                            <source>1.8</source>
+                            <target>1.8</target>
+                        </configuration>
+                    </plugin>
+                </plugins>
+            </build>
         </profile>
     </profiles>
 

http://git-wip-us.apache.org/repos/asf/ignite/blob/a0516f9e/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/trees/IgniteColumnDecisionTreeGiniBenchmark.java
----------------------------------------------------------------------
diff --git a/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/trees/IgniteColumnDecisionTreeGiniBenchmark.java b/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/trees/IgniteColumnDecisionTreeGiniBenchmark.java
new file mode 100644
index 0000000..f8a7c08
--- /dev/null
+++ b/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/trees/IgniteColumnDecisionTreeGiniBenchmark.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.yardstick.ml.trees;
+
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.ignite.Ignite;
+import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
+import org.apache.ignite.ml.trees.trainers.columnbased.contsplitcalcs.ContinuousSplitCalculators;
+import org.apache.ignite.ml.trees.trainers.columnbased.regcalcs.RegionCalculators;
+import org.apache.ignite.resources.IgniteInstanceResource;
+import org.apache.ignite.thread.IgniteThread;
+import org.apache.ignite.yardstick.IgniteAbstractBenchmark;
+
+/**
+ * Ignite benchmark that trains a column-based decision tree with Gini calculators on generated data.
+ */
+@SuppressWarnings("unused")
+public class IgniteColumnDecisionTreeGiniBenchmark extends IgniteAbstractBenchmark {
+    /** Ignite instance; annotated with {@link IgniteInstanceResource} for injection. */
+    @IgniteInstanceResource
+    private Ignite ignite;
+
+    /** {@inheritDoc} */
+    @Override public boolean test(Map<Object, Object> ctx) throws Exception {
+        // Work with SparseDistributedMatrix inside an IgniteThread because an Ignite cache is created internally.
+        // NOTE(review): a failure inside run() does not appear to reach this thread, so true is returned anyway.
+        IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(),
+            this.getClass().getSimpleName(), new Runnable() {
+            /** {@inheritDoc} */
+            @Override public void run() {
+                // IMPL NOTE originally taken from ColumnDecisionTreeTrainerTest#testCacheMixedGini
+                int totalPts = 1 << 10;
+                int featCnt = 2;
+
+                HashMap<Integer, Integer> catsInfo = new HashMap<>();
+                catsInfo.put(1, 3);
+
+                SplitDataGenerator<DenseLocalOnHeapVector> gen = new SplitDataGenerator<>(
+                    featCnt, catsInfo, () -> new DenseLocalOnHeapVector(featCnt + 1)).
+                    split(0, 1, new int[] {0, 2}).
+                    split(1, 0, -10.0);
+
+                gen.testByGen(totalPts, ContinuousSplitCalculators.GINI.apply(ignite),
+                    RegionCalculators.GINI, RegionCalculators.MEAN, ignite);
+            }
+        });
+
+        igniteThread.start();
+
+        igniteThread.join();
+
+        return true;
+    }
+}

http://git-wip-us.apache.org/repos/asf/ignite/blob/a0516f9e/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/trees/IgniteColumnDecisionTreeVarianceBenchmark.java
----------------------------------------------------------------------
diff --git a/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/trees/IgniteColumnDecisionTreeVarianceBenchmark.java b/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/trees/IgniteColumnDecisionTreeVarianceBenchmark.java
new file mode 100644
index 0000000..f9d417f
--- /dev/null
+++ b/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/trees/IgniteColumnDecisionTreeVarianceBenchmark.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.yardstick.ml.trees;
+
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.ignite.Ignite;
+import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
+import org.apache.ignite.ml.trees.trainers.columnbased.contsplitcalcs.ContinuousSplitCalculators;
+import org.apache.ignite.ml.trees.trainers.columnbased.regcalcs.RegionCalculators;
+import org.apache.ignite.resources.IgniteInstanceResource;
+import org.apache.ignite.thread.IgniteThread;
+import org.apache.ignite.yardstick.IgniteAbstractBenchmark;
+
+/**
+ * Ignite benchmark that trains a column-based decision tree with variance calculators on generated data.
+ */
+@SuppressWarnings("unused")
+public class IgniteColumnDecisionTreeVarianceBenchmark extends IgniteAbstractBenchmark {
+    /** Ignite instance; annotated with {@link IgniteInstanceResource} for injection. */
+    @IgniteInstanceResource
+    private Ignite ignite;
+
+    /** {@inheritDoc} */
+    @Override public boolean test(Map<Object, Object> ctx) throws Exception {
+        // Work with SparseDistributedMatrix inside an IgniteThread because an Ignite cache is created internally.
+        // NOTE(review): a failure inside run() does not appear to reach this thread, so true is returned anyway.
+        IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(),
+            this.getClass().getSimpleName(), new Runnable() {
+            /** {@inheritDoc} */
+            @Override public void run() {
+                // IMPL NOTE originally taken from ColumnDecisionTreeTrainerTest#testCacheMixed
+                int totalPts = 1 << 10;
+                int featCnt = 2;
+
+                HashMap<Integer, Integer> catsInfo = new HashMap<>();
+                catsInfo.put(1, 3);
+
+                SplitDataGenerator<DenseLocalOnHeapVector> gen
+                    = new SplitDataGenerator<>(
+                    featCnt, catsInfo, () -> new DenseLocalOnHeapVector(featCnt + 1)).
+                    split(0, 1, new int[] {0, 2}).
+                    split(1, 0, -10.0);
+
+                gen.testByGen(totalPts,
+                    ContinuousSplitCalculators.VARIANCE, RegionCalculators.VARIANCE, RegionCalculators.MEAN, ignite);
+            }
+        });
+
+        igniteThread.start();
+
+        igniteThread.join();
+
+        return true;
+    }
+}

http://git-wip-us.apache.org/repos/asf/ignite/blob/a0516f9e/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/trees/SplitDataGenerator.java
----------------------------------------------------------------------
diff --git a/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/trees/SplitDataGenerator.java b/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/trees/SplitDataGenerator.java
new file mode 100644
index 0000000..f530300
--- /dev/null
+++ b/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/trees/SplitDataGenerator.java
@@ -0,0 +1,426 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.yardstick.ml.trees;
+
+import java.io.Serializable;
+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.function.BiFunction;
+import java.util.function.Function;
+import java.util.function.Supplier;
+import java.util.stream.Collectors;
+import java.util.stream.DoubleStream;
+import java.util.stream.IntStream;
+import java.util.stream.Stream;
+import org.apache.ignite.Ignite;
+import org.apache.ignite.lang.IgniteBiTuple;
+import org.apache.ignite.ml.math.StorageConstants;
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.exceptions.MathIllegalArgumentException;
+import org.apache.ignite.ml.math.functions.IgniteFunction;
+import org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix;
+import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
+import org.apache.ignite.ml.structures.LabeledVectorDouble;
+import org.apache.ignite.ml.trees.ContinuousRegionInfo;
+import org.apache.ignite.ml.trees.ContinuousSplitCalculator;
+import org.apache.ignite.ml.trees.models.DecisionTreeModel;
+import org.apache.ignite.ml.trees.trainers.columnbased.ColumnDecisionTreeTrainer;
+import org.apache.ignite.ml.trees.trainers.columnbased.ColumnDecisionTreeTrainerInput;
+import org.apache.ignite.ml.trees.trainers.columnbased.MatrixColumnDecisionTreeTrainerInput;
+import org.apache.ignite.ml.util.Utils;
+
+/** Generator of labeled points grouped into regions; also trains and queries a decision tree on them. */
+class SplitDataGenerator<V extends Vector> {
+    /** Random numbers generator with a fixed seed, shared by all generator instances. */
+    private static final Random rnd = new Random(12349L);
+
+    /** Margin added beyond the known bounds when a continuous coordinate of a region is unbounded. */
+    private static final double DELTA = 100.0;
+
+    /** Map of the form of (is categorical -> list of feature indexes). */
+    private final Map<Boolean, List<Integer>> di;
+
+    /** List of regions. */
+    private final List<Region> regs;
+
+    /** Bounds of continuous features in form of (feature index -> (min, max)), widened by split thresholds. */
+    private final Map<Integer, IgniteBiTuple<Double, Double>> boundsData;
+
+    /** Information about categorical features in form of map (feature index -> categories count). */
+    private final Map<Integer, Integer> catFeaturesInfo;
+
+    /** Supplier of vectors. */
+    private final Supplier<V> supplier;
+
+    /** Features count. */
+    private final int featCnt;
+
+    /**
+     * Create SplitDataGenerator holding a single initial region that spans the whole feature space.
+     *
+     * @param featCnt Features count.
+     * @param catFeaturesInfo Information about categorical features in form of map (feature index -> categories
+     * count).
+     * @param supplier Supplier of vectors.
+     */
+    SplitDataGenerator(int featCnt, Map<Integer, Integer> catFeaturesInfo, Supplier<V> supplier) {
+        this.featCnt = featCnt;
+        this.catFeaturesInfo = catFeaturesInfo;
+        this.supplier = supplier;
+        regs = new LinkedList<>();
+        boundsData = new HashMap<>();
+
+        // Partition feature indexes into categorical (true) and continuous (false) ones.
+        di = IntStream.range(0, featCnt).boxed().
+            collect(Collectors.partitioningBy(catFeaturesInfo::containsKey));
+
+        // Initially every category of each categorical feature is allowed.
+        Map<Integer, CatCoordInfo> cats = new HashMap<>();
+
+        for (Integer idx : di.get(true)) {
+            BitSet allowed = new BitSet();
+            allowed.set(0, catFeaturesInfo.get(idx));
+            cats.put(idx, new CatCoordInfo(allowed));
+        }
+
+        // Continuous features start unbounded; their known bounds start at [-1, 1].
+        Map<Integer, ContCoordInfo> conts = new HashMap<>();
+
+        for (Integer idx : di.get(false)) {
+            conts.put(idx, new ContCoordInfo());
+            boundsData.put(idx, new IgniteBiTuple<>(-1.0, 1.0));
+        }
+
+        regs.add(new Region(cats, conts, 0));
+    }
+
+    /** Trains a column decision tree on generated, shuffled points and queries it with one point per region. */
+    <D extends ContinuousRegionInfo> void testByGen(int totalPts,
+        IgniteFunction<ColumnDecisionTreeTrainerInput, ? extends ContinuousSplitCalculator<D>> calc,
+        IgniteFunction<ColumnDecisionTreeTrainerInput, IgniteFunction<DoubleStream, Double>> catImpCalc,
+        IgniteFunction<DoubleStream, Double> regCalc, Ignite ignite) {
+
+        // Every point is labeled with the index of the region it was generated for.
+        List<IgniteBiTuple<Integer, V>> pts = points(totalPts, (idx, r) -> idx).collect(Collectors.toList());
+        Collections.shuffle(pts, rnd);
+
+        SparseDistributedMatrix mtx = new SparseDistributedMatrix(totalPts,
+            featCnt + 1, StorageConstants.COLUMN_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);
+
+        Map<Integer, List<LabeledVectorDouble>> byRegion = new HashMap<>();
+
+        int row = 0;
+        for (IgniteBiTuple<Integer, V> pt : pts) {
+            byRegion.computeIfAbsent(pt.get1(), k -> new LinkedList<>())
+                .add(asLabeledVector(pt.get2().getStorage().data()));
+            mtx.setRow(row++, pt.get2().getStorage().data());
+        }
+
+        ColumnDecisionTreeTrainer<D> treeTrainer =
+            new ColumnDecisionTreeTrainer<>(3, calc, catImpCalc, regCalc, ignite);
+
+        DecisionTreeModel mdl = treeTrainer.train(new MatrixColumnDecisionTreeTrainerInput(mtx, catFeaturesInfo));
+
+        // Query the model with the first point of every region; predictions are discarded.
+        byRegion.values().forEach(regPts -> mdl.predict(regPts.get(0).features()));
+    }
+
+    /**
+     * Split a region in two along a continuous coordinate; the new (right) region is inserted after the original.
+     *
+     * @param regIdx Region index.
+     * @param coordIdx Coordinate index.
+     * @param threshold Threshold.
+     * @return {@code this}.
+     */
+    SplitDataGenerator<V> split(int regIdx, int coordIdx, double threshold) {
+        Region leftReg = regs.get(regIdx);
+        ContCoordInfo leftInfo = leftReg.contCoords.get(coordIdx);
+
+        if (threshold < leftInfo.left || threshold > leftInfo.right)
+            throw new MathIllegalArgumentException("Threshold is out of region bounds.");
+
+        // Both halves end up with half of the points share of the region being split.
+        leftReg.incTwoPow();
+
+        // The copy (presumably deep) is taken before the right bound is narrowed, so it keeps that bound.
+        Region rightReg = Utils.copy(leftReg);
+        rightReg.contCoords.get(coordIdx).left = threshold;
+        leftInfo.right = threshold;
+
+        regs.add(regIdx + 1, rightReg);
+
+        // Make sure the known bounds of the coordinate cover the threshold.
+        IgniteBiTuple<Double, Double> known = boundsData.get(coordIdx);
+        double lo = Math.min(threshold, known.get1());
+        double hi = Math.max(known.get2(), threshold);
+        boundsData.put(coordIdx, new IgniteBiTuple<>(lo, hi));
+
+        return this;
+    }
+
+    /**
+     * Split region by categorical coordinate.
+     *
+     * @param regIdx Region index.
+     * @param coordIdx Coordinate index.
+     * @param cats Categories allowed for the left sub region; must all be allowed in the region being split.
+     * @return {@code this}.
+     */
+    SplitDataGenerator<V> split(int regIdx, int coordIdx, int[] cats) {
+        BitSet subset = new BitSet();
+        Arrays.stream(cats).forEach(subset::set);
+        Region regToSplit = regs.get(regIdx);
+        BitSet set = regToSplit.catCoords.get(coordIdx).bs;
+
+        // Compare whole sets: BitSet.length() only reflects the highest set bit and would miss lower mismatches.
+        BitSet ssc = (BitSet)subset.clone();
+        ssc.and(set);
+        if (!ssc.equals(subset))
+            throw new MathIllegalArgumentException("Splitter set is not a subset of a parent subset.");
+
+        // Right sub region gets the remaining categories, the left one is narrowed to the requested subset.
+        ssc.xor(set);
+        set.and(subset);
+
+        regToSplit.incTwoPow();
+        Region newReg = Utils.copy(regToSplit);
+        newReg.catCoords.put(coordIdx, new CatCoordInfo(ssc));
+        regs.add(regIdx + 1, newReg);
+
+        return this;
+    }
+
+    /**
+     * Get stream of (region index, point) pairs generated for every region of this generator.
+     *
+     * @param ptsCnt Total points count; each region yields its share of it.
+     * @param f Function computing the label of a region from its index.
+     */
+    private Stream<IgniteBiTuple<Integer, V>> points(int ptsCnt, BiFunction<Double, Random, Double> f) {
+        return IntStream.range(0, regs.size()).boxed().flatMap(idx ->
+            regs.get(idx).generatePoints(ptsCnt, f.apply((double)idx, rnd), boundsData, di, supplier, rnd)
+                .map(pt -> new IgniteBiTuple<>(idx, pt)));
+    }
+
+    /**
+     * Convert double array to {@link LabeledVectorDouble}: last element is the label, the rest are features.
+     *
+     * @param arr Array for conversion.
+     * @return LabeledVectorDouble.
+     */
+    private static LabeledVectorDouble<DenseLocalOnHeapVector> asLabeledVector(double[] arr) {
+        int last = arr.length - 1;
+        return new LabeledVectorDouble<>(new DenseLocalOnHeapVector(Arrays.copyOf(arr, last)), arr[last]);
+    }
+
+    /**
+     * Restriction on a categorical coordinate of a region.
+     */
+    private static class CatCoordInfo implements Serializable {
+        /**
+         * Categories allowed in the region: bit {@code i} is set when category {@code i} is allowed.
+         */
+        private final BitSet bs;
+
+        /**
+         * Construct CatCoordInfo.
+         *
+         * @param bs Bitset of allowed categories; stored as is, without copying.
+         */
+        CatCoordInfo(BitSet bs) {
+            this.bs = bs;
+        }
+
+        /** {@inheritDoc} */
+        @Override public String toString() {
+            StringBuilder sb = new StringBuilder("CatCoordInfo [bs=");
+
+            return sb.append(bs).append(']').toString();
+        }
+    }
+
+    /**
+     * Restriction on a continuous coordinate of a region.
+     */
+    private static class ContCoordInfo implements Serializable {
+        /**
+         * Left (min) bound of region.
+         */
+        private double left;
+
+        /**
+         * Right (max) bound of region.
+         */
+        private double right;
+
+        /**
+         * Construct ContCoordInfo that is unbounded on both sides.
+         */
+        ContCoordInfo() {
+            this.left = Double.NEGATIVE_INFINITY;
+            this.right = Double.POSITIVE_INFINITY;
+        }
+
+        /** {@inheritDoc} */
+        @Override public String toString() {
+            StringBuilder sb = new StringBuilder("ContCoordInfo [left=");
+            sb.append(left).append(", right=").append(right).append(']');
+
+            return sb.toString();
+        }
+    }
+
+    /**
+     * Class representing information about region.
+     */
+    private static class Region implements Serializable {
+        /**
+         * Information about categorical coordinates restrictions of this region in form of
+         * (coordinate index -> restriction)
+         */
+        private final Map<Integer, CatCoordInfo> catCoords;
+
+        /**
+         * Information about continuous coordinates restrictions of this region in form of
+         * (coordinate index -> restriction)
+         */
+        private final Map<Integer, ContCoordInfo> contCoords;
+
+        /**
+         * Region should contain {@code 1/2^twoPow * totalPoints} points.
+         */
+        private int twoPow;
+
+        /**
+         * Construct region by information about restrictions on coordinates (features) values.
+         *
+         * @param catCoords Restrictions on categorical coordinates.
+         * @param contCoords Restrictions on continuous coordinates.
+         * @param twoPow Region should contain {@code 1/2^twoPow * totalPoints} points.
+         */
+        Region(Map<Integer, CatCoordInfo> catCoords, Map<Integer, ContCoordInfo> contCoords, int twoPow) {
+            this.catCoords = catCoords;
+            this.contCoords = contCoords;
+            this.twoPow = twoPow;
+        }
+
+        /** @return {@code 2^twoPow}, the divisor applied to the total points count for this region. */
+        int divideBy() {
+            return 1 << twoPow;
+        }
+
+        /** Increments {@code twoPow}, which halves the share of points generated for this region. */
+        void incTwoPow() {
+            twoPow++;
+        }
+
+        /** {@inheritDoc} */
+        @Override public String toString() {
+            return "Region [" +
+                "catCoords=" + catCoords +
+                ", contCoords=" + contCoords +
+                ", twoPow=" + twoPow +
+                ']';
+        }
+
+        /**
+         * Generate continuous coordinate for this region.
+         *
+         * @param coordIdx Coordinate index.
+         * @param boundsData Known bounds of continuous coordinates, extended by DELTA for unbounded sides.
+         * @param rnd Random numbers generator.
+         * @return Continuous coordinate value.
+         */
+        double generateContCoord(int coordIdx, Map<Integer, IgniteBiTuple<Double, Double>> boundsData,
+            Random rnd) {
+            ContCoordInfo cci = contCoords.get(coordIdx);
+            double left = cci.left;
+            double right = cci.right;
+
+            if (left == Double.NEGATIVE_INFINITY)
+                left = boundsData.get(coordIdx).get1() - DELTA;
+
+            if (right == Double.POSITIVE_INFINITY)
+                right = boundsData.get(coordIdx).get2() + DELTA;
+
+            double size = right - left;
+
+            return left + rnd.nextDouble() * size;
+        }
+
+        /**
+         * Generate categorical coordinate value for this region.
+         *
+         * @param coordIdx Coordinate index.
+         * @param rnd Random numbers generator.
+         * @return Categorical coordinate value picked uniformly among the categories allowed in this region.
+         */
+        double generateCatCoord(int coordIdx, Random rnd) {
+            BitSet bs = catCoords.get(coordIdx).bs;
+
+            // Pick the ordinal among the set bits only: bs.length() would also count cleared bits below the
+            // highest set one and skew the choice towards the last allowed category.
+            int skip = rnd.nextInt(bs.cardinality());
+
+            int bit = bs.nextSetBit(0);
+
+            // Walk to the chosen set bit.
+            for (int k = 0; k < skip; k++)
+                bit = bs.nextSetBit(bit + 1);
+
+            return bit;
+        }
+
+        /**
+         * Generate points for this region.
+         *
+         * @param ptsCnt Total count of points; this region gets {@code ptsCnt / divideBy()} of them.
+         * @param val Label for all points in this region.
+         * @param boundsData Data about bounds of continuous coordinates.
+         * @param catCont Coordinate indexes partitioned in the form (is categorical -> list of coordinate
+         * indexes).
+         * @param s Vectors supplier.
+         * @param rnd Random numbers generator.
+         * @param <V> Type of vectors.
+         * @return Stream of generated points for this region.
+         */
+        <V extends Vector> Stream<V> generatePoints(int ptsCnt, double val,
+            Map<Integer, IgniteBiTuple<Double, Double>> boundsData, Map<Boolean, List<Integer>> catCont,
+            Supplier<V> s,
+            Random rnd) {
+            return IntStream.range(0, ptsCnt / divideBy()).mapToObj(i -> {
+                V v = s.get();
+                int coordsCnt = v.size();
+                catCont.get(false).forEach(ci -> v.setX(ci, generateContCoord(ci, boundsData, rnd)));
+                catCont.get(true).forEach(ci -> v.setX(ci, generateCatCoord(ci, rnd)));
+
+                v.setX(coordsCnt - 1, val);
+                return v;
+            });
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/ignite/blob/a0516f9e/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/trees/package-info.java
----------------------------------------------------------------------
diff --git a/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/trees/package-info.java b/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/trees/package-info.java
new file mode 100644
index 0000000..fc379a6
--- /dev/null
+++ b/modules/yardstick/src/main/ml/org/apache/ignite/yardstick/ml/trees/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * <!-- Package description. -->
+ * ML Grid decision tree benchmarks.
+ */
+package org.apache.ignite.yardstick.ml.trees;
\ No newline at end of file