You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ignite.apache.org by ag...@apache.org on 2018/04/17 15:59:00 UTC
[05/23] ignite git commit: IGNITE-8169: [ML] Adopt KMeans to the new
Partitioned Dataset and cleanup old code
IGNITE-8169:
[ML] Adopt KMeans to the new Partitioned Dataset and cleanup old code
this closes #3817
Project: http://git-wip-us.apache.org/repos/asf/ignite/repo
Commit: http://git-wip-us.apache.org/repos/asf/ignite/commit/9e21cec0
Tree: http://git-wip-us.apache.org/repos/asf/ignite/tree/9e21cec0
Diff: http://git-wip-us.apache.org/repos/asf/ignite/diff/9e21cec0
Branch: refs/heads/ignite-7708
Commit: 9e21cec024168105fd30bfb3acecf9fd24a52e8c
Parents: 228254a
Author: zaleslaw <za...@gmail.com>
Authored: Mon Apr 16 20:20:49 2018 +0300
Committer: Yury Babak <yb...@gridgain.com>
Committed: Mon Apr 16 20:20:49 2018 +0300
----------------------------------------------------------------------
.../clustering/DatasetWithObviousStructure.java | 105 ----
.../ml/clustering/FuzzyCMeansExample.java | 134 -----
.../ml/clustering/FuzzyCMeansLocalExample.java | 95 ----
.../clustering/KMeansClusterizationExample.java | 226 ++++++++
.../KMeansDistributedClustererExample.java | 97 ----
.../clustering/KMeansLocalClustererExample.java | 106 ----
.../ignite/ml/FuzzyCMeansModelFormat.java | 76 ---
.../org/apache/ignite/ml/KMeansModelFormat.java | 77 ---
.../ml/clustering/BaseFuzzyCMeansClusterer.java | 90 ----
.../ml/clustering/BaseKMeansClusterer.java | 96 ----
.../apache/ignite/ml/clustering/Clusterer.java | 33 --
.../ml/clustering/ClusterizationModel.java | 29 --
.../FuzzyCMeansDistributedClusterer.java | 512 -------------------
.../clustering/FuzzyCMeansLocalClusterer.java | 254 ---------
.../ignite/ml/clustering/FuzzyCMeansModel.java | 88 ----
.../clustering/KMeansDistributedClusterer.java | 306 -----------
.../ml/clustering/KMeansLocalClusterer.java | 177 -------
.../ignite/ml/clustering/KMeansModel.java | 113 ----
.../ignite/ml/clustering/WeightedClusterer.java | 38 --
.../ignite/ml/clustering/kmeans/Clusterer.java | 33 ++
.../clustering/kmeans/ClusterizationModel.java | 29 ++
.../ml/clustering/kmeans/KMeansModel.java | 112 ++++
.../ml/clustering/kmeans/KMeansModelFormat.java | 79 +++
.../ml/clustering/kmeans/KMeansTrainer.java | 320 ++++++++++++
.../ml/clustering/kmeans/package-info.java | 22 +
.../preprocessing/LabellingMachine.java | 41 --
.../ml/structures/preprocessing/Normalizer.java | 80 ---
.../org/apache/ignite/ml/LocalModelsTest.java | 26 +-
.../ml/clustering/ClusteringTestSuite.java | 7 +-
.../FuzzyCMeansDistributedClustererTest.java | 180 -------
.../FuzzyCMeansLocalClustererTest.java | 202 --------
...KMeansDistributedClustererTestMultiNode.java | 138 -----
...MeansDistributedClustererTestSingleNode.java | 198 -------
.../ml/clustering/KMeansLocalClustererTest.java | 46 --
.../ignite/ml/clustering/KMeansModelTest.java | 63 +++
.../ignite/ml/clustering/KMeansTrainerTest.java | 73 +++
.../apache/ignite/ml/clustering/KMeansUtil.java | 33 --
37 files changed, 977 insertions(+), 3357 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ignite/blob/9e21cec0/examples/src/main/java/org/apache/ignite/examples/ml/clustering/DatasetWithObviousStructure.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/clustering/DatasetWithObviousStructure.java b/examples/src/main/java/org/apache/ignite/examples/ml/clustering/DatasetWithObviousStructure.java
deleted file mode 100644
index 5cd0e09..0000000
--- a/examples/src/main/java/org/apache/ignite/examples/ml/clustering/DatasetWithObviousStructure.java
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.ignite.examples.ml.clustering;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-import java.util.stream.Collectors;
-import java.util.stream.IntStream;
-import org.apache.ignite.ml.math.Matrix;
-import org.apache.ignite.ml.math.Vector;
-import org.apache.ignite.ml.math.VectorUtils;
-import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
-
-/**
- * See KMeansDistributedClustererTestSingleNode#testClusterizationOnDatasetWithObviousStructure.
- */
-class DatasetWithObviousStructure {
- /** */
- private final Random rnd = new Random(123456L);
-
- /** Let centers be in the vertices of square. */
- private final Map<Integer, Vector> centers = new HashMap<>();
-
- /** Square side length. */
- private final int squareSideLen;
-
- /** */
- DatasetWithObviousStructure(int squareSideLen) {
- this.squareSideLen = squareSideLen;
- centers.put(100, new DenseLocalOnHeapVector(new double[] {0.0, 0.0}));
- centers.put(900, new DenseLocalOnHeapVector(new double[] {squareSideLen, 0.0}));
- centers.put(3000, new DenseLocalOnHeapVector(new double[] {0.0, squareSideLen}));
- centers.put(6000, new DenseLocalOnHeapVector(new double[] {squareSideLen, squareSideLen}));
- }
-
- /** */
- List<Vector> generate(Matrix points) {
- int ptsCnt = points.rowSize();
-
- // Mass centers of dataset.
- List<Vector> massCenters = new ArrayList<>();
-
- int centersCnt = centers.size();
-
- List<Integer> permutation = IntStream.range(0, ptsCnt).boxed().collect(Collectors.toList());
- Collections.shuffle(permutation, rnd);
-
- Vector[] mc = new Vector[centersCnt];
- Arrays.fill(mc, VectorUtils.zeroes(2));
-
- int totalCnt = 0;
-
- int centIdx = 0;
- massCenters.clear();
-
- for (Integer count : centers.keySet()) {
- for (int i = 0; i < count; i++) {
- Vector pnt = getPoint(count);
-
- mc[centIdx] = mc[centIdx].plus(pnt);
-
- points.assignRow(permutation.get(totalCnt), pnt);
-
- totalCnt++;
- }
- massCenters.add(mc[centIdx].times(1 / (double)count));
- centIdx++;
- }
-
- return massCenters;
- }
-
- /** */
- Map<Integer, Vector> centers() {
- return centers;
- }
-
- /** */
- private Vector getPoint(Integer cnt) {
- Vector pnt = new DenseLocalOnHeapVector(2).assign(centers.get(cnt));
- // Perturbate point on random value.
- pnt.map(val -> val + rnd.nextDouble() * squareSideLen / 100);
- return pnt;
- }
-}
http://git-wip-us.apache.org/repos/asf/ignite/blob/9e21cec0/examples/src/main/java/org/apache/ignite/examples/ml/clustering/FuzzyCMeansExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/clustering/FuzzyCMeansExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/clustering/FuzzyCMeansExample.java
deleted file mode 100644
index 23aeed7..0000000
--- a/examples/src/main/java/org/apache/ignite/examples/ml/clustering/FuzzyCMeansExample.java
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.ignite.examples.ml.clustering;
-
-import org.apache.ignite.Ignite;
-import org.apache.ignite.Ignition;
-import org.apache.ignite.examples.ExampleNodeStartup;
-import org.apache.ignite.ml.clustering.BaseFuzzyCMeansClusterer;
-import org.apache.ignite.ml.clustering.FuzzyCMeansDistributedClusterer;
-import org.apache.ignite.ml.clustering.FuzzyCMeansModel;
-import org.apache.ignite.ml.math.StorageConstants;
-import org.apache.ignite.ml.math.Vector;
-import org.apache.ignite.ml.math.distances.DistanceMeasure;
-import org.apache.ignite.ml.math.distances.EuclideanDistance;
-import org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix;
-import org.apache.ignite.thread.IgniteThread;
-
-/**
- * <p>
- * This example shows how to use {@link FuzzyCMeansDistributedClusterer}.</p>
- * <p>
- * Remote nodes should always be started with special configuration file which
- * enables P2P class loading: {@code 'ignite.{sh|bat} examples/config/example-ignite.xml'}.</p>
- * <p>
- * Alternatively you can run {@link ExampleNodeStartup} in another JVM which will start node
- * with {@code examples/config/example-ignite.xml} configuration.</p>
- */
-public final class FuzzyCMeansExample {
- /**
- * Executes example.
- *
- * @param args Command line arguments, none required.
- */
- public static void main(String[] args) throws InterruptedException {
- System.out.println(">>> Fuzzy C-Means usage example started.");
-
- // Start ignite grid.
- try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
- System.out.println(">>> Ignite grid started.");
-
- // Start new Ignite thread.
- IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(),
- FuzzyCMeansExample.class.getSimpleName(),
- () -> {
- // Distance measure that computes distance between two points.
- DistanceMeasure distanceMeasure = new EuclideanDistance();
-
- // "Fuzziness" - specific constant that is used in membership calculation (1.0+-eps ~ K-Means).
- double exponentialWeight = 2.0;
-
- // Condition that indicated when algorithm must stop.
- // In this example algorithm stops if memberships have changed insignificantly.
- BaseFuzzyCMeansClusterer.StopCondition stopCond =
- BaseFuzzyCMeansClusterer.StopCondition.STABLE_MEMBERSHIPS;
-
- // Maximum difference between new and old membership values with which algorithm will continue to work.
- double maxDelta = 0.01;
-
- // The maximum number of FCM iterations.
- int maxIterations = 50;
-
- // Value that is used to initialize random numbers generator. You can choose it randomly.
- Long seed = null;
-
- // Number of steps of primary centers selection (more steps more candidates).
- int initializationSteps = 2;
-
- // Number of K-Means iteration that is used to choose required number of primary centers from candidates.
- int kMeansMaxIterations = 50;
-
- // Create new distributed clusterer with parameters described above.
- System.out.println(">>> Create new Distributed Fuzzy C-Means clusterer.");
- FuzzyCMeansDistributedClusterer clusterer = new FuzzyCMeansDistributedClusterer(
- distanceMeasure, exponentialWeight, stopCond, maxDelta, maxIterations,
- seed, initializationSteps, kMeansMaxIterations);
-
- // Create sample data.
- double[][] points = new double[][] {
- {-10, -10}, {-9, -11}, {-10, -9}, {-11, -9},
- {10, 10}, {9, 11}, {10, 9}, {11, 9},
- {-10, 10}, {-9, 11}, {-10, 9}, {-11, 9},
- {10, -10}, {9, -11}, {10, -9}, {11, -9}};
-
- // Initialize matrix of data points. Each row contains one point.
- int rows = points.length;
- int cols = points[0].length;
-
- System.out.println(">>> Create the matrix that contains sample points.");
- SparseDistributedMatrix pntMatrix = new SparseDistributedMatrix(rows, cols,
- StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);
-
- // Store points into matrix.
- pntMatrix.assign(points);
-
- // Call clusterization method with some number of centers.
- // It returns model that can predict results for new points.
- System.out.println(">>> Perform clusterization.");
- int numCenters = 4;
- FuzzyCMeansModel mdl = clusterer.cluster(pntMatrix, numCenters);
-
- // You can also get centers of clusters that is computed by Fuzzy C-Means algorithm.
- Vector[] centers = mdl.centers();
-
- String res = ">>> Results:\n"
- + ">>> 1st center: " + centers[0].get(0) + " " + centers[0].get(1) + "\n"
- + ">>> 2nd center: " + centers[1].get(0) + " " + centers[1].get(1) + "\n"
- + ">>> 3rd center: " + centers[2].get(0) + " " + centers[2].get(1) + "\n"
- + ">>> 4th center: " + centers[3].get(0) + " " + centers[3].get(1) + "\n";
-
- System.out.println(res);
-
- pntMatrix.destroy();
- });
-
- igniteThread.start();
- igniteThread.join();
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/ignite/blob/9e21cec0/examples/src/main/java/org/apache/ignite/examples/ml/clustering/FuzzyCMeansLocalExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/clustering/FuzzyCMeansLocalExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/clustering/FuzzyCMeansLocalExample.java
deleted file mode 100644
index 5c1753a..0000000
--- a/examples/src/main/java/org/apache/ignite/examples/ml/clustering/FuzzyCMeansLocalExample.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.ignite.examples.ml.clustering;
-
-import org.apache.ignite.ml.clustering.BaseFuzzyCMeansClusterer;
-import org.apache.ignite.ml.clustering.FuzzyCMeansLocalClusterer;
-import org.apache.ignite.ml.clustering.FuzzyCMeansModel;
-import org.apache.ignite.ml.math.Vector;
-import org.apache.ignite.ml.math.distances.DistanceMeasure;
-import org.apache.ignite.ml.math.distances.EuclideanDistance;
-import org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix;
-
-/**
- * This example shows how to use {@link FuzzyCMeansLocalClusterer}.
- */
-public final class FuzzyCMeansLocalExample {
- /**
- * Executes example.
- *
- * @param args Command line arguments, none required.
- */
- public static void main(String[] args) {
- System.out.println(">>> Local Fuzzy C-Means usage example started.");
-
- // Distance measure that computes distance between two points.
- DistanceMeasure distanceMeasure = new EuclideanDistance();
-
- // "Fuzziness" - specific constant that is used in membership calculation (1.0+-eps ~ K-Means).
- double exponentialWeight = 2.0;
-
- // Condition that indicated when algorithm must stop.
- // In this example algorithm stops if memberships have changed insignificantly.
- BaseFuzzyCMeansClusterer.StopCondition stopCond =
- BaseFuzzyCMeansClusterer.StopCondition.STABLE_MEMBERSHIPS;
-
- // Maximum difference between new and old membership values with which algorithm will continue to work.
- double maxDelta = 0.01;
-
- // The maximum number of FCM iterations.
- int maxIterations = 50;
-
- // Value that is used to initialize random numbers generator. You can choose it randomly.
- Long seed = null;
-
- // Create new distributed clusterer with parameters described above.
- System.out.println(">>> Create new Local Fuzzy C-Means clusterer.");
- FuzzyCMeansLocalClusterer clusterer = new FuzzyCMeansLocalClusterer(distanceMeasure,
- exponentialWeight, stopCond,
- maxDelta, maxIterations, seed);
-
- // Create sample data.
- double[][] points = new double[][] {
- {-10, -10}, {-9, -11}, {-10, -9}, {-11, -9},
- {10, 10}, {9, 11}, {10, 9}, {11, 9},
- {-10, 10}, {-9, 11}, {-10, 9}, {-11, 9},
- {10, -10}, {9, -11}, {10, -9}, {11, -9}};
-
- // Initialize matrix of data points. Each row contains one point.
- System.out.println(">>> Create the matrix that contains sample points.");
- // Store points into matrix.
- DenseLocalOnHeapMatrix pntMatrix = new DenseLocalOnHeapMatrix(points);
-
- // Call clusterization method with some number of centers.
- // It returns model that can predict results for new points.
- System.out.println(">>> Perform clusterization.");
- int numCenters = 4;
- FuzzyCMeansModel mdl = clusterer.cluster(pntMatrix, numCenters);
-
- // You can also get centers of clusters that is computed by Fuzzy C-Means algorithm.
- Vector[] centers = mdl.centers();
-
- String res = ">>> Results:\n"
- + ">>> 1st center: " + centers[0].get(0) + " " + centers[0].get(1) + "\n"
- + ">>> 2nd center: " + centers[1].get(0) + " " + centers[1].get(1) + "\n"
- + ">>> 3rd center: " + centers[2].get(0) + " " + centers[2].get(1) + "\n"
- + ">>> 4th center: " + centers[3].get(0) + " " + centers[3].get(1) + "\n";
-
- System.out.println(res);
- }
-}
http://git-wip-us.apache.org/repos/asf/ignite/blob/9e21cec0/examples/src/main/java/org/apache/ignite/examples/ml/clustering/KMeansClusterizationExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/clustering/KMeansClusterizationExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/clustering/KMeansClusterizationExample.java
new file mode 100644
index 0000000..8825ebb
--- /dev/null
+++ b/examples/src/main/java/org/apache/ignite/examples/ml/clustering/KMeansClusterizationExample.java
@@ -0,0 +1,226 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.examples.ml.clustering;
+
+import java.util.Arrays;
+import java.util.UUID;
+import javax.cache.Cache;
+import org.apache.ignite.Ignite;
+import org.apache.ignite.IgniteCache;
+import org.apache.ignite.Ignition;
+import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
+import org.apache.ignite.cache.query.QueryCursor;
+import org.apache.ignite.cache.query.ScanQuery;
+import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.ml.dataset.impl.cache.CacheBasedDatasetBuilder;
+import org.apache.ignite.ml.knn.classification.KNNClassificationTrainer;
+import org.apache.ignite.ml.math.Tracer;
+import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
+import org.apache.ignite.ml.clustering.kmeans.KMeansModel;
+import org.apache.ignite.ml.clustering.kmeans.KMeansTrainer;
+import org.apache.ignite.thread.IgniteThread;
+
+/**
+ * Run KMeans clustering trainer over distributed dataset and compare the predicted clusters with the real labels.
+ *
+ * @see KMeansTrainer
+ */
+public class KMeansClusterizationExample {
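+ /** Runs the example: fills a cache, trains a KMeans model on it and prints the predicted cluster next to the real label of every row. */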
+ public static void main(String[] args) throws InterruptedException {
+ System.out.println();
+ System.out.println(">>> KMeans clustering algorithm over cached dataset usage example started.");
+ // Start ignite grid.
+ try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
+ System.out.println(">>> Ignite grid started.");
+
+ IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(),
+ KMeansClusterizationExample.class.getSimpleName(), () -> {
+ IgniteCache<Integer, double[]> dataCache = getTestCache(ignite);
+
+ KMeansTrainer trainer = new KMeansTrainer()
+ .withSeed(7867L);
+
+ KMeansModel mdl = trainer.fit(
+ new CacheBasedDatasetBuilder<>(ignite, dataCache),
+ (k, v) -> Arrays.copyOfRange(v, 1, v.length),
+ (k, v) -> v[0]
+ );
+
+ System.out.println(">>> KMeans centroids");
+ Tracer.showAscii(mdl.centers()[0]);
+ Tracer.showAscii(mdl.centers()[1]);
+ System.out.println(">>>");
+
+ System.out.println(">>> -----------------------------------");
+ System.out.println(">>> | Predicted cluster\t| Real Label\t|");
+ System.out.println(">>> -----------------------------------");
+
+ int amountOfErrors = 0;
+ int totalAmount = 0;
+
+ try (QueryCursor<Cache.Entry<Integer, double[]>> observations = dataCache.query(new ScanQuery<>())) {
+ for (Cache.Entry<Integer, double[]> observation : observations) {
+ double[] val = observation.getValue();
+ double[] inputs = Arrays.copyOfRange(val, 1, val.length);
+ double groundTruth = val[0];
+
+ double prediction = mdl.apply(new DenseLocalOnHeapVector(inputs));
+
+ totalAmount++;
+ if (groundTruth != prediction)
+ amountOfErrors++;
+
+ System.out.printf(">>> | %.4f\t\t\t| %.4f\t\t|\n", prediction, groundTruth);
+ }
+
+ System.out.println(">>> ---------------------------------");
+
+ System.out.println("\n>>> Absolute amount of errors " + amountOfErrors);
+ System.out.println("\n>>> Accuracy " + (1 - amountOfErrors / (double)totalAmount));
+ }
+ });
+
+ igniteThread.start();
+ igniteThread.join();
+ }
+ }
+
+ /**
+ * Fills cache with data and returns it.
+ *
+ * @param ignite Ignite instance.
+ * @return Filled Ignite Cache.
+ */
+ private static IgniteCache<Integer, double[]> getTestCache(Ignite ignite) {
+ CacheConfiguration<Integer, double[]> cacheConfiguration = new CacheConfiguration<>();
+ cacheConfiguration.setName("TEST_" + UUID.randomUUID());
+ cacheConfiguration.setAffinity(new RendezvousAffinityFunction(false, 10));
+
+ IgniteCache<Integer, double[]> cache = ignite.createCache(cacheConfiguration);
+
+ for (int i = 0; i < data.length; i++)
+ cache.put(i, data[i]);
+
+ return cache;
+ }
+
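+ /** The Iris dataset (only rows labeled 0 and 1): the first element of each row is the label, the remaining four are the features. */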
+ private static final double[][] data = {
+ {0, 5.1, 3.5, 1.4, 0.2},
+ {0, 4.9, 3, 1.4, 0.2},
+ {0, 4.7, 3.2, 1.3, 0.2},
+ {0, 4.6, 3.1, 1.5, 0.2},
+ {0, 5, 3.6, 1.4, 0.2},
+ {0, 5.4, 3.9, 1.7, 0.4},
+ {0, 4.6, 3.4, 1.4, 0.3},
+ {0, 5, 3.4, 1.5, 0.2},
+ {0, 4.4, 2.9, 1.4, 0.2},
+ {0, 4.9, 3.1, 1.5, 0.1},
+ {0, 5.4, 3.7, 1.5, 0.2},
+ {0, 4.8, 3.4, 1.6, 0.2},
+ {0, 4.8, 3, 1.4, 0.1},
+ {0, 4.3, 3, 1.1, 0.1},
+ {0, 5.8, 4, 1.2, 0.2},
+ {0, 5.7, 4.4, 1.5, 0.4},
+ {0, 5.4, 3.9, 1.3, 0.4},
+ {0, 5.1, 3.5, 1.4, 0.3},
+ {0, 5.7, 3.8, 1.7, 0.3},
+ {0, 5.1, 3.8, 1.5, 0.3},
+ {0, 5.4, 3.4, 1.7, 0.2},
+ {0, 5.1, 3.7, 1.5, 0.4},
+ {0, 4.6, 3.6, 1, 0.2},
+ {0, 5.1, 3.3, 1.7, 0.5},
+ {0, 4.8, 3.4, 1.9, 0.2},
+ {0, 5, 3, 1.6, 0.2},
+ {0, 5, 3.4, 1.6, 0.4},
+ {0, 5.2, 3.5, 1.5, 0.2},
+ {0, 5.2, 3.4, 1.4, 0.2},
+ {0, 4.7, 3.2, 1.6, 0.2},
+ {0, 4.8, 3.1, 1.6, 0.2},
+ {0, 5.4, 3.4, 1.5, 0.4},
+ {0, 5.2, 4.1, 1.5, 0.1},
+ {0, 5.5, 4.2, 1.4, 0.2},
+ {0, 4.9, 3.1, 1.5, 0.1},
+ {0, 5, 3.2, 1.2, 0.2},
+ {0, 5.5, 3.5, 1.3, 0.2},
+ {0, 4.9, 3.1, 1.5, 0.1},
+ {0, 4.4, 3, 1.3, 0.2},
+ {0, 5.1, 3.4, 1.5, 0.2},
+ {0, 5, 3.5, 1.3, 0.3},
+ {0, 4.5, 2.3, 1.3, 0.3},
+ {0, 4.4, 3.2, 1.3, 0.2},
+ {0, 5, 3.5, 1.6, 0.6},
+ {0, 5.1, 3.8, 1.9, 0.4},
+ {0, 4.8, 3, 1.4, 0.3},
+ {0, 5.1, 3.8, 1.6, 0.2},
+ {0, 4.6, 3.2, 1.4, 0.2},
+ {0, 5.3, 3.7, 1.5, 0.2},
+ {0, 5, 3.3, 1.4, 0.2},
+ {1, 7, 3.2, 4.7, 1.4},
+ {1, 6.4, 3.2, 4.5, 1.5},
+ {1, 6.9, 3.1, 4.9, 1.5},
+ {1, 5.5, 2.3, 4, 1.3},
+ {1, 6.5, 2.8, 4.6, 1.5},
+ {1, 5.7, 2.8, 4.5, 1.3},
+ {1, 6.3, 3.3, 4.7, 1.6},
+ {1, 4.9, 2.4, 3.3, 1},
+ {1, 6.6, 2.9, 4.6, 1.3},
+ {1, 5.2, 2.7, 3.9, 1.4},
+ {1, 5, 2, 3.5, 1},
+ {1, 5.9, 3, 4.2, 1.5},
+ {1, 6, 2.2, 4, 1},
+ {1, 6.1, 2.9, 4.7, 1.4},
+ {1, 5.6, 2.9, 3.6, 1.3},
+ {1, 6.7, 3.1, 4.4, 1.4},
+ {1, 5.6, 3, 4.5, 1.5},
+ {1, 5.8, 2.7, 4.1, 1},
+ {1, 6.2, 2.2, 4.5, 1.5},
+ {1, 5.6, 2.5, 3.9, 1.1},
+ {1, 5.9, 3.2, 4.8, 1.8},
+ {1, 6.1, 2.8, 4, 1.3},
+ {1, 6.3, 2.5, 4.9, 1.5},
+ {1, 6.1, 2.8, 4.7, 1.2},
+ {1, 6.4, 2.9, 4.3, 1.3},
+ {1, 6.6, 3, 4.4, 1.4},
+ {1, 6.8, 2.8, 4.8, 1.4},
+ {1, 6.7, 3, 5, 1.7},
+ {1, 6, 2.9, 4.5, 1.5},
+ {1, 5.7, 2.6, 3.5, 1},
+ {1, 5.5, 2.4, 3.8, 1.1},
+ {1, 5.5, 2.4, 3.7, 1},
+ {1, 5.8, 2.7, 3.9, 1.2},
+ {1, 6, 2.7, 5.1, 1.6},
+ {1, 5.4, 3, 4.5, 1.5},
+ {1, 6, 3.4, 4.5, 1.6},
+ {1, 6.7, 3.1, 4.7, 1.5},
+ {1, 6.3, 2.3, 4.4, 1.3},
+ {1, 5.6, 3, 4.1, 1.3},
+ {1, 5.5, 2.5, 4, 1.3},
+ {1, 5.5, 2.6, 4.4, 1.2},
+ {1, 6.1, 3, 4.6, 1.4},
+ {1, 5.8, 2.6, 4, 1.2},
+ {1, 5, 2.3, 3.3, 1},
+ {1, 5.6, 2.7, 4.2, 1.3},
+ {1, 5.7, 3, 4.2, 1.2},
+ {1, 5.7, 2.9, 4.2, 1.3},
+ {1, 6.2, 2.9, 4.3, 1.3},
+ {1, 5.1, 2.5, 3, 1.1},
+ {1, 5.7, 2.8, 4.1, 1.3},
+ };
+}
http://git-wip-us.apache.org/repos/asf/ignite/blob/9e21cec0/examples/src/main/java/org/apache/ignite/examples/ml/clustering/KMeansDistributedClustererExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/clustering/KMeansDistributedClustererExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/clustering/KMeansDistributedClustererExample.java
deleted file mode 100644
index f8709e6..0000000
--- a/examples/src/main/java/org/apache/ignite/examples/ml/clustering/KMeansDistributedClustererExample.java
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.ignite.examples.ml.clustering;
-
-import java.util.Arrays;
-import java.util.List;
-import org.apache.ignite.Ignite;
-import org.apache.ignite.Ignition;
-import org.apache.ignite.examples.ExampleNodeStartup;
-import org.apache.ignite.examples.ml.math.matrix.SparseDistributedMatrixExample;
-import org.apache.ignite.ml.clustering.KMeansDistributedClusterer;
-import org.apache.ignite.ml.math.StorageConstants;
-import org.apache.ignite.ml.math.Tracer;
-import org.apache.ignite.ml.math.Vector;
-import org.apache.ignite.ml.math.distances.EuclideanDistance;
-import org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix;
-import org.apache.ignite.thread.IgniteThread;
-
-/**
- * <p>
- * Example of using {@link KMeansDistributedClusterer}.</p>
- * <p>
- * Note that in this example we cannot guarantee order in which nodes return results of intermediate
- * computations and therefore algorithm can return different results.</p>
- * <p>
- * Remote nodes should always be started with special configuration file which
- * enables P2P class loading: {@code 'ignite.{sh|bat} examples/config/example-ignite.xml'}.</p>
- * <p>
- * Alternatively you can run {@link ExampleNodeStartup} in another JVM which will start node
- * with {@code examples/config/example-ignite.xml} configuration.</p>
- */
-public class KMeansDistributedClustererExample {
- /**
- * Executes example.
- *
- * @param args Command line arguments, none required.
- */
- public static void main(String[] args) throws InterruptedException {
- // IMPL NOTE based on KMeansDistributedClustererTestSingleNode#testClusterizationOnDatasetWithObviousStructure
- System.out.println(">>> K-means distributed clusterer example started.");
-
- // Start ignite grid.
- try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
- System.out.println(">>> Ignite grid started.");
-
- // Create IgniteThread, we must work with SparseDistributedMatrix inside IgniteThread
- // because we create ignite cache internally.
- IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(),
- SparseDistributedMatrixExample.class.getSimpleName(), () -> {
-
- int ptsCnt = 10000;
-
- SparseDistributedMatrix points = new SparseDistributedMatrix(ptsCnt, 2,
- StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);
-
- DatasetWithObviousStructure dataset = new DatasetWithObviousStructure(10000);
-
- List<Vector> massCenters = dataset.generate(points);
-
- EuclideanDistance dist = new EuclideanDistance();
-
- KMeansDistributedClusterer clusterer = new KMeansDistributedClusterer(dist, 3, 100, 1L);
-
- Vector[] resCenters = clusterer.cluster(points, 4).centers();
-
- System.out.println("Mass centers:");
- massCenters.forEach(Tracer::showAscii);
-
- System.out.println("Cluster centers:");
- Arrays.asList(resCenters).forEach(Tracer::showAscii);
-
- points.destroy();
-
- System.out.println("\n>>> K-means distributed clusterer example completed.");
- });
-
- igniteThread.start();
-
- igniteThread.join();
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/ignite/blob/9e21cec0/examples/src/main/java/org/apache/ignite/examples/ml/clustering/KMeansLocalClustererExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/clustering/KMeansLocalClustererExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/clustering/KMeansLocalClustererExample.java
deleted file mode 100644
index 28ca9d9..0000000
--- a/examples/src/main/java/org/apache/ignite/examples/ml/clustering/KMeansLocalClustererExample.java
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.ignite.examples.ml.clustering;
-
-import java.util.Arrays;
-import java.util.Comparator;
-import java.util.List;
-import org.apache.ignite.ml.clustering.KMeansLocalClusterer;
-import org.apache.ignite.ml.clustering.KMeansModel;
-import org.apache.ignite.ml.math.Tracer;
-import org.apache.ignite.ml.math.Vector;
-import org.apache.ignite.ml.math.distances.DistanceMeasure;
-import org.apache.ignite.ml.math.distances.EuclideanDistance;
-import org.apache.ignite.ml.math.functions.Functions;
-import org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix;
-
-/**
- * Example of using {@link KMeansLocalClusterer}: fills a local two-column matrix with a generated dataset, clusters it
- * and prints the mass centers returned by the generator next to the centers found by the clusterer.
- */
-public class KMeansLocalClustererExample {
- /**
- * Executes example.
- *
- * @param args Command line arguments, none required.
- */
- public static void main(String[] args) {
- // IMPL NOTE based on KMeansDistributedClustererTestSingleNode#testClusterizationOnDatasetWithObviousStructure
- System.out.println(">>> K-means local clusterer example started.");
-
- int ptsCnt = 10000;
- DenseLocalOnHeapMatrix points = new DenseLocalOnHeapMatrix(ptsCnt, 2);
-
- // Reuse ptsCnt so the dataset size cannot drift apart from the matrix row count.
- DatasetWithObviousStructure dataset = new DatasetWithObviousStructure(ptsCnt);
-
- List<Vector> massCenters = dataset.generate(points);
-
- EuclideanDistance dist = new EuclideanDistance();
- OrderedNodesComparator comp = new OrderedNodesComparator(
- dataset.centers().values().toArray(new Vector[0]), dist);
-
- // Both center lists are sorted with the same comparator so that they are printed in a comparable order.
- massCenters.sort(comp);
-
- KMeansLocalClusterer clusterer = new KMeansLocalClusterer(dist, 100, 1L);
-
- KMeansModel mdl = clusterer.cluster(points, 4);
- Vector[] resCenters = mdl.centers();
- Arrays.sort(resCenters, comp);
-
- System.out.println("Mass centers:");
- massCenters.forEach(Tracer::showAscii);
-
- System.out.println("Cluster centers:");
- Arrays.asList(resCenters).forEach(Tracer::showAscii);
-
- System.out.println("\n>>> K-means local clusterer example completed.");
- }
-
- /**
- * Orders vectors by the index of the closest reference node; vectors that share the closest node are ordered by
- * their distance to that node.
- */
- private static class OrderedNodesComparator implements Comparator<Vector> {
- /** Measure used to find the closest reference node. */
- private final DistanceMeasure measure;
-
- /** Reference nodes; the position in this list defines the primary ordering. */
- private final List<Vector> orderedNodes;
-
- /**
- * @param orderedNodes Reference nodes in the desired order.
- * @param measure Distance measure.
- */
- OrderedNodesComparator(Vector[] orderedNodes, DistanceMeasure measure) {
- this.orderedNodes = Arrays.asList(orderedNodes);
- this.measure = measure;
- }
-
- /**
- * @param v Vector to look up.
- * @return Index of the reference node with the smallest distance to {@code v}.
- */
- private int findClosestNodeIndex(Vector v) {
- return Functions.argmin(orderedNodes, v1 -> measure.compute(v1, v)).get1();
- }
-
- /** {@inheritDoc} */
- @Override public int compare(Vector v1, Vector v2) {
- int ind1 = findClosestNodeIndex(v1);
- int ind2 = findClosestNodeIndex(v2);
-
- if (ind1 != ind2)
- return Integer.compare(ind1, ind2);
-
- // Compare the norms directly instead of casting the sign of their difference: no subtraction artifacts
- // and a well-defined result for every pair of doubles.
- return Double.compare(orderedNodes.get(ind1).minus(v1).kNorm(2),
- orderedNodes.get(ind2).minus(v2).kNorm(2));
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/ignite/blob/9e21cec0/modules/ml/src/main/java/org/apache/ignite/ml/FuzzyCMeansModelFormat.java
----------------------------------------------------------------------
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/FuzzyCMeansModelFormat.java b/modules/ml/src/main/java/org/apache/ignite/ml/FuzzyCMeansModelFormat.java
deleted file mode 100644
index cc3d9b3..0000000
--- a/modules/ml/src/main/java/org/apache/ignite/ml/FuzzyCMeansModelFormat.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.ignite.ml;
-
-import java.io.Serializable;
-import java.util.Arrays;
-import org.apache.ignite.ml.math.Vector;
-import org.apache.ignite.ml.math.distances.DistanceMeasure;
-
-/** Serializable representation of a Fuzzy C-Means model: the cluster centers and the distance measure. */
-public class FuzzyCMeansModelFormat implements Serializable {
- /** Centers of clusters. */
- private final Vector[] centers;
-
- /** Distance measure. */
- private final DistanceMeasure measure;
-
- /**
- * Creates the representation from the result of clusterization.
- *
- * @param centers Centers found while clusterization; the array is stored as is, without copying.
- * @param measure Distance measure.
- */
- public FuzzyCMeansModelFormat(Vector[] centers, DistanceMeasure measure) {
- this.measure = measure;
- this.centers = centers;
- }
-
- /** @return Distance measure used while clusterization. */
- public DistanceMeasure getMeasure() {
- return measure;
- }
-
- /** @return Cluster centers (the stored array itself, not a copy). */
- public Vector[] getCenters() {
- return centers;
- }
-
- /** {@inheritDoc} */
- @Override public int hashCode() {
- int measureHash = measure.hashCode();
- int centersHash = Arrays.hashCode(centers);
-
- // Equivalent to folding both hashes into a seed of 1 with multiplier 37.
- return (37 + measureHash) * 37 + centersHash;
- }
-
- /** {@inheritDoc} */
- @Override public boolean equals(Object obj) {
- if (this == obj)
- return true;
-
- if (obj == null)
- return false;
-
- if (obj.getClass() != getClass())
- return false;
-
- FuzzyCMeansModelFormat other = (FuzzyCMeansModelFormat)obj;
-
- if (!measure.equals(other.measure))
- return false;
-
- return Arrays.deepEquals(centers, other.centers);
- }
-}
http://git-wip-us.apache.org/repos/asf/ignite/blob/9e21cec0/modules/ml/src/main/java/org/apache/ignite/ml/KMeansModelFormat.java
----------------------------------------------------------------------
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/KMeansModelFormat.java b/modules/ml/src/main/java/org/apache/ignite/ml/KMeansModelFormat.java
deleted file mode 100644
index c013198..0000000
--- a/modules/ml/src/main/java/org/apache/ignite/ml/KMeansModelFormat.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.ignite.ml;
-
-import java.io.Serializable;
-import java.util.Arrays;
-import org.apache.ignite.ml.math.Vector;
-import org.apache.ignite.ml.math.distances.DistanceMeasure;
-
-/**
- * Serializable representation of a K-means model: the cluster centers and the distance measure.
- *
- * @see Exportable
- * @see Exporter
- */
-public class KMeansModelFormat implements Serializable {
- /** Centers of clusters. */
- private final Vector[] centers;
-
- /** Distance measure. */
- private final DistanceMeasure distance;
-
- /**
- * Creates the representation from cluster centers and a distance measure.
- *
- * @param centers Centers of clusters; the array is stored as is, without copying.
- * @param distance Distance measure.
- */
- public KMeansModelFormat(Vector[] centers, DistanceMeasure distance) {
- this.distance = distance;
- this.centers = centers;
- }
-
- /** @return Distance measure. */
- public DistanceMeasure getDistance() {
- return distance;
- }
-
- /** @return Centers of clusters (the stored array itself, not a copy). */
- public Vector[] getCenters() {
- return centers;
- }
-
- /** {@inheritDoc} */
- @Override public int hashCode() {
- int distanceHash = distance.hashCode();
- int centersHash = Arrays.hashCode(centers);
-
- // Equivalent to folding both hashes into a seed of 1 with multiplier 37.
- return (37 + distanceHash) * 37 + centersHash;
- }
-
- /** {@inheritDoc} */
- @Override public boolean equals(Object obj) {
- if (this == obj)
- return true;
-
- if (obj == null)
- return false;
-
- if (obj.getClass() != getClass())
- return false;
-
- KMeansModelFormat other = (KMeansModelFormat)obj;
-
- if (!distance.equals(other.distance))
- return false;
-
- return Arrays.deepEquals(centers, other.centers);
- }
-}
http://git-wip-us.apache.org/repos/asf/ignite/blob/9e21cec0/modules/ml/src/main/java/org/apache/ignite/ml/clustering/BaseFuzzyCMeansClusterer.java
----------------------------------------------------------------------
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/clustering/BaseFuzzyCMeansClusterer.java b/modules/ml/src/main/java/org/apache/ignite/ml/clustering/BaseFuzzyCMeansClusterer.java
deleted file mode 100644
index 2b2febf..0000000
--- a/modules/ml/src/main/java/org/apache/ignite/ml/clustering/BaseFuzzyCMeansClusterer.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.ignite.ml.clustering;
-
-import org.apache.ignite.ml.math.Matrix;
-import org.apache.ignite.ml.math.Vector;
-import org.apache.ignite.ml.math.distances.DistanceMeasure;
-import org.apache.ignite.ml.math.exceptions.ConvergenceException;
-import org.apache.ignite.ml.math.exceptions.MathIllegalArgumentException;
-
-/**
- * The abstract class that defines the basic interface of Fuzzy C-Means clusterers: it stores the parameters shared by
- * implementations and declares the clusterization entry point.
- *
- * @param <T> Type of the matrix that holds the points to cluster.
- */
-public abstract class BaseFuzzyCMeansClusterer<T extends Matrix> implements Clusterer<T, FuzzyCMeansModel> {
- /** Distance measure. */
- protected DistanceMeasure measure;
-
- /** Specific constant which is used in calculating of membership matrix. */
- protected double exponentialWeight;
-
- /**
- * The maximum distance between old and new centers or the maximum difference between new and old membership matrix
- * elements for which algorithm must stop.
- */
- protected double maxDelta;
-
- /** The flag that tells when algorithm should stop. */
- protected StopCondition stopCond;
-
- /**
- * Constructor that stores some required parameters.
- *
- * @param measure Distance measure.
- * @param exponentialWeight Specific constant which is used in calculating of membership matrix.
- * @param stopCond Flag that tells when algorithm should stop.
- * @param maxDelta The maximum distance between old and new centers or maximum difference between new and old
- * membership matrix elements for which algorithm must stop.
- */
- protected BaseFuzzyCMeansClusterer(DistanceMeasure measure, double exponentialWeight, StopCondition stopCond,
- double maxDelta) {
- this.measure = measure;
- this.exponentialWeight = exponentialWeight;
- this.stopCond = stopCond;
- this.maxDelta = maxDelta;
- }
-
- /**
- * Perform a cluster analysis on the given set of points.
- *
- * @param points The set of points.
- * @param k The number of clusters.
- * @return The model built by the clusterization.
- * @throws MathIllegalArgumentException If points are null or the number of data points is not compatible with this
- * clusterer.
- * @throws ConvergenceException If the algorithm has not yet converged after the maximum number of iterations has
- * been exceeded.
- */
- public abstract FuzzyCMeansModel cluster(T points, int k);
-
- /**
- * Calculates the distance between two vectors with the configured {@link DistanceMeasure}.
- *
- * @param v1 First vector.
- * @param v2 Second vector.
- * @return The distance between two points.
- */
- protected double distance(final Vector v1, final Vector v2) {
- return measure.compute(v1, v2);
- }
-
- /** Enumeration that contains different conditions under which algorithm must stop. */
- public enum StopCondition {
- /**
- * Algorithm stops if the maximum distance between new and old centers is less than
- * {@link BaseFuzzyCMeansClusterer#maxDelta}.
- */
- STABLE_CENTERS,
-
- /**
- * Algorithm stops if the maximum difference between elements of new and old membership matrix is less than
- * {@link BaseFuzzyCMeansClusterer#maxDelta}.
- */
- STABLE_MEMBERSHIPS
- }
-}
http://git-wip-us.apache.org/repos/asf/ignite/blob/9e21cec0/modules/ml/src/main/java/org/apache/ignite/ml/clustering/BaseKMeansClusterer.java
----------------------------------------------------------------------
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/clustering/BaseKMeansClusterer.java b/modules/ml/src/main/java/org/apache/ignite/ml/clustering/BaseKMeansClusterer.java
deleted file mode 100644
index 521437c..0000000
--- a/modules/ml/src/main/java/org/apache/ignite/ml/clustering/BaseKMeansClusterer.java
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.ignite.ml.clustering;
-
-import java.util.List;
-import org.apache.ignite.lang.IgniteBiTuple;
-import org.apache.ignite.ml.math.Matrix;
-import org.apache.ignite.ml.math.Vector;
-import org.apache.ignite.ml.math.distances.DistanceMeasure;
-import org.apache.ignite.ml.math.exceptions.ConvergenceException;
-import org.apache.ignite.ml.math.exceptions.MathIllegalArgumentException;
-
-/**
- * Base class for K-means clusterers: keeps the distance measure and offers helpers built on top of it.
- * This class is partly based on the corresponding class from Apache Common Math lib.
- *
- * @param <T> Type of the matrix that holds the points to cluster.
- */
-public abstract class BaseKMeansClusterer<T extends Matrix> implements Clusterer<T, KMeansModel> {
- /** The distance measure to use. */
- private DistanceMeasure measure;
-
- /**
- * Build a new clusterer with the given {@link DistanceMeasure}.
- *
- * @param measure The distance measure to use.
- */
- protected BaseKMeansClusterer(final DistanceMeasure measure) {
- this.measure = measure;
- }
-
- /**
- * Perform a cluster analysis on the given set of points.
- *
- * @param points The set of points.
- * @param k The number of clusters.
- * @return The model built by the clusterization.
- * @throws MathIllegalArgumentException If points are null or the number of data points is not compatible with this
- * clusterer.
- * @throws ConvergenceException If the algorithm has not yet converged after the maximum number of iterations has
- * been exceeded.
- */
- public abstract KMeansModel cluster(T points, int k)
- throws MathIllegalArgumentException, ConvergenceException;
-
- /**
- * Returns the {@link DistanceMeasure} instance used by this clusterer.
- *
- * @return The distance measure.
- */
- public DistanceMeasure getDistanceMeasure() {
- return measure;
- }
-
- /**
- * Calculates the distance between two vectors with the configured {@link DistanceMeasure}.
- *
- * @param v1 First vector.
- * @param v2 Second vector.
- * @return The distance between the two vectors.
- */
- protected double distance(final Vector v1, final Vector v2) {
- return measure.compute(v1, v2);
- }
-
- /**
- * Find the closest cluster center index and distance to it from a given point.
- *
- * @param centers Centers to look in.
- * @param pnt Point.
- * @return Tuple of the closest center index and the distance to it; index 0 and positive infinity when no center
- * is strictly closer than infinity (for example, when {@code centers} is empty).
- */
- protected IgniteBiTuple<Integer, Double> findClosest(Vector[] centers, Vector pnt) {
- int closestIdx = 0;
- double closestDist = Double.POSITIVE_INFINITY;
-
- int idx = 0;
-
- for (Vector center : centers) {
- double candidate = distance(center, pnt);
-
- // Strict comparison: on ties the center with the smallest index wins.
- if (candidate < closestDist) {
- closestIdx = idx;
- closestDist = candidate;
- }
-
- idx++;
- }
-
- return new IgniteBiTuple<>(closestIdx, closestDist);
- }
-}
http://git-wip-us.apache.org/repos/asf/ignite/blob/9e21cec0/modules/ml/src/main/java/org/apache/ignite/ml/clustering/Clusterer.java
----------------------------------------------------------------------
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/clustering/Clusterer.java b/modules/ml/src/main/java/org/apache/ignite/ml/clustering/Clusterer.java
deleted file mode 100644
index 204e28a..0000000
--- a/modules/ml/src/main/java/org/apache/ignite/ml/clustering/Clusterer.java
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.ignite.ml.clustering;
-
-import org.apache.ignite.ml.Model;
-
-/**
- * Contract for clusterers: algorithms that split a points set into clusters and return the result as a model.
- *
- * @param <P> Type of the points set.
- * @param <M> Type of the model produced by the clusterization.
- */
-public interface Clusterer<P, M extends Model> {
- /**
- * Clusters the given points set into {@code k} clusters.
- *
- * @param points Points set.
- * @param k Clusters count.
- * @return Model built by the clusterization.
- */
- M cluster(P points, int k);
-}
http://git-wip-us.apache.org/repos/asf/ignite/blob/9e21cec0/modules/ml/src/main/java/org/apache/ignite/ml/clustering/ClusterizationModel.java
----------------------------------------------------------------------
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/clustering/ClusterizationModel.java b/modules/ml/src/main/java/org/apache/ignite/ml/clustering/ClusterizationModel.java
deleted file mode 100644
index 99afec5..0000000
--- a/modules/ml/src/main/java/org/apache/ignite/ml/clustering/ClusterizationModel.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.ignite.ml.clustering;
-
-import org.apache.ignite.ml.Model;
-
-/**
- * Common contract of all clusterization models.
- *
- * @param <P> Type of a point; cluster centers are exposed as an array of this type.
- * @param <V> Second type argument passed through to {@link Model}.
- */
-public interface ClusterizationModel<P, V> extends Model<P, V> {
- /** @return The clusters count. */
- int clustersCount();
-
- /** @return Cluster centers. */
- P[] centers();
-}
http://git-wip-us.apache.org/repos/asf/ignite/blob/9e21cec0/modules/ml/src/main/java/org/apache/ignite/ml/clustering/FuzzyCMeansDistributedClusterer.java
----------------------------------------------------------------------
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/clustering/FuzzyCMeansDistributedClusterer.java b/modules/ml/src/main/java/org/apache/ignite/ml/clustering/FuzzyCMeansDistributedClusterer.java
deleted file mode 100644
index 8823c10..0000000
--- a/modules/ml/src/main/java/org/apache/ignite/ml/clustering/FuzzyCMeansDistributedClusterer.java
+++ /dev/null
@@ -1,512 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.ignite.ml.clustering;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-import java.util.UUID;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.stream.Collectors;
-import javax.cache.Cache;
-import org.apache.ignite.internal.util.GridArgumentCheck;
-import org.apache.ignite.ml.math.Vector;
-import org.apache.ignite.ml.math.VectorUtils;
-import org.apache.ignite.ml.math.distances.DistanceMeasure;
-import org.apache.ignite.ml.math.distributed.CacheUtils;
-import org.apache.ignite.ml.math.distributed.keys.impl.SparseMatrixKey;
-import org.apache.ignite.ml.math.exceptions.ConvergenceException;
-import org.apache.ignite.ml.math.exceptions.MathIllegalArgumentException;
-import org.apache.ignite.ml.math.functions.Functions;
-import org.apache.ignite.ml.math.functions.IgniteBiFunction;
-import org.apache.ignite.ml.math.functions.IgniteSupplier;
-import org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix;
-import org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix;
-import org.apache.ignite.ml.math.impls.storage.matrix.SparseDistributedMatrixStorage;
-import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
-import org.apache.ignite.ml.math.util.MatrixUtil;
-
-/** This class implements distributed version of Fuzzy C-Means clusterization of equal-weighted points. */
-public class FuzzyCMeansDistributedClusterer extends BaseFuzzyCMeansClusterer<SparseDistributedMatrix> {
- /** Random numbers generator which is used in centers selection. */
- private Random rnd;
-
- /** The value that is used to initialize random numbers generator. */
- private long seed;
-
- /** The number of initialization steps each of which adds some number of candidates for being a center. */
- private int initSteps;
-
- /** The maximum number of iterations of K-Means algorithm which selects the required number of centers. */
- private int kMeansMaxIterations;
-
- /** The maximum number of FCM iterations. */
- private int cMeansMaxIterations;
-
- /**
- * Creates a distributed Fuzzy C-Means clusterer with the given settings.
- *
- * @param measure Distance measure.
- * @param exponentialWeight Specific constant which is used in calculating of membership matrix.
- * @param stopCond Flag that tells when algorithm should stop.
- * @param maxDelta The maximum distance between old and new centers or maximum difference between new and old
- * membership matrix elements for which algorithm must stop.
- * @param cMeansMaxIterations The maximum number of FCM iterations.
- * @param seed Seed for random numbers generator; {@code null} means that a random seed is generated.
- * @param initSteps Number of steps of primary centers selection (the more steps, the more candidates).
- * @param kMeansMaxIterations The maximum number of K-Means iteration in primary centers selection.
- */
- public FuzzyCMeansDistributedClusterer(DistanceMeasure measure, double exponentialWeight,
- StopCondition stopCond, double maxDelta, int cMeansMaxIterations,
- Long seed, int initSteps, int kMeansMaxIterations) {
- super(measure, exponentialWeight, stopCond, maxDelta);
-
- this.cMeansMaxIterations = cMeansMaxIterations;
- this.kMeansMaxIterations = kMeansMaxIterations;
- this.initSteps = initSteps;
-
- // The effective seed is stored in the field, whether it was passed in or generated here.
- if (seed == null)
- this.seed = new Random().nextLong();
- else
- this.seed = seed;
-
- rnd = new Random(this.seed);
- }
-
- /**
- * {@inheritDoc}
- *
- * Picks {@code k} primary centers and then alternates membership and center recalculation until the configured
- * stop condition is met or the maximum number of FCM iterations is reached.
- *
- * @throws MathIllegalArgumentException If {@code k} is less than 2.
- * @throws ConvergenceException If the stop condition has not been met within the maximum number of iterations.
- */
- @Override public FuzzyCMeansModel cluster(SparseDistributedMatrix points, int k)
- throws MathIllegalArgumentException, ConvergenceException {
- GridArgumentCheck.notNull(points, "points");
-
- if (k < 2)
- throw new MathIllegalArgumentException("The number of clusters is less than 2");
-
- Vector[] centers = initializeCenters(points, k);
-
- MembershipsAndSums membershipsAndSums = null;
-
- int iteration = 0;
- boolean finished = false;
-
- while (!finished && iteration < cMeansMaxIterations) {
- MembershipsAndSums newMembershipsAndSums = calculateMembership(points, centers);
- Vector[] newCenters = calculateNewCenters(points, newMembershipsAndSums, k);
-
- if (stopCond == StopCondition.STABLE_CENTERS)
- finished = isFinished(centers, newCenters);
- else
- finished = isFinished(membershipsAndSums, newMembershipsAndSums);
-
- centers = newCenters;
- membershipsAndSums = newMembershipsAndSums;
-
- iteration++;
- }
-
- // Fail only when the stop condition was never met. Checking the iteration counter instead would also reject
- // a run that converged exactly on the last allowed iteration.
- if (!finished)
- throw new ConvergenceException("Fuzzy C-Means algorithm has not converged after " +
- iteration + " iterations");
-
- return new FuzzyCMeansModel(centers, measure);
- }
-
- /**
- * Selects {@code k} primary centers from the source points. A random point becomes the first candidate, then
- * {@code initSteps} rounds add further candidates based on the current costs, and finally the candidates are
- * reduced to {@code k} centers.
- *
- * @param points Matrix with source points.
- * @param k Number of centers.
- * @return Array of primary centers.
- */
- private Vector[] initializeCenters(SparseDistributedMatrix points, int k) {
- Vector firstCenter = points.viewRow(rnd.nextInt(points.rowSize()));
-
- List<Vector> candidates = new ArrayList<>();
- candidates.add(firstCenter);
-
- // Candidates added by the most recent round; costs only need to be refreshed against these.
- List<Vector> lastAdded = new ArrayList<>();
- lastAdded.add(firstCenter);
-
- UUID matrixId = points.getUUID();
- String cacheName = ((SparseDistributedMatrixStorage) points.getStorage()).cacheName();
-
- ConcurrentHashMap<Integer, Double> costs = new ConcurrentHashMap<>();
-
- for (int step = 0; step < initSteps; step++) {
- // Keep the smallest cost seen so far for every point.
- getNewCosts(cacheName, matrixId, lastAdded).forEach((idx, cost) -> costs.merge(idx, cost, Math::min));
-
- double costsSum = costs.values().stream().mapToDouble(Double::doubleValue).sum();
-
- lastAdded = getNewCenters(cacheName, matrixId, costs, costsSum, k);
- candidates.addAll(lastAdded);
- }
-
- return chooseKCenters(cacheName, matrixId, candidates, k);
- }
-
- /**
- * Calculate new distances from each point to the nearest center among the given new centers, by folding over the
- * cache entries of the point matrix.
- *
- * @param cacheName Cache name of point matrix.
- * @param uuid Uuid of point matrix.
- * @param newCenters The list of centers that was added on previous step.
- * @return Hash map with distances, keyed by the index taken from the entry key.
- */
- private ConcurrentHashMap<Integer, Double> getNewCosts(String cacheName, UUID uuid,
- List<Vector> newCenters) {
- return CacheUtils.distributedFold(cacheName,
- // Folder: invoked per cache entry with the partial result accumulated so far.
- (IgniteBiFunction<Cache.Entry<SparseMatrixKey, ConcurrentHashMap<Integer, Double>>,
- ConcurrentHashMap<Integer, Double>,
- ConcurrentHashMap<Integer, Double>>)(vectorWithIndex, map) -> {
- Vector vector = VectorUtils.fromMap(vectorWithIndex.getValue(), false);
-
- // Functions.MIN is presumably the minimum of its two arguments, so the entry keeps the distance to the
- // closest of the new centers - TODO confirm.
- for (Vector center : newCenters)
- map.merge(vectorWithIndex.getKey().index(), distance(vector, center), Functions.MIN);
-
- return map;
- },
- // Only entries that belong to the matrix with the given uuid are processed.
- key -> key.dataStructureId().equals(uuid),
- // Combiner of two partial results. NOTE(review): putAll overwrites on equal keys; this assumes each
- // index is produced by exactly one partial result - confirm.
- (map1, map2) -> {
- map1.putAll(map2);
- return map1;
- },
- ConcurrentHashMap::new);
- }
-
- /**
- * Choose some number of center candidates from source points according to their costs: each point is taken with
- * probability {@code cost * 2 * k / costsSum}.
- *
- * @param cacheName Cache name of point matrix.
- * @param uuid Uuid of point matrix.
- * @param costs Hash map with costs (distances to nearest center).
- * @param costsSum The sum of costs.
- * @param k The estimated number of centers.
- * @return The list of new candidates.
- */
- private List<Vector> getNewCenters(String cacheName, UUID uuid,
- ConcurrentHashMap<Integer, Double> costs, double costsSum, int k) {
- return CacheUtils.distributedFold(cacheName,
- // Folder: invoked per cache entry with the list of candidates collected so far.
- (IgniteBiFunction<Cache.Entry<SparseMatrixKey, Map<Integer, Double>>,
- List<Vector>,
- List<Vector>>)(vectorWithIndex, centers) -> {
- Integer idx = vectorWithIndex.getKey().index();
- Vector vector = VectorUtils.fromMap(vectorWithIndex.getValue(), false);
-
- // NOTE(review): costs.get(idx) is unboxed here, so a missing index would fail with a
- // NullPointerException; a zero costsSum makes the probability NaN or infinite - confirm callers
- // rule both out.
- double probability = (costs.get(idx) * 2.0 * k) / costsSum;
-
- if (rnd.nextDouble() < probability)
- centers.add(vector);
-
- return centers;
- },
- // Only entries that belong to the matrix with the given uuid are processed.
- key -> key.dataStructureId().equals(uuid),
- // Combiner: candidates of two partial results are concatenated.
- (list1, list2) -> {
- list1.addAll(list2);
- return list1;
- },
- ArrayList::new);
- }
-
- /**
- * Reduces the candidates to the required number of centers: duplicates are dropped, every remaining candidate is
- * weighted and a weighted local K-Means run over the candidates yields the result.
- *
- * @param cacheName Cache name of the point matrix.
- * @param uuid Uuid of the point matrix.
- * @param centers The list of candidates.
- * @param k The estimated number of centers.
- * @return {@code k} centers.
- */
- private Vector[] chooseKCenters(String cacheName, UUID uuid, List<Vector> centers, int k) {
- List<Vector> uniqueCenters = centers.stream().distinct().collect(Collectors.toList());
-
- // Weights are keyed by the position in the deduplicated list.
- ConcurrentHashMap<Integer, Integer> hits = weightCenters(cacheName, uuid, uniqueCenters);
-
- int cnt = uniqueCenters.size();
- List<Double> weights = new ArrayList<>(cnt);
-
- // Candidates without an entry in the weights map get weight 0.
- for (int i = 0; i < cnt; i++)
- weights.add(hits.getOrDefault(i, 0).doubleValue());
-
- DenseLocalOnHeapMatrix candidatesMatrix = MatrixUtil.fromList(uniqueCenters, true);
-
- return new KMeansLocalClusterer(measure, kMeansMaxIterations, seed)
- .cluster(candidatesMatrix, k, weights)
- .centers();
- }
-
- /**
- * Weight each center with number of points for which this center is the nearest.
- *
- * @param cacheName Cache name of the point matrix.
- * @param uuid Uuid of the point matrix.
- * @param centers The list of centers.
- * @return Hash map with weights keyed by the index of the center in {@code centers}; centers that are nearest to
- * no point have no entry. Empty map if {@code centers} is empty.
- */
- public ConcurrentHashMap<Integer, Integer> weightCenters(String cacheName, UUID uuid, List<Vector> centers) {
- if (centers.isEmpty())
- return new ConcurrentHashMap<>();
-
- return CacheUtils.distributedFold(cacheName,
- (IgniteBiFunction<Cache.Entry<SparseMatrixKey, ConcurrentHashMap<Integer, Double>>,
- ConcurrentHashMap<Integer, Integer>,
- ConcurrentHashMap<Integer, Integer>>)(vectorWithIndex, counts) -> {
- Vector vector = VectorUtils.fromMap(vectorWithIndex.getValue(), false);
-
- int nearest = 0;
- double minDistance = distance(centers.get(nearest), vector);
-
- // Center 0 is already the current best, so the comparison starts from center 1.
- for (int i = 1; i < centers.size(); i++) {
- double currDistance = distance(centers.get(i), vector);
-
- if (currDistance < minDistance) {
- minDistance = currDistance;
- nearest = i;
- }
- }
-
- counts.merge(nearest, 1, Integer::sum);
-
- return counts;
- },
- key -> key.dataStructureId().equals(uuid),
- (map1, map2) -> {
- // Partial results count hits for the same center indices, so the counts have to be added up;
- // putAll would replace the count of one partial result with the count of the other.
- map2.forEach((idx, cnt) -> map1.merge(idx, cnt, Integer::sum));
-
- return map1;
- },
- ConcurrentHashMap::new);
- }
-
- /**
- * Calculate matrix of membership coefficients for each point and each center by folding over the cache entries of
- * the point matrix.
- *
- * @param points Matrix with source points.
- * @param centers Array of current centers.
- * @return Membership matrix and sums of membership coefficients for each center.
- */
- private MembershipsAndSums calculateMembership(SparseDistributedMatrix points, Vector[] centers) {
- String cacheName = ((SparseDistributedMatrixStorage) points.getStorage()).cacheName();
- UUID uuid = points.getUUID();
- // Exponent applied to every ratio of distances below.
- double fuzzyMembershipCoefficient = 2 / (exponentialWeight - 1);
-
- MembershipsAndSumsSupplier supplier = new MembershipsAndSumsSupplier(centers.length);
-
- return CacheUtils.distributedFold(cacheName,
- // Folder: invoked per cache entry with the partial result accumulated so far.
- (IgniteBiFunction<Cache.Entry<SparseMatrixKey, ConcurrentHashMap<Integer, Double>>,
- MembershipsAndSums,
- MembershipsAndSums>)(vectorWithIndex, membershipsAndSums) -> {
- Integer idx = vectorWithIndex.getKey().index();
- Vector pnt = VectorUtils.fromMap(vectorWithIndex.getValue(), false);
- Vector distances = new DenseLocalOnHeapVector(centers.length);
- Vector pntMemberships = new DenseLocalOnHeapVector(centers.length);
-
- // Distances from the point to every center are computed once and reused in the double loop below.
- for (int i = 0; i < centers.length; i++)
- distances.setX(i, distance(centers[i], pnt));
-
- for (int i = 0; i < centers.length; i++) {
- double invertedFuzzyWeight = 0.0;
-
- for (int j = 0; j < centers.length; j++) {
- double val = Math.pow(distances.getX(i) / distances.getX(j), fuzzyMembershipCoefficient);
- // A NaN ratio (for example 0/0 when both distances are zero) is counted as 1.
- if (Double.isNaN(val))
- val = 1.0;
-
- invertedFuzzyWeight += val;
- }
-
- // The stored value is the inverted sum raised to the power of exponentialWeight.
- double membership = Math.pow(1.0 / invertedFuzzyWeight, exponentialWeight);
- pntMemberships.setX(i, membership);
- }
-
- // Record the memberships of this point and add them to the running per-center sums.
- membershipsAndSums.memberships.put(idx, pntMemberships);
- membershipsAndSums.membershipSums = membershipsAndSums.membershipSums.plus(pntMemberships);
-
- return membershipsAndSums;
- },
- // Only entries that belong to the matrix with the given uuid are processed.
- key -> key.dataStructureId().equals(uuid),
- // Combiner: the second partial result is merged into the first one.
- (mem1, mem2) -> {
- mem1.merge(mem2);
- return mem1;
- },
- supplier);
- }
-
- /**
- * Calculate new centers according to membership matrix.
- *
- * @param points Matrix with source points.
- * @param membershipsAndSums Membership matrix and sums of membership coefficient for each center.
- * @param k The number of centers.
- * @return Array of new centers.
- */
- private Vector[] calculateNewCenters(SparseDistributedMatrix points, MembershipsAndSums membershipsAndSums, int k) {
- String cacheName = ((SparseDistributedMatrixStorage) points.getStorage()).cacheName();
- UUID uuid = points.getUUID();
-
- CentersArraySupplier supplier = new CentersArraySupplier(k, points.columnSize());
-
- Vector[] centers = CacheUtils.distributedFold(cacheName,
- (IgniteBiFunction<Cache.Entry<SparseMatrixKey, ConcurrentHashMap<Integer, Double>>,
- Vector[],
- Vector[]>)(vectorWithIndex, centerSums) -> {
- Integer idx = vectorWithIndex.getKey().index();
- Vector pnt = MatrixUtil.localCopyOf(VectorUtils.fromMap(vectorWithIndex.getValue(), false));
- Vector pntMemberships = membershipsAndSums.memberships.get(idx);
-
- for (int i = 0; i < k; i++) {
- Vector weightedPnt = pnt.times(pntMemberships.getX(i));
- centerSums[i] = centerSums[i].plus(weightedPnt);
- }
-
- return centerSums;
- },
- key -> key.dataStructureId().equals(uuid),
- (sums1, sums2) -> {
- for (int i = 0; i < k; i++)
- sums1[i] = sums1[i].plus(sums2[i]);
-
- return sums1;
- },
- supplier);
-
- for (int i = 0; i < k; i++)
- centers[i] = centers[i].divide(membershipsAndSums.membershipSums.getX(i));
-
- return centers;
- }
-
- /**
- * Check if centers have moved insignificantly.
- *
- * @param centers Old centers.
- * @param newCenters New centers.
- * @return The result of comparison.
- */
- private boolean isFinished(Vector[] centers, Vector[] newCenters) {
- int numCenters = centers.length;
-
- for (int i = 0; i < numCenters; i++)
- if (distance(centers[i], newCenters[i]) > maxDelta)
- return false;
-
- return true;
- }
-
- /**
- * Check memberships difference.
- *
- * @param membershipsAndSums Old memberships.
- * @param newMembershipsAndSums New memberships.
- * @return The result of comparison.
- */
- private boolean isFinished(MembershipsAndSums membershipsAndSums, MembershipsAndSums newMembershipsAndSums) {
- if (membershipsAndSums == null)
- return false;
-
- double currMaxDelta = 0.0;
- for (Integer key : membershipsAndSums.memberships.keySet()) {
- double distance = measure.compute(membershipsAndSums.memberships.get(key),
- newMembershipsAndSums.memberships.get(key));
- if (distance > currMaxDelta)
- currMaxDelta = distance;
- }
-
- return currMaxDelta <= maxDelta;
- }
-
- /** Service class used to optimize counting of membership sums. */
- private class MembershipsAndSums {
- /** Membership matrix. */
- public ConcurrentHashMap<Integer, Vector> memberships = new ConcurrentHashMap<>();
-
- /** Membership sums. */
- public Vector membershipSums;
-
- /**
- * Default constructor.
- *
- * @param k The number of centers.
- */
- public MembershipsAndSums(int k) {
- membershipSums = new DenseLocalOnHeapVector(k);
- }
-
- /**
- * Merge results of calculation for different parts of points.
- * @param another Another part of memberships and sums.
- */
- public void merge(MembershipsAndSums another) {
- memberships.putAll(another.memberships);
- membershipSums = membershipSums.plus(another.membershipSums);
- }
- }
-
- /** Service class that is used to create new {@link MembershipsAndSums} instances. */
- private class MembershipsAndSumsSupplier implements IgniteSupplier<MembershipsAndSums> {
- /** The number of centers */
- int k;
-
- /**
- * Constructor that retains the number of centers.
- *
- * @param k The number of centers.
- */
- public MembershipsAndSumsSupplier(int k) {
- this.k = k;
- }
-
- /**
- * Create new instance of {@link MembershipsAndSums}.
- *
- * @return {@link MembershipsAndSums} object.
- */
- @Override public MembershipsAndSums get() {
- return new MembershipsAndSums(k);
- }
- }
-
- /** Service class that is used to create new arrays of vectors. */
- private class CentersArraySupplier implements IgniteSupplier<Vector[]> {
- /** The number of centers. */
- int k;
-
- /** The number of coordinates. */
- int dim;
-
- /**
- * Constructor that retains all required parameters.
- *
- * @param k The number of centers.
- * @param dim The number of coordinates.
- */
- public CentersArraySupplier(int k, int dim) {
- this.k = k;
- this.dim = dim;
- }
-
- /**
- * Create new array of vectors.
- *
- * @return Array of vectors.
- */
- @Override public Vector[] get() {
- DenseLocalOnHeapVector[] centerSumsArr = new DenseLocalOnHeapVector[k];
- for (int i = 0; i < k; i++)
- centerSumsArr[i] = new DenseLocalOnHeapVector(dim);
- return centerSumsArr;
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/ignite/blob/9e21cec0/modules/ml/src/main/java/org/apache/ignite/ml/clustering/FuzzyCMeansLocalClusterer.java
----------------------------------------------------------------------
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/clustering/FuzzyCMeansLocalClusterer.java b/modules/ml/src/main/java/org/apache/ignite/ml/clustering/FuzzyCMeansLocalClusterer.java
deleted file mode 100644
index a1b6d3f..0000000
--- a/modules/ml/src/main/java/org/apache/ignite/ml/clustering/FuzzyCMeansLocalClusterer.java
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.ignite.ml.clustering;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Random;
-import org.apache.ignite.internal.util.GridArgumentCheck;
-import org.apache.ignite.ml.math.Matrix;
-import org.apache.ignite.ml.math.Vector;
-import org.apache.ignite.ml.math.distances.DistanceMeasure;
-import org.apache.ignite.ml.math.exceptions.ConvergenceException;
-import org.apache.ignite.ml.math.exceptions.MathIllegalArgumentException;
-import org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix;
-import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
-
-/** Implements the local version of Fuzzy C-Means algorithm for weighted points. */
-public class FuzzyCMeansLocalClusterer extends BaseFuzzyCMeansClusterer<DenseLocalOnHeapMatrix> implements
- WeightedClusterer<DenseLocalOnHeapMatrix, FuzzyCMeansModel> {
- /** The maximum number of iterations. */
- private int maxIterations;
-
- /** The random numbers generator that is used to choose primary centers. */
- private Random rnd;
-
- /**
- * Constructor that retains all required parameters.
- *
- * @param measure Distance measure.
- * @param exponentialWeight Specific constant which is used in calculating of membership matrix.
- * @param stopCond Flag that tells when algorithm should stop.
- * @param maxDelta The maximum distance between old and new centers or maximum difference between new and old
- * membership matrix elements for which algorithm must stop.
- * @param maxIterations The maximum number of FCM iterations.
- */
- public FuzzyCMeansLocalClusterer(DistanceMeasure measure, double exponentialWeight, StopCondition stopCond,
- double maxDelta, int maxIterations, Long seed) {
- super(measure, exponentialWeight, stopCond, maxDelta);
- this.maxIterations = maxIterations;
- rnd = seed != null ? new Random(seed) : new Random();
- }
-
- /** {@inheritDoc} */
- @Override public FuzzyCMeansModel cluster(DenseLocalOnHeapMatrix points, int k) {
- List<Double> ones = new ArrayList<>(Collections.nCopies(points.rowSize(), 1.0));
- return cluster(points, k, ones);
- }
-
- /** {@inheritDoc} */
- @Override public FuzzyCMeansModel cluster(DenseLocalOnHeapMatrix points, int k, List<Double> weights)
- throws MathIllegalArgumentException, ConvergenceException {
- GridArgumentCheck.notNull(points, "points");
- GridArgumentCheck.notNull(weights, "weights");
-
- if (points.rowSize() != weights.size())
- throw new MathIllegalArgumentException("The number of points and the number of weights are not equal");
-
- if (k < 2)
- throw new MathIllegalArgumentException("The number of clusters is less than 2");
-
- Matrix centers = new DenseLocalOnHeapMatrix(k, points.columnSize());
- Matrix distances = new DenseLocalOnHeapMatrix(k, points.rowSize());
- Matrix membership = new DenseLocalOnHeapMatrix(k, points.rowSize());
- Vector weightsVector = new DenseLocalOnHeapVector(weights.size());
- for (int i = 0; i < weights.size(); i++)
- weightsVector.setX(i, weights.get(i));
-
- initializeCenters(centers, points, k, weightsVector);
-
- int iteration = 0;
- boolean finished = false;
- while (iteration < maxIterations && !finished) {
- calculateDistances(distances, points, centers);
- Matrix newMembership = calculateMembership(distances, weightsVector);
- Matrix newCenters = calculateNewCenters(points, newMembership);
-
- if (this.stopCond == StopCondition.STABLE_CENTERS)
- finished = areCentersStable(centers, newCenters);
- else
- finished = areMembershipStable(membership, newMembership);
-
- centers = newCenters;
- membership = newMembership;
- iteration++;
- }
-
- if (iteration == maxIterations)
- throw new ConvergenceException("Fuzzy C-Means algorithm has not converged after " +
- Integer.toString(iteration) + " iterations");
-
- Vector[] centersArr = new Vector[k];
- for (int i = 0; i < k; i++)
- centersArr[i] = centers.getRow(i);
-
- return new FuzzyCMeansModel(centersArr, measure);
- }
-
- /**
- * Choose {@code k} centers according to their weights.
- *
- * @param centers Output matrix containing primary centers.
- * @param points Matrix of source points.
- * @param k The number of centers.
- * @param weights Vector of weights.
- */
- private void initializeCenters(Matrix centers, Matrix points, int k, Vector weights) {
- //int dimensions = points.columnSize();
- int numPoints = points.rowSize();
-
- Vector firstCenter = points.viewRow(rnd.nextInt(numPoints));
- centers.setRow(0, firstCenter.getStorage().data());
-
- Vector costs = points.foldRows(vector -> distance(vector, firstCenter));
- costs = costs.times(weights);
-
- double sum = costs.sum();
-
- for (int i = 1; i < k; i++) {
- double probe = rnd.nextDouble() * sum;
- double cntr = 0;
- int id = 0;
-
- for (int j = 0; j < numPoints; j++) {
- cntr += costs.getX(j);
- if (cntr >= probe) {
- id = j;
- break;
- }
- }
-
- centers.setRow(i, points.viewRow(id).getStorage().data());
- sum -= costs.get(id);
- costs.set(id, 0.0);
- }
- }
-
- /**
- * Calculate matrix of distances form each point to each center.
- *
- * @param distances Output matrix.
- * @param points Matrix that contains source points.
- * @param centers Matrix that contains centers.
- */
- private void calculateDistances(Matrix distances, Matrix points, Matrix centers) {
- int numPoints = points.rowSize();
- int numCenters = centers.rowSize();
-
- for (int i = 0; i < numCenters; i++)
- for (int j = 0; j < numPoints; j++)
- distances.set(i, j, distance(centers.viewRow(i), points.viewRow(j)));
- }
-
- /**
- * Calculate membership matrix.
- *
- * @param distances Matrix of distances.
- * @param weights Vector of weights.
- * @
- */
- private Matrix calculateMembership(Matrix distances, Vector weights) {
- Matrix newMembership = new DenseLocalOnHeapMatrix(distances.rowSize(), distances.columnSize());
- int numPoints = distances.columnSize();
- int numCenters = distances.rowSize();
- double fuzzyMembershipCoefficient = 2 / (exponentialWeight - 1);
-
- for (int i = 0; i < numCenters; i++) {
- for (int j = 0; j < numPoints; j++) {
- double invertedFuzzyWeight = 0.0;
-
- for (int k = 0; k < numCenters; k++) {
- double val = Math.pow(distances.get(i, j) / distances.get(k, j),
- fuzzyMembershipCoefficient);
- if (Double.isNaN(val))
- val = 1.0;
-
- invertedFuzzyWeight += val;
- }
-
- double weight = 1.0 / invertedFuzzyWeight * weights.getX(j);
- newMembership.setX(i, j, Math.pow(weight, exponentialWeight));
- }
- }
- return newMembership;
- }
-
- /**
- * Calculate new centers using membership matrix.
- *
- * @param points Matrix of source points.
- * @param membership Matrix that contains membership coefficients.
- * @return Matrix that contains new centers.
- */
- private Matrix calculateNewCenters(Matrix points, Matrix membership) {
- Vector membershipSums = membership.foldRows(Vector::sum);
- Matrix newCenters = membership.times(points);
-
- int numCenters = newCenters.rowSize();
- for (int i = 0; i < numCenters; i++)
- newCenters.viewRow(i).divide(membershipSums.getX(i));
-
- return newCenters;
- }
-
- /**
- * Check if centers have moved insignificantly.
- *
- * @param centers Old centers.
- * @param newCenters New centers.
- * @return The result of comparison.
- */
- private boolean areCentersStable(Matrix centers, Matrix newCenters) {
- int numCenters = centers.rowSize();
- for (int i = 0; i < numCenters; i++)
- if (distance(centers.viewRow(i), newCenters.viewRow(i)) > maxDelta)
- return false;
-
- return true;
- }
-
- /**
- * Check if membership matrix has changed insignificantly.
- *
- * @param membership Old membership matrix.
- * @param newMembership New membership matrix.
- * @return The result of comparison.
- */
- private boolean areMembershipStable(Matrix membership, Matrix newMembership) {
- int numCenters = membership.rowSize();
- int numPoints = membership.columnSize();
-
- for (int i = 0; i < numCenters; i++)
- for (int j = 0; j < numPoints; j++)
- if (Math.abs(newMembership.getX(i, j) - membership.getX(i, j)) > maxDelta)
- return false;
-
- return true;
- }
-}