You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ak...@apache.org on 2023/02/03 18:48:47 UTC

[mahout] branch trunk updated: Fixes 2147

This is an automated email from the ASF dual-hosted git repository.

akm pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/mahout.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 3a217f4b4 Fixes 2147
     new e91c6241c Merge pull request #421 from rawkintrevo/2147
3a217f4b4 is described below

commit 3a217f4b47c98c7189eb3b193340d8775797c8e5
Author: Trevor Grant <tr...@gmail.com>
AuthorDate: Fri Feb 3 12:39:15 2023 -0600

    Fixes 2147
---
 .../mahout/math/algorithms/clustering/Canopy.scala | 58 ++++++++++++++++++++--
 1 file changed, 55 insertions(+), 3 deletions(-)

diff --git a/core/src/main/scala/org/apache/mahout/math/algorithms/clustering/Canopy.scala b/core/src/main/scala/org/apache/mahout/math/algorithms/clustering/Canopy.scala
index 81fef41d7..65a34cf5a 100644
--- a/core/src/main/scala/org/apache/mahout/math/algorithms/clustering/Canopy.scala
+++ b/core/src/main/scala/org/apache/mahout/math/algorithms/clustering/Canopy.scala
@@ -28,12 +28,28 @@ import org.apache.mahout.math.scalabindings._
 import org.apache.mahout.math.scalabindings.RLikeOps._
 import org.apache.mahout.math.{Matrix, Vector}
 
-
+/**
+ * CanopyClusteringModel extends ClusteringModel and stores the canopy centers and distance metric information.
+ *
+ * @param canopies The matrix storing the canopy centers.
+ * @param dm The symbol indicating the distance metric used for calculating distances.
+ *
+ * @constructor Creates a new instance of the CanopyClusteringModel.
+ *
+ * @note `canopyCenters` holds the matrix storing the canopy centers.
+ * @note `distanceMetric` holds the symbol indicating the distance metric used for calculating distances.
+ */
 class CanopyClusteringModel(canopies: Matrix, dm: Symbol) extends ClusteringModel {
 
   val canopyCenters = canopies
   val distanceMetric = dm
 
+  /**
+   * Assigns the input data points to their nearest canopy center.
+   *
+   * @param input The input data points to be assigned to canopies.
+   * @return The data points assigned to their nearest canopy centers.
+   */
   def cluster[K](input: DrmLike[K]): DrmLike[K] = {
 
     implicit val ctx = input.context
@@ -67,7 +83,17 @@ class CanopyClusteringModel(canopies: Matrix, dm: Symbol) extends ClusteringMode
   }
 }
 
-
+/**
+ * CanopyClustering extends ClusteringFitter and implements the fitting process for the Canopy Clustering algorithm.
+ *
+ * @constructor Creates a new instance of the CanopyClustering.
+ *
+ * @note `t1` is the loose distance used in the canopy clustering algorithm.
+ * @note `t2` is the tight distance used in the canopy clustering algorithm.
+ * @note `t3` is the loose distance used in merging canopy clusters.
+ * @note `t4` is the tight distance used in merging canopy clusters.
+ * @note `distanceMeasure` is the symbol indicating the distance metric used for calculating distances.
+ */
 class CanopyClustering extends ClusteringFitter {
 
   var t1: Double = _  // loose distance
@@ -76,6 +102,11 @@ class CanopyClustering extends ClusteringFitter {
   var t4: Double = _
   var distanceMeasure: Symbol = _
 
+  /**
+   * Sets the standard hyperparameters for the Canopy Clustering algorithm.
+   *
+   * @param hyperparameters The hyperparameters to be set; `'t1` defaults to 0.5 and `'t2` to 0.1 when absent.
+   */
   def setStandardHyperparameters(hyperparameters: Map[Symbol, Any] = Map('foo -> None)): Unit = {
     t1 = hyperparameters.asInstanceOf[Map[Symbol, Double]].getOrElse('t1, 0.5)
     t2 = hyperparameters.asInstanceOf[Map[Symbol, Double]].getOrElse('t2, 0.1)
@@ -86,6 +117,13 @@ class CanopyClustering extends ClusteringFitter {
 
   }
 
+  /**
+   * Fits the Canopy Clustering algorithm to the input data.
+   *
+   * @param input The input data to be fit to the algorithm.
+   * @param hyperparameters The hyperparameters for the algorithm.
+   * @return The CanopyClusteringModel with the fitted results.
+   */
   def fit[K](input: DrmLike[K],
              hyperparameters: (Symbol, Any)*): CanopyClusteringModel = {
 
@@ -124,7 +162,21 @@ class CanopyClustering extends ClusteringFitter {
 
 }
 
+/**
+ * CanopyFn implements functions used in the Canopy Clustering algorithm.
+ */
 object CanopyFn extends Serializable {
+
+  /**
+   * Finds the canopy centers in a matrix, given a distance metric and the t1 and t2 parameters.
+   * Returns a matrix containing the centers found.
+   *
+   * @param block The input matrix in which centers are to be found.
+   * @param distanceMeasure The distance metric used to calculate the distance between vectors.
+   * @param t1 The t1 parameter used in the Canopy algorithm.
+   * @param t2 The t2 parameter used in the Canopy algorithm.
+   * @return A matrix with the found centers.
+   */
   def findCenters(block: Matrix, distanceMeasure: DistanceMetric, t1: Double, t2: Double): Matrix = {
     var rowAssignedToCanopy = Array.fill(block.nrow) { false }
     val clusterBuf = scala.collection.mutable.ListBuffer.empty[org.apache.mahout.math.Vector]
@@ -152,4 +204,4 @@ object CanopyFn extends Serializable {
     }
     dense(clusterBuf)
   }
-}
\ No newline at end of file
+}