You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ro...@apache.org on 2009/10/19 02:03:55 UTC

svn commit: r826561 - in /lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes: algorithm/BayesAlgorithm.java algorithm/CBayesAlgorithm.java common/ByScoreLabelResultComparator.java common/ClassifierResultPriorityQueue.java

Author: robinanil
Date: Mon Oct 19 00:03:55 2009
New Revision: 826561

URL: http://svn.apache.org/viewvc?rev=826561&view=rev
Log:
MAHOUT-186 Removed the custom ClassifierResultPriorityQueue class; replaced it with java.util.PriorityQueue<ClassifierResult>

Added:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/ByScoreLabelResultComparator.java
Removed:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/ClassifierResultPriorityQueue.java
Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java?rev=826561&r1=826560&r2=826561&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java Mon Oct 19 00:03:55 2009
@@ -17,22 +17,24 @@
 
 package org.apache.mahout.classifier.bayes.algorithm;
 
+import java.util.ArrayList;
 import java.util.Collection;
-import java.util.Deque;
+import java.util.Collections;
 import java.util.HashMap;
-import java.util.LinkedList;
+import java.util.List;
 import java.util.Map;
+import java.util.PriorityQueue;
 
-import org.apache.hadoop.util.PriorityQueue;
 import org.apache.mahout.classifier.ClassifierResult;
-import org.apache.mahout.classifier.bayes.common.ClassifierResultPriorityQueue;
+import org.apache.mahout.classifier.bayes.common.ByScoreLabelResultComparator;
 import org.apache.mahout.classifier.bayes.exceptions.InvalidDatastoreException;
 import org.apache.mahout.classifier.bayes.interfaces.Algorithm;
 import org.apache.mahout.classifier.bayes.interfaces.Datastore;
 
 public class BayesAlgorithm implements Algorithm{
 
-  private double alpha_i = 1.0;
+  private static final double alpha_i = 1.0;
+
   @Override
   public ClassifierResult classifyDocument(String[] document,
       Datastore datastore, String defaultCategory)
@@ -57,24 +59,28 @@
       Datastore datastore, String defaultCategory, int numResults)
       throws InvalidDatastoreException {
     Collection<String> categories = datastore.getKeys("labelWeight");    
-    PriorityQueue<ClassifierResult> pq = new ClassifierResultPriorityQueue(numResults);
-    ClassifierResult tmp;
+    PriorityQueue<ClassifierResult> pq =
+        new PriorityQueue<ClassifierResult>(numResults, new ByScoreLabelResultComparator());
     for (String category : categories){
       double prob = documentWeight(datastore, category, document);
       if (prob > 0.0) {
-        tmp = new ClassifierResult(category, prob);
-        pq.insert(tmp);
+        pq.add(new ClassifierResult(category, prob));
+        if (pq.size() > numResults) {
+          pq.remove();
+        }
       }
     }
 
-    Deque<ClassifierResult> result = new LinkedList<ClassifierResult>();
-    while ((tmp = pq.pop()) != null) {
-      result.addLast(tmp);
-    }
-    if (result.isEmpty()){
-      result.add(new ClassifierResult(defaultCategory, 0));
+    if (pq.isEmpty()) {
+      return new ClassifierResult[] { new ClassifierResult(defaultCategory, 0.0) };
+    } else {
+      List<ClassifierResult> result = new ArrayList<ClassifierResult>(pq.size());
+      while (pq.isEmpty() == false) {
+        result.add(pq.remove());
+      }
+      Collections.reverse(result);
+      return result.toArray(new ClassifierResult[pq.size()]);
     }
-    return result.toArray(new ClassifierResult[result.size()]);  
   }
   
   @Override

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java?rev=826561&r1=826560&r2=826561&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java Mon Oct 19 00:03:55 2009
@@ -17,22 +17,23 @@
 
 package org.apache.mahout.classifier.bayes.algorithm;
 
+import java.util.ArrayList;
 import java.util.Collection;
-import java.util.Deque;
+import java.util.Collections;
 import java.util.HashMap;
-import java.util.LinkedList;
+import java.util.List;
 import java.util.Map;
+import java.util.PriorityQueue;
 
-import org.apache.hadoop.util.PriorityQueue;
 import org.apache.mahout.classifier.ClassifierResult;
-import org.apache.mahout.classifier.bayes.common.ClassifierResultPriorityQueue;
+import org.apache.mahout.classifier.bayes.common.ByScoreLabelResultComparator;
 import org.apache.mahout.classifier.bayes.exceptions.InvalidDatastoreException;
 import org.apache.mahout.classifier.bayes.interfaces.Algorithm;
 import org.apache.mahout.classifier.bayes.interfaces.Datastore;
 
 public class CBayesAlgorithm implements Algorithm {
 
-  private double alpha_i = 1.0;
+  private static final double alpha_i = 1.0;
 
   @Override
   public ClassifierResult classifyDocument(String[] document,
@@ -58,25 +59,28 @@
       Datastore datastore, String defaultCategory, int numResults)
       throws InvalidDatastoreException {
     Collection<String> categories = datastore.getKeys("labelWeight");
-    PriorityQueue<ClassifierResult> pq = new ClassifierResultPriorityQueue(
-        numResults);
-    ClassifierResult tmp;
+    PriorityQueue<ClassifierResult> pq =
+        new PriorityQueue<ClassifierResult>(numResults, new ByScoreLabelResultComparator());
     for (String category : categories) {
       double prob = documentWeight(datastore, category, document);
       if (prob > 0.0) {
-        tmp = new ClassifierResult(category, prob);
-        pq.insert(tmp);
+        pq.add(new ClassifierResult(category, prob));
+        if (pq.size() > numResults) {
+          pq.remove();
+        }
       }
     }
 
-    Deque<ClassifierResult> result = new LinkedList<ClassifierResult>();
-    while ((tmp = pq.pop()) != null) {
-      result.addLast(tmp);
-    }
-    if (result.isEmpty()) {
-      result.add(new ClassifierResult(defaultCategory, 0));
+    if (pq.isEmpty()) {
+      return new ClassifierResult[] { new ClassifierResult(defaultCategory, 0.0) };
+    } else {
+      List<ClassifierResult> result = new ArrayList<ClassifierResult>(pq.size());
+      while (pq.isEmpty() == false) {
+        result.add(pq.remove());
+      }
+      Collections.reverse(result);
+      return result.toArray(new ClassifierResult[pq.size()]);
     }
-    return result.toArray(new ClassifierResult[result.size()]);
   }
 
   @Override

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/ByScoreLabelResultComparator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/ByScoreLabelResultComparator.java?rev=826561&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/ByScoreLabelResultComparator.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/ByScoreLabelResultComparator.java Mon Oct 19 00:03:55 2009
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.classifier.bayes.common;
+
+import org.apache.mahout.classifier.ClassifierResult;
+
+import java.util.Comparator;
+
+public final class ByScoreLabelResultComparator implements Comparator<ClassifierResult> {
+
+  @Override
+  public int compare(ClassifierResult cr1, ClassifierResult cr2) {
+    double score1 = cr1.getScore();
+    double score2 = cr2.getScore();
+    if (score1 < score2) {
+      return 1;
+    } else if (score1 > score2) {
+      return -1;
+    } else {
+      return cr1.getLabel().compareTo(cr2.getLabel());
+    }
+  }
+
+}