You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ro...@apache.org on 2009/10/19 02:03:55 UTC
svn commit: r826561 - in
/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes:
algorithm/BayesAlgorithm.java algorithm/CBayesAlgorithm.java
common/ByScoreLabelResultComparator.java
common/ClassifierResultPriorityQueue.java
Author: robinanil
Date: Mon Oct 19 00:03:55 2009
New Revision: 826561
URL: http://svn.apache.org/viewvc?rev=826561&view=rev
Log:
MAHOUT-186 Removed the custom ClassifierResultPriorityQueue class; replaced it with java.util.PriorityQueue<ClassifierResult>
Added:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/ByScoreLabelResultComparator.java
Removed:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/ClassifierResultPriorityQueue.java
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java?rev=826561&r1=826560&r2=826561&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java Mon Oct 19 00:03:55 2009
@@ -17,22 +17,24 @@
package org.apache.mahout.classifier.bayes.algorithm;
+import java.util.ArrayList;
import java.util.Collection;
-import java.util.Deque;
+import java.util.Collections;
import java.util.HashMap;
-import java.util.LinkedList;
+import java.util.List;
import java.util.Map;
+import java.util.PriorityQueue;
-import org.apache.hadoop.util.PriorityQueue;
import org.apache.mahout.classifier.ClassifierResult;
-import org.apache.mahout.classifier.bayes.common.ClassifierResultPriorityQueue;
+import org.apache.mahout.classifier.bayes.common.ByScoreLabelResultComparator;
import org.apache.mahout.classifier.bayes.exceptions.InvalidDatastoreException;
import org.apache.mahout.classifier.bayes.interfaces.Algorithm;
import org.apache.mahout.classifier.bayes.interfaces.Datastore;
public class BayesAlgorithm implements Algorithm{
- private double alpha_i = 1.0;
+ private static final double alpha_i = 1.0;
+
@Override
public ClassifierResult classifyDocument(String[] document,
Datastore datastore, String defaultCategory)
@@ -57,24 +59,28 @@
Datastore datastore, String defaultCategory, int numResults)
throws InvalidDatastoreException {
Collection<String> categories = datastore.getKeys("labelWeight");
- PriorityQueue<ClassifierResult> pq = new ClassifierResultPriorityQueue(numResults);
- ClassifierResult tmp;
+ PriorityQueue<ClassifierResult> pq =
+ new PriorityQueue<ClassifierResult>(numResults, new ByScoreLabelResultComparator());
for (String category : categories){
double prob = documentWeight(datastore, category, document);
if (prob > 0.0) {
- tmp = new ClassifierResult(category, prob);
- pq.insert(tmp);
+ pq.add(new ClassifierResult(category, prob));
+ if (pq.size() > numResults) {
+ pq.remove();
+ }
}
}
- Deque<ClassifierResult> result = new LinkedList<ClassifierResult>();
- while ((tmp = pq.pop()) != null) {
- result.addLast(tmp);
- }
- if (result.isEmpty()){
- result.add(new ClassifierResult(defaultCategory, 0));
+ if (pq.isEmpty()) {
+ return new ClassifierResult[] { new ClassifierResult(defaultCategory, 0.0) };
+ } else {
+ List<ClassifierResult> result = new ArrayList<ClassifierResult>(pq.size());
+ while (pq.isEmpty() == false) {
+ result.add(pq.remove());
+ }
+ Collections.reverse(result);
+ return result.toArray(new ClassifierResult[pq.size()]);
}
- return result.toArray(new ClassifierResult[result.size()]);
}
@Override
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java?rev=826561&r1=826560&r2=826561&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java Mon Oct 19 00:03:55 2009
@@ -17,22 +17,23 @@
package org.apache.mahout.classifier.bayes.algorithm;
+import java.util.ArrayList;
import java.util.Collection;
-import java.util.Deque;
+import java.util.Collections;
import java.util.HashMap;
-import java.util.LinkedList;
+import java.util.List;
import java.util.Map;
+import java.util.PriorityQueue;
-import org.apache.hadoop.util.PriorityQueue;
import org.apache.mahout.classifier.ClassifierResult;
-import org.apache.mahout.classifier.bayes.common.ClassifierResultPriorityQueue;
+import org.apache.mahout.classifier.bayes.common.ByScoreLabelResultComparator;
import org.apache.mahout.classifier.bayes.exceptions.InvalidDatastoreException;
import org.apache.mahout.classifier.bayes.interfaces.Algorithm;
import org.apache.mahout.classifier.bayes.interfaces.Datastore;
public class CBayesAlgorithm implements Algorithm {
- private double alpha_i = 1.0;
+ private static final double alpha_i = 1.0;
@Override
public ClassifierResult classifyDocument(String[] document,
@@ -58,25 +59,28 @@
Datastore datastore, String defaultCategory, int numResults)
throws InvalidDatastoreException {
Collection<String> categories = datastore.getKeys("labelWeight");
- PriorityQueue<ClassifierResult> pq = new ClassifierResultPriorityQueue(
- numResults);
- ClassifierResult tmp;
+ PriorityQueue<ClassifierResult> pq =
+ new PriorityQueue<ClassifierResult>(numResults, new ByScoreLabelResultComparator());
for (String category : categories) {
double prob = documentWeight(datastore, category, document);
if (prob > 0.0) {
- tmp = new ClassifierResult(category, prob);
- pq.insert(tmp);
+ pq.add(new ClassifierResult(category, prob));
+ if (pq.size() > numResults) {
+ pq.remove();
+ }
}
}
- Deque<ClassifierResult> result = new LinkedList<ClassifierResult>();
- while ((tmp = pq.pop()) != null) {
- result.addLast(tmp);
- }
- if (result.isEmpty()) {
- result.add(new ClassifierResult(defaultCategory, 0));
+ if (pq.isEmpty()) {
+ return new ClassifierResult[] { new ClassifierResult(defaultCategory, 0.0) };
+ } else {
+ List<ClassifierResult> result = new ArrayList<ClassifierResult>(pq.size());
+ while (pq.isEmpty() == false) {
+ result.add(pq.remove());
+ }
+ Collections.reverse(result);
+ return result.toArray(new ClassifierResult[pq.size()]);
}
- return result.toArray(new ClassifierResult[result.size()]);
}
@Override
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/ByScoreLabelResultComparator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/ByScoreLabelResultComparator.java?rev=826561&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/ByScoreLabelResultComparator.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/ByScoreLabelResultComparator.java Mon Oct 19 00:03:55 2009
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.classifier.bayes.common;
+
+import org.apache.mahout.classifier.ClassifierResult;
+
+import java.util.Comparator;
+
+public final class ByScoreLabelResultComparator implements Comparator<ClassifierResult> {
+
+ @Override
+ public int compare(ClassifierResult cr1, ClassifierResult cr2) {
+ double score1 = cr1.getScore();
+ double score2 = cr2.getScore();
+ if (score1 < score2) {
+ return 1;
+ } else if (score1 > score2) {
+ return -1;
+ } else {
+ return cr1.getLabel().compareTo(cr2.getLabel());
+ }
+ }
+
+}