You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/11/09 14:19:28 UTC
svn commit: r1032979 [2/2] - in
/mahout/trunk/core/src/main/java/org/apache/mahout: cf/taste/impl/common/
cf/taste/impl/model/ classifier/ classifier/bayes/
classifier/bayes/algorithm/ classifier/bayes/interfaces/
classifier/naivebayes/ classifier/naiv...
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java Tue Nov 9 13:19:26 2010
@@ -20,6 +20,7 @@ package org.apache.mahout.fpm.pfpgrowth.
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
@@ -28,7 +29,6 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
-import java.util.Set;
import org.apache.commons.lang.mutable.MutableLong;
import org.apache.hadoop.conf.Configuration;
@@ -36,6 +36,7 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.mahout.common.Pair;
import org.apache.mahout.fpm.pfpgrowth.convertors.StatusUpdater;
@@ -63,7 +64,7 @@ public class FPGrowth<A extends Comparab
Path path) throws IOException {
List<Pair<String,TopKStringPatterns>> ret = new ArrayList<Pair<String,TopKStringPatterns>>();
- Text key = new Text();
+ Writable key = new Text();
TopKStringPatterns value = new TopKStringPatterns();
SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
// key is feature value is count
@@ -146,10 +147,10 @@ public class FPGrowth<A extends Comparab
* @throws IOException
*/
public final void generateTopKFrequentPatterns(Iterator<Pair<List<A>,Long>> transactionStream,
- List<Pair<A,Long>> frequencyList,
+ Collection<Pair<A, Long>> frequencyList,
long minSupport,
int k,
- Set<A> returnableFeatures,
+ Collection<A> returnableFeatures,
OutputCollector<A,List<Pair<List<A>,Long>>> output,
StatusUpdater updater) throws IOException {
@@ -178,7 +179,7 @@ public class FPGrowth<A extends Comparab
log.info("Number of unique items {}", frequencyList.size());
- Set<Integer> returnFeatures = new HashSet<Integer>();
+ Collection<Integer> returnFeatures = new HashSet<Integer>();
if (returnableFeatures != null && !returnableFeatures.isEmpty()) {
for (A attrib : returnableFeatures) {
if (attributeIdMapping.containsKey(attrib)) {
@@ -206,7 +207,7 @@ public class FPGrowth<A extends Comparab
*
* @param tree
* to be mined
- * @param minSupportMutable
+ * @param minSupportValue
* minimum support of the pattern to keep
* @param k
* Number of top frequent patterns to keep
@@ -218,14 +219,12 @@ public class FPGrowth<A extends Comparab
* @return Top K Frequent Patterns for each feature and their support
*/
private Map<Integer,FrequentPatternMaxHeap> fpGrowth(FPTree tree,
- MutableLong minSupportMutable,
+ long minSupportValue,
int k,
- Set<Integer> requiredFeatures,
+ Collection<Integer> requiredFeatures,
TopKPatternsOutputConverter<A> outputCollector,
StatusUpdater updater) throws IOException {
- long minSupportValue = minSupportMutable.longValue();
-
Map<Integer,FrequentPatternMaxHeap> patterns = new HashMap<Integer,FrequentPatternMaxHeap>();
FPTreeDepthCache treeCache = new FPTreeDepthCache();
for (int i = tree.getHeaderTableCount() - 1; i >= 0; i--) {
@@ -250,9 +249,8 @@ public class FPGrowth<A extends Comparab
private static FrequentPatternMaxHeap generateSinglePathPatterns(FPTree tree,
int k,
- MutableLong minSupportMutable) {
- FrequentPatternMaxHeap frequentPatterns = new FrequentPatternMaxHeap(k,
- false);
+ long minSupport) {
+ FrequentPatternMaxHeap frequentPatterns = new FrequentPatternMaxHeap(k, false);
int tempNode = FPTree.ROOTNODEID;
Pattern frequentItem = new Pattern();
@@ -262,7 +260,7 @@ public class FPGrowth<A extends Comparab
tempNode);
}
tempNode = tree.childAtIndex(tempNode, 0);
- if (tree.count(tempNode) >= minSupportMutable.intValue()) {
+ if (tree.count(tempNode) >= minSupport) {
frequentItem.add(tree.attribute(tempNode), tree.count(tempNode));
}
}
@@ -296,8 +294,11 @@ public class FPGrowth<A extends Comparab
*/
private Map<Integer,FrequentPatternMaxHeap> generateTopKFrequentPatterns(
Iterator<Pair<int[],Long>> transactions,
- long[] attributeFrequency, long minSupport, int k, int featureSetSize,
- Set<Integer> returnFeatures, TopKPatternsOutputConverter<A> topKPatternsOutputCollector,
+ long[] attributeFrequency,
+ long minSupport,
+ int k,
+ int featureSetSize,
+ Collection<Integer> returnFeatures, TopKPatternsOutputConverter<A> topKPatternsOutputCollector,
StatusUpdater updater) throws IOException {
FPTree tree = new FPTree(featureSetSize);
@@ -306,7 +307,6 @@ public class FPGrowth<A extends Comparab
}
// Constructing initial FPTree from the list of transactions
- MutableLong minSupportMutable = new MutableLong(minSupport);
int nodecount = 0;
// int attribcount = 0;
int i = 0;
@@ -314,8 +314,7 @@ public class FPGrowth<A extends Comparab
Pair<int[],Long> transaction = transactions.next();
Arrays.sort(transaction.getFirst());
// attribcount += transaction.length;
- nodecount += treeAddCount(tree, transaction.getFirst(), transaction
- .getSecond(), minSupportMutable, attributeFrequency);
+ nodecount += treeAddCount(tree, transaction.getFirst(), transaction.getSecond(), minSupport, attributeFrequency);
i++;
if (i % 10000 == 0) {
log.info("FPTree Building: Read {} Transactions", i);
@@ -324,8 +323,7 @@ public class FPGrowth<A extends Comparab
log.info("Number of Nodes in the FP Tree: {}", nodecount);
- return fpGrowth(tree, minSupportMutable, k, returnFeatures,
- topKPatternsOutputCollector, updater);
+ return fpGrowth(tree, minSupport, k, returnFeatures, topKPatternsOutputCollector, updater);
}
private static FrequentPatternMaxHeap growth(FPTree tree,
@@ -350,7 +348,7 @@ public class FPGrowth<A extends Comparab
while (i < headerTableCount) {
int attribute = tree.getAttributeAtIndex(i);
long count = tree.getHeaderSupportCount(attribute);
- if (count < minSupportMutable.intValue()) {
+ if (count < minSupportMutable.longValue()) {
i++;
continue;
}
@@ -358,7 +356,7 @@ public class FPGrowth<A extends Comparab
FPTree conditionalTree = treeCache.getFirstLevelTree(attribute);
if (conditionalTree.isEmpty()) {
traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
- minSupportMutable, conditionalTree, tree);
+ minSupportMutable.longValue(), conditionalTree, tree);
// printTree(conditionalTree);
}
@@ -377,7 +375,7 @@ public class FPGrowth<A extends Comparab
frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns,
attribute, count, false);
}
- if (frequentPatterns.isFull() && minSupportMutable.intValue() < frequentPatterns.leastSupport()) {
+ if (frequentPatterns.isFull() && minSupportMutable.longValue() < frequentPatterns.leastSupport()) {
minSupportMutable.setValue(frequentPatterns.leastSupport());
}
i++;
@@ -413,7 +411,7 @@ public class FPGrowth<A extends Comparab
}
if (tree.singlePath()) {
- return generateSinglePathPatterns(tree, k, minSupportMutable);
+ return generateSinglePathPatterns(tree, k, minSupportMutable.longValue());
}
updater.update("Bottom Up FP Growth");
@@ -428,7 +426,7 @@ public class FPGrowth<A extends Comparab
FrequentPatternMaxHeap returnedPatterns;
if (conditionalOfCurrentAttribute) {
traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
- minSupportMutable, conditionalTree, tree);
+ minSupportMutable.longValue(), conditionalTree, tree);
returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable,
k, treeCache, level + 1, true, currentAttribute, updater);
@@ -437,7 +435,7 @@ public class FPGrowth<A extends Comparab
} else {
if (attribute == currentAttribute) {
traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
- minSupportMutable, conditionalTree, tree);
+ minSupportMutable.longValue(), conditionalTree, tree);
returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable,
k, treeCache, level + 1, true, currentAttribute, updater);
@@ -445,7 +443,7 @@ public class FPGrowth<A extends Comparab
attribute, count, true);
} else if (attribute > currentAttribute) {
traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
- minSupportMutable, conditionalTree, tree);
+ minSupportMutable.longValue(), conditionalTree, tree);
returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable,
k, treeCache, level + 1, false, currentAttribute, updater);
frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns,
@@ -453,7 +451,7 @@ public class FPGrowth<A extends Comparab
}
}
- if (frequentPatterns.isFull() && minSupportMutable.intValue() < frequentPatterns.leastSupport()) {
+ if (frequentPatterns.isFull() && minSupportMutable.longValue() < frequentPatterns.leastSupport()) {
minSupportMutable.setValue(frequentPatterns.leastSupport());
}
}
@@ -481,14 +479,14 @@ public class FPGrowth<A extends Comparab
} else {
int attribute = tree.getAttributeAtIndex(index);
long count = tree.getHeaderSupportCount(attribute);
- if (count < minSupportMutable.intValue()) {
+ if (count < minSupportMutable.longValue()) {
return frequentPatterns;
}
}
}
if (tree.singlePath()) {
- return generateSinglePathPatterns(tree, k, minSupportMutable);
+ return generateSinglePathPatterns(tree, k, minSupportMutable.longValue());
}
updater.update("Top Down Growth:");
@@ -505,7 +503,7 @@ public class FPGrowth<A extends Comparab
FrequentPatternMaxHeap returnedPatterns;
if (conditionalOfCurrentAttribute) {
traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
- minSupportMutable, conditionalTree, tree);
+ minSupportMutable.longValue(), conditionalTree, tree);
returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable,
k, treeCache, level + 1, true, currentAttribute, updater);
@@ -515,7 +513,7 @@ public class FPGrowth<A extends Comparab
} else {
if (attribute == currentAttribute) {
traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
- minSupportMutable, conditionalTree, tree);
+ minSupportMutable.longValue(), conditionalTree, tree);
returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable,
k, treeCache, level + 1, true, currentAttribute, updater);
frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns,
@@ -523,7 +521,7 @@ public class FPGrowth<A extends Comparab
} else if (attribute > currentAttribute) {
traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
- minSupportMutable, conditionalTree, tree);
+ minSupportMutable.longValue(), conditionalTree, tree);
returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable,
k, treeCache, level + 1, false, currentAttribute, updater);
frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns,
@@ -531,7 +529,7 @@ public class FPGrowth<A extends Comparab
}
}
- if (frequentPatterns.isFull() && minSupportMutable.intValue() < frequentPatterns.leastSupport()) {
+ if (frequentPatterns.isFull() && minSupportMutable.longValue() < frequentPatterns.leastSupport()) {
minSupportMutable.setValue(frequentPatterns.leastSupport());
}
}
@@ -555,7 +553,7 @@ public class FPGrowth<A extends Comparab
}
private static void traverseAndBuildConditionalFPTreeData(int firstConditionalNode,
- MutableLong minSupportMutable,
+ long minSupport,
FPTree conditionalTree,
FPTree tree) {
@@ -569,8 +567,7 @@ public class FPGrowth<A extends Comparab
while (pathNode != 0) { // dummy root node
int attribute = tree.attribute(pathNode);
- if (tree.getHeaderSupportCount(attribute) < minSupportMutable
- .intValue()) {
+ if (tree.getHeaderSupportCount(attribute) < minSupport) {
pathNode = tree.parent(pathNode);
continue;
}
@@ -612,16 +609,15 @@ public class FPGrowth<A extends Comparab
tree.clearConditional();
conditionalTree.reorderHeaderTable();
- pruneFPTree(minSupportMutable, conditionalTree);
+ pruneFPTree(minSupport, conditionalTree);
// prune Conditional Tree
}
- private static void pruneFPTree(MutableLong minSupportMutable, FPTree tree) {
+ private static void pruneFPTree(long minSupport, FPTree tree) {
for (int i = 0; i < tree.getHeaderTableCount(); i++) {
int currentAttribute = tree.getAttributeAtIndex(i);
- if (tree.getHeaderSupportCount(currentAttribute) < minSupportMutable
- .intValue()) {
+ if (tree.getHeaderSupportCount(currentAttribute) < minSupport) {
int nextNode = tree.getHeaderNext(currentAttribute);
tree.removeHeaderNext(currentAttribute);
while (nextNode != -1) {
@@ -650,9 +646,7 @@ public class FPGrowth<A extends Comparab
int parent = tree.parent(nextNode);
- if (!prevNode.containsKey(parent)) {
- prevNode.put(parent, nextNode);
- } else {
+ if (prevNode.containsKey(parent)) {
int prevNodeId = prevNode.get(parent);
if (tree.childCount(prevNodeId) <= 1 && tree.childCount(nextNode) <= 1) {
tree.addCount(prevNodeId, tree.count(nextNode));
@@ -662,6 +656,8 @@ public class FPGrowth<A extends Comparab
}
tree.setNext(justPrevNode, tree.next(nextNode));
}
+ } else {
+ prevNode.put(parent, nextNode);
}
justPrevNode = nextNode;
nextNode = tree.next(nextNode);
@@ -692,7 +688,7 @@ public class FPGrowth<A extends Comparab
private static int treeAddCount(FPTree tree,
int[] myList,
long addCount,
- Number minSupport,
+ long minSupport,
long[] attributeFrequency) {
int temp = FPTree.ROOTNODEID;
@@ -700,7 +696,7 @@ public class FPGrowth<A extends Comparab
boolean addCountMode = true;
for (int attribute : myList) {
- if (attributeFrequency[attribute] < minSupport.intValue()) {
+ if (attributeFrequency[attribute] < minSupport) {
return ret;
}
int child;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java Tue Nov 9 13:19:26 2010
@@ -18,7 +18,7 @@
package org.apache.mahout.fpm.pfpgrowth.fpgrowth;
import java.util.Arrays;
-import java.util.Set;
+import java.util.Collection;
import java.util.TreeSet;
/**
@@ -72,7 +72,7 @@ public class FPTree {
private boolean singlePath;
- private final Set<Integer> sortedSet = new TreeSet<Integer>();
+ private final Collection<Integer> sortedSet = new TreeSet<Integer>();
public FPTree() {
this(DEFAULT_INITIAL_SIZE, DEFAULT_HEADER_TABLE_INITIAL_SIZE);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java Tue Nov 9 13:19:26 2010
@@ -20,6 +20,7 @@ package org.apache.mahout.ga.watchmaker;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
+import java.util.Collection;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
@@ -54,8 +55,8 @@ public final class MahoutEvaluator {
* <code>List<Double></code> that contains the evaluated fitness for each candidate from the
* input population, sorted in the same order as the candidates.
*/
- public static void evaluate(FitnessEvaluator<?> evaluator, List<?> population, List<Double> evaluations)
- throws IOException, ClassNotFoundException, InterruptedException {
+ public static void evaluate(FitnessEvaluator<?> evaluator, Iterable<?> population, Collection<Double> evaluations)
+ throws IOException, ClassNotFoundException, InterruptedException {
Job job = new Job();
job.setJarByClass(MahoutEvaluator.class);
Configuration conf = job.getConfiguration();
@@ -78,7 +79,7 @@ public final class MahoutEvaluator {
* population to store
* @return input <code>Path</code>
*/
- private static Path prepareInput(FileSystem fs, List<?> population) throws IOException {
+ private static Path prepareInput(FileSystem fs, Iterable<?> population) throws IOException {
Path inpath = new Path(fs.getWorkingDirectory(), "input");
HadoopUtil.overwriteOutput(inpath);
storePopulation(fs, new Path(inpath, "population"), population);
@@ -122,7 +123,7 @@ public final class MahoutEvaluator {
* @param population
* population to store
*/
- static void storePopulation(FileSystem fs, Path f, List<?> population) throws IOException {
+ static void storePopulation(FileSystem fs, Path f, Iterable<?> population) throws IOException {
FSDataOutputStream out = fs.create(f);
BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out));
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java Tue Nov 9 13:19:26 2010
@@ -19,7 +19,7 @@ package org.apache.mahout.ga.watchmaker;
import java.io.IOException;
import java.util.ArrayList;
-import java.util.List;
+import java.util.Collection;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
@@ -29,6 +29,7 @@ import org.apache.hadoop.io.DoubleWritab
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile.Reader;
import org.apache.hadoop.io.SequenceFile.Sorter;
+import org.apache.hadoop.io.Writable;
/** Utility Class that deals with the output. */
public final class OutputUtils {
@@ -48,7 +49,7 @@ public final class OutputUtils {
*/
public static Path[] listOutputFiles(FileSystem fs, Path outpath) throws IOException {
FileStatus[] status = fs.listStatus(outpath);
- List<Path> outpaths = new ArrayList<Path>();
+ Collection<Path> outpaths = new ArrayList<Path>();
for (FileStatus s : status) {
if (!s.isDir()) {
outpaths.add(s.getPath());
@@ -72,7 +73,7 @@ public final class OutputUtils {
public static void importEvaluations(FileSystem fs,
Configuration conf,
Path outpath,
- List<Double> evaluations) throws IOException {
+ Collection<Double> evaluations) throws IOException {
Sorter sorter = new Sorter(fs, LongWritable.class, DoubleWritable.class, conf);
// merge and sort the outputs
@@ -81,7 +82,7 @@ public final class OutputUtils {
sorter.merge(outfiles, output);
// import the evaluations
- LongWritable key = new LongWritable();
+ Writable key = new LongWritable();
DoubleWritable value = new DoubleWritable();
Reader reader = new Reader(fs, output, conf);
try {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java Tue Nov 9 13:19:26 2010
@@ -23,6 +23,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
@@ -104,7 +105,7 @@ public final class TimesSquaredJob {
Path inputVectorPath = new Path(outputVectorPathBase, INPUT_VECTOR + '/' + now);
SequenceFile.Writer inputVectorPathWriter = new SequenceFile.Writer(fs,
conf, inputVectorPath, NullWritable.class, VectorWritable.class);
- VectorWritable inputVW = new VectorWritable(v);
+ Writable inputVW = new VectorWritable(v);
inputVectorPathWriter.append(NullWritable.get(), inputVW);
inputVectorPathWriter.close();
URI ivpURI = inputVectorPath.toUri();
@@ -158,9 +159,7 @@ public final class TimesSquaredJob {
Path inputVectorPath = new Path(localFiles[0].getPath());
FileSystem fs = inputVectorPath.getFileSystem(conf);
- SequenceFile.Reader reader = new SequenceFile.Reader(fs,
- inputVectorPath,
- conf);
+ SequenceFile.Reader reader = new SequenceFile.Reader(fs, inputVectorPath, conf);
VectorWritable val = new VectorWritable();
NullWritable nw = NullWritable.get();
reader.next(nw, val);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java Tue Nov 9 13:19:26 2010
@@ -27,6 +27,7 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
@@ -194,7 +195,7 @@ public class DistributedLanczosSolver ex
IntWritable iw = new IntWritable();
for (int i = 0; i < eigenVectors.numRows() - 1; i++) {
Vector v = eigenVectors.getRow(i);
- VectorWritable vw = new VectorWritable(v);
+ Writable vw = new VectorWritable(v);
iw.set(i);
seqWriter.append(iw, vw);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java Tue Nov 9 13:19:26 2010
@@ -19,6 +19,7 @@ package org.apache.mahout.math.hadoop.de
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
@@ -30,6 +31,7 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.common.AbstractJob;
@@ -82,7 +84,7 @@ public class EigenVerificationJob extend
private double minEigenValue;
- private boolean loadEigensInMemory;
+ //private boolean loadEigensInMemory;
private Path tmpOut;
@@ -125,7 +127,6 @@ public class EigenVerificationJob extend
* @param minEigenValue a double representing the minimum eigenvalue
* @param inMemory a boolean requesting in-memory preparation
* @param config the JobConf to use, or null if a default is ok (saves referencing JobConf in calling classes unless needed)
- * @throws IOException
*/
public int run(Path corpusInput,
Path eigenInput,
@@ -182,7 +183,8 @@ public class EigenVerificationJob extend
return OrthonormalityVerifier.pairwiseInnerProducts(eigensToVerify);
}
- private void saveCleanEigens(Configuration conf, List<Map.Entry<MatrixSlice, EigenStatus>> prunedEigenMeta) throws IOException {
+ private void saveCleanEigens(Configuration conf, Collection<Map.Entry<MatrixSlice, EigenStatus>> prunedEigenMeta)
+ throws IOException {
Path path = new Path(outPath, CLEAN_EIGENVECTORS);
FileSystem fs = FileSystem.get(conf);
SequenceFile.Writer seqWriter = new SequenceFile.Writer(fs, conf, path, IntWritable.class, VectorWritable.class);
@@ -191,9 +193,12 @@ public class EigenVerificationJob extend
for (Map.Entry<MatrixSlice, EigenStatus> pruneSlice : prunedEigenMeta) {
MatrixSlice s = pruneSlice.getKey();
EigenStatus meta = pruneSlice.getValue();
- EigenVector ev = new EigenVector((DenseVector) s.vector(), meta.getEigenValue(), Math.abs(1 - meta.getCosAngle()), s.index());
+ EigenVector ev = new EigenVector((DenseVector) s.vector(),
+ meta.getEigenValue(),
+ Math.abs(1 - meta.getCosAngle()),
+ s.index());
log.info("appending {} to {}", ev, path);
- VectorWritable vw = new VectorWritable(ev);
+ Writable vw = new VectorWritable(ev);
iw.set(s.index());
seqWriter.append(iw, vw);
@@ -264,14 +269,8 @@ public class EigenVerificationJob extend
/**
* Progammatic invocation of run()
- * @param conf TODO
* @param eigenInput Output of LanczosSolver
* @param corpusInput Input of LanczosSolver
- * @param output
- * @param inMemory
- * @param maxError
- * @param minEigenValue
- * @param maxEigens
*/
public void runJob(Configuration conf,
Path eigenInput,
@@ -279,7 +278,8 @@ public class EigenVerificationJob extend
Path output,
boolean inMemory,
double maxError,
- double minEigenValue, int maxEigens) throws IOException {
+ double minEigenValue,
+ int maxEigens) throws IOException {
// no need to handle command line arguments
outPath = output;
tmpOut = new Path(outPath, "tmp");
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java Tue Nov 9 13:19:26 2010
@@ -32,8 +32,6 @@ import org.apache.mahout.common.CommandL
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.vectorizer.collocations.llr.LLRReducer;
import org.apache.mahout.vectorizer.common.PartialVectorMerger;
-import org.apache.mahout.vectorizer.DictionaryVectorizer;
-import org.apache.mahout.vectorizer.DocumentProcessor;
import org.apache.mahout.vectorizer.tfidf.TFIDFConverter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TF.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TF.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TF.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TF.java Tue Nov 9 13:19:26 2010
@@ -18,7 +18,7 @@
package org.apache.mahout.vectorizer;
/**
- * {@link org.apache.mahout.utils.vectors.Weight} based on term frequency only
+ * {@link Weight} based on term frequency only
*/
public class TF implements Weight {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java Tue Nov 9 13:19:26 2010
@@ -160,7 +160,7 @@ public final class CollocDriver extends
}
/**
- * Generate all ngrams for the {@link org.apache.mahout.utils.vectors.text.DictionaryVectorizer} job
+ * Generate all ngrams for the {@link org.apache.mahout.vectorizer.DictionaryVectorizer} job
*
* @param input
* input path containing tokenized documents
@@ -200,7 +200,7 @@ public final class CollocDriver extends
int maxNGramSize,
int reduceTasks,
int minSupport)
- throws IOException, ClassNotFoundException, InterruptedException {
+ throws IOException, ClassNotFoundException, InterruptedException {
Configuration con = new Configuration(baseConf);
con.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
@@ -247,7 +247,7 @@ public final class CollocDriver extends
boolean emitUnigrams,
float minLLRValue,
int reduceTasks)
- throws IOException, InterruptedException, ClassNotFoundException {
+ throws IOException, InterruptedException, ClassNotFoundException {
Configuration conf = new Configuration(baseConf);
conf.setLong(LLRReducer.NGRAM_TOTAL, nGramTotal);
conf.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/AdaptiveWordValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/AdaptiveWordValueEncoder.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/AdaptiveWordValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/AdaptiveWordValueEncoder.java Tue Nov 9 13:19:26 2010
@@ -48,11 +48,6 @@ public class AdaptiveWordValueEncoder ex
}
@Override
- protected int hashForProbe(byte[] originalForm, int dataSize, String name, int probe) {
- return super.hashForProbe(originalForm, dataSize, name, probe);
- }
-
- @Override
protected double getWeight(byte[] originalForm, double w) {
return w * weight(originalForm);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java Tue Nov 9 13:19:26 2010
@@ -49,11 +49,12 @@ public class CachingContinuousValueEncod
}
protected int hashForProbe(String originalForm, int dataSize, String name, int probe) {
- Preconditions.checkArgument(dataSize == this.dataSize, "dataSize argument [" + dataSize + "] does not match expected dataSize [" + this.dataSize + "]");
+ Preconditions.checkArgument(dataSize == this.dataSize,
+ "dataSize argument [" + dataSize + "] does not match expected dataSize [" + this.dataSize + ']');
if (caches[probe].containsKey(originalForm.hashCode())) {
return caches[probe].get(originalForm.hashCode());
}
- int hash = super.hashForProbe(originalForm.getBytes(), dataSize, name, probe);
+ int hash = hashForProbe(originalForm.getBytes(), dataSize, name, probe);
caches[probe].put(originalForm.hashCode(), hash);
return hash;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java Tue Nov 9 13:19:26 2010
@@ -50,11 +50,12 @@ public class CachingStaticWordValueEncod
}
protected int hashForProbe(String originalForm, int dataSize, String name, int probe) {
- Preconditions.checkArgument(dataSize == this.dataSize, "dataSize argument [" + dataSize + "] does not match expected dataSize [" + this.dataSize + "]");
+ Preconditions.checkArgument(dataSize == this.dataSize,
+ "dataSize argument [" + dataSize + "] does not match expected dataSize [" + this.dataSize + ']');
if (caches[probe].containsKey(originalForm.hashCode())) {
return caches[probe].get(originalForm.hashCode());
}
- int hash = super.hashForProbe(originalForm.getBytes(), dataSize, name, probe);
+ int hash = hashForProbe(originalForm.getBytes(), dataSize, name, probe);
caches[probe].put(originalForm.hashCode(), hash);
return hash;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java Tue Nov 9 13:19:26 2010
@@ -24,7 +24,7 @@ package org.apache.mahout.vectorizer.enc
public abstract class CachingValueEncoder extends FeatureVectorEncoder {
private int[] cachedProbes;
- public CachingValueEncoder(String name, int seed) {
+ protected CachingValueEncoder(String name, int seed) {
super(name);
cacheProbeLocations(seed);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/Dictionary.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/Dictionary.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/Dictionary.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/Dictionary.java Tue Nov 9 13:19:26 2010
@@ -45,7 +45,7 @@ public class Dictionary {
return dict.size();
}
- public static Dictionary fromList(List<String> values) {
+ public static Dictionary fromList(Iterable<String> values) {
Dictionary dict = new Dictionary();
for (String value : values) {
dict.intern(value);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TextValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TextValueEncoder.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TextValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TextValueEncoder.java Tue Nov 9 13:19:26 2010
@@ -24,7 +24,7 @@ import com.google.common.collect.Multise
import org.apache.mahout.math.Vector;
import java.util.ArrayList;
-import java.util.List;
+import java.util.Collection;
import java.util.regex.Pattern;
/**
@@ -99,7 +99,7 @@ public class TextValueEncoder extends Fe
@Override
protected Iterable<Integer> hashesForProbe(byte[] originalForm, int dataSize, String name, int probe){
- List<Integer> hashes = new ArrayList<Integer>();
+ Collection<Integer> hashes = new ArrayList<Integer>();
for (String word : tokenize(new String(originalForm, Charsets.UTF_8))){
hashes.add(hashForProbe(bytesForString(word), dataSize, name, probe));
}