You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/07/01 23:08:27 UTC
svn commit: r959784 [2/2] - in /mahout/trunk/core/src:
main/java/org/apache/mahout/cf/taste/hadoop/
main/java/org/apache/mahout/cf/taste/hadoop/pseudo/
main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/
main/java/org/apache/mahout/cf/taste/im...
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LFUCache.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LFUCache.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LFUCache.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LFUCache.java Thu Jul 1 21:08:24 2010
@@ -73,11 +73,7 @@ public class LFUCache<K,V> implements Ca
public V quickGet(K key) {
Pair<V,MutableLong> data = dataMap.get(key);
- if (data == null) {
- return null;
- } else {
- return data.getFirst();
- }
+ return data == null ? null : data.getFirst();
}
private void incrementHit(K key, long count) {
@@ -85,7 +81,7 @@ public class LFUCache<K,V> implements Ca
if (keys == null) {
throw new ConcurrentModificationException();
}
- if (keys.remove(key) == false) {
+ if (!keys.remove(key)) {
throw new ConcurrentModificationException();
}
if (keys.isEmpty()) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LeastKCache.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LeastKCache.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LeastKCache.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LeastKCache.java Thu Jul 1 21:08:24 2010
@@ -58,7 +58,7 @@ public class LeastKCache<K extends Compa
@Override
public final void set(K key, V value) {
- if (contains(key) == false) {
+ if (!contains(key)) {
queue.add(key);
}
cache.put(key, value);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/Parametered.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/Parametered.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/Parametered.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/Parametered.java Thu Jul 1 21:08:24 2010
@@ -39,14 +39,14 @@ public interface Parametered {
* ends with a dot if not empty.
* @param jobConf
* configuration used for retrieving values
- * @see ParameteredGeneralizations#configureParameters(String,Parametered,org.apache.hadoop.conf.Configuration)
+ * @see ParameteredGeneralizations#configureParameters(String,Parametered,Configuration)
* invoking method
- * @see ParameteredGeneralizations#configureParametersRecusivly(Parametered,String,org.apache.hadoop.conf.Configuration)
+ * @see ParameteredGeneralizations#configureParametersRecusivly(Parametered,String,Configuration)
* invoking method
*/
void createParameters(String prefix, Configuration jobConf);
- public void configure(Configuration config);
+ void configure(Configuration config);
/** "multiple inheritance" */
final class ParameteredGeneralizations {
@@ -60,10 +60,10 @@ public interface Parametered {
/**
* Calls
- * {@link org.apache.mahout.common.parameters.Parametered#createParameters(String,org.apache.hadoop.conf.Configuration)}
+ * {@link Parametered#createParameters(String,org.apache.hadoop.conf.Configuration)}
* on parameter parmetered, and then recurse down its composite tree to invoke
- * {@link org.apache.mahout.common.parameters.Parametered#createParameters(String,org.apache.hadoop.conf.Configuration)}
- * and {@link org.apache.hadoop.conf.Configurationigurable#configure(org.apache.hadoop.conf.Configuration)} on
+ * {@link Parametered#createParameters(String,org.apache.hadoop.conf.Configuration)}
+ * and {@link Parametered#configure(org.apache.hadoop.conf.Configuration)} on
* each composite part.
*
* @param prefix
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/Bagging.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/Bagging.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/Bagging.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/Bagging.java Thu Jul 1 21:08:24 2010
@@ -69,7 +69,7 @@ public class Bagging {
if (callback != null) {
log.debug("Oob error estimation");
for (int index = 0; index < data.size(); index++) {
- if (sampled[index] == false) {
+ if (!sampled[index]) {
int prediction = tree.classify(data.get(index));
callback.prediction(treeId, index, prediction);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/DFUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/DFUtils.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/DFUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/DFUtils.java Thu Jul 1 21:08:24 2010
@@ -37,10 +37,6 @@ public final class DFUtils {
/**
* Writes a Node[] into a DataOutput
- *
- * @param out
- * @param array
- * @throws IOException
*/
public static void writeArray(DataOutput out, Node[] array) throws IOException {
out.writeInt(array.length);
@@ -51,10 +47,6 @@ public final class DFUtils {
/**
* Reads a Node[] from a DataInput
- *
- * @param in
- * @return
- * @throws IOException
*/
public static Node[] readNodeArray(DataInput in) throws IOException {
int length = in.readInt();
@@ -68,10 +60,6 @@ public final class DFUtils {
/**
* Writes a double[] into a DataOutput
- *
- * @param out
- * @param array
- * @throws IOException
*/
public static void writeArray(DataOutput out, double[] array) throws IOException {
out.writeInt(array.length);
@@ -82,10 +70,6 @@ public final class DFUtils {
/**
* Reads a double[] from a DataInput
- *
- * @param in
- * @return
- * @throws IOException
*/
public static double[] readDoubleArray(DataInput in) throws IOException {
int length = in.readInt();
@@ -99,10 +83,6 @@ public final class DFUtils {
/**
* Writes an int[] into a DataOutput
- *
- * @param out
- * @param array
- * @throws IOException
*/
public static void writeArray(DataOutput out, int[] array) throws IOException {
out.writeInt(array.length);
@@ -113,10 +93,6 @@ public final class DFUtils {
/**
* Reads an int[] from a DataInput
- *
- * @param in
- * @return
- * @throws IOException
*/
public static int[] readIntArray(DataInput in) throws IOException {
int length = in.readInt();
@@ -130,13 +106,8 @@ public final class DFUtils {
/**
* Return a list of all files in the output directory
- *
- * @param fs
- * @param outputPath
- * @return
- * @throws IOException
- * @throws RuntimeException
- * if no file is found
+ *
+ * @throws IOException if no file is found
*/
public static Path[] listOutputFiles(FileSystem fs, Path outputPath) throws IOException {
Path[] outfiles = OutputUtils.listOutputFiles(fs, outputPath);
@@ -149,9 +120,6 @@ public final class DFUtils {
/**
* Formats a time interval in milliseconds to a String in the form "hours:minutes:seconds:millis"
- *
- * @param milli
- * @return
*/
public static String elapsedTime(long milli) {
long seconds = milli / 1000;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/DecisionForest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/DecisionForest.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/DecisionForest.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/DecisionForest.java Thu Jul 1 21:08:24 2010
@@ -60,9 +60,6 @@ public class DecisionForest implements W
/**
* Classifies the data and calls callback for each classification
- *
- * @param data
- * @param callback
*/
public void classify(Data data, PredictionCallback callback) {
if (callback == null) {
@@ -110,8 +107,6 @@ public class DecisionForest implements W
/**
* Mean number of nodes per tree
- *
- * @return
*/
public long meanNbNodes() {
long sum = 0;
@@ -125,8 +120,6 @@ public class DecisionForest implements W
/**
* Total number of nodes in all the trees
- *
- * @return
*/
public long nbNodes() {
long sum = 0;
@@ -140,8 +133,6 @@ public class DecisionForest implements W
/**
* Mean maximum depth per tree
- *
- * @return
*/
public long meanMaxDepth() {
long sum = 0;
@@ -182,8 +173,6 @@ public class DecisionForest implements W
/**
* Reads the trees from the input and adds them to the existing trees
- * @param dataInput
- * @throws IOException
*/
@Override
public void readFields(DataInput dataInput) throws IOException {
@@ -201,10 +190,6 @@ public class DecisionForest implements W
/**
* Load the forest from a single file or a directory of files
- * @param conf
- * @param forestPath
- * @return
- * @throws IOException
*/
public static DecisionForest load(Configuration conf, Path forestPath) throws IOException {
FileSystem fs = forestPath.getFileSystem(conf);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/builder/DefaultTreeBuilder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/builder/DefaultTreeBuilder.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/builder/DefaultTreeBuilder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/builder/DefaultTreeBuilder.java Thu Jul 1 21:08:24 2010
@@ -198,10 +198,10 @@ public class DefaultTreeBuilder implemen
} while (selected[rind]);
result[index] = rind;
- selected[rind] = true; // temporarely set the choosen attribute to be selected
+ selected[rind] = true; // temporarily set the chosen attribute to be selected
}
- // the choosen attributes are not yet selected
+ // the chosen attributes are not yet selected
for (int attr : result) {
selected[attr] = false;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java Thu Jul 1 21:08:24 2010
@@ -47,40 +47,32 @@ public class Data implements Cloneable {
}
/**
- * Returns the number of elements
- *
- * @return
+ * @return the number of elements
*/
public int size() {
return instances.size();
}
/**
- * Returns true is this data contains no element
- *
- * @return
+ * @return true if this data contains no element
*/
public boolean isEmpty() {
return instances.isEmpty();
}
/**
- * Returns true is this data contains the specified element.
- *
* @param v
* element whose presence in this list is to be searched
- * @return
+ * @return true if this data contains the specified element.
*/
public boolean contains(Instance v) {
return instances.contains(v);
}
/**
- * Returns the index of the first occurrence of the element in this data
- *
* @param v
* element to search for
- * @return -1 if the element is not found
+ * @return the index of the first occurrence of the element in this data or -1 if the element is not found
*/
public int indexof(Instance v) {
return instances.indexOf(v);
@@ -100,10 +92,7 @@ public class Data implements Cloneable {
}
/**
- * Returns the subset from this data that matches the given condition
- *
- * @param condition
- * @return
+ * @return the subset from this data that matches the given condition
*/
public Data subset(Condition condition) {
List<Instance> subset = new ArrayList<Instance>();
@@ -118,13 +107,11 @@ public class Data implements Cloneable {
}
/**
- * Returns a random subset without modifying the current data
- *
* @param rng
* Random number generator
* @param ratio
* [0,1]
- * @return
+ * @return a random subset without modifying the current data
*/
public Data rsubset(Random rng, double ratio) {
List<Instance> subset = new ArrayList<Instance>();
@@ -142,7 +129,6 @@ public class Data implements Cloneable {
* if data has N cases, sample N cases at random -but with replacement.
*
* @param rng
- * @return
*/
public Data bagging(Random rng) {
int datasize = size();
@@ -181,7 +167,6 @@ public class Data implements Cloneable {
* Splits the data in two, returns one part, and this gets the rest of the data. <b>VERY SLOW!</b>
*
* @param rng
- * @return
*/
public Data rsplit(Random rng, int subsize) {
List<Instance> subset = new ArrayList<Instance>(subsize);
@@ -218,8 +203,6 @@ public class Data implements Cloneable {
/**
* checks if all the vectors have identical label values
- *
- * @return
*/
public boolean identicalLabel() {
if (isEmpty()) {
@@ -240,7 +223,6 @@ public class Data implements Cloneable {
* finds all distinct values of a given attribute
*
* @param attr
- * @return
*/
public double[] values(int attr) {
Set<Double> result = new HashSet<Double>();
@@ -285,8 +267,6 @@ public class Data implements Cloneable {
/**
* extract the labels of all instances
- *
- * @return
*/
public int[] extractLabels() {
int[] labels = new int[size()];
@@ -306,7 +286,6 @@ public class Data implements Cloneable {
* file system
* @param path
* data path
- * @return
*/
public static int[] extractLabels(Dataset dataset, FileSystem fs, Path path) throws IOException {
FSDataInputStream input = fs.open(path);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java Thu Jul 1 21:08:24 2010
@@ -269,7 +269,7 @@ public final class DataLoader {
* @throws RuntimeException
* if no LABEL is found in the attributes description
*/
- protected static Data constructData(Attribute[] attrs, List<Instance> vectors, List<String>[] values) {
+ private static Data constructData(Attribute[] attrs, List<Instance> vectors, List<String>[] values) {
Dataset dataset = new Dataset(attrs, values, vectors.size());
return new Data(dataset, vectors);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/data/conditions/Condition.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/conditions/Condition.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/data/conditions/Condition.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/data/conditions/Condition.java Thu Jul 1 21:08:24 2010
@@ -35,10 +35,6 @@ public abstract class Condition {
/**
* Condition that checks if the given attribute has a value "equal" to the given value
- *
- * @param attr
- * @param value
- * @return
*/
public static Condition equals(int attr, double value) {
return new Equals(attr, value);
@@ -46,10 +42,6 @@ public abstract class Condition {
/**
* Condition that checks if the given attribute has a value "lesser" than the given value
- *
- * @param attr
- * @param value
- * @return
*/
public static Condition lesser(int attr, double value) {
return new Lesser(attr, value);
@@ -57,10 +49,6 @@ public abstract class Condition {
/**
* Condition that checks if the given attribute has a value "greater or equal" than the given value
- *
- * @param attr
- * @param value
- * @return
*/
public static Condition greaterOrEquals(int attr, double value) {
return new GreaterOrEquals(attr, value);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java Thu Jul 1 21:08:24 2010
@@ -75,7 +75,6 @@ public class Classifier {
}
private void configureJob(Job job) throws IOException {
- Configuration conf = job.getConfiguration();
job.setJarByClass(Classifier.class);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step0Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step0Job.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step0Job.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step0Job.java Thu Jul 1 21:08:24 2010
@@ -196,7 +196,7 @@ public class Step0Job {
* Outputs the first key and the size of the partition
*
*/
- protected static class Step0Mapper extends Mapper<LongWritable,Text,IntWritable,Step0Output> {
+ static class Step0Mapper extends Mapper<LongWritable,Text,IntWritable,Step0Output> {
private int partition;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java Thu Jul 1 21:08:24 2010
@@ -254,7 +254,7 @@ public final class TransactionTree imple
items += p.getFirst().size();
count++;
for (Integer i : p.getFirst()) {
- if (frequencyList.containsKey(i) == false) {
+ if (!frequencyList.containsKey(i)) {
frequencyList.put(i, new MutableLong(0));
}
frequencyList.get(i).add(p.getSecond());
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TopKPatternsOutputConverter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TopKPatternsOutputConverter.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TopKPatternsOutputConverter.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TopKPatternsOutputConverter.java Thu Jul 1 21:08:24 2010
@@ -38,7 +38,7 @@ import org.apache.mahout.fpm.pfpgrowth.f
public final class TopKPatternsOutputConverter<A extends Comparable<? super A>> implements
OutputCollector<Integer,FrequentPatternMaxHeap> {
- private OutputCollector<A,List<Pair<List<A>,Long>>> collector;
+ private final OutputCollector<A,List<Pair<List<A>,Long>>> collector;
private final Map<Integer,A> reverseMapping;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java Thu Jul 1 21:08:24 2010
@@ -92,13 +92,11 @@ public class FPGrowth<A extends Comparab
while (transactions.hasNext()) {
Pair<List<A>,Long> transaction = transactions.next();
for (A attribute : transaction.getFirst()) {
- if (attributeSupport.containsKey(attribute) == false) {
- attributeSupport.put(attribute, new MutableLong(transaction
- .getSecond()));
- } else {
- attributeSupport.get(attribute).add(
- transaction.getSecond().longValue());
+ if (attributeSupport.containsKey(attribute)) {
+ attributeSupport.get(attribute).add(transaction.getSecond().longValue());
// count++;
+ } else {
+ attributeSupport.put(attribute, new MutableLong(transaction.getSecond()));
}
}
}
@@ -159,11 +157,10 @@ public class FPGrowth<A extends Comparab
for (Pair<A,Long> feature : frequencyList) {
A attrib = feature.getFirst();
Long frequency = feature.getSecond();
- if (frequency < minSupport) {
- continue;
+ if (frequency >= minSupport) {
+ attributeIdMapping.put(attrib, id);
+ reverseMapping.put(id++, attrib);
}
- attributeIdMapping.put(attrib, id);
- reverseMapping.put(id++, attrib);
}
long[] attributeFrequency = new long[attributeIdMapping.size()];
@@ -218,11 +215,11 @@ public class FPGrowth<A extends Comparab
* @return Top K Frequent Patterns for each feature and their support
*/
private Map<Integer,FrequentPatternMaxHeap> fpGrowth(FPTree tree,
- MutableLong minSupportMutable,
- int k,
- Set<Integer> requiredFeatures,
- TopKPatternsOutputConverter<A> outputCollector,
- StatusUpdater updater) throws IOException {
+ MutableLong minSupportMutable,
+ int k,
+ Set<Integer> requiredFeatures,
+ TopKPatternsOutputConverter<A> outputCollector,
+ StatusUpdater updater) throws IOException {
long minSupportValue = minSupportMutable.longValue();
@@ -230,19 +227,18 @@ public class FPGrowth<A extends Comparab
FPTreeDepthCache treeCache = new FPTreeDepthCache();
for (int i = tree.getHeaderTableCount() - 1; i >= 0; i--) {
int attribute = tree.getAttributeAtIndex(i);
- if (requiredFeatures.contains(attribute) == false) {
- continue;
+ if (requiredFeatures.contains(attribute)) {
+ log.info("Mining FTree Tree for all patterns with {}", attribute);
+ MutableLong minSupport = new MutableLong(minSupportValue);
+ FrequentPatternMaxHeap frequentPatterns = growth(tree, minSupport, k,
+ treeCache, 0, attribute, updater);
+ patterns.put(attribute, frequentPatterns);
+ outputCollector.collect(attribute, frequentPatterns);
+
+ minSupportValue = Math.max(minSupportValue, minSupport.longValue() / 2);
+ log.info("Found {} Patterns with Least Support {}", patterns.get(
+ attribute).count(), patterns.get(attribute).leastSupport());
}
- log.info("Mining FTree Tree for all patterns with {}", attribute);
- MutableLong minSupport = new MutableLong(minSupportValue);
- FrequentPatternMaxHeap frequentPatterns = growth(tree, minSupport, k,
- treeCache, 0, attribute, updater);
- patterns.put(attribute, frequentPatterns);
- outputCollector.collect(attribute, frequentPatterns);
-
- minSupportValue = Math.max(minSupportValue, minSupport.longValue() / 2);
- log.info("Found {} Patterns with Least Support {}", patterns.get(
- attribute).count(), patterns.get(attribute).leastSupport());
}
log.info("Tree Cache: First Level: Cache hits={} Cache Misses={}",
treeCache.getHits(), treeCache.getMisses());
@@ -263,10 +259,9 @@ public class FPGrowth<A extends Comparab
tempNode);
}
tempNode = tree.childAtIndex(tempNode, 0);
- if (tree.count(tempNode) < minSupportMutable.intValue()) {
- continue;
+ if (tree.count(tempNode) >= minSupportMutable.intValue()) {
+ frequentItem.add(tree.attribute(tempNode), tree.count(tempNode));
}
- frequentItem.add(tree.attribute(tempNode), tree.count(tempNode));
}
if (frequentItem.length() > 0) {
frequentPatterns.insert(frequentItem);
@@ -402,7 +397,7 @@ public class FPGrowth<A extends Comparab
FrequentPatternMaxHeap frequentPatterns = new FrequentPatternMaxHeap(k,
false);
- if (conditionalOfCurrentAttribute == false) {
+ if (!conditionalOfCurrentAttribute) {
int index = Arrays.binarySearch(tree.getHeaderTableAttributes(),
currentAttribute);
if (index < 0) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeap.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeap.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeap.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeap.java Thu Jul 1 21:08:24 2010
@@ -46,7 +46,7 @@ public final class FrequentPatternMaxHea
for (Pattern p : queue) {
Long index = p.support();
Set<Pattern> patternList;
- if (patternIndex.containsKey(index) == false) {
+ if (!patternIndex.containsKey(index)) {
patternList = new HashSet<Pattern>();
patternIndex.put(index, patternList);
}
@@ -133,10 +133,7 @@ public final class FrequentPatternMaxHea
}
private boolean addPattern(Pattern frequentPattern) {
- if (subPatternCheck == false) {
- queue.add(frequentPattern);
- return true;
- } else {
+ if (subPatternCheck) {
Long index = frequentPattern.support();
if (patternIndex.containsKey(index)) {
Set<Pattern> indexSet = patternIndex.get(index);
@@ -153,9 +150,7 @@ public final class FrequentPatternMaxHea
}
if (replace) {
indexSet.remove(replacablePattern);
- if (indexSet.contains(frequentPattern) == false
- && queue.add(frequentPattern)) {
-
+ if (!indexSet.contains(frequentPattern) && queue.add(frequentPattern)) {
indexSet.add(frequentPattern);
}
return false;
@@ -166,15 +161,18 @@ public final class FrequentPatternMaxHea
} else {
queue.add(frequentPattern);
Set<Pattern> patternList;
- if (patternIndex.containsKey(index) == false) {
+ if (!patternIndex.containsKey(index)) {
patternList = new HashSet<Pattern>();
patternIndex.put(index, patternList);
}
patternList = patternIndex.get(index);
patternList.add(frequentPattern);
-
+
return true;
}
+ } else {
+ queue.add(frequentPattern);
+ return true;
}
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java Thu Jul 1 21:08:24 2010
@@ -129,11 +129,8 @@ public class Pattern implements Comparab
return false;
}
Pattern other = (Pattern) obj;
- if (length == other.length && support == other.support) {
- // expensive check done only if length and support matches
- return Arrays.equals(pattern, other.pattern);
- }
- return false;
+ // expensive check done only if length and support matches
+ return length == other.length && support == other.support && Arrays.equals(pattern, other.pattern);
}
@Override
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java Thu Jul 1 21:08:24 2010
@@ -26,6 +26,7 @@ import org.apache.hadoop.io.WritableComp
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobConfigurable;
+import org.apache.mahout.common.IOUtils;
import org.apache.mahout.math.CardinalityException;
import org.apache.mahout.math.MatrixSlice;
import org.apache.mahout.math.Vector;
@@ -134,33 +135,25 @@ public class DistributedRowMatrix implem
return numCols;
}
- public DistributedRowMatrix times(DistributedRowMatrix other) {
+ public DistributedRowMatrix times(DistributedRowMatrix other) throws IOException {
if (numRows != other.numRows()) {
throw new CardinalityException(numRows, other.numRows());
}
Path outPath = new Path(outputTmpBasePath.getParent(), "productWith");
JobConf conf = MatrixMultiplicationJob.createMatrixMultiplyJobConf(rowPath, other.rowPath, outPath, other.numCols);
- try {
- JobClient.runJob(conf);
- DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, numRows, other.numCols());
- out.configure(conf);
- return out;
- } catch (IOException ioe) {
- throw new RuntimeException(ioe);
- }
+ JobClient.runJob(conf);
+ DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, numRows, other.numCols());
+ out.configure(conf);
+ return out;
}
- public DistributedRowMatrix transpose() {
+ public DistributedRowMatrix transpose() throws IOException {
Path outputPath = new Path(rowPath.getParent(), "transpose-" + (System.nanoTime() & 0xFF));
- try {
- JobConf conf = TransposeJob.buildTransposeJobConf(rowPath, outputPath, numRows);
- JobClient.runJob(conf);
- DistributedRowMatrix m = new DistributedRowMatrix(outputPath, outputTmpPath, numCols, numRows);
- m.configure(this.conf);
- return m;
- } catch (IOException ioe) {
- throw new RuntimeException(ioe);
- }
+ JobConf conf = TransposeJob.buildTransposeJobConf(rowPath, outputPath, numRows);
+ JobClient.runJob(conf);
+ DistributedRowMatrix m = new DistributedRowMatrix(outputPath, outputTmpPath, numCols, numRows);
+ m.configure(this.conf);
+ return m;
}
@Override
@@ -174,7 +167,7 @@ public class DistributedRowMatrix implem
JobClient.runJob(conf);
return TimesSquaredJob.retrieveTimesSquaredOutputVector(conf);
} catch (IOException ioe) {
- throw new RuntimeException(ioe);
+ throw new IllegalStateException(ioe);
}
}
@@ -231,10 +224,7 @@ public class DistributedRowMatrix implem
throw new IllegalStateException(ioe);
} finally {
if (!hasNext) {
- try {
- reader.close();
- } catch (IOException ioe) {
- }
+ IOUtils.quietClose(reader);
}
}
return hasNext;
@@ -303,6 +293,20 @@ public class DistributedRowMatrix implem
}
@Override
+ public boolean equals(Object o) {
+ if (!(o instanceof MatrixEntryWritable)) {
+ return false;
+ }
+ MatrixEntryWritable other = (MatrixEntryWritable) o;
+ return row == other.row && col == other.col;
+ }
+
+ @Override
+ public int hashCode() {
+ return row + 31 * col;
+ }
+
+ @Override
public void write(DataOutput out) throws IOException {
out.writeInt(row);
out.writeInt(col);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixMultiplicationJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixMultiplicationJob.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixMultiplicationJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixMultiplicationJob.java Thu Jul 1 21:08:24 2010
@@ -78,7 +78,7 @@ public class MatrixMultiplicationJob ext
addOption("inputPathB", "ib", "Path to the second input matrix");
Map<String, String> argMap = parseArguments(strings);
- if(argMap == null) {
+ if (argMap == null) {
return -1;
}
@@ -94,8 +94,8 @@ public class MatrixMultiplicationJob ext
a.configure(new JobConf(getConf()));
b.configure(new JobConf(getConf()));
- DistributedRowMatrix c = a.times(b);
-
+ //DistributedRowMatrix c = a.times(b);
+ a.times(b);
return 0;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java Thu Jul 1 21:08:24 2010
@@ -143,8 +143,8 @@ public final class TimesSquaredJob {
public static class TimesSquaredMapper<T extends WritableComparable> extends MapReduceBase
implements Mapper<T,VectorWritable, NullWritable,VectorWritable> {
- protected Vector outputVector;
- protected OutputCollector<NullWritable,VectorWritable> out;
+ Vector outputVector;
+ OutputCollector<NullWritable,VectorWritable> out;
private Vector inputVector;
@Override
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/Cooccurrence.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/Cooccurrence.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/Cooccurrence.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/Cooccurrence.java Thu Jul 1 21:08:24 2010
@@ -34,11 +34,9 @@ public class Cooccurrence implements Wri
private double valueB;
public Cooccurrence() {
- super();
}
public Cooccurrence(int column, double valueA, double valueB) {
- super();
this.column = column;
this.valueA = valueA;
this.valueB = valueB;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java Thu Jul 1 21:08:24 2010
@@ -92,8 +92,8 @@ public class RowSimilarityJob extends Ab
addOutputOption();
addOption("numberOfColumns", "r", "Number of columns in the input matrix");
addOption("similarityClassname", "s", "Name of distributed similarity class to instantiate");
- addOption("maxSimilaritiesPerRow", "m", "Number of maximum similarities per row (default: " +
- DEFAULT_MAX_SIMILARITIES_PER_ROW + ")", String.valueOf(DEFAULT_MAX_SIMILARITIES_PER_ROW));
+ addOption("maxSimilaritiesPerRow", "m", "Number of maximum similarities per row (default: "
+ + DEFAULT_MAX_SIMILARITIES_PER_ROW + ')', String.valueOf(DEFAULT_MAX_SIMILARITIES_PER_ROW));
Map<String,String> parsedArgs = parseArguments(args);
if (parsedArgs == null) {
@@ -138,7 +138,7 @@ public class RowSimilarityJob extends Ab
Cooccurrence.class,
SimilarityReducer.class,
SimilarityMatrixEntryKey.class,
- DistributedRowMatrix.MatrixEntryWritable.class,
+ MatrixEntryWritable.class,
SequenceFileOutputFormat.class);
Configuration pairwiseConf = pairwiseSimilarity.getConfiguration();
@@ -153,7 +153,7 @@ public class RowSimilarityJob extends Ab
SequenceFileInputFormat.class,
Mapper.class,
SimilarityMatrixEntryKey.class,
- DistributedRowMatrix.MatrixEntryWritable.class,
+ MatrixEntryWritable.class,
EntriesToVectorsReducer.class,
IntWritable.class,
VectorWritable.class,
@@ -189,7 +189,7 @@ public class RowSimilarityJob extends Ab
@Override
protected void setup(Context ctx) throws IOException, InterruptedException {
super.setup(ctx);
- similarity = RowSimilarityJob.instantiateSimilarity(ctx.getConfiguration().get(DISTRIBUTED_SIMILARITY_CLASSNAME));
+ similarity = instantiateSimilarity(ctx.getConfiguration().get(DISTRIBUTED_SIMILARITY_CLASSNAME));
}
@Override
@@ -264,7 +264,7 @@ public class RowSimilarityJob extends Ab
* computes the pairwise similarities
*/
public static class SimilarityReducer
- extends Reducer<WeightedRowPair,Cooccurrence,SimilarityMatrixEntryKey,DistributedRowMatrix.MatrixEntryWritable> {
+ extends Reducer<WeightedRowPair,Cooccurrence,SimilarityMatrixEntryKey, MatrixEntryWritable> {
private DistributedVectorSimilarity similarity;
private int numberOfColumns;
@@ -272,7 +272,7 @@ public class RowSimilarityJob extends Ab
@Override
protected void setup(Context ctx) throws IOException, InterruptedException {
super.setup(ctx);
- similarity = RowSimilarityJob.instantiateSimilarity(ctx.getConfiguration().get(DISTRIBUTED_SIMILARITY_CLASSNAME));
+ similarity = instantiateSimilarity(ctx.getConfiguration().get(DISTRIBUTED_SIMILARITY_CLASSNAME));
numberOfColumns = ctx.getConfiguration().getInt(NUMBER_OF_COLUMNS, -1);
if (numberOfColumns < 1) {
throw new IllegalStateException("Number of columns was not correctly set!");
@@ -290,7 +290,7 @@ public class RowSimilarityJob extends Ab
if (!Double.isNaN(similarityValue)) {
SimilarityMatrixEntryKey key = new SimilarityMatrixEntryKey();
- DistributedRowMatrix.MatrixEntryWritable entry = new DistributedRowMatrix.MatrixEntryWritable();
+ MatrixEntryWritable entry = new MatrixEntryWritable();
entry.setVal(similarityValue);
entry.setRow(rowA);
@@ -312,7 +312,7 @@ public class RowSimilarityJob extends Ab
* collects all {@link MatrixEntryWritable} for each column and creates a {@link VectorWritable}
*/
public static class EntriesToVectorsReducer
- extends Reducer<SimilarityMatrixEntryKey,DistributedRowMatrix.MatrixEntryWritable,IntWritable,VectorWritable> {
+ extends Reducer<SimilarityMatrixEntryKey, MatrixEntryWritable,IntWritable,VectorWritable> {
private int maxSimilaritiesPerRow;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/SimilarityMatrixEntryKey.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/SimilarityMatrixEntryKey.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/SimilarityMatrixEntryKey.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/SimilarityMatrixEntryKey.java Thu Jul 1 21:08:24 2010
@@ -44,11 +44,9 @@ public class SimilarityMatrixEntryKey im
}
public SimilarityMatrixEntryKey() {
- super();
}
public SimilarityMatrixEntryKey(int row, double value) {
- super();
this.row = row;
this.value = value;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedOccurrence.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedOccurrence.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedOccurrence.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedOccurrence.java Thu Jul 1 21:08:24 2010
@@ -33,12 +33,10 @@ class WeightedOccurrence implements Writ
private double value;
private double weight;
- public WeightedOccurrence() {
- super();
+ WeightedOccurrence() {
}
- public WeightedOccurrence(int row, double value, double weight) {
- super();
+ WeightedOccurrence(int row, double value, double weight) {
this.row = row;
this.value = value;
this.weight = weight;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedOccurrenceArray.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedOccurrenceArray.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedOccurrenceArray.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedOccurrenceArray.java Thu Jul 1 21:08:24 2010
@@ -24,11 +24,11 @@ import org.apache.hadoop.io.ArrayWritabl
*/
class WeightedOccurrenceArray extends ArrayWritable {
- public WeightedOccurrenceArray() {
+ WeightedOccurrenceArray() {
super(WeightedOccurrence.class);
}
- public WeightedOccurrenceArray(WeightedOccurrence[] weightedOccurrences) {
+ WeightedOccurrenceArray(WeightedOccurrence[] weightedOccurrences) {
super(WeightedOccurrence.class);
set(weightedOccurrences);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedRowPair.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedRowPair.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedRowPair.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedRowPair.java Thu Jul 1 21:08:24 2010
@@ -36,11 +36,9 @@ public class WeightedRowPair implements
private double weightB;
public WeightedRowPair() {
- super();
}
public WeightedRowPair(int rowA, int rowB, double weightA, double weightB) {
- super();
this.rowA = rowA;
this.rowB = rowB;
this.weightA = weightA;
@@ -99,7 +97,7 @@ public class WeightedRowPair implements
public boolean equals(Object other) {
if (other instanceof WeightedRowPair) {
WeightedRowPair otherPair = (WeightedRowPair) other;
- return (rowA == otherPair.rowA && rowB == otherPair.rowB);
+ return rowA == otherPair.rowA && rowB == otherPair.rowB;
}
return false;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/vector/AbstractDistributedVectorSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/vector/AbstractDistributedVectorSimilarity.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/vector/AbstractDistributedVectorSimilarity.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/vector/AbstractDistributedVectorSimilarity.java Thu Jul 1 21:08:24 2010
@@ -46,9 +46,6 @@ public abstract class AbstractDistribute
/**
* computes the number of elements in the {@link Iterable}
- *
- * @param iterable
- * @return
*/
protected int countElements(Iterable<?> iterable) {
return countElements(iterable.iterator());
@@ -56,9 +53,6 @@ public abstract class AbstractDistribute
/**
* computes the number of elements in the {@link Iterator}
- *
- * @param iterator
- * @return
*/
protected int countElements(Iterator<?> iterator) {
int count = 0;
@@ -73,17 +67,13 @@ public abstract class AbstractDistribute
* do the actual similarity computation
*
* @see DistributedVectorSimilarity#similarity(int, int, Iterable, double, double, int)
- *
- * @param rowA
- * @param rowB
- * @param cooccurrences
- * @param weightOfVectorA
- * @param weightOfVectorB
- * @param numberOfColumns
- * @return
*/
- protected abstract double doComputeResult(int rowA, int rowB, Iterable<Cooccurrence> cooccurrences,
- double weightOfVectorA, double weightOfVectorB, int numberOfColumns);
+ protected abstract double doComputeResult(int rowA,
+ int rowB,
+ Iterable<Cooccurrence> cooccurrences,
+ double weightOfVectorA,
+ double weightOfVectorB,
+ int numberOfColumns);
/**
* vectors have no weight (NaN) by default, subclasses may override this
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedVectorSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedVectorSimilarity.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedVectorSimilarity.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedVectorSimilarity.java Thu Jul 1 21:08:24 2010
@@ -33,8 +33,6 @@ public interface DistributedVectorSimila
/**
* compute the weight (e.g. length) of a vector
- * @param v
- * @return
*/
double weight(Vector v);
@@ -46,9 +44,11 @@ public interface DistributedVectorSimila
* @param cooccurrences all column entries where both vectors have a nonZero entry
* @param weightOfVectorA the result of {@link DistributedVectorSimilarity#weight(Vector)} for the first row vector
* @param weightOfVectorB the result of {@link DistributedVectorSimilarity#weight(Vector)} for the second row vector
- * @param numberOfCols the overall number of columns
- * @return
*/
- double similarity(int rowA, int rowB, Iterable<Cooccurrence> cooccurrences, double weightOfVectorA,
- double weightOfVectorB, int numberOfColumns);
+ double similarity(int rowA,
+ int rowB,
+ Iterable<Cooccurrence> cooccurrences,
+ double weightOfVectorA,
+ double weightOfVectorB,
+ int numberOfColumns);
}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java Thu Jul 1 21:08:24 2010
@@ -123,7 +123,7 @@ public final class ItemSimilarityTest ex
* @throws Exception
*/
public void testPrefsToItemUserMatrixMapper() throws Exception {
- Mapper<LongWritable,Text,VarIntWritable,DistributedRowMatrix.MatrixEntryWritable>.Context context =
+ Mapper<LongWritable,Text,VarIntWritable, MatrixEntryWritable>.Context context =
EasyMock.createMock(Mapper.Context.class);
context.write(EasyMock.eq(new VarIntWritable(TasteHadoopUtils.idToIndex(100L))),
MathHelper.matrixEntryMatches(TasteHadoopUtils.idToIndex(100L),
@@ -145,15 +145,15 @@ public final class ItemSimilarityTest ex
* @throws Exception
*/
public void testPrefsToItemUserMatrixReducer() throws Exception {
- Reducer<VarIntWritable,DistributedRowMatrix.MatrixEntryWritable,IntWritable,VectorWritable>.Context context =
+ Reducer<VarIntWritable, MatrixEntryWritable,IntWritable,VectorWritable>.Context context =
EasyMock.createMock(Reducer.Context.class);
context.write(EasyMock.eq(new IntWritable(123)), MathHelper.vectorMatches(MathHelper.elem(1, 0.5d),
- MathHelper.elem(7, 2d)));
+ MathHelper.elem(7, 2.0d)));
EasyMock.replay(context);
List<MatrixEntryWritable> entries = Arrays.asList(MathHelper.matrixEntry(123, 1, 0.5d),
- MathHelper.matrixEntry(123, 7, 2d));
+ MathHelper.matrixEntry(123, 7, 2.0d));
new PrefsToItemUserMatrixReducer().reduce(new VarIntWritable(123), entries, context);
@@ -181,7 +181,7 @@ public final class ItemSimilarityTest ex
Vector vector = new RandomAccessSparseVector(Integer.MAX_VALUE);
vector.set(12, 0.2d);
- vector.set(34, 1d);
+ vector.set(34, 1.0d);
vector.set(56, 0.9d);
MostSimilarItemPairsMapper mapper = new MostSimilarItemPairsMapper();
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/MockMapperContext.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/MockMapperContext.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/MockMapperContext.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/MockMapperContext.java Thu Jul 1 21:08:24 2010
@@ -12,7 +12,7 @@ import org.apache.mahout.common.DummyOut
public class MockMapperContext<K extends WritableComparable, V extends Writable> extends Context {
- private DummyOutputCollector<K, V> collector;
+ private final DummyOutputCollector<K, V> collector;
public MockMapperContext(Mapper<?,?,?,?> mapper, Configuration arg0,
DummyOutputCollector<K,V> collector) throws IOException, InterruptedException {
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/MockReducerContext.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/MockReducerContext.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/MockReducerContext.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/MockReducerContext.java Thu Jul 1 21:08:24 2010
@@ -13,7 +13,7 @@ import org.apache.mahout.common.MockIter
public class MockReducerContext<K extends WritableComparable, V extends Writable> extends Context {
- private DummyOutputCollector<K, V> collector;
+ private final DummyOutputCollector<K, V> collector;
public MockReducerContext(Reducer<?,?,?,?> reducer, Configuration conf, DummyOutputCollector<K, V> collector, Class keyIn,
Class<?> valueIn) throws IOException, InterruptedException {
@@ -21,17 +21,11 @@ public class MockReducerContext<K extend
this.collector = collector;
}
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapreduce.TaskInputOutputContext#setStatus(java.lang.String)
- */
@Override
public void setStatus(String status) {
// TODO Auto-generated method stub
}
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapreduce.TaskInputOutputContext#write(java.lang.Object, java.lang.Object)
- */
@Override
public void write(Object key, Object value) throws IOException, InterruptedException {
collector.collect((K) key, (V) value);
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java Thu Jul 1 21:08:24 2010
@@ -121,7 +121,7 @@ public class TestFuzzyKmeansClustering e
list.add(new WeightedVectorWritable(clusterPdf, new VectorWritable(point)));
double totalProb = 0;
for (int i = 0; i < clusterList.size(); i++) {
- SoftCluster cluster = clusterList.get(i);
+ //SoftCluster cluster = clusterList.get(i);
double probWeight = clusterer.computeProbWeight(clusterDistanceList.get(i), clusterDistanceList);
totalProb += probWeight;
}
@@ -162,8 +162,7 @@ public class TestFuzzyKmeansClustering e
// iterate for each cluster
int size = 0;
- for (int cId : pointClusterInfo.keySet()) {
- List<WeightedVectorWritable> pts = pointClusterInfo.get(cId);
+ for (List<WeightedVectorWritable> pts : pointClusterInfo.values()) {
size += pts.size();
}
assertEquals("total size", size, points.size());
@@ -398,7 +397,7 @@ public class TestFuzzyKmeansClustering e
}
List<Vector> pointsVectors = new ArrayList<Vector>();
for (VectorWritable point : points) {
- pointsVectors.add((Vector) point.get());
+ pointsVectors.add(point.get());
}
FuzzyKMeansClusterer clusterer = new FuzzyKMeansClusterer(measure, 0.001, 2);
@@ -517,16 +516,18 @@ public class TestFuzzyKmeansClustering e
// Now compare the clustermapper results with reference implementation
assertEquals("mapper and reference sizes", refClusters.size(), clusterMapperCollector.getKeys().size());
- for (int pcId : refClusters.keySet()) {
- System.out.println("refClusters=" + refClusters.get(pcId) + " mapClusters="
- + clusterMapperCollector.getValue(new IntWritable(pcId)));
- assertEquals("cluster " + pcId + " sizes", refClusters.get(pcId).size(), clusterMapperCollector.getValue(
- new IntWritable(pcId)).size());
+ for (Map.Entry<Integer, List<WeightedVectorWritable>> entry : refClusters.entrySet()) {
+ int key = entry.getKey();
+ List<WeightedVectorWritable> value = entry.getValue();
+ System.out.println("refClusters=" + value + " mapClusters="
+ + clusterMapperCollector.getValue(new IntWritable(key)));
+ assertEquals("cluster " + key + " sizes",
+ value.size(),
+ clusterMapperCollector.getValue(new IntWritable(key)).size());
}
// make sure all points are allocated to a cluster
int size = 0;
- for (int cId : refClusters.keySet()) {
- List<WeightedVectorWritable> pts = refClusters.get(cId);
+ for (List<WeightedVectorWritable> pts: refClusters.values()) {
size += pts.size();
}
assertEquals("total size", size, points.size());
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java Thu Jul 1 21:08:24 2010
@@ -29,7 +29,6 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
import org.apache.mahout.clustering.ClusteringTestUtils;
import org.apache.mahout.clustering.MockMapperContext;
import org.apache.mahout.clustering.MockReducerContext;
@@ -304,7 +303,7 @@ public class TestKmeansClustering extend
}
List<Vector> pointsVectors = new ArrayList<Vector>();
for (VectorWritable point : points) {
- pointsVectors.add((Vector) point.get());
+ pointsVectors.add(point.get());
}
boolean converged = KMeansClusterer.runKMeansIteration(pointsVectors, reference, measure, 0.001);
if (k == 8) {
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestMapReduce.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestMapReduce.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestMapReduce.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestMapReduce.java Thu Jul 1 21:08:24 2010
@@ -18,7 +18,6 @@ package org.apache.mahout.clustering.lda
import org.easymock.classextension.EasyMock;
-import java.io.File;
import java.util.Iterator;
import java.util.Random;
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/MahoutTestCase.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/MahoutTestCase.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/common/MahoutTestCase.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/common/MahoutTestCase.java Thu Jul 1 21:08:24 2010
@@ -81,7 +81,8 @@ public abstract class MahoutTestCase ext
return tempFileOrDir;
}
- protected final void setField(Object target, String fieldname, Object value) throws Exception {
+ protected static void setField(Object target, String fieldname, Object value)
+ throws NoSuchFieldException, IllegalAccessException {
Field field = target.getClass().getDeclaredField(fieldname);
field.setAccessible(true);
field.set(target, value);
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/Step1MapperTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/Step1MapperTest.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/Step1MapperTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/Step1MapperTest.java Thu Jul 1 21:08:24 2010
@@ -62,8 +62,8 @@ public class Step1MapperTest extends Mah
* Special Step1Mapper that can be configured without using a Configuration
*
*/
- protected static class MockStep1Mapper extends Step1Mapper {
- protected MockStep1Mapper(TreeBuilder treeBuilder, Dataset dataset, Long seed,
+ private static class MockStep1Mapper extends Step1Mapper {
+ private MockStep1Mapper(TreeBuilder treeBuilder, Dataset dataset, Long seed,
int partition, int numMapTasks, int numTrees) {
configure(false, true, treeBuilder, dataset);
configure(seed, partition, numMapTasks, numTrees);
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/MathHelper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/MathHelper.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/MathHelper.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/MathHelper.java Thu Jul 1 21:08:24 2010
@@ -25,6 +25,7 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
+import org.apache.mahout.common.IOUtils;
import org.apache.mahout.math.DenseMatrix;
import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.RandomAccessSparseVector;
@@ -43,6 +44,9 @@ public class MathHelper {
/** the "close enough" value for floating point computations */
public static final double EPSILON = 0.00001d;
+ private MathHelper() {
+ }
+
/**
* applies an {@link IArgumentMatcher} to {@link MatrixEntryWritable}s
*
@@ -85,25 +89,24 @@ public class MathHelper {
}
/**
- * convenience method to create a {@link Vector.Element}
+ * convenience method to create a {@link Element}
*
* @param index
* @param value
* @return
*/
- public static Vector.Element elem(int index, double value) {
+ public static Element elem(int index, double value) {
return new ElementToCheck(index, value);
}
/**
- * a simple implementation of {@link Vector.Element}
+ * a simple implementation of {@link Element}
*/
- static class ElementToCheck implements Vector.Element {
- private int index;
+ static class ElementToCheck implements Element {
+ private final int index;
private double value;
- public ElementToCheck(int index, double value) {
- super();
+ ElementToCheck(int index, double value) {
this.index = index;
this.value = value;
}
@@ -127,13 +130,13 @@ public class MathHelper {
* @param elements
* @return
*/
- public static VectorWritable vectorMatches(final Vector.Element... elements) {
+ public static VectorWritable vectorMatches(final Element... elements) {
EasyMock.reportMatcher(new IArgumentMatcher() {
@Override
public boolean matches(Object argument) {
if (argument instanceof VectorWritable) {
Vector v = ((VectorWritable) argument).get();
- for (Vector.Element element : elements) {
+ for (Element element : elements) {
boolean matches = Math.abs(element.get() - v.get(element.index())) <= EPSILON;
if (!matches) {
return false;
@@ -179,7 +182,7 @@ public class MathHelper {
}
}
} finally {
- reader.close();
+ IOUtils.quietClose(reader);
}
return matrix;
}
@@ -206,7 +209,7 @@ public class MathHelper {
writer.append(new IntWritable(n), new VectorWritable(v));
}
} finally {
- writer.close();
+ IOUtils.quietClose(writer);
}
}
}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestRowSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestRowSimilarityJob.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestRowSimilarityJob.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestRowSimilarityJob.java Thu Jul 1 21:08:24 2010
@@ -18,27 +18,22 @@
package org.apache.mahout.math.hadoop.similarity;
import java.io.File;
-import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
-import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.mahout.common.MahoutTestCase;
-import org.apache.mahout.math.DenseMatrix;
import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.VarIntWritable;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
-import org.apache.mahout.math.Vector.Element;
import org.apache.mahout.math.hadoop.MathHelper;
import org.apache.mahout.math.hadoop.DistributedRowMatrix.MatrixEntryWritable;
import org.apache.mahout.math.hadoop.similarity.RowSimilarityJob.EntriesToVectorsReducer;
@@ -62,8 +57,8 @@ public class TestRowSimilarityJob extend
Mapper<IntWritable,VectorWritable,VarIntWritable,WeightedOccurrence>.Context context =
EasyMock.createMock(Mapper.Context.class);
- context.write(new VarIntWritable(456), new WeightedOccurrence(123, 0.5d, 2d));
- context.write(new VarIntWritable(789), new WeightedOccurrence(123, 0.1d, 2d));
+ context.write(new VarIntWritable(456), new WeightedOccurrence(123, 0.5d, 2.0d));
+ context.write(new VarIntWritable(789), new WeightedOccurrence(123, 0.1d, 2.0d));
EasyMock.replay(context);
@@ -86,8 +81,8 @@ public class TestRowSimilarityJob extend
*/
public void testWeightedOccurrencesPerColumnReducer() throws Exception {
- List<WeightedOccurrence> weightedOccurrences = Arrays.asList(new WeightedOccurrence(45, 0.5d, 1d),
- new WeightedOccurrence(78, 3d, 9d));
+ List<WeightedOccurrence> weightedOccurrences = Arrays.asList(new WeightedOccurrence(45, 0.5d, 1.0d),
+ new WeightedOccurrence(78, 3.0d, 9.0d));
Reducer<VarIntWritable,WeightedOccurrence,VarIntWritable,WeightedOccurrenceArray>.Context context =
EasyMock.createMock(Reducer.Context.class);
@@ -143,14 +138,14 @@ public class TestRowSimilarityJob extend
Mapper<VarIntWritable,WeightedOccurrenceArray,WeightedRowPair,Cooccurrence>.Context context =
EasyMock.createMock(Mapper.Context.class);
- context.write(new WeightedRowPair(34, 34, 1d, 1d), new Cooccurrence(12, 0.5d, 0.5d));
- context.write(new WeightedRowPair(34, 56, 1d, 3d), new Cooccurrence(12, 0.5d, 1d));
- context.write(new WeightedRowPair(56, 56, 3d, 3d), new Cooccurrence(12, 1d, 1d));
+ context.write(new WeightedRowPair(34, 34, 1.0d, 1.0d), new Cooccurrence(12, 0.5d, 0.5d));
+ context.write(new WeightedRowPair(34, 56, 1.0d, 3.0d), new Cooccurrence(12, 0.5d, 1.0d));
+ context.write(new WeightedRowPair(56, 56, 3.0d, 3.0d), new Cooccurrence(12, 1.0d, 1.0d));
EasyMock.replay(context);
WeightedOccurrenceArray weightedOccurrences = new WeightedOccurrenceArray(new WeightedOccurrence[] {
- new WeightedOccurrence(34, 0.5d, 1d), new WeightedOccurrence(56, 1d, 3d) });
+ new WeightedOccurrence(34, 0.5d, 1.0d), new WeightedOccurrence(56, 1.0d, 3.0d) });
new RowSimilarityJob.CooccurrencesMapper().map(new VarIntWritable(12), weightedOccurrences, context);
@@ -158,7 +153,7 @@ public class TestRowSimilarityJob extend
}
/**
- * @tests {@link RowSimilarityJob.SimilarityReducer}
+ * @tests {@link SimilarityReducer}
*
* @throws Exception
*/
@@ -174,17 +169,17 @@ public class TestRowSimilarityJob extend
EasyMock.replay(context);
- SimilarityReducer reducer = new RowSimilarityJob.SimilarityReducer();
+ SimilarityReducer reducer = new SimilarityReducer();
setField(reducer, "similarity", new DistributedTanimotoCoefficientVectorSimilarity());
- reducer.reduce(new WeightedRowPair(12, 34, 3d, 3d), Arrays.asList(new Cooccurrence(56, 1d, 2d),
- new Cooccurrence(78, 3d, 6d)), context);
+ reducer.reduce(new WeightedRowPair(12, 34, 3.0d, 3.0d), Arrays.asList(new Cooccurrence(56, 1.0d, 2.0d),
+ new Cooccurrence(78, 3.0d, 6.0d)), context);
EasyMock.verify(context);
}
/**
- * @tests {@link RowSimilarityJob.SimilarityReducer} in the special case of computing the similarity of a row to
+ * @tests {@link SimilarityReducer} in the special case of computing the similarity of a row to
* itself
*
* @throws Exception
@@ -194,21 +189,21 @@ public class TestRowSimilarityJob extend
Reducer<WeightedRowPair,Cooccurrence,SimilarityMatrixEntryKey,MatrixEntryWritable>.Context context =
EasyMock.createMock(Reducer.Context.class);
- context.write(EasyMock.eq(new SimilarityMatrixEntryKey(90, 1d)), MathHelper.matrixEntryMatches(90, 90, 1d));
+ context.write(EasyMock.eq(new SimilarityMatrixEntryKey(90, 1.0d)), MathHelper.matrixEntryMatches(90, 90, 1.0d));
EasyMock.replay(context);
- SimilarityReducer reducer = new RowSimilarityJob.SimilarityReducer();
+ SimilarityReducer reducer = new SimilarityReducer();
setField(reducer, "similarity", new DistributedTanimotoCoefficientVectorSimilarity());
- reducer.reduce(new WeightedRowPair(90, 90, 2d, 2d), Arrays.asList(new Cooccurrence(56, 1d, 2d),
- new Cooccurrence(78, 3d, 6d)), context);
+ reducer.reduce(new WeightedRowPair(90, 90, 2.0d, 2.0d), Arrays.asList(new Cooccurrence(56, 1.0d, 2.0d),
+ new Cooccurrence(78, 3.0d, 6.0d)), context);
EasyMock.verify(context);
}
/**
- * @tests {@link RowSimilarityJob.EntriesToVectorsReducer}
+ * @tests {@link EntriesToVectorsReducer}
*
* @throws Exception
*/
@@ -220,10 +215,10 @@ public class TestRowSimilarityJob extend
EasyMock.replay(context);
- EntriesToVectorsReducer reducer = new RowSimilarityJob.EntriesToVectorsReducer();
+ EntriesToVectorsReducer reducer = new EntriesToVectorsReducer();
setField(reducer, "maxSimilaritiesPerRow", 1);
- reducer.reduce(new SimilarityMatrixEntryKey(12, 1d), Arrays.asList(
+ reducer.reduce(new SimilarityMatrixEntryKey(12, 1.0d), Arrays.asList(
MathHelper.matrixEntry(12, 34, 0.8d),
MathHelper.matrixEntry(12, 56, 0.7d)), context);
@@ -285,14 +280,14 @@ public class TestRowSimilarityJob extend
assertEquals(3, similarityMatrix.numCols());
assertEquals(3, similarityMatrix.numRows());
- assertEquals(1d, similarityMatrix.get(0, 0));
- assertEquals(1d, similarityMatrix.get(1, 1));
- assertEquals(1d, similarityMatrix.get(2, 2));
-
- assertEquals(0d, similarityMatrix.get(2, 0));
- assertEquals(0d, similarityMatrix.get(2, 1));
- assertEquals(0d, similarityMatrix.get(0, 2));
- assertEquals(0d, similarityMatrix.get(1, 2));
+ assertEquals(1.0d, similarityMatrix.get(0, 0));
+ assertEquals(1.0d, similarityMatrix.get(1, 1));
+ assertEquals(1.0d, similarityMatrix.get(2, 2));
+
+ assertEquals(0.0d, similarityMatrix.get(2, 0));
+ assertEquals(0.0d, similarityMatrix.get(2, 1));
+ assertEquals(0.0d, similarityMatrix.get(0, 2));
+ assertEquals(0.0d, similarityMatrix.get(1, 2));
assertEquals(0.6666d, similarityMatrix.get(0, 1), 0.0001);
assertEquals(0.6666d, similarityMatrix.get(1, 0), 0.0001);
@@ -377,17 +372,17 @@ public class TestRowSimilarityJob extend
assertEquals(3, similarityMatrix.numCols());
assertEquals(3, similarityMatrix.numRows());
- assertEquals(0d, similarityMatrix.get(0, 0));
+ assertEquals(0.0d, similarityMatrix.get(0, 0));
assertEquals(0.5d, similarityMatrix.get(0, 1));
- assertEquals(0d, similarityMatrix.get(0, 2));
+ assertEquals(0.0d, similarityMatrix.get(0, 2));
assertEquals(0.5d, similarityMatrix.get(1, 0));
- assertEquals(0d, similarityMatrix.get(1, 1));
- assertEquals(0d, similarityMatrix.get(1, 2));
+ assertEquals(0.0d, similarityMatrix.get(1, 1));
+ assertEquals(0.0d, similarityMatrix.get(1, 2));
assertEquals(0.4d, similarityMatrix.get(2, 0));
- assertEquals(0d, similarityMatrix.get(2, 1));
- assertEquals(0d, similarityMatrix.get(2, 2));
+ assertEquals(0.0d, similarityMatrix.get(2, 1));
+ assertEquals(0.0d, similarityMatrix.get(2, 2));
}
}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedEuclideanDistanceVectorSimilarityTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedEuclideanDistanceVectorSimilarityTest.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedEuclideanDistanceVectorSimilarityTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedEuclideanDistanceVectorSimilarityTest.java Thu Jul 1 21:08:24 2010
@@ -27,11 +27,11 @@ public class DistributedEuclideanDistanc
assertSimilar(new DistributedEuclideanDistanceVectorSimilarity(),
asVector(3, -2),
- asVector(3, -2), 2, 1d);
+ asVector(3, -2), 2, 1.0d);
assertSimilar(new DistributedEuclideanDistanceVectorSimilarity(),
asVector(3, 3),
- asVector(3, 3), 2, 1d);
+ asVector(3, 3), 2, 1.0d);
assertSimilar(new DistributedEuclideanDistanceVectorSimilarity(),
asVector(1, 2, 3),
@@ -39,6 +39,6 @@ public class DistributedEuclideanDistanc
assertSimilar(new DistributedEuclideanDistanceVectorSimilarity(),
asVector(1, 0),
- asVector(0, 1), 2, 0d);
+ asVector(0, 1), 2, 0.0d);
}
}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedPearsonCorrelationVectorSimilarityTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedPearsonCorrelationVectorSimilarityTest.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedPearsonCorrelationVectorSimilarityTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedPearsonCorrelationVectorSimilarityTest.java Thu Jul 1 21:08:24 2010
@@ -26,7 +26,7 @@ public class DistributedPearsonCorrelati
public void testPearsonCorrelation() throws Exception {
assertSimilar(new DistributedPearsonCorrelationVectorSimilarity(),
asVector(3, -2),
- asVector(3, -2), 2, 1d);
+ asVector(3, -2), 2, 1.0d);
assertSimilar(new DistributedPearsonCorrelationVectorSimilarity(),
asVector(3, 3),
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedTanimotoCoefficientVectorSimilarityTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedTanimotoCoefficientVectorSimilarityTest.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedTanimotoCoefficientVectorSimilarityTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedTanimotoCoefficientVectorSimilarityTest.java Thu Jul 1 21:08:24 2010
@@ -35,6 +35,6 @@ public class DistributedTanimotoCoeffici
assertSimilar(new DistributedTanimotoCoefficientVectorSimilarity(),
asVector(0, 1),
- asVector(0, 1), 2, 1d);
+ asVector(0, 1), 2, 1.0d);
}
}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedUncenteredCosineVectorSimilarityTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedUncenteredCosineVectorSimilarityTest.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedUncenteredCosineVectorSimilarityTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedUncenteredCosineVectorSimilarityTest.java Thu Jul 1 21:08:24 2010
@@ -27,7 +27,7 @@ public class DistributedUncenteredCosine
assertSimilar(new DistributedUncenteredCosineVectorSimilarity(),
asVector(0, 0, 0, 0, 1),
- asVector(0, 1, 1, 1, 1), 5, 1d);
+ asVector(0, 1, 1, 1, 1), 5, 1.0d);
assertSimilar(new DistributedUncenteredCosineVectorSimilarity(),
asVector(0, 1),
@@ -35,7 +35,7 @@ public class DistributedUncenteredCosine
assertSimilar(new DistributedUncenteredCosineVectorSimilarity(),
asVector(1, 0),
- asVector(1, 0), 2, 1d);
+ asVector(1, 0), 2, 1.0d);
assertSimilar(new DistributedUncenteredCosineVectorSimilarity(),
asVector(1, 1, 2),
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedUncenteredZeroAssumingCosineVectorSimilarityTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedUncenteredZeroAssumingCosineVectorSimilarityTest.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedUncenteredZeroAssumingCosineVectorSimilarityTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedUncenteredZeroAssumingCosineVectorSimilarityTest.java Thu Jul 1 21:08:24 2010
@@ -34,6 +34,6 @@ public class DistributedUncenteredZeroAs
assertSimilar(new DistributedUncenteredZeroAssumingCosineVectorSimilarity(),
asVector(1, 0),
- asVector(1, 0), 2, 1d);
+ asVector(1, 0), 2, 1.0d);
}
}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedVectorSimilarityTestCase.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedVectorSimilarityTestCase.java?rev=959784&r1=959783&r2=959784&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedVectorSimilarityTestCase.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedVectorSimilarityTestCase.java Thu Jul 1 21:08:24 2010
@@ -77,7 +77,7 @@ public abstract class DistributedVectorS
for (int n = 0; n < numberOfColumns; n++) {
double valueA = v1.get(n);
double valueB = v2.get(n);
- if (valueA != 0d && valueB != 0d) {
+ if (valueA != 0.0d && valueB != 0.0d) {
cooccurrences.add(new Cooccurrence(n, valueA, valueB));
}
}