You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ro...@apache.org on 2010/02/15 19:18:05 UTC
svn commit: r910282 [4/6] - in /lucene/mahout/trunk:
core/src/main/java/org/apache/mahout/cf/taste/hadoop/
core/src/main/java/org/apache/mahout/cf/taste/hadoop/cooccurence/
core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/
core/src/main/java/o...
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java Mon Feb 15 18:17:54 2010
@@ -138,13 +138,13 @@
private List<Pair<List<Integer>,Long>> transactionSet = new ArrayList<Pair<List<Integer>,Long>>();
public TransactionTree() {
- this(TransactionTree.DEFAULT_INITIAL_SIZE);
+ this(DEFAULT_INITIAL_SIZE);
representedAsList = false;
}
public TransactionTree(int size) {
- if (size < TransactionTree.DEFAULT_INITIAL_SIZE) {
- size = TransactionTree.DEFAULT_INITIAL_SIZE;
+ if (size < DEFAULT_INITIAL_SIZE) {
+ size = DEFAULT_INITIAL_SIZE;
}
childCount = new int[size];
attribute = new int[size];
@@ -183,7 +183,7 @@
}
public int addPattern(List<Integer> myList, long addCount) {
- int temp = TransactionTree.ROOTNODEID;
+ int temp = ROOTNODEID;
int ret = 0;
boolean addCountMode = true;
for (int attributeValue : myList) {
@@ -286,13 +286,13 @@
size += p.getFirst().size() + 2;
}
- TransactionTree.log.debug("Nodes in UnCompressed Tree: {} ", nodes);
- TransactionTree.log.debug("UnCompressed Tree Size: {}", (this.nodes * 4 * 4 + this.childCount() * 4)
+ log.debug("Nodes in UnCompressed Tree: {} ", nodes);
+ log.debug("UnCompressed Tree Size: {}", (this.nodes * 4 * 4 + this.childCount() * 4)
/ (double) 1000000);
- TransactionTree.log.debug("Nodes in Compressed Tree: {} ", node);
- TransactionTree.log.debug("Compressed Tree Size: {}", (node * 4 * 4 + ctree.childCount() * 4)
+ log.debug("Nodes in Compressed Tree: {} ", node);
+ log.debug("Compressed Tree Size: {}", (node * 4 * 4 + ctree.childCount() * 4)
/ (double) 1000000);
- TransactionTree.log.debug("TransactionSet Size: {}", size * 4 / (double) 1000000);
+ log.debug("TransactionSet Size: {}", size * 4 / (double) 1000000);
if (node * 4 * 4 + ctree.childCount() * 4 <= size * 4) {
return ctree;
} else {
@@ -413,7 +413,7 @@
this.attribute[nodes] = attributeValue;
nodeCount[nodes] = count;
if (nodeChildren[nodes] == null) {
- nodeChildren[nodes] = new int[TransactionTree.DEFAULT_CHILDREN_INITIAL_SIZE];
+ nodeChildren[nodes] = new int[DEFAULT_CHILDREN_INITIAL_SIZE];
}
int childNodeId = nodes++;
@@ -426,15 +426,15 @@
attribute[nodes] = -1;
nodeCount[nodes] = 0;
if (nodeChildren[nodes] == null) {
- nodeChildren[nodes] = new int[TransactionTree.DEFAULT_CHILDREN_INITIAL_SIZE];
+ nodeChildren[nodes] = new int[DEFAULT_CHILDREN_INITIAL_SIZE];
}
return nodes++;
}
private void resize() {
- int size = (int) (TransactionTree.GROWTH_RATE * nodes);
- if (size < TransactionTree.DEFAULT_INITIAL_SIZE) {
- size = TransactionTree.DEFAULT_INITIAL_SIZE;
+ int size = (int) (GROWTH_RATE * nodes);
+ if (size < DEFAULT_INITIAL_SIZE) {
+ size = DEFAULT_INITIAL_SIZE;
}
int[] oldChildCount = childCount;
@@ -455,9 +455,9 @@
private void resizeChildren(int nodeId) {
int length = childCount[nodeId];
- int size = (int) (TransactionTree.GROWTH_RATE * length);
- if (size < TransactionTree.DEFAULT_CHILDREN_INITIAL_SIZE) {
- size = TransactionTree.DEFAULT_CHILDREN_INITIAL_SIZE;
+ int size = (int) (GROWTH_RATE * length);
+ if (size < DEFAULT_CHILDREN_INITIAL_SIZE) {
+ size = DEFAULT_CHILDREN_INITIAL_SIZE;
}
int[] oldNodeChildren = nodeChildren[nodeId];
nodeChildren[nodeId] = new int[size];
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/ContextWriteOutputCollector.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/ContextWriteOutputCollector.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/ContextWriteOutputCollector.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/ContextWriteOutputCollector.java Mon Feb 15 18:17:54 2010
@@ -51,7 +51,7 @@
context.setStatus("Writing Top K patterns for: " + key.toString());
context.write(key, value);
} catch (InterruptedException e) {
- ContextWriteOutputCollector.log.error("{}", e);
+ log.error("{}", e);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java Mon Feb 15 18:17:54 2010
@@ -49,11 +49,13 @@
/**
* Implementation of PFGrowth Algorithm with FP-Bonsai pruning
*
- * Generic parameter A is the object type used as the cell items in a
- * transaction list.
+ * Generic parameter A is the object type used as the cell items in a transaction list.
+ *
+ * @param <A>
+ * the type used
*/
public class FPGrowth<A extends Comparable<? super A>> {
-
+
private static final Logger log = LoggerFactory.getLogger(FPGrowth.class);
public static List<Pair<String,TopKStringPatterns>> readFrequentPattern(FileSystem fs,
@@ -174,14 +176,14 @@
attributeFrequency[attributeIdMapping.get(attrib)] = frequency;
}
- FPGrowth.log.info("Number of unique items {}", frequencyList.size());
+ log.info("Number of unique items {}", frequencyList.size());
Set<Integer> returnFeatures = new HashSet<Integer>();
if (returnableFeatures.isEmpty() == false) {
for (A attrib : returnableFeatures) {
if (attributeIdMapping.containsKey(attrib)) {
returnFeatures.add(attributeIdMapping.get(attrib));
- FPGrowth.log.info("Adding Pattern {}=>{}", attrib, attributeIdMapping
+ log.info("Adding Pattern {}=>{}", attrib, attributeIdMapping
.get(attrib));
}
}
@@ -191,7 +193,7 @@
}
}
- FPGrowth.log.info("Number of unique pruned items {}", attributeIdMapping.size());
+ log.info("Number of unique pruned items {}", attributeIdMapping.size());
generateTopKFrequentPatterns(new TransactionIterator<A>(transactionStream,
attributeIdMapping), attributeFrequency, minSupport, k, reverseMapping
.size(), returnFeatures, new TopKPatternsOutputConverter<A>(output,
@@ -231,7 +233,7 @@
if (requiredFeatures.contains(attribute) == false) {
continue;
}
- FPGrowth.log.info("Mining FTree Tree for all patterns with {}", attribute);
+ log.info("Mining FTree Tree for all patterns with {}", attribute);
MutableLong minSupport = new MutableLong(minSupportValue);
FrequentPatternMaxHeap frequentPatterns = FPGrowth.growth(tree, minSupport, k,
treeCache, 0, attribute, updater);
@@ -239,10 +241,10 @@
outputCollector.collect(attribute, frequentPatterns);
minSupportValue = Math.max(minSupportValue, minSupport.longValue() / 2);
- FPGrowth.log.info("Found {} Patterns with Least Support {}", patterns.get(
+ log.info("Found {} Patterns with Least Support {}", patterns.get(
attribute).count(), patterns.get(attribute).leastSupport());
}
- FPGrowth.log.info("Tree Cache: First Level: Cache hits={} Cache Misses={}",
+ log.info("Tree Cache: First Level: Cache hits={} Cache Misses={}",
treeCache.getHits(), treeCache.getMisses());
return patterns;
}
@@ -257,7 +259,7 @@
Pattern frequentItem = new Pattern();
while (tree.childCount(tempNode) != 0) {
if (tree.childCount(tempNode) > 1) {
- FPGrowth.log.info("This should not happen {} {}", tree.childCount(tempNode),
+ log.info("This should not happen {} {}", tree.childCount(tempNode),
tempNode);
}
tempNode = tree.childAtIndex(tempNode, 0);
@@ -294,13 +296,10 @@
* format to the corresponding A Format
* @return Top K frequent patterns for each attribute
*/
- private Map<Integer,FrequentPatternMaxHeap> generateTopKFrequentPatterns(Iterator<Pair<int[],Long>> transactions,
- long[] attributeFrequency,
- long minSupport,
- int k,
- int featureSetSize,
- Set<Integer> returnFeatures,
- TopKPatternsOutputConverter<A> topKPatternsOutputCollector,
+ private Map<Integer,FrequentPatternMaxHeap> generateTopKFrequentPatterns(
+ Iterator<Pair<int[],Long>> transactions,
+ long[] attributeFrequency, long minSupport, int k, int featureSetSize,
+ Set<Integer> returnFeatures, TopKPatternsOutputConverter<A> topKPatternsOutputCollector,
StatusUpdater updater) throws IOException {
FPTree tree = new FPTree(featureSetSize);
@@ -321,11 +320,11 @@
.getSecond(), minSupportMutable, attributeFrequency);
i++;
if (i % 10000 == 0) {
- FPGrowth.log.info("FPTree Building: Read {} Transactions", i);
+ log.info("FPTree Building: Read {} Transactions", i);
}
}
- FPGrowth.log.info("Number of Nodes in the FP Tree: {}", nodecount);
+ log.info("Number of Nodes in the FP Tree: {}", nodecount);
return fpGrowth(tree, minSupportMutable, k, returnFeatures,
topKPatternsOutputCollector, updater);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java Mon Feb 15 18:17:54 2010
@@ -75,16 +75,16 @@
private final Set<Integer> sortedSet = new TreeSet<Integer>();
public FPTree() {
- this(FPTree.DEFAULT_INITIAL_SIZE, FPTree.DEFAULT_HEADER_TABLE_INITIAL_SIZE);
+ this(DEFAULT_INITIAL_SIZE, DEFAULT_HEADER_TABLE_INITIAL_SIZE);
}
public FPTree(int size) {
- this(size, FPTree.DEFAULT_HEADER_TABLE_INITIAL_SIZE);
+ this(size, DEFAULT_HEADER_TABLE_INITIAL_SIZE);
}
private FPTree(int size, int headersize) {
- if (size < FPTree.DEFAULT_INITIAL_SIZE) {
- size = FPTree.DEFAULT_INITIAL_SIZE;
+ if (size < DEFAULT_INITIAL_SIZE) {
+ size = DEFAULT_INITIAL_SIZE;
}
parent = new int[size];
@@ -96,11 +96,11 @@
nodeChildren = new int[size][];
conditional = new int[size];
- headerTableAttributes = new int[FPTree.DEFAULT_HEADER_TABLE_INITIAL_SIZE];
- headerTableAttributeCount = new long[FPTree.DEFAULT_HEADER_TABLE_INITIAL_SIZE];
- headerTableLookup = new int[FPTree.DEFAULT_HEADER_TABLE_INITIAL_SIZE];
+ headerTableAttributes = new int[DEFAULT_HEADER_TABLE_INITIAL_SIZE];
+ headerTableAttributeCount = new long[DEFAULT_HEADER_TABLE_INITIAL_SIZE];
+ headerTableLookup = new int[DEFAULT_HEADER_TABLE_INITIAL_SIZE];
Arrays.fill(headerTableLookup, -1);
- headerTableProperties = new int[FPTree.DEFAULT_HEADER_TABLE_INITIAL_SIZE][];
+ headerTableProperties = new int[DEFAULT_HEADER_TABLE_INITIAL_SIZE][];
singlePath = true;
createRootNode();
@@ -134,12 +134,12 @@
public final void addHeaderNext(int attributeValue, int nodeId) {
int index = getHeaderIndex(attributeValue);
- if (headerTableProperties[index][FPTree.HT_NEXT] == -1) {
- headerTableProperties[index][FPTree.HT_NEXT] = nodeId;
- headerTableProperties[index][FPTree.HT_LAST] = nodeId;
+ if (headerTableProperties[index][HT_NEXT] == -1) {
+ headerTableProperties[index][HT_NEXT] = nodeId;
+ headerTableProperties[index][HT_LAST] = nodeId;
} else {
- setNext(headerTableProperties[index][FPTree.HT_LAST], nodeId);
- headerTableProperties[index][FPTree.HT_LAST] = nodeId;
+ setNext(headerTableProperties[index][HT_LAST], nodeId);
+ headerTableProperties[index][HT_LAST] = nodeId;
}
}
@@ -203,7 +203,7 @@
nodeCount[nodes] = count;
if (nodeChildren[nodes] == null) {
- nodeChildren[nodes] = new int[FPTree.DEFAULT_CHILDREN_INITIAL_SIZE];
+ nodeChildren[nodes] = new int[DEFAULT_CHILDREN_INITIAL_SIZE];
}
return nodes++;
@@ -222,7 +222,7 @@
conditional[nodes] = 0;
if (nodeChildren[nodes] == null) {
- nodeChildren[nodes] = new int[FPTree.DEFAULT_CHILDREN_INITIAL_SIZE];
+ nodeChildren[nodes] = new int[DEFAULT_CHILDREN_INITIAL_SIZE];
}
int childNodeId = nodes++;
@@ -238,7 +238,7 @@
attribute[nodes] = -1;
nodeCount[nodes] = 0;
if (nodeChildren[nodes] == null) {
- nodeChildren[nodes] = new int[FPTree.DEFAULT_CHILDREN_INITIAL_SIZE];
+ nodeChildren[nodes] = new int[DEFAULT_CHILDREN_INITIAL_SIZE];
}
return nodes++;
}
@@ -249,7 +249,7 @@
public final int getHeaderNext(int attributeValue) {
int index = getHeaderIndex(attributeValue);
- return headerTableProperties[index][FPTree.HT_NEXT];
+ return headerTableProperties[index][HT_NEXT];
}
public final long getHeaderSupportCount(int attributeValue) {
@@ -281,7 +281,7 @@
public final void removeHeaderNext(int attributeValue) {
int index = getHeaderIndex(attributeValue);
- headerTableProperties[index][FPTree.HT_NEXT] = -1;
+ headerTableProperties[index][HT_NEXT] = -1;
}
public final void reorderHeaderTable() {
@@ -351,11 +351,11 @@
}
headerTableAttributes[headerTableCount] = attributeValue;
if (headerTableProperties[headerTableCount] == null) {
- headerTableProperties[headerTableCount] = new int[FPTree.HEADERTABLEBLOCKSIZE];
+ headerTableProperties[headerTableCount] = new int[HEADERTABLEBLOCKSIZE];
}
headerTableAttributeCount[headerTableCount] = 0;
- headerTableProperties[headerTableCount][FPTree.HT_NEXT] = -1;
- headerTableProperties[headerTableCount][FPTree.HT_LAST] = -1;
+ headerTableProperties[headerTableCount][HT_NEXT] = -1;
+ headerTableProperties[headerTableCount][HT_LAST] = -1;
index = headerTableCount++;
headerTableLookup[attributeValue] = index;
sortedSet.add(attributeValue);
@@ -364,9 +364,9 @@
}
private void resize() {
- int size = (int) (FPTree.GROWTH_RATE * nodes);
- if (size < FPTree.DEFAULT_INITIAL_SIZE) {
- size = FPTree.DEFAULT_INITIAL_SIZE;
+ int size = (int) (GROWTH_RATE * nodes);
+ if (size < DEFAULT_INITIAL_SIZE) {
+ size = DEFAULT_INITIAL_SIZE;
}
int[] oldChildCount = childCount;
@@ -397,9 +397,9 @@
private void resizeChildren(int nodeId) {
int length = childCount[nodeId];
- int size = (int) (FPTree.GROWTH_RATE * length);
- if (size < FPTree.DEFAULT_CHILDREN_INITIAL_SIZE) {
- size = FPTree.DEFAULT_CHILDREN_INITIAL_SIZE;
+ int size = (int) (GROWTH_RATE * length);
+ if (size < DEFAULT_CHILDREN_INITIAL_SIZE) {
+ size = DEFAULT_CHILDREN_INITIAL_SIZE;
}
int[] oldNodeChildren = nodeChildren[nodeId];
nodeChildren[nodeId] = new int[size];
@@ -407,7 +407,7 @@
}
private void resizeHeaderLookup(int attributeValue) {
- int size = (int) (attributeValue * FPTree.GROWTH_RATE);
+ int size = (int) (attributeValue * GROWTH_RATE);
int[] oldLookup = headerTableLookup;
headerTableLookup = new int[size];
Arrays.fill(headerTableLookup, oldLookup.length, size, -1);
@@ -415,9 +415,9 @@
}
private void resizeHeaderTable() {
- int size = (int) (FPTree.GROWTH_RATE * headerTableCount);
- if (size < FPTree.DEFAULT_HEADER_TABLE_INITIAL_SIZE) {
- size = FPTree.DEFAULT_HEADER_TABLE_INITIAL_SIZE;
+ int size = (int) (GROWTH_RATE * headerTableCount);
+ if (size < DEFAULT_HEADER_TABLE_INITIAL_SIZE) {
+ size = DEFAULT_HEADER_TABLE_INITIAL_SIZE;
}
int[] oldAttributes = headerTableAttributes;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTreeDepthCache.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTreeDepthCache.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTreeDepthCache.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTreeDepthCache.java Mon Feb 15 18:17:54 2010
@@ -46,7 +46,7 @@
private final List<FPTree> treeCache = new ArrayList<FPTree>();
public FPTreeDepthCache() {
- FPTreeDepthCache.log.info("Initializing FPTreeCache with firstLevelCacheSize: {}",
+ log.info("Initializing FPTreeCache with firstLevelCacheSize: {}",
FPTreeDepthCache.firstLevelCacheSize);
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java Mon Feb 15 18:17:54 2010
@@ -43,12 +43,12 @@
private long[] supportValues;
public Pattern() {
- this(Pattern.DEFAULT_INITIAL_SIZE);
+ this(DEFAULT_INITIAL_SIZE);
}
private Pattern(int size) {
- if (size < Pattern.DEFAULT_INITIAL_SIZE) {
- size = Pattern.DEFAULT_INITIAL_SIZE;
+ if (size < DEFAULT_INITIAL_SIZE) {
+ size = DEFAULT_INITIAL_SIZE;
}
this.pattern = new int[size];
this.supportValues = new long[size];
@@ -144,9 +144,9 @@
}
private void resize() {
- int size = (int) (Pattern.GROWTH_RATE * length);
- if (size < Pattern.DEFAULT_INITIAL_SIZE) {
- size = Pattern.DEFAULT_INITIAL_SIZE;
+ int size = (int) (GROWTH_RATE * length);
+ if (size < DEFAULT_INITIAL_SIZE) {
+ size = DEFAULT_INITIAL_SIZE;
}
int[] oldpattern = pattern;
long[] oldSupport = supportValues;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/EvalMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/EvalMapper.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/EvalMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/EvalMapper.java Mon Feb 15 18:17:54 2010
@@ -49,7 +49,7 @@
@Override
public void configure(JobConf job) {
- String evlstr = job.get(EvalMapper.MAHOUT_GA_EVALUATOR);
+ String evlstr = job.get(MAHOUT_GA_EVALUATOR);
if (evlstr == null) {
throw new IllegalArgumentException("'MAHOUT_GA_EVALUATOR' job parameter non found");
}
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/CacheTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/CacheTest.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/CacheTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/common/CacheTest.java Mon Feb 15 18:17:54 2010
@@ -24,7 +24,7 @@
import java.util.Random;
public final class CacheTest extends TasteTestCase {
-
+
public void testLotsOfGets() throws TasteException {
Retriever<Object,Object> retriever = new IdentityRetriever();
Cache<Object,Object> cache = new Cache<Object,Object>(retriever, 1000);
@@ -32,7 +32,7 @@
assertEquals(i, cache.get(i));
}
}
-
+
public void testMixedUsage() throws TasteException {
Random random = RandomUtils.getRandom();
Retriever<Object,Object> retriever = new IdentityRetriever();
@@ -42,13 +42,13 @@
if (r < 0.01) {
cache.clear();
} else if (r < 0.1) {
- cache.remove(r-100);
+ cache.remove(r - 100);
} else {
assertEquals(i, cache.get(i));
}
}
}
-
+
private static class IdentityRetriever implements Retriever<Object,Object> {
@Override
public Object get(Object key) {
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/neighborhood/DummySimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/neighborhood/DummySimilarity.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/neighborhood/DummySimilarity.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/neighborhood/DummySimilarity.java Mon Feb 15 18:17:54 2010
@@ -27,33 +27,33 @@
import java.util.Collection;
final class DummySimilarity implements UserSimilarity, ItemSimilarity {
-
+
private final DataModel dataModel;
-
+
DummySimilarity(DataModel dataModel) {
this.dataModel = dataModel;
}
-
+
@Override
public double userSimilarity(long userID1, long userID2) throws TasteException {
- return 1.0 / (1.0 + Math.abs(dataModel.getPreferencesFromUser(userID1).get(0).getValue() -
- dataModel.getPreferencesFromUser(userID2).get(0).getValue()));
+ return 1.0 / (1.0 + Math.abs(dataModel.getPreferencesFromUser(userID1).get(0).getValue()
+ - dataModel.getPreferencesFromUser(userID2).get(0).getValue()));
}
-
+
@Override
public double itemSimilarity(long itemID1, long itemID2) {
// Make up something wacky
return 1.0 / (1.0 + Math.abs(itemID1 - itemID2));
}
-
+
@Override
public void setPreferenceInferrer(PreferenceInferrer inferrer) {
throw new UnsupportedOperationException();
}
-
+
@Override
public void refresh(Collection<Refreshable> alreadyRefreshed) {
- // do nothing
+ // do nothing
}
-
+
}
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java Mon Feb 15 18:17:54 2010
@@ -412,7 +412,7 @@
Canopy canopy = new Canopy();
assertTrue("more to come", reader.next(key, canopy));
assertEquals("1st key", "C0", key.toString());
- //Canopy canopy = new Canopy(value);//Canopy.decodeCanopy(value.toString());
+ //Canopy canopy = new Canopy(value); //Canopy.decodeCanopy(value.toString());
assertEquals("1st x value", 1.5, canopy.getCenter().get(0));
assertEquals("1st y value", 1.5, canopy.getCenter().get(1));
assertTrue("more to come", reader.next(key, canopy));
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestMapReduce.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestMapReduce.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestMapReduce.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestMapReduce.java Mon Feb 15 18:17:54 2010
@@ -44,29 +44,34 @@
import org.apache.mahout.math.VectorWritable;
public class TestMapReduce extends MahoutTestCase {
-
+
private List<VectorWritable> sampleData = new ArrayList<VectorWritable>();
-
+
private FileSystem fs;
-
+
private Configuration conf;
-
+
/**
* Generate random samples and add them to the sampleData
- *
- * @param num int number of samples to generate
- * @param mx double x-value of the sample mean
- * @param my double y-value of the sample mean
- * @param sdx double x-standard deviation of the samples
- * @param sdy double y-standard deviation of the samples
+ *
+ * @param num
+ * int number of samples to generate
+ * @param mx
+ * double x-value of the sample mean
+ * @param my
+ * double y-value of the sample mean
+ * @param sdx
+ * double x-standard deviation of the samples
+ * @param sdy
+ * double y-standard deviation of the samples
*/
private void generateSamples(int num, double mx, double my, double sdx, double sdy) {
System.out.println("Generating " + num + " samples m=[" + mx + ", " + my + "] sd=[" + sdx + ", " + sdy + ']');
for (int i = 0; i < num; i++) {
- addSample(new double[] { UncommonDistributions.rNorm(mx, sdx), UncommonDistributions.rNorm(my, sdy) });
+ addSample(new double[] {UncommonDistributions.rNorm(mx, sdx), UncommonDistributions.rNorm(my, sdy)});
}
}
-
+
private void addSample(double[] values) {
Vector v = new DenseVector(2);
for (int j = 0; j < values.length; j++) {
@@ -74,22 +79,26 @@
}
sampleData.add(new VectorWritable(v));
}
-
+
/**
* Generate random samples and add them to the sampleData
- *
- * @param num int number of samples to generate
- * @param mx double x-value of the sample mean
- * @param my double y-value of the sample mean
- * @param sd double standard deviation of the samples
+ *
+ * @param num
+ * int number of samples to generate
+ * @param mx
+ * double x-value of the sample mean
+ * @param my
+ * double y-value of the sample mean
+ * @param sd
+ * double standard deviation of the samples
*/
private void generateSamples(int num, double mx, double my, double sd) {
System.out.println("Generating " + num + " samples m=[" + mx + ", " + my + "] sd=" + sd);
for (int i = 0; i < num; i++) {
- addSample(new double[] { UncommonDistributions.rNorm(mx, sd), UncommonDistributions.rNorm(my, sd) });
+ addSample(new double[] {UncommonDistributions.rNorm(mx, sd), UncommonDistributions.rNorm(my, sd)});
}
}
-
+
@Override
protected void setUp() throws Exception {
super.setUp();
@@ -101,54 +110,54 @@
File f = new File("input");
f.mkdir();
}
-
+
/** Test the basic Mapper */
public void testMapper() throws Exception {
generateSamples(10, 0, 0, 1);
- DirichletState<VectorWritable> state = new DirichletState<VectorWritable>(new NormalModelDistribution(new VectorWritable(
- new DenseVector(2))), 5, 1, 0, 0);
+ DirichletState<VectorWritable> state = new DirichletState<VectorWritable>(new NormalModelDistribution(
+ new VectorWritable(new DenseVector(2))), 5, 1, 0, 0);
DirichletMapper mapper = new DirichletMapper();
mapper.configure(state);
-
- DummyOutputCollector<Text, VectorWritable> collector = new DummyOutputCollector<Text, VectorWritable>();
+
+ DummyOutputCollector<Text,VectorWritable> collector = new DummyOutputCollector<Text,VectorWritable>();
for (VectorWritable v : sampleData) {
mapper.map(null, v, collector, null);
}
- //Map<String, List<VectorWritable>> data = collector.getData();
+ // Map<String, List<VectorWritable>> data = collector.getData();
// this seed happens to produce two partitions, but they work
- //assertEquals("output size", 3, data.size());
+ // assertEquals("output size", 3, data.size());
}
-
+
/** Test the basic Reducer */
public void testReducer() throws Exception {
generateSamples(100, 0, 0, 1);
generateSamples(100, 2, 0, 1);
generateSamples(100, 0, 2, 1);
generateSamples(100, 2, 2, 1);
- DirichletState<VectorWritable> state = new DirichletState<VectorWritable>(new SampledNormalDistribution(new VectorWritable(
- new DenseVector(2))), 20, 1, 1, 0);
+ DirichletState<VectorWritable> state = new DirichletState<VectorWritable>(new SampledNormalDistribution(
+ new VectorWritable(new DenseVector(2))), 20, 1, 1, 0);
DirichletMapper mapper = new DirichletMapper();
mapper.configure(state);
-
- DummyOutputCollector<Text, VectorWritable> mapCollector = new DummyOutputCollector<Text, VectorWritable>();
+
+ DummyOutputCollector<Text,VectorWritable> mapCollector = new DummyOutputCollector<Text,VectorWritable>();
for (VectorWritable v : sampleData) {
mapper.map(null, v, mapCollector, null);
}
- //Map<String, List<VectorWritable>> data = mapCollector.getData();
+ // Map<String, List<VectorWritable>> data = mapCollector.getData();
// this seed happens to produce three partitions, but they work
- //assertEquals("output size", 7, data.size());
-
+ // assertEquals("output size", 7, data.size());
+
DirichletReducer reducer = new DirichletReducer();
reducer.configure(state);
- OutputCollector<Text, DirichletCluster<VectorWritable>> reduceCollector = new DummyOutputCollector<Text, DirichletCluster<VectorWritable>>();
+ OutputCollector<Text,DirichletCluster<VectorWritable>> reduceCollector = new DummyOutputCollector<Text,DirichletCluster<VectorWritable>>();
for (String key : mapCollector.getKeys()) {
reducer.reduce(new Text(key), mapCollector.getValue(key).iterator(), reduceCollector, null);
}
-
+
Model<VectorWritable>[] newModels = reducer.getNewModels();
state.update(newModels);
}
-
+
private static void printModels(Iterable<Model<VectorWritable>[]> results, int significant) {
int row = 0;
for (Model<VectorWritable>[] r : results) {
@@ -163,40 +172,40 @@
}
System.out.println();
}
-
+
/** Test the Mapper and Reducer in an iteration loop */
public void testMRIterations() throws Exception {
generateSamples(100, 0, 0, 1);
generateSamples(100, 2, 0, 1);
generateSamples(100, 0, 2, 1);
generateSamples(100, 2, 2, 1);
- DirichletState<VectorWritable> state = new DirichletState<VectorWritable>(new SampledNormalDistribution(new VectorWritable(
- new DenseVector(2))), 20, 1.0, 1, 0);
-
+ DirichletState<VectorWritable> state = new DirichletState<VectorWritable>(new SampledNormalDistribution(
+ new VectorWritable(new DenseVector(2))), 20, 1.0, 1, 0);
+
List<Model<VectorWritable>[]> models = new ArrayList<Model<VectorWritable>[]>();
-
+
for (int iteration = 0; iteration < 10; iteration++) {
DirichletMapper mapper = new DirichletMapper();
mapper.configure(state);
- DummyOutputCollector<Text, VectorWritable> mapCollector = new DummyOutputCollector<Text, VectorWritable>();
+ DummyOutputCollector<Text,VectorWritable> mapCollector = new DummyOutputCollector<Text,VectorWritable>();
for (VectorWritable v : sampleData) {
mapper.map(null, v, mapCollector, null);
}
-
+
DirichletReducer reducer = new DirichletReducer();
reducer.configure(state);
- OutputCollector<Text, DirichletCluster<VectorWritable>> reduceCollector = new DummyOutputCollector<Text, DirichletCluster<VectorWritable>>();
+ OutputCollector<Text,DirichletCluster<VectorWritable>> reduceCollector = new DummyOutputCollector<Text,DirichletCluster<VectorWritable>>();
for (String key : mapCollector.getKeys()) {
reducer.reduce(new Text(key), mapCollector.getValue(key).iterator(), reduceCollector, null);
}
-
+
Model<VectorWritable>[] newModels = reducer.getNewModels();
state.update(newModels);
models.add(newModels);
}
printModels(models, 0);
}
-
+
/** Test the Mapper and Reducer using the Driver */
public void testDriverMRIterations() throws Exception {
File f = new File("input");
@@ -209,12 +218,13 @@
generateSamples(100, 2, 2, 1);
ClusteringTestUtils.writePointsToFile(sampleData, "input/data.txt", fs, conf);
// Now run the driver
- DirichletDriver.runJob("input", "output", "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution", 20, 10,
- 1.0, 1);
+ DirichletDriver.runJob("input", "output",
+ "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution", 20, 10, 1.0, 1);
// and inspect results
List<List<DirichletCluster<VectorWritable>>> clusters = new ArrayList<List<DirichletCluster<VectorWritable>>>();
JobConf conf = new JobConf(KMeansDriver.class);
- conf.set(DirichletDriver.MODEL_FACTORY_KEY, "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution");
+ conf.set(DirichletDriver.MODEL_FACTORY_KEY,
+ "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution");
conf.set(DirichletDriver.MODEL_PROTOTYPE_KEY, "org.apache.mahout.math.DenseVector");
conf.set(DirichletDriver.PROTOTYPE_SIZE_KEY, "2");
conf.set(DirichletDriver.NUM_CLUSTERS_KEY, "20");
@@ -225,7 +235,7 @@
}
printResults(clusters, 0);
}
-
+
private static void printResults(List<List<DirichletCluster<VectorWritable>>> clusters, int significant) {
int row = 0;
for (List<DirichletCluster<VectorWritable>> r : clusters) {
@@ -241,7 +251,7 @@
}
System.out.println();
}
-
+
/** Test the Mapper and Reducer using the Driver */
public void testDriverMnRIterations() throws Exception {
File f = new File("input");
@@ -250,12 +260,13 @@
}
generate4Datasets();
// Now run the driver
- DirichletDriver.runJob("input", "output", "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution", 20, 15,
- 1.0, 1);
+ DirichletDriver.runJob("input", "output",
+ "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution", 20, 15, 1.0, 1);
// and inspect results
List<List<DirichletCluster<VectorWritable>>> clusters = new ArrayList<List<DirichletCluster<VectorWritable>>>();
JobConf conf = new JobConf(KMeansDriver.class);
- conf.set(DirichletDriver.MODEL_FACTORY_KEY, "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution");
+ conf.set(DirichletDriver.MODEL_FACTORY_KEY,
+ "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution");
conf.set(DirichletDriver.MODEL_PROTOTYPE_KEY, "org.apache.mahout.math.DenseVector");
conf.set(DirichletDriver.PROTOTYPE_SIZE_KEY, "2");
conf.set(DirichletDriver.NUM_CLUSTERS_KEY, "20");
@@ -266,7 +277,7 @@
}
printResults(clusters, 0);
}
-
+
private void generate4Datasets() throws IOException {
generateSamples(500, 0, 0, 0.5);
ClusteringTestUtils.writePointsToFile(sampleData, "input/data1.txt", fs, conf);
@@ -280,7 +291,7 @@
generateSamples(500, 2, 2, 1);
ClusteringTestUtils.writePointsToFile(sampleData, "input/data4.txt", fs, conf);
}
-
+
/** Test the Mapper and Reducer using the Driver */
public void testDriverMnRnIterations() throws Exception {
File f = new File("input");
@@ -289,12 +300,13 @@
}
generate4Datasets();
// Now run the driver
- DirichletDriver.runJob("input", "output", "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution", 20, 15,
- 1.0, 2);
+ DirichletDriver.runJob("input", "output",
+ "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution", 20, 15, 1.0, 2);
// and inspect results
List<List<DirichletCluster<VectorWritable>>> clusters = new ArrayList<List<DirichletCluster<VectorWritable>>>();
JobConf conf = new JobConf(KMeansDriver.class);
- conf.set(DirichletDriver.MODEL_FACTORY_KEY, "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution");
+ conf.set(DirichletDriver.MODEL_FACTORY_KEY,
+ "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution");
conf.set(DirichletDriver.MODEL_PROTOTYPE_KEY, "org.apache.mahout.math.DenseVector");
conf.set(DirichletDriver.PROTOTYPE_SIZE_KEY, "2");
conf.set(DirichletDriver.NUM_CLUSTERS_KEY, "20");
@@ -305,7 +317,7 @@
}
printResults(clusters, 0);
}
-
+
/** Test the Mapper and Reducer using the Driver */
public void testDriverMnRnIterationsAsymmetric() throws Exception {
File f = new File("input");
@@ -324,13 +336,13 @@
generateSamples(500, 2, 2, 1);
ClusteringTestUtils.writePointsToFile(sampleData, "input/data4.txt", fs, conf);
// Now run the driver
- DirichletDriver.runJob("input", "output", "org.apache.mahout.clustering.dirichlet.models.AsymmetricSampledNormalDistribution",
- 20, 15, 1.0, 2);
+ DirichletDriver.runJob("input", "output",
+ "org.apache.mahout.clustering.dirichlet.models.AsymmetricSampledNormalDistribution", 20, 15, 1.0, 2);
// and inspect results
List<List<DirichletCluster<VectorWritable>>> clusters = new ArrayList<List<DirichletCluster<VectorWritable>>>();
JobConf conf = new JobConf(KMeansDriver.class);
- conf
- .set(DirichletDriver.MODEL_FACTORY_KEY, "org.apache.mahout.clustering.dirichlet.models.AsymmetricSampledNormalDistribution");
+ conf.set(DirichletDriver.MODEL_FACTORY_KEY,
+ "org.apache.mahout.clustering.dirichlet.models.AsymmetricSampledNormalDistribution");
conf.set(DirichletDriver.MODEL_PROTOTYPE_KEY, "org.apache.mahout.math.DenseVector");
conf.set(DirichletDriver.PROTOTYPE_SIZE_KEY, "2");
conf.set(DirichletDriver.NUM_CLUSTERS_KEY, "20");
@@ -341,11 +353,11 @@
}
printResults(clusters, 0);
}
-
- //=================== New Tests of Writable Implementations ====================
-
+
+ // =================== New Tests of Writable Implementations ====================
+
public void testNormalModelWritableSerialization() throws Exception {
- double[] m = { 1.1, 2.2, 3.3 };
+ double[] m = {1.1, 2.2, 3.3};
Model<?> model = new NormalModel(new DenseVector(m), 3.3);
DataOutputBuffer out = new DataOutputBuffer();
model.write(out);
@@ -355,9 +367,9 @@
model2.readFields(in);
assertEquals("models", model.toString(), model2.toString());
}
-
+
public void testSampledNormalModelWritableSerialization() throws Exception {
- double[] m = { 1.1, 2.2, 3.3 };
+ double[] m = {1.1, 2.2, 3.3};
Model<?> model = new SampledNormalModel(new DenseVector(m), 3.3);
DataOutputBuffer out = new DataOutputBuffer();
model.write(out);
@@ -367,10 +379,10 @@
model2.readFields(in);
assertEquals("models", model.toString(), model2.toString());
}
-
+
public void testAsymmetricSampledNormalModelWritableSerialization() throws Exception {
- double[] m = { 1.1, 2.2, 3.3 };
- double[] s = { 3.3, 4.4, 5.5 };
+ double[] m = {1.1, 2.2, 3.3};
+ double[] s = {3.3, 4.4, 5.5};
Model<?> model = new AsymmetricSampledNormalModel(new DenseVector(m), new DenseVector(s));
DataOutputBuffer out = new DataOutputBuffer();
model.write(out);
@@ -380,9 +392,9 @@
model2.readFields(in);
assertEquals("models", model.toString(), model2.toString());
}
-
+
public void testClusterWritableSerialization() throws Exception {
- double[] m = { 1.1, 2.2, 3.3 };
+ double[] m = {1.1, 2.2, 3.3};
DirichletCluster<?> cluster = new DirichletCluster(new NormalModel(new DenseVector(m), 4), 10);
DataOutputBuffer out = new DataOutputBuffer();
cluster.write(out);
@@ -394,5 +406,5 @@
assertNotNull("model null", cluster2.getModel());
assertEquals("model", cluster.getModel().toString(), cluster2.getModel().toString());
}
-
+
}
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestLDAInference.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestLDAInference.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestLDAInference.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestLDAInference.java Mon Feb 15 18:17:54 2010
@@ -32,21 +32,24 @@
import org.apache.mahout.common.RandomUtils;
public class TestLDAInference extends MahoutTestCase {
-
+
private static final int NUM_TOPICS = 20;
-
+
private Random random;
-
+
@Override
protected void setUp() throws Exception {
super.setUp();
random = RandomUtils.getRandom();
}
-
+
/**
* Generate random document vector
- * @param numWords int number of words in the vocabulary
- * @param numWords E[count] for each word
+ *
+ * @param numWords
+ * int number of words in the vocabulary
+ * @param sparsity
+ * E[count] for each word
*/
private Vector generateRandomDoc(int numWords, double sparsity) throws MathException {
Vector v = new DenseVector(numWords);
@@ -57,12 +60,12 @@
}
return v;
}
-
+
private LDAState generateRandomState(int numWords, int numTopics) {
double topicSmoothing = 50.0 / numTopics; // whatever
Matrix m = new DenseMatrix(numTopics, numWords);
double[] logTotals = new double[numTopics];
-
+
for (int k = 0; k < numTopics; ++k) {
double total = 0.0; // total number of pseudo counts we made
for (int w = 0; w < numWords; ++w) {
@@ -71,45 +74,42 @@
total += pseudocount;
m.setQuick(k, w, Math.log(pseudocount));
}
-
+
logTotals[k] = Math.log(total);
}
-
+
double ll = Double.NEGATIVE_INFINITY;
return new LDAState(numTopics, numWords, topicSmoothing, m, logTotals, ll);
}
-
-
+
private void runTest(int numWords, double sparsity, int numTests) throws MathException {
LDAState state = generateRandomState(numWords, NUM_TOPICS);
LDAInference lda = new LDAInference(state);
for (int t = 0; t < numTests; ++t) {
Vector v = generateRandomDoc(numWords, sparsity);
LDAInference.InferredDocument doc = lda.infer(v);
-
+
assertEquals("wordCounts", doc.getWordCounts(), v);
assertNotNull("gamma", doc.getGamma());
- for (Iterator<Vector.Element> iter = v.iterateNonZero();
- iter.hasNext(); ) {
+ for (Iterator<Vector.Element> iter = v.iterateNonZero(); iter.hasNext();) {
int w = iter.next().index();
for (int k = 0; k < NUM_TOPICS; ++k) {
double logProb = doc.phi(k, w);
- assertTrue(k + " " + w + " logProb " + logProb, logProb <= 0.0);
+ assertTrue(k + " " + w + " logProb " + logProb, logProb <= 0.0);
}
}
assertTrue("log likelihood", doc.logLikelihood <= 1.0E-10);
}
}
-
-
+
public void testLDAEasy() throws MathException {
runTest(10, 1.0, 5); // 1 word per doc in expectation
}
-
+
public void testLDASparse() throws MathException {
runTest(100, 0.4, 5); // 40 words per doc in expectation
}
-
+
public void testLDADense() throws MathException {
runTest(100, 3.0, 5); // 300 words per doc in expectation
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingRecommenderEvaluatorRunner.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingRecommenderEvaluatorRunner.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingRecommenderEvaluatorRunner.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingRecommenderEvaluatorRunner.java Mon Feb 15 18:17:54 2010
@@ -52,7 +52,7 @@
model,
0.95,
0.05);
- BookCrossingRecommenderEvaluatorRunner.log.info(String.valueOf(evaluation));
+ log.info(String.valueOf(evaluation));
}
-}
\ No newline at end of file
+}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java Mon Feb 15 18:17:54 2010
@@ -34,10 +34,10 @@
public final class GroupLensDataModel extends FileDataModel {
private static final String COLON_DELIMTER = "::";
- private static final Pattern COLON_DELIMITER_PATTERN = Pattern.compile(GroupLensDataModel.COLON_DELIMTER);
+ private static final Pattern COLON_DELIMITER_PATTERN = Pattern.compile(COLON_DELIMTER);
public GroupLensDataModel() throws IOException {
- this(GroupLensDataModel.readResourceToTempFile("/org/apache/mahout/cf/taste/example/grouplens/ratings.dat"));
+ this(readResourceToTempFile("/org/apache/mahout/cf/taste/example/grouplens/ratings.dat"));
}
/**
@@ -45,7 +45,7 @@
* @throws IOException if an error occurs while reading or writing files
*/
public GroupLensDataModel(File ratingsFile) throws IOException {
- super(GroupLensDataModel.convertGLFile(ratingsFile));
+ super(convertGLFile(ratingsFile));
}
private static File convertGLFile(File originalFile) throws IOException {
@@ -58,7 +58,7 @@
try {
writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(resultFile), Charset.forName("UTF-8")));
for (String line : new FileLineIterable(originalFile, false)) {
- String convertedLine = GroupLensDataModel.COLON_DELIMITER_PATTERN.matcher(line.substring(0, line.lastIndexOf(GroupLensDataModel.COLON_DELIMTER))).replaceAll(",");
+ String convertedLine = COLON_DELIMITER_PATTERN.matcher(line.substring(0, line.lastIndexOf(COLON_DELIMTER))).replaceAll(",");
writer.println(convertedLine);
}
writer.flush();
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensRecommenderEvaluatorRunner.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensRecommenderEvaluatorRunner.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensRecommenderEvaluatorRunner.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensRecommenderEvaluatorRunner.java Mon Feb 15 18:17:54 2010
@@ -55,7 +55,7 @@
model,
0.9,
0.3);
- GroupLensRecommenderEvaluatorRunner.log.info(String.valueOf(evaluation));
+ log.info(String.valueOf(evaluation));
}
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterRecommenderEvaluatorRunner.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterRecommenderEvaluatorRunner.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterRecommenderEvaluatorRunner.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterRecommenderEvaluatorRunner.java Mon Feb 15 18:17:54 2010
@@ -51,7 +51,7 @@
model,
0.9,
0.1);
- JesterRecommenderEvaluatorRunner.log.info(String.valueOf(evaluation));
+ log.info(String.valueOf(evaluation));
}
-}
\ No newline at end of file
+}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/NetflixDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/NetflixDataModel.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/NetflixDataModel.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/NetflixDataModel.java Mon Feb 15 18:17:54 2010
@@ -67,12 +67,12 @@
this.useSubset = useSubset;
- NetflixDataModel.log.info("Creating NetflixDataModel for directory: {}", dataDirectory);
+ log.info("Creating NetflixDataModel for directory: {}", dataDirectory);
- NetflixDataModel.log.info("Reading preference data...");
+ log.info("Reading preference data...");
FastByIDMap<PreferenceArray> users = readUsers(dataDirectory);
- NetflixDataModel.log.info("Creating delegate DataModel...");
+ log.info("Creating delegate DataModel...");
delegate = new GenericDataModel(users);
}
@@ -88,7 +88,7 @@
while (lineIterator.hasNext()) {
line = lineIterator.next();
if (++counter % 100000 == 0) {
- NetflixDataModel.log.info("Processed {} prefs", counter);
+ log.info("Processed {} prefs", counter);
}
int firstComma = line.indexOf(',');
long userID = Long.parseLong(line.substring(0, firstComma));
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/NetflixRecommenderEvaluatorRunner.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/NetflixRecommenderEvaluatorRunner.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/NetflixRecommenderEvaluatorRunner.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/NetflixRecommenderEvaluatorRunner.java Mon Feb 15 18:17:54 2010
@@ -43,10 +43,10 @@
if (ratingsFile != null) {
DataModel model = new NetflixDataModel(ratingsFile, true);
double evaluation = evaluator.evaluate(new NetflixRecommenderBuilder(), null, model, 0.9, 0.1);
- NetflixRecommenderEvaluatorRunner.log.info(String.valueOf(evaluation));
+ log.info(String.valueOf(evaluation));
} else {
- NetflixRecommenderEvaluatorRunner.log.error("Netflix Recommender needs a ratings file to work. Please provide it with the -i command line option.");
+ log.error("Netflix Recommender needs a ratings file to work. Please provide it with the -i command line option.");
}
}
-}
\ No newline at end of file
+}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/TransposeToByUser.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/TransposeToByUser.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/TransposeToByUser.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/TransposeToByUser.java Mon Feb 15 18:17:54 2010
@@ -63,7 +63,7 @@
Map<String, List<String>> byUserEntryCache = new FastMap<String, List<String>>(100000);
for (File byItemFile : byItemDirectory.listFiles()) {
- TransposeToByUser.log.info("Processing {}", byItemFile);
+ log.info("Processing {}", byItemFile);
Iterator<String> lineIterator = new FileLineIterable(byItemFile, false).iterator();
String line = lineIterator.next();
String movieIDString = line.substring(0, line.length() - 1);
@@ -88,7 +88,7 @@
private static void maybeFlushCache(File byUserDirectory, Map<String, List<String>> byUserEntryCache) throws IOException {
if (byUserEntryCache.size() >= 100000) {
- TransposeToByUser.log.info("Flushing cache");
+ log.info("Flushing cache");
for (Map.Entry<String, List<String>> entry : byUserEntryCache.entrySet()) {
String userID = entry.getKey();
List<String> lines = entry.getValue();
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java Mon Feb 15 18:17:54 2010
@@ -119,14 +119,14 @@
WikipediaDatasetCreatorDriver.runJob(inputPath, outputPath, catFile, cmdLine.hasOption(exactMatchOpt),
analyzerClass);
} catch (OptionException e) {
- WikipediaDatasetCreatorDriver.log.error("Exception", e);
+ log.error("Exception", e);
CommandLineUtil.printHelp(group);
} catch (ClassNotFoundException e) {
- WikipediaDatasetCreatorDriver.log.error("Exception: Analyzer class not found", e);
+ log.error("Exception: Analyzer class not found", e);
} catch (IllegalAccessException e) {
- WikipediaDatasetCreatorDriver.log.error("Exception: Couldn't instantiate the class", e);
+ log.error("Exception: Couldn't instantiate the class", e);
} catch (InstantiationException e) {
- WikipediaDatasetCreatorDriver.log.error("Exception: Couldn't instantiate the class", e);
+ log.error("Exception: Couldn't instantiate the class", e);
}
}
@@ -151,7 +151,7 @@
JobClient client = new JobClient();
JobConf conf = new JobConf(WikipediaDatasetCreatorDriver.class);
if (WikipediaDatasetCreatorDriver.log.isInfoEnabled()) {
- WikipediaDatasetCreatorDriver.log.info("Input: {} Out: {} Categories: {}", new Object[] {input, output,
+ log.info("Input: {} Out: {} Categories: {}", new Object[] {input, output,
catFile});
}
conf.set("key.value.separator.in.input.line", " ");
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java Mon Feb 15 18:17:54 2010
@@ -133,7 +133,7 @@
} catch (InstantiationException e) {
throw new IllegalStateException(e);
}
- WikipediaDatasetCreatorMapper.log.info(
+ log.info(
"Configure: Input Categories size: {} Exact Match: {} Analyzer: {}", new Object[] {
inputCategories
.size(),
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaXmlSplitter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaXmlSplitter.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaXmlSplitter.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaXmlSplitter.java Mon Feb 15 18:17:54 2010
@@ -99,7 +99,7 @@
try {
cmdLine = parser.parse(args);
} catch (OptionException e) {
- WikipediaXmlSplitter.log.error("Error while parsing options", e);
+ log.error("Error while parsing options", e);
CommandLineUtil.printHelp(group);
return;
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/XmlInputFormat.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/XmlInputFormat.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/XmlInputFormat.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/XmlInputFormat.java Mon Feb 15 18:17:54 2010
@@ -61,8 +61,8 @@
private final DataOutputBuffer buffer = new DataOutputBuffer();
public XmlRecordReader(FileSplit split, JobConf jobConf) throws IOException {
- startTag = jobConf.get(XmlInputFormat.START_TAG_KEY).getBytes("utf-8");
- endTag = jobConf.get(XmlInputFormat.END_TAG_KEY).getBytes("utf-8");
+ startTag = jobConf.get(START_TAG_KEY).getBytes("utf-8");
+ endTag = jobConf.get(END_TAG_KEY).getBytes("utf-8");
// open the file and seek to the start of the split
start = split.getStart();
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java Mon Feb 15 18:17:54 2010
@@ -32,7 +32,8 @@
class Display2dASNDirichlet extends DisplayDirichlet {
Display2dASNDirichlet() {
initialize();
- this.setTitle("Dirichlet Process Clusters - 2-d Asymmetric Sampled Normal Distribution (>" + (int) (DisplayDirichlet.significance * 100)
+ this.setTitle("Dirichlet Process Clusters - 2-d Asymmetric Sampled Normal Distribution (>"
+ + (int) (DisplayDirichlet.significance * 100)
+ "% of population)");
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java Mon Feb 15 18:17:54 2010
@@ -81,7 +81,7 @@
res = Toolkit.getDefaultToolkit().getScreenResolution();
// Set Frame size in inches
- this.setSize(DisplayDirichlet.size * res, DisplayDirichlet.size * res);
+ this.setSize(size * res, size * res);
this.setVisible(true);
this.setTitle("Dirichlet Process Sample Data");
@@ -96,7 +96,7 @@
public static void main(String[] args) throws IOException, InvocationTargetException, NoSuchMethodException {
RandomUtils.useTestSeed();
- DisplayDirichlet.generateSamples();
+ generateSamples();
new DisplayDirichlet();
}
@@ -108,31 +108,31 @@
Vector v = new DenseVector(2);
Vector dv = new DenseVector(2);
g2.setColor(Color.RED);
- for (Vector param : DisplayDirichlet.sampleParams) {
+ for (Vector param : sampleParams) {
v.set(0, param.get(0));
v.set(1, param.get(1));
dv.set(0, param.get(2) * 3);
dv.set(1, param.get(3) * 3);
- DisplayDirichlet.plotEllipse(g2, v, dv);
+ plotEllipse(g2, v, dv);
}
}
public void plotSampleData(Graphics g) {
Graphics2D g2 = (Graphics2D) g;
- double sx = (double) res / DisplayDirichlet.ds;
+ double sx = (double) res / ds;
g2.setTransform(AffineTransform.getScaleInstance(sx, sx));
// plot the axes
g2.setColor(Color.BLACK);
- Vector dv = new DenseVector(2).assign(DisplayDirichlet.size / 2.0);
- DisplayDirichlet.plotRectangle(g2, new DenseVector(2).assign(2), dv);
- DisplayDirichlet.plotRectangle(g2, new DenseVector(2).assign(-2), dv);
+ Vector dv = new DenseVector(2).assign(size / 2.0);
+ plotRectangle(g2, new DenseVector(2).assign(2), dv);
+ plotRectangle(g2, new DenseVector(2).assign(-2), dv);
// plot the sample data
g2.setColor(Color.DARK_GRAY);
dv.assign(0.03);
- for (VectorWritable v : DisplayDirichlet.sampleData) {
- DisplayDirichlet.plotRectangle(g2, v.get(), dv);
+ for (VectorWritable v : sampleData) {
+ plotRectangle(g2, v.get(), dv);
}
}
@@ -150,12 +150,12 @@
double[] flip = {1, -1};
Vector v2 = v.clone().assign(new DenseVector(flip), new TimesFunction());
v2 = v2.minus(dv.divide(2));
- int h = DisplayDirichlet.size / 2;
+ int h = size / 2;
double x = v2.get(0) + h;
double y = v2.get(1) + h;
- g2.draw(new Rectangle2D.Double(x * DisplayDirichlet.ds, y * DisplayDirichlet.ds, dv.get(0)
- * DisplayDirichlet.ds,
- dv.get(1) * DisplayDirichlet.ds));
+ g2.draw(new Rectangle2D.Double(x * ds, y * ds, dv.get(0)
+ * ds,
+ dv.get(1) * ds));
}
/**
@@ -172,12 +172,12 @@
double[] flip = {1, -1};
Vector v2 = v.clone().assign(new DenseVector(flip), new TimesFunction());
v2 = v2.minus(dv.divide(2));
- int h = DisplayDirichlet.size / 2;
+ int h = size / 2;
double x = v2.get(0) + h;
double y = v2.get(1) + h;
- g2.draw(new Ellipse2D.Double(x * DisplayDirichlet.ds, y * DisplayDirichlet.ds, dv.get(0)
- * DisplayDirichlet.ds,
- dv.get(1) * DisplayDirichlet.ds));
+ g2.draw(new Ellipse2D.Double(x * ds, y * ds, dv.get(0)
+ * ds,
+ dv.get(1) * ds));
}
private static void printModels(List<Model<VectorWritable>[]> results, int significant) {
@@ -196,15 +196,15 @@
}
public static void generateSamples() {
- DisplayDirichlet.generateSamples(400, 1, 1, 3);
- DisplayDirichlet.generateSamples(300, 1, 0, 0.5);
- DisplayDirichlet.generateSamples(300, 0, 2, 0.1);
+ generateSamples(400, 1, 1, 3);
+ generateSamples(300, 1, 0, 0.5);
+ generateSamples(300, 0, 2, 0.1);
}
public static void generate2dSamples() {
- DisplayDirichlet.generate2dSamples(400, 1, 1, 3, 1);
- DisplayDirichlet.generate2dSamples(300, 1, 0, 0.5, 1);
- DisplayDirichlet.generate2dSamples(300, 0, 2, 0.1, 0.5);
+ generate2dSamples(400, 1, 1, 3, 1);
+ generate2dSamples(300, 1, 0, 0.5, 1);
+ generate2dSamples(300, 0, 2, 0.1, 0.5);
}
/**
@@ -221,10 +221,10 @@
*/
private static void generateSamples(int num, double mx, double my, double sd) {
double[] params = {mx, my, sd, sd};
- DisplayDirichlet.sampleParams.add(new DenseVector(params));
+ sampleParams.add(new DenseVector(params));
System.out.println("Generating " + num + " samples m=[" + mx + ", " + my + "] sd=" + sd);
for (int i = 0; i < num; i++) {
- DisplayDirichlet.sampleData.add(new VectorWritable(new DenseVector(new double[] {
+ sampleData.add(new VectorWritable(new DenseVector(new double[] {
UncommonDistributions
.rNorm(mx, sd),
UncommonDistributions
@@ -248,11 +248,11 @@
*/
private static void generate2dSamples(int num, double mx, double my, double sdx, double sdy) {
double[] params = {mx, my, sdx, sdy};
- DisplayDirichlet.sampleParams.add(new DenseVector(params));
+ sampleParams.add(new DenseVector(params));
System.out.println("Generating " + num + " samples m=[" + mx + ", " + my + "] sd=[" + sdx + ", " + sdy
+ ']');
for (int i = 0; i < num; i++) {
- DisplayDirichlet.sampleData
+ sampleData
.add(new VectorWritable(new DenseVector(new double[] {UncommonDistributions.rNorm(mx, sdx),
UncommonDistributions.rNorm(my, sdy)})));
}
@@ -260,13 +260,13 @@
public static void generateResults(ModelDistribution<VectorWritable> modelDist) {
DirichletClusterer<VectorWritable> dc = new DirichletClusterer<VectorWritable>(
- DisplayDirichlet.sampleData, modelDist, 1.0, 10, 2, 2);
- DisplayDirichlet.result = dc.cluster(20);
- DisplayDirichlet.printModels(DisplayDirichlet.result, 5);
+ sampleData, modelDist, 1.0, 10, 2, 2);
+ result = dc.cluster(20);
+ printModels(result, 5);
}
public static boolean isSignificant(Model<VectorWritable> model) {
- return (double) model.count() / DisplayDirichlet.sampleData.size() > DisplayDirichlet.significance;
+ return (double) model.count() / sampleData.size() > significance;
}
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java Mon Feb 15 18:17:54 2010
@@ -41,7 +41,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-public class InputDriver {
+public final class InputDriver {
/**Logger for this class.*/
private static final Logger LOG = LoggerFactory.getLogger(InputDriver.class);
@@ -75,7 +75,8 @@
String input = cmdLine.getValue(inputOpt, "testdata").toString();
String output = cmdLine.getValue(outputOpt, "output").toString();
- String vectorClassName = cmdLine.getValue(vectorOpt, "org.apache.mahout.math.RandomAccessSparseVector").toString();
+ String vectorClassName = cmdLine.getValue(vectorOpt,
+ "org.apache.mahout.math.RandomAccessSparseVector").toString();
InputDriver.runJob(input, output, vectorClassName);
} catch (OptionException e) {
InputDriver.LOG.error("Exception parsing command line: ", e);
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java Mon Feb 15 18:17:54 2010
@@ -37,7 +37,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-public class Job {
+public final class Job {
/** Logger for this class. */
private static final Logger LOG = LoggerFactory.getLogger(Job.class);
@@ -60,7 +60,8 @@
"The Distance Measure to use. Default is SquaredEuclidean").withShortName("m").create();
// Option vectorClassOpt = obuilder.withLongName("vectorClass").withRequired(false).withArgument(
// abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create()).
- // withDescription("The Vector implementation class name. Default is RandomAccessSparseVector.class").withShortName("v").create();
+ // withDescription("The Vector implementation class name. Default is RandomAccessSparseVector.class")
+ // .withShortName("v").create();
Option t1Opt = obuilder.withLongName("t1").withRequired(false).withArgument(
abuilder.withName("t1").withMinimum(1).withMaximum(1).create()).withDescription("t1").withShortName(
@@ -123,7 +124,8 @@
* @param t2
* the canopy T2 threshold
*/
- private static void runJob(String input, String output, String measureClassName, double t1, double t2) throws IOException {
+ private static void runJob(String input, String output, String measureClassName,
+ double t1, double t2) throws IOException {
JobClient client = new JobClient();
JobConf conf = new JobConf(Job.class);
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java Mon Feb 15 18:17:54 2010
@@ -108,7 +108,7 @@
.runJob(input, output, modelFactory, numModels, maxIterations, alpha_0, numReducers,
vectorClassName);
} catch (OptionException e) {
- Job.log.error("Exception parsing command line: ", e);
+ log.error("Exception parsing command line: ", e);
CommandLineUtil.printHelp(group);
}
}
@@ -236,6 +236,6 @@
result.append('\n');
}
result.append('\n');
- Job.log.info(result.toString());
+ log.info(result.toString());
}
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java Mon Feb 15 18:17:54 2010
@@ -45,7 +45,7 @@
private static final String CLUSTERED_POINTS_OUTPUT_DIRECTORY = "/clusteredPoints";
- private Job() { }
+ private Job() {}
public static void main(String[] args) throws IOException {
DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
@@ -142,8 +142,8 @@
MeanShiftCanopyJob.runJob(directoryContainingConvertedInput, output + "/meanshift", measureClassName, t1,
t2, convergenceDelta, maxIterations);
FileStatus[] status = dfs.listStatus(new Path(output + "/meanshift"));
- OutputDriver.runJob(status[status.length - 1].getPath().toString(),
- output + Job.CLUSTERED_POINTS_OUTPUT_DIRECTORY);
+ OutputDriver.runJob(status[status.length - 1].getPath().toString(), output
+ + CLUSTERED_POINTS_OUTPUT_DIRECTORY);
}
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputMapper.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputMapper.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputMapper.java Mon Feb 15 18:17:54 2010
@@ -33,10 +33,11 @@
private static final Logger log = LoggerFactory.getLogger(OutputMapper.class);
- private int clusters = 0;
+ private int clusters;
@Override
- public void map(Text key, MeanShiftCanopy canopy, OutputCollector<Text,Text> output, Reporter reporter) throws IOException {
+ public void map(Text key, MeanShiftCanopy canopy, OutputCollector<Text,Text> output,
+ Reporter reporter) throws IOException {
clusters++;
for (Vector point : canopy.getBoundPoints()) {
output.collect(key, new Text(point.asFormatString()));
@@ -45,7 +46,7 @@
@Override
public void close() throws IOException {
- OutputMapper.log.info("+++ Clusters={}", clusters);
+ log.info("+++ Clusters={}", clusters);
super.close();
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/df/BreimanExample.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/df/BreimanExample.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/df/BreimanExample.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/df/BreimanExample.java Mon Feb 15 18:17:54 2010
@@ -95,7 +95,7 @@
int nblabels = data.getDataset().nblabels();
- BreimanExample.log.info("Splitting the data");
+ log.info("Splitting the data");
Data train = data.clone();
Data test = train.rsplit(rng, (int) (data.size() * 0.1));
@@ -112,7 +112,7 @@
treeBuilder.setM(m);
long time = System.currentTimeMillis();
- BreimanExample.log.info("Growing a forest with m={}", m);
+ log.info("Growing a forest with m={}", m);
DecisionForest forestM = forestBuilder.build(nbtrees, errorM);
sumTimeM += System.currentTimeMillis() - time;
numNodesM += forestM.nbNodes();
@@ -125,7 +125,7 @@
treeBuilder.setM(1);
time = System.currentTimeMillis();
- BreimanExample.log.info("Growing a forest with m=1");
+ log.info("Growing a forest with m=1");
DecisionForest forestOne = forestBuilder.build(nbtrees, errorOne);
sumTimeOne += System.currentTimeMillis() - time;
numNodesOne += forestOne.nbNodes();
@@ -209,7 +209,7 @@
dataPath = new Path(dataName);
datasetPath = new Path(datasetName);
} catch (OptionException e) {
- BreimanExample.log.error("Error while parsing options", e);
+ log.error("Error while parsing options", e);
CommandLineUtil.printHelp(group);
return -1;
}
@@ -225,18 +225,18 @@
Random rng = RandomUtils.getRandom();
for (int iteration = 0; iteration < nbIterations; iteration++) {
- BreimanExample.log.info("Iteration {}", iteration);
+ log.info("Iteration {}", iteration);
runIteration(rng, data, m, nbTrees);
}
- BreimanExample.log.info("********************************************");
- BreimanExample.log.info("Selection error : {}", sumTestErr / nbIterations);
- BreimanExample.log.info("Single Input error : {}", sumOneErr / nbIterations);
- BreimanExample.log.info("One Tree error : {}", sumTreeErr / nbIterations);
- BreimanExample.log.info("Mean Random Input Time : {}", DFUtils.elapsedTime(sumTimeM / nbIterations));
- BreimanExample.log.info("Mean Single Input Time : {}", DFUtils.elapsedTime(sumTimeOne / nbIterations));
- BreimanExample.log.info("Mean Random Input Num Nodes : {}", numNodesM / nbIterations);
- BreimanExample.log.info("Mean Single Input Num Nodes : {}", numNodesOne / nbIterations);
+ log.info("********************************************");
+ log.info("Selection error : {}", sumTestErr / nbIterations);
+ log.info("Single Input error : {}", sumOneErr / nbIterations);
+ log.info("One Tree error : {}", sumTreeErr / nbIterations);
+ log.info("Mean Random Input Time : {}", DFUtils.elapsedTime(sumTimeM / nbIterations));
+ log.info("Mean Single Input Time : {}", DFUtils.elapsedTime(sumTimeOne / nbIterations));
+ log.info("Mean Random Input Num Nodes : {}", numNodesM / nbIterations);
+ log.info("Mean Single Input Num Nodes : {}", numNodesOne / nbIterations);
return 0;
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapred/BuildForest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapred/BuildForest.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapred/BuildForest.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapred/BuildForest.java Mon Feb 15 18:17:54 2010
@@ -131,19 +131,19 @@
seed = Long.valueOf(cmdLine.getValue(seedOpt).toString());
}
- BuildForest.log.debug("data : {}", dataName);
- BuildForest.log.debug("dataset : {}", datasetName);
- BuildForest.log.debug("m : {}", m);
- BuildForest.log.debug("seed : {}", seed);
- BuildForest.log.debug("nbtrees : {}", nbTrees);
- BuildForest.log.debug("isPartial : {}", isPartial);
- BuildForest.log.debug("isOob : {}", isOob);
+ log.debug("data : {}", dataName);
+ log.debug("dataset : {}", datasetName);
+ log.debug("m : {}", m);
+ log.debug("seed : {}", seed);
+ log.debug("nbtrees : {}", nbTrees);
+ log.debug("isPartial : {}", isPartial);
+ log.debug("isOob : {}", isOob);
dataPath = new Path(dataName);
datasetPath = new Path(datasetName);
} catch (OptionException e) {
- BuildForest.log.error("Error while parsing options", e);
+ log.error("Error while parsing options", e);
CommandLineUtil.printHelp(group);
return -1;
}
@@ -165,20 +165,20 @@
Builder forestBuilder;
if (isPartial) {
- BuildForest.log.info("Partial Mapred implementation");
+ log.info("Partial Mapred implementation");
forestBuilder = new PartialBuilder(treeBuilder, dataPath, datasetPath, seed, getConf());
} else {
- BuildForest.log.info("InMem Mapred implementation");
+ log.info("InMem Mapred implementation");
forestBuilder = new InMemBuilder(treeBuilder, dataPath, datasetPath, seed, getConf());
}
- BuildForest.log.info("Building the forest...");
+ log.info("Building the forest...");
long time = System.currentTimeMillis();
DecisionForest forest = forestBuilder.build(nbTrees, callback);
time = System.currentTimeMillis() - time;
- BuildForest.log.info("Build Time: {}", DFUtils.elapsedTime(time));
+ log.info("Build Time: {}", DFUtils.elapsedTime(time));
if (isOob) {
Random rng;
@@ -191,7 +191,7 @@
FileSystem fs = dataPath.getFileSystem(getConf());
int[] labels = Data.extractLabels(dataset, fs, dataPath);
- BuildForest.log.info("oob error estimate : "
+ log.info("oob error estimate : "
+ ErrorEstimate.errorRate(labels, callback.computePredictions(rng)));
}
@@ -199,10 +199,10 @@
}
protected static Data loadData(Configuration conf, Path dataPath, Dataset dataset) throws IOException {
- BuildForest.log.info("Loading the data...");
+ log.info("Loading the data...");
FileSystem fs = dataPath.getFileSystem(conf);
Data data = DataLoader.loadData(dataset, fs, dataPath);
- BuildForest.log.info("Data Loaded");
+ log.info("Data Loaded");
return data;
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/BuildForest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/BuildForest.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/BuildForest.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/BuildForest.java Mon Feb 15 18:17:54 2010
@@ -131,13 +131,13 @@
seed = Long.valueOf(cmdLine.getValue(seedOpt).toString());
}
- BuildForest.log.debug("data : {}", dataName);
- BuildForest.log.debug("dataset : {}", datasetName);
- BuildForest.log.debug("m : {}", m);
- BuildForest.log.debug("seed : {}", seed);
- BuildForest.log.debug("nbtrees : {}", nbTrees);
- BuildForest.log.debug("isPartial : {}", isPartial);
- BuildForest.log.debug("isOob : {}", isOob);
+ log.debug("data : {}", dataName);
+ log.debug("dataset : {}", datasetName);
+ log.debug("m : {}", m);
+ log.debug("seed : {}", seed);
+ log.debug("nbtrees : {}", nbTrees);
+ log.debug("isPartial : {}", isPartial);
+ log.debug("isOob : {}", isOob);
dataPath = new Path(dataName);
datasetPath = new Path(datasetName);
@@ -165,19 +165,19 @@
Builder forestBuilder;
if (isPartial) {
- BuildForest.log.info("Partial Mapred implementation");
+ log.info("Partial Mapred implementation");
forestBuilder = new PartialBuilder(treeBuilder, dataPath, datasetPath, seed, getConf());
} else {
- BuildForest.log.info("InMem Mapred implementation");
+ log.info("InMem Mapred implementation");
forestBuilder = new InMemBuilder(treeBuilder, dataPath, datasetPath, seed, getConf());
}
- BuildForest.log.info("Building the forest...");
+ log.info("Building the forest...");
long time = System.currentTimeMillis();
DecisionForest forest = forestBuilder.build(nbTrees, callback);
time = System.currentTimeMillis() - time;
- BuildForest.log.info("Build Time: {}", DFUtils.elapsedTime(time));
+ log.info("Build Time: {}", DFUtils.elapsedTime(time));
if (isOob) {
Random rng;
@@ -190,7 +190,7 @@
FileSystem fs = dataPath.getFileSystem(getConf());
int[] labels = Data.extractLabels(dataset, fs, dataPath);
- BuildForest.log.info("oob error estimate : "
+ log.info("oob error estimate : "
+ ErrorEstimate.errorRate(labels, callback.computePredictions(rng)));
}
@@ -198,10 +198,10 @@
}
protected static Data loadData(Configuration conf, Path dataPath, Dataset dataset) throws IOException {
- BuildForest.log.info("Loading the data...");
+ log.info("Loading the data...");
FileSystem fs = dataPath.getFileSystem(conf);
Data data = DataLoader.loadData(dataset, fs, dataPath);
- BuildForest.log.info("Data Loaded");
+ log.info("Data Loaded");
return data;
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/dataset/KeyBasedStringTupleMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/dataset/KeyBasedStringTupleMapper.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/dataset/KeyBasedStringTupleMapper.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/dataset/KeyBasedStringTupleMapper.java Mon Feb 15 18:17:54 2010
@@ -48,7 +48,7 @@
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String[] fields = splitter.split(value.toString());
if (fields.length != 4) {
- KeyBasedStringTupleMapper.log.info("{} {}", fields.length, value.toString());
+ log.info("{} {}", fields.length, value.toString());
context.getCounter("Map", "ERROR").increment(1);
return;
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDGA.java Mon Feb 15 18:17:54 2010
@@ -154,7 +154,7 @@
CDGA.printElapsedTime(end - start);
} catch (OptionException e) {
- CDGA.log.error("Error while parsing options", e);
+ log.error("Error while parsing options", e);
CommandLineUtil.printHelp(group);
}
}
@@ -194,7 +194,7 @@
engine.addEvolutionObserver(new EvolutionObserver<CDRule>() {
@Override
public void populationUpdate(PopulationData<? extends CDRule> data) {
- CDGA.log.info("Generation {}", data.getGenerationNumber());
+ log.info("Generation {}", data.getGenerationNumber());
}
});
@@ -209,8 +209,8 @@
CDFitness bestTestFit = CDMahoutEvaluator.evaluate(solution, target, inpath, split);
// evaluate the solution over the testing set
- CDGA.log.info("Best solution fitness (train set) : {}", bestTrainFit);
- CDGA.log.info("Best solution fitness (test set) : {}", bestTestFit);
+ log.info("Best solution fitness (train set) : {}", bestTrainFit);
+ log.info("Best solution fitness (test set) : {}", bestTestFit);
}
private static void printElapsedTime(long milli) {
@@ -223,7 +223,7 @@
long hours = minutes / 60;
minutes %= 60;
- CDGA.log.info("Elapsed time (Hours:minutes:seconds:milli) : {}:{}:{}:{}", new Object[] {hours, minutes,
+ log.info("Elapsed time (Hours:minutes:seconds:milli) : {}:{}:{}:{}", new Object[] {hours, minutes,
seconds, milli});
}
}