You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2012/06/20 14:07:58 UTC
svn commit: r1352052 [5/7] - in /mahout/trunk: ./ buildtools/
buildtools/src/main/resources/ core/
core/src/main/java/org/apache/mahout/cf/taste/hadoop/
core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/
core/src/main/java/org/apache/mahout/cf/t...
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java Wed Jun 20 12:07:50 2012
@@ -19,7 +19,6 @@ package org.apache.mahout.common;
import com.google.common.collect.Lists;
-import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.Method;
import java.util.List;
@@ -68,8 +67,7 @@ public final class DummyRecordWriter<K,
public static <K1, V1, K2, V2> Mapper<K1, V1, K2, V2>.Context build(Mapper<K1, V1, K2, V2> mapper,
Configuration configuration,
- RecordWriter<K2, V2> output)
- throws IOException, InterruptedException {
+ RecordWriter<K2, V2> output) {
// Use reflection since the context types changed incompatibly between 0.20
// and 0.23.
@@ -88,8 +86,7 @@ public final class DummyRecordWriter<K,
Configuration configuration,
RecordWriter<K2, V2> output,
Class<K1> keyClass,
- Class<V1> valueClass)
- throws IOException, InterruptedException {
+ Class<V1> valueClass) {
// Use reflection since the context types changed incompatibly between 0.20
// and 0.23.
@@ -113,7 +110,7 @@ public final class DummyRecordWriter<K,
new TaskAttemptID(), null, output, null, new DummyStatusReporter(), null);
Class<?> wrappedMapperClass = Class.forName("org.apache.hadoop.mapreduce.lib.map.WrappedMapper");
- Object wrappedMapper = wrappedMapperClass.newInstance();
+ Object wrappedMapper = wrappedMapperClass.getConstructor().newInstance();
Method getMapContext = wrappedMapperClass.getMethod("getMapContext", MapContext.class);
return (Mapper.Context) getMapContext.invoke(wrappedMapper, mapContextImpl);
}
@@ -147,7 +144,7 @@ public final class DummyRecordWriter<K,
valueClass);
Class<?> wrappedReducerClass = Class.forName("org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer");
- Object wrappedReducer = wrappedReducerClass.newInstance();
+ Object wrappedReducer = wrappedReducerClass.getConstructor().newInstance();
Method getReducerContext = wrappedReducerClass.getMethod("getReducerContext", ReduceContext.class);
return (Reducer.Context) getReducerContext.invoke(wrappedReducer, reduceContextImpl);
}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java Wed Jun 20 12:07:50 2012
@@ -32,7 +32,7 @@ public final class DummyStatusReporter e
private final Map<Enum<?>, Counter> counters = Maps.newHashMap();
private final Map<String, Counter> counterGroups = Maps.newHashMap();
- private Counter newCounter() {
+ private static Counter newCounter() {
try {
// 0.23 case
String c = "org.apache.hadoop.mapreduce.counters.GenericCounter";
@@ -69,7 +69,7 @@ public final class DummyStatusReporter e
}
public float getProgress() {
- return 0;
+ return 0.0f;
}
}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/StringUtilsTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/StringUtilsTest.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/common/StringUtilsTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/common/StringUtilsTest.java Wed Jun 20 12:07:50 2012
@@ -65,6 +65,6 @@ public final class StringUtilsTest exten
@Test
public void testEscape() throws Exception {
String res = StringUtils.escapeXML("\",\',&,>,<");
- assertTrue(res.equals("_,_,_,_,_"));
+ assertEquals("_,_,_,_,_", res);
}
}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthRetailDataTestVs.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthRetailDataTestVs.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthRetailDataTestVs.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthRetailDataTestVs.java Wed Jun 20 12:07:50 2012
@@ -18,6 +18,7 @@
package org.apache.mahout.fpm.pfpgrowth;
import java.io.IOException;
+import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -43,17 +44,18 @@ public final class FPGrowthRetailDataTes
private static final Logger log = LoggerFactory.getLogger(PFPGrowthRetailDataTestVs.class);
- private long bestResults(Map<Set<String>, Long> res, Set<String> feats) {
+ private static long bestResults(Map<Set<String>, Long> res, Set<String> feats) {
Long best = res.get(feats);
- if (best != null)
+ if (best != null) {
return best;
- else
- best = -1L;
+ }
+ best = -1L;
for (Map.Entry<Set<String>, Long> ent : res.entrySet()) {
Set<String> r = ent.getKey();
Long supp = ent.getValue();
- if (supp <= best)
+ if (supp <= best) {
continue;
+ }
boolean hasAll = true;
for (String f : feats) {
if (!r.contains(f)) {
@@ -61,16 +63,17 @@ public final class FPGrowthRetailDataTes
break;
}
}
- if (hasAll)
+ if (hasAll) {
best = supp;
+ }
}
return best;
}
private static class MapCollector implements OutputCollector<String,List<Pair<List<String>,Long>>> {
- private Map<Set<String>,Long> results;
+ private final Map<Set<String>,Long> results;
- public MapCollector(Map<Set<String>,Long> results) {
+ private MapCollector(Map<Set<String>, Long> results) {
this.results = results;
}
@@ -84,7 +87,7 @@ public final class FPGrowthRetailDataTes
}
}
- private class DummyUpdater implements StatusUpdater {
+ private static class DummyUpdater implements StatusUpdater {
@Override
public void update(String status) { }
}
@@ -93,7 +96,7 @@ public final class FPGrowthRetailDataTes
public void testVsWithRetailData() throws IOException {
String inputFilename = "retail.dat";
int minSupport = 500;
- Set<String> returnableFeatures = new HashSet<String>();
+ Collection<String> returnableFeatures = new HashSet<String>();
org.apache.mahout.fpm.pfpgrowth.fpgrowth.
FPGrowth<String> fp1 = new org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth<String>();
@@ -118,8 +121,10 @@ public final class FPGrowthRetailDataTes
new HashSet<String>(),
new MapCollector(initialResults2), new DummyUpdater());
- Map<Set<String>, Long> results2 = new HashMap<Set<String>, Long>();
- if (!returnableFeatures.isEmpty()) {
+ Map<Set<String>, Long> results2;
+ if (returnableFeatures.isEmpty()) {
+ results2 = initialResults2;
+ } else {
Map<Set<String>, Long> tmpResult = new HashMap<Set<String>, Long>();
for (Map.Entry<Set<String>, Long> result2 : initialResults2.entrySet()) {
Set<String> r2feats = result2.getKey();
@@ -130,25 +135,23 @@ public final class FPGrowthRetailDataTes
break;
}
}
- if (hasSome)
+ if (hasSome) {
tmpResult.put(result2.getKey(), result2.getValue());
+ }
}
results2 = tmpResult;
- } else {
- results2 = initialResults2;
- }
+ }
- boolean allMatch = true;
- allMatch &= hasAll(results1, results2);
+ boolean allMatch = hasAll(results1, results2);
log.info("checked "+results1.size()+" itemsets iterating through #1");
allMatch &= hasAll(results2, results1);
log.info("checked "+results2.size()+" itemsets iterating through #2");
- assertEquals( "Had mismatches!", allMatch, true);
+ assertTrue("Had mismatches!", allMatch);
}
- public boolean hasAll(Map<Set<String>, Long> ref, Map<Set<String>, Long> other) {
+ public static boolean hasAll(Map<Set<String>, Long> ref, Map<Set<String>, Long> other) {
boolean hasAll = true;
for (Map.Entry<Set<String>, Long> refEnt : ref.entrySet()) {
Set<String> feats = refEnt.getKey();
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthSyntheticDataTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthSyntheticDataTest.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthSyntheticDataTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthSyntheticDataTest.java Wed Jun 20 12:07:50 2012
@@ -18,6 +18,7 @@
package org.apache.mahout.fpm.pfpgrowth;
import java.io.IOException;
+import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -44,10 +45,9 @@ public final class FPGrowthSyntheticData
FPGrowthObj<String> fp = new FPGrowthObj<String>();
String inputFilename = "FPGsynth.dat";
- int minSupport = 50;
- StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
- inputFilename).openStream()), "\\s+");
+ StringRecordIterator it =
+ new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+");
int patternCnt_10_13_1669 = 0;
int patternCnt_10_13 = 0;
while (it.hasNext()) {
@@ -60,11 +60,13 @@ public final class FPGrowthSyntheticData
}
}
}
-
- if (patternCnt_10_13_1669 < minSupport)
+
+ int minSupport = 50;
+ if (patternCnt_10_13_1669 < minSupport) {
throw new IllegalStateException("the test is broken or data is missing ("
- + patternCnt_10_13_1669+", "
- + patternCnt_10_13+")");
+ + patternCnt_10_13_1669 + ", "
+ + patternCnt_10_13 + ')');
+ }
final Map<Set<String>,Long> results = Maps.newHashMap();
@@ -77,8 +79,7 @@ public final class FPGrowthSyntheticData
returnableFeatures.add("13");
returnableFeatures.add("1669");
- fp.generateTopKFrequentPatterns(
- new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),
+ fp.generateTopKFrequentPatterns(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),
fp.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename)
.openStream()), "\\s+"), minSupport), minSupport, 100000,
@@ -106,17 +107,18 @@ public final class FPGrowthSyntheticData
}
- private long highestSupport(Map<Set<String>, Long> res, Set<String> feats) {
+ private static long highestSupport(Map<Set<String>, Long> res, Set<String> feats) {
Long best= res.get(feats);
- if (best != null)
+ if (best != null) {
return best;
- else
- best= -1L;
- for (Map.Entry<Set<String>, Long> ent : res.entrySet()) {
+ }
+ best = -1L;
+ for (Map.Entry<Set<String>, Long> ent : res.entrySet()) {
Set<String> r= ent.getKey();
Long supp= ent.getValue();
- if (supp <= best)
+ if (supp <= best) {
continue;
+ }
boolean hasAll= true;
for (String f : feats) {
if (!r.contains(f)) {
@@ -124,17 +126,16 @@ public final class FPGrowthSyntheticData
break;
}
}
- if (hasAll)
- best= supp;
+ if (hasAll) {
+ best = supp;
+ }
}
return best;
}
@Test
- public void testVsWithSynthData() throws IOException {
- String inputFilename= "FPGsynth.dat";
- int minSupport= 100;
- Set<String> returnableFeatures = new HashSet<String>();
+ public void testVsWithSynthData() throws IOException {
+ Collection<String> returnableFeatures = new HashSet<String>();
// not limiting features (or including too many) can cause
// the test to run a very long time
@@ -145,9 +146,10 @@ public final class FPGrowthSyntheticData
FPGrowth<String> fp1 = new FPGrowth<String>();
final Map<Set<String>,Long> results1 = Maps.newHashMap();
-
- fp1.generateTopKFrequentPatterns(
- new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),
+
+ String inputFilename = "FPGsynth.dat";
+ int minSupport = 100;
+ fp1.generateTopKFrequentPatterns(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),
fp1.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename)
.openStream()), "\\s+"), minSupport), minSupport, 1000000,
@@ -172,8 +174,7 @@ public final class FPGrowthSyntheticData
FPGrowthObj<String> fp2 = new FPGrowthObj<String>();
final Map<Set<String>,Long> initialResults2 = Maps.newHashMap();
- fp2.generateTopKFrequentPatterns(
- new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),
+ fp2.generateTopKFrequentPatterns(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),
fp2.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename)
.openStream()), "\\s+"), minSupport), minSupport, 1000000,
@@ -196,27 +197,28 @@ public final class FPGrowthSyntheticData
public void update(String status) {}
});
- Map<Set<String>, Long> results2= new HashMap<Set<String>, Long>();
- if (!returnableFeatures.isEmpty()) {
- Map<Set<String>, Long> tmpResult= new HashMap<Set<String>, Long>();
+ Map<Set<String>, Long> results2;
+ if (returnableFeatures.isEmpty()) {
+ results2 = initialResults2;
+ } else {
+ Map<Set<String>, Long> tmpResult = new HashMap<Set<String>, Long>();
for (Map.Entry<Set<String>, Long> result2 : initialResults2.entrySet()) {
- Set<String> r2feats= result2.getKey();
- boolean hasSome= false;
+ Set<String> r2feats = result2.getKey();
+ boolean hasSome = false;
for (String rf : returnableFeatures) {
if (r2feats.contains(rf)) {
- hasSome= true;
+ hasSome = true;
break;
}
}
- if (hasSome)
+ if (hasSome) {
tmpResult.put(result2.getKey(), result2.getValue());
+ }
}
- results2= tmpResult;
- } else {
- results2= initialResults2;
+ results2 = tmpResult;
}
- boolean allMatch= true;
+ boolean allMatch = true;
int itemsetsChecked= 0;
for (Map.Entry<Set<String>, Long> result1 : results1.entrySet()) {
itemsetsChecked++;
@@ -243,7 +245,7 @@ public final class FPGrowthSyntheticData
}
System.out.println("checked "+itemsetsChecked+" itemsets iterating through #2");
- assertEquals("Had mismatches!", allMatch, true);
+ assertTrue("Had mismatches!", allMatch);
}
}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest.java Wed Jun 20 12:07:50 2012
@@ -124,7 +124,7 @@ public class PFPGrowthRetailDataTest ext
} else {
if (!expectedResults.get(key).equals(results.get(entry.getKey()))) {
System.out.println("invalid (1): " + key + ", expected: " + expectedResults.get(key) + ", got: "
- + + results.get(entry.getKey()));
+ + results.get(entry.getKey()));
} else {
System.out.println("matched (1): " + key + ", with: " + expectedResults.get(key));
}
@@ -165,8 +165,9 @@ public class PFPGrowthRetailDataTest ext
int numGroups = params.getInt(PFPGrowth.NUM_GROUPS,
PFPGrowth.NUM_GROUPS_DEFAULT);
int maxPerGroup = fList.size() / numGroups;
- if (fList.size() % numGroups != 0)
+ if (fList.size() % numGroups != 0) {
maxPerGroup++;
+ }
params.set(PFPGrowth.MAX_PER_GROUP, Integer.toString(maxPerGroup));
PFPGrowth.startParallelFPGrowth(params, conf);
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest2.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest2.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest2.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest2.java Wed Jun 20 12:07:50 2012
@@ -165,8 +165,9 @@ public class PFPGrowthRetailDataTest2 ex
int numGroups = params.getInt(PFPGrowth.NUM_GROUPS,
PFPGrowth.NUM_GROUPS_DEFAULT);
int maxPerGroup = fList.size() / numGroups;
- if (fList.size() % numGroups != 0)
+ if (fList.size() % numGroups != 0) {
maxPerGroup++;
+ }
params.set(PFPGrowth.MAX_PER_GROUP, Integer.toString(maxPerGroup));
log.info("Starting Parallel FPGrowth Test: {}", params.get(PFPGrowth.MAX_HEAPSIZE));
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTestVs.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTestVs.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTestVs.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTestVs.java Wed Jun 20 12:07:50 2012
@@ -136,7 +136,7 @@ public final class PFPGrowthRetailDataTe
} else {
if (!results2.get(key).equals(results1.get(entry.getKey()))) {
System.out.println("invalid (1): " + key + ", expected: " + results2.get(key) + ", got: "
- + + results1.get(entry.getKey()));
+ + results1.get(entry.getKey()));
} else {
System.out.println("matched (1): " + key + ", with: " + results2.get(key));
}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthSynthDataTest2.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthSynthDataTest2.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthSynthDataTest2.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthSynthDataTest2.java Wed Jun 20 12:07:50 2012
@@ -94,7 +94,7 @@ public class PFPGrowthSynthDataTest2 ext
@Test
public void testVsSequential() throws Exception {
- final Map<Set<String>,Long> parallelResult = Maps.newHashMap();
+ Map<Set<String>,Long> parallelResult = Maps.newHashMap();
PFPGrowth.runPFPGrowth(params);
List<Pair<String,TopKStringPatterns>> tmpParallel = PFPGrowth.readFrequentPattern(params);
@@ -107,8 +107,6 @@ public class PFPGrowthSynthDataTest2 ext
}
}
- //////
-
String inputFilename= "FPGsynth.dat";
int minSupport= 100;
@@ -143,11 +141,11 @@ public class PFPGrowthSynthDataTest2 ext
if (seqResult.get(key) == null) {
log.info("spurious (1): " + key+ " with " +entry.getValue());
} else {
- if (!seqResult.get(key).equals(parallelResult.get(entry.getKey()))) {
- log.info("invalid (1): " + key + ", expected: " + seqResult.get(key) + ", got: "
- + + parallelResult.get(entry.getKey()));
- } else {
+ if (seqResult.get(key).equals(parallelResult.get(entry.getKey()))) {
log.info("matched (1): " + key + ", with: " + seqResult.get(key));
+ } else {
+ log.info("invalid (1): " + key + ", expected: " + seqResult.get(key) + ", got: "
+ + parallelResult.get(entry.getKey()));
}
}
}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthTest.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthTest.java Wed Jun 20 12:07:50 2012
@@ -114,8 +114,9 @@ public final class PFPGrowthTest extends
int numGroups = params.getInt(PFPGrowth.NUM_GROUPS,
PFPGrowth.NUM_GROUPS_DEFAULT);
int maxPerGroup = fList.size() / numGroups;
- if (fList.size() % numGroups != 0)
+ if (fList.size() % numGroups != 0) {
maxPerGroup++;
+ }
params.set(PFPGrowth.MAX_PER_GROUP, Integer.toString(maxPerGroup));
log.info("Starting Parallel FPGrowth Test: {}", params.get(PFPGrowth.MAX_HEAPSIZE));
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthTest2.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthTest2.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthTest2.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthTest2.java Wed Jun 20 12:07:50 2012
@@ -115,8 +115,9 @@ public final class PFPGrowthTest2 extend
int numGroups = params.getInt(PFPGrowth.NUM_GROUPS,
PFPGrowth.NUM_GROUPS_DEFAULT);
int maxPerGroup = fList.size() / numGroups;
- if (fList.size() % numGroups != 0)
+ if (fList.size() % numGroups != 0) {
maxPerGroup++;
+ }
params.set(PFPGrowth.MAX_PER_GROUP, Integer.toString(maxPerGroup));
log.info("Starting Parallel FPGrowth Test: {}", params.get(PFPGrowth.MAX_HEAPSIZE));
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolver.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolver.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolver.java Wed Jun 20 12:07:50 2012
@@ -70,7 +70,7 @@ public final class TestDistributedLanczo
Configuration conf = new Configuration();
corpus.setConf(conf);
DistributedLanczosSolver solver = new DistributedLanczosSolver();
- Vector intitialVector = solver.getInitialVector(corpus);
+ Vector intitialVector = DistributedLanczosSolver.getInitialVector(corpus);
LanczosState state;
if (hdfsBackedState) {
HdfsBackedLanczosState hState = new HdfsBackedLanczosState(corpus,
@@ -96,7 +96,7 @@ public final class TestDistributedLanczo
corpus.setConf(conf);
DistributedLanczosSolver solver = new DistributedLanczosSolver();
int rank = 10;
- Vector intitialVector = solver.getInitialVector(corpus);
+ Vector intitialVector = DistributedLanczosSolver.getInitialVector(corpus);
HdfsBackedLanczosState state = new HdfsBackedLanczosState(corpus, rank,
intitialVector, new Path(getTestTempDirPath(), "lanczosStateDir" + suf(symmetric) + counter));
solver.solve(state, rank, symmetric);
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java Wed Jun 20 12:07:50 2012
@@ -159,11 +159,9 @@ public final class TestDistributedLanczo
}
for (int newRow = 0; newRow < eigenVectors2.numRows(); newRow++) {
Vector newEigen = eigenVectors2.viewRow(newRow);
- if (newEigen != null) {
- if (oldEigen.dot(newEigen) > 0.9) {
- oldEigensFound.add(row);
- break;
- }
+ if (newEigen != null && oldEigen.dot(newEigen) > 0.9) {
+ oldEigensFound.add(row);
+ break;
}
}
}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java Wed Jun 20 12:07:50 2012
@@ -31,6 +31,7 @@ import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.compress.DefaultCodec;
+import org.apache.mahout.common.IOUtils;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.math.DenseMatrix;
@@ -104,6 +105,7 @@ public class LocalSSVDPCADenseTest exten
closeables.remove(w);
Closeables.close(w, true);
+ // TODO fix test so that 1.0/m works as intended!
xi.assign(Functions.mult(1 / m));
FileSystem fs = FileSystem.get(conf);
@@ -158,9 +160,11 @@ public class LocalSSVDPCADenseTest exten
double[][] a = SSVDHelper.loadDistributedRowMatrix(fs, aPath, conf);
// subtract pseudo pca mean
- for (int i = 0; i < m; i++)
- for (int j = 0; j < n; j++)
+ for (int i = 0; i < m; i++) {
+ for (int j = 0; j < n; j++) {
a[i][j] -= xi.getQuick(j);
+ }
+ }
SingularValueDecomposition svd2 =
new SingularValueDecomposition(new DenseMatrix(a));
@@ -180,6 +184,7 @@ public class LocalSSVDPCADenseTest exten
false,
s_epsilon);
+ IOUtils.close(closeables);
}
}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java Wed Jun 20 12:07:50 2012
@@ -31,6 +31,7 @@ import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.compress.DefaultCodec;
+import org.apache.mahout.common.IOUtils;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.math.DenseMatrix;
@@ -184,22 +185,7 @@ public class LocalSSVDSolverSparseSequen
false,
s_epsilon);
- /*
- * removing tests on U and V to keep this test leaner. I will keep U,V
- * computation and assertions in the dense tests though.
- */
-
- /*
- * double[][] u = SSVDSolver.loadDistributedRowMatrix(fs, new
- * Path(svdOutPath, "U/[^_]*"), conf);
- *
- * SSVDPrototypeTest .assertOrthonormality(new DenseMatrix(u), false,
- * s_epsilon); double[][] v = SSVDSolver.loadDistributedRowMatrix(fs, new
- * Path(svdOutPath, "V/[^_]*"), conf);
- *
- * SSVDPrototypeTest .assertOrthonormality(new DenseMatrix(v), false,
- * s_epsilon);
- */
+ IOUtils.close(closeables);
}
static void dumpSv(Vector s) {
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvdTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvdTest.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvdTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvdTest.java Wed Jun 20 12:07:50 2012
@@ -148,12 +148,12 @@ public final class SequentialOutOfCoreSv
assertEquals(u1, u2);
}
- private Matrix lowRankMatrixInMemory(int rows, int columns) throws IOException {
+ private static Matrix lowRankMatrixInMemory(int rows, int columns) throws IOException {
return lowRankMatrix(null, null, 0, rows, columns);
}
- private void assertEquals(Matrix u1, Matrix u2) {
- assertEquals(0.0, u1.minus(u2).aggregate(Functions.MAX, Functions.ABS), 1e-10);
+ private static void assertEquals(Matrix u1, Matrix u2) {
+ assertEquals(0.0, u1.minus(u2).aggregate(Functions.MAX, Functions.ABS), 1.0e-10);
}
@Test
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFilesTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFilesTest.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFilesTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFilesTest.java Wed Jun 20 12:07:50 2012
@@ -194,7 +194,7 @@ public class SparseVectorsFromSequenceFi
Path tfidfVectors = new Path(outputPath, "tfidf-vectors");
DictionaryVectorizerTest.validateVectors(conf, numDocs, tfVectors, sequential, named);
- if (tfWeighting == false) {
+ if (!tfWeighting) {
DictionaryVectorizerTest.validateVectors(conf, numDocs, tfidfVectors, sequential, named);
}
return outputPath;
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/GramKeyGroupComparatorTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/GramKeyGroupComparatorTest.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/GramKeyGroupComparatorTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/GramKeyGroupComparatorTest.java Wed Jun 20 12:07:50 2012
@@ -38,8 +38,8 @@ public final class GramKeyGroupComparato
assertEquals(0, cmp.compare(a, b));
assertEquals(0, cmp.compare(a, c));
- assertTrue(0 > cmp.compare(a, d));
- assertTrue(0 < cmp.compare(a, e));
- assertTrue(0 < cmp.compare(d, e));
+ assertTrue(cmp.compare(a, d) < 0);
+ assertTrue(cmp.compare(a, e) > 0);
+ assertTrue(cmp.compare(d, e) > 0);
}
}
Modified: mahout/trunk/distribution/pom.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/distribution/pom.xml?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/distribution/pom.xml (original)
+++ mahout/trunk/distribution/pom.xml Wed Jun 20 12:07:50 2012
@@ -38,6 +38,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
+ <version>2.3</version>
<executions>
<execution>
<id>bin-assembly</id>
Modified: mahout/trunk/examples/pom.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/pom.xml?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/pom.xml (original)
+++ mahout/trunk/examples/pom.xml Wed Jun 20 12:07:50 2012
@@ -40,6 +40,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
+ <version>2.4</version>
<configuration>
<encoding>UTF-8</encoding>
<source>1.6</source>
@@ -50,6 +51,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
+ <version>2.4</version>
<executions>
<execution>
<id>copy-dependencies</id>
@@ -68,6 +70,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
+ <version>2.3</version>
<executions>
<execution>
<id>job</id>
@@ -89,6 +92,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-remote-resources-plugin</artifactId>
+ <version>1.3</version>
<configuration>
<appendedResourcesDirectory>../src/main/appended-resources</appendedResourcesDirectory>
<resourceBundles>
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/BuildForest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/BuildForest.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/BuildForest.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/BuildForest.java Wed Jun 20 12:07:50 2012
@@ -74,8 +74,7 @@ public class BuildForest extends Configu
private boolean isPartial; // use partial data implementation
@Override
- public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException,
- InstantiationException, IllegalAccessException {
+ public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
ArgumentBuilder abuilder = new ArgumentBuilder();
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/TestForest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/TestForest.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/TestForest.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/TestForest.java Wed Jun 20 12:07:50 2012
@@ -19,6 +19,7 @@ package org.apache.mahout.classifier.df.
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.List;
import java.util.Random;
import java.util.Scanner;
@@ -254,8 +255,12 @@ public class TestForest extends Configur
}
}
- private void testDirectory(Path outPath, DataConverter converter, DecisionForest forest,
- Dataset dataset, List<double[]> results, Random rng) throws IOException {
+ private void testDirectory(Path outPath,
+ DataConverter converter,
+ DecisionForest forest,
+ Dataset dataset,
+ Collection<double[]> results,
+ Random rng) throws IOException {
Path[] infiles = DFUtils.listOutputFiles(dataFS, dataPath);
for (Path path : infiles) {
@@ -265,8 +270,13 @@ public class TestForest extends Configur
}
}
- private void testFile(Path inPath, Path outPath, DataConverter converter, DecisionForest forest,
- Dataset dataset, List<double[]> results, Random rng) throws IOException {
+ private void testFile(Path inPath,
+ Path outPath,
+ DataConverter converter,
+ DecisionForest forest,
+ Dataset dataset,
+ Collection<double[]> results,
+ Random rng) throws IOException {
// create the predictions file
FSDataOutputStream ofile = null;
@@ -276,7 +286,7 @@ public class TestForest extends Configur
FSDataInputStream input = dataFS.open(inPath);
try {
- Scanner scanner = new Scanner(input);
+ Scanner scanner = new Scanner(input, "UTF-8");
while (scanner.hasNextLine()) {
String line = scanner.nextLine();
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/PosTagger.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/PosTagger.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/PosTagger.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/PosTagger.java Wed Jun 20 12:07:50 2012
@@ -154,12 +154,9 @@ public final class PosTagger {
// determine the IDs
Integer wordID = wordIDs.get(tags[0]);
Integer tagID = tagIDs.get(tags[1]);
- // handle unknown values
- wordID = wordID == null ? 0 : wordID;
- tagID = tagID == null ? 0 : tagID;
// now construct the current sequence
- observedSequence.add(wordID);
- hiddenSequence.add(tagID);
+ observedSequence.add(wordID == null ? 0 : wordID);
+ hiddenSequence.add(tagID == null ? 0 : tagID);
}
// if there is still something in the pipe, register it
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticModelParameters.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticModelParameters.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticModelParameters.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticModelParameters.java Wed Jun 20 12:07:50 2012
@@ -62,11 +62,9 @@ public class AdaptiveLogisticModelParame
public void checkParameters() {
if (prior != null) {
- if ("TP".equals(prior.toUpperCase(Locale.ENGLISH).trim()) ||
- "EBP".equals(prior.toUpperCase(Locale.ENGLISH).trim())) {
- if (Double.isNaN(priorOption)) {
- throw new IllegalArgumentException("You must specify a double value for TPrior and ElasticBandPrior.");
- }
+ String priorUppercase = prior.toUpperCase(Locale.ENGLISH).trim();
+ if (("TP".equals(priorUppercase) || "EBP".equals(priorUppercase)) && Double.isNaN(priorOption)) {
+ throw new IllegalArgumentException("You must specify a double value for TPrior and ElasticBandPrior.");
}
}
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunAdaptiveLogistic.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunAdaptiveLogistic.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunAdaptiveLogistic.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunAdaptiveLogistic.java Wed Jun 20 12:07:50 2012
@@ -17,6 +17,7 @@
package org.apache.mahout.classifier.sgd;
+import com.google.common.base.Charsets;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -32,10 +33,10 @@ import org.apache.mahout.math.Vector;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
-import java.io.FileWriter;
+import java.io.FileOutputStream;
+import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.HashMap;
-import java.util.Locale;
import java.util.Map;
public final class RunAdaptiveLogistic {
@@ -50,7 +51,7 @@ public final class RunAdaptiveLogistic {
}
public static void main(String[] args) throws Exception {
- mainToOutput(args, new PrintWriter(System.out, true));
+ mainToOutput(args, new PrintWriter(new OutputStreamWriter(System.out, Charsets.UTF_8), true));
}
static void mainToOutput(String[] args, PrintWriter output) throws Exception {
@@ -67,14 +68,13 @@ public final class RunAdaptiveLogistic {
State<Wrapper, CrossFoldLearner> best = lr.getBest();
if (best == null) {
- output.printf("%s\n",
- "AdaptiveLogisticRegression has not be trained probably.");
+ output.println("AdaptiveLogisticRegression has not be trained probably.");
return;
}
CrossFoldLearner learner = best.getPayload().getLearner();
BufferedReader in = TrainAdaptiveLogistic.open(inputFile);
- BufferedWriter out = new BufferedWriter(new FileWriter(outputFile));
+ BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFile), Charsets.UTF_8));
out.write(idColumn + ",target,score");
out.newLine();
@@ -104,13 +104,13 @@ public final class RunAdaptiveLogistic {
}
k++;
if (k % 100 == 0) {
- output.printf(Locale.ENGLISH, "%d records processed \n", k);
+ output.println(k + " records processed");
}
line = in.readLine();
}
out.flush();
out.close();
- output.printf(Locale.ENGLISH, "%d records processed totally.\n", k);
+ output.println(k + " records processed totally.");
}
private static boolean parseArgs(String[] args) {
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunLogistic.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunLogistic.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunLogistic.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunLogistic.java Wed Jun 20 12:07:50 2012
@@ -17,6 +17,7 @@
package org.apache.mahout.classifier.sgd;
+import com.google.common.base.Charsets;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -32,6 +33,7 @@ import org.apache.mahout.classifier.eval
import java.io.BufferedReader;
import java.io.File;
+import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.Locale;
@@ -47,7 +49,7 @@ public final class RunLogistic {
}
public static void main(String[] args) throws Exception {
- mainToOutput(args, new PrintWriter(System.out, true));
+ mainToOutput(args, new PrintWriter(new OutputStreamWriter(System.out, Charsets.UTF_8), true));
}
static void mainToOutput(String[] args, PrintWriter output) throws Exception {
@@ -67,7 +69,7 @@ public final class RunLogistic {
csv.firstLine(line);
line = in.readLine();
if (showScores) {
- output.printf(Locale.ENGLISH, "\"%s\",\"%s\",\"%s\"\n", "target", "model-output", "log-likelihood");
+ output.println("\"target\",\"model-output\",\"log-likelihood\"");
}
while (line != null) {
Vector v = new SequentialAccessSparseVector(lmp.getNumFeatures());
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java Wed Jun 20 12:07:50 2012
@@ -35,8 +35,10 @@ import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
+import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.ByteBuffer;
import java.util.List;
@@ -78,7 +80,8 @@ public final class SimpleCsvExamples {
long t0 = System.currentTimeMillis();
Vector v = new DenseVector(1000);
if ("--generate".equals(args[0])) {
- PrintWriter out = new PrintWriter(new File(args[2]));
+ PrintWriter out =
+ new PrintWriter(new OutputStreamWriter(new FileOutputStream(new File(args[2])), Charsets.UTF_8));
try {
int n = Integer.parseInt(args[1]);
for (int i = 0; i < n; i++) {
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestASFEmail.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestASFEmail.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestASFEmail.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestASFEmail.java Wed Jun 20 12:07:50 2012
@@ -17,6 +17,7 @@
package org.apache.mahout.classifier.sgd;
+import com.google.common.base.Charsets;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -41,6 +42,7 @@ import org.apache.mahout.vectorizer.enco
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
+import java.io.OutputStreamWriter;
import java.io.PrintWriter;
/**
@@ -57,7 +59,7 @@ public final class TestASFEmail {
public static void main(String[] args) throws IOException {
TestASFEmail runner = new TestASFEmail();
if (runner.parseArgs(args)) {
- runner.run(new PrintWriter(System.out, true));
+ runner.run(new PrintWriter(new OutputStreamWriter(System.out, Charsets.UTF_8), true));
}
}
@@ -70,7 +72,6 @@ public final class TestASFEmail {
Dictionary asfDictionary = new Dictionary();
- //<String> overallCounts = HashMultiset.create();
Configuration conf = new Configuration();
PathFilter testFilter = new PathFilter() {
@Override
@@ -88,7 +89,7 @@ public final class TestASFEmail {
numItems++;
}
- System.out.printf("%d test files\n", numItems);
+ System.out.println(numItems + " test files");
ResultAnalyzer ra = new ResultAnalyzer(asfDictionary.values(), "DEFAULT");
iter = new SequenceFileDirIterator<Text, VectorWritable>(new Path(base.toString()), PathType.LIST, testFilter,
null, true, conf);
@@ -105,7 +106,7 @@ public final class TestASFEmail {
ra.addInstance(asfDictionary.values().get(actual), cr);
}
- output.printf("%s\n\n", ra.toString());
+ output.println(ra);
}
boolean parseArgs(String[] args) {
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestNewsGroups.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestNewsGroups.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestNewsGroups.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestNewsGroups.java Wed Jun 20 12:07:50 2012
@@ -20,10 +20,12 @@ package org.apache.mahout.classifier.sgd
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
+import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.List;
+import com.google.common.base.Charsets;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -56,7 +58,7 @@ public final class TestNewsGroups {
public static void main(String[] args) throws IOException {
TestNewsGroups runner = new TestNewsGroups();
if (runner.parseArgs(args)) {
- runner.run(new PrintWriter(System.out, true));
+ runner.run(new PrintWriter(new OutputStreamWriter(System.out, Charsets.UTF_8), true));
}
}
@@ -64,8 +66,8 @@ public final class TestNewsGroups {
File base = new File(inputFile);
//contains the best model
- OnlineLogisticRegression classifier = ModelSerializer.readBinary(new FileInputStream(modelFile), OnlineLogisticRegression.class);
-
+ OnlineLogisticRegression classifier =
+ ModelSerializer.readBinary(new FileInputStream(modelFile), OnlineLogisticRegression.class);
Dictionary newsGroups = new Dictionary();
Multiset<String> overallCounts = HashMultiset.create();
@@ -77,7 +79,7 @@ public final class TestNewsGroups {
files.addAll(Arrays.asList(newsgroup.listFiles()));
}
}
- System.out.printf("%d test files\n", files.size());
+ System.out.println(files.size() + " test files");
ResultAnalyzer ra = new ResultAnalyzer(newsGroups.values(), "DEFAULT");
for (File file : files) {
String ng = file.getParentFile().getName();
@@ -93,7 +95,7 @@ public final class TestNewsGroups {
ra.addInstance(newsGroups.values().get(actual), cr);
}
- output.printf("%s\n\n", ra.toString());
+ output.println(ra);
}
boolean parseArgs(String[] args) {
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainASFEmail.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainASFEmail.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainASFEmail.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainASFEmail.java Wed Jun 20 12:07:50 2012
@@ -35,26 +35,23 @@ import org.apache.mahout.math.VectorWrit
import org.apache.mahout.vectorizer.encoders.Dictionary;
import java.io.File;
-import java.io.IOException;
import java.util.Collections;
import java.util.List;
public final class TrainASFEmail extends AbstractJob {
- //private static final String[] LEAK_LABELS = {"none", "month-year", "day-month-year"};
-
private TrainASFEmail() {
}
@Override
public int run(String[] args) throws Exception {
- int result = 0;
addInputOption();
addOutputOption();
addOption("categories", "nc", "The number of categories to train on", true);
addOption("cardinality", "c", "The size of the vectors to use", "100000");
addOption("threads", "t", "The number of threads to use in the learner", "20");
- addOption("poolSize", "p", "The number of CrossFoldLearners to use in the AdaptiveLogisticRegression. Higher values require more memory.", "5");
+ addOption("poolSize", "p", "The number of CrossFoldLearners to use in the AdaptiveLogisticRegression. "
+ + "Higher values require more memory.", "5");
if (parseArguments(args) == null) {
return -1;
}
@@ -69,7 +66,8 @@ public final class TrainASFEmail extends
int threadCount = Integer.parseInt(getOption("threads", "20"));
int poolSize = Integer.parseInt(getOption("poolSize", "5"));
Dictionary asfDictionary = new Dictionary();
- AdaptiveLogisticRegression learningAlgorithm = new AdaptiveLogisticRegression(numCats, cardinality, new L1(), threadCount, poolSize);
+ AdaptiveLogisticRegression learningAlgorithm =
+ new AdaptiveLogisticRegression(numCats, cardinality, new L1(), threadCount, poolSize);
learningAlgorithm.setInterval(800);
learningAlgorithm.setAveragingWindow(500);
@@ -81,8 +79,13 @@ public final class TrainASFEmail extends
return path.getName().contains("training");
}
};
- SequenceFileDirIterator<Text, VectorWritable> iter = new SequenceFileDirIterator<Text, VectorWritable>(new Path(base.toString()), PathType.LIST, trainFilter,
- null, true, conf);
+ SequenceFileDirIterator<Text, VectorWritable> iter =
+ new SequenceFileDirIterator<Text, VectorWritable>(new Path(base.toString()),
+ PathType.LIST,
+ trainFilter,
+ null,
+ true,
+ conf);
long numItems = 0;
while (iter.hasNext()) {
Pair<Text, VectorWritable> next = iter.next();
@@ -90,7 +93,7 @@ public final class TrainASFEmail extends
numItems++;
}
- System.out.printf("%d training files\n", numItems);
+ System.out.println(numItems + " training files");
SGDInfo info = new SGDInfo();
@@ -118,20 +121,20 @@ public final class TrainASFEmail extends
learningAlgorithm.getBest().getPayload().getLearner().getModels().get(0));
List<Integer> counts = Lists.newArrayList();
- System.out.printf("Word counts\n");
+ System.out.println("Word counts");
for (String count : overallCounts.elementSet()) {
counts.add(overallCounts.count(count));
}
Collections.sort(counts, Ordering.natural().reverse());
k = 0;
for (Integer count : counts) {
- System.out.printf("%d\t%d\n", k, count);
+ System.out.println(k + "\t" + count);
k++;
if (k > 1000) {
break;
}
}
- return result;
+ return 0;
}
public static void main(String[] args) throws Exception {
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainAdaptiveLogistic.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainAdaptiveLogistic.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainAdaptiveLogistic.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainAdaptiveLogistic.java Wed Jun 20 12:07:50 2012
@@ -25,6 +25,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
+import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.List;
import java.util.Locale;
@@ -60,7 +61,7 @@ public final class TrainAdaptiveLogistic
}
public static void main(String[] args) throws Exception {
- mainToOutput(args, new PrintWriter(System.out, true));
+ mainToOutput(args, new PrintWriter(new OutputStreamWriter(System.out, Charsets.UTF_8), true));
}
static void mainToOutput(String[] args, PrintWriter output) throws Exception {
@@ -117,8 +118,7 @@ public final class TrainAdaptiveLogistic
learner = best.getPayload().getLearner();
}
if (learner == null) {
- output.printf(Locale.ENGLISH,
- "%s\n", "AdaptiveLogisticRegression has failed to train a model.");
+ output.println("AdaptiveLogisticRegression has failed to train a model.");
return;
}
@@ -131,8 +131,8 @@ public final class TrainAdaptiveLogistic
}
OnlineLogisticRegression lr = learner.getModels().get(0);
- output.printf(Locale.ENGLISH, "%d\n", lmp.getNumFeatures());
- output.printf(Locale.ENGLISH, "%s ~ ", lmp.getTargetVariable());
+ output.println(lmp.getNumFeatures());
+ output.println(lmp.getTargetVariable() + " ~ ");
String sep = "";
for (String v : csv.getTraceDictionary().keySet()) {
double weight = predictorWeight(lr, 0, csv, v);
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java Wed Jun 20 12:07:50 2012
@@ -40,6 +40,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
+import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.List;
import java.util.Locale;
@@ -61,7 +62,7 @@ public final class TrainLogistic {
}
public static void main(String[] args) throws Exception {
- mainToOutput(args, new PrintWriter(System.out, true));
+ mainToOutput(args, new PrintWriter(new OutputStreamWriter(System.out, Charsets.UTF_8), true));
}
static void mainToOutput(String[] args, PrintWriter output) throws Exception {
@@ -78,7 +79,6 @@ public final class TrainLogistic {
csv.firstLine(in.readLine());
String line = in.readLine();
- int lineCount = 0;
while (line != null) {
// for each new line, get target and predictors
Vector input = new RandomAccessSparseVector(lmp.getNumFeatures());
@@ -104,7 +104,6 @@ public final class TrainLogistic {
lr.train(targetValue, input);
line = in.readLine();
- lineCount++;
}
} finally {
Closeables.closeQuietly(in);
@@ -118,8 +117,8 @@ public final class TrainLogistic {
Closeables.closeQuietly(modelOutput);
}
- output.printf(Locale.ENGLISH, "%d\n", lmp.getNumFeatures());
- output.printf(Locale.ENGLISH, "%s ~ ", lmp.getTargetVariable());
+ output.println(lmp.getNumFeatures());
+ output.println(lmp.getTargetVariable() + " ~ ");
String sep = "";
for (String v : csv.getTraceDictionary().keySet()) {
double weight = predictorWeight(lr, 0, csv, v);
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java Wed Jun 20 12:07:50 2012
@@ -97,7 +97,8 @@ public final class TrainNewsGroups {
NewsgroupHelper helper = new NewsgroupHelper();
helper.getEncoder().setProbes(2);
- AdaptiveLogisticRegression learningAlgorithm = new AdaptiveLogisticRegression(20, NewsgroupHelper.FEATURES, new L1());
+ AdaptiveLogisticRegression learningAlgorithm =
+ new AdaptiveLogisticRegression(20, NewsgroupHelper.FEATURES, new L1());
learningAlgorithm.setInterval(800);
learningAlgorithm.setAveragingWindow(500);
@@ -109,7 +110,7 @@ public final class TrainNewsGroups {
}
}
Collections.shuffle(files);
- System.out.printf("%d training files\n", files.size());
+ System.out.println(files.size() + " training files");
SGDInfo info = new SGDInfo();
int k = 0;
@@ -135,14 +136,14 @@ public final class TrainNewsGroups {
learningAlgorithm.getBest().getPayload().getLearner().getModels().get(0));
List<Integer> counts = Lists.newArrayList();
- System.out.printf("Word counts\n");
+ System.out.println("Word counts");
for (String count : overallCounts.elementSet()) {
counts.add(overallCounts.count(count));
}
Collections.sort(counts, Ordering.natural().reverse());
k = 0;
for (Integer count : counts) {
- System.out.printf("%d\t%d\n", k, count);
+ System.out.println(k + "\t" + count);
k++;
if (k > 1000) {
break;
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/ValidateAdaptiveLogistic.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/ValidateAdaptiveLogistic.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/ValidateAdaptiveLogistic.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/ValidateAdaptiveLogistic.java Wed Jun 20 12:07:50 2012
@@ -20,9 +20,11 @@ package org.apache.mahout.classifier.sgd
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
+import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.Locale;
+import com.google.common.base.Charsets;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -58,7 +60,7 @@ public final class ValidateAdaptiveLogis
}
public static void main(String[] args) throws IOException {
- mainToOutput(args, new PrintWriter(System.out, true));
+ mainToOutput(args, new PrintWriter(new OutputStreamWriter(System.out, Charsets.UTF_8), true));
}
static void mainToOutput(String[] args, PrintWriter output) throws IOException {
@@ -83,8 +85,7 @@ public final class ValidateAdaptiveLogis
State<Wrapper, CrossFoldLearner> best = lr.getBest();
if (best == null) {
- output.printf("%s\n",
- "AdaptiveLogisticRegression has not be trained probably.");
+ output.println("AdaptiveLogisticRegression has not be trained probably.");
return;
}
CrossFoldLearner learner = best.getPayload().getLearner();
@@ -94,8 +95,7 @@ public final class ValidateAdaptiveLogis
csv.firstLine(line);
line = in.readLine();
if (showScores) {
- output.printf(Locale.ENGLISH, "\"%s\", \"%s\", \"%s\", \"%s\"\n",
- "target", "model-output", "log-likelihood", "average-likelihood");
+ output.println("\"target\", \"model-output\", \"log-likelihood\", \"average-likelihood\"");
}
while (line != null) {
Vector v = new SequentialAccessSparseVector(lmp.getNumFeatures());
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java Wed Jun 20 12:07:50 2012
@@ -91,7 +91,7 @@ public class DisplayDirichlet extends Di
Path priorPath = new Path(output, Cluster.INITIAL_CLUSTERS_DIR);
prior.writeToSeqFiles(priorPath);
Configuration conf = new Configuration();
- new ClusterIterator().iterateSeq(conf, input, priorPath, output, numIterations);
+ ClusterIterator.iterateSeq(conf, input, priorPath, output, numIterations);
}
private static void runSequentialDirichletClusterer(Path input, Path output,
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java Wed Jun 20 12:07:50 2012
@@ -93,7 +93,7 @@ public class DisplayFuzzyKMeans extends
Path priorPath = new Path(output, "classifier-0");
prior.writeToSeqFiles(priorPath);
- new ClusterIterator().iterateSeq(conf, samples, priorPath, output, maxIterations);
+ ClusterIterator.iterateSeq(conf, samples, priorPath, output, maxIterations);
loadClustersWritable(output);
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java Wed Jun 20 12:07:50 2012
@@ -85,7 +85,7 @@ public class DisplayKMeans extends Displ
prior.writeToSeqFiles(priorPath);
int maxIter = 10;
- new ClusterIterator().iterateSeq(conf, samples, priorPath, output, maxIter);
+ ClusterIterator.iterateSeq(conf, samples, priorPath, output, maxIter);
loadClustersWritable(output);
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMinHash.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMinHash.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMinHash.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMinHash.java Wed Jun 20 12:07:50 2012
@@ -92,17 +92,16 @@ public class DisplayMinHash extends Disp
LINES, POINTS, SYMBOLS
}
- private static final long serialVersionUID = 1L;
- private transient static Logger log = LoggerFactory
- .getLogger(DisplayMinHash.class);
+ private static final Logger log = LoggerFactory.getLogger(DisplayMinHash.class);
- private static Map<String, List<Vector>> clusters = new HashMap<String, List<Vector>>();
+ private static final int SYMBOLS_FONT_SIZE = 6;
+
+ private static final Map<String, List<Vector>> clusters = new HashMap<String, List<Vector>>();
private static Iterator<Entry<String, List<Vector>>> currentCluster;
private static List<Vector> currentClusterPoints;
private static int updatePeriodTime;
private static long lastUpdateTime = 0;
private static boolean isSlideShowOnHold = false;
- private static int symbolsFontSize = 6;
private PlotType plotType = PlotType.POINTS;
@@ -138,25 +137,23 @@ public class DisplayMinHash extends Disp
private static void plotClusters(Graphics2D g2, PlotType plotType) {
double sx = (double) res / DS;
g2.setTransform(AffineTransform.getScaleInstance(sx, sx));
- Font f = new Font("Dialog", Font.PLAIN, symbolsFontSize);
+ Font f = new Font("Dialog", Font.PLAIN, SYMBOLS_FONT_SIZE);
g2.setFont(f);
switch (plotType) {
- case LINES:
- plotLines(g2);
- break;
- case SYMBOLS:
- plotSymbols(g2);
- break;
- case POINTS:
- plotPoints(g2);
- break;
- default:
- break;
+ case LINES:
+ plotLines(g2);
+ break;
+ case SYMBOLS:
+ plotSymbols(g2);
+ break;
+ case POINTS:
+ plotPoints(g2);
+ break;
}
}
private static void plotLines(Graphics2D g2) {
- Random rand = new Random();
+ Random rand = RandomUtils.getRandom();
for (Map.Entry<String, List<Vector>> entry : clusters.entrySet()) {
List<Vector> vecs = entry.getValue();
@@ -181,15 +178,15 @@ public class DisplayMinHash extends Disp
private static void plotSymbols(Graphics2D g2) {
char symbol = 0;
- Random rand = new Random();
+ Random rand = RandomUtils.getRandom();
for (Map.Entry<String, List<Vector>> entry : clusters.entrySet()) {
List<Vector> vecs = entry.getValue();
g2.setColor(new Color(rand.nextInt()));
symbol++;
- for (int i = 0; i < vecs.size(); i++) {
- plotSymbols(g2, vecs.get(i), symbol);
+ for (Vector vec : vecs) {
+ plotSymbols(g2, vec, symbol);
}
}
}
@@ -200,7 +197,7 @@ public class DisplayMinHash extends Disp
}
if (System.currentTimeMillis() - lastUpdateTime > updatePeriodTime) {
- plotSampleData((Graphics2D) g2);
+ plotSampleData(g2);
currentClusterPoints = currentCluster.next().getValue();
lastUpdateTime = System.currentTimeMillis();
}
@@ -208,8 +205,8 @@ public class DisplayMinHash extends Disp
g2.setColor(Color.RED);
Vector dv = new DenseVector(2).assign(0.03);
- for (int i = 0; i < currentClusterPoints.size(); i++) {
- plotRectangle(g2, currentClusterPoints.get(i), dv);
+ for (Vector currentClusterPoint : currentClusterPoints) {
+ plotRectangle(g2, currentClusterPoint, dv);
}
}
@@ -238,8 +235,7 @@ public class DisplayMinHash extends Disp
* The entry point to the program.
*
* @param args
- * The command-line arguments. See {@link DisplayMinHash} for
- * details.
+ * The command-line arguments.
*
* @throws Exception
* Thrown if an error occurs during the execution.
@@ -264,6 +260,7 @@ public class DisplayMinHash extends Disp
if (type == PlotType.POINTS) {
Timer timer = new Timer(updatePeriodTime, new ActionListener() {
+ @Override
public void actionPerformed(ActionEvent e) {
repaint(f);
}
@@ -293,30 +290,28 @@ public class DisplayMinHash extends Disp
private static PlotType determinePlotType(String[] args) {
PlotType type = PlotType.POINTS;
if (args.length != 0) {
- if (args[0].equals("-p")) {
+ if ("-p".equals(args[0])) {
type = PlotType.POINTS;
- } else if (args[0].equals("-l")) {
+ } else if ("-l".equals(args[0])) {
type = PlotType.LINES;
- } else if (args[0].equals("-s")) {
+ } else if ("-s".equals(args[0])) {
type = PlotType.SYMBOLS;
} else {
- System.out
- .println("Wrong parameter: -p (plot points); -l (plot lines); -s (plot symbols)");
+ System.out.println("Wrong parameter: -p (plot points); -l (plot lines); -s (plot symbols)");
}
}
return type;
}
private static int determineUpdatePeriodTime(String[] args) {
- int updatePeriodTimeInMinutes = 1;
if (args.length >= 2) {
try {
updatePeriodTime = Integer.parseInt(args[1]);
- } catch (Exception e) {
- System.out.println(args[1]
- + " isn't valid integer value. 1 second will be used.");
+ } catch (NumberFormatException nfe) {
+ System.out.println(args[1] + " isn't valid integer value. 1 second will be used.");
}
}
+ int updatePeriodTimeInMinutes = 1;
return updatePeriodTimeInMinutes * 1000;
}
@@ -334,15 +329,16 @@ public class DisplayMinHash extends Disp
private static void logClusters() {
int i = 0;
for (Map.Entry<String, List<Vector>> entry : clusters.entrySet()) {
- String logStr = "Cluster N:" + ++i + ": ";
+ StringBuilder logStr = new StringBuilder();
+ logStr.append("Cluster N:").append(++i).append(": ");
List<Vector> vecs = entry.getValue();
for (Vector vector : vecs) {
- logStr += vector.get(0);
- logStr += ",";
- logStr += vector.get(1);
- logStr += "; ";
+ logStr.append(vector.get(0));
+ logStr.append(',');
+ logStr.append(vector.get(1));
+ logStr.append("; ");
}
- log.info(logStr);
+ log.info(logStr.toString());
}
}
@@ -365,9 +361,7 @@ public class DisplayMinHash extends Disp
private static void runMinHash(Configuration conf, Path samples, Path output)
throws Exception {
- MinHashDriver mhd = new MinHashDriver();
-
- ToolRunner.run(conf, mhd, new String[] { "--input", samples.toString(),
+ ToolRunner.run(conf, new MinHashDriver(), new String[] { "--input", samples.toString(),
"--hashType", HashFactory.HashType.MURMUR3.toString(), "--output",
output.toString(), "--minVectorSize", "1", "--debugOutput"
Modified: mahout/trunk/integration/pom.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/pom.xml?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/integration/pom.xml (original)
+++ mahout/trunk/integration/pom.xml Wed Jun 20 12:07:50 2012
@@ -40,6 +40,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
+ <version>2.4</version>
<configuration>
<encoding>UTF-8</encoding>
<source>1.6</source>
@@ -50,6 +51,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
+ <version>2.4</version>
<executions>
<execution>
<id>copy-dependencies</id>
@@ -66,6 +68,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-remote-resources-plugin</artifactId>
+ <version>1.3</version>
<configuration>
<appendedResourcesDirectory>../src/main/appended-resources</appendedResourcesDirectory>
<resourceBundles>
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java Wed Jun 20 12:07:50 2012
@@ -48,7 +48,9 @@ import com.google.common.collect.Lists;
* Intended to consume ConfusionMatrix SequenceFile output by Bayes TestClassifier class
*/
public final class ConfusionMatrixDumper extends AbstractJob {
-
+
+ private static final String TAB_SEPARATOR = "|";
+
// HTML wrapper - default CSS
private static final String HEADER = "<html>"
+ "<head>\n"
@@ -162,7 +164,6 @@ public final class ConfusionMatrixDumper
}
private static void exportText(Path inputPath, PrintStream out) throws IOException {
- String TAB_SEPARATOR = "|";
MatrixWritable mw = new MatrixWritable();
Text key = new Text();
readSeqFile(inputPath, key, mw);
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java Wed Jun 20 12:07:50 2012
@@ -101,7 +101,7 @@ public final class RepresentativePointsD
* @throws IOException
* if errors occur
*/
- public static void printRepresentativePoints(Path output, int numIterations) throws IOException {
+ public static void printRepresentativePoints(Path output, int numIterations) {
for (int i = 0; i <= numIterations; i++) {
Path out = new Path(output, "representativePoints-" + i);
System.out.println("Representative Points for iteration " + i);
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java Wed Jun 20 12:07:50 2012
@@ -105,7 +105,7 @@ public final class MailArchivesClusterin
// Regex used to exclude non-alpha-numeric tokens
private static final Pattern alphaNumeric = Pattern.compile("^[a-z][a-z0-9_]+$");
- private final static Matcher matcher = alphaNumeric.matcher("");
+ private static final Matcher matcher = alphaNumeric.matcher("");
public MailArchivesClusteringAnalyzer() {
super(LUCENE_VERSION, STOP_WORDS);
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java Wed Jun 20 12:07:50 2012
@@ -20,7 +20,6 @@ import com.google.common.io.Closeables;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
-import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
@@ -124,7 +123,7 @@ public final class SequenceFilesFromMail
public int run(String[] args) throws Exception {
DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
ArgumentBuilder abuilder = new ArgumentBuilder();
- GroupBuilder gbuilder = new GroupBuilder();
+ //GroupBuilder gbuilder = new GroupBuilder();
addInputOption();
addOutputOption();
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaXmlSplitter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaXmlSplitter.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaXmlSplitter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaXmlSplitter.java Wed Jun 20 12:07:50 2012
@@ -47,7 +47,7 @@ import org.slf4j.LoggerFactory;
/**
* <p>The Bayes example package provides some helper classes for training the Naive Bayes classifier
- * on the Twenty Newsgroups data. See {@link org.apache.mahout.examples.wikipedia.PrepareTwentyNewsgroups}
+ * on the Twenty Newsgroups data. See {@code PrepareTwentyNewsgroups}
* for details on running the trainer and
* formatting the Twenty Newsgroups data properly for the training.</p>
*
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java Wed Jun 20 12:07:50 2012
@@ -98,17 +98,16 @@ public final class MatrixDumper extends
}
private static PrintStream getPrintStream(String outputPath) throws IOException {
- if (outputPath != null) {
- File outputFile = new File(outputPath);
- if (outputFile.exists()) {
- outputFile.delete();
- }
- outputFile.createNewFile();
- OutputStream os = new FileOutputStream(outputFile);
- return new PrintStream(os);
- } else {
+ if (outputPath == null) {
return System.out;
}
+ File outputFile = new File(outputPath);
+ if (outputFile.exists()) {
+ outputFile.delete();
+ }
+ outputFile.createNewFile();
+ OutputStream os = new FileOutputStream(outputFile);
+ return new PrintStream(os);
}
/**
@@ -128,10 +127,9 @@ public final class MatrixDumper extends
}
private static String[] sortLabels(Map<String,Integer> labels) {
- String[] sorted = new String[labels.keySet().size()];
- for (String label: labels.keySet()) {
- Integer index = labels.get(label);
- sorted[index] = label;
+ String[] sorted = new String[labels.size()];
+ for (Map.Entry<String,Integer> entry : labels.entrySet()) {
+ sorted[entry.getValue()] = entry.getKey();
}
return sorted;
}
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java Wed Jun 20 12:07:50 2012
@@ -30,7 +30,6 @@ import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputFilesFilter;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.Pair;
-import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterator;
import org.apache.mahout.math.list.IntArrayList;
import org.apache.mahout.math.map.OpenObjectIntHashMap;
@@ -62,7 +61,7 @@ public final class SequenceFileDumper ex
return -1;
}
- Path[] pathArr= null;
+ Path[] pathArr;
Configuration conf = new Configuration();
Path input = getInputPath();
FileSystem fs = input.getFileSystem(conf);
@@ -118,8 +117,9 @@ public final class SequenceFileDumper ex
long numItems = Long.MAX_VALUE;
if (hasOption("numItems")) {
numItems = Long.parseLong(getOption("numItems"));
- if (!hasOption("quiet"))
+ if (!hasOption("quiet")) {
writer.append("Max Items to dump: ").append(String.valueOf(numItems)).append("\n");
+ }
}
while (iterator.hasNext() && count < numItems) {
Pair<?, ?> record = iterator.next();
@@ -134,8 +134,9 @@ public final class SequenceFileDumper ex
}
count++;
}
- if (!hasOption("quiet"))
+ if (!hasOption("quiet")) {
writer.append("Count: ").append(String.valueOf(count)).append('\n');
+ }
}
if (facets != null) {
List<String> keyList = new ArrayList<String>(facets.size());
@@ -166,9 +167,4 @@ public final class SequenceFileDumper ex
new SequenceFileDumper().run(args);
}
- private static void printHelp(Group group) {
- HelpFormatter formatter = new HelpFormatter();
- formatter.setGroup(group);
- formatter.print();
- }
}