You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/10/03 14:46:09 UTC
svn commit: r821305 [2/2] - in /lucene/mahout/trunk/core/src:
main/java/org/apache/mahout/common/ main/java/org/apache/mahout/df/
main/java/org/apache/mahout/df/builder/
main/java/org/apache/mahout/df/callback/
main/java/org/apache/mahout/df/data/ main...
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/ref/SequentialBuilder.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/ref/SequentialBuilder.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/ref/SequentialBuilder.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/ref/SequentialBuilder.java Sat Oct 3 12:46:06 2009
@@ -37,9 +37,9 @@
private static final Logger log = LoggerFactory.getLogger(SequentialBuilder.class);
- protected final Random rng;
+ private final Random rng;
- protected final Bagging bagging;
+ private final Bagging bagging;
/**
* Constructor
@@ -53,7 +53,7 @@
bagging = new Bagging(treeBuilder, data);
}
- public DecisionForest build(int nbTrees, PredictionCallback callback) throws Exception {
+ public DecisionForest build(int nbTrees, PredictionCallback callback) {
List<Node> trees = new ArrayList<Node>();
for (int treeId = 0; treeId < nbTrees; treeId++) {
@@ -64,7 +64,7 @@
return new DecisionForest(trees);
}
- protected void logProgress(float progress) {
+ private static void logProgress(float progress) {
int percent = (int) (progress * 100);
if (percent % 10 == 0)
log.info(String.format("Building %2d%%", percent));
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/split/DefaultIgSplit.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/split/DefaultIgSplit.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/split/DefaultIgSplit.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/split/DefaultIgSplit.java Sat Oct 3 12:46:06 2009
@@ -28,7 +28,7 @@
public class DefaultIgSplit extends IgSplit {
/** used by entropy() */
- protected int[] counts;
+ private int[] counts;
@Override
public Split computeSplit(Data data, int attr) {
@@ -86,10 +86,9 @@
protected double numericalIg(Data data, int attr, double split) {
double hy = entropy(data);
double invDataSize = 1.0 / data.size();
- Data subset;
// LO subset
- subset = data.subset(Condition.lesser(attr, split));
+ Data subset = data.subset(Condition.lesser(attr, split));
hy -= subset.size() * invDataSize * entropy(subset);
// HI subset
@@ -106,7 +105,6 @@
* @return
*/
protected double entropy(Data data) {
- double entropy = 0.0;
double invDataSize = 1.0 / data.size();
if (counts == null)
@@ -115,6 +113,7 @@
Arrays.fill(counts, 0);
data.countLabels(counts);
+ double entropy = 0.0;
for (int label = 0; label < data.dataset.nblabels(); label++) {
int count = counts[label];
if (count == 0)
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/split/OptIgSplit.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/split/OptIgSplit.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/split/OptIgSplit.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/split/OptIgSplit.java Sat Oct 3 12:46:06 2009
@@ -51,7 +51,7 @@
* @param attr
* @return
*/
- protected Split categoricalSplit(Data data, int attr) {
+ protected static Split categoricalSplit(Data data, int attr) {
double[] values = data.values(attr);
int[][] counts = new int[values.length][data.dataset.nblabels()];
int[] countAll = new int[data.dataset.nblabels()];
@@ -84,7 +84,7 @@
* @param attr
* @return
*/
- protected double[] sortedValues(Data data, int attr) {
+ protected static double[] sortedValues(Data data, int attr) {
double[] values = data.values(attr);
Arrays.sort(values);
@@ -131,11 +131,10 @@
int best = -1;
double bestIg = -1.0;
- double ig;
// try each possible split value
for (int index = 0; index < values.length; index++) {
- ig = hy;
+ double ig = hy;
// instance with attribute value < values[index]
size = DataUtils.sum(countLess);
@@ -172,8 +171,7 @@
double entropy = 0.0;
double invDataSize = 1.0 / dataSize;
- for (int label = 0; label < counts.length; label++) {
- int count = counts[label];
+ for (int count : counts) {
if (count == 0)
continue; // otherwise we get a NaN
double p = count * invDataSize;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/Describe.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/Describe.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/Describe.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/Describe.java Sat Oct 3 12:46:06 2009
@@ -36,6 +36,7 @@
import org.apache.hadoop.io.Writable;
import org.apache.mahout.df.data.DataLoader;
import org.apache.mahout.df.data.Dataset;
+import org.apache.mahout.df.data.DescriptorException;
import org.apache.mahout.df.data.DescriptorUtils;
import org.apache.mahout.common.CommandLineUtil;
import org.slf4j.Logger;
@@ -48,10 +49,10 @@
private static final Logger log = LoggerFactory.getLogger(Describe.class);
+ private Describe() {
+ }
+
public static void main(String[] args) throws Exception {
- String dataPath;
- List<String> descriptor;
- String descPath;
DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
ArgumentBuilder abuilder = new ArgumentBuilder();
@@ -88,9 +89,9 @@
System.exit(-1);
}
- dataPath = cmdLine.getValue(pathOpt).toString();
- descPath = cmdLine.getValue(descPathOpt).toString();
- descriptor = convert(cmdLine.getValues(descriptorOpt));
+ String dataPath = cmdLine.getValue(pathOpt).toString();
+ String descPath = cmdLine.getValue(descPathOpt).toString();
+ List<String> descriptor = convert(cmdLine.getValues(descriptorOpt));
log.debug("Data path : " + dataPath);
log.debug("Descriptor path : " + descPath);
@@ -105,7 +106,7 @@
}
private static void runTool(String dataPath, List<String> description,
- String filePath) throws Exception {
+ String filePath) throws DescriptorException, IOException {
log.info("Generating the descriptor...");
String descriptor = DescriptorUtils.generateDescriptor(description);
@@ -115,34 +116,32 @@
Dataset dataset = generateDataset(descriptor, dataPath);
log.info("storing the dataset description");
- Describe.storeWritable(new Configuration(), fPath, dataset);
+ storeWritable(new Configuration(), fPath, dataset);
}
private static Dataset generateDataset(String descriptor, String dataPath)
- throws Exception {
+ throws IOException, DescriptorException {
Path path = new Path(dataPath);
FileSystem fs = path.getFileSystem(new Configuration());
return DataLoader.generateDataset(descriptor, fs, path);
}
- private static Path validateOutput(String filePath) throws Exception {
+ private static Path validateOutput(String filePath) throws IOException {
Path path = new Path(filePath);
FileSystem fs = path.getFileSystem(new Configuration());
if (fs.exists(path)) {
- throw new Exception("Descriptor's file already exists");
+ throw new IllegalStateException("Descriptor's file already exists");
}
return path;
}
- private static List<String> convert(List values) {
- List<String> list = new ArrayList<String>();
-
+ private static List<String> convert(List<?> values) {
+ List<String> list = new ArrayList<String>(values.size());
for (Object value : values) {
list.add(value.toString());
}
-
return list;
}
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/builder/DefaultTreeBuilderTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/builder/DefaultTreeBuilderTest.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/builder/DefaultTreeBuilderTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/builder/DefaultTreeBuilderTest.java Sat Oct 3 12:46:06 2009
@@ -20,7 +20,7 @@
import java.util.Random;
import org.apache.commons.lang.ArrayUtils;
-import org.apache.mahout.df.builder.DefaultTreeBuilder;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.df.data.Data;
import org.apache.mahout.df.data.Utils;
@@ -29,7 +29,7 @@
public class DefaultTreeBuilderTest extends TestCase {
public void testRandomAttributes() throws Exception {
- Random rng = new Random();
+ Random rng = RandomUtils.getRandom();
int maxNbAttributes = 100;
int n = 100;
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataConverterTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataConverterTest.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataConverterTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataConverterTest.java Sat Oct 3 12:46:06 2009
@@ -19,21 +19,18 @@
import java.util.Random;
-import org.apache.mahout.df.data.Data;
-import org.apache.mahout.df.data.DataConverter;
-import org.apache.mahout.df.data.DataLoader;
-import org.apache.mahout.df.data.Dataset;
+import org.apache.mahout.common.RandomUtils;
import junit.framework.TestCase;
public class DataConverterTest extends TestCase {
- private final int nbAttributes = 10;
+ private static final int nbAttributes = 10;
- private final int nbInstances = 100;
+ private static final int nbInstances = 100;
public void testConvert() throws Exception {
- Random rng = new Random();
+ Random rng = RandomUtils.getRandom();
String descriptor = Utils.randomDescriptor(rng, nbAttributes);
double[][] source = Utils.randomDoubles(rng, descriptor, nbInstances);
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataLoaderTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataLoaderTest.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataLoaderTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataLoaderTest.java Sat Oct 3 12:46:06 2009
@@ -17,10 +17,6 @@
package org.apache.mahout.df.data;
-import static org.apache.mahout.df.data.DescriptorUtils.parseDescriptor;
-import static org.apache.mahout.df.data.Utils.randomDescriptor;
-import static org.apache.mahout.df.data.Utils.randomDoubles;
-
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
@@ -30,11 +26,7 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.mahout.df.data.Data;
-import org.apache.mahout.df.data.DataLoader;
-import org.apache.mahout.df.data.Dataset;
-import org.apache.mahout.df.data.DescriptorUtils;
-import org.apache.mahout.df.data.Instance;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.df.data.Dataset.Attribute;
public class DataLoaderTest extends TestCase {
@@ -43,7 +35,7 @@
@Override
protected void setUp() throws Exception {
- rng = new Random();
+ rng = RandomUtils.getRandom();
}
public void testLoadDataWithDescriptor() throws Exception {
@@ -51,11 +43,11 @@
int datasize = 100;
// prepare the descriptors
- String descriptor = randomDescriptor(rng, nbAttributes);
- Attribute[] attrs = parseDescriptor(descriptor);
+ String descriptor = Utils.randomDescriptor(rng, nbAttributes);
+ Attribute[] attrs = DescriptorUtils.parseDescriptor(descriptor);
// prepare the data
- double[][] data = randomDoubles(rng, descriptor, datasize);
+ double[][] data = Utils.randomDoubles(rng, descriptor, datasize);
List<Integer> missings = new ArrayList<Integer>();
String[] sData = prepareData(data, attrs, missings);
Dataset dataset = DataLoader.generateDataset(descriptor, sData);
@@ -74,11 +66,11 @@
int datasize = 100;
// prepare the descriptors
- String descriptor = randomDescriptor(rng, nbAttributes);
- Attribute[] attrs = parseDescriptor(descriptor);
+ String descriptor = Utils.randomDescriptor(rng, nbAttributes);
+ Attribute[] attrs = DescriptorUtils.parseDescriptor(descriptor);
// prepare the data
- double[][] data = randomDoubles(rng, descriptor, datasize);
+ double[][] data = Utils.randomDoubles(rng, descriptor, datasize);
List<Integer> missings = new ArrayList<Integer>();
String[] sData = prepareData(data, attrs, missings);
Dataset expected = DataLoader.generateDataset(descriptor, sData);
@@ -99,12 +91,11 @@
*/
protected String[] prepareData(double[][] data, Attribute[] attrs, List<Integer> missings) {
int nbAttributes = attrs.length;
- int missingAttr;
- StringBuilder builder;
String[] sData = new String[data.length];
for (int index = 0; index < data.length; index++) {
+ int missingAttr;
if (rng.nextDouble() < 0.0) {
// add a missing value
missings.add(index);
@@ -117,7 +108,7 @@
missingAttr = -1;
}
- builder = new StringBuilder();
+ StringBuilder builder = new StringBuilder();
for (int attr = 0; attr < nbAttributes; attr++) {
if (attr == missingAttr) {
@@ -142,7 +133,7 @@
* @param missings indexes of instance with missing values
* @param loaded
*/
- protected void testLoadedData(double[][] data, Attribute[] attrs, List<Integer> missings, Data loaded) {
+ protected static void testLoadedData(double[][] data, Attribute[] attrs, List<Integer> missings, Data loaded) {
int nbAttributes = attrs.length;
// check the vectors
@@ -189,7 +180,7 @@
* @param missings indexes of instance with missing values
* @param loaded
*/
- protected void testLoadedDataset(double[][] data, Attribute[] attrs, List<Integer> missings, Data loaded) {
+ protected static void testLoadedDataset(double[][] data, Attribute[] attrs, List<Integer> missings, Data loaded) {
int nbAttributes = attrs.length;
int iId = 0;
@@ -224,11 +215,11 @@
int datasize = 100;
// prepare the descriptors
- String descriptor = randomDescriptor(rng, nbAttributes);
+ String descriptor = Utils.randomDescriptor(rng, nbAttributes);
Attribute[] attrs = DescriptorUtils.parseDescriptor(descriptor);
// prepare the data
- double[][] source = randomDoubles(rng, descriptor, datasize);
+ double[][] source = Utils.randomDoubles(rng, descriptor, datasize);
List<Integer> missings = new ArrayList<Integer>();
String[] sData = prepareData(source, attrs, missings);
Dataset dataset = DataLoader.generateDataset(descriptor, sData);
@@ -249,11 +240,11 @@
int datasize = 100;
// prepare the descriptors
- String descriptor = randomDescriptor(rng, nbAttributes);
+ String descriptor = Utils.randomDescriptor(rng, nbAttributes);
Attribute[] attrs = DescriptorUtils.parseDescriptor(descriptor);
// prepare the data
- double[][] source = randomDoubles(rng, descriptor, datasize);
+ double[][] source = Utils.randomDoubles(rng, descriptor, datasize);
List<Integer> missings = new ArrayList<Integer>();
String[] sData = prepareData(source, attrs, missings);
Dataset expected = DataLoader.generateDataset(descriptor, sData);
@@ -277,7 +268,7 @@
* @param oValue old value in source
* @param nValue new value in loaded
*/
- protected void checkCategorical(double[][] source, List<Integer> missings,
+ protected static void checkCategorical(double[][] source, List<Integer> missings,
Data loaded, int attr, int aId, double oValue, double nValue) {
int lind = 0;
@@ -286,7 +277,7 @@
continue;
if (source[index][attr] == oValue) {
- assertTrue(nValue == loaded.get(lind).get(aId));
+ assertEquals(nValue, loaded.get(lind).get(aId));
} else {
assertFalse(nValue == loaded.get(lind).get(aId));
}
@@ -304,7 +295,7 @@
* @param labelInd label's index in source
* @param value source label's value
*/
- protected void checkLabel(double[][] source, List<Integer> missings,
+ protected static void checkLabel(double[][] source, List<Integer> missings,
Data loaded, int labelInd, double value) {
// label's code that corresponds to the value
int code = loaded.dataset.labelCode(Double.toString(value));
@@ -316,7 +307,7 @@
continue;
if (source[index][labelInd] == value) {
- assertTrue(code == loaded.get(lind).label);
+ assertEquals(code, loaded.get(lind).label);
} else {
assertFalse(code == loaded.get(lind).label);
}
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataTest.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataTest.java Sat Oct 3 12:46:06 2009
@@ -17,27 +17,20 @@
package org.apache.mahout.df.data;
-import static org.apache.mahout.df.data.Utils.double2String;
-import static org.apache.mahout.df.data.Utils.randomData;
-import static org.apache.mahout.df.data.Utils.randomDescriptor;
-
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Random;
-import org.apache.mahout.df.data.Data;
-import org.apache.mahout.df.data.DataLoader;
-import org.apache.mahout.df.data.Dataset;
-import org.apache.mahout.df.data.Instance;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.df.data.conditions.Condition;
import junit.framework.TestCase;
public class DataTest extends TestCase {
- protected final int nbAttributes = 10;
+ protected static final int nbAttributes = 10;
- protected final int datasize = 100;
+ protected static final int datasize = 100;
protected Random rng;
@@ -45,7 +38,7 @@
@Override
protected void setUp() throws Exception {
- rng = new Random();
+ rng = RandomUtils.getRandom();
data = Utils.randomData(rng, nbAttributes, datasize);
}
@@ -112,14 +105,14 @@
public void testIdenticalTrue() throws Exception {
// generate a small data, only to get the dataset
- Dataset dataset = randomData(rng, nbAttributes, 1).dataset;
+ Dataset dataset = Utils.randomData(rng, nbAttributes, 1).dataset;
// test empty data
Data empty = new Data(dataset, new ArrayList<Instance>());
assertTrue(empty.isIdentical());
// test identical data, except for the labels
- Data identical = randomData(rng, nbAttributes, datasize);
+ Data identical = Utils.randomData(rng, nbAttributes, datasize);
Instance model = identical.get(0);
for (int index = 1; index < datasize; index++) {
for (int attr = 0; attr < identical.dataset.nbAttributes(); attr++) {
@@ -134,7 +127,7 @@
int n = 10;
for (int nloop = 0; nloop < n; nloop++) {
- Data data = randomData(rng, nbAttributes, datasize);
+ Data data = Utils.randomData(rng, nbAttributes, datasize);
// choose a random instance
int index = rng.nextInt(datasize);
@@ -150,17 +143,17 @@
public void testIdenticalLabelTrue() throws Exception {
// generate a small data, only to get a dataset
- Dataset dataset = randomData(rng, nbAttributes, 1).dataset;
+ Dataset dataset = Utils.randomData(rng, nbAttributes, 1).dataset;
// test empty data
Data empty = new Data(dataset, new ArrayList<Instance>());
assertTrue(empty.identicalLabel());
// test identical labels
- String descriptor = randomDescriptor(rng, nbAttributes);
+ String descriptor = Utils.randomDescriptor(rng, nbAttributes);
double[][] source = Utils.randomDoublesWithSameLabel(rng, descriptor,
datasize, rng.nextInt());
- String[] sData = double2String(source);
+ String[] sData = Utils.double2String(source);
dataset = DataLoader.generateDataset(descriptor, sData);
Data data = DataLoader.loadData(dataset, sData);
@@ -172,7 +165,7 @@
int n = 10;
for (int nloop = 0; nloop < n; nloop++) {
- String descriptor = randomDescriptor(rng, nbAttributes);
+ String descriptor = Utils.randomDescriptor(rng, nbAttributes);
int label = Utils.findLabel(descriptor);
double[][] source = Utils.randomDoublesWithSameLabel(rng, descriptor,
datasize, rng.nextInt());
@@ -210,15 +203,13 @@
/**
* Test method for
- * {@link org.apache.mahout.df.data.Data#rsplit(java.util.Random, double)}.
+ * {@link org.apache.mahout.df.data.Data#rsplit(java.util.Random, int)}.
*/
public void testRsplit() {
- Data source;
- Data subset;
// rsplit should handle empty subsets
- source = data.clone();
- subset = source.rsplit(rng, 0);
+ Data source = data.clone();
+ Data subset = source.rsplit(rng, 0);
assertTrue("subset should be empty", subset.isEmpty());
assertEquals("source.size is incorrect", datasize, source.size());
@@ -257,27 +248,25 @@
}
public void testMajorityLabel() throws Exception {
- int label1, label2;
- int code1, code2;
// all instances have the same label
- String descriptor = randomDescriptor(rng, nbAttributes);
+ String descriptor = Utils.randomDescriptor(rng, nbAttributes);
int label = Utils.findLabel(descriptor);
- label1 = rng.nextInt();
+ int label1 = rng.nextInt();
double[][] source = Utils.randomDoublesWithSameLabel(rng, descriptor, 100,
label1);
- String[] sData = double2String(source);
+ String[] sData = Utils.double2String(source);
Dataset dataset = DataLoader.generateDataset(descriptor, sData);
Data data = DataLoader.loadData(dataset, sData);
-
- code1 = dataset.labelCode(Double.toString(label1));
+
+ int code1 = dataset.labelCode(Double.toString(label1));
assertEquals(code1, data.majorityLabel(rng));
// 51/100 vectors have label2
- label2 = label1 + 1;
+ int label2 = label1 + 1;
int nblabel2 = 51;
while (nblabel2 > 0) {
double[] vector = source[rng.nextInt(100)];
@@ -286,11 +275,11 @@
nblabel2--;
}
}
- sData = double2String(source);
+ sData = Utils.double2String(source);
dataset = DataLoader.generateDataset(descriptor, sData);
data = DataLoader.loadData(dataset, sData);
code1 = dataset.labelCode(Double.toString(label1));
- code2 = dataset.labelCode(Double.toString(label2));
+ int code2 = dataset.labelCode(Double.toString(label2));
// label2 should be the majority label
assertEquals(code2, data.majorityLabel(rng));
@@ -303,7 +292,7 @@
break;
}
} while (true);
- sData = double2String(source);
+ sData = Utils.double2String(source);
data = DataLoader.loadData(dataset, sData);
code1 = dataset.labelCode(Double.toString(label1));
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DatasetTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DatasetTest.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DatasetTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DatasetTest.java Sat Oct 3 12:46:06 2009
@@ -26,32 +26,30 @@
import java.io.IOException;
import java.util.Random;
-import org.apache.mahout.df.data.Dataset;
+import org.apache.mahout.common.RandomUtils;
import junit.framework.TestCase;
public class DatasetTest extends TestCase {
- private final int nbAttributes = 10;
-
- protected Random rng;
+ private static final int nbAttributes = 10;
protected ByteArrayOutputStream byteOutStream;
protected DataOutput out;
- protected Dataset readDataset(byte[] bytes) throws IOException {
+ protected static Dataset readDataset(byte[] bytes) throws IOException {
ByteArrayInputStream byteInStream = new ByteArrayInputStream(bytes);
DataInput in = new DataInputStream(byteInStream);
return Dataset.read(in);
}
public void testWritable() throws Exception {
- int n = 10;
- Random rng = new Random();
+ Random rng = RandomUtils.getRandom();
byteOutStream = new ByteArrayOutputStream();
out = new DataOutputStream(byteOutStream);
-
+
+ int n = 10;
for (int nloop=0; nloop< n; nloop++) {
byteOutStream.reset();
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DescriptorUtilsTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DescriptorUtilsTest.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DescriptorUtilsTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DescriptorUtilsTest.java Sat Oct 3 12:46:06 2009
@@ -17,17 +17,12 @@
package org.apache.mahout.df.data;
-import static org.apache.mahout.df.data.DescriptorUtils.parseDescriptor;
-import static org.apache.mahout.df.data.Utils.generateDescriptor;
-import static org.apache.mahout.df.data.Utils.randomTokens;
-
import java.util.Random;
import junit.framework.TestCase;
-import org.apache.mahout.df.data.DescriptorUtils;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.df.data.Dataset.Attribute;
-import org.apache.mahout.df.data.DescriptorUtils.DescriptorException;
public class DescriptorUtilsTest extends TestCase {
@@ -39,13 +34,13 @@
int n = 10;
int maxnbAttributes = 100;
- Random rng = new Random();
+ Random rng = RandomUtils.getRandom();
for (int nloop = 0; nloop < n; nloop++) {
int nbAttributes = rng.nextInt(maxnbAttributes) + 1;
- char[] tokens = randomTokens(rng, nbAttributes);
- Attribute[] attrs = parseDescriptor(generateDescriptor(tokens));
+ char[] tokens = Utils.randomTokens(rng, nbAttributes);
+ Attribute[] attrs = DescriptorUtils.parseDescriptor(Utils.generateDescriptor(tokens));
// verify that the attributes matches the token list
assertEquals("attributes size", nbAttributes, attrs.length);
@@ -88,7 +83,7 @@
}
}
- protected void validate(String descriptor, String description) throws DescriptorException {
+ protected static void validate(String descriptor, String description) throws DescriptorException {
assertEquals(descriptor, DescriptorUtils.generateDescriptor(description));
}
}
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/Utils.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/Utils.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/Utils.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/Utils.java Sat Oct 3 12:46:06 2009
@@ -31,10 +31,6 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.df.callback.PredictionCallback;
-import org.apache.mahout.df.data.Data;
-import org.apache.mahout.df.data.DataLoader;
-import org.apache.mahout.df.data.Dataset;
-import org.apache.mahout.df.data.DescriptorUtils;
import org.apache.mahout.df.data.Dataset.Attribute;
import org.slf4j.Logger;
@@ -43,7 +39,10 @@
*
*/
public class Utils {
- public static class LogCallback extends PredictionCallback {
+ private Utils() {
+ }
+
+ public static class LogCallback implements PredictionCallback {
protected final Logger log;
@@ -103,8 +102,8 @@
public static String generateDescriptor(char[] tokens) {
StringBuilder builder = new StringBuilder();
- for (int token = 0; token < tokens.length; token++) {
- builder.append(tokens[token]).append(" ");
+ for (char token1 : tokens) {
+ builder.append(token1).append(' ');
}
return builder.toString();
@@ -154,11 +153,8 @@
* @param rng Random number generator
* @param descriptor attributes description
* @param number number of data lines to generate
- * @return
- * @throws Exception
*/
- public static double[][] randomDoubles(Random rng, String descriptor,
- int number) throws Exception {
+ public static double[][] randomDoubles(Random rng, String descriptor, int number) throws DescriptorException {
Attribute[] attrs = DescriptorUtils.parseDescriptor(descriptor);
double[][] data = new double[number][];
@@ -227,8 +223,8 @@
protected static String double2String(double[] v) {
StringBuilder builder = new StringBuilder();
- for (int attr = 0; attr < v.length; attr++) {
- builder.append(v[attr]).append(",");
+ for (double aV : v) {
+ builder.append(aV).append(',');
}
return builder.toString();
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/inmem/InMemInputFormatTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/inmem/InMemInputFormatTest.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/inmem/InMemInputFormatTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/inmem/InMemInputFormatTest.java Sat Oct 3 12:46:06 2009
@@ -25,8 +25,8 @@
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.df.mapred.Builder;
-import org.apache.mahout.df.mapred.inmem.InMemInputFormat;
import org.apache.mahout.df.mapred.inmem.InMemInputFormat.InMemInputSplit;
import org.apache.mahout.df.mapred.inmem.InMemInputFormat.InMemRecordReader;
@@ -37,7 +37,7 @@
int maxNumSplits = 100;
int maxNbTrees = 1000;
- Random rng = new Random();
+ Random rng = RandomUtils.getRandom();
for (int nloop = 0; nloop < n; nloop++) {
int numSplits = rng.nextInt(maxNumSplits) + 1;
@@ -79,7 +79,7 @@
int maxNumSplits = 100;
int maxNbTrees = 1000;
- Random rng = new Random();
+ Random rng = RandomUtils.getRandom();
for (int nloop = 0; nloop < n; nloop++) {
int numSplits = rng.nextInt(maxNumSplits) + 1;
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/inmem/InMemInputSplitTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/inmem/InMemInputSplitTest.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/inmem/InMemInputSplitTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/inmem/InMemInputSplitTest.java Sat Oct 3 12:46:06 2009
@@ -28,18 +28,19 @@
import junit.framework.TestCase;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.df.mapred.inmem.InMemInputFormat.InMemInputSplit;
public class InMemInputSplitTest extends TestCase {
- protected Random rng;
+ private Random rng;
- protected ByteArrayOutputStream byteOutStream;
- protected DataOutput out;
+ private ByteArrayOutputStream byteOutStream;
+ private DataOutput out;
@Override
protected void setUp() throws Exception {
- rng = new Random();
+ rng = RandomUtils.getRandom();
byteOutStream = new ByteArrayOutputStream();
out = new DataOutputStream(byteOutStream);
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/PartialBuilderTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/PartialBuilderTest.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/PartialBuilderTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/PartialBuilderTest.java Sat Oct 3 12:46:06 2009
@@ -17,6 +17,7 @@
package org.apache.mahout.df.mapred.partial;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -30,11 +31,10 @@
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.hadoop.mapred.JobConf;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.df.builder.DefaultTreeBuilder;
import org.apache.mahout.df.builder.TreeBuilder;
import org.apache.mahout.df.callback.PredictionCallback;
-import org.apache.mahout.df.mapred.partial.PartialBuilder;
-import org.apache.mahout.df.mapred.partial.Step1Mapper;
import org.apache.mahout.df.mapreduce.MapredOutput;
import org.apache.mahout.df.mapreduce.partial.TreeID;
import org.apache.mahout.df.node.Leaf;
@@ -53,7 +53,7 @@
JobConf job = new JobConf();
job.setNumMapTasks(numMaps);
- Random rng = new Random();
+ Random rng = RandomUtils.getRandom();
// prepare the output
TreeID[] keys = new TreeID[numTrees];
@@ -115,15 +115,15 @@
* @param values
* @param firstIds partitions's first ids in hadoop's order
*/
- protected void randomKeyValues(Random rng, TreeID[] keys,
+ protected static void randomKeyValues(Random rng, TreeID[] keys,
MapredOutput[] values, int[] firstIds) {
int index = 0;
int firstId = 0;
List<Integer> partitions = new ArrayList<Integer>();
- int partition;
for (int p = 0; p < numMaps; p++) {
// select a random partition, not yet selected
+ int partition;
do {
partition = rng.nextInt(numMaps);
} while (partitions.contains(partition));
@@ -147,7 +147,7 @@
}
- protected int[] nextIntArray(Random rng, int size) {
+ protected static int[] nextIntArray(Random rng, int size) {
int[] array = new int[size];
for (int index = 0; index < size; index++) {
array[index] = rng.nextInt(101) - 1;
@@ -158,13 +158,13 @@
protected static class PartialBuilderChecker extends PartialBuilder {
- protected Long _seed;
+ protected final Long _seed;
- protected TreeBuilder _treeBuilder;
+ protected final TreeBuilder _treeBuilder;
- protected Path _datasetPath;
+ protected final Path _datasetPath;
- public PartialBuilderChecker(TreeBuilder treeBuilder, Path dataPath,
+ protected PartialBuilderChecker(TreeBuilder treeBuilder, Path dataPath,
Path datasetPath, Long seed) {
super(treeBuilder, dataPath, datasetPath, seed);
@@ -174,7 +174,7 @@
}
@Override
- protected void runJob(JobConf job) throws Exception {
+ protected void runJob(JobConf job) throws IOException {
// no need to run the job, just check if the params are correct
assertEquals(_seed, getRandomSeed(job));
@@ -199,13 +199,13 @@
* Mock Callback. Make sure that the callback receives the correct predictions
*
*/
- protected static class TestCallback extends PredictionCallback {
+ protected static class TestCallback implements PredictionCallback {
protected final TreeID[] keys;
protected final MapredOutput[] values;
- public TestCallback(TreeID[] keys, MapredOutput[] values) {
+ protected TestCallback(TreeID[] keys, MapredOutput[] values) {
this.keys = keys;
this.values = values;
}
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/PartialOutputCollector.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/PartialOutputCollector.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/PartialOutputCollector.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/PartialOutputCollector.java Sat Oct 3 12:46:06 2009
@@ -37,6 +37,7 @@
values = new MapredOutput[nbTrees];
}
+ @Override
public void collect(TreeID key, MapredOutput value) throws IOException {
if (index == keys.length) {
throw new IOException("Received more output than expected : " + index);
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/PartialSequentialBuilder.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/PartialSequentialBuilder.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/PartialSequentialBuilder.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/PartialSequentialBuilder.java Sat Oct 3 12:46:06 2009
@@ -39,9 +39,6 @@
import org.apache.mahout.df.callback.PredictionCallback;
import org.apache.mahout.df.data.Dataset;
import org.apache.mahout.df.mapred.Builder;
-import org.apache.mahout.df.mapred.partial.PartialBuilder;
-import org.apache.mahout.df.mapred.partial.Step1Mapper;
-import org.apache.mahout.df.mapred.partial.Step2Mapper;
import org.apache.mahout.df.mapreduce.MapredOutput;
import org.apache.mahout.df.mapreduce.partial.InterResults;
import org.apache.mahout.df.mapreduce.partial.TreeID;
@@ -106,17 +103,16 @@
firstOutput = new PartialOutputCollector(numTrees);
Reporter reporter = Reporter.NULL;
- long slowest = 0; // duration of slowest map
- int firstId = 0;
firstIds = new int[splits.length];
sizes = new int[splits.length];
// to compute firstIds, process the splits in file order
- for (int p = 0; p < splits.length; p++) {
- InputSplit split = splits[p];
+ int firstId = 0;
+ long slowest = 0; // duration of slowest map
+ for (InputSplit split : splits) {
int hp = ArrayUtils.indexOf(sorted, split); // hadoop's partition
-
+
RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter);
LongWritable key = reader.createKey();
@@ -274,7 +270,7 @@
*
*/
protected static class MockStep1Mapper extends Step1Mapper {
- public MockStep1Mapper(TreeBuilder treeBuilder, Dataset dataset, Long seed,
+ protected MockStep1Mapper(TreeBuilder treeBuilder, Dataset dataset, Long seed,
int partition, int numMapTasks, int numTrees) {
configure(false, true, treeBuilder, dataset);
configure(seed, partition, numMapTasks, numTrees);
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/PartitionBugTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/PartitionBugTest.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/PartitionBugTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/PartitionBugTest.java Sat Oct 3 12:46:06 2009
@@ -27,6 +27,7 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.df.builder.TreeBuilder;
import org.apache.mahout.df.callback.PredictionCallback;
import org.apache.mahout.df.data.Data;
@@ -37,13 +38,13 @@
import org.apache.mahout.df.node.Node;
public class PartitionBugTest extends TestCase {
- int numAttributes = 40;
+ static final int numAttributes = 40;
- int numInstances = 200;
+ static final int numInstances = 200;
- int numTrees = 10;
+ static final int numTrees = 10;
- int numMaps = 5;
+ static final int numMaps = 5;
/**
* Make sure that the correct instance ids are being computed
@@ -52,9 +53,8 @@
*
*/
public void testProcessOutput() throws Exception {
- Random rng = new Random();
+ Random rng = RandomUtils.getRandom();
//long seed = rng.nextLong();
- long seed = 1L;
// create a dataset large enough to be split up
String descriptor = Utils.randomDescriptor(rng, numAttributes);
@@ -82,6 +82,7 @@
// disable the second step because we can test without it
// and we won't be able to serialize the MockNode
PartialBuilder.setStep2(jobConf, false);
+ long seed = 1L;
PartialSequentialBuilder builder = new PartialSequentialBuilder(
treeBuilder, dataPath, dataset, seed, jobConf);
@@ -99,10 +100,10 @@
* Assets that the instanceId are correct
*
*/
- private static class MockCallback extends PredictionCallback {
+ private static class MockCallback implements PredictionCallback {
private final Data data;
- public MockCallback(Data data) {
+ private MockCallback(Data data) {
this.data = data;
}
@@ -151,13 +152,14 @@
protected void writeNode(DataOutput out) throws IOException {
}
+ @Override
public void readFields(DataInput in) throws IOException {
}
}
- private static class MockTreeBuilder extends TreeBuilder {
+ private static class MockTreeBuilder implements TreeBuilder {
@Override
public Node build(Random rng, Data data) {
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/Step0JobTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/Step0JobTest.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/Step0JobTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/Step0JobTest.java Sat Oct 3 12:46:06 2009
@@ -10,12 +10,14 @@
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.df.data.DataConverter;
import org.apache.mahout.df.data.DataLoader;
import org.apache.mahout.df.data.Dataset;
@@ -28,16 +30,16 @@
// the generated data must be big enough to be splited by FileInputFormat
- int numAttributes = 40;
+ private static final int numAttributes = 40;
- int numInstances = 200;
+ private static final int numInstances = 200;
- int numTrees = 10;
+ //int numTrees = 10;
- int numMaps = 5;
+ private static final int numMaps = 5;
public void testStep0Mapper() throws Exception {
- Random rng = new Random();
+ Random rng = RandomUtils.getRandom();
// create a dataset large enough to be split up
String descriptor = Utils.randomDescriptor(rng, numAttributes);
@@ -50,7 +52,7 @@
JobConf job = new JobConf();
job.setNumMapTasks(numMaps);
- TextInputFormat.setInputPaths(job, dataPath);
+ FileInputFormat.setInputPaths(job, dataPath);
// retrieve the splits
TextInputFormat input = (TextInputFormat) job.getInputFormat();
@@ -96,7 +98,7 @@
}
public void testProcessOutput() throws Exception {
- Random rng = new Random();
+ Random rng = RandomUtils.getRandom();
// create a dataset large enough to be split up
String descriptor = Utils.randomDescriptor(rng, numAttributes);
@@ -119,7 +121,7 @@
JobConf job = new JobConf();
job.setNumMapTasks(numMaps);
- TextInputFormat.setInputPaths(job, dataPath);
+ FileInputFormat.setInputPaths(job, dataPath);
// retrieve the splits
TextInputFormat input = (TextInputFormat) job.getInputFormat();
@@ -175,11 +177,12 @@
private int index = 0;
- public Step0OutputCollector(int numMaps) {
+ protected Step0OutputCollector(int numMaps) {
keys = new int[numMaps];
values = new Step0Output[numMaps];
}
+ @Override
public void collect(IntWritable key, Step0Output value) throws IOException {
if (index == keys.length) {
throw new IOException("Received more output than expected : " + index);
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/Step1MapperTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/Step1MapperTest.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/Step1MapperTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/Step1MapperTest.java Sat Oct 3 12:46:06 2009
@@ -1,9 +1,5 @@
package org.apache.mahout.df.mapred.partial;
-import static org.apache.mahout.df.data.Utils.double2String;
-import static org.apache.mahout.df.data.Utils.randomDescriptor;
-import static org.apache.mahout.df.data.Utils.randomDoubles;
-
import java.util.Random;
import junit.framework.TestCase;
@@ -11,12 +7,12 @@
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.Reporter;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.df.builder.TreeBuilder;
import org.apache.mahout.df.data.Data;
import org.apache.mahout.df.data.DataLoader;
import org.apache.mahout.df.data.Dataset;
import org.apache.mahout.df.data.Utils;
-import org.apache.mahout.df.mapred.partial.Step1Mapper;
import org.apache.mahout.df.mapreduce.partial.TreeID;
import org.apache.mahout.df.node.Leaf;
import org.apache.mahout.df.node.Node;
@@ -28,7 +24,7 @@
* partition
*
*/
- private static class MockTreeBuilder extends TreeBuilder {
+ private static class MockTreeBuilder implements TreeBuilder {
protected Data expected;
@@ -51,7 +47,7 @@
*
*/
protected static class MockStep1Mapper extends Step1Mapper {
- public MockStep1Mapper(TreeBuilder treeBuilder, Dataset dataset, Long seed,
+ protected MockStep1Mapper(TreeBuilder treeBuilder, Dataset dataset, Long seed,
int partition, int numMapTasks, int numTrees) {
configure(false, true, treeBuilder, dataset);
configure(seed, partition, numMapTasks, numTrees);
@@ -64,25 +60,25 @@
}
/** nb attributes per generated data instance */
- protected final int nbAttributes = 4;
+ protected static final int nbAttributes = 4;
/** nb generated data instances */
- protected final int nbInstances = 100;
+ protected static final int nbInstances = 100;
/** nb trees to build */
- protected final int nbTrees = 10;
+ protected static final int nbTrees = 10;
/** nb mappers to use */
- protected final int nbMappers = 2;
+ protected static final int nbMappers = 2;
public void testMapper() throws Exception {
Long seed = null;
- Random rng = new Random();
+ Random rng = RandomUtils.getRandom();
// prepare the data
- String descriptor = randomDescriptor(rng, nbAttributes);
- double[][] source = randomDoubles(rng, descriptor, nbInstances);
- String[] sData = double2String(source);
+ String descriptor = Utils.randomDescriptor(rng, nbAttributes);
+ double[][] source = Utils.randomDoubles(rng, descriptor, nbInstances);
+ String[] sData = Utils.double2String(source);
Dataset dataset = DataLoader.generateDataset(descriptor, sData);
String[][] splits = Utils.splitData(sData, nbMappers);
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/Step2MapperTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/Step2MapperTest.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/Step2MapperTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/Step2MapperTest.java Sat Oct 3 12:46:06 2009
@@ -17,10 +17,6 @@
package org.apache.mahout.df.mapred.partial;
-import static org.apache.mahout.df.data.Utils.double2String;
-import static org.apache.mahout.df.data.Utils.randomDescriptor;
-import static org.apache.mahout.df.data.Utils.randomDoubles;
-
import java.util.Random;
import junit.framework.TestCase;
@@ -32,6 +28,7 @@
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.Reporter;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.df.data.DataLoader;
import org.apache.mahout.df.data.Dataset;
import org.apache.mahout.df.data.Utils;
@@ -47,7 +44,7 @@
*
*/
private static class MockStep2Mapper extends Step2Mapper {
- public MockStep2Mapper(int partition, Dataset dataset, TreeID[] keys,
+ private MockStep2Mapper(int partition, Dataset dataset, TreeID[] keys,
Node[] trees, int numInstances) {
configure(partition, dataset, keys, trees, numInstances);
}
@@ -55,24 +52,24 @@
}
/** nb attributes per generated data instance */
- protected final int nbAttributes = 4;
+ protected static final int nbAttributes = 4;
/** nb generated data instances */
- protected final int nbInstances = 100;
+ protected static final int nbInstances = 100;
/** nb trees to build */
- protected final int nbTrees = 11;
+ protected static final int nbTrees = 11;
/** nb mappers to use */
- protected final int nbMappers = 5;
+ protected static final int nbMappers = 5;
public void testMapper() throws Exception {
- Random rng = new Random();
+ Random rng = RandomUtils.getRandom();
// prepare the data
- String descriptor = randomDescriptor(rng, nbAttributes);
- double[][] source = randomDoubles(rng, descriptor, nbInstances);
- String[] sData = double2String(source);
+ String descriptor = Utils.randomDescriptor(rng, nbAttributes);
+ double[][] source = Utils.randomDoubles(rng, descriptor, nbInstances);
+ String[] sData = Utils.double2String(source);
Dataset dataset = DataLoader.generateDataset(descriptor, sData);
String[][] splits = Utils.splitData(sData, nbMappers);
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/inmem/InMemInputFormatTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/inmem/InMemInputFormatTest.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/inmem/InMemInputFormatTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/inmem/InMemInputFormatTest.java Sat Oct 3 12:46:06 2009
@@ -24,6 +24,7 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.df.mapreduce.Builder;
import org.apache.mahout.df.mapreduce.inmem.InMemInputFormat.InMemInputSplit;
import org.apache.mahout.df.mapreduce.inmem.InMemInputFormat.InMemRecordReader;
@@ -35,7 +36,7 @@
int maxNumSplits = 100;
int maxNbTrees = 1000;
- Random rng = new Random();
+ Random rng = RandomUtils.getRandom();
for (int nloop = 0; nloop < n; nloop++) {
int numSplits = rng.nextInt(maxNumSplits) + 1;
@@ -76,7 +77,7 @@
int maxNumSplits = 100;
int maxNbTrees = 1000;
- Random rng = new Random();
+ Random rng = RandomUtils.getRandom();
for (int nloop = 0; nloop < n; nloop++) {
int numSplits = rng.nextInt(maxNumSplits) + 1;
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/inmem/InMemInputSplitTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/inmem/InMemInputSplitTest.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/inmem/InMemInputSplitTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/inmem/InMemInputSplitTest.java Sat Oct 3 12:46:06 2009
@@ -26,6 +26,7 @@
import java.io.IOException;
import java.util.Random;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.df.mapreduce.inmem.InMemInputFormat.InMemInputSplit;
import junit.framework.TestCase;
@@ -39,7 +40,7 @@
@Override
protected void setUp() throws Exception {
- rng = new Random();
+ rng = RandomUtils.getRandom();
byteOutStream = new ByteArrayOutputStream();
out = new DataOutputStream(byteOutStream);
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/InterResultsTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/InterResultsTest.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/InterResultsTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/InterResultsTest.java Sat Oct 3 12:46:06 2009
@@ -16,9 +16,6 @@
*/
package org.apache.mahout.df.mapreduce.partial;
-import static org.apache.mahout.df.data.Utils.double2String;
-import static org.apache.mahout.df.data.Utils.randomDoubles;
-
import java.util.Random;
import junit.framework.TestCase;
@@ -27,6 +24,7 @@
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.df.data.Utils;
import org.apache.mahout.df.node.Leaf;
import org.apache.mahout.df.node.Node;
@@ -34,16 +32,16 @@
public class InterResultsTest extends TestCase {
/** nb attributes per generated data instance */
- protected final int nbAttributes = 4;
+ protected static final int nbAttributes = 4;
/** nb generated data instances */
- protected final int nbInstances = 100;
+ protected static final int nbInstances = 100;
/** nb trees to build */
- protected final int nbTrees = 11;
+ protected static final int nbTrees = 11;
/** nb mappers to use */
- protected final int nbMappers = 5;
+ protected static final int nbMappers = 5;
protected String[][] splits;
@@ -53,12 +51,13 @@
int[] sizes;
+ @Override
protected void setUp() throws Exception {
- Random rng = new Random();
+ Random rng = RandomUtils.getRandom();
// prepare the data
- double[][] source = randomDoubles(rng, nbAttributes, nbInstances);
- String[] sData = double2String(source);
+ double[][] source = Utils.randomDoubles(rng, nbAttributes, nbInstances);
+ String[] sData = Utils.double2String(source);
splits = Utils.splitData(sData, nbMappers);
@@ -143,10 +142,9 @@
// load (key, tree)
TreeID key = new TreeID();
- Node value;
for (int index = 0; index < nbTrees; index++) {
key.readFields(in);
- value = Node.read(in);
+ Node value = Node.read(in);
assertEquals("index: " + index, keys[index], key);
assertEquals("index: " + index, trees[index], value);
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/MockContext.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/MockContext.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/MockContext.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/MockContext.java Sat Oct 3 12:46:06 2009
@@ -35,8 +35,7 @@
private int index = 0;
- @SuppressWarnings("unchecked")
- public MockContext(Mapper mapper, Configuration conf, TaskAttemptID taskid,
+ public MockContext(Mapper<?,?,?,?> mapper, Configuration conf, TaskAttemptID taskid,
int nbTrees) throws IOException, InterruptedException {
mapper.super(conf, taskid, null, null, null, null, null);
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartialBuilderTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartialBuilderTest.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartialBuilderTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartialBuilderTest.java Sat Oct 3 12:46:06 2009
@@ -17,6 +17,7 @@
package org.apache.mahout.df.mapreduce.partial;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -31,11 +32,11 @@
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.hadoop.mapreduce.Job;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.df.builder.DefaultTreeBuilder;
import org.apache.mahout.df.builder.TreeBuilder;
import org.apache.mahout.df.callback.PredictionCallback;
import org.apache.mahout.df.mapreduce.MapredOutput;
-import org.apache.mahout.df.mapreduce.partial.TreeID;
import org.apache.mahout.df.node.Leaf;
import org.apache.mahout.df.node.Node;
@@ -52,7 +53,7 @@
Configuration conf = new Configuration();
conf.setInt("mapred.map.tasks", numMaps);
- Random rng = new Random();
+ Random rng = RandomUtils.getRandom();
// prepare the output
TreeID[] keys = new TreeID[numTrees];
@@ -114,15 +115,15 @@
* @param values
* @param firstIds partitions's first ids in hadoop's order
*/
- protected void randomKeyValues(Random rng, TreeID[] keys,
+ protected static void randomKeyValues(Random rng, TreeID[] keys,
MapredOutput[] values, int[] firstIds) {
int index = 0;
int firstId = 0;
List<Integer> partitions = new ArrayList<Integer>();
- int partition;
for (int p = 0; p < numMaps; p++) {
// select a random partition, not yet selected
+ int partition;
do {
partition = rng.nextInt(numMaps);
} while (partitions.contains(partition));
@@ -146,7 +147,7 @@
}
- protected int[] nextIntArray(Random rng, int size) {
+ protected static int[] nextIntArray(Random rng, int size) {
int[] array = new int[size];
for (int index = 0; index < size; index++) {
array[index] = rng.nextInt(101) - 1;
@@ -157,13 +158,13 @@
protected static class PartialBuilderChecker extends PartialBuilder {
- protected Long _seed;
+ protected final Long _seed;
- protected TreeBuilder _treeBuilder;
+ protected final TreeBuilder _treeBuilder;
- protected Path _datasetPath;
+ protected final Path _datasetPath;
- public PartialBuilderChecker(TreeBuilder treeBuilder, Path dataPath,
+ protected PartialBuilderChecker(TreeBuilder treeBuilder, Path dataPath,
Path datasetPath, Long seed) {
super(treeBuilder, dataPath, datasetPath, seed);
@@ -173,7 +174,7 @@
}
@Override
- protected boolean runJob(Job job) throws Exception {
+ protected boolean runJob(Job job) throws IOException {
// no need to run the job, just check if the params are correct
Configuration conf = job.getConfiguration();
@@ -202,13 +203,13 @@
* Mock Callback. Make sure that the callback receives the correct predictions
*
*/
- protected static class TestCallback extends PredictionCallback {
+ protected static class TestCallback implements PredictionCallback {
protected final TreeID[] keys;
protected final MapredOutput[] values;
- public TestCallback(TreeID[] keys, MapredOutput[] values) {
+ protected TestCallback(TreeID[] keys, MapredOutput[] values) {
this.keys = keys;
this.values = values;
}
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartialSequentialBuilder.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartialSequentialBuilder.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartialSequentialBuilder.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartialSequentialBuilder.java Sat Oct 3 12:46:06 2009
@@ -54,17 +54,17 @@
private static final Logger log = LoggerFactory.getLogger(PartialSequentialBuilder.class);
- protected MockContext firstOutput;
+ private MockContext firstOutput;
- protected MockContext secondOutput;
+ private MockContext secondOutput;
- protected final Dataset dataset;
+ private final Dataset dataset;
/** first instance id in hadoop's order */
- protected int[] firstIds;
+ private int[] firstIds;
/** partitions' sizes in hadoop order */
- protected int[] sizes;
+ private int[] sizes;
public PartialSequentialBuilder(TreeBuilder treeBuilder, Path dataPath,
Dataset dataset, long seed, Configuration conf) {
@@ -90,9 +90,8 @@
conf.setInt("mapred.map.tasks", num);
}
- @SuppressWarnings("unchecked")
@Override
- protected boolean runJob(Job job) throws Exception {
+ protected boolean runJob(Job job) throws IOException, InterruptedException {
Configuration conf = job.getConfiguration();
// retrieve the splits
@@ -111,13 +110,13 @@
TaskAttemptContext task = new TaskAttemptContext(conf, new TaskAttemptID());
firstOutput = new MockContext(new Step1Mapper(), conf, task.getTaskAttemptID(), numTrees);
- long slowest = 0; // duration of slowest map
- int firstId = 0;
firstIds = new int[nbSplits];
sizes = new int[nbSplits];
// to compute firstIds, process the splits in file order
+ long slowest = 0; // duration of slowest map
+ int firstId = 0;
for (int p = 0; p < nbSplits; p++) {
InputSplit split = splits.get(p);
int hp = ArrayUtils.indexOf(sorted, split); // hadoop's partition
@@ -153,8 +152,7 @@
}
@Override
- protected DecisionForest parseOutput(Job job, PredictionCallback callback)
- throws Exception {
+ protected DecisionForest parseOutput(Job job, PredictionCallback callback) throws IOException, InterruptedException {
Configuration conf = job.getConfiguration();
DecisionForest forest = processOutput(firstOutput.keys, firstOutput.values, callback);
@@ -212,8 +210,8 @@
* @throws Exception
*
*/
- @SuppressWarnings("unchecked")
- protected void secondStep(Configuration conf, Path forestPath, PredictionCallback callback) throws Exception {
+ protected void secondStep(Configuration conf, Path forestPath, PredictionCallback callback)
+ throws IOException, InterruptedException {
JobContext jobContext = new JobContext(conf, new JobID());
// retrieve the splits
@@ -281,7 +279,7 @@
*
*/
protected static class MockStep1Mapper extends Step1Mapper {
- public MockStep1Mapper(TreeBuilder treeBuilder, Dataset dataset, Long seed,
+ protected MockStep1Mapper(TreeBuilder treeBuilder, Dataset dataset, Long seed,
int partition, int numMapTasks, int numTrees) {
configure(false, true, treeBuilder, dataset);
configure(seed, partition, numMapTasks, numTrees);
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartitionBugTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartitionBugTest.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartitionBugTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartitionBugTest.java Sat Oct 3 12:46:06 2009
@@ -27,6 +27,7 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.df.builder.TreeBuilder;
import org.apache.mahout.df.callback.PredictionCallback;
import org.apache.mahout.df.data.Data;
@@ -37,13 +38,13 @@
import org.apache.mahout.df.node.Node;
public class PartitionBugTest extends TestCase {
- int numAttributes = 40;
+ static final int numAttributes = 40;
- int numInstances = 200;
+ static final int numInstances = 200;
- int numTrees = 10;
+ static final int numTrees = 10;
- int numMaps = 5;
+ static final int numMaps = 5;
/**
* Make sure that the correct instance ids are being computed
@@ -52,9 +53,8 @@
*
*/
public void testProcessOutput() throws Exception {
- Random rng = new Random();
+ Random rng = RandomUtils.getRandom();
//long seed = rng.nextLong();
- long seed = 1L;
// create a dataset large enough to be split up
String descriptor = Utils.randomDescriptor(rng, numAttributes);
@@ -82,6 +82,7 @@
// disable the second step because we can test without it
// and we won't be able to serialize the MockNode
PartialBuilder.setStep2(conf, false);
+ long seed = 1L;
PartialSequentialBuilder builder = new PartialSequentialBuilder(
treeBuilder, dataPath, dataset, seed, conf);
@@ -96,13 +97,13 @@
}
/**
- * Assets that the instanceId are correct
+ * Asserts that the instanceId are correct
*
*/
- private static class MockCallback extends PredictionCallback {
+ private static class MockCallback implements PredictionCallback {
private final Data data;
- public MockCallback(Data data) {
+ private MockCallback(Data data) {
this.data = data;
}
@@ -151,13 +152,14 @@
protected void writeNode(DataOutput out) throws IOException {
}
+ @Override
public void readFields(DataInput in) throws IOException {
}
}
- private static class MockTreeBuilder extends TreeBuilder {
+ private static class MockTreeBuilder implements TreeBuilder {
@Override
public Node build(Random rng, Data data) {
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/Step0JobTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/Step0JobTest.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/Step0JobTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/Step0JobTest.java Sat Oct 3 12:46:06 2009
@@ -22,7 +22,9 @@
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.Mapper.Context;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.df.data.DataConverter;
import org.apache.mahout.df.data.DataLoader;
import org.apache.mahout.df.data.Dataset;
@@ -35,13 +37,13 @@
// the generated data must be big enough to be split by FileInputFormat
- int numAttributes = 40;
+ static final int numAttributes = 40;
- int numInstances = 2000;
+ static final int numInstances = 2000;
int numTrees = 10;
- int numMaps = 5;
+ static final int numMaps = 5;
Step0Context context;
@@ -62,9 +64,8 @@
conf.setLong("mapred.max.split.size", goalSize);
}
- @SuppressWarnings("unchecked")
public void testStep0Mapper() throws Exception {
- Random rng = new Random();
+ Random rng = RandomUtils.getRandom();
// create a dataset large enough to be split up
String descriptor = Utils.randomDescriptor(rng, numAttributes);
@@ -76,7 +77,7 @@
Job job = new Job();
job.setInputFormatClass(TextInputFormat.class);
- TextInputFormat.setInputPaths(job, dataPath);
+ FileInputFormat.setInputPaths(job, dataPath);
setMaxSplitSize(job.getConfiguration(), dataPath, numMaps);
@@ -128,7 +129,7 @@
}
public void testProcessOutput() throws Exception {
- Random rng = new Random();
+ Random rng = RandomUtils.getRandom();
// create a dataset large enough to be split up
String descriptor = Utils.randomDescriptor(rng, numAttributes);
@@ -213,8 +214,7 @@
private int index = 0;
- @SuppressWarnings("unchecked")
- public Step0Context(Mapper mapper, Configuration conf,
+ public Step0Context(Mapper<?,?,?,?> mapper, Configuration conf,
TaskAttemptID taskid, int numMaps) throws IOException,
InterruptedException {
mapper.super(conf, taskid, null, null, null, null, null);
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/Step1MapperTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/Step1MapperTest.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/Step1MapperTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/Step1MapperTest.java Sat Oct 3 12:46:06 2009
@@ -1,9 +1,5 @@
package org.apache.mahout.df.mapreduce.partial;
-import static org.apache.mahout.df.data.Utils.double2String;
-import static org.apache.mahout.df.data.Utils.randomDescriptor;
-import static org.apache.mahout.df.data.Utils.randomDoubles;
-
import java.util.Random;
import junit.framework.TestCase;
@@ -12,12 +8,12 @@
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.df.builder.TreeBuilder;
import org.apache.mahout.df.data.Data;
import org.apache.mahout.df.data.DataLoader;
import org.apache.mahout.df.data.Dataset;
import org.apache.mahout.df.data.Utils;
-import org.apache.mahout.df.mapreduce.partial.TreeID;
import org.apache.mahout.df.node.Leaf;
import org.apache.mahout.df.node.Node;
@@ -28,7 +24,7 @@
* partition
*
*/
- private static class MockTreeBuilder extends TreeBuilder {
+ private static class MockTreeBuilder implements TreeBuilder {
protected Data expected;
@@ -51,7 +47,7 @@
*
*/
protected static class MockStep1Mapper extends Step1Mapper {
- public MockStep1Mapper(TreeBuilder treeBuilder, Dataset dataset, Long seed,
+ protected MockStep1Mapper(TreeBuilder treeBuilder, Dataset dataset, Long seed,
int partition, int numMapTasks, int numTrees) {
configure(false, true, treeBuilder, dataset);
configure(seed, partition, numMapTasks, numTrees);
@@ -64,26 +60,25 @@
}
/** nb attributes per generated data instance */
- protected final int nbAttributes = 4;
+ protected static final int nbAttributes = 4;
/** nb generated data instances */
- protected final int nbInstances = 100;
+ protected static final int nbInstances = 100;
/** nb trees to build */
- protected final int nbTrees = 10;
+ protected static final int nbTrees = 10;
/** nb mappers to use */
- protected final int nbMappers = 2;
+ protected static final int nbMappers = 2;
- @SuppressWarnings("unchecked")
public void testMapper() throws Exception {
Long seed = null;
- Random rng = new Random();
+ Random rng = RandomUtils.getRandom();
// prepare the data
- String descriptor = randomDescriptor(rng, nbAttributes);
- double[][] source = randomDoubles(rng, descriptor, nbInstances);
- String[] sData = double2String(source);
+ String descriptor = Utils.randomDescriptor(rng, nbAttributes);
+ double[][] source = Utils.randomDoubles(rng, descriptor, nbInstances);
+ String[] sData = Utils.double2String(source);
Dataset dataset = DataLoader.generateDataset(descriptor, sData);
String[][] splits = Utils.splitData(sData, nbMappers);
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/Step2MapperTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/Step2MapperTest.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/Step2MapperTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/Step2MapperTest.java Sat Oct 3 12:46:06 2009
@@ -17,10 +17,6 @@
package org.apache.mahout.df.mapreduce.partial;
-import static org.apache.mahout.df.data.Utils.double2String;
-import static org.apache.mahout.df.data.Utils.randomDescriptor;
-import static org.apache.mahout.df.data.Utils.randomDoubles;
-
import java.util.Random;
import junit.framework.TestCase;
@@ -32,11 +28,10 @@
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.df.data.DataLoader;
import org.apache.mahout.df.data.Dataset;
import org.apache.mahout.df.data.Utils;
-import org.apache.mahout.df.mapreduce.partial.InterResults;
-import org.apache.mahout.df.mapreduce.partial.TreeID;
import org.apache.mahout.df.node.Leaf;
import org.apache.mahout.df.node.Node;
@@ -47,7 +42,7 @@
*
*/
private static class MockStep2Mapper extends Step2Mapper {
- public MockStep2Mapper(int partition, Dataset dataset, TreeID[] keys,
+ private MockStep2Mapper(int partition, Dataset dataset, TreeID[] keys,
Node[] trees, int numInstances) {
configure(partition, dataset, keys, trees, numInstances);
}
@@ -55,25 +50,24 @@
}
/** nb attributes per generated data instance */
- protected final int nbAttributes = 4;
+ protected static final int nbAttributes = 4;
/** nb generated data instances */
- protected final int nbInstances = 100;
+ protected static final int nbInstances = 100;
/** nb trees to build */
- protected final int nbTrees = 11;
+ protected static final int nbTrees = 11;
/** nb mappers to use */
- protected final int nbMappers = 5;
+ protected static final int nbMappers = 5;
- @SuppressWarnings("unchecked")
public void testMapper() throws Exception {
- Random rng = new Random();
+ Random rng = RandomUtils.getRandom();
// prepare the data
- String descriptor = randomDescriptor(rng, nbAttributes);
- double[][] source = randomDoubles(rng, descriptor, nbInstances);
- String[] sData = double2String(source);
+ String descriptor = Utils.randomDescriptor(rng, nbAttributes);
+ double[][] source = Utils.randomDoubles(rng, descriptor, nbInstances);
+ String[] sData = Utils.double2String(source);
Dataset dataset = DataLoader.generateDataset(descriptor, sData);
String[][] splits = Utils.splitData(sData, nbMappers);
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/TreeIDTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/TreeIDTest.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/TreeIDTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/TreeIDTest.java Sat Oct 3 12:46:06 2009
@@ -19,7 +19,7 @@
import java.util.Random;
-import org.apache.mahout.df.mapreduce.partial.TreeID;
+import org.apache.mahout.common.RandomUtils;
import junit.framework.TestCase;
@@ -27,7 +27,7 @@
public void testTreeID() {
int n = 1000000;
- Random rng = new Random();
+ Random rng = RandomUtils.getRandom();
for (int nloop = 0; nloop < n; nloop++) {
int partition = Math.abs(rng.nextInt());
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/node/NodeTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/node/NodeTest.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/node/NodeTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/node/NodeTest.java Sat Oct 3 12:46:06 2009
@@ -26,23 +26,20 @@
import java.io.IOException;
import java.util.Random;
-import org.apache.mahout.df.node.CategoricalNode;
-import org.apache.mahout.df.node.Leaf;
-import org.apache.mahout.df.node.Node;
-import org.apache.mahout.df.node.NumericalNode;
+import org.apache.mahout.common.RandomUtils;
import junit.framework.TestCase;
public class NodeTest extends TestCase {
- protected Random rng;
+ private Random rng;
- protected ByteArrayOutputStream byteOutStream;
- protected DataOutput out;
+ private ByteArrayOutputStream byteOutStream;
+ private DataOutput out;
@Override
protected void setUp() throws Exception {
- rng = new Random();
+ rng = RandomUtils.getRandom();
byteOutStream = new ByteArrayOutputStream();
out = new DataOutputStream(byteOutStream);
@@ -77,30 +74,27 @@
}
public void testReadLeaf() throws Exception {
- Leaf leaf;
- leaf = new Leaf(rng.nextInt());
+ Leaf leaf = new Leaf(rng.nextInt());
leaf.write(out);
assertEquals(leaf, readNode());
}
public void testParseNumerical() throws Exception {
- NumericalNode node;
- node = new NumericalNode(rng.nextInt(), rng.nextDouble(), new Leaf(rng
+ NumericalNode node = new NumericalNode(rng.nextInt(), rng.nextDouble(), new Leaf(rng
.nextInt()), new Leaf(rng.nextInt()));
node.write(out);
assertEquals(node, readNode());
}
public void testCategoricalNode() throws Exception {
- CategoricalNode node;
- node = new CategoricalNode(rng.nextInt(), new double[] { rng.nextDouble(),
- rng.nextDouble(), rng.nextDouble() }, new Node[] {
+ CategoricalNode node = new CategoricalNode(rng.nextInt(), new double[]{rng.nextDouble(),
+ rng.nextDouble(), rng.nextDouble()}, new Node[]{
new Leaf(rng.nextInt()), new Leaf(rng.nextInt()),
- new Leaf(rng.nextInt()) });
-
+ new Leaf(rng.nextInt())});
+
node.write(out);
assertEquals(node, readNode());
}
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/split/DefaultIgSplitTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/split/DefaultIgSplitTest.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/split/DefaultIgSplitTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/split/DefaultIgSplitTest.java Sat Oct 3 12:46:06 2009
@@ -17,41 +17,32 @@
package org.apache.mahout.df.split;
-import static org.apache.mahout.df.data.Utils.double2String;
-import static org.apache.mahout.df.data.Utils.randomDescriptor;
-import static org.apache.mahout.df.data.Utils.randomDoublesWithSameLabel;
-
import java.util.Random;
import junit.framework.TestCase;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.df.data.Data;
import org.apache.mahout.df.data.DataLoader;
import org.apache.mahout.df.data.Dataset;
import org.apache.mahout.df.data.Utils;
-import org.apache.mahout.df.split.DefaultIgSplit;
public class DefaultIgSplitTest extends TestCase {
- protected final int nbAttributes = 10;
+ protected static final int nbAttributes = 10;
public void testEntropy() throws Exception {
- Random rng = new Random();
- String descriptor = randomDescriptor(rng, nbAttributes);
+ Random rng = RandomUtils.getRandom();
+ String descriptor = Utils.randomDescriptor(rng, nbAttributes);
int label = Utils.findLabel(descriptor);
- double[][] temp;
- String[] sData;
- Data data;
- Dataset dataset;
- DefaultIgSplit iG;
-
+
// all the vectors have the same label (0)
- temp = randomDoublesWithSameLabel(rng, descriptor, 100, 0);
- sData = double2String(temp);
- dataset = DataLoader.generateDataset(descriptor, sData);
- data = DataLoader.loadData(dataset, sData);
- iG = new DefaultIgSplit();
-
+ double[][] temp = Utils.randomDoublesWithSameLabel(rng, descriptor, 100, 0);
+ String[] sData = Utils.double2String(temp);
+ Dataset dataset = DataLoader.generateDataset(descriptor, sData);
+ Data data = DataLoader.loadData(dataset, sData);
+ DefaultIgSplit iG = new DefaultIgSplit();
+
double expected = 0.0 - 1.0 * Math.log(1.0) / Math.log(2.0);
assertEquals(expected, iG.entropy(data));
@@ -60,7 +51,7 @@
for (int index = 0; index < 50; index++) {
temp[index][label] = 1.0;
}
- sData = double2String(temp);
+ sData = Utils.double2String(temp);
dataset = DataLoader.generateDataset(descriptor, sData);
data = DataLoader.loadData(dataset, sData);
iG = new DefaultIgSplit();
@@ -74,7 +65,7 @@
for (int index = 0; index < 15; index++) {
temp[index][label] = 2.0;
}
- sData = double2String(temp);
+ sData = Utils.double2String(temp);
dataset = DataLoader.generateDataset(descriptor, sData);
data = DataLoader.loadData(dataset, sData);
iG = new DefaultIgSplit();
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/split/OptIgSplitTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/split/OptIgSplitTest.java?rev=821305&r1=821304&r2=821305&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/split/OptIgSplitTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/split/OptIgSplitTest.java Sat Oct 3 12:46:06 2009
@@ -19,20 +19,17 @@
import java.util.Random;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.df.data.Data;
import org.apache.mahout.df.data.Utils;
-import org.apache.mahout.df.split.DefaultIgSplit;
-import org.apache.mahout.df.split.IgSplit;
-import org.apache.mahout.df.split.OptIgSplit;
-import org.apache.mahout.df.split.Split;
import junit.framework.TestCase;
public class OptIgSplitTest extends TestCase {
- protected final int nbAttributes = 20;
+ protected static final int nbAttributes = 20;
- protected final int numInstances = 100;
+ protected static final int numInstances = 100;
public void testComputeSplit() throws Exception {
int n = 100;
@@ -40,7 +37,7 @@
IgSplit ref = new DefaultIgSplit();
IgSplit opt = new OptIgSplit();
- Random rng = new Random(1L);
+ Random rng = RandomUtils.getRandom();
Data data = Utils.randomData(rng, nbAttributes, numInstances);
for (int nloop = 0; nloop < n; nloop++) {