You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sm...@apache.org on 2015/11/06 06:15:19 UTC
[1/2] mahout git commit: MAHOUT-1759: Deprecate Random Forests,
this closes apache/mahout#173
Repository: mahout
Updated Branches:
refs/heads/master 48bfb64f1 -> 1ffa3a460
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputSplitTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputSplitTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputSplitTest.java
index f94841d..aeea084 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputSplitTest.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputSplitTest.java
@@ -31,7 +31,7 @@ import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.classifier.df.mapreduce.inmem.InMemInputFormat.InMemInputSplit;
import org.junit.Before;
import org.junit.Test;
-
+@Deprecated
public final class InMemInputSplitTest extends MahoutTestCase {
private Random rng;
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilderTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilderTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilderTest.java
index 3903c33..2821034 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilderTest.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilderTest.java
@@ -38,7 +38,7 @@ import org.apache.mahout.classifier.df.mapreduce.MapredOutput;
import org.apache.mahout.classifier.df.node.Leaf;
import org.apache.mahout.classifier.df.node.Node;
import org.junit.Test;
-
+@Deprecated
public final class PartialBuilderTest extends MahoutTestCase {
private static final int NUM_MAPS = 5;
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1MapperTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1MapperTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1MapperTest.java
index a4c1bfd..c5aec7f 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1MapperTest.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1MapperTest.java
@@ -35,7 +35,7 @@ import org.apache.mahout.common.MahoutTestCase;
import org.easymock.Capture;
import org.easymock.CaptureType;
import org.junit.Test;
-
+@Deprecated
public final class Step1MapperTest extends MahoutTestCase {
/**
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/TreeIDTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/TreeIDTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/TreeIDTest.java
index d3c30d4..c4beeaf 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/TreeIDTest.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/TreeIDTest.java
@@ -22,7 +22,7 @@ import java.util.Random;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.RandomUtils;
import org.junit.Test;
-
+@Deprecated
public final class TreeIDTest extends MahoutTestCase {
@Test
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/node/NodeTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/node/NodeTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/node/NodeTest.java
index 236a2e0..1300926 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/df/node/NodeTest.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/df/node/NodeTest.java
@@ -30,7 +30,7 @@ import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.RandomUtils;
import org.junit.Before;
import org.junit.Test;
-
+@Deprecated
public final class NodeTest extends MahoutTestCase {
private Random rng;
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/split/DefaultIgSplitTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/split/DefaultIgSplitTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/split/DefaultIgSplitTest.java
index c5eb635..94d0ad9 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/df/split/DefaultIgSplitTest.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/df/split/DefaultIgSplitTest.java
@@ -26,7 +26,7 @@ import org.apache.mahout.classifier.df.data.DataLoader;
import org.apache.mahout.classifier.df.data.Dataset;
import org.apache.mahout.classifier.df.data.Utils;
import org.junit.Test;
-
+@Deprecated
public final class DefaultIgSplitTest extends MahoutTestCase {
private static final int NUM_ATTRIBUTES = 10;
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/split/RegressionSplitTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/split/RegressionSplitTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/split/RegressionSplitTest.java
index dbd1ef7..9c5893a 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/df/split/RegressionSplitTest.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/df/split/RegressionSplitTest.java
@@ -24,7 +24,7 @@ import org.apache.mahout.classifier.df.data.DescriptorException;
import org.apache.mahout.classifier.df.data.conditions.Condition;
import org.apache.mahout.common.MahoutTestCase;
import org.junit.Test;
-
+@Deprecated
public final class RegressionSplitTest extends MahoutTestCase {
private static Data[] generateTrainingData() throws DescriptorException {
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/tools/VisualizerTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/tools/VisualizerTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/tools/VisualizerTest.java
index eacce36..aa15410 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/df/tools/VisualizerTest.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/df/tools/VisualizerTest.java
@@ -37,7 +37,7 @@ import java.text.DecimalFormat;
import java.util.List;
import java.util.Random;
import java.util.ArrayList;
-
+@Deprecated
public final class VisualizerTest extends MahoutTestCase {
private static final char DECIMAL_SEPARATOR =
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/src/conf/driver.classes.default.props
----------------------------------------------------------------------
diff --git a/src/conf/driver.classes.default.props b/src/conf/driver.classes.default.props
index cc57f1f..cb37efb 100644
--- a/src/conf/driver.classes.default.props
+++ b/src/conf/driver.classes.default.props
@@ -40,8 +40,6 @@ org.apache.mahout.clustering.streaming.mapreduce.StreamingKMeansDriver = streami
#new bayes
org.apache.mahout.classifier.naivebayes.training.TrainNaiveBayesJob = trainnb : Train the Vector-based Bayes classifier
org.apache.mahout.classifier.naivebayes.test.TestNaiveBayesDriver = testnb : Test the Vector-based Bayes classifier
-org.apache.mahout.classifier.df.mapreduce.BuildForest = buildforest : Build the random forest classifier
-org.apache.mahout.classifier.df.mapreduce.TestForest = testforest : Test the random forest classifier
#SGD
org.apache.mahout.classifier.sgd.TrainLogistic = trainlogistic : Train a logistic regression using stochastic gradient descent
[2/2] mahout git commit: MAHOUT-1759: Deprecate Random Forests,
this closes apache/mahout#173
Posted by sm...@apache.org.
MAHOUT-1759: Deprecate Random Forests, this closes apache/mahout#173
Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/1ffa3a46
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/1ffa3a46
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/1ffa3a46
Branch: refs/heads/master
Commit: 1ffa3a4609f58f9144a69422b202ae53cd8ef6c9
Parents: 48bfb64
Author: smarthi <sm...@apache.org>
Authored: Fri Nov 6 00:15:09 2015 -0500
Committer: smarthi <sm...@apache.org>
Committed: Fri Nov 6 00:15:09 2015 -0500
----------------------------------------------------------------------
examples/bin/README.txt | 1 -
examples/bin/create-rf-data.sh | 36 ---
examples/bin/run-rf.sh | 89 ------
.../mahout/classifier/df/BreimanExample.java | 224 -------------
.../classifier/df/mapreduce/BuildForest.java | 258 ---------------
.../classifier/df/mapreduce/TestForest.java | 313 -------------------
.../apache/mahout/classifier/df/Bagging.java | 1 +
.../apache/mahout/classifier/df/DFUtils.java | 1 +
.../mahout/classifier/df/DecisionForest.java | 1 +
.../mahout/classifier/df/ErrorEstimate.java | 1 +
.../df/builder/DecisionTreeBuilder.java | 1 +
.../df/builder/DefaultTreeBuilder.java | 1 +
.../classifier/df/builder/TreeBuilder.java | 1 +
.../apache/mahout/classifier/df/data/Data.java | 1 +
.../classifier/df/data/DataConverter.java | 1 +
.../mahout/classifier/df/data/DataLoader.java | 1 +
.../mahout/classifier/df/data/DataUtils.java | 1 +
.../mahout/classifier/df/data/Dataset.java | 1 +
.../classifier/df/data/DescriptorException.java | 1 +
.../classifier/df/data/DescriptorUtils.java | 1 +
.../mahout/classifier/df/data/Instance.java | 1 +
.../df/data/conditions/Condition.java | 1 +
.../classifier/df/data/conditions/Equals.java | 1 +
.../df/data/conditions/GreaterOrEquals.java | 1 +
.../classifier/df/data/conditions/Lesser.java | 1 +
.../mahout/classifier/df/mapreduce/Builder.java | 1 +
.../classifier/df/mapreduce/Classifier.java | 1 +
.../classifier/df/mapreduce/MapredMapper.java | 1 +
.../classifier/df/mapreduce/MapredOutput.java | 1 +
.../df/mapreduce/inmem/InMemBuilder.java | 1 +
.../df/mapreduce/inmem/InMemInputFormat.java | 1 +
.../df/mapreduce/inmem/InMemMapper.java | 1 +
.../df/mapreduce/partial/PartialBuilder.java | 1 +
.../df/mapreduce/partial/Step1Mapper.java | 1 +
.../classifier/df/mapreduce/partial/TreeID.java | 1 +
.../classifier/df/node/CategoricalNode.java | 2 +-
.../apache/mahout/classifier/df/node/Leaf.java | 1 +
.../apache/mahout/classifier/df/node/Node.java | 1 +
.../classifier/df/node/NumericalNode.java | 1 +
.../classifier/df/ref/SequentialBuilder.java | 1 +
.../classifier/df/split/DefaultIgSplit.java | 1 +
.../mahout/classifier/df/split/IgSplit.java | 1 +
.../mahout/classifier/df/split/OptIgSplit.java | 1 +
.../classifier/df/split/RegressionSplit.java | 1 +
.../mahout/classifier/df/split/Split.java | 1 +
.../classifier/df/tools/ForestVisualizer.java | 1 +
.../mahout/classifier/df/tools/Frequencies.java | 1 +
.../classifier/df/tools/FrequenciesJob.java | 1 +
.../classifier/df/tools/TreeVisualizer.java | 1 +
.../mahout/classifier/df/tools/UDistrib.java | 1 +
.../classifier/df/DecisionForestTest.java | 2 +-
.../df/builder/DecisionTreeBuilderTest.java | 2 +-
.../df/builder/DefaultTreeBuilderTest.java | 2 +-
.../df/builder/InfiniteRecursionTest.java | 2 +-
.../classifier/df/data/DataConverterTest.java | 2 +-
.../classifier/df/data/DataLoaderTest.java | 2 +-
.../mahout/classifier/df/data/DataTest.java | 2 +-
.../mahout/classifier/df/data/DatasetTest.java | 2 +-
.../classifier/df/data/DescriptorUtilsTest.java | 2 +-
.../apache/mahout/classifier/df/data/Utils.java | 1 +
.../mapreduce/inmem/InMemInputFormatTest.java | 2 +-
.../df/mapreduce/inmem/InMemInputSplitTest.java | 2 +-
.../mapreduce/partial/PartialBuilderTest.java | 2 +-
.../df/mapreduce/partial/Step1MapperTest.java | 2 +-
.../df/mapreduce/partial/TreeIDTest.java | 2 +-
.../mahout/classifier/df/node/NodeTest.java | 2 +-
.../classifier/df/split/DefaultIgSplitTest.java | 2 +-
.../df/split/RegressionSplitTest.java | 2 +-
.../classifier/df/tools/VisualizerTest.java | 2 +-
src/conf/driver.classes.default.props | 2 -
70 files changed, 63 insertions(+), 942 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/examples/bin/README.txt
----------------------------------------------------------------------
diff --git a/examples/bin/README.txt b/examples/bin/README.txt
index f47ab44..503a687 100644
--- a/examples/bin/README.txt
+++ b/examples/bin/README.txt
@@ -7,5 +7,4 @@ cluster-reuters.sh -- Cluster the Reuters data set using a variety of algorithms
cluster-syntheticcontrol.sh -- Cluster the Synthetic Control data set. Downloads the data set automatically.
factorize-movielens-1m.sh -- Run the Alternating Least Squares Recommender on the Grouplens data set (size 1M).
factorize-netflix.sh -- (Deprecated due to lack of availability of the data set) Run the ALS Recommender on the Netflix data set.
-run-rf.sh -- Create some synthetic data, build a random forest, and test performance.
spark-document-classifier.mscala -- A mahout-shell script which trains and tests a Naive Bayes model on the Wikipedia XML dump and defines simple methods to classify new text.
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/examples/bin/create-rf-data.sh
----------------------------------------------------------------------
diff --git a/examples/bin/create-rf-data.sh b/examples/bin/create-rf-data.sh
deleted file mode 100755
index 6e72829..0000000
--- a/examples/bin/create-rf-data.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-#
-# Create synthetic data set with four numeric fields and a boolean label.
-#
-# Requires scala, and is run from run-rf.sh.
-
-exec scala "$0" "$@"
-!#
-val r = new scala.util.Random()
-val pw = new java.io.PrintWriter(args(1))
-val numRows = args(0).toInt
-(1 to numRows).foreach(e =>
- pw.println(r.nextDouble() + "," +
- r.nextDouble() + "," +
- r.nextDouble() + "," +
- r.nextDouble() + "," +
- (if (r.nextBoolean()) 1 else 0))
-)
-pw.close()
-
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/examples/bin/run-rf.sh
----------------------------------------------------------------------
diff --git a/examples/bin/run-rf.sh b/examples/bin/run-rf.sh
deleted file mode 100755
index e52a3b9..0000000
--- a/examples/bin/run-rf.sh
+++ /dev/null
@@ -1,89 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-#
-# Requires scala, and for $HADOOP_HOME to be set.
-#
-# Creates test data for random forest classifier, splits data into train
-# and test sets, trains the classifier on the train set, then tests
-# model on test set.
-#
-# To run: change into the mahout directory and type:
-# ./examples/bin/run-rf.sh <num-rows>
-
-
-if [ $# -ne 1 ]
-then
- echo -e "\nThis script takes one parameter, the number of rows of random data to generate.\n"
- echo -e "Syntax: $0 <number-of-rows-of-sample-data> \n"
- exit -1
-fi
-
-WORK_DIR=/tmp/mahout-work-${USER}
-INPUT="${WORK_DIR}/input"
-mkdir -p $INPUT
-INPUT_PATH="${INPUT}/rf-input.csv"
-
-# Set commands for dfs
-source ./examples/bin/set-dfs-commands.sh
-
-# Create test data
-numrows=$1
-echo "Writing random data to $INPUT_PATH"
-./examples/bin/create-rf-data.sh $numrows $INPUT_PATH
-
-# Put the test file in HDFS
-if [ "$HADOOP_HOME" != "" ] && [ "$MAHOUT_LOCAL" == "" ] ; then
- echo "Copying random data to HDFS"
- set +e
- $DFSRM $WORK_DIR
- $DFS -mkdir -p $INPUT
- set -e
- $DFS -put $INPUT_PATH $INPUT
-fi
-
-# Split original file into train and test
-echo "Creating training and holdout set with a random 60-40 split of the generated vector dataset"
-./bin/mahout split \
- -i $INPUT \
- --trainingOutput ${WORK_DIR}/train.csv \
- --testOutput ${WORK_DIR}/test.csv \
- --randomSelectionPct 40 --overwrite -xm sequential
-
-# Describe input file schema
-# Note: "-d 4 N L" indicates four numerical fields and one label, as built by the step above.
-./bin/mahout describe -p $INPUT_PATH -f ${WORK_DIR}/info -d 4 N L
-
-# Train rf model
-echo
-echo "Training random forest."
-echo
-./bin/mahout buildforest -DXmx10000m -Dmapred.max.split.size=1000000 -d ${WORK_DIR}/train.csv -ds ${WORK_DIR}/info -sl 7 -p -t 500 -o ${WORK_DIR}/forest
-
-# Test predictions
-echo
-echo "Testing predictions on test set."
-echo
-./bin/mahout testforest -DXmx10000m -Dmapred.output.compress=false -i ${WORK_DIR}/test.csv -ds ${WORK_DIR}/info -m ${WORK_DIR}/forest -a -mr -o ${WORK_DIR}/predictions
-
-# Remove old files
-if [ "$HADOOP_HOME" != "" ] && [ "$MAHOUT_LOCAL" == "" ]
-then
- $DFSRM $WORK_DIR
-fi
-rm -r $WORK_DIR
-
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/examples/src/main/java/org/apache/mahout/classifier/df/BreimanExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/mahout/classifier/df/BreimanExample.java b/examples/src/main/java/org/apache/mahout/classifier/df/BreimanExample.java
deleted file mode 100644
index 8d2c1cd..0000000
--- a/examples/src/main/java/org/apache/mahout/classifier/df/BreimanExample.java
+++ /dev/null
@@ -1,224 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.classifier.df;
-
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Random;
-
-import org.apache.commons.cli2.CommandLine;
-import org.apache.commons.cli2.Group;
-import org.apache.commons.cli2.Option;
-import org.apache.commons.cli2.OptionException;
-import org.apache.commons.cli2.builder.ArgumentBuilder;
-import org.apache.commons.cli2.builder.DefaultOptionBuilder;
-import org.apache.commons.cli2.builder.GroupBuilder;
-import org.apache.commons.cli2.commandline.Parser;
-import org.apache.commons.math3.util.FastMath;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.mahout.common.CommandLineUtil;
-import org.apache.mahout.common.RandomUtils;
-import org.apache.mahout.classifier.df.builder.DefaultTreeBuilder;
-import org.apache.mahout.classifier.df.data.Data;
-import org.apache.mahout.classifier.df.data.DataLoader;
-import org.apache.mahout.classifier.df.data.Dataset;
-import org.apache.mahout.classifier.df.ref.SequentialBuilder;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Test procedure as described in Breiman's paper.<br>
- * <b>Leo Breiman: Random Forests. Machine Learning 45(1): 5-32 (2001)</b>
- */
-public class BreimanExample extends Configured implements Tool {
-
- private static final Logger log = LoggerFactory.getLogger(BreimanExample.class);
-
- /** sum test error */
- private double sumTestErrM;
-
- private double sumTestErrOne;
-
- /** mean time to build a forest with m=log2(M)+1 */
- private long sumTimeM;
-
- /** mean time to build a forest with m=1 */
- private long sumTimeOne;
-
- /** mean number of nodes for all the trees grown with m=log2(M)+1 */
- private long numNodesM;
-
- /** mean number of nodes for all the trees grown with m=1 */
- private long numNodesOne;
-
- /**
- * runs one iteration of the procedure.
- *
- * @param rng
- * random numbers generator
- * @param data
- * training data
- * @param m
- * number of random variables to select at each tree-node
- * @param nbtrees
- * number of trees to grow
- */
- private void runIteration(Random rng, Data data, int m, int nbtrees) {
-
- log.info("Splitting the data");
- Data train = data.clone();
- Data test = train.rsplit(rng, (int) (data.size() * 0.1));
-
- DefaultTreeBuilder treeBuilder = new DefaultTreeBuilder();
-
- SequentialBuilder forestBuilder = new SequentialBuilder(rng, treeBuilder, train);
-
- // grow a forest with m = log2(M)+1
- treeBuilder.setM(m);
-
- long time = System.currentTimeMillis();
- log.info("Growing a forest with m={}", m);
- DecisionForest forestM = forestBuilder.build(nbtrees);
- sumTimeM += System.currentTimeMillis() - time;
- numNodesM += forestM.nbNodes();
-
- // grow a forest with m=1
- treeBuilder.setM(1);
-
- time = System.currentTimeMillis();
- log.info("Growing a forest with m=1");
- DecisionForest forestOne = forestBuilder.build(nbtrees);
- sumTimeOne += System.currentTimeMillis() - time;
- numNodesOne += forestOne.nbNodes();
-
- // compute the test set error (Selection Error), and mean tree error (One Tree Error),
- double[] testLabels = test.extractLabels();
- double[][] predictions = new double[test.size()][];
-
- forestM.classify(test, predictions);
- double[] sumPredictions = new double[test.size()];
- Arrays.fill(sumPredictions, 0.0);
- for (int i = 0; i < predictions.length; i++) {
- for (int j = 0; j < predictions[i].length; j++) {
- sumPredictions[i] += predictions[i][j];
- }
- }
- sumTestErrM += ErrorEstimate.errorRate(testLabels, sumPredictions);
-
- forestOne.classify(test, predictions);
- Arrays.fill(sumPredictions, 0.0);
- for (int i = 0; i < predictions.length; i++) {
- for (int j = 0; j < predictions[i].length; j++) {
- sumPredictions[i] += predictions[i][j];
- }
- }
- sumTestErrOne += ErrorEstimate.errorRate(testLabels, sumPredictions);
- }
-
- public static void main(String[] args) throws Exception {
- ToolRunner.run(new Configuration(), new BreimanExample(), args);
- }
-
- @Override
- public int run(String[] args) throws IOException {
-
- DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
- ArgumentBuilder abuilder = new ArgumentBuilder();
- GroupBuilder gbuilder = new GroupBuilder();
-
- Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true).withArgument(
- abuilder.withName("path").withMinimum(1).withMaximum(1).create()).withDescription("Data path").create();
-
- Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true).withArgument(
- abuilder.withName("dataset").withMinimum(1).withMaximum(1).create()).withDescription("Dataset path")
- .create();
-
- Option nbtreesOpt = obuilder.withLongName("nbtrees").withShortName("t").withRequired(true).withArgument(
- abuilder.withName("nbtrees").withMinimum(1).withMaximum(1).create()).withDescription(
- "Number of trees to grow, each iteration").create();
-
- Option nbItersOpt = obuilder.withLongName("iterations").withShortName("i").withRequired(true)
- .withArgument(abuilder.withName("numIterations").withMinimum(1).withMaximum(1).create())
- .withDescription("Number of times to repeat the test").create();
-
- Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
- .create();
-
- Group group = gbuilder.withName("Options").withOption(dataOpt).withOption(datasetOpt).withOption(
- nbItersOpt).withOption(nbtreesOpt).withOption(helpOpt).create();
-
- Path dataPath;
- Path datasetPath;
- int nbTrees;
- int nbIterations;
-
- try {
- Parser parser = new Parser();
- parser.setGroup(group);
- CommandLine cmdLine = parser.parse(args);
-
- if (cmdLine.hasOption("help")) {
- CommandLineUtil.printHelp(group);
- return -1;
- }
-
- String dataName = cmdLine.getValue(dataOpt).toString();
- String datasetName = cmdLine.getValue(datasetOpt).toString();
- nbTrees = Integer.parseInt(cmdLine.getValue(nbtreesOpt).toString());
- nbIterations = Integer.parseInt(cmdLine.getValue(nbItersOpt).toString());
-
- dataPath = new Path(dataName);
- datasetPath = new Path(datasetName);
- } catch (OptionException e) {
- log.error("Error while parsing options", e);
- CommandLineUtil.printHelp(group);
- return -1;
- }
-
- // load the data
- FileSystem fs = dataPath.getFileSystem(new Configuration());
- Dataset dataset = Dataset.load(getConf(), datasetPath);
- Data data = DataLoader.loadData(dataset, fs, dataPath);
-
- // take m to be the first integer less than log2(M) + 1, where M is the
- // number of inputs
- int m = (int) Math.floor(FastMath.log(2.0, data.getDataset().nbAttributes()) + 1);
-
- Random rng = RandomUtils.getRandom();
- for (int iteration = 0; iteration < nbIterations; iteration++) {
- log.info("Iteration {}", iteration);
- runIteration(rng, data, m, nbTrees);
- }
-
- log.info("********************************************");
- log.info("Random Input Test Error : {}", sumTestErrM / nbIterations);
- log.info("Single Input Test Error : {}", sumTestErrOne / nbIterations);
- log.info("Mean Random Input Time : {}", DFUtils.elapsedTime(sumTimeM / nbIterations));
- log.info("Mean Single Input Time : {}", DFUtils.elapsedTime(sumTimeOne / nbIterations));
- log.info("Mean Random Input Num Nodes : {}", numNodesM / nbIterations);
- log.info("Mean Single Input Num Nodes : {}", numNodesOne / nbIterations);
-
- return 0;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/BuildForest.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/BuildForest.java b/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/BuildForest.java
deleted file mode 100644
index d945f39..0000000
--- a/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/BuildForest.java
+++ /dev/null
@@ -1,258 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.classifier.df.mapreduce;
-
-import java.io.IOException;
-
-import org.apache.commons.cli2.CommandLine;
-import org.apache.commons.cli2.Group;
-import org.apache.commons.cli2.Option;
-import org.apache.commons.cli2.OptionException;
-import org.apache.commons.cli2.builder.ArgumentBuilder;
-import org.apache.commons.cli2.builder.DefaultOptionBuilder;
-import org.apache.commons.cli2.builder.GroupBuilder;
-import org.apache.commons.cli2.commandline.Parser;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.mahout.common.CommandLineUtil;
-import org.apache.mahout.classifier.df.DFUtils;
-import org.apache.mahout.classifier.df.DecisionForest;
-import org.apache.mahout.classifier.df.builder.DecisionTreeBuilder;
-import org.apache.mahout.classifier.df.data.Data;
-import org.apache.mahout.classifier.df.data.DataLoader;
-import org.apache.mahout.classifier.df.data.Dataset;
-import org.apache.mahout.classifier.df.mapreduce.inmem.InMemBuilder;
-import org.apache.mahout.classifier.df.mapreduce.partial.PartialBuilder;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Tool to builds a Random Forest using any given dataset (in UCI format). Can use either the in-mem mapred or
- * partial mapred implementations. Stores the forest in the given output directory
- */
-public class BuildForest extends Configured implements Tool {
-
- private static final Logger log = LoggerFactory.getLogger(BuildForest.class);
-
- private Path dataPath;
-
- private Path datasetPath;
-
- private Path outputPath;
-
- private Integer m; // Number of variables to select at each tree-node
-
- private boolean complemented; // tree is complemented
-
- private Integer minSplitNum; // minimum number for split
-
- private Double minVarianceProportion; // minimum proportion of the total variance for split
-
- private int nbTrees; // Number of trees to grow
-
- private Long seed; // Random seed
-
- private boolean isPartial; // use partial data implementation
-
- @Override
- public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
-
- DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
- ArgumentBuilder abuilder = new ArgumentBuilder();
- GroupBuilder gbuilder = new GroupBuilder();
-
- Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true)
- .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
- .withDescription("Data path").create();
-
- Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
- .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create())
- .withDescription("Dataset path").create();
-
- Option selectionOpt = obuilder.withLongName("selection").withShortName("sl").withRequired(false)
- .withArgument(abuilder.withName("m").withMinimum(1).withMaximum(1).create())
- .withDescription("Optional, Number of variables to select randomly at each tree-node.\n"
- + "For classification problem, the default is square root of the number of explanatory variables.\n"
- + "For regression problem, the default is 1/3 of the number of explanatory variables.").create();
-
- Option noCompleteOpt = obuilder.withLongName("no-complete").withShortName("nc").withRequired(false)
- .withDescription("Optional, The tree is not complemented").create();
-
- Option minSplitOpt = obuilder.withLongName("minsplit").withShortName("ms").withRequired(false)
- .withArgument(abuilder.withName("minsplit").withMinimum(1).withMaximum(1).create())
- .withDescription("Optional, The tree-node is not divided, if the branching data size is "
- + "smaller than this value.\nThe default is 2.").create();
-
- Option minPropOpt = obuilder.withLongName("minprop").withShortName("mp").withRequired(false)
- .withArgument(abuilder.withName("minprop").withMinimum(1).withMaximum(1).create())
- .withDescription("Optional, The tree-node is not divided, if the proportion of the "
- + "variance of branching data is smaller than this value.\n"
- + "In the case of a regression problem, this value is used. "
- + "The default is 1/1000(0.001).").create();
-
- Option seedOpt = obuilder.withLongName("seed").withShortName("sd").withRequired(false)
- .withArgument(abuilder.withName("seed").withMinimum(1).withMaximum(1).create())
- .withDescription("Optional, seed value used to initialise the Random number generator").create();
-
- Option partialOpt = obuilder.withLongName("partial").withShortName("p").withRequired(false)
- .withDescription("Optional, use the Partial Data implementation").create();
-
- Option nbtreesOpt = obuilder.withLongName("nbtrees").withShortName("t").withRequired(true)
- .withArgument(abuilder.withName("nbtrees").withMinimum(1).withMaximum(1).create())
- .withDescription("Number of trees to grow").create();
-
- Option outputOpt = obuilder.withLongName("output").withShortName("o").withRequired(true)
- .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
- .withDescription("Output path, will contain the Decision Forest").create();
-
- Option helpOpt = obuilder.withLongName("help").withShortName("h")
- .withDescription("Print out help").create();
-
- Group group = gbuilder.withName("Options").withOption(dataOpt).withOption(datasetOpt)
- .withOption(selectionOpt).withOption(noCompleteOpt).withOption(minSplitOpt)
- .withOption(minPropOpt).withOption(seedOpt).withOption(partialOpt).withOption(nbtreesOpt)
- .withOption(outputOpt).withOption(helpOpt).create();
-
- try {
- Parser parser = new Parser();
- parser.setGroup(group);
- CommandLine cmdLine = parser.parse(args);
-
- if (cmdLine.hasOption("help")) {
- CommandLineUtil.printHelp(group);
- return -1;
- }
-
- isPartial = cmdLine.hasOption(partialOpt);
- String dataName = cmdLine.getValue(dataOpt).toString();
- String datasetName = cmdLine.getValue(datasetOpt).toString();
- String outputName = cmdLine.getValue(outputOpt).toString();
- nbTrees = Integer.parseInt(cmdLine.getValue(nbtreesOpt).toString());
-
- if (cmdLine.hasOption(selectionOpt)) {
- m = Integer.parseInt(cmdLine.getValue(selectionOpt).toString());
- }
- complemented = !cmdLine.hasOption(noCompleteOpt);
- if (cmdLine.hasOption(minSplitOpt)) {
- minSplitNum = Integer.parseInt(cmdLine.getValue(minSplitOpt).toString());
- }
- if (cmdLine.hasOption(minPropOpt)) {
- minVarianceProportion = Double.parseDouble(cmdLine.getValue(minPropOpt).toString());
- }
- if (cmdLine.hasOption(seedOpt)) {
- seed = Long.valueOf(cmdLine.getValue(seedOpt).toString());
- }
-
- if (log.isDebugEnabled()) {
- log.debug("data : {}", dataName);
- log.debug("dataset : {}", datasetName);
- log.debug("output : {}", outputName);
- log.debug("m : {}", m);
- log.debug("complemented : {}", complemented);
- log.debug("minSplitNum : {}", minSplitNum);
- log.debug("minVarianceProportion : {}", minVarianceProportion);
- log.debug("seed : {}", seed);
- log.debug("nbtrees : {}", nbTrees);
- log.debug("isPartial : {}", isPartial);
- }
-
- dataPath = new Path(dataName);
- datasetPath = new Path(datasetName);
- outputPath = new Path(outputName);
-
- } catch (OptionException e) {
- log.error("Exception", e);
- CommandLineUtil.printHelp(group);
- return -1;
- }
-
- buildForest();
-
- return 0;
- }
-
- private void buildForest() throws IOException, ClassNotFoundException, InterruptedException {
- // make sure the output path does not exist
- FileSystem ofs = outputPath.getFileSystem(getConf());
- if (ofs.exists(outputPath)) {
- log.error("Output path already exists");
- return;
- }
-
- DecisionTreeBuilder treeBuilder = new DecisionTreeBuilder();
- if (m != null) {
- treeBuilder.setM(m);
- }
- treeBuilder.setComplemented(complemented);
- if (minSplitNum != null) {
- treeBuilder.setMinSplitNum(minSplitNum);
- }
- if (minVarianceProportion != null) {
- treeBuilder.setMinVarianceProportion(minVarianceProportion);
- }
-
- Builder forestBuilder;
-
- if (isPartial) {
- log.info("Partial Mapred implementation");
- forestBuilder = new PartialBuilder(treeBuilder, dataPath, datasetPath, seed, getConf());
- } else {
- log.info("InMem Mapred implementation");
- forestBuilder = new InMemBuilder(treeBuilder, dataPath, datasetPath, seed, getConf());
- }
-
- forestBuilder.setOutputDirName(outputPath.getName());
-
- log.info("Building the forest...");
- long time = System.currentTimeMillis();
-
- DecisionForest forest = forestBuilder.build(nbTrees);
- if (forest == null) {
- return;
- }
-
- time = System.currentTimeMillis() - time;
- log.info("Build Time: {}", DFUtils.elapsedTime(time));
- log.info("Forest num Nodes: {}", forest.nbNodes());
- log.info("Forest mean num Nodes: {}", forest.meanNbNodes());
- log.info("Forest mean max Depth: {}", forest.meanMaxDepth());
-
- // store the decision forest in the output path
- Path forestPath = new Path(outputPath, "forest.seq");
- log.info("Storing the forest in: {}", forestPath);
- DFUtils.storeWritable(getConf(), forestPath, forest);
- }
-
- protected static Data loadData(Configuration conf, Path dataPath, Dataset dataset) throws IOException {
- log.info("Loading the data...");
- FileSystem fs = dataPath.getFileSystem(conf);
- Data data = DataLoader.loadData(dataset, fs, dataPath);
- log.info("Data Loaded");
-
- return data;
- }
-
- public static void main(String[] args) throws Exception {
- ToolRunner.run(new Configuration(), new BuildForest(), args);
- }
-
-}
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/TestForest.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/TestForest.java b/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/TestForest.java
deleted file mode 100644
index db39215..0000000
--- a/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/TestForest.java
+++ /dev/null
@@ -1,313 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.classifier.df.mapreduce;
-
-import org.apache.commons.cli2.CommandLine;
-import org.apache.commons.cli2.Group;
-import org.apache.commons.cli2.Option;
-import org.apache.commons.cli2.OptionException;
-import org.apache.commons.cli2.builder.ArgumentBuilder;
-import org.apache.commons.cli2.builder.DefaultOptionBuilder;
-import org.apache.commons.cli2.builder.GroupBuilder;
-import org.apache.commons.cli2.commandline.Parser;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.mahout.classifier.ClassifierResult;
-import org.apache.mahout.classifier.RegressionResultAnalyzer;
-import org.apache.mahout.classifier.ResultAnalyzer;
-import org.apache.mahout.classifier.df.DFUtils;
-import org.apache.mahout.classifier.df.DecisionForest;
-import org.apache.mahout.classifier.df.data.DataConverter;
-import org.apache.mahout.classifier.df.data.Dataset;
-import org.apache.mahout.classifier.df.data.Instance;
-import org.apache.mahout.common.CommandLineUtil;
-import org.apache.mahout.common.RandomUtils;
-import org.apache.mahout.common.commandline.DefaultOptionCreator;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.List;
-import java.util.Random;
-import java.util.Scanner;
-
-/**
- * Tool to classify a Dataset using a previously built Decision Forest
- */
-public class TestForest extends Configured implements Tool {
-
- private static final Logger log = LoggerFactory.getLogger(TestForest.class);
-
- private FileSystem dataFS;
- private Path dataPath; // test data path
-
- private Path datasetPath;
-
- private Path modelPath; // path where the forest is stored
-
- private FileSystem outFS;
- private Path outputPath; // path to predictions file, if null do not output the predictions
-
- private boolean analyze; // analyze the classification results ?
-
- private boolean useMapreduce; // use the mapreduce classifier ?
-
- @Override
- public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
-
- DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
- ArgumentBuilder abuilder = new ArgumentBuilder();
- GroupBuilder gbuilder = new GroupBuilder();
-
- Option inputOpt = DefaultOptionCreator.inputOption().create();
-
- Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true).withArgument(
- abuilder.withName("dataset").withMinimum(1).withMaximum(1).create()).withDescription("Dataset path")
- .create();
-
- Option modelOpt = obuilder.withLongName("model").withShortName("m").withRequired(true).withArgument(
- abuilder.withName("path").withMinimum(1).withMaximum(1).create()).
- withDescription("Path to the Decision Forest").create();
-
- Option outputOpt = DefaultOptionCreator.outputOption().create();
-
- Option analyzeOpt = obuilder.withLongName("analyze").withShortName("a").withRequired(false).create();
-
- Option mrOpt = obuilder.withLongName("mapreduce").withShortName("mr").withRequired(false).create();
-
- Option helpOpt = DefaultOptionCreator.helpOption();
-
- Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(datasetOpt).withOption(modelOpt)
- .withOption(outputOpt).withOption(analyzeOpt).withOption(mrOpt).withOption(helpOpt).create();
-
- try {
- Parser parser = new Parser();
- parser.setGroup(group);
- CommandLine cmdLine = parser.parse(args);
-
- if (cmdLine.hasOption("help")) {
- CommandLineUtil.printHelp(group);
- return -1;
- }
-
- String dataName = cmdLine.getValue(inputOpt).toString();
- String datasetName = cmdLine.getValue(datasetOpt).toString();
- String modelName = cmdLine.getValue(modelOpt).toString();
- String outputName = cmdLine.hasOption(outputOpt) ? cmdLine.getValue(outputOpt).toString() : null;
- analyze = cmdLine.hasOption(analyzeOpt);
- useMapreduce = cmdLine.hasOption(mrOpt);
-
- if (log.isDebugEnabled()) {
- log.debug("inout : {}", dataName);
- log.debug("dataset : {}", datasetName);
- log.debug("model : {}", modelName);
- log.debug("output : {}", outputName);
- log.debug("analyze : {}", analyze);
- log.debug("mapreduce : {}", useMapreduce);
- }
-
- dataPath = new Path(dataName);
- datasetPath = new Path(datasetName);
- modelPath = new Path(modelName);
- if (outputName != null) {
- outputPath = new Path(outputName);
- }
- } catch (OptionException e) {
- log.warn(e.toString(), e);
- CommandLineUtil.printHelp(group);
- return -1;
- }
-
- testForest();
-
- return 0;
- }
-
- private void testForest() throws IOException, ClassNotFoundException, InterruptedException {
-
- // make sure the output file does not exist
- if (outputPath != null) {
- outFS = outputPath.getFileSystem(getConf());
- if (outFS.exists(outputPath)) {
- throw new IllegalArgumentException("Output path already exists");
- }
- }
-
- // make sure the decision forest exists
- FileSystem mfs = modelPath.getFileSystem(getConf());
- if (!mfs.exists(modelPath)) {
- throw new IllegalArgumentException("The forest path does not exist");
- }
-
- // make sure the test data exists
- dataFS = dataPath.getFileSystem(getConf());
- if (!dataFS.exists(dataPath)) {
- throw new IllegalArgumentException("The Test data path does not exist");
- }
-
- if (useMapreduce) {
- mapreduce();
- } else {
- sequential();
- }
-
- }
-
- private void mapreduce() throws ClassNotFoundException, IOException, InterruptedException {
- if (outputPath == null) {
- throw new IllegalArgumentException("You must specify the ouputPath when using the mapreduce implementation");
- }
-
- Classifier classifier = new Classifier(modelPath, dataPath, datasetPath, outputPath, getConf());
-
- classifier.run();
-
- if (analyze) {
- double[][] results = classifier.getResults();
- if (results != null) {
- Dataset dataset = Dataset.load(getConf(), datasetPath);
- if (dataset.isNumerical(dataset.getLabelId())) {
- RegressionResultAnalyzer regressionAnalyzer = new RegressionResultAnalyzer();
- regressionAnalyzer.setInstances(results);
- log.info("{}", regressionAnalyzer);
- } else {
- ResultAnalyzer analyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown");
- for (double[] res : results) {
- analyzer.addInstance(dataset.getLabelString(res[0]),
- new ClassifierResult(dataset.getLabelString(res[1]), 1.0));
- }
- log.info("{}", analyzer);
- }
- }
- }
- }
-
- private void sequential() throws IOException {
-
- log.info("Loading the forest...");
- DecisionForest forest = DecisionForest.load(getConf(), modelPath);
-
- if (forest == null) {
- log.error("No Decision Forest found!");
- return;
- }
-
- // load the dataset
- Dataset dataset = Dataset.load(getConf(), datasetPath);
- DataConverter converter = new DataConverter(dataset);
-
- log.info("Sequential classification...");
- long time = System.currentTimeMillis();
-
- Random rng = RandomUtils.getRandom();
-
- List<double[]> resList = new ArrayList<>();
- if (dataFS.getFileStatus(dataPath).isDir()) {
- //the input is a directory of files
- testDirectory(outputPath, converter, forest, dataset, resList, rng);
- } else {
- // the input is one single file
- testFile(dataPath, outputPath, converter, forest, dataset, resList, rng);
- }
-
- time = System.currentTimeMillis() - time;
- log.info("Classification Time: {}", DFUtils.elapsedTime(time));
-
- if (analyze) {
- if (dataset.isNumerical(dataset.getLabelId())) {
- RegressionResultAnalyzer regressionAnalyzer = new RegressionResultAnalyzer();
- double[][] results = new double[resList.size()][2];
- regressionAnalyzer.setInstances(resList.toArray(results));
- log.info("{}", regressionAnalyzer);
- } else {
- ResultAnalyzer analyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown");
- for (double[] r : resList) {
- analyzer.addInstance(dataset.getLabelString(r[0]),
- new ClassifierResult(dataset.getLabelString(r[1]), 1.0));
- }
- log.info("{}", analyzer);
- }
- }
- }
-
- private void testDirectory(Path outPath,
- DataConverter converter,
- DecisionForest forest,
- Dataset dataset,
- Collection<double[]> results,
- Random rng) throws IOException {
- Path[] infiles = DFUtils.listOutputFiles(dataFS, dataPath);
-
- for (Path path : infiles) {
- log.info("Classifying : {}", path);
- Path outfile = outPath != null ? new Path(outPath, path.getName()).suffix(".out") : null;
- testFile(path, outfile, converter, forest, dataset, results, rng);
- }
- }
-
- private void testFile(Path inPath,
- Path outPath,
- DataConverter converter,
- DecisionForest forest,
- Dataset dataset,
- Collection<double[]> results,
- Random rng) throws IOException {
- // create the predictions file
- FSDataOutputStream ofile = null;
-
- if (outPath != null) {
- ofile = outFS.create(outPath);
- }
-
- try (FSDataInputStream input = dataFS.open(inPath)){
- Scanner scanner = new Scanner(input, "UTF-8");
-
- while (scanner.hasNextLine()) {
- String line = scanner.nextLine();
- if (!line.isEmpty()) {
-
- Instance instance = converter.convert(line);
- double prediction = forest.classify(dataset, rng, instance);
-
- if (ofile != null) {
- ofile.writeChars(Double.toString(prediction)); // write the prediction
- ofile.writeChar('\n');
- }
-
- results.add(new double[]{dataset.getLabel(instance), prediction});
- }
- }
-
- scanner.close();
- }
- }
-
- public static void main(String[] args) throws Exception {
- ToolRunner.run(new Configuration(), new TestForest(), args);
- }
-
-}
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/Bagging.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/Bagging.java b/mr/src/main/java/org/apache/mahout/classifier/df/Bagging.java
index 0ec5b55..f79a429 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/Bagging.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/Bagging.java
@@ -29,6 +29,7 @@ import java.util.Random;
/**
* Builds a tree using bagging
*/
+@Deprecated
public class Bagging {
private static final Logger log = LoggerFactory.getLogger(Bagging.class);
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/DFUtils.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/DFUtils.java b/mr/src/main/java/org/apache/mahout/classifier/df/DFUtils.java
index 86f99b6..c94292c 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/DFUtils.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/DFUtils.java
@@ -37,6 +37,7 @@ import org.apache.mahout.common.iterator.sequencefile.PathFilters;
/**
* Utility class that contains various helper methods
*/
+@Deprecated
public final class DFUtils {
private DFUtils() {
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/DecisionForest.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/DecisionForest.java b/mr/src/main/java/org/apache/mahout/classifier/df/DecisionForest.java
index bb4153e..c11cf34 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/DecisionForest.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/DecisionForest.java
@@ -39,6 +39,7 @@ import java.util.Random;
/**
* Represents a forest of decision trees.
*/
+@Deprecated
public class DecisionForest implements Writable {
private final List<Node> trees;
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/ErrorEstimate.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/ErrorEstimate.java b/mr/src/main/java/org/apache/mahout/classifier/df/ErrorEstimate.java
index 2a7facc..13cd386 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/ErrorEstimate.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/ErrorEstimate.java
@@ -22,6 +22,7 @@ import com.google.common.base.Preconditions;
/**
* Various methods to compute from the output of a random forest
*/
+@Deprecated
public final class ErrorEstimate {
private ErrorEstimate() {
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java b/mr/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java
index 8a7d945..9f84e9c 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java
@@ -41,6 +41,7 @@ import java.util.Random;
* A classification tree is built when the criterion variable is the categorical attribute.<br>
* A regression tree is built when the criterion variable is the numerical attribute.
*/
+@Deprecated
public class DecisionTreeBuilder implements TreeBuilder {
private static final Logger log = LoggerFactory.getLogger(DecisionTreeBuilder.class);
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/builder/DefaultTreeBuilder.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/builder/DefaultTreeBuilder.java b/mr/src/main/java/org/apache/mahout/classifier/df/builder/DefaultTreeBuilder.java
index f03698d..3392fb1 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/builder/DefaultTreeBuilder.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/builder/DefaultTreeBuilder.java
@@ -41,6 +41,7 @@ import java.util.Random;
* <br><br>
* This class can be used when the criterion variable is the categorical attribute.
*/
+@Deprecated
public class DefaultTreeBuilder implements TreeBuilder {
private static final Logger log = LoggerFactory.getLogger(DefaultTreeBuilder.class);
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/builder/TreeBuilder.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/builder/TreeBuilder.java b/mr/src/main/java/org/apache/mahout/classifier/df/builder/TreeBuilder.java
index 3d4c6d6..bf686a4 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/builder/TreeBuilder.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/builder/TreeBuilder.java
@@ -25,6 +25,7 @@ import java.util.Random;
/**
* Abstract base class for TreeBuilders
*/
+@Deprecated
public interface TreeBuilder {
/**
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/data/Data.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/Data.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/Data.java
index c68ce52..77e5ed5 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/data/Data.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/Data.java
@@ -30,6 +30,7 @@ import java.util.Random;
* vectors (subset, count,...)
*
*/
+@Deprecated
public class Data implements Cloneable {
private final List<Instance> instances;
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/data/DataConverter.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/DataConverter.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/DataConverter.java
index 318c0d0..f1bdc95 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/data/DataConverter.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/DataConverter.java
@@ -26,6 +26,7 @@ import java.util.regex.Pattern;
/**
* Converts String to Instance using a Dataset
*/
+@Deprecated
public class DataConverter {
private static final Pattern COMMA_SPACE = Pattern.compile("[, ]");
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java
index c8d9dcd..c62dcac 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java
@@ -45,6 +45,7 @@ import java.util.regex.Pattern;
* adds an IGNORED first attribute that will contain a unique id for each instance, which is the line number
* of the instance in the input data
*/
+@Deprecated
public final class DataLoader {
private static final Logger log = LoggerFactory.getLogger(DataLoader.class);
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/data/DataUtils.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/DataUtils.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/DataUtils.java
index 3eb126c..0889370 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/data/DataUtils.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/DataUtils.java
@@ -26,6 +26,7 @@ import java.util.Random;
/**
* Helper methods that deals with data lists and arrays of values
*/
+@Deprecated
public final class DataUtils {
private DataUtils() { }
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java
index 413389f..a392669 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java
@@ -39,6 +39,7 @@ import java.util.Map;
/**
* Contains information about the attributes.
*/
+@Deprecated
public class Dataset {
/**
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorException.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorException.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorException.java
index f4419f0..e7a10ff 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorException.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorException.java
@@ -20,6 +20,7 @@ package org.apache.mahout.classifier.df.data;
/**
* Exception thrown when parsing a descriptor
*/
+@Deprecated
public class DescriptorException extends Exception {
public DescriptorException(String msg) {
super(msg);
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorUtils.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorUtils.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorUtils.java
index f2e0ce4..aadedbd 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorUtils.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorUtils.java
@@ -27,6 +27,7 @@ import java.util.Locale;
/**
* Contains various methods that deal with descriptor strings
*/
+@Deprecated
public final class DescriptorUtils {
private static final Splitter SPACE = Splitter.on(' ').omitEmptyStrings();
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/data/Instance.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/Instance.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/Instance.java
index 3abf124..6a23cb8 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/data/Instance.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/Instance.java
@@ -22,6 +22,7 @@ import org.apache.mahout.math.Vector;
/**
* Represents one data instance.
*/
+@Deprecated
public class Instance {
/** attributes, except LABEL and IGNORED */
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Condition.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Condition.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Condition.java
index b199834..c16ca3f 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Condition.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Condition.java
@@ -22,6 +22,7 @@ import org.apache.mahout.classifier.df.data.Instance;
/**
* Condition on Instance
*/
+@Deprecated
public abstract class Condition {
/**
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Equals.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Equals.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Equals.java
index 73f4ef6..c51082b 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Equals.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Equals.java
@@ -22,6 +22,7 @@ import org.apache.mahout.classifier.df.data.Instance;
/**
* True if a given attribute has a given value
*/
+@Deprecated
public class Equals extends Condition {
private final int attr;
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/GreaterOrEquals.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/GreaterOrEquals.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/GreaterOrEquals.java
index 2db3f2e..3e3d1a4 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/GreaterOrEquals.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/GreaterOrEquals.java
@@ -22,6 +22,7 @@ import org.apache.mahout.classifier.df.data.Instance;
/**
* True if a given attribute has a value "greater or equal" than a given value
*/
+@Deprecated
public class GreaterOrEquals extends Condition {
private final int attr;
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Lesser.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Lesser.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Lesser.java
index 4e49eb7..577cb24 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Lesser.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Lesser.java
@@ -22,6 +22,7 @@ import org.apache.mahout.classifier.df.data.Instance;
/**
* True if a given attribute has a value "lesser" than a given value
*/
+@Deprecated
public class Lesser extends Condition {
private final int attr;
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Builder.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Builder.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Builder.java
index da2448f..32d7b5c 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Builder.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Builder.java
@@ -47,6 +47,7 @@ import java.util.Comparator;
* </ul>
*
*/
+@Deprecated
public abstract class Builder {
private static final Logger log = LoggerFactory.getLogger(Builder.class);
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java
index bdbaf2b..1a35cfe 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java
@@ -54,6 +54,7 @@ import java.util.Random;
/**
* Mapreduce implementation that classifies the Input data using a previousely built decision forest
*/
+@Deprecated
public class Classifier {
private static final Logger log = LoggerFactory.getLogger(Classifier.class);
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/MapredMapper.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/MapredMapper.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/MapredMapper.java
index cfd93cd..4d0f3f1 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/MapredMapper.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/MapredMapper.java
@@ -28,6 +28,7 @@ import java.io.IOException;
/**
* Base class for Mapred mappers. Loads common parameters from the job
*/
+@Deprecated
public class MapredMapper<KEYIN,VALUEIN,KEYOUT,VALUEOUT> extends Mapper<KEYIN,VALUEIN,KEYOUT,VALUEOUT> {
private boolean noOutput;
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/MapredOutput.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/MapredOutput.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/MapredOutput.java
index b177ce5..56cabb2 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/MapredOutput.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/MapredOutput.java
@@ -30,6 +30,7 @@ import java.util.Arrays;
* Used by various implementation to return the results of a build.<br>
* Contains a grown tree and and its oob predictions.
*/
+@Deprecated
public class MapredOutput implements Writable, Cloneable {
private Node tree;
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemBuilder.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemBuilder.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemBuilder.java
index 4c33e73..86d4404 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemBuilder.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemBuilder.java
@@ -44,6 +44,7 @@ import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable;
* MapReduce implementation where each mapper loads a full copy of the data in-memory. The forest trees are
* splitted across all the mappers
*/
+@Deprecated
public class InMemBuilder extends Builder {
public InMemBuilder(TreeBuilder treeBuilder, Path dataPath, Path datasetPath, Long seed, Configuration conf) {
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java
index 51e5a3e..c3b2fa3 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java
@@ -45,6 +45,7 @@ import org.slf4j.LoggerFactory;
* each input split contains a subset of the trees.<br>
* The number of splits is equal to the number of requested splits
*/
+@Deprecated
public class InMemInputFormat extends InputFormat<IntWritable,NullWritable> {
private static final Logger log = LoggerFactory.getLogger(InMemInputSplit.class);
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemMapper.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemMapper.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemMapper.java
index 9e7e176..2fc67ba 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemMapper.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemMapper.java
@@ -42,6 +42,7 @@ import java.util.Random;
* In-memory mapper that grows the trees using a full copy of the data loaded in-memory. The number of trees
* to grow is determined by the current InMemInputSplit.
*/
+@Deprecated
public class InMemMapper extends MapredMapper<IntWritable,NullWritable,IntWritable,MapredOutput> {
private static final Logger log = LoggerFactory.getLogger(InMemMapper.class);
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilder.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilder.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilder.java
index 1c9a13b..9236af3 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilder.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilder.java
@@ -45,6 +45,7 @@ import java.util.List;
/**
* Builds a random forest using partial data. Each mapper uses only the data given by its InputSplit
*/
+@Deprecated
public class PartialBuilder extends Builder {
private static final Logger log = LoggerFactory.getLogger(PartialBuilder.class);
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java
index 648472c..9474236 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java
@@ -42,6 +42,7 @@ import java.util.Random;
* First step of the Partial Data Builder. Builds the trees using the data available in the InputSplit.
* Predict the oob classes for each tree in its growing partition (input split).
*/
+@Deprecated
public class Step1Mapper extends MapredMapper<LongWritable,Text,TreeID,MapredOutput> {
private static final Logger log = LoggerFactory.getLogger(Step1Mapper.class);
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/TreeID.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/TreeID.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/TreeID.java
index d0ed5df..c296061 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/TreeID.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/TreeID.java
@@ -23,6 +23,7 @@ import org.apache.hadoop.io.LongWritable;
/**
* Indicates both the tree and the data partition used to grow the tree
*/
+@Deprecated
public class TreeID extends LongWritable implements Cloneable {
public static final int MAX_TREEID = 100000;
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/node/CategoricalNode.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/node/CategoricalNode.java b/mr/src/main/java/org/apache/mahout/classifier/df/node/CategoricalNode.java
index 3484866..1f91842 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/node/CategoricalNode.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/node/CategoricalNode.java
@@ -25,7 +25,7 @@ import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays;
-
+@Deprecated
public class CategoricalNode extends Node {
private int attr;
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/node/Leaf.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/node/Leaf.java b/mr/src/main/java/org/apache/mahout/classifier/df/node/Leaf.java
index 285a134..3360bb5 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/node/Leaf.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/node/Leaf.java
@@ -26,6 +26,7 @@ import java.io.IOException;
/**
* Represents a Leaf node
*/
+@Deprecated
public class Leaf extends Node {
private static final double EPSILON = 1.0e-6;
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/node/Node.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/node/Node.java b/mr/src/main/java/org/apache/mahout/classifier/df/node/Node.java
index cb6deb2..73d516d 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/node/Node.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/node/Node.java
@@ -27,6 +27,7 @@ import java.io.IOException;
/**
* Represents an abstract node of a decision tree
*/
+@Deprecated
public abstract class Node implements Writable {
protected enum Type {
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/node/NumericalNode.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/node/NumericalNode.java b/mr/src/main/java/org/apache/mahout/classifier/df/node/NumericalNode.java
index 19b3e57..aa02089 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/node/NumericalNode.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/node/NumericalNode.java
@@ -26,6 +26,7 @@ import java.io.IOException;
/**
* Represents a node that splits using a numerical attribute
*/
+@Deprecated
public class NumericalNode extends Node {
/** numerical attribute to split for */
private int attr;
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/ref/SequentialBuilder.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/ref/SequentialBuilder.java b/mr/src/main/java/org/apache/mahout/classifier/df/ref/SequentialBuilder.java
index d7f023b..7ef907e 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/ref/SequentialBuilder.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/ref/SequentialBuilder.java
@@ -32,6 +32,7 @@ import java.util.Random;
/**
* Builds a Random Decision Forest using a given TreeBuilder to grow the trees
*/
+@Deprecated
public class SequentialBuilder {
private static final Logger log = LoggerFactory.getLogger(SequentialBuilder.class);
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/split/DefaultIgSplit.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/split/DefaultIgSplit.java b/mr/src/main/java/org/apache/mahout/classifier/df/split/DefaultIgSplit.java
index 38d3007..3f1cfdf 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/split/DefaultIgSplit.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/split/DefaultIgSplit.java
@@ -25,6 +25,7 @@ import java.util.Arrays;
/**
* Default, not optimized, implementation of IgSplit
*/
+@Deprecated
public class DefaultIgSplit extends IgSplit {
/** used by entropy() */
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/split/IgSplit.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/split/IgSplit.java b/mr/src/main/java/org/apache/mahout/classifier/df/split/IgSplit.java
index da37cf3..aff94e1 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/split/IgSplit.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/split/IgSplit.java
@@ -22,6 +22,7 @@ import org.apache.mahout.classifier.df.data.Data;
/**
* Computes the best split using the Information Gain measure
*/
+@Deprecated
public abstract class IgSplit {
static final double LOG2 = Math.log(2.0);
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/split/OptIgSplit.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/split/OptIgSplit.java b/mr/src/main/java/org/apache/mahout/classifier/df/split/OptIgSplit.java
index 7b15d2a..e3ab95f 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/split/OptIgSplit.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/split/OptIgSplit.java
@@ -50,6 +50,7 @@ import java.util.TreeSet;
* pw.close()
* }
*/
+@Deprecated
public class OptIgSplit extends IgSplit {
private static final int MAX_NUMERIC_SPLITS = 16;
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/split/RegressionSplit.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/split/RegressionSplit.java b/mr/src/main/java/org/apache/mahout/classifier/df/split/RegressionSplit.java
index 2974bcb..38695a3 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/split/RegressionSplit.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/split/RegressionSplit.java
@@ -29,6 +29,7 @@ import java.util.Comparator;
* Regression problem implementation of IgSplit. This class can be used when the criterion variable is the numerical
* attribute.
*/
+@Deprecated
public class RegressionSplit extends IgSplit {
/**
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/split/Split.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/split/Split.java b/mr/src/main/java/org/apache/mahout/classifier/df/split/Split.java
index bf079de..2a6a322 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/split/Split.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/split/Split.java
@@ -22,6 +22,7 @@ import java.util.Locale;
/**
* Contains enough information to identify each split
*/
+@Deprecated
public final class Split {
private final int attr;
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/tools/ForestVisualizer.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/tools/ForestVisualizer.java b/mr/src/main/java/org/apache/mahout/classifier/df/tools/ForestVisualizer.java
index 3b9d4ee..b421c4e 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/tools/ForestVisualizer.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/tools/ForestVisualizer.java
@@ -42,6 +42,7 @@ import org.slf4j.LoggerFactory;
/**
* This tool is to visualize the Decision Forest
*/
+@Deprecated
public final class ForestVisualizer {
private static final Logger log = LoggerFactory.getLogger(ForestVisualizer.class);
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/tools/Frequencies.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/tools/Frequencies.java b/mr/src/main/java/org/apache/mahout/classifier/df/tools/Frequencies.java
index 4586540..c37af4e 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/tools/Frequencies.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/tools/Frequencies.java
@@ -42,6 +42,7 @@ import java.util.Arrays;
* Compute the frequency distribution of the "class label"<br>
* This class can be used when the criterion variable is the categorical attribute.
*/
+@Deprecated
public final class Frequencies extends Configured implements Tool {
private static final Logger log = LoggerFactory.getLogger(Frequencies.class);
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/tools/FrequenciesJob.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/tools/FrequenciesJob.java b/mr/src/main/java/org/apache/mahout/classifier/df/tools/FrequenciesJob.java
index d02d974..9d7e2ff 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/tools/FrequenciesJob.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/tools/FrequenciesJob.java
@@ -53,6 +53,7 @@ import java.util.Arrays;
* Temporary class used to compute the frequency distribution of the "class attribute".<br>
* This class can be used when the criterion variable is the categorical attribute.
*/
+@Deprecated
public class FrequenciesJob {
private static final Logger log = LoggerFactory.getLogger(FrequenciesJob.class);
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/tools/TreeVisualizer.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/tools/TreeVisualizer.java b/mr/src/main/java/org/apache/mahout/classifier/df/tools/TreeVisualizer.java
index d82b383..07928ab 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/tools/TreeVisualizer.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/tools/TreeVisualizer.java
@@ -34,6 +34,7 @@ import org.apache.mahout.classifier.df.node.NumericalNode;
/**
* This tool is to visualize the Decision tree
*/
+@Deprecated
public final class TreeVisualizer {
private TreeVisualizer() {}
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/tools/UDistrib.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/tools/UDistrib.java b/mr/src/main/java/org/apache/mahout/classifier/df/tools/UDistrib.java
index 06876e1..e1b55ab 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/tools/UDistrib.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/tools/UDistrib.java
@@ -52,6 +52,7 @@ import org.slf4j.LoggerFactory;
* partitions.<br>
* This class can be used when the criterion variable is the categorical attribute.
*/
+@Deprecated
public final class UDistrib {
private static final Logger log = LoggerFactory.getLogger(UDistrib.class);
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/DecisionForestTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/DecisionForestTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/DecisionForestTest.java
index f1ec07f..036d473 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/df/DecisionForestTest.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/df/DecisionForestTest.java
@@ -32,7 +32,7 @@ import org.apache.mahout.common.RandomUtils;
import org.junit.Test;
import com.google.common.collect.Lists;
-
+@Deprecated
public final class DecisionForestTest extends MahoutTestCase {
private static final String[] TRAIN_DATA = {"sunny,85,85,FALSE,no",
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilderTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilderTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilderTest.java
index 85244c8..56b4787 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilderTest.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilderTest.java
@@ -25,7 +25,7 @@ import org.apache.commons.lang3.ArrayUtils;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.RandomUtils;
import org.junit.Test;
-
+@Deprecated
public final class DecisionTreeBuilderTest extends MahoutTestCase {
/**
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/builder/DefaultTreeBuilderTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/builder/DefaultTreeBuilderTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/builder/DefaultTreeBuilderTest.java
index 78fe65f..87fd44b 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/df/builder/DefaultTreeBuilderTest.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/df/builder/DefaultTreeBuilderTest.java
@@ -24,7 +24,7 @@ import org.apache.commons.lang3.ArrayUtils;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.RandomUtils;
import org.junit.Test;
-
+@Deprecated
public final class DefaultTreeBuilderTest extends MahoutTestCase {
/**
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/builder/InfiniteRecursionTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/builder/InfiniteRecursionTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/builder/InfiniteRecursionTest.java
index 16e7499..8ebc721 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/df/builder/InfiniteRecursionTest.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/df/builder/InfiniteRecursionTest.java
@@ -26,7 +26,7 @@ import org.apache.mahout.classifier.df.data.Utils;
import org.junit.Test;
import java.util.Random;
-
+@Deprecated
public final class InfiniteRecursionTest extends MahoutTestCase {
private static final double[][] dData = {
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/data/DataConverterTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/data/DataConverterTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/data/DataConverterTest.java
index 39858cf..dfae61d 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/df/data/DataConverterTest.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/df/data/DataConverterTest.java
@@ -22,7 +22,7 @@ import java.util.Random;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.RandomUtils;
import org.junit.Test;
-
+@Deprecated
public final class DataConverterTest extends MahoutTestCase {
private static final int ATTRIBUTE_COUNT = 10;
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/data/DataLoaderTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/data/DataLoaderTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/data/DataLoaderTest.java
index dce23db..aeb69fc 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/df/data/DataLoaderTest.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/df/data/DataLoaderTest.java
@@ -28,7 +28,7 @@ import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.classifier.df.data.Dataset.Attribute;
import org.junit.Test;
-
+@Deprecated
public final class DataLoaderTest extends MahoutTestCase {
private Random rng;
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/data/DataTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/data/DataTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/data/DataTest.java
index 86e4461..70ed7f6 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/df/data/DataTest.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/df/data/DataTest.java
@@ -24,7 +24,7 @@ import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.classifier.df.data.conditions.Condition;
import org.junit.Test;
-
+@Deprecated
public class DataTest extends MahoutTestCase {
private static final int ATTRIBUTE_COUNT = 10;
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/data/DatasetTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/data/DatasetTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/data/DatasetTest.java
index 3cdf65a..e5c9ee7 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/df/data/DatasetTest.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/df/data/DatasetTest.java
@@ -18,7 +18,7 @@ package org.apache.mahout.classifier.df.data;
import org.apache.mahout.common.MahoutTestCase;
import org.junit.Test;
-
+@Deprecated
public final class DatasetTest extends MahoutTestCase {
@Test
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/data/DescriptorUtilsTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/data/DescriptorUtilsTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/data/DescriptorUtilsTest.java
index 121e1f8..619f067 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/df/data/DescriptorUtilsTest.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/df/data/DescriptorUtilsTest.java
@@ -23,7 +23,7 @@ import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.classifier.df.data.Dataset.Attribute;
import org.junit.Test;
-
+@Deprecated
public final class DescriptorUtilsTest extends MahoutTestCase {
/**
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/data/Utils.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/data/Utils.java b/mr/src/test/java/org/apache/mahout/classifier/df/data/Utils.java
index 1cf8b6a..9b51ec9 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/df/data/Utils.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/df/data/Utils.java
@@ -36,6 +36,7 @@ import org.apache.mahout.common.MahoutTestCase;
* Helper methods used by the tests
*
*/
+@Deprecated
public final class Utils {
private Utils() {}
http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormatTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormatTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormatTest.java
index 0a4a034..6a17aa2 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormatTest.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormatTest.java
@@ -28,7 +28,7 @@ import org.apache.mahout.classifier.df.mapreduce.Builder;
import org.apache.mahout.classifier.df.mapreduce.inmem.InMemInputFormat.InMemInputSplit;
import org.apache.mahout.classifier.df.mapreduce.inmem.InMemInputFormat.InMemRecordReader;
import org.junit.Test;
-
+@Deprecated
public final class InMemInputFormatTest extends MahoutTestCase {
@Test