You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@bigtop.apache.org by rv...@apache.org on 2012/04/26 02:54:02 UTC
svn commit: r1330614 -
/incubator/bigtop/trunk/bigtop-tests/test-artifacts/mahout/src/main/groovy/org/apache/bigtop/itest/mahout/smoke/TestMahoutExamples.groovy
Author: rvs
Date: Thu Apr 26 00:54:01 2012
New Revision: 1330614
URL: http://svn.apache.org/viewvc?rev=1330614&view=rev
Log:
BIGTOP-562. fix Mahout smoke test in trunk branch (Johnny Zhang via rvs)
Modified:
incubator/bigtop/trunk/bigtop-tests/test-artifacts/mahout/src/main/groovy/org/apache/bigtop/itest/mahout/smoke/TestMahoutExamples.groovy
Modified: incubator/bigtop/trunk/bigtop-tests/test-artifacts/mahout/src/main/groovy/org/apache/bigtop/itest/mahout/smoke/TestMahoutExamples.groovy
URL: http://svn.apache.org/viewvc/incubator/bigtop/trunk/bigtop-tests/test-artifacts/mahout/src/main/groovy/org/apache/bigtop/itest/mahout/smoke/TestMahoutExamples.groovy?rev=1330614&r1=1330613&r2=1330614&view=diff
==============================================================================
--- incubator/bigtop/trunk/bigtop-tests/test-artifacts/mahout/src/main/groovy/org/apache/bigtop/itest/mahout/smoke/TestMahoutExamples.groovy (original)
+++ incubator/bigtop/trunk/bigtop-tests/test-artifacts/mahout/src/main/groovy/org/apache/bigtop/itest/mahout/smoke/TestMahoutExamples.groovy Thu Apr 26 00:54:01 2012
@@ -21,6 +21,7 @@ import static org.junit.Assert.assertEqu
import static org.junit.Assert.assertNotNull;
import org.junit.AfterClass;
import org.junit.BeforeClass;
+import org.junit.After;
import org.junit.Test;
import org.apache.bigtop.itest.JarContent;
@@ -30,12 +31,6 @@ import org.apache.bigtop.itest.shell.She
* Test Mahout examples shipped with the distribution.
*/
public class TestMahoutExamples {
- public static final String HADOOP_HOME =
- System.getenv("HADOOP_HOME");
- static {
- assertNotNull("HADOOP_HOME is not set", HADOOP_HOME);
- }
-
public static final String TEMP_DIR = "/tmp/mahout.${(new Date().getTime())}";
public static final String WORK_DIR = TEMP_DIR;
private static Shell sh = new Shell("/bin/bash -s");
@@ -103,11 +98,18 @@ public class TestMahoutExamples {
}
}
- @Test
+ @After
+ public void killHangingProcess() { // runs after every test: kill MapReduce jobs the test left in RUNNING state
+ sh.exec("mapred job -list | grep 'Total jobs:0'"); // grep exits 0 only when the listing reports zero jobs
+ if (sh.getRet() != 0) { // non-zero exit: the zero-jobs line was not found, so there may be jobs to kill
+ sh.exec("for jobid in `mapred job -list | grep 'RUNNING' |awk '{print \$1}'`;",
+ "do mapred job -kill \${jobid};",
+ "done");
+ }
+ }
+
+ @Test(timeout=1200000L)
public void factorizeMovieLensRatings() {
- // convert ratings
- sh.exec("cat ${TEMP_DIR}/movielens/ml-1m/ratings.dat |sed -e s/::/,/g| cut -d, -f1,2,3 > ${TEMP_DIR}/movielens/ratings.csv");
- assertEquals("Unexpected error from converting ratings", 0, sh.getRet());
// put ratings in hdfs
sh.exec("hadoop fs -mkdir ${WORK_DIR}/movielens",
"hadoop fs -put ${TEMP_DIR}/movielens/ratings.csv ${WORK_DIR}/movielens/ratings.csv");
@@ -124,8 +126,14 @@ public class TestMahoutExamples {
assertEquals("Unexpected error from running mahout", 0, sh.getRet());
//compute predictions against the probe set, measure the error
- sh.exec("mahout evaluateFactorizationParallel --output ${WORK_DIR}/als/rmse --pairs ${WORK_DIR}/dataset/probeSet/ " +
- "--userFeatures ${WORK_DIR}/als/out/U/ --itemFeatures ${WORK_DIR}/als/out/M/");
+ sh.exec("mahout evaluateFactorization --output ${WORK_DIR}/als/rmse --input ${WORK_DIR}/dataset/probeSet/ " +
+ "--userFeatures ${WORK_DIR}/als/out/U/ --itemFeatures ${WORK_DIR}/als/out/M/ --tempDir ${WORK_DIR}/als/tmp");
+ assertEquals("Unexpected error from running mahout", 0, sh.getRet());
+
+ //compute recommendations
+ sh.exec("mahout recommendfactorized --input ${WORK_DIR}/als/out/userRatings/ --output ${WORK_DIR}/recommendations " +
+ "--userFeatures ${WORK_DIR}/als/out/U/ --itemFeatures ${WORK_DIR}/als/out/M/ " +
+ "--numRecommendations 6 --maxRating 5");
assertEquals("Unexpected error from running mahout", 0, sh.getRet());
// check that error has been calculated
@@ -134,6 +142,10 @@ public class TestMahoutExamples {
// print the error
sh.exec("hadoop fs -cat ${WORK_DIR}/als/rmse/rmse.txt");
assertEquals("Unexpected error from running hadoop", 0, sh.getRet());
+
+ // check that recommendations have been calculated
+ sh.exec("hadoop fs -test -e ${WORK_DIR}/recommendations/part-m-00000");
+ assertEquals("${WORK_DIR}/recommendations/part-m-00000 does not exist", 0, sh.getRet());
}
// it's too much of a pain to use junit parameterized tests, so do it
@@ -147,37 +159,41 @@ public class TestMahoutExamples {
assertEquals("Unexpected error from running mahout", 0, sh.getRet());
}
- @Test
+ @Test(timeout=900000L)
public void clusterControlDataWithCanopy() {
_clusterSyntheticControlData("canopy");
}
- @Test
+ @Test(timeout=900000L)
public void clusterControlDataWithKMeans() {
_clusterSyntheticControlData("kmeans");
}
- @Test
+ @Test(timeout=900000L)
public void clusterControlDataWithFuzzyKMeans() {
_clusterSyntheticControlData("fuzzykmeans");
}
- @Test
+ @Test(timeout=900000L)
public void clusterControlDataWithDirichlet() {
_clusterSyntheticControlData("dirichlet");
}
- @Test
+ @Test(timeout=900000L)
public void clusterControlDataWithMeanShift() {
_clusterSyntheticControlData("meanshift");
}
- @Test
+ @Test(timeout=7200000L)
public void testReutersLDA() {
// where does lda.algorithm come in?
sh.exec("mahout org.apache.lucene.benchmark.utils.ExtractReuters ${TEMP_DIR}/reuters-sgm ${TEMP_DIR}/reuters-out");
assertEquals("Unexpected error from running mahout", 0, sh.getRet());
- sh.exec("MAHOUT_LOCAL=true mahout seqdirectory -i ${TEMP_DIR}/reuters-out -o ${TEMP_DIR}/reuters-out-seqdir -c UTF-8 -chunk 5");
+ // put ${TEMP_DIR}/reuters-out into HDFS: seqdirectory has to run in MapReduce mode, so its input files need to be in HDFS
+ sh.exec("hadoop fs -put ${TEMP_DIR}/reuters-out ${WORK_DIR}/reuters-out");
+ assertEquals("Unable to put reuters-out-seqdir in hdfs", 0, sh.getRet());
+
+ sh.exec("mahout seqdirectory -i ${TEMP_DIR}/reuters-out -o ${TEMP_DIR}/reuters-out-seqdir -c UTF-8 -chunk 5");
assertEquals("Unexpected error from running mahout", 0, sh.getRet());
/*
// reuters-out-seqdir exists on a local disk at this point,
@@ -189,11 +205,15 @@ public class TestMahoutExamples {
sh.exec("""mahout seq2sparse \
-i ${WORK_DIR}/reuters-out-seqdir/ \
-o ${WORK_DIR}/reuters-out-seqdir-sparse-lda \
- -wt tf -seq -nr 3 \
- && \
- mahout lda \
+ -wt tf -seq -nr 3 --namedVector""");
+ assertEquals("Unexpected error from running mahout", 0, sh.getRet());
+
+ sh.exec("hadoop fs -mkdir ${WORK_DIR}/reuters-lda");
+ assertEquals("Unable to make dir reuters-lda in hdfs", 0, sh.getRet());
+
+ sh.exec("""mahout lda \
-i ${WORK_DIR}/reuters-out-seqdir-sparse-lda/tf-vectors \
- -o ${WORK_DIR}/reuters-lda -k 20 -v 50000 -ow -x 20 \
+ -o ${WORK_DIR}/reuters-lda -k 20 -x 20 \
&& \
mahout ldatopics \
-i ${WORK_DIR}/reuters-lda/state-20 \
@@ -202,7 +222,7 @@ public class TestMahoutExamples {
assertEquals("Unexpected error from running mahout", 0, sh.getRet());
}
- @Test
+ @Test(timeout=1200000L)
public void testBayesNewsgroupClassifier() {
sh.exec("""mahout org.apache.mahout.classifier.bayes.PrepareTwentyNewsgroups \
-p ${TEMP_DIR}/20news-bydate/20news-bydate-train \
@@ -218,27 +238,27 @@ public class TestMahoutExamples {
assertEquals("Unexpected error from running mahout", 0, sh.getRet());
// put bayes-train-input and bayes-test-input in hdfs
+ sh.exec("hadoop fs -mkdir ${WORK_DIR}/20news-bydate");
sh.exec("hadoop fs -put ${TEMP_DIR}/20news-bydate/bayes-train-input ${WORK_DIR}/20news-bydate/bayes-train-input");
assertEquals("Unable to put bayes-train-input in hdfs", 0, sh.getRet());
sh.exec("hadoop fs -put ${TEMP_DIR}/20news-bydate/bayes-test-input ${WORK_DIR}/20news-bydate/bayes-test-input");
assertEquals("Unable to put bayes-test-input in hdfs", 0, sh.getRet());
sh.exec("""mahout trainclassifier \
- -i ${WORK_DIR}/20news-bydate/bayes-train-input \
- -o ${WORK_DIR}/20news-bydate/bayes-model \
- -type bayes \
- -ng 1 \
- -source hdfs""");
+-i ${WORK_DIR}/20news-bydate/bayes-train-input \
+-o ${WORK_DIR}/20news-bydate/bayes-model \
+-type bayes \
+-ng 1 \
+-source hdfs""");
assertEquals("Unexpected error from running mahout", 0, sh.getRet());
sh.exec("""mahout testclassifier \
- -m ${WORK_DIR}/20news-bydate/bayes-model \
- -d ${WORK_DIR}/20news-bydate/bayes-test-input \
- -type bayes \
- -ng 1 \
- -source hdfs \
- -method mapreduce""");
+-m ${WORK_DIR}/20news-bydate/bayes-model \
+-d ${WORK_DIR}/20news-bydate/bayes-test-input \
+-type bayes \
+-ng 1 \
+-source hdfs \
+-method mapreduce""");
assertEquals("Unexpected error from running mahout", 0, sh.getRet());
}
-
}