You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@bigtop.apache.org by rv...@apache.org on 2012/04/26 02:54:02 UTC

svn commit: r1330614 - /incubator/bigtop/trunk/bigtop-tests/test-artifacts/mahout/src/main/groovy/org/apache/bigtop/itest/mahout/smoke/TestMahoutExamples.groovy

Author: rvs
Date: Thu Apr 26 00:54:01 2012
New Revision: 1330614

URL: http://svn.apache.org/viewvc?rev=1330614&view=rev
Log:
BIGTOP-562. fix Mahout smoke test in trunk branch (Johnny Zhang via rvs)

Modified:
    incubator/bigtop/trunk/bigtop-tests/test-artifacts/mahout/src/main/groovy/org/apache/bigtop/itest/mahout/smoke/TestMahoutExamples.groovy

Modified: incubator/bigtop/trunk/bigtop-tests/test-artifacts/mahout/src/main/groovy/org/apache/bigtop/itest/mahout/smoke/TestMahoutExamples.groovy
URL: http://svn.apache.org/viewvc/incubator/bigtop/trunk/bigtop-tests/test-artifacts/mahout/src/main/groovy/org/apache/bigtop/itest/mahout/smoke/TestMahoutExamples.groovy?rev=1330614&r1=1330613&r2=1330614&view=diff
==============================================================================
--- incubator/bigtop/trunk/bigtop-tests/test-artifacts/mahout/src/main/groovy/org/apache/bigtop/itest/mahout/smoke/TestMahoutExamples.groovy (original)
+++ incubator/bigtop/trunk/bigtop-tests/test-artifacts/mahout/src/main/groovy/org/apache/bigtop/itest/mahout/smoke/TestMahoutExamples.groovy Thu Apr 26 00:54:01 2012
@@ -21,6 +21,7 @@ import static org.junit.Assert.assertEqu
 import static org.junit.Assert.assertNotNull;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
+import org.junit.After;
 import org.junit.Test;
 
 import org.apache.bigtop.itest.JarContent;
@@ -30,12 +31,6 @@ import org.apache.bigtop.itest.shell.She
  * Test Mahout examples shipped with the distribution.
  */
 public class TestMahoutExamples {
-  public static final String HADOOP_HOME =
-    System.getenv("HADOOP_HOME");
-  static {
-    assertNotNull("HADOOP_HOME is not set", HADOOP_HOME);
-  }
-
   public static final String TEMP_DIR = "/tmp/mahout.${(new Date().getTime())}";
   public static final String WORK_DIR = TEMP_DIR;
   private static Shell sh = new Shell("/bin/bash -s");
@@ -103,11 +98,18 @@ public class TestMahoutExamples {
     }
   }
 
-  @Test
+  @After
+  public void killHangingProcess() {
+    sh.exec("mapred job -list | grep 'Total jobs:0'");
+    if (sh.getRet() == 0) {
+      sh.exec("for jobid in `mapred job -list | grep 'RUNNING' |awk '{print \$1}'`;",
+              "do mapred job -kill \${jobid};",
+              "done");
+    }
+  }
+
+  @Test(timeout=1200000L)
   public void factorizeMovieLensRatings() {
-    // convert ratings
-    sh.exec("cat ${TEMP_DIR}/movielens/ml-1m/ratings.dat |sed -e s/::/,/g| cut -d, -f1,2,3 > ${TEMP_DIR}/movielens/ratings.csv");
-    assertEquals("Unexpected error from converting ratings", 0, sh.getRet());
     // put ratings in hdfs
     sh.exec("hadoop fs -mkdir ${WORK_DIR}/movielens",
             "hadoop fs -put ${TEMP_DIR}/movielens/ratings.csv ${WORK_DIR}/movielens/ratings.csv");
@@ -124,8 +126,14 @@ public class TestMahoutExamples {
     assertEquals("Unexpected error from running mahout", 0, sh.getRet());
 
     //compute predictions against the probe set, measure the error
-    sh.exec("mahout evaluateFactorizationParallel --output ${WORK_DIR}/als/rmse --pairs ${WORK_DIR}/dataset/probeSet/ " +
-            "--userFeatures ${WORK_DIR}/als/out/U/ --itemFeatures ${WORK_DIR}/als/out/M/");
+    sh.exec("mahout evaluateFactorization --output ${WORK_DIR}/als/rmse --input ${WORK_DIR}/dataset/probeSet/ " +
+            "--userFeatures ${WORK_DIR}/als/out/U/ --itemFeatures ${WORK_DIR}/als/out/M/ --tempDir ${WORK_DIR}/als/tmp");
+    assertEquals("Unexpected error from running mahout", 0, sh.getRet());
+
+    //compute recommendations
+    sh.exec("mahout recommendfactorized --input ${WORK_DIR}/als/out/userRatings/ --output ${WORK_DIR}/recommendations " +
+            "--userFeatures ${WORK_DIR}/als/out/U/ --itemFeatures ${WORK_DIR}/als/out/M/ " +
+            "--numRecommendations 6 --maxRating 5");
     assertEquals("Unexpected error from running mahout", 0, sh.getRet());
 
     // check that error has been calculated
@@ -134,6 +142,10 @@ public class TestMahoutExamples {
     // print the error
     sh.exec("hadoop fs -cat ${WORK_DIR}/als/rmse/rmse.txt");
     assertEquals("Unexpected error from running hadoop", 0, sh.getRet());
+
+    // check that recommendations have been calculated
+    sh.exec("hadoop fs -test -e ${WORK_DIR}/recommendations/part-m-00000");
+    assertEquals("${WORK_DIR}/recommendations/part-m-00000 does not exist", 0, sh.getRet());
   }
 
   // it's too much of a pain to use junit parameterized tests, so do it
@@ -147,37 +159,41 @@ public class TestMahoutExamples {
     assertEquals("Unexpected error from running mahout", 0, sh.getRet());
   }
 
-  @Test
+  @Test(timeout=900000L)
   public void clusterControlDataWithCanopy() {
     _clusterSyntheticControlData("canopy");
   }
 
-  @Test
+  @Test(timeout=900000L)
   public void clusterControlDataWithKMeans() {
     _clusterSyntheticControlData("kmeans");
   }
 
-  @Test
+  @Test(timeout=900000L)
   public void clusterControlDataWithFuzzyKMeans() {
     _clusterSyntheticControlData("fuzzykmeans");
   }
 
-  @Test
+  @Test(timeout=900000L)
   public void clusterControlDataWithDirichlet() {
     _clusterSyntheticControlData("dirichlet");
   }
 
-  @Test
+  @Test(timeout=900000L)
   public void clusterControlDataWithMeanShift() {
     _clusterSyntheticControlData("meanshift");
   }
 
-  @Test
+  @Test(timeout=7200000L)
   public void testReutersLDA() {
     // where does lda.algorithm come in?
     sh.exec("mahout org.apache.lucene.benchmark.utils.ExtractReuters ${TEMP_DIR}/reuters-sgm ${TEMP_DIR}/reuters-out");
     assertEquals("Unexpected error from running mahout", 0, sh.getRet());
-    sh.exec("MAHOUT_LOCAL=true mahout seqdirectory -i ${TEMP_DIR}/reuters-out -o ${TEMP_DIR}/reuters-out-seqdir -c UTF-8 -chunk 5");
+    // put ${TEMP_DIR}/reuters-out into hdfs: we have to run seqdirectory in mapreduce mode, so the files need to be in hdfs
+    sh.exec("hadoop fs -put ${TEMP_DIR}/reuters-out ${WORK_DIR}/reuters-out");
+    assertEquals("Unable to put reuters-out-seqdir in hdfs", 0, sh.getRet());   
+
+    sh.exec("mahout seqdirectory -i ${TEMP_DIR}/reuters-out -o ${TEMP_DIR}/reuters-out-seqdir -c UTF-8 -chunk 5");
     assertEquals("Unexpected error from running mahout", 0, sh.getRet());
     /*
     // reuters-out-seqdir exists on a local disk at this point,
@@ -189,11 +205,15 @@ public class TestMahoutExamples {
     sh.exec("""mahout seq2sparse \
     -i ${WORK_DIR}/reuters-out-seqdir/ \
     -o ${WORK_DIR}/reuters-out-seqdir-sparse-lda \
-    -wt tf -seq -nr 3 \
-  && \
-  mahout lda \
+    -wt tf -seq -nr 3 --namedVector""");
+    assertEquals("Unexpected error from running mahout", 0, sh.getRet());
+
+    sh.exec("hadoop fs -mkdir ${WORK_DIR}/reuters-lda");
+    assertEquals("Unable to make dir reuters-lda in hdfs", 0, sh.getRet());
+
+    sh.exec("""mahout lda \
     -i ${WORK_DIR}/reuters-out-seqdir-sparse-lda/tf-vectors \
-    -o ${WORK_DIR}/reuters-lda -k 20 -v 50000 -ow -x 20 \
+    -o ${WORK_DIR}/reuters-lda -k 20 -x 20 \
   && \
   mahout ldatopics \
     -i ${WORK_DIR}/reuters-lda/state-20 \
@@ -202,7 +222,7 @@ public class TestMahoutExamples {
     assertEquals("Unexpected error from running mahout", 0, sh.getRet());
   }
 
-  @Test
+  @Test(timeout=1200000L)
   public void testBayesNewsgroupClassifier() {
     sh.exec("""mahout org.apache.mahout.classifier.bayes.PrepareTwentyNewsgroups \
   -p ${TEMP_DIR}/20news-bydate/20news-bydate-train \
@@ -218,27 +238,27 @@ public class TestMahoutExamples {
     assertEquals("Unexpected error from running mahout", 0, sh.getRet());
 
     // put bayes-train-input and bayes-test-input in hdfs
+    sh.exec("hadoop fs -mkdir ${WORK_DIR}/20news-bydate");
     sh.exec("hadoop fs -put ${TEMP_DIR}/20news-bydate/bayes-train-input ${WORK_DIR}/20news-bydate/bayes-train-input");
     assertEquals("Unable to put bayes-train-input in hdfs", 0, sh.getRet());
     sh.exec("hadoop fs -put ${TEMP_DIR}/20news-bydate/bayes-test-input ${WORK_DIR}/20news-bydate/bayes-test-input");
     assertEquals("Unable to put bayes-test-input in hdfs", 0, sh.getRet());
 
     sh.exec("""mahout trainclassifier \
-  -i ${WORK_DIR}/20news-bydate/bayes-train-input \
-  -o ${WORK_DIR}/20news-bydate/bayes-model \
-  -type bayes \
-  -ng 1 \
-  -source hdfs""");
+-i ${WORK_DIR}/20news-bydate/bayes-train-input \
+-o ${WORK_DIR}/20news-bydate/bayes-model \
+-type bayes \
+-ng 1 \
+-source hdfs""");
     assertEquals("Unexpected error from running mahout", 0, sh.getRet());
     sh.exec("""mahout testclassifier \
-  -m ${WORK_DIR}/20news-bydate/bayes-model \
-  -d ${WORK_DIR}/20news-bydate/bayes-test-input \
-  -type bayes \
-  -ng 1 \
-  -source hdfs \
-  -method mapreduce""");
+-m ${WORK_DIR}/20news-bydate/bayes-model \
+-d ${WORK_DIR}/20news-bydate/bayes-test-input \
+-type bayes \
+-ng 1 \
+-source hdfs \
+-method mapreduce""");
     assertEquals("Unexpected error from running mahout", 0, sh.getRet());
 
   }
-
 }