You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ak...@apache.org on 2016/03/20 03:59:24 UTC

[1/4] mahout git commit: Support environment variable MAHOUT_WORK_DIR to allow users to specify an alternate temporary directory in examples.

Repository: mahout
Updated Branches:
  refs/heads/master 23267a0be -> b25a70a1b


Support environment variable MAHOUT_WORK_DIR to allow users to
specify an alternate temporary directory in examples.


Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/89e19c75
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/89e19c75
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/89e19c75

Branch: refs/heads/master
Commit: 89e19c75e80fd9523af5eb208cc13674db7a847f
Parents: 99d8ce2
Author: Albert Chu <ch...@llnl.gov>
Authored: Mon Nov 16 16:43:42 2015 -0800
Committer: Albert Chu <ch...@llnl.gov>
Committed: Thu Nov 19 13:45:45 2015 -0800

----------------------------------------------------------------------
 examples/bin/classify-20newsgroups.sh    | 6 +++++-
 examples/bin/classify-wikipedia.sh       | 6 +++++-
 examples/bin/cluster-reuters.sh          | 6 +++++-
 examples/bin/cluster-syntheticcontrol.sh | 6 +++++-
 examples/bin/factorize-movielens-1M.sh   | 9 +++++++--
 examples/bin/factorize-netflix.sh        | 6 +++++-
 6 files changed, 32 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mahout/blob/89e19c75/examples/bin/classify-20newsgroups.sh
----------------------------------------------------------------------
diff --git a/examples/bin/classify-20newsgroups.sh b/examples/bin/classify-20newsgroups.sh
index c58e9a0..6d7ab45 100755
--- a/examples/bin/classify-20newsgroups.sh
+++ b/examples/bin/classify-20newsgroups.sh
@@ -36,7 +36,11 @@ START_PATH=`pwd`
 # Set commands for dfs
 source ${START_PATH}/set-dfs-commands.sh
 
-WORK_DIR=/tmp/mahout-work-${USER}
+if [[ -z "$MAHOUT_WORK_DIR" ]]; then
+  WORK_DIR=/tmp/mahout-work-${USER}
+else
+  WORK_DIR=$MAHOUT_WORK_DIR
+fi
 algorithm=( cnaivebayes-MapReduce naivebayes-MapReduce cnaivebayes-Spark naivebayes-Spark sgd clean)
 if [ -n "$1" ]; then
   choice=$1

http://git-wip-us.apache.org/repos/asf/mahout/blob/89e19c75/examples/bin/classify-wikipedia.sh
----------------------------------------------------------------------
diff --git a/examples/bin/classify-wikipedia.sh b/examples/bin/classify-wikipedia.sh
index 68487dc..686c99d 100755
--- a/examples/bin/classify-wikipedia.sh
+++ b/examples/bin/classify-wikipedia.sh
@@ -42,7 +42,11 @@ START_PATH=`pwd`
 # Set commands for dfs
 source ${START_PATH}/set-dfs-commands.sh
 
-WORK_DIR=/tmp/mahout-work-wiki
+if [[ -z "$MAHOUT_WORK_DIR" ]]; then
+  WORK_DIR=/tmp/mahout-work-wiki
+else
+  WORK_DIR=$MAHOUT_WORK_DIR
+fi
 algorithm=( CBayes BinaryCBayes clean)
 if [ -n "$1" ]; then
   choice=$1

http://git-wip-us.apache.org/repos/asf/mahout/blob/89e19c75/examples/bin/cluster-reuters.sh
----------------------------------------------------------------------
diff --git a/examples/bin/cluster-reuters.sh b/examples/bin/cluster-reuters.sh
index d53aa00..6c42ab9 100755
--- a/examples/bin/cluster-reuters.sh
+++ b/examples/bin/cluster-reuters.sh
@@ -43,7 +43,11 @@ if [ ! -e $MAHOUT ]; then
   exit 1
 fi
 
-WORK_DIR=/tmp/mahout-work-${USER}
+if [[ -z "$MAHOUT_WORK_DIR" ]]; then
+  WORK_DIR=/tmp/mahout-work-${USER}
+else
+  WORK_DIR=$MAHOUT_WORK_DIR
+fi
 
 algorithm=( kmeans fuzzykmeans lda streamingkmeans clean)
 if [ -n "$1" ]; then

http://git-wip-us.apache.org/repos/asf/mahout/blob/89e19c75/examples/bin/cluster-syntheticcontrol.sh
----------------------------------------------------------------------
diff --git a/examples/bin/cluster-syntheticcontrol.sh b/examples/bin/cluster-syntheticcontrol.sh
index eab62be..5e1240f 100755
--- a/examples/bin/cluster-syntheticcontrol.sh
+++ b/examples/bin/cluster-syntheticcontrol.sh
@@ -48,7 +48,11 @@ START_PATH=`pwd`
 # Set commands for dfs
 source ${START_PATH}/set-dfs-commands.sh
 
-WORK_DIR=/tmp/mahout-work-${USER}
+if [[ -z "$MAHOUT_WORK_DIR" ]]; then
+  WORK_DIR=/tmp/mahout-work-${USER}
+else
+  WORK_DIR=$MAHOUT_WORK_DIR
+fi
 
 echo "creating work directory at ${WORK_DIR}"
 mkdir -p ${WORK_DIR}

http://git-wip-us.apache.org/repos/asf/mahout/blob/89e19c75/examples/bin/factorize-movielens-1M.sh
----------------------------------------------------------------------
diff --git a/examples/bin/factorize-movielens-1M.sh b/examples/bin/factorize-movielens-1M.sh
index 735e425..29730e1 100755
--- a/examples/bin/factorize-movielens-1M.sh
+++ b/examples/bin/factorize-movielens-1M.sh
@@ -43,7 +43,12 @@ fi
 export MAHOUT_LOCAL=true
 MAHOUT="$MAHOUT_HOME/bin/mahout"
 
-WORK_DIR=/tmp/mahout-work-${USER}
+if [[ -z "$MAHOUT_WORK_DIR" ]]; then
+  WORK_DIR=/tmp/mahout-work-${USER}
+else
+  WORK_DIR=$MAHOUT_WORK_DIR
+fi
+
 echo "creating work directory at ${WORK_DIR}"
 mkdir -p ${WORK_DIR}/movielens
 
@@ -77,4 +82,4 @@ shuf ${WORK_DIR}/recommendations/part-m-00000 |head
 echo -e "\n\n"
 
 echo "removing work directory"
-rm -rf ${WORK_DIR}
\ No newline at end of file
+rm -rf ${WORK_DIR}

http://git-wip-us.apache.org/repos/asf/mahout/blob/89e19c75/examples/bin/factorize-netflix.sh
----------------------------------------------------------------------
diff --git a/examples/bin/factorize-netflix.sh b/examples/bin/factorize-netflix.sh
index 856f775..26faf66 100755
--- a/examples/bin/factorize-netflix.sh
+++ b/examples/bin/factorize-netflix.sh
@@ -45,7 +45,11 @@ fi
 
 MAHOUT="../../bin/mahout"
 
-WORK_DIR=/tmp/mahout-work-${USER}
+if [[ -z "$MAHOUT_WORK_DIR" ]]; then
+  WORK_DIR=/tmp/mahout-work-${USER}
+else
+  WORK_DIR=$MAHOUT_WORK_DIR
+fi
 
 START_PATH=`pwd`
 


[2/4] mahout git commit: In examples, specify -p option to HDFS -mkdir option when potentially necessary.

Posted by ak...@apache.org.
In examples, pass the -p option to the HDFS -mkdir command where parent
directories may not already exist.


Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/59910108
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/59910108
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/59910108

Branch: refs/heads/master
Commit: 599101083ab7d31600b8c93b2efd8cea4c0a30bf
Parents: 89e19c7
Author: Albert Chu <ch...@llnl.gov>
Authored: Thu Nov 19 13:46:05 2015 -0800
Committer: Albert Chu <ch...@llnl.gov>
Committed: Thu Nov 19 13:46:05 2015 -0800

----------------------------------------------------------------------
 examples/bin/classify-20newsgroups.sh | 2 +-
 examples/bin/classify-wikipedia.sh    | 2 +-
 examples/bin/cluster-reuters.sh       | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mahout/blob/59910108/examples/bin/classify-20newsgroups.sh
----------------------------------------------------------------------
diff --git a/examples/bin/classify-20newsgroups.sh b/examples/bin/classify-20newsgroups.sh
index 6d7ab45..f47d5c5 100755
--- a/examples/bin/classify-20newsgroups.sh
+++ b/examples/bin/classify-20newsgroups.sh
@@ -109,7 +109,7 @@ if  ( [ "x$alg" == "xnaivebayes-MapReduce" ] ||  [ "x$alg" == "xcnaivebayes-MapR
     echo "Copying 20newsgroups data to HDFS"
     set +e
     $DFSRM ${WORK_DIR}/20news-all
-    $DFS -mkdir ${WORK_DIR}
+    $DFS -mkdir -p ${WORK_DIR}
     $DFS -mkdir ${WORK_DIR}/20news-all
     set -e
     if [ $HVERSION -eq "1" ] ; then

http://git-wip-us.apache.org/repos/asf/mahout/blob/59910108/examples/bin/classify-wikipedia.sh
----------------------------------------------------------------------
diff --git a/examples/bin/classify-wikipedia.sh b/examples/bin/classify-wikipedia.sh
index 686c99d..8a7889f 100755
--- a/examples/bin/classify-wikipedia.sh
+++ b/examples/bin/classify-wikipedia.sh
@@ -114,7 +114,7 @@ if [ "x$alg" == "xCBayes" ] || [ "x$alg" == "xBinaryCBayes" ] ; then
     echo "Copying wikipedia data to HDFS"
     set +e
     $DFSRM ${WORK_DIR}/wikixml
-    $DFS -mkdir ${WORK_DIR}
+    $DFS -mkdir -p ${WORK_DIR}
     set -e
     $DFS -put ${WORK_DIR}/wikixml ${WORK_DIR}/wikixml
   fi

http://git-wip-us.apache.org/repos/asf/mahout/blob/59910108/examples/bin/cluster-reuters.sh
----------------------------------------------------------------------
diff --git a/examples/bin/cluster-reuters.sh b/examples/bin/cluster-reuters.sh
index 6c42ab9..49f6c94 100755
--- a/examples/bin/cluster-reuters.sh
+++ b/examples/bin/cluster-reuters.sh
@@ -102,7 +102,7 @@ if [ ! -e ${WORK_DIR}/reuters-out-seqdir ]; then
         set +e
         $DFSRM ${WORK_DIR}/reuters-sgm
         $DFSRM ${WORK_DIR}/reuters-out
-        $DFS -mkdir ${WORK_DIR}/
+        $DFS -mkdir -p ${WORK_DIR}/
         $DFS -mkdir ${WORK_DIR}/reuters-sgm
         $DFS -mkdir ${WORK_DIR}/reuters-out
         $DFS -put ${WORK_DIR}/reuters-sgm ${WORK_DIR}/reuters-sgm


[4/4] mahout git commit: Adding instructions for MAHOUT-1794 to the readme.

Posted by ak...@apache.org.
Adding instructions for MAHOUT-1794 to the readme.


Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/b25a70a1
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/b25a70a1
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/b25a70a1

Branch: refs/heads/master
Commit: b25a70a1bc6b9f8cb6c89947e0eaba5588463652
Parents: 67f0093
Author: Andrew Musselman <ak...@apache.org>
Authored: Sat Mar 19 14:46:48 2016 -0700
Committer: Andrew Musselman <ak...@apache.org>
Committed: Sat Mar 19 14:46:48 2016 -0700

----------------------------------------------------------------------
 examples/bin/README.txt | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mahout/blob/b25a70a1/examples/bin/README.txt
----------------------------------------------------------------------
diff --git a/examples/bin/README.txt b/examples/bin/README.txt
index 503a687..7ad3a38 100644
--- a/examples/bin/README.txt
+++ b/examples/bin/README.txt
@@ -1,5 +1,8 @@
 This directory contains helpful shell scripts for working with some of Mahout's examples.  
 
+To set a non-default temporary work directory: `export MAHOUT_WORK_DIR=/path/in/hdfs/to/temp/dir`
+  Note that this requires the same path to be writable both on the local file system as well as on HDFS.
+
 Here's a description of what each does:
 
 classify-20newsgroups.sh -- Run SGD and Bayes classifiers over the classic 20 News Groups.  Downloads the data set automatically.
@@ -7,4 +10,4 @@ cluster-reuters.sh -- Cluster the Reuters data set using a variety of algorithms
 cluster-syntheticcontrol.sh -- Cluster the Synthetic Control data set.  Downloads the data set automatically.
 factorize-movielens-1m.sh -- Run the Alternating Least Squares Recommender on the Grouplens data set (size 1M).
 factorize-netflix.sh -- (Deprecated due to lack of availability of the data set) Run the ALS Recommender on the Netflix data set.
-spark-document-classifier.mscala -- A mahout-shell script which trains and tests a Naive Bayes model on the Wikipedia XML dump and defines simple methods to classify new text.
\ No newline at end of file
+spark-document-classifier.mscala -- A mahout-shell script which trains and tests a Naive Bayes model on the Wikipedia XML dump and defines simple methods to classify new text.


[3/4] mahout git commit: Merge branch 'mahoutworkdir' into MAHOUT-1794; pulling PR into a branch to work on.

Posted by ak...@apache.org.
Merge branch 'mahoutworkdir' into MAHOUT-1794; pulling PR into a branch to work on.


Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/67f00930
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/67f00930
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/67f00930

Branch: refs/heads/master
Commit: 67f00930a8412be39de2c7b2abc9936f634f13d6
Parents: 23267a0 5991010
Author: Andrew Musselman <ak...@apache.org>
Authored: Thu Mar 17 18:34:41 2016 -0700
Committer: Andrew Musselman <ak...@apache.org>
Committed: Thu Mar 17 18:34:41 2016 -0700

----------------------------------------------------------------------
 examples/bin/classify-20newsgroups.sh    | 8 ++++++--
 examples/bin/classify-wikipedia.sh       | 8 ++++++--
 examples/bin/cluster-reuters.sh          | 8 ++++++--
 examples/bin/cluster-syntheticcontrol.sh | 6 +++++-
 examples/bin/factorize-movielens-1M.sh   | 9 +++++++--
 examples/bin/factorize-netflix.sh        | 6 +++++-
 6 files changed, 35 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mahout/blob/67f00930/examples/bin/cluster-syntheticcontrol.sh
----------------------------------------------------------------------