You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ak...@apache.org on 2016/03/20 03:59:24 UTC
[1/4] mahout git commit: Support environment variable MAHOUT_WORK_DIR
to allow users to specify an alternate temporary directory in examples.
Repository: mahout
Updated Branches:
refs/heads/master 23267a0be -> b25a70a1b
Support environment variable MAHOUT_WORK_DIR to allow users to
specify an alternate temporary directory in examples.
Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/89e19c75
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/89e19c75
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/89e19c75
Branch: refs/heads/master
Commit: 89e19c75e80fd9523af5eb208cc13674db7a847f
Parents: 99d8ce2
Author: Albert Chu <ch...@llnl.gov>
Authored: Mon Nov 16 16:43:42 2015 -0800
Committer: Albert Chu <ch...@llnl.gov>
Committed: Thu Nov 19 13:45:45 2015 -0800
----------------------------------------------------------------------
examples/bin/classify-20newsgroups.sh | 6 +++++-
examples/bin/classify-wikipedia.sh | 6 +++++-
examples/bin/cluster-reuters.sh | 6 +++++-
examples/bin/cluster-syntheticcontrol.sh | 6 +++++-
examples/bin/factorize-movielens-1M.sh | 9 +++++++--
examples/bin/factorize-netflix.sh | 6 +++++-
6 files changed, 32 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mahout/blob/89e19c75/examples/bin/classify-20newsgroups.sh
----------------------------------------------------------------------
diff --git a/examples/bin/classify-20newsgroups.sh b/examples/bin/classify-20newsgroups.sh
index c58e9a0..6d7ab45 100755
--- a/examples/bin/classify-20newsgroups.sh
+++ b/examples/bin/classify-20newsgroups.sh
@@ -36,7 +36,11 @@ START_PATH=`pwd`
# Set commands for dfs
source ${START_PATH}/set-dfs-commands.sh
-WORK_DIR=/tmp/mahout-work-${USER}
+if [[ -z "$MAHOUT_WORK_DIR" ]]; then
+ WORK_DIR=/tmp/mahout-work-${USER}
+else
+ WORK_DIR=$MAHOUT_WORK_DIR
+fi
algorithm=( cnaivebayes-MapReduce naivebayes-MapReduce cnaivebayes-Spark naivebayes-Spark sgd clean)
if [ -n "$1" ]; then
choice=$1
http://git-wip-us.apache.org/repos/asf/mahout/blob/89e19c75/examples/bin/classify-wikipedia.sh
----------------------------------------------------------------------
diff --git a/examples/bin/classify-wikipedia.sh b/examples/bin/classify-wikipedia.sh
index 68487dc..686c99d 100755
--- a/examples/bin/classify-wikipedia.sh
+++ b/examples/bin/classify-wikipedia.sh
@@ -42,7 +42,11 @@ START_PATH=`pwd`
# Set commands for dfs
source ${START_PATH}/set-dfs-commands.sh
-WORK_DIR=/tmp/mahout-work-wiki
+if [[ -z "$MAHOUT_WORK_DIR" ]]; then
+ WORK_DIR=/tmp/mahout-work-wiki
+else
+ WORK_DIR=$MAHOUT_WORK_DIR
+fi
algorithm=( CBayes BinaryCBayes clean)
if [ -n "$1" ]; then
choice=$1
http://git-wip-us.apache.org/repos/asf/mahout/blob/89e19c75/examples/bin/cluster-reuters.sh
----------------------------------------------------------------------
diff --git a/examples/bin/cluster-reuters.sh b/examples/bin/cluster-reuters.sh
index d53aa00..6c42ab9 100755
--- a/examples/bin/cluster-reuters.sh
+++ b/examples/bin/cluster-reuters.sh
@@ -43,7 +43,11 @@ if [ ! -e $MAHOUT ]; then
exit 1
fi
-WORK_DIR=/tmp/mahout-work-${USER}
+if [[ -z "$MAHOUT_WORK_DIR" ]]; then
+ WORK_DIR=/tmp/mahout-work-${USER}
+else
+ WORK_DIR=$MAHOUT_WORK_DIR
+fi
algorithm=( kmeans fuzzykmeans lda streamingkmeans clean)
if [ -n "$1" ]; then
http://git-wip-us.apache.org/repos/asf/mahout/blob/89e19c75/examples/bin/cluster-syntheticcontrol.sh
----------------------------------------------------------------------
diff --git a/examples/bin/cluster-syntheticcontrol.sh b/examples/bin/cluster-syntheticcontrol.sh
index eab62be..5e1240f 100755
--- a/examples/bin/cluster-syntheticcontrol.sh
+++ b/examples/bin/cluster-syntheticcontrol.sh
@@ -48,7 +48,11 @@ START_PATH=`pwd`
# Set commands for dfs
source ${START_PATH}/set-dfs-commands.sh
-WORK_DIR=/tmp/mahout-work-${USER}
+if [[ -z "$MAHOUT_WORK_DIR" ]]; then
+ WORK_DIR=/tmp/mahout-work-${USER}
+else
+ WORK_DIR=$MAHOUT_WORK_DIR
+fi
echo "creating work directory at ${WORK_DIR}"
mkdir -p ${WORK_DIR}
http://git-wip-us.apache.org/repos/asf/mahout/blob/89e19c75/examples/bin/factorize-movielens-1M.sh
----------------------------------------------------------------------
diff --git a/examples/bin/factorize-movielens-1M.sh b/examples/bin/factorize-movielens-1M.sh
index 735e425..29730e1 100755
--- a/examples/bin/factorize-movielens-1M.sh
+++ b/examples/bin/factorize-movielens-1M.sh
@@ -43,7 +43,12 @@ fi
export MAHOUT_LOCAL=true
MAHOUT="$MAHOUT_HOME/bin/mahout"
-WORK_DIR=/tmp/mahout-work-${USER}
+if [[ -z "$MAHOUT_WORK_DIR" ]]; then
+ WORK_DIR=/tmp/mahout-work-${USER}
+else
+ WORK_DIR=$MAHOUT_WORK_DIR
+fi
+
echo "creating work directory at ${WORK_DIR}"
mkdir -p ${WORK_DIR}/movielens
@@ -77,4 +82,4 @@ shuf ${WORK_DIR}/recommendations/part-m-00000 |head
echo -e "\n\n"
echo "removing work directory"
-rm -rf ${WORK_DIR}
\ No newline at end of file
+rm -rf ${WORK_DIR}
http://git-wip-us.apache.org/repos/asf/mahout/blob/89e19c75/examples/bin/factorize-netflix.sh
----------------------------------------------------------------------
diff --git a/examples/bin/factorize-netflix.sh b/examples/bin/factorize-netflix.sh
index 856f775..26faf66 100755
--- a/examples/bin/factorize-netflix.sh
+++ b/examples/bin/factorize-netflix.sh
@@ -45,7 +45,11 @@ fi
MAHOUT="../../bin/mahout"
-WORK_DIR=/tmp/mahout-work-${USER}
+if [[ -z "$MAHOUT_WORK_DIR" ]]; then
+ WORK_DIR=/tmp/mahout-work-${USER}
+else
+ WORK_DIR=$MAHOUT_WORK_DIR
+fi
START_PATH=`pwd`
[2/4] mahout git commit: In examples,
pass the -p option to HDFS -mkdir when potentially necessary.
Posted by ak...@apache.org.
In examples, pass the -p option to HDFS -mkdir when potentially
necessary.
Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/59910108
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/59910108
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/59910108
Branch: refs/heads/master
Commit: 599101083ab7d31600b8c93b2efd8cea4c0a30bf
Parents: 89e19c7
Author: Albert Chu <ch...@llnl.gov>
Authored: Thu Nov 19 13:46:05 2015 -0800
Committer: Albert Chu <ch...@llnl.gov>
Committed: Thu Nov 19 13:46:05 2015 -0800
----------------------------------------------------------------------
examples/bin/classify-20newsgroups.sh | 2 +-
examples/bin/classify-wikipedia.sh | 2 +-
examples/bin/cluster-reuters.sh | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mahout/blob/59910108/examples/bin/classify-20newsgroups.sh
----------------------------------------------------------------------
diff --git a/examples/bin/classify-20newsgroups.sh b/examples/bin/classify-20newsgroups.sh
index 6d7ab45..f47d5c5 100755
--- a/examples/bin/classify-20newsgroups.sh
+++ b/examples/bin/classify-20newsgroups.sh
@@ -109,7 +109,7 @@ if ( [ "x$alg" == "xnaivebayes-MapReduce" ] || [ "x$alg" == "xcnaivebayes-MapR
echo "Copying 20newsgroups data to HDFS"
set +e
$DFSRM ${WORK_DIR}/20news-all
- $DFS -mkdir ${WORK_DIR}
+ $DFS -mkdir -p ${WORK_DIR}
$DFS -mkdir ${WORK_DIR}/20news-all
set -e
if [ $HVERSION -eq "1" ] ; then
http://git-wip-us.apache.org/repos/asf/mahout/blob/59910108/examples/bin/classify-wikipedia.sh
----------------------------------------------------------------------
diff --git a/examples/bin/classify-wikipedia.sh b/examples/bin/classify-wikipedia.sh
index 686c99d..8a7889f 100755
--- a/examples/bin/classify-wikipedia.sh
+++ b/examples/bin/classify-wikipedia.sh
@@ -114,7 +114,7 @@ if [ "x$alg" == "xCBayes" ] || [ "x$alg" == "xBinaryCBayes" ] ; then
echo "Copying wikipedia data to HDFS"
set +e
$DFSRM ${WORK_DIR}/wikixml
- $DFS -mkdir ${WORK_DIR}
+ $DFS -mkdir -p ${WORK_DIR}
set -e
$DFS -put ${WORK_DIR}/wikixml ${WORK_DIR}/wikixml
fi
http://git-wip-us.apache.org/repos/asf/mahout/blob/59910108/examples/bin/cluster-reuters.sh
----------------------------------------------------------------------
diff --git a/examples/bin/cluster-reuters.sh b/examples/bin/cluster-reuters.sh
index 6c42ab9..49f6c94 100755
--- a/examples/bin/cluster-reuters.sh
+++ b/examples/bin/cluster-reuters.sh
@@ -102,7 +102,7 @@ if [ ! -e ${WORK_DIR}/reuters-out-seqdir ]; then
set +e
$DFSRM ${WORK_DIR}/reuters-sgm
$DFSRM ${WORK_DIR}/reuters-out
- $DFS -mkdir ${WORK_DIR}/
+ $DFS -mkdir -p ${WORK_DIR}/
$DFS -mkdir ${WORK_DIR}/reuters-sgm
$DFS -mkdir ${WORK_DIR}/reuters-out
$DFS -put ${WORK_DIR}/reuters-sgm ${WORK_DIR}/reuters-sgm
[4/4] mahout git commit: Adding instructions for MAHOUT-1794 to the
readme.
Posted by ak...@apache.org.
Adding instructions for MAHOUT-1794 to the readme.
Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/b25a70a1
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/b25a70a1
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/b25a70a1
Branch: refs/heads/master
Commit: b25a70a1bc6b9f8cb6c89947e0eaba5588463652
Parents: 67f0093
Author: Andrew Musselman <ak...@apache.org>
Authored: Sat Mar 19 14:46:48 2016 -0700
Committer: Andrew Musselman <ak...@apache.org>
Committed: Sat Mar 19 14:46:48 2016 -0700
----------------------------------------------------------------------
examples/bin/README.txt | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mahout/blob/b25a70a1/examples/bin/README.txt
----------------------------------------------------------------------
diff --git a/examples/bin/README.txt b/examples/bin/README.txt
index 503a687..7ad3a38 100644
--- a/examples/bin/README.txt
+++ b/examples/bin/README.txt
@@ -1,5 +1,8 @@
This directory contains helpful shell scripts for working with some of Mahout's examples.
+To set a non-default temporary work directory: `export MAHOUT_WORK_DIR=/path/in/hdfs/to/temp/dir`
+ Note that this requires the same path to be writable on both the local file system and HDFS.
+
Here's a description of what each does:
classify-20newsgroups.sh -- Run SGD and Bayes classifiers over the classic 20 News Groups. Downloads the data set automatically.
@@ -7,4 +10,4 @@ cluster-reuters.sh -- Cluster the Reuters data set using a variety of algorithms
cluster-syntheticcontrol.sh -- Cluster the Synthetic Control data set. Downloads the data set automatically.
factorize-movielens-1m.sh -- Run the Alternating Least Squares Recommender on the Grouplens data set (size 1M).
factorize-netflix.sh -- (Deprecated due to lack of availability of the data set) Run the ALS Recommender on the Netflix data set.
-spark-document-classifier.mscala -- A mahout-shell script which trains and tests a Naive Bayes model on the Wikipedia XML dump and defines simple methods to classify new text.
\ No newline at end of file
+spark-document-classifier.mscala -- A mahout-shell script which trains and tests a Naive Bayes model on the Wikipedia XML dump and defines simple methods to classify new text.
[3/4] mahout git commit: Merge branch 'mahoutworkdir' into
MAHOUT-1794; pulling PR into a branch to work on.
Posted by ak...@apache.org.
Merge branch 'mahoutworkdir' into MAHOUT-1794; pulling PR into a branch to work on.
Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/67f00930
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/67f00930
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/67f00930
Branch: refs/heads/master
Commit: 67f00930a8412be39de2c7b2abc9936f634f13d6
Parents: 23267a0 5991010
Author: Andrew Musselman <ak...@apache.org>
Authored: Thu Mar 17 18:34:41 2016 -0700
Committer: Andrew Musselman <ak...@apache.org>
Committed: Thu Mar 17 18:34:41 2016 -0700
----------------------------------------------------------------------
examples/bin/classify-20newsgroups.sh | 8 ++++++--
examples/bin/classify-wikipedia.sh | 8 ++++++--
examples/bin/cluster-reuters.sh | 8 ++++++--
examples/bin/cluster-syntheticcontrol.sh | 6 +++++-
examples/bin/factorize-movielens-1M.sh | 9 +++++++--
examples/bin/factorize-netflix.sh | 6 +++++-
6 files changed, 35 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mahout/blob/67f00930/examples/bin/cluster-syntheticcontrol.sh
----------------------------------------------------------------------