You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sm...@apache.org on 2015/10/27 05:20:12 UTC
[2/4] mahout git commit: MAHOUT-1775 FileNotFoundException caused by
aborting the process of downloading Wikipedia dataset,
closes apache/mahout#162
MAHOUT-1775 FileNotFoundException caused by aborting the process of downloading Wikipedia dataset, closes apache/mahout#162
Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/d53f0a5d
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/d53f0a5d
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/d53f0a5d
Branch: refs/heads/flink-binding
Commit: d53f0a5d78000045bb12e90e3a6808cc2c369450
Parents: e943b0a
Author: smarthi <sm...@apache.org>
Authored: Sun Oct 25 00:29:47 2015 -0400
Committer: smarthi <sm...@apache.org>
Committed: Sun Oct 25 09:57:46 2015 -0400
----------------------------------------------------------------------
examples/bin/classify-wikipedia.sh | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mahout/blob/d53f0a5d/examples/bin/classify-wikipedia.sh
----------------------------------------------------------------------
diff --git a/examples/bin/classify-wikipedia.sh b/examples/bin/classify-wikipedia.sh
index 470a81c..68487dc 100755
--- a/examples/bin/classify-wikipedia.sh
+++ b/examples/bin/classify-wikipedia.sh
@@ -63,6 +63,8 @@ if [ "x$alg" != "xclean" ]; then
mkdir -p ${WORK_DIR}
if [ ! -e ${WORK_DIR}/wikixml ]; then
mkdir -p ${WORK_DIR}/wikixml
+ fi
+ if [ ! -e ${WORK_DIR}/wikixml/enwiki-latest-pages-articles.xml.bz2 ]; then
echo "Downloading wikipedia XML dump"
########################################################
# Datasets: uncomment and run "clean" to change dataset
@@ -74,10 +76,11 @@ if [ "x$alg" != "xclean" ]; then
######### full wikipedia dump: 10G zipped
#curl http://dumps.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles.xml.bz2 -o ${WORK_DIR}/wikixml/enwiki-latest-pages-articles.xml.bz2
########################################################
-
- echo "Extracting..."
+ fi
+ if [ ! -e ${WORK_DIR}/wikixml/enwiki-latest-pages-articles.xml ]; then
+ echo "Extracting..."
- cd ${WORK_DIR}/wikixml && bunzip2 enwiki-latest-pages-articles.xml.bz2 && cd .. && cd ..
+ cd ${WORK_DIR}/wikixml && bunzip2 enwiki-latest-pages-articles.xml.bz2 && cd .. && cd ..
fi
echo $START_PATH
@@ -186,4 +189,4 @@ elif [ "x$alg" == "xclean" ]; then
rm -rf $WORK_DIR
$DFSRM $WORK_DIR
fi
-# Remove the work directory
\ No newline at end of file
+# Remove the work directory