You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sm...@apache.org on 2015/10/27 05:20:12 UTC

[2/4] mahout git commit: MAHOUT-1775 FileNotFoundException caused by aborting the process of downloading Wikipedia dataset, closes apache/mahout#162

MAHOUT-1775 FileNotFoundException caused by aborting the process of downloading Wikipedia dataset, closes apache/mahout#162


Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/d53f0a5d
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/d53f0a5d
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/d53f0a5d

Branch: refs/heads/flink-binding
Commit: d53f0a5d78000045bb12e90e3a6808cc2c369450
Parents: e943b0a
Author: smarthi <sm...@apache.org>
Authored: Sun Oct 25 00:29:47 2015 -0400
Committer: smarthi <sm...@apache.org>
Committed: Sun Oct 25 09:57:46 2015 -0400

----------------------------------------------------------------------
 examples/bin/classify-wikipedia.sh | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mahout/blob/d53f0a5d/examples/bin/classify-wikipedia.sh
----------------------------------------------------------------------
diff --git a/examples/bin/classify-wikipedia.sh b/examples/bin/classify-wikipedia.sh
index 470a81c..68487dc 100755
--- a/examples/bin/classify-wikipedia.sh
+++ b/examples/bin/classify-wikipedia.sh
@@ -63,6 +63,8 @@ if [ "x$alg" != "xclean" ]; then
   mkdir -p ${WORK_DIR}
     if [ ! -e ${WORK_DIR}/wikixml ]; then
         mkdir -p ${WORK_DIR}/wikixml
+    fi
+    if [ ! -e ${WORK_DIR}/wikixml/enwiki-latest-pages-articles.xml.bz2 ]; then
         echo "Downloading wikipedia XML dump"
         ########################################################   
         #  Datasets: uncomment and run "clean" to change dataset   
@@ -74,10 +76,11 @@ if [ "x$alg" != "xclean" ]; then
         ######### full wikipedia dump: 10G zipped
         #curl http://dumps.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles.xml.bz2 -o ${WORK_DIR}/wikixml/enwiki-latest-pages-articles.xml.bz2
         ########################################################
-      
-      echo "Extracting..."
+    fi
+    if [ ! -e ${WORK_DIR}/wikixml/enwiki-latest-pages-articles.xml ]; then
+        echo "Extracting..."
        
-      cd ${WORK_DIR}/wikixml && bunzip2 enwiki-latest-pages-articles.xml.bz2 && cd .. && cd ..
+        cd ${WORK_DIR}/wikixml && bunzip2 enwiki-latest-pages-articles.xml.bz2 && cd .. && cd ..
     fi
 
 echo $START_PATH
@@ -186,4 +189,4 @@ elif [ "x$alg" == "xclean" ]; then
   rm -rf $WORK_DIR
   $DFSRM $WORK_DIR
 fi
-# Remove the work directory
\ No newline at end of file
+# Remove the work directory