You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2014/09/27 23:57:10 UTC

svn commit: r1628010 - in /nutch/trunk: CHANGES.txt src/bin/crawl

Author: lewismc
Date: Sat Sep 27 21:57:10 2014
New Revision: 1628010

URL: http://svn.apache.org/r1628010
Log:
NUTCH-1853 Add commented out WebGraph executions to ./bin/crawl

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/bin/crawl

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1628010&r1=1628009&r2=1628010&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Sat Sep 27 21:57:10 2014
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Nutch Current Development
 
+* NUTCH-1853 Add commented out WebGraph executions to ./bin/crawl (lewismc)
+
 * NUTCH-1844 testresources/testcrawl not referenced anywhere in code (mattmann)
 
 * NUTCH-1839 Improve WebGraph CLI parsing (lewismc)

Modified: nutch/trunk/src/bin/crawl
URL: http://svn.apache.org/viewvc/nutch/trunk/src/bin/crawl?rev=1628010&r1=1628009&r2=1628010&view=diff
==============================================================================
--- nutch/trunk/src/bin/crawl (original)
+++ nutch/trunk/src/bin/crawl Sat Sep 27 21:57:10 2014
@@ -203,7 +203,45 @@ do
   else
       echo "Skipping indexing: no SOLR url provided."
   fi
+  
+  #######################################################
+  # The following commands fall into WebGraph territory
+  # and should be uncommented based on your requirements
+  #######################################################
+  #echo "Building WebGraph within $CRAWL_PATH on all segments in $CRAWL_PATH/segments/"
+  #"$bin/nutch" webgraph $commonOptions -filter -normalize -segmentDir "$CRAWL_PATH"/segments/ -webgraphdb "$CRAWL_PATH"
 
+  #if [ $? -ne 0 ]
+  # then exit $?
+  #fi
+
+  #echo "Running Loops Job on WebGraph within $CRAWL_PATH"
+  #"$bin/nutch" org.apache.nutch.scoring.webgraph.Loops $commonOptions -webgraphdb "$CRAWL_PATH"
+
+  #if [ $? -ne 0 ]
+  # then exit $?
+  #fi
+
+  #echo "Running LinkRank Algorithm on WebGraph within $CRAWL_PATH"
+  #"$bin/nutch" linkrank $commonOptions -webgraphdb "$CRAWL_PATH"
+
+  #if [ $? -ne 0 ]
+  # then exit $?
+  #fi
+
+  #echo "Running ScoreUpdater Job with $CRAWL_PATH/crawldb and WebGraph within $CRAWL_PATH"
+  #"$bin/nutch" scoreupdater $commonOptions -crawldb "$CRAWL_PATH"/crawldb -webgraphdb "$CRAWL_PATH"
+
+  #if [ $? -ne 0 ]
+  # then exit $?
+  #fi
+
+  #echo "Running NodeDumper on WebGraph within $CRAWL_PATH and dumping output to $CRAWL_PATH/dump/scores"
+  #"$bin/nutch" nodedumper $commonOptions -scores -topn 1000 -webgraphdb "$CRAWL_PATH" -output "$CRAWL_PATH"/dump/scores
+
+  #if [ $? -ne 0 ]
+  # then exit $?
+  #fi
 done
 
 exit 0