You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2014/09/27 23:57:10 UTC
svn commit: r1628010 - in /nutch/trunk: CHANGES.txt src/bin/crawl
Author: lewismc
Date: Sat Sep 27 21:57:10 2014
New Revision: 1628010
URL: http://svn.apache.org/r1628010
Log:
NUTCH-1853 Add commented out WebGraph executions to ./bin/crawl
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/bin/crawl
Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1628010&r1=1628009&r2=1628010&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Sat Sep 27 21:57:10 2014
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Current Development
+* NUTCH-1853 Add commented out WebGraph executions to ./bin/crawl (lewismc)
+
* NUTCH-1844 testresources/testcrawl not referenced anywhere in code (mattmann)
* NUTCH-1839 Improve WebGraph CLI parsing (lewismc)
Modified: nutch/trunk/src/bin/crawl
URL: http://svn.apache.org/viewvc/nutch/trunk/src/bin/crawl?rev=1628010&r1=1628009&r2=1628010&view=diff
==============================================================================
--- nutch/trunk/src/bin/crawl (original)
+++ nutch/trunk/src/bin/crawl Sat Sep 27 21:57:10 2014
@@ -203,7 +203,45 @@ do
else
echo "Skipping indexing: no SOLR url provided."
fi
+
+ #######################################################
+ # The following commands fall into WebGraph territory
+ # and should be uncommented based on your requirements
+ #######################################################
+ #echo "Building WebGraph within $CRAWL_PATH on all segments in $CRAWL_PATH/segments/"
+ #"$bin/nutch" webgraph $commonOptions -filter -normalize -segmentDir "$CRAWL_PATH"/segments/ -webgraphdb "$CRAWL_PATH"
+ #rc=$?; if [ "$rc" -ne 0 ]
+ # then exit "$rc"
+ #fi
+
+ #echo "Running Loops Job on WebGraph within $CRAWL_PATH"
+ #"$bin/nutch" org.apache.nutch.scoring.webgraph.Loops $commonOptions -webgraphdb "$CRAWL_PATH"
+
+ #rc=$?; if [ "$rc" -ne 0 ]
+ # then exit "$rc"
+ #fi
+
+ #echo "Running LinkRank Algorithm on WebGraph within $CRAWL_PATH"
+ #"$bin/nutch" linkrank $commonOptions -webgraphdb "$CRAWL_PATH"
+
+ #rc=$?; if [ "$rc" -ne 0 ]
+ # then exit "$rc"
+ #fi
+
+ #echo "Running ScoreUpdater Job with $CRAWL_PATH/crawldb and WebGraph within $CRAWL_PATH"
+ #"$bin/nutch" scoreupdater $commonOptions -crawldb "$CRAWL_PATH"/crawldb -webgraphdb "$CRAWL_PATH"
+
+ #rc=$?; if [ "$rc" -ne 0 ]
+ # then exit "$rc"
+ #fi
+
+ #echo "Running NodeDumper on WebGraph within $CRAWL_PATH and dumping output to $CRAWL_PATH/dump/scores"
+ #"$bin/nutch" nodedumper $commonOptions -scores -topn 1000 -webgraphdb "$CRAWL_PATH" -output "$CRAWL_PATH"/dump/scores
+
+ #rc=$?; if [ "$rc" -ne 0 ]
+ # then exit "$rc"
+ #fi
done
exit 0