You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2006/12/30 20:13:06 UTC

svn commit: r491291 - in /lucene/nutch/trunk/src/java/org/apache/nutch: crawl/LinkDb.java indexer/Indexer.java

Author: ab
Date: Sat Dec 30 11:13:06 2006
New Revision: 491291

URL: http://svn.apache.org/viewvc?view=rev&rev=491291
Log:
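Fix two bugs reported by Dogacan Guney:
 * LinkDb: merge with an existing linkdb only when its "current" subdirectory
   (CURRENT_NAME) exists, instead of checking for the linkdb directory itself.
 * Indexer: classify each CrawlDatum with CrawlDatum.hasDbStatus() /
   CrawlDatum.hasFetchStatus() instead of a switch over a fixed list of
   status codes.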

Modified:
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java
    lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java?view=diff&rev=491291&r1=491290&r2=491291
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java Sat Dec 30 11:13:06 2006
@@ -214,6 +214,7 @@
     Path lock = new Path(linkDb, LOCK_NAME);
     FileSystem fs = FileSystem.get(getConf());
     LockUtil.createLockFile(fs, lock, force);
+    Path currentLinkDb = new Path(linkDb, CURRENT_NAME);
     if (LOG.isInfoEnabled()) {
       LOG.info("LinkDb: starting");
       LOG.info("LinkDb: linkdb: " + linkDb);
@@ -233,14 +234,14 @@
       LockUtil.removeLockFile(fs, lock);
       throw e;
     }
-    if (fs.exists(linkDb)) {
+    if (fs.exists(currentLinkDb)) {
       if (LOG.isInfoEnabled()) {
         LOG.info("LinkDb: merging with existing linkdb: " + linkDb);
       }
       // try to merge
       Path newLinkDb = job.getOutputPath();
       job = LinkDb.createMergeJob(getConf(), linkDb, normalize, filter);
-      job.addInputPath(new Path(linkDb, CURRENT_NAME));
+      job.addInputPath(currentLinkDb);
       job.addInputPath(newLinkDb);
       try {
         JobClient.runJob(job);

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java?view=diff&rev=491291&r1=491290&r2=491291
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java Sat Dec 30 11:13:06 2006
@@ -190,20 +190,12 @@
         inlinks = (Inlinks)value;
       } else if (value instanceof CrawlDatum) {
         CrawlDatum datum = (CrawlDatum)value;
-        switch (datum.getStatus()) {
-        case CrawlDatum.STATUS_DB_UNFETCHED:
-        case CrawlDatum.STATUS_DB_FETCHED:
-        case CrawlDatum.STATUS_DB_GONE:
+        if (CrawlDatum.hasDbStatus(datum))
           dbDatum = datum;
-          break;
-        case CrawlDatum.STATUS_FETCH_SUCCESS:
-        case CrawlDatum.STATUS_FETCH_RETRY:
-        case CrawlDatum.STATUS_FETCH_GONE:
+        else if (CrawlDatum.hasFetchStatus(datum))
           fetchDatum = datum;
-          break;
-        default:
+        else
           throw new RuntimeException("Unexpected status: "+datum.getStatus());
-        }
       } else if (value instanceof ParseData) {
         parseData = (ParseData)value;
       } else if (value instanceof ParseText) {