You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2006/12/30 20:13:06 UTC
svn commit: r491291 - in /lucene/nutch/trunk/src/java/org/apache/nutch:
crawl/LinkDb.java indexer/Indexer.java
Author: ab
Date: Sat Dec 30 11:13:06 2006
New Revision: 491291
URL: http://svn.apache.org/viewvc?view=rev&rev=491291
Log:
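Fix two bugs reported by Dogacan Guney:
- LinkDb: decide whether to merge with an existing linkdb by checking for its CURRENT_NAME subdirectory rather than for the linkdb directory itself.
- Indexer: classify CrawlDatum values with CrawlDatum.hasDbStatus() / CrawlDatum.hasFetchStatus() instead of a switch over individual status constants.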
Modified:
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java
lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java?view=diff&rev=491291&r1=491290&r2=491291
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java Sat Dec 30 11:13:06 2006
@@ -214,6 +214,7 @@
Path lock = new Path(linkDb, LOCK_NAME);
FileSystem fs = FileSystem.get(getConf());
LockUtil.createLockFile(fs, lock, force);
+ Path currentLinkDb = new Path(linkDb, CURRENT_NAME);
if (LOG.isInfoEnabled()) {
LOG.info("LinkDb: starting");
LOG.info("LinkDb: linkdb: " + linkDb);
@@ -233,14 +234,14 @@
LockUtil.removeLockFile(fs, lock);
throw e;
}
- if (fs.exists(linkDb)) {
+ if (fs.exists(currentLinkDb)) {
if (LOG.isInfoEnabled()) {
LOG.info("LinkDb: merging with existing linkdb: " + linkDb);
}
// try to merge
Path newLinkDb = job.getOutputPath();
job = LinkDb.createMergeJob(getConf(), linkDb, normalize, filter);
- job.addInputPath(new Path(linkDb, CURRENT_NAME));
+ job.addInputPath(currentLinkDb);
job.addInputPath(newLinkDb);
try {
JobClient.runJob(job);
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java?view=diff&rev=491291&r1=491290&r2=491291
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java Sat Dec 30 11:13:06 2006
@@ -190,20 +190,12 @@
inlinks = (Inlinks)value;
} else if (value instanceof CrawlDatum) {
CrawlDatum datum = (CrawlDatum)value;
- switch (datum.getStatus()) {
- case CrawlDatum.STATUS_DB_UNFETCHED:
- case CrawlDatum.STATUS_DB_FETCHED:
- case CrawlDatum.STATUS_DB_GONE:
+ if (CrawlDatum.hasDbStatus(datum))
dbDatum = datum;
- break;
- case CrawlDatum.STATUS_FETCH_SUCCESS:
- case CrawlDatum.STATUS_FETCH_RETRY:
- case CrawlDatum.STATUS_FETCH_GONE:
+ else if (CrawlDatum.hasFetchStatus(datum))
fetchDatum = datum;
- break;
- default:
+ else
throw new RuntimeException("Unexpected status: "+datum.getStatus());
- }
} else if (value instanceof ParseData) {
parseData = (ParseData)value;
} else if (value instanceof ParseText) {