You are viewing a plain text version of this content. The canonical link for it was a hyperlink that is not preserved in this plain-text copy.
Posted to commits@nutch.apache.org by jn...@apache.org on 2010/07/19 14:38:56 UTC
svn commit: r965456 - in
/nutch/branches/nutchbase/src/java/org/apache/nutch: crawl/CrawlDatum.java
crawl/CrawlDb.java crawl/DbUpdateReducer.java crawl/MapWritable.java
crawl/NutchWritable.java crawl/WebTableReader.java fetcher/FetcherOutput.java
Author: jnioche
Date: Mon Jul 19 12:38:55 2010
New Revision: 965456
URL: http://svn.apache.org/viewvc?rev=965456&view=rev
Log:
removed old Nutch classes, mostly Writables
Removed:
nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/CrawlDatum.java
nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/CrawlDb.java
nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/MapWritable.java
nutch/branches/nutchbase/src/java/org/apache/nutch/fetcher/FetcherOutput.java
Modified:
nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/DbUpdateReducer.java
nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/NutchWritable.java
nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/WebTableReader.java
Modified: nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/DbUpdateReducer.java
URL: http://svn.apache.org/viewvc/nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/DbUpdateReducer.java?rev=965456&r1=965455&r2=965456&view=diff
==============================================================================
--- nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/DbUpdateReducer.java (original)
+++ nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/DbUpdateReducer.java Mon Jul 19 12:38:55 2010
@@ -23,6 +23,8 @@ import org.gora.mapreduce.GoraReducer;
public class DbUpdateReducer
extends GoraReducer<String, NutchWritable, String, WebPage> {
+ public static final String CRAWLDB_ADDITIONS_ALLOWED = "db.update.additions.allowed";
+
public static final Log LOG = DbUpdaterJob.LOG;
private int retryMax;
@@ -36,7 +38,7 @@ extends GoraReducer<String, NutchWritabl
protected void setup(Context context) throws IOException, InterruptedException {
Configuration conf = context.getConfiguration();
retryMax = conf.getInt("db.fetch.retry.max", 3);
- additionsAllowed = conf.getBoolean(CrawlDb.CRAWLDB_ADDITIONS_ALLOWED, true);
+ additionsAllowed = conf.getBoolean(CRAWLDB_ADDITIONS_ALLOWED, true);
maxInterval = conf.getInt("db.fetch.interval.max", 0 );
schedule = FetchScheduleFactory.getFetchSchedule(conf);
scoringFilters = new ScoringFilters(conf);
Modified: nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/NutchWritable.java
URL: http://svn.apache.org/viewvc/nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/NutchWritable.java?rev=965456&r1=965455&r2=965456&view=diff
==============================================================================
--- nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/NutchWritable.java (original)
+++ nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/NutchWritable.java Mon Jul 19 12:38:55 2010
@@ -25,30 +25,8 @@ public class NutchWritable extends Gener
static {
CLASSES = new Class[] {
- org.apache.hadoop.io.NullWritable.class,
- org.apache.hadoop.io.LongWritable.class,
- org.apache.hadoop.io.BytesWritable.class,
- org.apache.hadoop.io.FloatWritable.class,
- org.apache.hadoop.io.IntWritable.class,
- org.apache.hadoop.io.Text.class,
- org.apache.hadoop.io.MD5Hash.class,
- org.apache.nutch.crawl.CrawlDatum.class,
- org.apache.nutch.crawl.Inlink.class,
- org.apache.nutch.crawl.Inlinks.class,
- org.apache.nutch.crawl.MapWritable.class,
- org.apache.nutch.fetcher.FetcherOutput.class,
- org.apache.nutch.metadata.Metadata.class,
- org.apache.nutch.parse.Outlink.class,
- org.apache.nutch.parse.ParseText.class,
- org.apache.nutch.parse.ParseData.class,
- org.apache.nutch.parse.ParseStatus.class,
- org.apache.nutch.protocol.Content.class,
- org.apache.nutch.protocol.ProtocolStatus.class,
-// org.apache.nutch.searcher.Hit.class,
-// org.apache.nutch.searcher.HitDetails.class,
-// org.apache.nutch.searcher.Hits.class,
org.apache.nutch.scoring.ScoreDatum.class,
- org.apache.nutch.util.WebPageWritable.class,
+ org.apache.nutch.util.WebPageWritable.class
};
}
Modified: nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/WebTableReader.java
URL: http://svn.apache.org/viewvc/nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/WebTableReader.java?rev=965456&r1=965455&r2=965456&view=diff
==============================================================================
--- nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/WebTableReader.java (original)
+++ nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/WebTableReader.java Mon Jul 19 12:38:55 2010
@@ -16,7 +16,6 @@ import org.apache.hadoop.conf.Configurat
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
@@ -37,6 +36,7 @@ import org.gora.mapreduce.GoraMapper;
import org.gora.query.Query;
import org.gora.query.Result;
import org.gora.store.DataStore;
+import org.apache.hadoop.hbase.util.Bytes;
/**
* Displays information about the entries of the webtable
@@ -281,7 +281,7 @@ public class WebTableReader extends Conf
LOG.info(" " + st[2] + " :\t" + val);
else
LOG.info(st[0] + " " + code + " ("
- + CrawlDatum.getStatusName((byte) code) + "):\t" + val);
+ + CrawlStatus.getName((byte) code) + "):\t" + val);
} else
LOG.info(k + ":\t" + val);
}