You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by jn...@apache.org on 2010/07/19 14:38:56 UTC

svn commit: r965456 - in /nutch/branches/nutchbase/src/java/org/apache/nutch: crawl/CrawlDatum.java crawl/CrawlDb.java crawl/DbUpdateReducer.java crawl/MapWritable.java crawl/NutchWritable.java crawl/WebTableReader.java fetcher/FetcherOutput.java

Author: jnioche
Date: Mon Jul 19 12:38:55 2010
New Revision: 965456

URL: http://svn.apache.org/viewvc?rev=965456&view=rev
Log:
Removed old Nutch classes (mostly Writables) and updated the remaining code that referenced them.

Removed:
    nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/CrawlDatum.java
    nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/CrawlDb.java
    nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/MapWritable.java
    nutch/branches/nutchbase/src/java/org/apache/nutch/fetcher/FetcherOutput.java
Modified:
    nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/DbUpdateReducer.java
    nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/NutchWritable.java
    nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/WebTableReader.java

Modified: nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/DbUpdateReducer.java
URL: http://svn.apache.org/viewvc/nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/DbUpdateReducer.java?rev=965456&r1=965455&r2=965456&view=diff
==============================================================================
--- nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/DbUpdateReducer.java (original)
+++ nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/DbUpdateReducer.java Mon Jul 19 12:38:55 2010
@@ -23,6 +23,8 @@ import org.gora.mapreduce.GoraReducer;
 public class DbUpdateReducer
 extends GoraReducer<String, NutchWritable, String, WebPage> {
 
+  public static final String CRAWLDB_ADDITIONS_ALLOWED = "db.update.additions.allowed";	
+	
   public static final Log LOG = DbUpdaterJob.LOG;
 
   private int retryMax;
@@ -36,7 +38,7 @@ extends GoraReducer<String, NutchWritabl
   protected void setup(Context context) throws IOException, InterruptedException {
     Configuration conf = context.getConfiguration();
     retryMax = conf.getInt("db.fetch.retry.max", 3);
-    additionsAllowed = conf.getBoolean(CrawlDb.CRAWLDB_ADDITIONS_ALLOWED, true);
+    additionsAllowed = conf.getBoolean(CRAWLDB_ADDITIONS_ALLOWED, true);
     maxInterval = conf.getInt("db.fetch.interval.max", 0 );
     schedule = FetchScheduleFactory.getFetchSchedule(conf);
     scoringFilters = new ScoringFilters(conf);

Modified: nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/NutchWritable.java
URL: http://svn.apache.org/viewvc/nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/NutchWritable.java?rev=965456&r1=965455&r2=965456&view=diff
==============================================================================
--- nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/NutchWritable.java (original)
+++ nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/NutchWritable.java Mon Jul 19 12:38:55 2010
@@ -25,30 +25,8 @@ public class NutchWritable extends Gener
 
   static {
     CLASSES = new Class[] {
-      org.apache.hadoop.io.NullWritable.class,
-      org.apache.hadoop.io.LongWritable.class,
-      org.apache.hadoop.io.BytesWritable.class,
-      org.apache.hadoop.io.FloatWritable.class,
-      org.apache.hadoop.io.IntWritable.class,
-      org.apache.hadoop.io.Text.class,
-      org.apache.hadoop.io.MD5Hash.class,
-      org.apache.nutch.crawl.CrawlDatum.class,
-      org.apache.nutch.crawl.Inlink.class,
-      org.apache.nutch.crawl.Inlinks.class,
-      org.apache.nutch.crawl.MapWritable.class,
-      org.apache.nutch.fetcher.FetcherOutput.class,
-      org.apache.nutch.metadata.Metadata.class,
-      org.apache.nutch.parse.Outlink.class,
-      org.apache.nutch.parse.ParseText.class,
-      org.apache.nutch.parse.ParseData.class,
-      org.apache.nutch.parse.ParseStatus.class,
-      org.apache.nutch.protocol.Content.class,
-      org.apache.nutch.protocol.ProtocolStatus.class,
-//      org.apache.nutch.searcher.Hit.class,
-//      org.apache.nutch.searcher.HitDetails.class,
-//      org.apache.nutch.searcher.Hits.class,
       org.apache.nutch.scoring.ScoreDatum.class,
-      org.apache.nutch.util.WebPageWritable.class,
+      org.apache.nutch.util.WebPageWritable.class
     };
   }
 

Modified: nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/WebTableReader.java
URL: http://svn.apache.org/viewvc/nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/WebTableReader.java?rev=965456&r1=965455&r2=965456&view=diff
==============================================================================
--- nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/WebTableReader.java (original)
+++ nutch/branches/nutchbase/src/java/org/apache/nutch/crawl/WebTableReader.java Mon Jul 19 12:38:55 2010
@@ -16,7 +16,6 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
@@ -37,6 +36,7 @@ import org.gora.mapreduce.GoraMapper;
 import org.gora.query.Query;
 import org.gora.query.Result;
 import org.gora.store.DataStore;
+import org.apache.hadoop.hbase.util.Bytes;
 
 /**
  * Displays information about the entries of the webtable
@@ -281,7 +281,7 @@ public class WebTableReader extends Conf
             LOG.info("   " + st[2] + " :\t" + val);
           else
             LOG.info(st[0] + " " + code + " ("
-                + CrawlDatum.getStatusName((byte) code) + "):\t" + val);
+                + CrawlStatus.getName((byte) code) + "):\t" + val);
         } else
           LOG.info(k + ":\t" + val);
       }