Posted to commits@nutch.apache.org by ab...@apache.org on 2006/12/07 12:21:16 UTC

svn commit: r483420 - in /lucene/nutch/trunk: lib/hadoop-0.7.1.jar lib/hadoop-0.9.1.jar src/java/org/apache/nutch/crawl/CrawlDb.java src/java/org/apache/nutch/parse/ParseOutputFormat.java src/test/org/apache/nutch/crawl/TestMapWritable.java

Author: ab
Date: Thu Dec  7 03:21:08 2006
New Revision: 483420

URL: http://svn.apache.org/viewvc?view=rev&rev=483420
Log:
Upgrade to Hadoop 0.9.1.

Added:
    lucene/nutch/trunk/lib/hadoop-0.9.1.jar   (with props)
Removed:
    lucene/nutch/trunk/lib/hadoop-0.7.1.jar
Modified:
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java
    lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java

Added: lucene/nutch/trunk/lib/hadoop-0.9.1.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/hadoop-0.9.1.jar?view=auto&rev=483420
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/lib/hadoop-0.9.1.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java?view=diff&rev=483420&r1=483419&r2=483420
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java Thu Dec  7 03:21:08 2006
@@ -115,11 +115,13 @@
     FileSystem fs = new JobClient(job).getFs();
     Path old = new Path(crawlDb, "old");
     Path current = new Path(crawlDb, CrawlDatum.DB_DIR_NAME);
-    fs.delete(old);
-    fs.rename(current, old);
+    if (fs.exists(current)) {
+      if (fs.exists(old)) fs.delete(old);
+      fs.rename(current, old);
+    }
     fs.mkdirs(crawlDb);
     fs.rename(newCrawlDb, current);
-    fs.delete(old);
+    if (fs.exists(old)) fs.delete(old);
   }
 
   public static void main(String[] args) throws Exception {

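For readers following the change, the guarded swap above boils down to the pattern sketched here. This is only an illustrative, self-contained rendering against the 0.9-era FileSystem API; the class name, the "current" literal and the argument names are placeholders, not the actual CrawlDb code.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class DirSwapSketch {
      // Promote a freshly built db dir, keeping the previous one as "old"
      // until the swap has completed.
      public static void install(Configuration conf, Path crawlDb, Path newCrawlDb)
          throws Exception {
        FileSystem fs = FileSystem.get(conf);
        Path old = new Path(crawlDb, "old");
        Path current = new Path(crawlDb, "current");
        // Only rotate if a previous "current" exists; delete a stale "old"
        // first so the rename cannot collide with an existing target.
        if (fs.exists(current)) {
          if (fs.exists(old)) fs.delete(old);
          fs.rename(current, old);
        }
        fs.mkdirs(crawlDb);
        fs.rename(newCrawlDb, current);      // promote the new db
        if (fs.exists(old)) fs.delete(old);  // drop the backup once the swap succeeded
      }
    }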
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java?view=diff&rev=483420&r1=483419&r2=483420
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java Thu Dec  7 03:21:08 2006
@@ -22,6 +22,7 @@
 import org.apache.commons.logging.LogFactory;
 
 import org.apache.hadoop.io.*;
+import org.apache.hadoop.io.SequenceFile.CompressionType;
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.fetcher.Fetcher;
 import org.apache.hadoop.fs.*;
@@ -68,13 +69,13 @@
       new Path(new Path(job.getOutputPath(), CrawlDatum.PARSE_DIR_NAME), name);
     
     final MapFile.Writer textOut =
-      new MapFile.Writer(fs, text.toString(), Text.class, ParseText.class);
+      new MapFile.Writer(job, fs, text.toString(), Text.class, ParseText.class, CompressionType.RECORD);
     
     final MapFile.Writer dataOut =
-      new MapFile.Writer(fs, data.toString(), Text.class,ParseData.class,true);
+      new MapFile.Writer(job, fs, data.toString(), Text.class,ParseData.class);
     
     final SequenceFile.Writer crawlOut =
-      new SequenceFile.Writer(fs, crawl, Text.class, CrawlDatum.class);
+      SequenceFile.createWriter(fs, job, crawl, Text.class, CrawlDatum.class);
     
     return new RecordWriter() {
 

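For anyone tracking the API change in this hunk, below is a minimal standalone sketch of the 0.9-style writer construction: MapFile.Writer now takes a Configuration and an explicit CompressionType, and SequenceFile writers come from the createWriter() factory. The paths, key/value classes and the WriterSketch class are illustrative placeholders, not the ParseOutputFormat fields.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.MapFile;
    import org.apache.hadoop.io.SequenceFile;
    import org.apache.hadoop.io.SequenceFile.CompressionType;
    import org.apache.hadoop.io.Text;

    public class WriterSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // MapFile.Writer takes the Configuration first and a CompressionType
        // instead of the old boolean "compress" flag.
        MapFile.Writer mapOut = new MapFile.Writer(conf, fs, "/tmp/sketch-map",
            Text.class, Text.class, CompressionType.RECORD);
        mapOut.append(new Text("key"), new Text("value"));
        mapOut.close();

        // SequenceFile writers are obtained through the createWriter() factory,
        // matching the replacement made in the hunk above.
        SequenceFile.Writer seqOut = SequenceFile.createWriter(fs, conf,
            new Path("/tmp/sketch-seq"), IntWritable.class, Text.class);
        seqOut.append(new IntWritable(1), new Text("value"));
        seqOut.close();
      }
    }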
Modified: lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java?view=diff&rev=483420&r1=483419&r2=483420
==============================================================================
--- lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java (original)
+++ lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java Thu Dec  7 03:21:08 2006
@@ -106,8 +106,8 @@
     FileSystem fs = FileSystem.get(configuration);
     Path file = new Path(System.getProperty("java.io.tmpdir"), "mapTestFile");
     fs.delete(file);
-    org.apache.hadoop.io.SequenceFile.Writer writer = new SequenceFile.Writer(
-        fs, file, IntWritable.class, MapWritable.class);
+    org.apache.hadoop.io.SequenceFile.Writer writer = SequenceFile.createWriter(
+        fs, configuration, file, IntWritable.class, MapWritable.class);
     // write map
     System.out.println("start writing map's");
     long start = System.currentTimeMillis();
@@ -139,8 +139,8 @@
     fs.delete(file);
 
     // Text
-    System.out.println("start writing utf8's");
-    writer = new SequenceFile.Writer(fs, file, IntWritable.class, Text.class);
+    System.out.println("start writing Text's");
+    writer = SequenceFile.createWriter(fs, configuration, file, IntWritable.class, Text.class);
     // write map
     start = System.currentTimeMillis();
     key = new IntWritable();
@@ -153,17 +153,17 @@
     }
     needed = System.currentTimeMillis() - start;
     writer.close();
-    System.out.println("needed time for writing utf8's: " + needed);
+    System.out.println("needed time for writing Text's: " + needed);
 
     // read map
-    System.out.println("start reading utf8's");
+    System.out.println("start reading Text's");
     reader = new SequenceFile.Reader(fs, file, configuration);
     start = System.currentTimeMillis();
     while (reader.next(key, value)) {
 
     }
     needed = System.currentTimeMillis() - start;
-    System.out.println("needed time for reading utf8: " + needed);
+    System.out.println("needed time for reading Text: " + needed);
     fs.delete(file);
   }
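
Likewise, a small self-contained sketch of the write/read timing loop this test exercises, using the createWriter() factory and the unchanged SequenceFile.Reader API. The file name, record count and class name here are illustrative only, not the test's own values.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.SequenceFile;
    import org.apache.hadoop.io.Text;

    public class TextTimingSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path file = new Path(System.getProperty("java.io.tmpdir"), "textTimingSketch");
        if (fs.exists(file)) fs.delete(file);

        // Write: the factory method replaces the old public Writer constructor.
        SequenceFile.Writer writer = SequenceFile.createWriter(
            fs, conf, file, IntWritable.class, Text.class);
        long start = System.currentTimeMillis();
        IntWritable key = new IntWritable();
        Text value = new Text();
        for (int i = 0; i < 1000; i++) {
          key.set(i);
          value.set("value-" + i);
          writer.append(key, value);
        }
        writer.close();
        System.out.println("needed time for writing Text's: "
            + (System.currentTimeMillis() - start));

        // Read back with SequenceFile.Reader, which this commit leaves unchanged.
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
        start = System.currentTimeMillis();
        while (reader.next(key, value)) {
          // iterate only, to time raw read throughput
        }
        reader.close();
        System.out.println("needed time for reading Text's: "
            + (System.currentTimeMillis() - start));
        fs.delete(file);
      }
    }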