You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2006/12/07 12:21:16 UTC
svn commit: r483420 - in /lucene/nutch/trunk: lib/hadoop-0.7.1.jar
lib/hadoop-0.9.1.jar src/java/org/apache/nutch/crawl/CrawlDb.java
src/java/org/apache/nutch/parse/ParseOutputFormat.java
src/test/org/apache/nutch/crawl/TestMapWritable.java
Author: ab
Date: Thu Dec 7 03:21:08 2006
New Revision: 483420
URL: http://svn.apache.org/viewvc?view=rev&rev=483420
Log:
Upgrade to Hadoop 0.9.1.
Added:
lucene/nutch/trunk/lib/hadoop-0.9.1.jar (with props)
Removed:
lucene/nutch/trunk/lib/hadoop-0.7.1.jar
Modified:
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java
lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java
lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java
Added: lucene/nutch/trunk/lib/hadoop-0.9.1.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/hadoop-0.9.1.jar?view=auto&rev=483420
==============================================================================
Binary file - no diff available.
Propchange: lucene/nutch/trunk/lib/hadoop-0.9.1.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java?view=diff&rev=483420&r1=483419&r2=483420
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java Thu Dec 7 03:21:08 2006
@@ -115,11 +115,13 @@
FileSystem fs = new JobClient(job).getFs();
Path old = new Path(crawlDb, "old");
Path current = new Path(crawlDb, CrawlDatum.DB_DIR_NAME);
- fs.delete(old);
- fs.rename(current, old);
+ if (fs.exists(current)) {
+ if (fs.exists(old)) fs.delete(old);
+ fs.rename(current, old);
+ }
fs.mkdirs(crawlDb);
fs.rename(newCrawlDb, current);
- fs.delete(old);
+ if (fs.exists(old)) fs.delete(old);
}
public static void main(String[] args) throws Exception {
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java?view=diff&rev=483420&r1=483419&r2=483420
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java Thu Dec 7 03:21:08 2006
@@ -22,6 +22,7 @@
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.*;
+import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.fetcher.Fetcher;
import org.apache.hadoop.fs.*;
@@ -68,13 +69,13 @@
new Path(new Path(job.getOutputPath(), CrawlDatum.PARSE_DIR_NAME), name);
final MapFile.Writer textOut =
- new MapFile.Writer(fs, text.toString(), Text.class, ParseText.class);
+ new MapFile.Writer(job, fs, text.toString(), Text.class, ParseText.class, CompressionType.RECORD);
final MapFile.Writer dataOut =
- new MapFile.Writer(fs, data.toString(), Text.class,ParseData.class,true);
+ new MapFile.Writer(job, fs, data.toString(), Text.class,ParseData.class);
final SequenceFile.Writer crawlOut =
- new SequenceFile.Writer(fs, crawl, Text.class, CrawlDatum.class);
+ SequenceFile.createWriter(fs, job, crawl, Text.class, CrawlDatum.class);
return new RecordWriter() {
Modified: lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java?view=diff&rev=483420&r1=483419&r2=483420
==============================================================================
--- lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java (original)
+++ lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java Thu Dec 7 03:21:08 2006
@@ -106,8 +106,8 @@
FileSystem fs = FileSystem.get(configuration);
Path file = new Path(System.getProperty("java.io.tmpdir"), "mapTestFile");
fs.delete(file);
- org.apache.hadoop.io.SequenceFile.Writer writer = new SequenceFile.Writer(
- fs, file, IntWritable.class, MapWritable.class);
+ org.apache.hadoop.io.SequenceFile.Writer writer = SequenceFile.createWriter(
+ fs, configuration, file, IntWritable.class, MapWritable.class);
// write map
System.out.println("start writing map's");
long start = System.currentTimeMillis();
@@ -139,8 +139,8 @@
fs.delete(file);
// Text
- System.out.println("start writing utf8's");
- writer = new SequenceFile.Writer(fs, file, IntWritable.class, Text.class);
+ System.out.println("start writing Text's");
+ writer = SequenceFile.createWriter(fs, configuration, file, IntWritable.class, Text.class);
// write map
start = System.currentTimeMillis();
key = new IntWritable();
@@ -153,17 +153,17 @@
}
needed = System.currentTimeMillis() - start;
writer.close();
- System.out.println("needed time for writing utf8's: " + needed);
+ System.out.println("needed time for writing Text's: " + needed);
// read map
- System.out.println("start reading utf8's");
+ System.out.println("start reading Text's");
reader = new SequenceFile.Reader(fs, file, configuration);
start = System.currentTimeMillis();
while (reader.next(key, value)) {
}
needed = System.currentTimeMillis() - start;
- System.out.println("needed time for reading utf8: " + needed);
+ System.out.println("needed time for reading Text: " + needed);
fs.delete(file);
}