You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ku...@apache.org on 2008/12/02 15:47:37 UTC
svn commit: r722477 - in /lucene/nutch/trunk: ./ lib/
lib/native/Linux-amd64-64/ lib/native/Linux-i386-32/
src/java/org/apache/nutch/fetcher/ src/java/org/apache/nutch/segment/
Author: kubes
Date: Tue Dec 2 06:47:35 2008
New Revision: 722477
URL: http://svn.apache.org/viewvc?rev=722477&view=rev
Log:
NUTCH-663: Upgrade Nutch to use Hadoop 0.19
Added:
lucene/nutch/trunk/lib/hadoop-0.19.0-core.jar (with props)
Removed:
lucene/nutch/trunk/lib/hadoop-0.18.1-core.jar
Modified:
lucene/nutch/trunk/CHANGES.txt
lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.a
lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.so
lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.so.1
lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.so.1.0.0
lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.a
lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.so
lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.so.1
lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.so.1.0.0
lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java
lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher2.java
lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java
lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java
Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=722477&r1=722476&r2=722477&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Tue Dec 2 06:47:35 2008
@@ -285,6 +285,8 @@
(dogacan)
105. NUTCH-662 - Upgrade Nutch to use Lucene 2.4. (kubes)
+
+106. NUTCH-663 - Upgrade Nutch to use Hadoop 0.19. (kubes)
Release 0.9 - 2007-04-02
Added: lucene/nutch/trunk/lib/hadoop-0.19.0-core.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/hadoop-0.19.0-core.jar?rev=722477&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/nutch/trunk/lib/hadoop-0.19.0-core.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Modified: lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.a
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.a?rev=722477&r1=722476&r2=722477&view=diff
==============================================================================
Binary files - no diff available.
Modified: lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.so
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.so?rev=722477&r1=722476&r2=722477&view=diff
==============================================================================
Binary files - no diff available.
Modified: lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.so.1
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.so.1?rev=722477&r1=722476&r2=722477&view=diff
==============================================================================
Binary files - no diff available.
Modified: lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.so.1.0.0
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/native/Linux-amd64-64/libhadoop.so.1.0.0?rev=722477&r1=722476&r2=722477&view=diff
==============================================================================
Binary files - no diff available.
Modified: lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.a
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.a?rev=722477&r1=722476&r2=722477&view=diff
==============================================================================
Binary files - no diff available.
Modified: lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.so
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.so?rev=722477&r1=722476&r2=722477&view=diff
==============================================================================
Binary files - no diff available.
Modified: lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.so.1
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.so.1?rev=722477&r1=722476&r2=722477&view=diff
==============================================================================
Binary files - no diff available.
Modified: lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.so.1.0.0
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/native/Linux-i386-32/libhadoop.so.1.0.0?rev=722477&r1=722476&r2=722477&view=diff
==============================================================================
Binary files - no diff available.
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java?rev=722477&r1=722476&r2=722477&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java Tue Dec 2 06:47:35 2008
@@ -58,12 +58,13 @@
/** Don't split inputs, to keep things polite. */
public InputSplit[] getSplits(JobConf job, int nSplits)
throws IOException {
- Path[] files = listPaths(job);
+ FileStatus[] files = listStatus(job);
FileSystem fs = FileSystem.get(job);
InputSplit[] splits = new InputSplit[files.length];
for (int i = 0; i < files.length; i++) {
- splits[i] = new FileSplit(files[i], 0,
- fs.getFileStatus(files[i]).getLen(), (String[])null);
+ FileStatus cur = files[i];
+ splits[i] = new FileSplit(cur.getPath(), 0,
+ cur.getLen(), (String[])null);
}
return splits;
}
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher2.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher2.java?rev=722477&r1=722476&r2=722477&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher2.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher2.java Tue Dec 2 06:47:35 2008
@@ -92,12 +92,13 @@
/** Don't split inputs, to keep things polite. */
public InputSplit[] getSplits(JobConf job, int nSplits)
throws IOException {
- Path[] files = listPaths(job);
+ FileStatus[] files = listStatus(job);
FileSplit[] splits = new FileSplit[files.length];
FileSystem fs = FileSystem.get(job);
for (int i = 0; i < files.length; i++) {
- splits[i] = new FileSplit(files[i], 0,
- fs.getFileStatus(files[i]).getLen(), (String[])null);
+ FileStatus cur = files[i];
+ splits[i] = new FileSplit(cur.getPath(), 0,
+ cur.getLen(), (String[])null);
}
return splits;
}
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java?rev=722477&r1=722476&r2=722477&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java Tue Dec 2 06:47:35 2008
@@ -161,7 +161,6 @@
try {
return new SequenceFileRecordReader<Text, MetaWrapper>(job, fSplit) {
- @Override
public synchronized boolean next(Text key, MetaWrapper wrapper) throws IOException {
LOG.debug("Running OIF.next()");
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java?rev=722477&r1=722476&r2=722477&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java Tue Dec 2 06:47:35 2008
@@ -47,6 +47,7 @@
import org.apache.hadoop.io.UTF8;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
@@ -203,21 +204,21 @@
JobConf job = createJobConf();
job.setJobName("read " + segment);
- if (ge) job.addInputPath(new Path(segment, CrawlDatum.GENERATE_DIR_NAME));
- if (fe) job.addInputPath(new Path(segment, CrawlDatum.FETCH_DIR_NAME));
- if (pa) job.addInputPath(new Path(segment, CrawlDatum.PARSE_DIR_NAME));
- if (co) job.addInputPath(new Path(segment, Content.DIR_NAME));
- if (pd) job.addInputPath(new Path(segment, ParseData.DIR_NAME));
- if (pt) job.addInputPath(new Path(segment, ParseText.DIR_NAME));
+ if (ge) FileInputFormat.addInputPath(job, new Path(segment, CrawlDatum.GENERATE_DIR_NAME));
+ if (fe) FileInputFormat.addInputPath(job, new Path(segment, CrawlDatum.FETCH_DIR_NAME));
+ if (pa) FileInputFormat.addInputPath(job, new Path(segment, CrawlDatum.PARSE_DIR_NAME));
+ if (co) FileInputFormat.addInputPath(job, new Path(segment, Content.DIR_NAME));
+ if (pd) FileInputFormat.addInputPath(job, new Path(segment, ParseData.DIR_NAME));
+ if (pt) FileInputFormat.addInputPath(job, new Path(segment, ParseText.DIR_NAME));
job.setInputFormat(SequenceFileInputFormat.class);
job.setMapperClass(InputCompatMapper.class);
job.setReducerClass(SegmentReader.class);
Path tempDir = new Path(job.get("hadoop.tmp.dir", "/tmp") + "/segread-" + new java.util.Random().nextInt());
- fs.delete(tempDir);
+ fs.delete(tempDir, true);
- job.setOutputPath(tempDir);
+ FileOutputFormat.setOutputPath(job, tempDir);
job.setOutputFormat(TextOutputFormat.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NutchWritable.class);