You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2007/05/31 23:23:46 UTC
svn commit: r543264 - in /lucene/nutch/trunk: ./ src/java/org/apache/nutch/fetcher/ src/java/org/apache/nutch/indexer/ src/java/org/apache/nutch/parse/ src/java/org/apache/nutch/segment/
Author: ab
Date: Thu May 31 14:23:45 2007
New Revision: 543264
URL: http://svn.apache.org/viewvc?view=rev&rev=543264
Log:
NUTCH-392 - OutputFormat implementations should pass on Progressable.
Modified:
lucene/nutch/trunk/CHANGES.txt
lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java
lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java
lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java
lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java
lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java
Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?view=diff&rev=543264&r1=543263&r2=543264
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Thu May 31 14:23:45 2007
@@ -26,6 +26,10 @@
9. NUTCH-61 - Support for adaptive re-fetch interval and detection of
unmodified content. (ab)
+
+10. NUTCH-392 - OutputFormat implementations should pass on Progressable.
+ (cutting via ab)
+
Release 0.9 - 2007-04-02
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java?view=diff&rev=543264&r1=543263&r2=543264
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java Thu May 31 14:23:45 2007
@@ -28,6 +28,7 @@
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.RecordWriter;
@@ -58,7 +59,8 @@
new Path(new Path(job.getOutputPath(), Content.DIR_NAME), name);
final MapFile.Writer fetchOut =
- new MapFile.Writer(job, fs, fetch.toString(), Text.class, CrawlDatum.class);
+ new MapFile.Writer(job, fs, fetch.toString(), Text.class, CrawlDatum.class,
+ CompressionType.NONE, progress);
return new RecordWriter() {
private MapFile.Writer contentOut;
@@ -67,11 +69,12 @@
{
if (Fetcher.isStoringContent(job)) {
contentOut = new MapFile.Writer(job, fs, content.toString(),
- Text.class, Content.class);
+ Text.class, Content.class,
+ CompressionType.NONE, progress);
}
if (Fetcher.isParsing(job)) {
- parseOut = new ParseOutputFormat().getRecordWriter(fs, job, name, null);
+ parseOut = new ParseOutputFormat().getRecordWriter(fs, job, name, progress);
}
}
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java?view=diff&rev=543264&r1=543263&r2=543264
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java Thu May 31 14:23:45 2007
@@ -60,7 +60,7 @@
public static class OutputFormat
extends org.apache.hadoop.mapred.OutputFormatBase {
public RecordWriter getRecordWriter(final FileSystem fs, JobConf job,
- String name, Progressable progress) throws IOException {
+ String name, final Progressable progress) throws IOException {
final Path perm = new Path(job.getOutputPath(), name);
final Path temp =
job.getLocalPath("index/_"+Integer.toString(new Random().nextInt()));
@@ -95,6 +95,7 @@
" (" + doc.get("lang") + ")");
}
writer.addDocument(doc, analyzer);
+ progress.progress();
}
public void close(final Reporter reporter) throws IOException {
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java?view=diff&rev=543264&r1=543263&r2=543264
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java Thu May 31 14:23:45 2007
@@ -68,13 +68,16 @@
new Path(new Path(job.getOutputPath(), CrawlDatum.PARSE_DIR_NAME), name);
final MapFile.Writer textOut =
- new MapFile.Writer(job, fs, text.toString(), Text.class, ParseText.class, CompressionType.RECORD);
+ new MapFile.Writer(job, fs, text.toString(), Text.class, ParseText.class,
+ CompressionType.RECORD, progress);
final MapFile.Writer dataOut =
- new MapFile.Writer(job, fs, data.toString(), Text.class,ParseData.class);
+ new MapFile.Writer(job, fs, data.toString(), Text.class, ParseData.class,
+ CompressionType.RECORD, progress);
final SequenceFile.Writer crawlOut =
- SequenceFile.createWriter(fs, job, crawl, Text.class, CrawlDatum.class);
+ SequenceFile.createWriter(fs, job, crawl, Text.class, CrawlDatum.class,
+ CompressionType.NONE, progress);
return new RecordWriter() {
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java?view=diff&rev=543264&r1=543263&r2=543264
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java Thu May 31 14:23:45 2007
@@ -36,6 +36,7 @@
import org.apache.hadoop.io.UTF8;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobClient;
@@ -237,7 +238,7 @@
} else {
wname = new Path(new Path(new Path(job.getOutputPath(), segmentName + "-" + slice), dirName), name);
}
- res = new SequenceFile.Writer(fs, job, wname, Text.class, CrawlDatum.class);
+ res = new SequenceFile.Writer(fs, job, wname, Text.class, CrawlDatum.class, progress, new SequenceFile.Metadata());
sliceWriters.put(slice + dirName, res);
return res;
}
@@ -253,7 +254,7 @@
} else {
wname = new Path(new Path(new Path(job.getOutputPath(), segmentName + "-" + slice), dirName), name);
}
- res = new MapFile.Writer(job, fs, wname.toString(), Text.class, clazz);
+ res = new MapFile.Writer(job, fs, wname.toString(), Text.class, clazz, CompressionType.RECORD, progress);
sliceWriters.put(slice + dirName, res);
return res;
}
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java?view=diff&rev=543264&r1=543263&r2=543264
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java Thu May 31 14:23:45 2007
@@ -94,7 +94,7 @@
/** Implements a text output format */
public static class TextOutputFormat extends org.apache.hadoop.mapred.OutputFormatBase {
- public RecordWriter getRecordWriter(final FileSystem fs, JobConf job, String name, Progressable progress) throws IOException {
+ public RecordWriter getRecordWriter(final FileSystem fs, JobConf job, String name, final Progressable progress) throws IOException {
final Path segmentDumpFile = new Path(job.getOutputPath(), name);