Posted to commits@nutch.apache.org by si...@apache.org on 2006/06/02 18:12:37 UTC
svn commit: r411201 - /lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbMerger.java
Author: siren
Date: Fri Jun 2 09:12:36 2006
New Revision: 411201
URL: http://svn.apache.org/viewvc?rev=411201&view=rev
Log:
removed usage of deprecated hadoop api
Modified:
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbMerger.java
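For context: the patch swaps java.io.File for org.apache.hadoop.fs.Path throughout CrawlDbMerger, together with the matching JobConf accessors (addInputDir, setOutputDir and getOutputDir become addInputPath, setOutputPath and getOutputPath). A minimal sketch of the pattern, assuming the Hadoop mapred API as it stood at this revision; the class name and directory names below are illustrative, not from the commit:

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.JobConf;

    public class PathMigrationSketch {
      public static void configure(JobConf job) {
        // was: job.addInputDir(new File("crawldb/current"));
        job.addInputPath(new Path("crawldb/current"));
        // was: job.setOutputDir(new File("crawldb-merged"));
        job.setOutputPath(new Path("crawldb-merged"));
        // was: File out = job.getOutputDir();
        Path out = job.getOutputPath();
        System.out.println("job output: " + out);
      }
    }

Unlike File, a Path resolves against Hadoop's FileSystem abstraction, so the same code works on the local filesystem and on DFS.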
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbMerger.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbMerger.java?rev=411201&r1=411200&r2=411201&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbMerger.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbMerger.java Fri Jun 2 09:12:36 2006
@@ -16,7 +16,6 @@
 
 package org.apache.nutch.crawl;
 
-import java.io.File;
 import java.io.IOException;
 import java.util.*;
 import java.util.logging.Logger;
@@ -24,6 +23,7 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.UTF8;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.*;
@@ -103,20 +103,20 @@
     super(conf);
   }
 
-  public void merge(File output, File[] dbs, boolean filter) throws Exception {
+  public void merge(Path output, Path[] dbs, boolean filter) throws Exception {
     JobConf job = createMergeJob(getConf(), output);
     job.setBoolean("crawldb.merger.urlfilters", filter);
     for (int i = 0; i < dbs.length; i++) {
-      job.addInputDir(new File(dbs[i], CrawlDatum.DB_DIR_NAME));
+      job.addInputPath(new Path(dbs[i], CrawlDatum.DB_DIR_NAME));
     }
     JobClient.runJob(job);
     FileSystem fs = FileSystem.get(getConf());
     fs.mkdirs(output);
-    fs.rename(job.getOutputDir(), new File(output, CrawlDatum.DB_DIR_NAME));
+    fs.rename(job.getOutputPath(), new Path(output, CrawlDatum.DB_DIR_NAME));
   }
 
-  public static JobConf createMergeJob(Configuration conf, File output) {
-    File newCrawlDb = new File("crawldb-merge-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+  public static JobConf createMergeJob(Configuration conf, Path output) {
+    Path newCrawlDb = new Path("crawldb-merge-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
 
     JobConf job = new NutchJob(conf);
     job.setJobName("crawldb merge " + output);
@@ -127,7 +127,7 @@
     job.setReducerClass(Merger.class);
 
-    job.setOutputDir(newCrawlDb);
+    job.setOutputPath(newCrawlDb);
     job.setOutputFormat(MapFileOutputFormat.class);
     job.setOutputKeyClass(UTF8.class);
     job.setOutputValueClass(CrawlDatum.class);
@@ -147,7 +147,7 @@
       return;
     }
     Configuration conf = NutchConfiguration.create();
-    File output = new File(args[0]);
+    Path output = new Path(args[0]);
     ArrayList dbs = new ArrayList();
     boolean filter = false;
     for (int i = 1; i < args.length; i++) {
@@ -155,9 +155,9 @@
         filter = true;
         continue;
      }
-      dbs.add(new File(args[i]));
+      dbs.add(new Path(args[i]));
     }
     CrawlDbMerger merger = new CrawlDbMerger(conf);
-    merger.merge(output, (File[]) dbs.toArray(new File[dbs.size()]), filter);
+    merger.merge(output, (Path[]) dbs.toArray(new Path[dbs.size()]), filter);
   }
 }
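After this change a caller constructs Path objects rather than Files. A hypothetical driver for the new merge(Path, Path[], boolean) signature; the crawldb locations below are illustrative, not from the commit:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.nutch.crawl.CrawlDbMerger;
    import org.apache.nutch.util.NutchConfiguration;

    public class MergeDriver {
      public static void main(String[] args) throws Exception {
        Configuration conf = NutchConfiguration.create();
        CrawlDbMerger merger = new CrawlDbMerger(conf);
        Path output = new Path("crawl/merged_crawldb");
        Path[] dbs = { new Path("crawl/crawldb1"), new Path("crawl/crawldb2") };
        // true => run the configured URL filters while merging
        merger.merge(output, dbs, true);
      }
    }

This mirrors the command-line entry point in main() above, which takes the output crawldb first, then one or more input crawldbs, with an optional -filter flag to enable URL filtering during the merge.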