You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by je...@apache.org on 2006/06/22 14:20:33 UTC
svn commit: r416346 [1/3] - in /lucene/nutch/trunk/src:
java/org/apache/nutch/analysis/ java/org/apache/nutch/clustering/
java/org/apache/nutch/crawl/ java/org/apache/nutch/fetcher/
java/org/apache/nutch/indexer/ java/org/apache/nutch/net/ java/org/apa...
Author: jerome
Date: Thu Jun 22 05:20:29 2006
New Revision: 416346
URL: http://svn.apache.org/viewvc?rev=416346&view=rev
Log:
NUTCH-309 : Added logging code guards
Modified:
lucene/nutch/trunk/src/java/org/apache/nutch/analysis/CommonGrams.java
lucene/nutch/trunk/src/java/org/apache/nutch/clustering/OnlineClustererFactory.java
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbMerger.java
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/SignatureFactory.java
lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java
lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java
lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java
lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java
lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilters.java
lucene/nutch/trunk/src/java/org/apache/nutch/net/RegexUrlNormalizer.java
lucene/nutch/trunk/src/java/org/apache/nutch/net/UrlNormalizerFactory.java
lucene/nutch/trunk/src/java/org/apache/nutch/ontology/OntologyFactory.java
lucene/nutch/trunk/src/java/org/apache/nutch/parse/OutlinkExtractor.java
lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java
lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParsePluginsReader.java
lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java
lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseUtil.java
lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserChecker.java
lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java
lucene/nutch/trunk/src/java/org/apache/nutch/plugin/PluginDescriptor.java
lucene/nutch/trunk/src/java/org/apache/nutch/plugin/PluginManifestParser.java
lucene/nutch/trunk/src/java/org/apache/nutch/plugin/PluginRepository.java
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/DistributedSearch.java
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/NutchBean.java
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/OpenSearchServlet.java
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/QueryFilters.java
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/SummarizerFactory.java
lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java
lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java
lucene/nutch/trunk/src/java/org/apache/nutch/servlet/Cached.java
lucene/nutch/trunk/src/java/org/apache/nutch/tools/DmozParser.java
lucene/nutch/trunk/src/java/org/apache/nutch/tools/PruneIndexTool.java
lucene/nutch/trunk/src/java/org/apache/nutch/util/LogUtil.java
lucene/nutch/trunk/src/java/org/apache/nutch/util/ThreadPool.java
lucene/nutch/trunk/src/java/org/apache/nutch/util/mime/MimeTypesReader.java
lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/Clusterer.java
lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCDeleteUnlicensedTool.java
lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java
lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java
lucene/nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java
lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java
lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIdentifier.java
lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/NGramProfile.java
lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java
lucene/nutch/trunk/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/RegexURLFilterBase.java
lucene/nutch/trunk/src/plugin/ontology/src/java/org/apache/nutch/ontology/jena/OntologyImpl.java
lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java
lucene/nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java
lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/ContentReaderListener.java
lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/PPTExtractor.java
lucene/nutch/trunk/src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java
lucene/nutch/trunk/src/plugin/parse-rss/src/java/org/apache/nutch/parse/rss/RSSParser.java
lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java
lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java
lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java
lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java
lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/PrintCommandListener.java
lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java
lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java
lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java
lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java
lucene/nutch/trunk/src/plugin/subcollection/src/java/org/apache/nutch/collection/CollectionManager.java
lucene/nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java
lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/analysis/CommonGrams.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/analysis/CommonGrams.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/analysis/CommonGrams.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/analysis/CommonGrams.java Thu Jun 22 05:20:29 2006
@@ -157,13 +157,17 @@
TokenStream ts = new NutchDocumentTokenizer(new StringReader(line));
Token token = ts.next();
if (token == null) {
- LOG.warn("Line does not contain a field name: " + line);
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Line does not contain a field name: " + line);
+ }
continue;
}
String field = token.termText();
token = ts.next();
if (token == null) {
- LOG.warn("Line contains only a field name, no word: " + line);
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Line contains only a field name, no word: " + line);
+ }
continue;
}
String gram = token.termText();
@@ -208,7 +212,10 @@
/** Optimizes phrase queries to use n-grams when possible. */
public String[] optimizePhrase(Phrase phrase, String field) {
- //LOG.info("Optimizing " + phrase + " for " + field);
+
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Optimizing " + phrase + " for " + field);
+ }
ArrayList result = new ArrayList();
TokenStream ts = getFilter(new ArrayTokens(phrase), field);
Token token, prev=null;
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/clustering/OnlineClustererFactory.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/clustering/OnlineClustererFactory.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/clustering/OnlineClustererFactory.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/clustering/OnlineClustererFactory.java Thu Jun 22 05:20:29 2006
@@ -76,18 +76,24 @@
if (extensionName != null) {
Extension extension = findExtension(extensionName);
if (extension != null) {
- LOG.info("Using clustering extension: " + extensionName);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Using clustering extension: " + extensionName);
+ }
return (OnlineClusterer) extension.getExtensionInstance();
}
- LOG.warn("Clustering extension not found: '" + extensionName
- + "', trying the default");
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Clustering extension not found: '" + extensionName +
+ "', trying the default");
+ }
// not found, fallback to the default, if available.
}
final Extension[] extensions = this.extensionPoint.getExtensions();
if (extensions.length > 0) {
- LOG.info("Using the first clustering extension found: "
- + extensions[0].getId());
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Using the first clustering extension found: " +
+ extensions[0].getId());
+ }
return (OnlineClusterer) extensions[0].getExtensionInstance();
} else {
return null;
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java Thu Jun 22 05:20:29 2006
@@ -84,14 +84,15 @@
throw new RuntimeException(dir + " already exists.");
}
- LOG.info("crawl started in: " + dir);
- LOG.info("rootUrlDir = " + rootUrlDir);
- LOG.info("threads = " + threads);
- LOG.info("depth = " + depth);
-
- if (topN != Integer.MAX_VALUE)
- LOG.info("topN = " + topN);
-
+ if (LOG.isInfoEnabled()) {
+ LOG.info("crawl started in: " + dir);
+ LOG.info("rootUrlDir = " + rootUrlDir);
+ LOG.info("threads = " + threads);
+ LOG.info("depth = " + depth);
+ if (topN != Integer.MAX_VALUE)
+ LOG.info("topN = " + topN);
+ }
+
Path crawlDb = new Path(dir + "/crawldb");
Path linkDb = new Path(dir + "/linkdb");
Path segments = new Path(dir + "/segments");
@@ -121,6 +122,6 @@
new DeleteDuplicates(job).dedup(new Path[] { indexes });
new IndexMerger(fs, fs.listPaths(indexes), index, tmpDir, job).merge();
- LOG.info("crawl finished: " + dir);
+ if (LOG.isInfoEnabled()) { LOG.info("crawl finished: " + dir); }
}
}
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java Thu Jun 22 05:20:29 2006
@@ -45,19 +45,24 @@
}
public void update(Path crawlDb, Path segment) throws IOException {
- LOG.info("CrawlDb update: starting");
- LOG.info("CrawlDb update: db: " + crawlDb);
- LOG.info("CrawlDb update: segment: " + segment);
+
+ if (LOG.isInfoEnabled()) {
+ LOG.info("CrawlDb update: starting");
+ LOG.info("CrawlDb update: db: " + crawlDb);
+ LOG.info("CrawlDb update: segment: " + segment);
+ }
JobConf job = CrawlDb.createJob(getConf(), crawlDb);
job.addInputPath(new Path(segment, CrawlDatum.FETCH_DIR_NAME));
job.addInputPath(new Path(segment, CrawlDatum.PARSE_DIR_NAME));
- LOG.info("CrawlDb update: Merging segment data into db.");
+ if (LOG.isInfoEnabled()) {
+ LOG.info("CrawlDb update: Merging segment data into db.");
+ }
JobClient.runJob(job);
CrawlDb.install(job, crawlDb);
- LOG.info("CrawlDb update: done");
+ if (LOG.isInfoEnabled()) { LOG.info("CrawlDb update: done"); }
}
public static JobConf createJob(Configuration config, Path crawlDb) {
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbMerger.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbMerger.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbMerger.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbMerger.java Thu Jun 22 05:20:29 2006
@@ -70,7 +70,9 @@
if (filters.filter(((UTF8) key).toString()) == null)
return;
} catch (Exception e) {
- LOG.debug("Can't filter " + key + ": " + e);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Can't filter " + key + ": " + e);
+ }
}
}
CrawlDatum res = null;
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java Thu Jun 22 05:20:29 2006
@@ -205,7 +205,11 @@
}
public void processStatJob(String crawlDb, Configuration config) throws IOException {
- LOG.info("CrawlDb statistics start: " + crawlDb);
+
+ if (LOG.isInfoEnabled()) {
+ LOG.info("CrawlDb statistics start: " + crawlDb);
+ }
+
Path tmpFolder = new Path(crawlDb, "stat_tmp" + System.currentTimeMillis());
JobConf job = new NutchJob(config);
@@ -252,27 +256,30 @@
else if (k.startsWith("avg")) avg++;
}
}
- LOG.info("Statistics for CrawlDb: " + crawlDb);
- Iterator it = stats.keySet().iterator();
- while (it.hasNext()) {
- String k = (String) it.next();
- LongWritable val = (LongWritable) stats.get(k);
- if (k.indexOf("score") != -1) {
- if (k.startsWith("min")) {
- LOG.info(k + ":\t" + (float) ((float) (val.get() / min) / 1000.0f));
- } else if (k.startsWith("max")) {
- LOG.info(k + ":\t" + (float) ((float) (val.get() / max) / 1000.0f));
- } else if (k.startsWith("avg")) {
- LOG.info(k + ":\t" + (float) ((float) (val.get() / avg) / 1000.0f));
- }
- } else if (k.startsWith("status")) {
- int code = Integer.parseInt(k.substring(k.indexOf(' ') + 1));
- LOG.info(k + " (" + CrawlDatum.statNames[code] + "):\t" + val);
- } else LOG.info(k + ":\t" + val);
+
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Statistics for CrawlDb: " + crawlDb);
+ Iterator it = stats.keySet().iterator();
+ while (it.hasNext()) {
+ String k = (String) it.next();
+ LongWritable val = (LongWritable) stats.get(k);
+ if (k.indexOf("score") != -1) {
+ if (k.startsWith("min")) {
+ LOG.info(k + ":\t" + (float) ((float) (val.get() / min) / 1000.0f));
+ } else if (k.startsWith("max")) {
+ LOG.info(k + ":\t" + (float) ((float) (val.get() / max) / 1000.0f));
+ } else if (k.startsWith("avg")) {
+ LOG.info(k + ":\t" + (float) ((float) (val.get() / avg) / 1000.0f));
+ }
+ } else if (k.startsWith("status")) {
+ int code = Integer.parseInt(k.substring(k.indexOf(' ') + 1));
+ LOG.info(k + " (" + CrawlDatum.statNames[code] + "):\t" + val);
+ } else LOG.info(k + ":\t" + val);
+ }
}
// removing the tmp folder
fileSystem.delete(tmpFolder);
- LOG.info("CrawlDb statistics: done");
+ if (LOG.isInfoEnabled()) { LOG.info("CrawlDb statistics: done"); }
}
@@ -296,8 +303,11 @@
public void processDumpJob(String crawlDb, String output, Configuration config) throws IOException {
- LOG.info("CrawlDb dump: starting");
- LOG.info("CrawlDb db: " + crawlDb);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("CrawlDb dump: starting");
+ LOG.info("CrawlDb db: " + crawlDb);
+ }
+
Path outFolder = new Path(output);
JobConf job = new NutchJob(config);
@@ -314,12 +324,16 @@
job.setOutputValueClass(CrawlDatum.class);
JobClient.runJob(job);
- LOG.info("CrawlDb dump: done");
+ if (LOG.isInfoEnabled()) { LOG.info("CrawlDb dump: done"); }
}
public void processTopNJob(String crawlDb, long topN, float min, String output, Configuration config) throws IOException {
- LOG.info("CrawlDb topN: starting (topN=" + topN + ", min=" + min + ")");
- LOG.info("CrawlDb db: " + crawlDb);
+
+ if (LOG.isInfoEnabled()) {
+ LOG.info("CrawlDb topN: starting (topN=" + topN + ", min=" + min + ")");
+ LOG.info("CrawlDb db: " + crawlDb);
+ }
+
Path outFolder = new Path(output);
Path tempDir =
new Path(config.get("mapred.temp.dir", ".") +
@@ -343,7 +357,9 @@
job.setLong("CrawlDbReader.topN.min", Math.round(1000000.0 * min));
JobClient.runJob(job);
- LOG.info("CrawlDb topN: collecting topN scores.");
+ if (LOG.isInfoEnabled()) {
+ LOG.info("CrawlDb topN: collecting topN scores.");
+ }
job = new NutchJob(config);
job.setLong("CrawlDbReader.topN", topN);
@@ -365,7 +381,7 @@
JobClient.runJob(job);
FileSystem fs = FileSystem.get(config);
fs.delete(tempDir);
- LOG.info("CrawlDb topN: done");
+ if (LOG.isInfoEnabled()) { LOG.info("CrawlDb topN: done"); }
}
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java Thu Jun 22 05:20:29 2006
@@ -108,8 +108,10 @@
try {
scfilters.initialScore((UTF8)key, result);
} catch (ScoringFilterException e) {
- LOG.warn("Cannot filter init score for url " + key +
- ", using default: " + e.getMessage());
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Cannot filter init score for url " + key +
+ ", using default: " + e.getMessage());
+ }
result.setScore(scoreInjected);
}
}
@@ -122,7 +124,9 @@
break;
case CrawlDatum.STATUS_SIGNATURE:
- LOG.warn("Lone CrawlDatum.STATUS_SIGNATURE: " + key);
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Lone CrawlDatum.STATUS_SIGNATURE: " + key);
+ }
return;
case CrawlDatum.STATUS_FETCH_RETRY: // temporary failure
if (old != null)
@@ -147,7 +151,9 @@
try {
scfilters.updateDbScore((UTF8)key, result, linked);
} catch (Exception e) {
- LOG.warn("Couldn't update score, key=" + key + ": " + e);
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Couldn't update score, key=" + key + ": " + e);
+ }
}
output.collect(key, result);
}
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java Thu Jun 22 05:20:29 2006
@@ -98,7 +98,9 @@
if (filters.filter(url.toString()) == null)
return;
} catch (URLFilterException e) {
- LOG.warn("Couldn't filter url: " + url + " (" + e.getMessage() + ")");
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Couldn't filter url: " + url + " (" + e.getMessage() + ")");
+ }
}
CrawlDatum crawlDatum = (CrawlDatum)value;
@@ -112,7 +114,9 @@
try {
sort = scfilters.generatorSortValue((UTF8)key, crawlDatum, sort);
} catch (ScoringFilterException sfe) {
- LOG.warn("Couldn't filter generatorSortValue for " + key + ": " + sfe);
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Couldn't filter generatorSortValue for " + key + ": " + sfe);
+ }
}
// sort by decreasing score
sortValue.set(sort);
@@ -150,9 +154,13 @@
InetAddress ia = InetAddress.getByName(host);
host = ia.getHostAddress();
} catch (UnknownHostException uhe) {
- LOG.debug("DNS lookup failed: " + host + ", skipping.");
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("DNS lookup failed: " + host + ", skipping.");
+ }
dnsFailure++;
- if (dnsFailure % 1000 == 0) LOG.warn("DNS failures: " + dnsFailure);
+ if ((dnsFailure % 1000 == 0) && (LOG.isWarnEnabled())) {
+ LOG.warn("DNS failures: " + dnsFailure);
+ }
continue;
}
}
@@ -168,8 +176,10 @@
// skip URL if above the limit per host.
if (hostCount.get() > maxPerHost) {
if (hostCount.get() == maxPerHost + 1) {
- LOG.info("Host "+ host +" has more than "+ maxPerHost +" URLs."+
- " Skipping additional.");
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Host " + host + " has more than " + maxPerHost +
+ " URLs." + " Skipping additional.");
+ }
}
continue;
}
@@ -257,11 +267,13 @@
Path segment = new Path(segments, generateSegmentName());
Path output = new Path(segment, CrawlDatum.GENERATE_DIR_NAME);
- LOG.info("Generator: starting");
- LOG.info("Generator: segment: " + segment);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Generator: starting");
+ LOG.info("Generator: segment: " + segment);
+ LOG.info("Generator: Selecting most-linked urls due for fetch.");
+ }
// map to inverted subset due for fetch, sort by link count
- LOG.info("Generator: Selecting most-linked urls due for fetch.");
JobConf job = new NutchJob(getConf());
job.setJobName("generate: select " + segment);
@@ -288,7 +300,9 @@
JobClient.runJob(job);
// invert again, partition by host, sort by url hash
- LOG.info("Generator: Partitioning selected urls by host, for politeness.");
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Generator: Partitioning selected urls by host, for politeness.");
+ }
job = new NutchJob(getConf());
job.setJobName("generate: partition " + segment);
@@ -312,7 +326,7 @@
new JobClient(getConf()).getFs().delete(tempDir);
- LOG.info("Generator: done.");
+ if (LOG.isInfoEnabled()) { LOG.info("Generator: done."); }
return segment;
}
@@ -355,8 +369,9 @@
}
}
- if (topN != Long.MAX_VALUE)
+ if ((LOG.isInfoEnabled()) && (topN != Long.MAX_VALUE)) {
LOG.info("topN: " + topN);
+ }
Generator gen = new Generator(NutchConfiguration.create());
gen.generate(dbDir, segmentsDir, numFetchers, topN, curTime);
}
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java Thu Jun 22 05:20:29 2006
@@ -70,7 +70,7 @@
url = urlNormalizer.normalize(url); // normalize the url
url = filters.filter(url); // filter the url
} catch (Exception e) {
- LOG.warn("Skipping " +url+":"+e);
+ if (LOG.isWarnEnabled()) { LOG.warn("Skipping " +url+":"+e); }
url = null;
}
if (url != null) { // if it passes
@@ -80,8 +80,10 @@
try {
scfilters.initialScore(value, datum);
} catch (ScoringFilterException e) {
- LOG.warn("Cannot filter init score for url " + url +
- ", using default (" + e.getMessage() + ")");
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Cannot filter init score for url " + url +
+ ", using default (" + e.getMessage() + ")");
+ }
datum.setScore(scoreInjected);
}
output.collect(value, datum);
@@ -107,9 +109,12 @@
}
public void inject(Path crawlDb, Path urlDir) throws IOException {
- LOG.info("Injector: starting");
- LOG.info("Injector: crawlDb: " + crawlDb);
- LOG.info("Injector: urlDir: " + urlDir);
+
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Injector: starting");
+ LOG.info("Injector: crawlDb: " + crawlDb);
+ LOG.info("Injector: urlDir: " + urlDir);
+ }
Path tempDir =
new Path(getConf().get("mapred.temp.dir", ".") +
@@ -117,7 +122,9 @@
Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
// map text input file to a <url,CrawlDatum> file
- LOG.info("Injector: Converting injected urls to crawl db entries.");
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Injector: Converting injected urls to crawl db entries.");
+ }
JobConf sortJob = new NutchJob(getConf());
sortJob.setJobName("inject " + urlDir);
sortJob.setInputPath(urlDir);
@@ -131,7 +138,9 @@
JobClient.runJob(sortJob);
// merge with existing crawl db
- LOG.info("Injector: Merging injected urls into crawl db.");
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Injector: Merging injected urls into crawl db.");
+ }
JobConf mergeJob = CrawlDb.createJob(getConf(), crawlDb);
mergeJob.addInputPath(tempDir);
JobClient.runJob(mergeJob);
@@ -140,7 +149,7 @@
// clean up
FileSystem fs = new JobClient(getConf()).getFs();
fs.delete(tempDir);
- LOG.info("Injector: done");
+ if (LOG.isInfoEnabled()) { LOG.info("Injector: done"); }
}
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java Thu Jun 22 05:20:29 2006
@@ -64,7 +64,9 @@
if (filters.filter(((UTF8)key).toString()) == null)
return;
} catch (Exception e) {
- LOG.debug("Can't filter " + key + ": " + e);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Can't filter " + key + ": " + e);
+ }
}
}
Inlinks inlinks = null;
@@ -85,7 +87,9 @@
if (filters.filter(in.getFromUrl()) == null)
continue;
} catch (Exception e) {
- LOG.debug("Can't filter " + key + ": " + e);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Can't filter " + key + ": " + e);
+ }
}
}
inlinks.add(in);
@@ -193,17 +197,24 @@
}
public void invert(Path linkDb, Path[] segments) throws IOException {
- LOG.info("LinkDb: starting");
- LOG.info("LinkDb: linkdb: " + linkDb);
+
+ if (LOG.isInfoEnabled()) {
+ LOG.info("LinkDb: starting");
+ LOG.info("LinkDb: linkdb: " + linkDb);
+ }
JobConf job = LinkDb.createJob(getConf(), linkDb);
for (int i = 0; i < segments.length; i++) {
- LOG.info("LinkDb: adding segment: " + segments[i]);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("LinkDb: adding segment: " + segments[i]);
+ }
job.addInputPath(new Path(segments[i], ParseData.DIR_NAME));
}
JobClient.runJob(job);
FileSystem fs = FileSystem.get(getConf());
if (fs.exists(linkDb)) {
- LOG.info("LinkDb: merging with existing linkdb: " + linkDb);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("LinkDb: merging with existing linkdb: " + linkDb);
+ }
// try to merge
Path newLinkDb = job.getOutputPath();
job = LinkDb.createMergeJob(getConf(), linkDb);
@@ -213,7 +224,7 @@
fs.delete(newLinkDb);
}
LinkDb.install(job, linkDb);
- LOG.info("LinkDb: done");
+ if (LOG.isInfoEnabled()) { LOG.info("LinkDb: done"); }
}
private static JobConf createJob(Configuration config, Path linkDb) {
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java Thu Jun 22 05:20:29 2006
@@ -79,8 +79,11 @@
}
public static void processDumpJob(String linkdb, String output, Configuration config) throws IOException {
- LOG.info("LinkDb dump: starting");
- LOG.info("LinkDb db: " + linkdb);
+
+ if (LOG.isInfoEnabled()) {
+ LOG.info("LinkDb dump: starting");
+ LOG.info("LinkDb db: " + linkdb);
+ }
Path outFolder = new Path(output);
JobConf job = new NutchJob(config);
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java Thu Jun 22 05:20:29 2006
@@ -313,7 +313,9 @@
clazz = Class.forName(UTF8.readString(in));
addIdEntry(id, clazz);
} catch (Exception e) {
- LOG.warn("MapWritable: unable to load internal map entry" + e.toString());
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Unable to load internal map entry" + e.toString());
+ }
fIdCount--;
}
}
@@ -329,8 +331,10 @@
fLast = fLast.fNextEntry = entry;
}
} catch (IOException e) {
- LOG.warn("MapWritable: unable to load meta data entry, ignoring.. : "
- + e.toString());
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Unable to load meta data entry, ignoring.. : " +
+ e.toString());
+ }
fSize--;
}
}
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/SignatureFactory.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/SignatureFactory.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/SignatureFactory.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/SignatureFactory.java Thu Jun 22 05:20:29 2006
@@ -41,7 +41,9 @@
Signature impl = (Signature)conf.getObject(clazz);
if (impl == null) {
try {
- LOG.info("Using Signature impl: " + clazz);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Using Signature impl: " + clazz);
+ }
Class implClass = Class.forName(clazz);
impl = (Signature)implClass.newInstance();
impl.setConf(conf);
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java Thu Jun 22 05:20:29 2006
@@ -115,8 +115,10 @@
break; // at eof, exit
}
} catch (IOException e) {
- e.printStackTrace(LogUtil.getFatalStream(LOG));
- LOG.fatal("fetcher caught:"+e.toString());
+ if (LOG.isFatalEnabled()) {
+ e.printStackTrace(LogUtil.getFatalStream(LOG));
+ LOG.fatal("fetcher caught:"+e.toString());
+ }
break;
}
@@ -128,13 +130,16 @@
UTF8 url = new UTF8();
url.set(key);
try {
- LOG.info("fetching " + url); // fetch the page
-
+ if (LOG.isInfoEnabled()) { LOG.info("fetching " + url); }
+
+ // fetch the page
boolean redirecting;
int redirectCount = 0;
do {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("redirectCount=" + redirectCount);
+ }
redirecting = false;
- LOG.debug("redirectCount=" + redirectCount);
Protocol protocol = this.protocolFactory.getProtocol(url.toString());
ProtocolOutput output = protocol.getProtocolOutput(url, datum);
ProtocolStatus status = output.getStatus();
@@ -155,8 +160,10 @@
url = new UTF8(newUrl);
redirecting = true;
redirectCount++;
- LOG.debug(" - content redirect to " + url);
- } else {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(" - content redirect to " + url);
+ }
+ } else if (LOG.isDebugEnabled()) {
LOG.debug(" - content redirect skipped: " +
(newUrl != null ? "to same url" : "filtered"));
}
@@ -172,8 +179,10 @@
url = new UTF8(newUrl);
redirecting = true;
redirectCount++;
- LOG.debug(" - protocol redirect to " + url);
- } else {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(" - protocol redirect to " + url);
+ }
+ } else if (LOG.isDebugEnabled()) {
LOG.debug(" - protocol redirect skipped: " +
(newUrl != null ? "to same url" : "filtered"));
}
@@ -195,12 +204,16 @@
break;
default:
- LOG.warn("Unknown ProtocolStatus: " + status.getCode());
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Unknown ProtocolStatus: " + status.getCode());
+ }
output(url, datum, null, CrawlDatum.STATUS_FETCH_GONE);
}
if (redirecting && redirectCount >= maxRedirect) {
- LOG.info(" - redirect count exceeded " + url);
+ if (LOG.isInfoEnabled()) {
+ LOG.info(" - redirect count exceeded " + url);
+ }
output(url, datum, null, CrawlDatum.STATUS_FETCH_GONE);
}
@@ -215,15 +228,19 @@
}
} catch (Throwable e) {
- e.printStackTrace(LogUtil.getFatalStream(LOG));
- LOG.fatal("fetcher caught:"+e.toString());
+ if (LOG.isFatalEnabled()) {
+ e.printStackTrace(LogUtil.getFatalStream(LOG));
+ LOG.fatal("fetcher caught:"+e.toString());
+ }
} finally {
synchronized (Fetcher.this) {activeThreads--;} // count threads
}
}
private void logError(UTF8 url, String message) {
- LOG.info("fetch of " + url + " failed with: " + message);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("fetch of " + url + " failed with: " + message);
+ }
synchronized (Fetcher.this) { // record failure
errors++;
}
@@ -246,8 +263,10 @@
try {
scfilters.passScoreBeforeParsing(key, datum, content);
} catch (Exception e) {
- e.printStackTrace(LogUtil.getWarnStream(LOG));
- LOG.warn("Couldn't pass score, url " + key + " (" + e + ")");
+ if (LOG.isWarnEnabled()) {
+ e.printStackTrace(LogUtil.getWarnStream(LOG));
+ LOG.warn("Couldn't pass score, url " + key + " (" + e + ")");
+ }
}
Parse parse = null;
@@ -260,7 +279,9 @@
parseStatus = new ParseStatus(e);
}
if (!parseStatus.isSuccess()) {
- LOG.warn("Error parsing: " + key + ": " + parseStatus);
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Error parsing: " + key + ": " + parseStatus);
+ }
parse = parseStatus.getEmptyParse(getConf());
}
// Calculate page signature. For non-parsing fetchers this will
@@ -274,8 +295,10 @@
try {
scfilters.passScoreAfterParsing(key, content, parse);
} catch (Exception e) {
- e.printStackTrace(LogUtil.getWarnStream(LOG));
- LOG.warn("Couldn't pass score, url " + key + " (" + e + ")");
+ if (LOG.isWarnEnabled()) {
+ e.printStackTrace(LogUtil.getWarnStream(LOG));
+ LOG.warn("Couldn't pass score, url " + key + " (" + e + ")");
+ }
}
}
@@ -287,8 +310,10 @@
storingContent ? content : null,
parse != null ? new ParseImpl(parse) : null));
} catch (IOException e) {
- e.printStackTrace(LogUtil.getFatalStream(LOG));
- LOG.fatal("fetcher caught:"+e.toString());
+ if (LOG.isFatalEnabled()) {
+ e.printStackTrace(LogUtil.getFatalStream(LOG));
+ LOG.fatal("fetcher caught:"+e.toString());
+ }
}
if (parse != null) return parse.getData().getStatus();
else return null;
@@ -349,7 +374,7 @@
this.maxRedirect = getConf().getInt("http.redirect.max", 3);
int threadCount = getConf().getInt("fetcher.threads.fetch", 10);
- LOG.info("Fetcher: threads: " + threadCount);
+ if (LOG.isInfoEnabled()) { LOG.info("Fetcher: threads: " + threadCount); }
for (int i = 0; i < threadCount; i++) { // spawn threads
new FetcherThread(getConf()).start();
@@ -367,8 +392,10 @@
// some requests seem to hang, despite all intentions
synchronized (this) {
- if ((System.currentTimeMillis() - lastRequestStart) > timeout) {
- LOG.warn("Aborting with "+activeThreads+" hung threads.");
+ if ((System.currentTimeMillis() - lastRequestStart) > timeout) {
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Aborting with "+activeThreads+" hung threads.");
+ }
return;
}
}
@@ -380,8 +407,10 @@
public void fetch(Path segment, int threads, boolean parsing)
throws IOException {
- LOG.info("Fetcher: starting");
- LOG.info("Fetcher: segment: " + segment);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Fetcher: starting");
+ LOG.info("Fetcher: segment: " + segment);
+ }
JobConf job = new NutchJob(getConf());
job.setJobName("fetch " + segment);
@@ -406,7 +435,7 @@
job.setOutputValueClass(FetcherOutput.class);
JobClient.runJob(job);
- LOG.info("Fetcher: done");
+ if (LOG.isInfoEnabled()) { LOG.info("Fetcher: done"); }
}
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java Thu Jun 22 05:20:29 2006
@@ -291,7 +291,7 @@
public void dedup(Path[] indexDirs)
throws IOException {
- LOG.info("Dedup: starting");
+ if (LOG.isInfoEnabled()) { LOG.info("Dedup: starting"); }
Path hashDir =
new Path("dedup-hash-"+
@@ -300,7 +300,9 @@
JobConf job = new NutchJob(getConf());
for (int i = 0; i < indexDirs.length; i++) {
- LOG.info("Dedup: adding indexes in: " + indexDirs[i]);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Dedup: adding indexes in: " + indexDirs[i]);
+ }
job.addInputPath(indexDirs[i]);
}
job.setJobName("dedup phase 1");
@@ -342,7 +344,7 @@
new JobClient(getConf()).getFs().delete(hashDir);
- LOG.info("Dedup: done");
+ if (LOG.isInfoEnabled()) { LOG.info("Dedup: done"); }
}
public static void main(String[] args) throws Exception {
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java Thu Jun 22 05:20:29 2006
@@ -75,7 +75,7 @@
Directory[] dirs = new Directory[indexes.length];
for (int i = 0; i < indexes.length; i++) {
- LOG.info("Adding " + indexes[i]);
+ if (LOG.isInfoEnabled()) { LOG.info("Adding " + indexes[i]); }
dirs[i] = new FsDirectory(fs, indexes[i], false, this.conf);
}
@@ -136,7 +136,9 @@
//
// Merge the indices
//
- LOG.info("merging indexes to: " + outputIndex);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("merging indexes to: " + outputIndex);
+ }
Path[] indexFiles = (Path[])indexDirs.toArray(new Path[indexDirs.size()]);
@@ -148,7 +150,7 @@
IndexMerger merger =
new IndexMerger(fs, indexFiles, outputIndex, workDir, conf);
merger.merge();
- LOG.info("done merging");
+ if (LOG.isInfoEnabled()) { LOG.info("done merging"); }
localFs.delete(workDir);
}
}
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java Thu Jun 22 05:20:29 2006
@@ -108,9 +108,11 @@
throws IOException { // unwrap & index doc
Document doc = (Document)((ObjectWritable)value).get();
NutchAnalyzer analyzer = factory.get(doc.get("lang"));
- LOG.info(" Indexing [" + doc.getField("url").stringValue() + "]" +
- " with analyzer " + analyzer +
- " (" + doc.get("lang") + ")");
+ if (LOG.isInfoEnabled()) {
+ LOG.info(" Indexing [" + doc.getField("url").stringValue() + "]" +
+ " with analyzer " + analyzer +
+ " (" + doc.get("lang") + ")");
+ }
writer.addDocument(doc, analyzer);
}
@@ -130,7 +132,8 @@
try {
prog.start();
- LOG.info("Optimizing index."); // optimize & close index
+ if (LOG.isInfoEnabled()) { LOG.info("Optimizing index."); }
+ // optimize & close index
writer.optimize();
writer.close();
fs.completeLocalOutput(perm, temp); // copy to dfs
@@ -195,7 +198,7 @@
parseData = (ParseData)value;
} else if (value instanceof ParseText) {
parseText = (ParseText)value;
- } else {
+ } else if (LOG.isWarnEnabled()) {
LOG.warn("Unrecognized type: "+value.getClass());
}
}
@@ -216,11 +219,13 @@
doc.add(new Field("digest", metadata.get(Fetcher.SIGNATURE_KEY),
Field.Store.YES, Field.Index.NO));
-// LOG.info("Url: "+key.toString());
-// LOG.info("Title: "+parseData.getTitle());
-// LOG.info(crawlDatum.toString());
-// if (inlinks != null) {
-// LOG.info(inlinks.toString());
+// if (LOG.isInfoEnabled()) {
+// LOG.info("Url: "+key.toString());
+// LOG.info("Title: "+parseData.getTitle());
+// LOG.info(crawlDatum.toString());
+// if (inlinks != null) {
+// LOG.info(inlinks.toString());
+// }
// }
Parse parse = new ParseImpl(parseText, parseData);
@@ -228,7 +233,7 @@
// run indexing filters
doc = this.filters.filter(doc, parse, (UTF8)key, fetchDatum, inlinks);
} catch (IndexingException e) {
- LOG.warn("Error indexing "+key+": "+e);
+ if (LOG.isWarnEnabled()) { LOG.warn("Error indexing "+key+": "+e); }
return;
}
@@ -238,7 +243,9 @@
boost = this.scfilters.indexerScore((UTF8)key, doc, dbDatum,
fetchDatum, parse, inlinks, boost);
} catch (ScoringFilterException e) {
- LOG.warn("Error calculating score " + key + ": " + e);
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Error calculating score " + key + ": " + e);
+ }
return;
}
// apply boost to all indexed fields.
@@ -253,14 +260,18 @@
public void index(Path indexDir, Path crawlDb, Path linkDb, Path[] segments)
throws IOException {
- LOG.info("Indexer: starting");
- LOG.info("Indexer: linkdb: " + linkDb);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Indexer: starting");
+ LOG.info("Indexer: linkdb: " + linkDb);
+ }
JobConf job = new NutchJob(getConf());
job.setJobName("index " + indexDir);
for (int i = 0; i < segments.length; i++) {
- LOG.info("Indexer: adding segment: " + segments[i]);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Indexer: adding segment: " + segments[i]);
+ }
job.addInputPath(new Path(segments[i], CrawlDatum.FETCH_DIR_NAME));
job.addInputPath(new Path(segments[i], ParseData.DIR_NAME));
job.addInputPath(new Path(segments[i], ParseText.DIR_NAME));
@@ -282,7 +293,7 @@
job.setOutputValueClass(ObjectWritable.class);
JobClient.runJob(job);
- LOG.info("Indexer: done");
+ if (LOG.isInfoEnabled()) { LOG.info("Indexer: done"); }
}
public static void main(String[] args) throws Exception {
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilters.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilters.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilters.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilters.java Thu Jun 22 05:20:29 2006
@@ -50,7 +50,9 @@
for (int i = 0; i < extensions.length; i++) {
Extension extension = extensions[i];
IndexingFilter filter = (IndexingFilter) extension.getExtensionInstance();
- LOG.info("Adding " + filter.getClass().getName());
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Adding " + filter.getClass().getName());
+ }
if (!filterMap.containsKey(filter.getClass().getName())) {
filterMap.put(filter.getClass().getName(), filter);
}
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/net/RegexUrlNormalizer.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/net/RegexUrlNormalizer.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/net/RegexUrlNormalizer.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/net/RegexUrlNormalizer.java Thu Jun 22 05:20:29 2006
@@ -99,22 +99,25 @@
List rules=new ArrayList();
try {
- LOG.info("loading " + filename);
+ if (LOG.isInfoEnabled()) { LOG.info("loading " + filename); }
// borrowed heavily from code in Configuration.java
Document doc =
DocumentBuilderFactory.newInstance().newDocumentBuilder()
.parse(filename);
Element root = doc.getDocumentElement();
- if (!"regex-normalize".equals(root.getTagName()))
+ if ((!"regex-normalize".equals(root.getTagName())) &&
+ (LOG.isFatalEnabled())) {
LOG.fatal("bad conf file: top-level element not <regex-normalize>");
+ }
NodeList regexes = root.getChildNodes();
for (int i = 0; i < regexes.getLength(); i++) {
Node regexNode = regexes.item(i);
if (!(regexNode instanceof Element))
continue;
Element regex = (Element)regexNode;
- if (!"regex".equals(regex.getTagName()))
+ if ((!"regex".equals(regex.getTagName())) && (LOG.isWarnEnabled())) {
LOG.warn("bad conf file: element not <regex>");
+ }
NodeList fields = regex.getChildNodes();
String patternValue = null;
String subValue = null;
@@ -139,7 +142,9 @@
}
} catch (Exception e) {
- LOG.fatal("error parsing " + filename +" conf file: " + e);
+ if (LOG.isFatalEnabled()) {
+ LOG.fatal("error parsing " + filename +" conf file: " + e);
+ }
}
return rules;
}
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/net/UrlNormalizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/net/UrlNormalizerFactory.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/net/UrlNormalizerFactory.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/net/UrlNormalizerFactory.java Thu Jun 22 05:20:29 2006
@@ -41,7 +41,9 @@
if (normalizer == null) {
try {
urlNormalizer = this.conf.get("urlnormalizer.class");
- LOG.info("Using URL normalizer: " + urlNormalizer);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Using URL normalizer: " + urlNormalizer);
+ }
Class normalizerClass = Class.forName(urlNormalizer);
normalizer = (UrlNormalizer) normalizerClass.newInstance();
normalizer.setConf(this.conf);
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/ontology/OntologyFactory.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/ontology/OntologyFactory.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/ontology/OntologyFactory.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/ontology/OntologyFactory.java Thu Jun 22 05:20:29 2006
@@ -58,18 +58,24 @@
if (extensionName != null) {
Extension extension = findExtension(extensionName);
if (extension != null) {
- LOG.info("Using ontology extension: " + extensionName);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Using ontology extension: " + extensionName);
+ }
return (Ontology) extension.getExtensionInstance();
}
- LOG.warn("Ontology extension not found: '" + extensionName
- + "', trying the default");
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Ontology extension not found: '" + extensionName +
+ "', trying the default");
+ }
// not found, fallback to the default, if available.
}
Extension[] extensions = this.extensionPoint.getExtensions();
if (extensions.length > 0) {
- LOG.info("Using the first ontology extension found: "
- + extensions[0].getId());
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Using the first ontology extension found: " +
+ extensions[0].getId());
+ }
return (Ontology) extensions[0].getExtensionInstance();
} else {
return null;
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/OutlinkExtractor.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/OutlinkExtractor.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/OutlinkExtractor.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/OutlinkExtractor.java Thu Jun 22 05:20:29 2006
@@ -101,7 +101,9 @@
// (SHOULD really check cpu time used so that heavily loaded systems
// do not unnecessarily hit this limit.)
if (System.currentTimeMillis() - start >= 60000L) {
- LOG.warn("Time limit exceeded for getOutLinks");
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Time limit exceeded for getOutLinks");
+ }
break;
}
result = matcher.getMatch();
@@ -110,7 +112,7 @@
}
} catch (Exception ex) {
// if the matcher fails (perhaps a malformed URL) we just log it and move on
- LOG.error("getOutlinks", ex);
+ if (LOG.isErrorEnabled()) { LOG.error("getOutlinks", ex); }
}
final Outlink[] retval;
@@ -152,7 +154,9 @@
//
// url = re.getParen(0);
//
- // LOG.trace("Extracted url: " + url);
+ // if (LOG.isTraceEnabled()) {
+ // LOG.trace("Extracted url: " + url);
+ // }
//
// try {
//
@@ -162,7 +166,7 @@
// } catch (MalformedURLException ex) {
// // if it is a malformed URL we just throw it away and continue with
// // extraction.
- // LOG.error("getOutlinks", ex);
+ // if (LOG.isErrorEnabled()) { LOG.error("getOutlinks", ex); }
// }
//
// pos = re.getParenEnd(0);
@@ -215,7 +219,7 @@
// } catch (MalformedURLException ex) {
// // if it is a malformed URL we just throw it away and continue with
// // extraction.
- // LOG.error("getOutlinks", ex);
+ // if (LOG.isErrorEnabled()) { LOG.error("getOutlinks", ex); }
// }
//
// pos = re.getParenEnd(0);
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java Thu Jun 22 05:20:29 2006
@@ -119,8 +119,10 @@
adjust = scfilters.distributeScoreToOutlink((UTF8)key, targetUrl,
parseData, target, null, links.length, validCount);
} catch (ScoringFilterException e) {
- LOG.warn("Cannot distribute score from " + key + " to " + targetUrl +
- " - skipped (" + e.getMessage());
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Cannot distribute score from " + key + " to " +
+ targetUrl + " - skipped (" + e.getMessage());
+ }
continue;
}
crawlOut.append(targetUrl, target);
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParsePluginsReader.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParsePluginsReader.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParsePluginsReader.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParsePluginsReader.java Thu Jun 22 05:20:29 2006
@@ -93,8 +93,10 @@
parsePluginUrl = new URL(fParsePluginsFile);
ppInputStream = parsePluginUrl.openStream();
} catch (Exception e) {
- LOG.warn("Unable to load parse plugins file from URL " +
- "[" + fParsePluginsFile + "]. Reason is [" + e + "]");
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Unable to load parse plugins file from URL " +
+ "[" + fParsePluginsFile + "]. Reason is [" + e + "]");
+ }
return pList;
}
} else {
@@ -109,8 +111,10 @@
parser = factory.newDocumentBuilder();
document = parser.parse(inputSource);
} catch (Exception e) {
- LOG.warn("Unable to parse [" + fParsePluginsFile + "]." +
- "Reason is [" + e + "]");
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Unable to parse [" + fParsePluginsFile + "]." +
+ "Reason is [" + e + "]");
+ }
return null;
}
@@ -163,7 +167,7 @@
// now add the plugin list and map it to this mimeType
pList.setPluginList(mimeTypeStr, plugList);
- } else {
+ } else if (LOG.isWarnEnabled()) {
LOG.warn("ParsePluginsReader:ERROR:no plugins defined for mime type: "
+ mimeTypeStr + ", continuing parse");
}
@@ -240,13 +244,17 @@
NodeList aliasRoot = parsePluginsRoot.getElementsByTagName("aliases");
if (aliasRoot == null || (aliasRoot != null && aliasRoot.getLength() == 0)) {
- LOG.warn("No aliases defined in parse-plugins.xml!");
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("No aliases defined in parse-plugins.xml!");
+ }
return aliases;
}
if (aliasRoot.getLength() > 1) {
// log a warning, but try and continue processing
- LOG.warn("There should only be one \"aliases\" tag in parse-plugins.xml");
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("There should only be one \"aliases\" tag in parse-plugins.xml");
+ }
}
Element aliasRootElem = (Element)aliasRoot.item(0);
@@ -257,8 +265,10 @@
Element aliasElem = (Element)aliasElements.item(i);
String parsePluginId = aliasElem.getAttribute("name");
String extensionId = aliasElem.getAttribute("extension-id");
- LOG.trace("Found alias: plugin-id: " + parsePluginId +
- ", extension-id: " + extensionId);
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Found alias: plugin-id: " + parsePluginId +
+ ", extension-id: " + extensionId);
+ }
if (parsePluginId != null && extensionId != null) {
aliases.put(parsePluginId, extensionId);
}
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java Thu Jun 22 05:20:29 2006
@@ -77,12 +77,14 @@
try {
scfilters.passScoreAfterParsing((UTF8)key, content, parse);
} catch (ScoringFilterException e) {
- e.printStackTrace(LogUtil.getWarnStream(LOG));
- LOG.warn("Error passing score: "+key+": "+e.getMessage());
+ if (LOG.isWarnEnabled()) {
+ e.printStackTrace(LogUtil.getWarnStream(LOG));
+ LOG.warn("Error passing score: "+key+": "+e.getMessage());
+ }
return;
}
output.collect(key, new ParseImpl(parse.getText(), parse.getData()));
- } else {
+ } else if (LOG.isWarnEnabled()) {
LOG.warn("Error parsing: "+key+": "+status.toString());
}
}
@@ -94,8 +96,11 @@
}
public void parse(Path segment) throws IOException {
- LOG.info("Parse: starting");
- LOG.info("Parse: segment: " + segment);
+
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Parse: starting");
+ LOG.info("Parse: segment: " + segment);
+ }
JobConf job = new NutchJob(getConf());
job.setJobName("parse " + segment);
@@ -113,7 +118,7 @@
job.setOutputValueClass(ParseImpl.class);
JobClient.runJob(job);
- LOG.info("Parse: done");
+ if (LOG.isInfoEnabled()) { LOG.info("Parse: done"); }
}
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseUtil.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseUtil.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseUtil.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseUtil.java Thu Jun 22 05:20:29 2006
@@ -67,22 +67,28 @@
try {
parsers = this.parserFactory.getParsers(content.getContentType(), "");
} catch (ParserNotFound e) {
- LOG.warn("No suitable parser found when trying to parse content " +
- content);
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("No suitable parser found when trying to parse content " +
+ content);
+ }
throw new ParseException(e.getMessage());
}
Parse parse = null;
for (int i=0; i<parsers.length; i++) {
- LOG.debug("Parsing [" + content.getUrl() + "] with [" + parsers[i] + "]");
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Parsing [" + content.getUrl() + "] with [" + parsers[i] + "]");
+ }
parse = parsers[i].getParse(content);
if ((parse != null) && (parse.getData().getStatus().isSuccess())) {
return parse;
}
}
-
- LOG.warn("Unable to successfully parse content " + content.getUrl() +
- " of type " + content.getContentType());
+
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Unable to successfully parse content " + content.getUrl() +
+ " of type " + content.getContentType());
+ }
ParseStatus ps = (parse.getData() != null) ? parse.getData().getStatus() : null;
return (ps == null) ? new ParseStatus().getEmptyParse(this.conf)
@@ -116,8 +122,10 @@
try {
p = this.parserFactory.getParserById(extId);
} catch (ParserNotFound e) {
- LOG.warn("No suitable parser found when trying to parse content " +
- content);
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("No suitable parser found when trying to parse content " +
+ content);
+ }
throw new ParseException(e.getMessage());
}
@@ -126,8 +134,10 @@
if (parse != null && parse.getData().getStatus().isSuccess()) {
return parse;
} else {
- LOG.warn("Unable to successfully parse content " + content.getUrl() +
- " of type " + content.getContentType());
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Unable to successfully parse content " + content.getUrl() +
+ " of type " + content.getContentType());
+ }
return new ParseStatus().getEmptyParse(this.conf);
}
}
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserChecker.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserChecker.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserChecker.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserChecker.java Thu Jun 22 05:20:29 2006
@@ -70,7 +70,7 @@
}
}
- LOG.info("fetching: "+url);
+ if (LOG.isInfoEnabled()) { LOG.info("fetching: "+url); }
Configuration conf = NutchConfiguration.create();
ProtocolFactory factory = new ProtocolFactory(conf);
@@ -88,8 +88,10 @@
System.exit(-1);
}
- LOG.info("parsing: "+url);
- LOG.info("contentType: "+contentType);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("parsing: "+url);
+ LOG.info("contentType: "+contentType);
+ }
Parse parse = new ParseUtil(conf).parse(content);
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java Thu Jun 22 05:20:29 2006
@@ -131,12 +131,14 @@
}
parsers.add(p);
} catch (PluginRuntimeException e) {
+ if (LOG.isWarnEnabled()) {
e.printStackTrace(LogUtil.getWarnStream(LOG));
- LOG.warn("ParserFactory:PluginRuntimeException when "
- + "initializing parser plugin "
- + ext.getDescriptor().getPluginId()
- + " instance in getParsers "
- + "function: attempting to continue instantiating parsers");
+ LOG.warn("ParserFactory:PluginRuntimeException when "
+ + "initializing parser plugin "
+ + ext.getDescriptor().getPluginId()
+ + " instance in getParsers "
+ + "function: attempting to continue instantiating parsers");
+ }
}
}
return (Parser[]) parsers.toArray(new Parser[]{});
@@ -189,9 +191,11 @@
this.conf.setObject(parserExt.getId(), p);
return p;
} catch (PluginRuntimeException e) {
- LOG.warn("Canno initialize parser " +
- parserExt.getDescriptor().getPluginId() +
- " (cause: " + e.toString());
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Canno initialize parser " +
+ parserExt.getDescriptor().getPluginId() +
+ " (cause: " + e.toString());
+ }
throw new ParserNotFound("Cannot init parser for id [" + id + "]");
}
}
@@ -211,8 +215,10 @@
try {
type = MimeType.clean(contentType);
} catch (MimeTypeException mte) {
- LOG.debug("Could not clean the content-type [" + contentType +
- "], Reason is [" + mte + "]. Using its raw version...");
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Could not clean the content-type [" + contentType +
+ "], Reason is [" + mte + "]. Using its raw version...");
+ }
type = contentType;
}
@@ -299,22 +305,24 @@
if (ext == null) {
//try to get it just by its pluginId
ext = getExtension(extensions, parsePluginId);
-
- if (ext != null) {
- // plugin was enabled via plugin.includes
- // its plugin.xml just doesn't claim to support that
- // particular mimeType
- LOG.warn("ParserFactory:Plugin: " + parsePluginId +
- " mapped to contentType " + contentType +
- " via parse-plugins.xml, but " + "its plugin.xml " +
- "file does not claim to support contentType: " +
- contentType);
- } else {
- // plugin wasn't enabled via plugin.includes
- LOG.warn("ParserFactory: Plugin: " + parsePluginId +
- " mapped to contentType " + contentType +
- " via parse-plugins.xml, but not enabled via " +
- "plugin.includes in nutch-default.xml");
+
+ if (LOG.isWarnEnabled()) {
+ if (ext != null) {
+ // plugin was enabled via plugin.includes
+ // its plugin.xml just doesn't claim to support that
+ // particular mimeType
+ LOG.warn("ParserFactory:Plugin: " + parsePluginId +
+ " mapped to contentType " + contentType +
+ " via parse-plugins.xml, but " + "its plugin.xml " +
+ "file does not claim to support contentType: " +
+ contentType);
+ } else {
+ // plugin wasn't enabled via plugin.includes
+ LOG.warn("ParserFactory: Plugin: " + parsePluginId +
+ " mapped to contentType " + contentType +
+ " via parse-plugins.xml, but not enabled via " +
+ "plugin.includes in nutch-default.xml");
+ }
}
}
@@ -342,12 +350,14 @@
}
if (extList.size() > 0) {
- LOG.info("The parsing plugins: " + extList +
- " are enabled via the plugin.includes system " +
- "property, and all claim to support the content type " +
- contentType + ", but they are not mapped to it in the " +
- "parse-plugins.xml file");
- } else {
+ if (LOG.isInfoEnabled()) {
+ LOG.info("The parsing plugins: " + extList +
+ " are enabled via the plugin.includes system " +
+ "property, and all claim to support the content type " +
+ contentType + ", but they are not mapped to it in the " +
+ "parse-plugins.xml file");
+ }
+ } else if (LOG.isDebugEnabled()) {
LOG.debug("ParserFactory:No parse plugins mapped or enabled for " +
"contentType " + contentType);
}
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/plugin/PluginDescriptor.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/plugin/PluginDescriptor.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/plugin/PluginDescriptor.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/plugin/PluginDescriptor.java Thu Jun 22 05:20:29 2006
@@ -273,7 +273,9 @@
arrayList.add(file2.getParentFile().toURL());
}
} catch (MalformedURLException e) {
- LOG.debug(getPluginId() + " " + e.toString());
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(getPluginId() + " " + e.toString());
+ }
}
URL[] urls = (URL[]) arrayList.toArray(new URL[arrayList.size()]);
fClassLoader = new PluginClassLoader(urls, PluginDescriptor.class
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/plugin/PluginManifestParser.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/plugin/PluginManifestParser.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/plugin/PluginManifestParser.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/plugin/PluginManifestParser.java Thu Jun 22 05:20:29 2006
@@ -76,9 +76,10 @@
for (int i = 0; i < pluginFolders.length; i++) {
String name = pluginFolders[i];
File directory = getPluginFolder(name);
- if (directory == null)
- continue;
- LOG.info("Plugins: looking in: " + directory.getAbsolutePath());
+ if (directory == null) { continue; }
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Plugins: looking in: " + directory.getAbsolutePath());
+ }
File[] files = directory.listFiles();
if (files == null)
continue;
@@ -88,17 +89,19 @@
String manifestPath = oneSubFolder.getAbsolutePath()
+ File.separator + "plugin.xml";
try {
- LOG.debug("parsing: " + manifestPath);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("parsing: " + manifestPath);
+ }
PluginDescriptor p = parseManifestFile(manifestPath);
map.put(p.getPluginId(), p);
} catch (MalformedURLException e) {
- LOG.warn(e.toString());
+ if (LOG.isWarnEnabled()) { LOG.warn(e.toString()); }
} catch (SAXException e) {
- LOG.warn(e.toString());
+ if (LOG.isWarnEnabled()) { LOG.warn(e.toString()); }
} catch (IOException e) {
- LOG.warn(e.toString());
+ if (LOG.isWarnEnabled()) { LOG.warn(e.toString()); }
} catch (ParserConfigurationException e) {
- LOG.warn(e.toString());
+ if (LOG.isWarnEnabled()) { LOG.warn(e.toString()); }
}
}
}
@@ -119,11 +122,15 @@
&& directory.isDirectory() && directory.listFiles().length > 0) {
return directory; // relative path that is not in the classpath
} else if (url == null) {
- LOG.warn("Plugins: directory not found: " + name);
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Plugins: directory not found: " + name);
+ }
return null;
} else if (!"file".equals(url.getProtocol())) {
- LOG.warn("Plugins: not a file: url. Can't load plugins from: "
- + url);
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Plugins: not a file: url. Can't load plugins from: "
+ + url);
+ }
return null;
}
String path = url.getPath();
@@ -185,8 +192,10 @@
}
PluginDescriptor pluginDescriptor = new PluginDescriptor(id, version,
name, providerName, pluginClazz, pPath, this.conf);
- LOG.debug("plugin: id="+id+" name="+name+" version="+version
- +" provider="+providerName+"class="+pluginClazz);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("plugin: id="+id+" name="+name+" version="+version
+ +" provider="+providerName+"class="+pluginClazz);
+ }
parseExtension(rootElement, pluginDescriptor);
parseExtensionPoints(rootElement, pluginDescriptor);
parseLibraries(rootElement, pluginDescriptor);
@@ -260,7 +269,7 @@
String schema = oneExtensionPoint.getAttribute("schema");
ExtensionPoint extensionPoint = new ExtensionPoint(id, name,
schema);
- //LOG.debug("plugin: point="+id);
+ //if (LOG.isDebugEnabled()) { LOG.debug("plugin: point="+id); }
pPluginDescriptor.addExtensionPoint(extensionPoint);
}
}
@@ -290,8 +299,10 @@
String id = oneImplementation.getAttribute("id");
String extensionClass = oneImplementation
.getAttribute("class");
- LOG.debug("impl: point=" + pointId + " class="
- + extensionClass);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("impl: point=" + pointId + " class="
+ + extensionClass);
+ }
Extension extension = new Extension(pPluginDescriptor,
pointId, id, extensionClass, this.conf, this.pluginRepository);
NodeList parameters = oneImplementation.getElementsByTagName("parameter");
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/plugin/PluginRepository.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/plugin/PluginRepository.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/plugin/PluginRepository.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/plugin/PluginRepository.java Thu Jun 22 05:20:29 2006
@@ -91,7 +91,7 @@
try {
installExtensions(fRegisteredPlugins);
} catch (PluginRuntimeException e) {
- LOG.fatal(e.toString());
+ if (LOG.isFatalEnabled()) { LOG.fatal(e.toString()); }
throw new RuntimeException(e.getMessage());
}
displayStatus();
@@ -119,7 +119,9 @@
for (int j=0; j<points.length; j++) {
ExtensionPoint point = points[j];
String xpId = point.getId();
- LOG.debug("Adding extension point " + xpId);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Adding extension point " + xpId);
+ }
fExtensionPoints.put(xpId, point);
}
}
@@ -210,10 +212,10 @@
checked.put(plugin.getPluginId(), plugin);
} catch (MissingDependencyException mde) {
// Simply ignore this plugin
- LOG.warn(mde.getMessage());
+ if (LOG.isWarnEnabled()) { LOG.warn(mde.getMessage()); }
} catch (CircularDependencyException cde) {
// Simply ignore this plugin
- LOG.warn(cde.getMessage());
+ if (LOG.isWarnEnabled()) { LOG.warn(cde.getMessage()); }
}
}
return new ArrayList(checked.values());
@@ -326,9 +328,11 @@
private void displayStatus() {
+ if (!LOG.isInfoEnabled()) { return; }
+
LOG.info("Plugin Auto-activation mode: [" + this.auto + "]");
-
LOG.info("Registered Plugins:");
+
if ((fRegisteredPlugins == null) || (fRegisteredPlugins.size() == 0)) {
LOG.info("\tNONE");
} else {
@@ -366,11 +370,11 @@
if (id == null) { continue; }
if (!includes.matcher(id).matches()) {
- LOG.debug("not including: " + id);
+ if (LOG.isDebugEnabled()) { LOG.debug("not including: " + id); }
continue;
}
if (excludes.matcher(id).matches()) {
- LOG.debug("excluding: " + id);
+ if (LOG.isDebugEnabled()) { LOG.debug("excluding: " + id); }
continue;
}
map.put(plugin.getPluginId(), plugin);
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/DistributedSearch.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/DistributedSearch.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/DistributedSearch.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/DistributedSearch.java Thu Jun 22 05:20:29 2006
@@ -111,7 +111,9 @@
if (tokens.hasMoreTokens()) {
String port = tokens.nextToken();
addrs.add(new InetSocketAddress(host, Integer.parseInt(port)));
- LOG.info("Client adding server " + host + ":" + port);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Client adding server " + host + ":" + port);
+ }
}
}
}
@@ -171,11 +173,15 @@
InetSocketAddress addr = defaultAddresses[i];
String[] segments = results[i];
if (segments == null) {
- LOG.warn("Client: no segments from: " + addr);
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Client: no segments from: " + addr);
+ }
continue;
}
for (int j = 0; j < segments.length; j++) {
- LOG.trace("Client: segment "+segments[j]+" at "+addr);
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Client: segment "+segments[j]+" at "+addr);
+ }
segmentToAddress.put(segments[j], addr);
}
liveAddresses.add(addr);
@@ -184,9 +190,11 @@
}
this.liveAddresses = (InetSocketAddress[]) // update liveAddresses
- liveAddresses.toArray(new InetSocketAddress[liveAddresses.size()]);
+ liveAddresses.toArray(new InetSocketAddress[liveAddresses.size()]);
- LOG.info("STATS: "+liveServers+" servers, "+liveSegments+" segments.");
+ if (LOG.isInfoEnabled()) {
+ LOG.info("STATS: "+liveServers+" servers, "+liveSegments+" segments.");
+ }
}
/** Return the names of segments searched. */
@@ -349,13 +357,17 @@
try{
Thread.sleep(10000);
} catch (InterruptedException ie){
- LOG.info("Thread sleep interrupted.");
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Thread sleep interrupted.");
+ }
}
try{
- LOG.info("Querying segments from search servers...");
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Querying segments from search servers...");
+ }
updateSegments();
} catch (IOException ioe) {
- LOG.warn("No search servers available!");
+ if (LOG.isWarnEnabled()) { LOG.warn("No search servers available!"); }
liveAddresses=new InetSocketAddress[0];
}
}
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/NutchBean.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/NutchBean.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/NutchBean.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/NutchBean.java Thu Jun 22 05:20:29 2006
@@ -66,7 +66,7 @@
public static NutchBean get(ServletContext app, Configuration conf) throws IOException {
NutchBean bean = (NutchBean)app.getAttribute("nutchBean");
if (bean == null) {
- LOG.info("creating new bean");
+ if (LOG.isInfoEnabled()) { LOG.info("creating new bean"); }
bean = new NutchBean(conf);
app.setAttribute("nutchBean", bean);
}
@@ -97,7 +97,9 @@
}
Path servers = new Path(dir, "search-servers.txt");
if (fs.exists(servers)) {
- LOG.info("searching servers in " + servers);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("searching servers in " + servers);
+ }
init(new DistributedSearch.Client(servers, conf));
} else {
init(new Path(dir, "index"), new Path(dir, "indexes"), new Path(
@@ -110,10 +112,14 @@
throws IOException {
IndexSearcher indexSearcher;
if (this.fs.exists(indexDir)) {
- LOG.info("opening merged index in " + indexDir);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("opening merged index in " + indexDir);
+ }
indexSearcher = new IndexSearcher(indexDir, this.conf);
} else {
- LOG.info("opening indexes in " + indexesDir);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("opening indexes in " + indexesDir);
+ }
Vector vDirs=new Vector();
Path [] directories = fs.listPaths(indexesDir);
@@ -133,7 +139,9 @@
indexSearcher = new IndexSearcher(directories, this.conf);
}
- LOG.info("opening segments in " + segmentsDir);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("opening segments in " + segmentsDir);
+ }
FetchedSegments segments = new FetchedSegments(this.fs, segmentsDir.toString(),this.conf);
this.segmentNames = segments.getSegmentNames();
@@ -143,7 +151,7 @@
this.summarizer = segments;
this.content = segments;
- LOG.info("opening linkdb in " + linkDb);
+ if (LOG.isInfoEnabled()) { LOG.info("opening linkdb in " + linkDb); }
this.linkDb = new LinkDbInlinks(fs, linkDb, this.conf);
}
@@ -235,7 +243,9 @@
float rawHitsFactor = this.conf.getFloat("searcher.hostgrouping.rawhits.factor", 2.0f);
int numHitsRaw = (int)(numHits * rawHitsFactor);
- LOG.info("searching for "+numHitsRaw+" raw hits");
+ if (LOG.isInfoEnabled()) {
+ LOG.info("searching for "+numHitsRaw+" raw hits");
+ }
Hits hits = searcher.search(query, numHitsRaw,
dedupField, sortField, reverse);
long total = hits.getTotal();
@@ -256,10 +266,14 @@
dedupField);
}
numHitsRaw = (int)(numHitsRaw * rawHitsFactor);
- LOG.info("re-searching for "+numHitsRaw+" raw hits, query: "+optQuery);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("re-searching for "+numHitsRaw+" raw hits, query: "+optQuery);
+ }
hits = searcher.search(optQuery, numHitsRaw,
dedupField, sortField, reverse);
- LOG.info("found "+hits.getTotal()+" raw hits");
+ if (LOG.isInfoEnabled()) {
+ LOG.info("found "+hits.getTotal()+" raw hits");
+ }
rawHitNum = -1;
continue;
}
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/OpenSearchServlet.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/OpenSearchServlet.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/OpenSearchServlet.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/OpenSearchServlet.java Thu Jun 22 05:20:29 2006
@@ -71,7 +71,9 @@
public void doGet(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {
- NutchBean.LOG.info("query request from " + request.getRemoteAddr());
+ if (NutchBean.LOG.isInfoEnabled()) {
+ NutchBean.LOG.info("query request from " + request.getRemoteAddr());
+ }
// get parameters from request
request.setCharacterEncoding("UTF-8");
@@ -123,8 +125,10 @@
(dedupField == null ? "" : "&dedupField=" + dedupField));
Query query = Query.parse(queryString, queryLang, this.conf);
- NutchBean.LOG.info("query: " + queryString);
- NutchBean.LOG.info("lang: " + queryLang);
+ if (NutchBean.LOG.isInfoEnabled()) {
+ NutchBean.LOG.info("query: " + queryString);
+ NutchBean.LOG.info("lang: " + queryLang);
+ }
// execute the query
Hits hits;
@@ -132,11 +136,15 @@
hits = bean.search(query, start + hitsPerPage, hitsPerDup, dedupField,
sort, reverse);
} catch (IOException e) {
- NutchBean.LOG.warn("Search Error", e);
+ if (NutchBean.LOG.isWarnEnabled()) {
+ NutchBean.LOG.warn("Search Error", e);
+ }
hits = new Hits(0,new Hit[0]);
}
- NutchBean.LOG.info("total hits: " + hits.getTotal());
+ if (NutchBean.LOG.isInfoEnabled()) {
+ NutchBean.LOG.info("total hits: " + hits.getTotal());
+ }
// generate xml results
int end = (int)Math.min(hits.getLength(), start + hitsPerPage);
Re: svn commit: r416346 [1/3] - in /lucene/nutch/trunk/src: java/org/apache/nutch/analysis/
java/org/apache/nutch/clustering/ java/org/apache/nutch/crawl/ java/org/apache/nutch/fetcher/
java/org/apache/nutch/indexer/ java/org/apache/nutch/net/ java/org/apa...
Posted by Doug Cutting <cu...@apache.org>.
jerome@apache.org wrote:
> NUTCH-309 : Added logging code guards
[ ... ]
> + if (LOG.isWarnEnabled()) {
> + LOG.warn("Line does not contain a field name: " + line);
> + }
[ ...]
-1
I don't think guards should be added everywhere. They make the code
bigger and provide little benefit. Rather, guards should only be added
in performance-critical code, and then only for "Debug"-level output.
"Info" and "Warn" levels are normally enabled, and developers should
thus not log messages at these levels so frequently that performance
will be compromised. And not all "Debug"-level log statements need
guards, only those that are in inner loops, where the construction of
the log message may significantly affect performance.
Doug