You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by sn...@apache.org on 2020/04/28 07:53:22 UTC
[nutch] branch master updated: NUTCH-2783 Use (more) parametrized
logging - replace logging messages with string concatenations by
parametrized calls - remove LOG.isInfoEnabled() where parametrized logging
is used and no or minor extra calls are done to get logging parameters
(similar for other log levels) - replace needless .toString() and
Integer.toString(intVal)
This is an automated email from the ASF dual-hosted git repository.
snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push:
new 3d3018b NUTCH-2783 Use (more) parametrized logging - replace logging messages with string concatenations by parametrized calls - remove LOG.isInfoEnabled() where parametrized logging is used and no or minor extra calls are done to get logging parameters (similar for other log levels) - replace needless .toString() and Integer.toString(intVal)
new 2e2ce6a Merge pull request #516 from sebastian-nagel/NUTCH-2783-logging
3d3018b is described below
commit 3d3018bbeed5aba4b11444f210307e92de489960
Author: Sebastian Nagel <sn...@apache.org>
AuthorDate: Fri Apr 24 15:51:19 2020 +0200
NUTCH-2783 Use (more) parametrized logging
- replace logging messages with string concatenations by
parametrized calls
- remove LOG.isInfoEnabled() where parametrized logging is used
and no or minor extra calls are done to get logging parameters
(similar for other log levels)
- replace needless .toString() and Integer.toString(intVal)
---
src/java/org/apache/nutch/crawl/CrawlDb.java | 26 +++++++--------
src/java/org/apache/nutch/crawl/CrawlDbMerger.java | 6 ++--
src/java/org/apache/nutch/crawl/CrawlDbReader.java | 34 ++++++-------------
.../org/apache/nutch/crawl/CrawlDbReducer.java | 22 ++++---------
.../org/apache/nutch/crawl/DeduplicationJob.java | 4 +--
src/java/org/apache/nutch/crawl/Generator.java | 5 ++-
src/java/org/apache/nutch/crawl/Injector.java | 17 ++++------
src/java/org/apache/nutch/crawl/LinkDb.java | 29 +++++++----------
src/java/org/apache/nutch/crawl/LinkDbReader.java | 8 ++---
.../org/apache/nutch/crawl/SignatureFactory.java | 4 +--
src/java/org/apache/nutch/fetcher/Fetcher.java | 38 +++++++++-------------
.../org/apache/nutch/fetcher/FetcherThread.java | 12 +++----
.../apache/nutch/hostdb/UpdateHostDbMapper.java | 4 +--
.../apache/nutch/hostdb/UpdateHostDbReducer.java | 5 +--
.../org/apache/nutch/indexer/IndexerMapReduce.java | 10 +++---
src/java/org/apache/nutch/parse/ParseSegment.java | 6 ++--
src/java/org/apache/nutch/parse/ParserChecker.java | 6 ++--
.../org/apache/nutch/segment/SegmentMerger.java | 36 +++++++++-----------
src/java/org/apache/nutch/tools/FreeGenerator.java | 8 ++---
.../org/apache/nutch/util/SitemapProcessor.java | 4 +--
.../nutch/indexwriter/solr/SolrIndexWriter.java | 11 +++----
21 files changed, 116 insertions(+), 179 deletions(-)
diff --git a/src/java/org/apache/nutch/crawl/CrawlDb.java b/src/java/org/apache/nutch/crawl/CrawlDb.java
index da654ed..5d91b0a 100644
--- a/src/java/org/apache/nutch/crawl/CrawlDb.java
+++ b/src/java/org/apache/nutch/crawl/CrawlDb.java
@@ -98,15 +98,13 @@ public class CrawlDb extends NutchTool implements Tool {
boolean url404Purging = conf.getBoolean(CRAWLDB_PURGE_404, false);
- if (LOG.isInfoEnabled()) {
- LOG.info("CrawlDb update: starting at " + sdf.format(start));
- LOG.info("CrawlDb update: db: " + crawlDb);
- LOG.info("CrawlDb update: segments: " + Arrays.asList(segments));
- LOG.info("CrawlDb update: additions allowed: " + additionsAllowed);
- LOG.info("CrawlDb update: URL normalizing: " + normalize);
- LOG.info("CrawlDb update: URL filtering: " + filter);
- LOG.info("CrawlDb update: 404 purging: " + url404Purging);
- }
+ LOG.info("CrawlDb update: starting at {}", sdf.format(start));
+ LOG.info("CrawlDb update: db: {}", crawlDb);
+ LOG.info("CrawlDb update: segments: {}", Arrays.asList(segments));
+ LOG.info("CrawlDb update: additions allowed: {}", additionsAllowed);
+ LOG.info("CrawlDb update: URL normalizing: {}", normalize);
+ LOG.info("CrawlDb update: URL filtering: {}", filter);
+ LOG.info("CrawlDb update: 404 purging: {}", url404Purging);
for (int i = 0; i < segments.length; i++) {
FileSystem sfs = segments[i].getFileSystem(getConf());
@@ -117,16 +115,14 @@ public class CrawlDb extends NutchTool implements Tool {
if (sfs.exists(parse)) {
FileInputFormat.addInputPath(job, parse);
} else {
- LOG.info(" - adding fetched but unparsed segment " + segments[i]);
+ LOG.info(" - adding fetched but unparsed segment {}", segments[i]);
}
} else {
- LOG.info(" - skipping invalid segment " + segments[i]);
+ LOG.info(" - skipping invalid segment {}", segments[i]);
}
}
- if (LOG.isInfoEnabled()) {
- LOG.info("CrawlDb update: Merging segment data into db.");
- }
+ LOG.info("CrawlDb update: Merging segment data into db.");
FileSystem fs = crawlDb.getFileSystem(getConf());
Path outPath = FileOutputFormat.getOutputPath(job);
@@ -281,7 +277,7 @@ public class CrawlDb extends NutchTool implements Tool {
filter, additionsAllowed, force);
return 0;
} catch (Exception e) {
- LOG.error("CrawlDb update: " + StringUtils.stringifyException(e));
+ LOG.error("CrawlDb update: ", e);
return -1;
}
}
diff --git a/src/java/org/apache/nutch/crawl/CrawlDbMerger.java b/src/java/org/apache/nutch/crawl/CrawlDbMerger.java
index bc77091..6cf2809 100644
--- a/src/java/org/apache/nutch/crawl/CrawlDbMerger.java
+++ b/src/java/org/apache/nutch/crawl/CrawlDbMerger.java
@@ -131,13 +131,11 @@ public class CrawlDbMerger extends Configured implements Tool {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
long start = System.currentTimeMillis();
- LOG.info("CrawlDb merge: starting at " + sdf.format(start));
+ LOG.info("CrawlDb merge: starting at {}", sdf.format(start));
Job job = createMergeJob(getConf(), output, normalize, filter);
for (int i = 0; i < dbs.length; i++) {
- if (LOG.isInfoEnabled()) {
- LOG.info("Adding " + dbs[i]);
- }
+ LOG.info("Adding {}", dbs[i]);
FileInputFormat.addInputPath(job, new Path(dbs[i], CrawlDb.CURRENT_NAME));
}
diff --git a/src/java/org/apache/nutch/crawl/CrawlDbReader.java b/src/java/org/apache/nutch/crawl/CrawlDbReader.java
index a0831da..dfcc87a 100644
--- a/src/java/org/apache/nutch/crawl/CrawlDbReader.java
+++ b/src/java/org/apache/nutch/crawl/CrawlDbReader.java
@@ -659,14 +659,12 @@ public class CrawlDbReader extends AbstractChecker implements Closeable {
}
}
- if (LOG.isInfoEnabled()) {
- LOG.info("CrawlDb statistics start: " + crawlDb);
- }
+ LOG.info("CrawlDb statistics start: {}", crawlDb);
TreeMap<String, Writable> stats = processStatJobHelper(crawlDb, config,
sort);
if (LOG.isInfoEnabled()) {
- LOG.info("Statistics for CrawlDb: " + crawlDb);
+ LOG.info("Statistics for CrawlDb: {}", crawlDb);
LongWritable totalCnt = new LongWritable(0);
if (stats.containsKey("T")) {
totalCnt = ((LongWritable) stats.get("T"));
@@ -729,10 +727,7 @@ public class CrawlDbReader extends AbstractChecker implements Closeable {
}
}
}
- if (LOG.isInfoEnabled()) {
- LOG.info("CrawlDb statistics: done");
- }
-
+ LOG.info("CrawlDb statistics: done");
}
public CrawlDatum get(String crawlDb, String url, Configuration config)
@@ -769,10 +764,8 @@ public class CrawlDbReader extends AbstractChecker implements Closeable {
Configuration config, String format, String regex, String status,
Integer retry, String expr, Float sample)
throws IOException, ClassNotFoundException, InterruptedException {
- if (LOG.isInfoEnabled()) {
- LOG.info("CrawlDb dump: starting");
- LOG.info("CrawlDb db: " + crawlDb);
- }
+ LOG.info("CrawlDb dump: starting");
+ LOG.info("CrawlDb db: {}", crawlDb);
Path outFolder = new Path(output);
@@ -802,7 +795,7 @@ public class CrawlDbReader extends AbstractChecker implements Closeable {
jobConf.setInt("retry", retry);
if (expr != null) {
jobConf.set("expr", expr);
- LOG.info("CrawlDb db: expr: " + expr);
+ LOG.info("CrawlDb db: expr: {}", expr);
}
if (sample != null) {
jobConf.setFloat("sample", sample);
@@ -826,9 +819,7 @@ public class CrawlDbReader extends AbstractChecker implements Closeable {
throw e;
}
- if (LOG.isInfoEnabled()) {
- LOG.info("CrawlDb dump: done");
- }
+ LOG.info("CrawlDb dump: done");
}
public static class CrawlDbDumpMapper
@@ -901,7 +892,7 @@ public class CrawlDbReader extends AbstractChecker implements Closeable {
if (LOG.isInfoEnabled()) {
LOG.info("CrawlDb topN: starting (topN=" + topN + ", min=" + min + ")");
- LOG.info("CrawlDb db: " + crawlDb);
+ LOG.info("CrawlDb db: {}", crawlDb);
}
Path outFolder = new Path(output);
@@ -942,9 +933,7 @@ public class CrawlDbReader extends AbstractChecker implements Closeable {
throw e;
}
- if (LOG.isInfoEnabled()) {
- LOG.info("CrawlDb topN: collecting topN scores.");
- }
+ LOG.info("CrawlDb topN: collecting topN scores.");
job = NutchJob.getInstance(config);
job.setJobName("topN collect " + crawlDb);
job.getConfiguration().setLong("db.reader.topn", topN);
@@ -979,10 +968,7 @@ public class CrawlDbReader extends AbstractChecker implements Closeable {
}
fs.delete(tempDir, true);
- if (LOG.isInfoEnabled()) {
- LOG.info("CrawlDb topN: done");
- }
-
+ LOG.info("CrawlDb topN: done");
}
@Override
diff --git a/src/java/org/apache/nutch/crawl/CrawlDbReducer.java b/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
index 984a02d..bfc62c3 100644
--- a/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
+++ b/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
@@ -160,16 +160,14 @@ public class CrawlDbReducer extends
try {
scfilters.orphanedScore(key, old);
} catch (ScoringFilterException e) {
- if (LOG.isWarnEnabled()) {
- LOG.warn("Couldn't update orphaned score, key={}: {}", key, e);
- }
+ LOG.warn("Couldn't update orphaned score, key={}: {}", key, e);
}
context.write(key, old);
context.getCounter("CrawlDB status",
CrawlDatum.getStatusName(old.getStatus())).increment(1);
} else {
- LOG.warn("Missing fetch and old value, signature="
- + StringUtil.toHexString(signature));
+ LOG.warn("Missing fetch and old value, signature={}",
+ StringUtil.toHexString(signature));
}
return;
}
@@ -206,10 +204,8 @@ public class CrawlDbReducer extends
try {
scfilters.initialScore(key, result);
} catch (ScoringFilterException e) {
- if (LOG.isWarnEnabled()) {
- LOG.warn("Cannot filter init score for url " + key
- + ", using default: " + e.getMessage());
- }
+ LOG.warn("Cannot filter init score for url {}, using default: {}",
+ key, e.getMessage());
result.setScore(0.0f);
}
}
@@ -285,9 +281,7 @@ public class CrawlDbReducer extends
result = schedule.forceRefetch(key, result, false);
break;
case CrawlDatum.STATUS_SIGNATURE:
- if (LOG.isWarnEnabled()) {
- LOG.warn("Lone CrawlDatum.STATUS_SIGNATURE: " + key);
- }
+ LOG.warn("Lone CrawlDatum.STATUS_SIGNATURE: {}", key);
return;
case CrawlDatum.STATUS_FETCH_RETRY: // temporary failure
if (oldSet) {
@@ -320,9 +314,7 @@ public class CrawlDbReducer extends
try {
scfilters.updateDbScore(key, oldSet ? old : null, result, linkList);
} catch (Exception e) {
- if (LOG.isWarnEnabled()) {
- LOG.warn("Couldn't update score, key={}: {}", key, e);
- }
+ LOG.warn("Couldn't update score, key={}: {}", key, e);
}
// remove generation time, if any
result.getMetaData().remove(Nutch.WRITABLE_GENERATE_TIME_KEY);
diff --git a/src/java/org/apache/nutch/crawl/DeduplicationJob.java b/src/java/org/apache/nutch/crawl/DeduplicationJob.java
index 04b19e3..7751366 100644
--- a/src/java/org/apache/nutch/crawl/DeduplicationJob.java
+++ b/src/java/org/apache/nutch/crawl/DeduplicationJob.java
@@ -346,9 +346,7 @@ public class DeduplicationJob extends NutchTool implements Tool {
}
// merge with existing crawl db
- if (LOG.isInfoEnabled()) {
- LOG.info("Deduplication: Updating status of duplicate urls into crawl db.");
- }
+ LOG.info("Deduplication: Updating status of duplicate urls into crawl db.");
Job mergeJob = CrawlDb.createJob(getConf(), crawlDb);
FileInputFormat.addInputPath(mergeJob, tempDir);
diff --git a/src/java/org/apache/nutch/crawl/Generator.java b/src/java/org/apache/nutch/crawl/Generator.java
index be64695..5dcd2ea 100644
--- a/src/java/org/apache/nutch/crawl/Generator.java
+++ b/src/java/org/apache/nutch/crawl/Generator.java
@@ -927,9 +927,8 @@ public class Generator extends NutchTool implements Tool {
private Path partitionSegment(Path segmentsDir, Path inputDir, int numLists)
throws IOException, ClassNotFoundException, InterruptedException {
// invert again, partition by host/domain/IP, sort by url hash
- if (LOG.isInfoEnabled()) {
- LOG.info("Generator: Partitioning selected urls for politeness.");
- }
+ LOG.info("Generator: Partitioning selected urls for politeness.");
+
Path segment = new Path(segmentsDir, generateSegmentName());
Path output = new Path(segment, CrawlDatum.GENERATE_DIR_NAME);
diff --git a/src/java/org/apache/nutch/crawl/Injector.java b/src/java/org/apache/nutch/crawl/Injector.java
index 9f898e6..84dc812 100644
--- a/src/java/org/apache/nutch/crawl/Injector.java
+++ b/src/java/org/apache/nutch/crawl/Injector.java
@@ -235,10 +235,9 @@ public class Injector extends NutchTool implements Tool {
key.set(url);
scfilters.injectedScore(key, datum);
} catch (ScoringFilterException e) {
- if (LOG.isWarnEnabled()) {
- LOG.warn("Cannot filter injected score for url " + url
- + ", using default (" + e.getMessage() + ")");
- }
+ LOG.warn(
+ "Cannot filter injected score for url {}, using default ({})",
+ url, e.getMessage());
}
context.getCounter("injector", "urls_injected").increment(1);
context.write(key, datum);
@@ -373,12 +372,10 @@ public class Injector extends NutchTool implements Tool {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
long start = System.currentTimeMillis();
- if (LOG.isInfoEnabled()) {
- LOG.info("Injector: starting at " + sdf.format(start));
- LOG.info("Injector: crawlDb: " + crawlDb);
- LOG.info("Injector: urlDir: " + urlDir);
- LOG.info("Injector: Converting injected urls to crawl db entries.");
- }
+ LOG.info("Injector: starting at {}", sdf.format(start));
+ LOG.info("Injector: crawlDb: {}", crawlDb);
+ LOG.info("Injector: urlDir: {}", urlDir);
+ LOG.info("Injector: Converting injected urls to crawl db entries.");
// set configuration
Configuration conf = getConf();
diff --git a/src/java/org/apache/nutch/crawl/LinkDb.java b/src/java/org/apache/nutch/crawl/LinkDb.java
index 51e880d..e53411f 100644
--- a/src/java/org/apache/nutch/crawl/LinkDb.java
+++ b/src/java/org/apache/nutch/crawl/LinkDb.java
@@ -198,17 +198,15 @@ public class LinkDb extends NutchTool implements Tool {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
long start = System.currentTimeMillis();
- if (LOG.isInfoEnabled()) {
- LOG.info("LinkDb: starting at {}", sdf.format(start));
- LOG.info("LinkDb: linkdb: {}", linkDb);
- LOG.info("LinkDb: URL normalize: {}", normalize);
- LOG.info("LinkDb: URL filter: {}", filter);
- if (conf.getBoolean(IGNORE_INTERNAL_LINKS, true)) {
- LOG.info("LinkDb: internal links will be ignored.");
- }
- if (conf.getBoolean(IGNORE_EXTERNAL_LINKS, false)) {
- LOG.info("LinkDb: external links will be ignored.");
- }
+ LOG.info("LinkDb: starting at {}", sdf.format(start));
+ LOG.info("LinkDb: linkdb: {}", linkDb);
+ LOG.info("LinkDb: URL normalize: {}", normalize);
+ LOG.info("LinkDb: URL filter: {}", filter);
+ if (conf.getBoolean(IGNORE_INTERNAL_LINKS, true)) {
+ LOG.info("LinkDb: internal links will be ignored.");
+ }
+ if (conf.getBoolean(IGNORE_EXTERNAL_LINKS, false)) {
+ LOG.info("LinkDb: external links will be ignored.");
}
if (conf.getBoolean(IGNORE_INTERNAL_LINKS, true)
&& conf.getBoolean(IGNORE_EXTERNAL_LINKS, false)) {
@@ -219,9 +217,7 @@ public class LinkDb extends NutchTool implements Tool {
}
for (int i = 0; i < segments.length; i++) {
- if (LOG.isInfoEnabled()) {
- LOG.info("LinkDb: adding segment: {}", segments[i]);
- }
+ LOG.info("LinkDb: adding segment: {}", segments[i]);
FileInputFormat.addInputPath(job, new Path(segments[i],
ParseData.DIR_NAME));
}
@@ -242,9 +238,8 @@ public class LinkDb extends NutchTool implements Tool {
}
if (fs.exists(currentLinkDb)) {
- if (LOG.isInfoEnabled()) {
- LOG.info("LinkDb: merging with existing linkdb: {}", linkDb);
- }
+ LOG.info("LinkDb: merging with existing linkdb: {}", linkDb);
+
// try to merge
Path newLinkDb = FileOutputFormat.getOutputPath(job);
job = LinkDbMerger.createMergeJob(getConf(), linkDb, normalize, filter);
diff --git a/src/java/org/apache/nutch/crawl/LinkDbReader.java b/src/java/org/apache/nutch/crawl/LinkDbReader.java
index 8b649c8..5d422b4 100644
--- a/src/java/org/apache/nutch/crawl/LinkDbReader.java
+++ b/src/java/org/apache/nutch/crawl/LinkDbReader.java
@@ -155,10 +155,10 @@ public class LinkDbReader extends AbstractChecker implements Closeable {
throws IOException, InterruptedException, ClassNotFoundException {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
long start = System.currentTimeMillis();
- if (LOG.isInfoEnabled()) {
- LOG.info("LinkDb dump: starting at " + sdf.format(start));
- LOG.info("LinkDb dump: db: " + linkdb);
- }
+
+ LOG.info("LinkDb dump: starting at {}", sdf.format(start));
+ LOG.info("LinkDb dump: db: {}", linkdb);
+
Path outFolder = new Path(output);
Job job = NutchJob.getInstance(getConf());
diff --git a/src/java/org/apache/nutch/crawl/SignatureFactory.java b/src/java/org/apache/nutch/crawl/SignatureFactory.java
index e017cf4..e605ec5 100644
--- a/src/java/org/apache/nutch/crawl/SignatureFactory.java
+++ b/src/java/org/apache/nutch/crawl/SignatureFactory.java
@@ -46,9 +46,7 @@ public class SignatureFactory {
Signature impl = (Signature) objectCache.getObject(clazz);
if (impl == null) {
try {
- if (LOG.isInfoEnabled()) {
- LOG.info("Using Signature impl: " + clazz);
- }
+ LOG.info("Using Signature impl: {}", clazz);
Class<?> implClass = Class.forName(clazz);
impl = (Signature) implClass.getConstructor().newInstance();
impl.setConf(conf);
diff --git a/src/java/org/apache/nutch/fetcher/Fetcher.java b/src/java/org/apache/nutch/fetcher/Fetcher.java
index e489ae8..687411e 100644
--- a/src/java/org/apache/nutch/fetcher/Fetcher.java
+++ b/src/java/org/apache/nutch/fetcher/Fetcher.java
@@ -199,14 +199,10 @@ public class Fetcher extends NutchTool implements Tool {
QueueFeeder feeder;
int threadCount = conf.getInt("fetcher.threads.fetch", 10);
- if (LOG.isInfoEnabled()) {
- LOG.info("Fetcher: threads: {}", threadCount);
- }
+ LOG.info("Fetcher: threads: {}", threadCount);
int timeoutDivisor = conf.getInt("fetcher.threads.timeout.divisor", 2);
- if (LOG.isInfoEnabled()) {
- LOG.info("Fetcher: time-out divisor: {}", timeoutDivisor);
- }
+ LOG.info("Fetcher: time-out divisor: {}", timeoutDivisor);
int queueDepthMuliplier = conf.getInt(
"fetcher.queue.depth.multiplier", 50);
@@ -240,17 +236,15 @@ public class Fetcher extends NutchTool implements Tool {
int throughputThresholdNumRetries = 0;
- int throughputThresholdPages = conf.getInt(
- "fetcher.throughput.threshold.pages", -1);
- if (LOG.isInfoEnabled()) {
- LOG.info("Fetcher: throughput threshold: {}", throughputThresholdPages);
- }
- int throughputThresholdMaxRetries = conf.getInt(
- "fetcher.throughput.threshold.retries", 5);
- if (LOG.isInfoEnabled()) {
- LOG.info("Fetcher: throughput threshold retries: {}",
- throughputThresholdMaxRetries);
- }
+ int throughputThresholdPages = conf
+ .getInt("fetcher.throughput.threshold.pages", -1);
+ LOG.info("Fetcher: throughput threshold: {}", throughputThresholdPages);
+
+ int throughputThresholdMaxRetries = conf
+ .getInt("fetcher.throughput.threshold.retries", 5);
+ LOG.info("Fetcher: throughput threshold retries: {}",
+ throughputThresholdMaxRetries);
+
long throughputThresholdTimeLimit = conf.getLong(
"fetcher.throughput.threshold.check.after", -1);
@@ -304,8 +298,9 @@ public class Fetcher extends NutchTool implements Tool {
// Check if we're dropping below the threshold
if (pagesLastSec < throughputThresholdPages) {
throughputThresholdNumRetries++;
- LOG.warn("{}: dropping below configured threshold of {} pages per second",
- Integer.toString(throughputThresholdNumRetries), Integer.toString(throughputThresholdPages));
+ LOG.warn(
+ "{}: dropping below configured threshold of {} pages per second",
+ throughputThresholdNumRetries, throughputThresholdPages);
// Quit if we dropped below threshold too many times
if (throughputThresholdNumRetries == throughputThresholdMaxRetries) {
@@ -457,8 +452,7 @@ public class Fetcher extends NutchTool implements Tool {
int maxOutlinkDepth = getConf().getInt("fetcher.follow.outlinks.depth", -1);
if (maxOutlinkDepth > 0) {
- LOG.info("Fetcher: following outlinks up to depth: {}",
- Integer.toString(maxOutlinkDepth));
+ LOG.info("Fetcher: following outlinks up to depth: {}", maxOutlinkDepth);
int maxOutlinkDepthNumLinks = getConf().getInt(
"fetcher.follow.outlinks.num.links", 4);
@@ -472,7 +466,7 @@ public class Fetcher extends NutchTool implements Tool {
}
LOG.info("Fetcher: maximum outlinks to follow: {}",
- Integer.toString(totalOutlinksToFollow));
+ totalOutlinksToFollow);
}
Job job = NutchJob.getInstance(getConf());
diff --git a/src/java/org/apache/nutch/fetcher/FetcherThread.java b/src/java/org/apache/nutch/fetcher/FetcherThread.java
index 549cd36..bc0d639 100644
--- a/src/java/org/apache/nutch/fetcher/FetcherThread.java
+++ b/src/java/org/apache/nutch/fetcher/FetcherThread.java
@@ -303,9 +303,7 @@ public class FetcherThread extends Thread {
Thread.currentThread().getId(), fit.url,
fetchQueues.getFetchItemQueue(fit.queueID).crawlDelay);
}
- if (LOG.isDebugEnabled()) {
- LOG.debug("redirectCount={}", redirectCount);
- }
+ LOG.debug("redirectCount={}", redirectCount);
redirecting = false;
Protocol protocol = this.protocolFactory.getProtocol(fit.u);
BaseRobotRules rules = protocol.getRobotRules(fit.url, fit.datum,
@@ -346,11 +344,9 @@ public class FetcherThread extends Thread {
crawlDelay = minCrawlDelay;
}
fiq.crawlDelay = crawlDelay;
- if (LOG.isDebugEnabled()) {
- LOG.debug("Crawl delay for queue: " + fit.queueID
- + " is set to " + fiq.crawlDelay
- + " as per robots.txt. url: " + fit.url);
- }
+ LOG.debug(
+ "Crawl delay for queue: {} is set to {} as per robots.txt. url: {}",
+ fit.queueID, fiq.crawlDelay, fit.url);
}
}
ProtocolOutput output = protocol.getProtocolOutput(fit.url,
diff --git a/src/java/org/apache/nutch/hostdb/UpdateHostDbMapper.java b/src/java/org/apache/nutch/hostdb/UpdateHostDbMapper.java
index c239349..9657621 100644
--- a/src/java/org/apache/nutch/hostdb/UpdateHostDbMapper.java
+++ b/src/java/org/apache/nutch/hostdb/UpdateHostDbMapper.java
@@ -195,7 +195,7 @@ public class UpdateHostDbMapper
// Filtered out?
if (buffer == null) {
context.getCounter("UpdateHostDb", "filtered_records").increment(1);
- LOG.info("UpdateHostDb: " + key.toString() + " hostdatum has been filtered");
+ LOG.info("UpdateHostDb: {} hostdatum has been filtered", keyStr);
return;
}
@@ -219,7 +219,7 @@ public class UpdateHostDbMapper
// Filtered out?
if (buffer == null) {
context.getCounter("UpdateHostDb", "filtered_records").increment(1);
- LOG.info("UpdateHostDb: " + key.toString() + " score has been filtered");
+ LOG.info("UpdateHostDb: {} score has been filtered", keyStr);
return;
}
diff --git a/src/java/org/apache/nutch/hostdb/UpdateHostDbReducer.java b/src/java/org/apache/nutch/hostdb/UpdateHostDbReducer.java
index 862a3c9..f473848 100644
--- a/src/java/org/apache/nutch/hostdb/UpdateHostDbReducer.java
+++ b/src/java/org/apache/nutch/hostdb/UpdateHostDbReducer.java
@@ -349,7 +349,7 @@ public class UpdateHostDbReducer
return;
} else {
context.getCounter("UpdateHostDb", "skipped_not_eligible").increment(1);
- LOG.info("UpdateHostDb: " + key.toString() + ": skipped_not_eligible");
+ LOG.info("UpdateHostDb: {}: skipped_not_eligible", key);
}
// Write the host datum if it wasn't written by the resolver thread
@@ -415,7 +415,8 @@ public class UpdateHostDbReducer
try {
// Wait for the executor to shut down completely
if (!executor.isTerminated()) {
- LOG.info("UpdateHostDb: resolver threads waiting: " + Integer.toString(executor.getPoolSize()));
+ LOG.info("UpdateHostDb: resolver threads waiting: {}",
+ executor.getPoolSize());
Thread.sleep(1000);
} else {
// All is well, get out
diff --git a/src/java/org/apache/nutch/indexer/IndexerMapReduce.java b/src/java/org/apache/nutch/indexer/IndexerMapReduce.java
index 856b99a..3e9bc15 100644
--- a/src/java/org/apache/nutch/indexer/IndexerMapReduce.java
+++ b/src/java/org/apache/nutch/indexer/IndexerMapReduce.java
@@ -289,8 +289,8 @@ public class IndexerMapReduce extends Configured {
parseText = (ParseText) value;
} else if (value instanceof Content) {
content = (Content)value;
- } else if (LOG.isWarnEnabled()) {
- LOG.warn("Unrecognized type: " + value.getClass());
+ } else {
+ LOG.warn("Unrecognized type: {}", value.getClass());
}
}
@@ -354,9 +354,7 @@ public class IndexerMapReduce extends Configured {
inlinks, boost);
} catch (final ScoringFilterException e) {
context.getCounter("IndexerStatus", "errors (ScoringFilter)").increment(1);
- if (LOG.isWarnEnabled()) {
- LOG.warn("Error calculating score {}: {}", key, e);
- }
+ LOG.warn("Error calculating score {}: {}", key, e);
return;
}
// apply boost to all indexed fields.
@@ -390,7 +388,7 @@ public class IndexerMapReduce extends Configured {
doc = filters.filter(doc, parse, key, fetchDatum, inlinks);
} catch (final IndexingException e) {
if (LOG.isWarnEnabled()) {
- LOG.warn("Error indexing " + key + ": " + e);
+ LOG.warn("Error indexing {}: ", key, e);
}
context.getCounter("IndexerStatus", "errors (IndexingFilter)").increment(1);
return;
diff --git a/src/java/org/apache/nutch/parse/ParseSegment.java b/src/java/org/apache/nutch/parse/ParseSegment.java
index 9bd1f03..62551b2 100644
--- a/src/java/org/apache/nutch/parse/ParseSegment.java
+++ b/src/java/org/apache/nutch/parse/ParseSegment.java
@@ -152,13 +152,11 @@ public class ParseSegment extends NutchTool implements Tool {
try {
scfilters.passScoreAfterParsing(url, content, parse);
} catch (ScoringFilterException e) {
- if (LOG.isWarnEnabled()) {
- LOG.warn("Error passing score: " + url + ": " + e.getMessage());
- }
+ LOG.warn("Error passing score: {}: {}", url, e.getMessage());
}
long end = System.currentTimeMillis();
- LOG.info("Parsed (" + Long.toString(end - start) + "ms):" + url);
+ LOG.info("Parsed ({}ms): {}", (end - start), url);
context.write(
url,
diff --git a/src/java/org/apache/nutch/parse/ParserChecker.java b/src/java/org/apache/nutch/parse/ParserChecker.java
index 8b6e3a1..2a976ba 100644
--- a/src/java/org/apache/nutch/parse/ParserChecker.java
+++ b/src/java/org/apache/nutch/parse/ParserChecker.java
@@ -257,9 +257,9 @@ public class ParserChecker extends AbstractChecker {
content, parseResult.get(new Text(url)));
if (LOG.isInfoEnabled()) {
- LOG.info("parsing: " + url);
- LOG.info("contentType: " + contentType);
- LOG.info("signature: " + StringUtil.toHexString(signature));
+ LOG.info("parsing: {}", url);
+ LOG.info("contentType: {}", contentType);
+ LOG.info("signature: {}", StringUtil.toHexString(signature));
}
for (Map.Entry<Text, Parse> entry : parseResult) {
diff --git a/src/java/org/apache/nutch/segment/SegmentMerger.java b/src/java/org/apache/nutch/segment/SegmentMerger.java
index a744539..7dbfd11 100644
--- a/src/java/org/apache/nutch/segment/SegmentMerger.java
+++ b/src/java/org/apache/nutch/segment/SegmentMerger.java
@@ -428,7 +428,7 @@ public class SegmentMerger extends Configured implements Tool{
mergeFilters = new SegmentMergeFilters(conf);
}
sliceSize = conf.getLong("segment.merger.slice", -1);
- if ((sliceSize > 0) && (LOG.isInfoEnabled())) {
+ if (sliceSize > 0) {
LOG.info("Slice size: {} URLs.", sliceSize);
}
if (sliceSize > 0) {
@@ -622,9 +622,7 @@ public class SegmentMerger extends Configured implements Tool{
public void merge(Path out, Path[] segs, boolean filter, boolean normalize,
long slice) throws IOException, ClassNotFoundException, InterruptedException {
String segmentName = Generator.generateSegmentName();
- if (LOG.isInfoEnabled()) {
- LOG.info("Merging {} segments to {}/{}", segs.length, out, segmentName);
- }
+ LOG.info("Merging {} segments to {}/{}", segs.length, out, segmentName);
Job job = NutchJob.getInstance(getConf());
Configuration conf = job.getConfiguration();
job.setJobName("mergesegs " + out + "/" + segmentName);
@@ -656,9 +654,7 @@ public class SegmentMerger extends Configured implements Tool{
segs[i] = null;
continue;
}
- if (LOG.isInfoEnabled()) {
- LOG.info("SegmentMerger: adding {}", segs[i]);
- }
+ LOG.info("SegmentMerger: adding {}", segs[i]);
Path cDir = new Path(segs[i], Content.DIR_NAME);
Path gDir = new Path(segs[i], CrawlDatum.GENERATE_DIR_NAME);
Path fDir = new Path(segs[i], CrawlDatum.FETCH_DIR_NAME);
@@ -679,20 +675,20 @@ public class SegmentMerger extends Configured implements Tool{
pg = g; pf = f; pp = p; pc = c; ppd = pd; ppt = pt;
}
- StringBuilder sb = new StringBuilder();
- if (c)
- sb.append(" " + Content.DIR_NAME);
- if (g)
- sb.append(" " + CrawlDatum.GENERATE_DIR_NAME);
- if (f)
- sb.append(" " + CrawlDatum.FETCH_DIR_NAME);
- if (p)
- sb.append(" " + CrawlDatum.PARSE_DIR_NAME);
- if (pd)
- sb.append(" " + ParseData.DIR_NAME);
- if (pt)
- sb.append(" " + ParseText.DIR_NAME);
if (LOG.isInfoEnabled()) {
+ StringBuilder sb = new StringBuilder();
+ if (c)
+ sb.append(" " + Content.DIR_NAME);
+ if (g)
+ sb.append(" " + CrawlDatum.GENERATE_DIR_NAME);
+ if (f)
+ sb.append(" " + CrawlDatum.FETCH_DIR_NAME);
+ if (p)
+ sb.append(" " + CrawlDatum.PARSE_DIR_NAME);
+ if (pd)
+ sb.append(" " + ParseData.DIR_NAME);
+ if (pt)
+ sb.append(" " + ParseText.DIR_NAME);
LOG.info("SegmentMerger: using segment data from: {}", sb.toString());
}
for (int i = 0; i < segs.length; i++) {
diff --git a/src/java/org/apache/nutch/tools/FreeGenerator.java b/src/java/org/apache/nutch/tools/FreeGenerator.java
index 5c45903..8c537d9 100644
--- a/src/java/org/apache/nutch/tools/FreeGenerator.java
+++ b/src/java/org/apache/nutch/tools/FreeGenerator.java
@@ -106,14 +106,12 @@ public class FreeGenerator extends Configured implements Tool {
scfilters.injectedScore(url, datum);
}
} catch (Exception e) {
- LOG.warn("Error adding url '" + value.toString() + "', skipping: "
- + StringUtils.stringifyException(e));
+ LOG.warn("Error adding url '{}', skipping: {}", value,
+ StringUtils.stringifyException(e));
return;
}
if (urlString == null) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("- skipping " + value.toString());
- }
+ LOG.debug("- skipping {}", value);
return;
}
entry.datum = datum;
diff --git a/src/java/org/apache/nutch/util/SitemapProcessor.java b/src/java/org/apache/nutch/util/SitemapProcessor.java
index aff9779..c686d6a 100644
--- a/src/java/org/apache/nutch/util/SitemapProcessor.java
+++ b/src/java/org/apache/nutch/util/SitemapProcessor.java
@@ -357,9 +357,7 @@ public class SitemapProcessor extends Configured implements Tool {
public void sitemap(Path crawldb, Path hostdb, Path sitemapUrlDir, boolean strict, boolean filter,
boolean normalize, int threads) throws Exception {
long start = System.currentTimeMillis();
- if (LOG.isInfoEnabled()) {
- LOG.info("SitemapProcessor: Starting at {}", sdf.format(start));
- }
+ LOG.info("SitemapProcessor: Starting at {}", sdf.format(start));
FileSystem fs = crawldb.getFileSystem(getConf());
Path old = new Path(crawldb, "old");
diff --git a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java
index dbc480b..3b03e7d 100644
--- a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java
+++ b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java
@@ -235,9 +235,8 @@ public class SolrIndexWriter implements IndexWriter {
private void push() throws IOException {
if (inputDocs.size() > 0) {
try {
- LOG.info("Indexing " + Integer.toString(inputDocs.size()) + "/"
- + Integer.toString(totalAdds) + " documents");
- LOG.info("Deleting " + Integer.toString(numDeletes) + " documents");
+ LOG.info("Indexing {}/{} documents", inputDocs.size(), totalAdds);
+ LOG.info("Deleting {} documents", numDeletes);
numDeletes = 0;
UpdateRequest req = new UpdateRequest();
req.add(inputDocs);
@@ -257,9 +256,9 @@ public class SolrIndexWriter implements IndexWriter {
if (deleteIds.size() > 0) {
try {
- LOG.info("SolrIndexer: deleting " + Integer.toString(deleteIds.size())
- + "/" + Integer.toString(totalDeletes) + " documents");
-
+ LOG.info("SolrIndexer: deleting {}/{} documents", deleteIds.size(),
+ totalDeletes);
+
UpdateRequest req = new UpdateRequest();
req.deleteById(deleteIds);
req.setAction(UpdateRequest.ACTION.OPTIMIZE, false, false);