You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by sn...@apache.org on 2022/01/27 16:05:38 UTC
[nutch] branch master updated: NUTCH-2923: Added JobId in Job Failure logs (#721)
This is an automated email from the ASF dual-hosted git repository.
snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push:
new f8967c4 NUTCH-2923: Added JobId in Job Failure logs (#721)
f8967c4 is described below
commit f8967c431a680eb8802fb5d9c396abcb69d11d29
Author: Prakhar Chaube <52...@users.noreply.github.com>
AuthorDate: Thu Jan 27 21:33:51 2022 +0530
NUTCH-2923: Added JobId in Job Failure logs (#721)
* added JobId in Job Failure logs
* moved job failure log message logic to NutchJob.java
* added description for throws in JavaDoc
* logging only state from Job Status and Simplified Job name for SitemapProcessor
---
src/java/org/apache/nutch/crawl/CrawlDb.java | 5 ++---
src/java/org/apache/nutch/crawl/CrawlDbMerger.java | 4 +---
src/java/org/apache/nutch/crawl/CrawlDbReader.java | 16 ++++-----------
.../org/apache/nutch/crawl/DeduplicationJob.java | 8 ++------
src/java/org/apache/nutch/crawl/Generator.java | 12 +++--------
src/java/org/apache/nutch/crawl/Injector.java | 4 +---
src/java/org/apache/nutch/crawl/LinkDb.java | 8 ++------
src/java/org/apache/nutch/crawl/LinkDbMerger.java | 4 +---
src/java/org/apache/nutch/crawl/LinkDbReader.java | 4 +---
src/java/org/apache/nutch/fetcher/Fetcher.java | 4 +---
src/java/org/apache/nutch/hostdb/ReadHostDb.java | 5 ++---
src/java/org/apache/nutch/hostdb/UpdateHostDb.java | 4 +---
src/java/org/apache/nutch/indexer/CleaningJob.java | 4 +---
src/java/org/apache/nutch/indexer/IndexingJob.java | 4 +---
src/java/org/apache/nutch/parse/ParseSegment.java | 4 +---
.../apache/nutch/scoring/webgraph/LinkDumper.java | 10 ++++-----
.../apache/nutch/scoring/webgraph/LinkRank.java | 18 ++++++----------
.../apache/nutch/scoring/webgraph/NodeDumper.java | 4 +---
.../nutch/scoring/webgraph/ScoreUpdater.java | 5 ++---
.../apache/nutch/scoring/webgraph/WebGraph.java | 14 +++++--------
.../org/apache/nutch/segment/SegmentMerger.java | 4 +---
.../org/apache/nutch/segment/SegmentReader.java | 4 +---
src/java/org/apache/nutch/tools/FreeGenerator.java | 4 +---
.../apache/nutch/tools/arc/ArcSegmentCreator.java | 5 ++---
.../org/apache/nutch/tools/warc/WARCExporter.java | 4 +---
.../apache/nutch/util/CrawlCompletionStats.java | 4 +---
src/java/org/apache/nutch/util/NutchJob.java | 24 ++++++++++++++++++++++
.../nutch/util/ProtocolStatusStatistics.java | 4 +---
.../org/apache/nutch/util/SitemapProcessor.java | 5 ++---
.../apache/nutch/util/domain/DomainStatistics.java | 5 ++---
30 files changed, 78 insertions(+), 126 deletions(-)
diff --git a/src/java/org/apache/nutch/crawl/CrawlDb.java b/src/java/org/apache/nutch/crawl/CrawlDb.java
index 5d91b0a..3819bb3 100644
--- a/src/java/org/apache/nutch/crawl/CrawlDb.java
+++ b/src/java/org/apache/nutch/crawl/CrawlDb.java
@@ -129,9 +129,8 @@ public class CrawlDb extends NutchTool implements Tool {
try {
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = "CrawlDb update job did not succeed, job status:"
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("CrawlDb update",
+ job);
LOG.error(message);
NutchJob.cleanupAfterFailure(outPath, lock, fs);
throw new RuntimeException(message);
diff --git a/src/java/org/apache/nutch/crawl/CrawlDbMerger.java b/src/java/org/apache/nutch/crawl/CrawlDbMerger.java
index 6fef03a..70c6513 100644
--- a/src/java/org/apache/nutch/crawl/CrawlDbMerger.java
+++ b/src/java/org/apache/nutch/crawl/CrawlDbMerger.java
@@ -144,9 +144,7 @@ public class CrawlDbMerger extends Configured implements Tool {
try {
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = "CrawlDbMerger job did not succeed, job status:"
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("CrawlDbMerger", job);
LOG.error(message);
NutchJob.cleanupAfterFailure(outPath, lock, fs);
throw new RuntimeException(message);
diff --git a/src/java/org/apache/nutch/crawl/CrawlDbReader.java b/src/java/org/apache/nutch/crawl/CrawlDbReader.java
index f31210a..f5c782a 100644
--- a/src/java/org/apache/nutch/crawl/CrawlDbReader.java
+++ b/src/java/org/apache/nutch/crawl/CrawlDbReader.java
@@ -556,9 +556,7 @@ public class CrawlDbReader extends AbstractChecker implements Closeable {
try {
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = "CrawlDbReader job did not succeed, job status:"
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("CrawlDbReader", job);
LOG.error(message);
fileSystem.delete(tmpFolder, true);
throw new RuntimeException(message);
@@ -847,9 +845,7 @@ public class CrawlDbReader extends AbstractChecker implements Closeable {
try {
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = "CrawlDbReader job did not succeed, job status:"
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("CrawlDbReader", job);
LOG.error(message);
throw new RuntimeException(message);
}
@@ -959,9 +955,7 @@ public class CrawlDbReader extends AbstractChecker implements Closeable {
try {
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = "CrawlDbReader job did not succeed, job status:"
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("CrawlDbReader", job);
LOG.error(message);
fs.delete(tempDir, true);
throw new RuntimeException(message);
@@ -993,9 +987,7 @@ public class CrawlDbReader extends AbstractChecker implements Closeable {
try {
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = "CrawlDbReader job did not succeed, job status:"
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("CrawlDbReader", job);
LOG.error(message);
fs.delete(tempDir, true);
throw new RuntimeException(message);
diff --git a/src/java/org/apache/nutch/crawl/DeduplicationJob.java b/src/java/org/apache/nutch/crawl/DeduplicationJob.java
index 5f1172d..ae5ac37 100644
--- a/src/java/org/apache/nutch/crawl/DeduplicationJob.java
+++ b/src/java/org/apache/nutch/crawl/DeduplicationJob.java
@@ -331,9 +331,7 @@ public class DeduplicationJob extends NutchTool implements Tool {
try {
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = "Crawl job did not succeed, job status:"
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("Crawl", job);
LOG.error(message);
fs.delete(tempDir, true);
throw new RuntimeException(message);
@@ -365,9 +363,7 @@ public class DeduplicationJob extends NutchTool implements Tool {
try {
boolean success = mergeJob.waitForCompletion(true);
if (!success) {
- String message = "Crawl job did not succeed, job status:"
- + mergeJob.getStatus().getState() + ", reason: "
- + mergeJob.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("Crawl", mergeJob);
LOG.error(message);
fs.delete(tempDir, true);
NutchJob.cleanupAfterFailure(outPath, lock, fs);
diff --git a/src/java/org/apache/nutch/crawl/Generator.java b/src/java/org/apache/nutch/crawl/Generator.java
index 9fec0ec..7eee5e7 100644
--- a/src/java/org/apache/nutch/crawl/Generator.java
+++ b/src/java/org/apache/nutch/crawl/Generator.java
@@ -891,9 +891,7 @@ public class Generator extends NutchTool implements Tool {
try {
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = "Generator job did not succeed, job status:"
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("Generator", job);
LOG.error(message);
NutchJob.cleanupAfterFailure(tempDir, lock, fs);
throw new RuntimeException(message);
@@ -969,9 +967,7 @@ public class Generator extends NutchTool implements Tool {
try {
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = "Generator job did not succeed, job status:"
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("Generator", job);
LOG.error(message);
NutchJob.cleanupAfterFailure(tempDir, lock, fs);
NutchJob.cleanupAfterFailure(tempDir2, lock, fs);
@@ -1035,9 +1031,7 @@ public class Generator extends NutchTool implements Tool {
try {
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = "Generator job did not succeed, job status:"
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("Generator", job);
LOG.error(message);
throw new RuntimeException(message);
}
diff --git a/src/java/org/apache/nutch/crawl/Injector.java b/src/java/org/apache/nutch/crawl/Injector.java
index 84dc812..a3512f6 100644
--- a/src/java/org/apache/nutch/crawl/Injector.java
+++ b/src/java/org/apache/nutch/crawl/Injector.java
@@ -435,9 +435,7 @@ public class Injector extends NutchTool implements Tool {
// run the job
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = "Injector job did not succeed, job status: "
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("Injector", job);
LOG.error(message);
NutchJob.cleanupAfterFailure(tempCrawlDb, lock, fs);
// throw exception so that calling routine can exit with error
diff --git a/src/java/org/apache/nutch/crawl/LinkDb.java b/src/java/org/apache/nutch/crawl/LinkDb.java
index e53411f..2b3d2ed 100644
--- a/src/java/org/apache/nutch/crawl/LinkDb.java
+++ b/src/java/org/apache/nutch/crawl/LinkDb.java
@@ -224,9 +224,7 @@ public class LinkDb extends NutchTool implements Tool {
try {
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = "LinkDb job did not succeed, job status:"
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("LinkDb", job);
LOG.error(message);
LockUtil.removeLockFile(fs, lock);
throw new RuntimeException(message);
@@ -248,9 +246,7 @@ public class LinkDb extends NutchTool implements Tool {
try {
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = "LinkDb job did not succeed, job status:"
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("LinkDb", job);
LOG.error(message);
NutchJob.cleanupAfterFailure(newLinkDb, lock, fs);
throw new RuntimeException(message);
diff --git a/src/java/org/apache/nutch/crawl/LinkDbMerger.java b/src/java/org/apache/nutch/crawl/LinkDbMerger.java
index 6b93329..f696c59 100644
--- a/src/java/org/apache/nutch/crawl/LinkDbMerger.java
+++ b/src/java/org/apache/nutch/crawl/LinkDbMerger.java
@@ -124,9 +124,7 @@ public class LinkDbMerger extends Configured implements Tool {
try {
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = "LinkDbMerge job did not succeed, job status:"
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("LinkDbMerge", job);
LOG.error(message);
throw new RuntimeException(message);
}
diff --git a/src/java/org/apache/nutch/crawl/LinkDbReader.java b/src/java/org/apache/nutch/crawl/LinkDbReader.java
index 4cacd81..2bcceee 100644
--- a/src/java/org/apache/nutch/crawl/LinkDbReader.java
+++ b/src/java/org/apache/nutch/crawl/LinkDbReader.java
@@ -183,9 +183,7 @@ public class LinkDbReader extends AbstractChecker implements Closeable {
try{
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = "LinkDbRead job did not succeed, job status:"
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("LinkDbRead", job);
LOG.error(message);
throw new RuntimeException(message);
}
diff --git a/src/java/org/apache/nutch/fetcher/Fetcher.java b/src/java/org/apache/nutch/fetcher/Fetcher.java
index 9456c58..7cc87f4 100644
--- a/src/java/org/apache/nutch/fetcher/Fetcher.java
+++ b/src/java/org/apache/nutch/fetcher/Fetcher.java
@@ -521,9 +521,7 @@ public class Fetcher extends NutchTool implements Tool {
try {
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = "Fetcher job did not succeed, job status:"
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("Fetcher", job);
LOG.error(message);
throw new RuntimeException(message);
}
diff --git a/src/java/org/apache/nutch/hostdb/ReadHostDb.java b/src/java/org/apache/nutch/hostdb/ReadHostDb.java
index f4bd742..930e62c 100644
--- a/src/java/org/apache/nutch/hostdb/ReadHostDb.java
+++ b/src/java/org/apache/nutch/hostdb/ReadHostDb.java
@@ -41,6 +41,7 @@ import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.util.NutchConfiguration;
+import org.apache.nutch.util.NutchJob;
import org.apache.nutch.util.TimingUtil;
import org.apache.nutch.util.SegmentReaderUtil;
@@ -197,9 +198,7 @@ public class ReadHostDb extends Configured implements Tool {
try {
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = "ReadHostDb job did not succeed, job status: "
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("ReadHostDb", job);
LOG.error(message);
// throw exception so that calling routine can exit with error
throw new RuntimeException(message);
diff --git a/src/java/org/apache/nutch/hostdb/UpdateHostDb.java b/src/java/org/apache/nutch/hostdb/UpdateHostDb.java
index c903822..ffa68d0 100644
--- a/src/java/org/apache/nutch/hostdb/UpdateHostDb.java
+++ b/src/java/org/apache/nutch/hostdb/UpdateHostDb.java
@@ -132,9 +132,7 @@ public class UpdateHostDb extends Configured implements Tool {
try {
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = "UpdateHostDb job did not succeed, job status:"
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("UpdateHostDb", job);
LOG.error(message);
NutchJob.cleanupAfterFailure(tempHostDb, lock, fs);
throw new RuntimeException(message);
diff --git a/src/java/org/apache/nutch/indexer/CleaningJob.java b/src/java/org/apache/nutch/indexer/CleaningJob.java
index ca1198e..dc3ed69 100644
--- a/src/java/org/apache/nutch/indexer/CleaningJob.java
+++ b/src/java/org/apache/nutch/indexer/CleaningJob.java
@@ -164,9 +164,7 @@ public class CleaningJob implements Tool {
try{
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = "CleaningJob did not succeed, job status:"
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("CleaningJob", job);
LOG.error(message);
throw new RuntimeException(message);
}
diff --git a/src/java/org/apache/nutch/indexer/IndexingJob.java b/src/java/org/apache/nutch/indexer/IndexingJob.java
index 0fe29a7..ff46bc0 100644
--- a/src/java/org/apache/nutch/indexer/IndexingJob.java
+++ b/src/java/org/apache/nutch/indexer/IndexingJob.java
@@ -145,9 +145,7 @@ public class IndexingJob extends NutchTool implements Tool {
try{
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = "Indexing job did not succeed, job status:"
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("Indexing", job);
LOG.error(message);
throw new RuntimeException(message);
}
diff --git a/src/java/org/apache/nutch/parse/ParseSegment.java b/src/java/org/apache/nutch/parse/ParseSegment.java
index f7c5797..7e4707d 100644
--- a/src/java/org/apache/nutch/parse/ParseSegment.java
+++ b/src/java/org/apache/nutch/parse/ParseSegment.java
@@ -254,9 +254,7 @@ public class ParseSegment extends NutchTool implements Tool {
try{
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = "Parse job did not succeed, job status:"
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("Parse", job);
LOG.error(message);
throw new RuntimeException(message);
}
diff --git a/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java b/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java
index 733edbc..6e2c629 100644
--- a/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java
+++ b/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java
@@ -355,9 +355,8 @@ public class LinkDumper extends Configured implements Tool {
LOG.info("LinkDumper: running inverter");
boolean success = inverter.waitForCompletion(true);
if (!success) {
- String message = "LinkDumper inverter job did not succeed, job status:"
- + inverter.getStatus().getState() + ", reason: "
- + inverter.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("LinkDumper inverter",
+ inverter);
LOG.error(message);
throw new RuntimeException(message);
}
@@ -385,9 +384,8 @@ public class LinkDumper extends Configured implements Tool {
LOG.info("LinkDumper: running merger");
boolean success = merger.waitForCompletion(true);
if (!success) {
- String message = "LinkDumper merger job did not succeed, job status:"
- + merger.getStatus().getState() + ", reason: "
- + merger.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("LinkDumper merger",
+ merger);
LOG.error(message);
throw new RuntimeException(message);
}
diff --git a/src/java/org/apache/nutch/scoring/webgraph/LinkRank.java b/src/java/org/apache/nutch/scoring/webgraph/LinkRank.java
index 39a9c63..739fe6c 100644
--- a/src/java/org/apache/nutch/scoring/webgraph/LinkRank.java
+++ b/src/java/org/apache/nutch/scoring/webgraph/LinkRank.java
@@ -117,9 +117,8 @@ public class LinkRank extends Configured implements Tool {
try {
boolean success = counter.waitForCompletion(true);
if (!success) {
- String message = "Link counter job did not succeed, job status:"
- + counter.getStatus().getState() + ", reason: "
- + counter.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("Link counter",
+ counter);
LOG.error(message);
throw new RuntimeException(message);
}
@@ -216,9 +215,8 @@ public class LinkRank extends Configured implements Tool {
try {
boolean success = initializer.waitForCompletion(true);
if (!success) {
- String message = "Initialization job did not succeed, job status:"
- + initializer.getStatus().getState() + ", reason: "
- + initializer.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("Initialization",
+ initializer);
LOG.error(message);
throw new RuntimeException(message);
}
@@ -270,9 +268,7 @@ public class LinkRank extends Configured implements Tool {
try {
boolean success = inverter.waitForCompletion(true);
if (!success) {
- String message = "Inverter job did not succeed, job status:"
- + inverter.getStatus().getState() + ", reason: "
- + inverter.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("Inverter", inverter);
LOG.error(message);
throw new RuntimeException(message);
}
@@ -334,9 +330,7 @@ public class LinkRank extends Configured implements Tool {
try {
boolean success = analyzer.waitForCompletion(true);
if (!success) {
- String message = "Analysis job did not succeed, job status:"
- + analyzer.getStatus().getState() + ", reason: "
- + analyzer.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("Analysis", analyzer);
LOG.error(message);
throw new RuntimeException(message);
}
diff --git a/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java b/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java
index fc2875e..ede9fa1 100644
--- a/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java
+++ b/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java
@@ -349,9 +349,7 @@ public class NodeDumper extends Configured implements Tool {
LOG.info("NodeDumper: running");
boolean success = dumper.waitForCompletion(true);
if (!success) {
- String message = "NodeDumper job did not succeed, job status:"
- + dumper.getStatus().getState() + ", reason: "
- + dumper.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("NodeDumper", dumper);
LOG.error(message);
throw new RuntimeException(message);
}
diff --git a/src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java b/src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java
index 6cc604f..130e1b2 100644
--- a/src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java
+++ b/src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java
@@ -188,9 +188,8 @@ public class ScoreUpdater extends Configured implements Tool{
try {
boolean success = updater.waitForCompletion(true);
if (!success) {
- String message = "Update CrawlDb from WebGraph job did not succeed, job status:"
- + updater.getStatus().getState() + ", reason: "
- + updater.getStatus().getFailureInfo();
+ String message = NutchJob
+ .getJobFailureLogMessage("Update CrawlDb from WebGraph", updater);
LOG.error(message);
// remove the temp crawldb on error
FileSystem fs = newCrawlDb.getFileSystem(conf);
diff --git a/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java b/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java
index 5b7a3fd..63d0ead 100644
--- a/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java
+++ b/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java
@@ -603,9 +603,8 @@ public class WebGraph extends Configured implements Tool {
LOG.info("OutlinkDb: running");
boolean success = outlinkJob.waitForCompletion(true);
if (!success) {
- String message = "OutlinkDb job did not succeed, job status:"
- + outlinkJob.getStatus().getState() + ", reason: "
- + outlinkJob.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("OutlinkDb",
+ outlinkJob);
LOG.error(message);
NutchJob.cleanupAfterFailure(tempOutlinkDb, lock, fs);
throw new RuntimeException(message);
@@ -651,9 +650,8 @@ public class WebGraph extends Configured implements Tool {
LOG.info("InlinkDb: running");
boolean success = inlinkJob.waitForCompletion(true);
if (!success) {
- String message = "InlinkDb job did not succeed, job status:"
- + inlinkJob.getStatus().getState() + ", reason: "
- + inlinkJob.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("InlinkDb",
+ inlinkJob);
LOG.error(message);
NutchJob.cleanupAfterFailure(tempInlinkDb, lock, fs);
throw new RuntimeException(message);
@@ -698,9 +696,7 @@ public class WebGraph extends Configured implements Tool {
LOG.info("NodeDb: running");
boolean success = nodeJob.waitForCompletion(true);
if (!success) {
- String message = "NodeDb job did not succeed, job status:"
- + nodeJob.getStatus().getState() + ", reason: "
- + nodeJob.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("NodeDb", nodeJob);
LOG.error(message);
// remove lock file and and temporary directory if an error occurs
NutchJob.cleanupAfterFailure(tempNodeDb, lock, fs);
diff --git a/src/java/org/apache/nutch/segment/SegmentMerger.java b/src/java/org/apache/nutch/segment/SegmentMerger.java
index 2270647..056df3c 100644
--- a/src/java/org/apache/nutch/segment/SegmentMerger.java
+++ b/src/java/org/apache/nutch/segment/SegmentMerger.java
@@ -732,9 +732,7 @@ public class SegmentMerger extends Configured implements Tool{
try {
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = "SegmentMerger job did not succeed, job status:"
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("SegmentMerger", job);
LOG.error(message);
throw new RuntimeException(message);
}
diff --git a/src/java/org/apache/nutch/segment/SegmentReader.java b/src/java/org/apache/nutch/segment/SegmentReader.java
index 2f2fefd..877aeb6 100644
--- a/src/java/org/apache/nutch/segment/SegmentReader.java
+++ b/src/java/org/apache/nutch/segment/SegmentReader.java
@@ -235,9 +235,7 @@ public class SegmentReader extends Configured implements Tool {
try {
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = "SegmentReader job did not succeed, job status:"
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("SegmentReader", job);
LOG.error(message);
throw new RuntimeException(message);
}
diff --git a/src/java/org/apache/nutch/tools/FreeGenerator.java b/src/java/org/apache/nutch/tools/FreeGenerator.java
index 57344bb..039bcca 100644
--- a/src/java/org/apache/nutch/tools/FreeGenerator.java
+++ b/src/java/org/apache/nutch/tools/FreeGenerator.java
@@ -218,9 +218,7 @@ public class FreeGenerator extends Configured implements Tool {
try {
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = "FreeGenerator job did not succeed, job status:"
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("FreeGenerator", job);
LOG.error(message);
throw new RuntimeException(message);
}
diff --git a/src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java b/src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java
index c0ebb2d..d95ba61 100644
--- a/src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java
+++ b/src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java
@@ -390,9 +390,8 @@ public class ArcSegmentCreator extends Configured implements Tool {
try {
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = "ArcSegmentCreator job did not succeed, job status:"
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("ArcSegmentCreator",
+ job);
LOG.error(message);
throw new RuntimeException(message);
}
diff --git a/src/java/org/apache/nutch/tools/warc/WARCExporter.java b/src/java/org/apache/nutch/tools/warc/WARCExporter.java
index 6943008..cf000ba 100644
--- a/src/java/org/apache/nutch/tools/warc/WARCExporter.java
+++ b/src/java/org/apache/nutch/tools/warc/WARCExporter.java
@@ -474,9 +474,7 @@ public class WARCExporter extends Configured implements Tool {
try {
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = "WARCExporter job did not succeed, job status:"
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("WARCExporter", job);
LOG.error(message);
throw new RuntimeException(message);
}
diff --git a/src/java/org/apache/nutch/util/CrawlCompletionStats.java b/src/java/org/apache/nutch/util/CrawlCompletionStats.java
index 621484c..7210ee8 100644
--- a/src/java/org/apache/nutch/util/CrawlCompletionStats.java
+++ b/src/java/org/apache/nutch/util/CrawlCompletionStats.java
@@ -170,9 +170,7 @@ public class CrawlCompletionStats extends Configured implements Tool {
try {
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = jobName + " job did not succeed, job status: "
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage(jobName, job);
LOG.error(message);
// throw exception so that calling routine can exit with error
throw new RuntimeException(message);
diff --git a/src/java/org/apache/nutch/util/NutchJob.java b/src/java/org/apache/nutch/util/NutchJob.java
index 3e852eb..478b24f 100644
--- a/src/java/org/apache/nutch/util/NutchJob.java
+++ b/src/java/org/apache/nutch/util/NutchJob.java
@@ -33,6 +33,8 @@ public class NutchJob extends Job {
private static final Logger LOG = LoggerFactory
.getLogger(MethodHandles.lookup().lookupClass());
+ private static final String JOB_FAILURE_LOG_FORMAT = "%s job did not succeed, job id: %s, job status: %s, reason: %s";
+
@SuppressWarnings("deprecation")
public NutchJob(Configuration conf, String jobName) throws IOException {
super(conf, jobName);
@@ -87,4 +89,26 @@ public class NutchJob extends Job {
}
}
+ /**
+ * Build the log message for a failed job: job name, id, state and failure
+ * info. Shared by all Nutch jobs so failure logs have one format.
+ * @param name
+ * Name/Type of the job
+ * @param job
+ * Job Object for Job details; if null, an empty string is returned
+ * @return job failure log message
+ * @throws IOException
+ * Can occur during fetching job status
+ * @throws InterruptedException
+ * Can occur during fetching job status
+ */
+ public static String getJobFailureLogMessage(String name, Job job)
+ throws IOException, InterruptedException {
+ if (job != null) {
+ return String.format(JOB_FAILURE_LOG_FORMAT, name, job.getJobID(),
+ job.getStatus().getState(), job.getStatus().getFailureInfo());
+ }
+ return "";
+ }
+
}
diff --git a/src/java/org/apache/nutch/util/ProtocolStatusStatistics.java b/src/java/org/apache/nutch/util/ProtocolStatusStatistics.java
index 213c1c2..2499da0 100644
--- a/src/java/org/apache/nutch/util/ProtocolStatusStatistics.java
+++ b/src/java/org/apache/nutch/util/ProtocolStatusStatistics.java
@@ -120,9 +120,7 @@ public class ProtocolStatusStatistics extends Configured implements Tool {
try {
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = jobName + " job did not succeed, job status: "
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage(jobName, job);
LOG.error(message);
// throw exception so that calling routine can exit with error
throw new RuntimeException(message);
diff --git a/src/java/org/apache/nutch/util/SitemapProcessor.java b/src/java/org/apache/nutch/util/SitemapProcessor.java
index 1a1955e..98f7df8 100644
--- a/src/java/org/apache/nutch/util/SitemapProcessor.java
+++ b/src/java/org/apache/nutch/util/SitemapProcessor.java
@@ -411,9 +411,8 @@ public class SitemapProcessor extends Configured implements Tool {
try {
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = "SitemapProcessor_" + crawldb.toString()
- + " job did not succeed, job status: " + job.getStatus().getState()
- + ", reason: " + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("SitemapProcessor",
+ job);
LOG.error(message);
NutchJob.cleanupAfterFailure(tempCrawlDb, lock, fs);
// throw exception so that calling routine can exit with error
diff --git a/src/java/org/apache/nutch/util/domain/DomainStatistics.java b/src/java/org/apache/nutch/util/domain/DomainStatistics.java
index 0d789ed..638b6c9 100644
--- a/src/java/org/apache/nutch/util/domain/DomainStatistics.java
+++ b/src/java/org/apache/nutch/util/domain/DomainStatistics.java
@@ -38,6 +38,7 @@ import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.util.NutchConfiguration;
+import org.apache.nutch.util.NutchJob;
import org.apache.nutch.util.TimingUtil;
import org.apache.nutch.util.URLUtil;
import org.slf4j.Logger;
@@ -140,9 +141,7 @@ public class DomainStatistics extends Configured implements Tool {
try {
boolean success = job.waitForCompletion(true);
if (!success) {
- String message = "Injector job did not succeed, job status: "
- + job.getStatus().getState() + ", reason: "
- + job.getStatus().getFailureInfo();
+ String message = NutchJob.getJobFailureLogMessage("Injector", job);
LOG.error(message);
// throw exception so that calling routine can exit with error
throw new RuntimeException(message);