You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by je...@apache.org on 2006/06/22 14:20:33 UTC
svn commit: r416346 [2/3] - in /lucene/nutch/trunk/src:
java/org/apache/nutch/analysis/ java/org/apache/nutch/clustering/
java/org/apache/nutch/crawl/ java/org/apache/nutch/fetcher/
java/org/apache/nutch/indexer/ java/org/apache/nutch/net/ java/org/apa...
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/QueryFilters.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/QueryFilters.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/QueryFilters.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/QueryFilters.java Thu Jun 22 05:20:29 2006
@@ -66,8 +66,10 @@
ArrayList fieldNames = parseFieldNames(extension, "fields");
ArrayList rawFieldNames = parseFieldNames(extension, "raw-fields");
if (fieldNames.size() == 0 && rawFieldNames.size() == 0) {
- LOG.warn("QueryFilter: " + extension.getId()
- + " names no fields.");
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("QueryFilter: " + extension.getId()
+ + " names no fields.");
+ }
continue;
}
filters[i] = (QueryFilter) extension.getExtensionInstance();
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/SummarizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/SummarizerFactory.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/SummarizerFactory.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/SummarizerFactory.java Thu Jun 22 05:20:29 2006
@@ -48,10 +48,12 @@
.getExtensionPoint(Summarizer.X_POINT_ID)
.getExtensions();
summarizer = (Summarizer) extensions[0].getExtensionInstance();
- LOG.info("Using the first summarizer extension found: " +
- extensions[0].getId());
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Using the first summarizer extension found: " +
+ extensions[0].getId());
+ }
} catch (Exception e) {
- LOG.warn(e.toString());
+ if (LOG.isWarnEnabled()) { LOG.warn(e.toString()); }
}
}
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java Thu Jun 22 05:20:29 2006
@@ -316,8 +316,9 @@
if (conf.getBoolean("segment.merger.filter", false))
filters = new URLFilters(conf);
sliceSize = conf.getLong("segment.merger.slice", -1);
- if (sliceSize > 0)
+ if ((sliceSize > 0) && (LOG.isInfoEnabled())) {
LOG.info("Slice size: " + sliceSize + " URLs.");
+ }
}
public void close() throws IOException {
@@ -337,7 +338,9 @@
return;
}
} catch (Exception e) {
- LOG.warn("Cannot filter key " + key + ": " + e.getMessage());
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Cannot filter key " + key + ": " + e.getMessage());
+ }
}
}
output.collect(key, value);
@@ -531,9 +534,11 @@
public void merge(Path out, Path[] segs, boolean filter, long slice) throws Exception {
String segmentName = Generator.generateSegmentName();
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Merging " + segs.length + " segments to " + out + "/" + segmentName);
+ }
JobConf job = new JobConf(getConf());
job.setJobName("mergesegs " + out + "/" + segmentName);
- LOG.info("Merging " + segs.length + " segments to " + out + "/" + segmentName);
job.setBoolean("segment.merger.filter", filter);
job.setLong("segment.merger.slice", slice);
job.set("segment.merger.segmentName", segmentName);
@@ -547,11 +552,15 @@
boolean pt = true;
for (int i = 0; i < segs.length; i++) {
if (!fs.exists(segs[i])) {
- LOG.warn("SegmentMerger: input dir " + segs[i] + " doesn't exist, skipping.");
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Input dir " + segs[i] + " doesn't exist, skipping.");
+ }
segs[i] = null;
continue;
}
- LOG.info("SegmentMerger: adding " + segs[i]);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("SegmentMerger: adding " + segs[i]);
+ }
Path cDir = new Path(segs[i], Content.DIR_NAME);
Path gDir = new Path(segs[i], CrawlDatum.GENERATE_DIR_NAME);
Path fDir = new Path(segs[i], CrawlDatum.FETCH_DIR_NAME);
@@ -572,7 +581,9 @@
if (p) sb.append(" " + CrawlDatum.PARSE_DIR_NAME);
if (pd) sb.append(" " + ParseData.DIR_NAME);
if (pt) sb.append(" " + ParseText.DIR_NAME);
- LOG.info("SegmentMerger: using segment data from:" + sb.toString());
+ if (LOG.isInfoEnabled()) {
+ LOG.info("SegmentMerger: using segment data from:" + sb.toString());
+ }
for (int i = 0; i < segs.length; i++) {
if (segs[i] == null) continue;
if (g) {
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java Thu Jun 22 05:20:29 2006
@@ -155,7 +155,7 @@
dump.append("\nParseData::\n").append(((ParseData) value).toString());
} else if (value instanceof ParseText) {
dump.append("\nParseText::\n").append(((ParseText) value).toString());
- } else {
+ } else if (LOG.isWarnEnabled()) {
LOG.warn("Unrecognized type: " + value.getClass());
}
}
@@ -163,7 +163,10 @@
}
public void dump(Path segment, Path output) throws IOException {
- LOG.info("SegmentReader: dump segment: " + segment);
+
+ if (LOG.isInfoEnabled()) {
+ LOG.info("SegmentReader: dump segment: " + segment);
+ }
JobConf job = createJobConf();
job.setJobName("read " + segment);
@@ -208,8 +211,11 @@
try {
currentRecordNumber = append(fs, job, partFile, writer, currentRecordNumber);
} catch (IOException exception) {
- LOG.warn("Couldn't copy the content of " + partFile.toString() + " into " + dumpFile.toString());
- LOG.warn(exception.getMessage());
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Couldn't copy the content of " + partFile.toString() +
+ " into " + dumpFile.toString());
+ LOG.warn(exception.getMessage());
+ }
}
}
} finally {
@@ -217,7 +223,7 @@
}
}
fs.delete(tempDir);
- LOG.info("SegmentReader: done");
+ if (LOG.isInfoEnabled()) { LOG.info("SegmentReader: done"); }
}
/** Appends two files and updates the Recno counter */
@@ -250,7 +256,7 @@
public void get(final Path segment, final UTF8 key, Writer writer,
final Map results) throws Exception {
- LOG.info("SegmentReader: get '" + key + "'");
+ if (LOG.isInfoEnabled()) { LOG.info("SegmentReader: get '" + key + "'"); }
ArrayList threads = new ArrayList();
if (co) threads.add(new Thread() {
public void run() {
@@ -323,7 +329,9 @@
while (it.hasNext()) {
if (((Thread)it.next()).isAlive()) cnt++;
}
- if (cnt > 0) LOG.debug("(" + cnt + " to retrieve)");
+ if ((cnt > 0) && (LOG.isDebugEnabled())) {
+ LOG.debug("(" + cnt + " to retrieve)");
+ }
} while (cnt > 0);
for (int i = 0; i < keys.length; i++) {
List res = (List)results.get(keys[i][0]);
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/servlet/Cached.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/servlet/Cached.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/servlet/Cached.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/servlet/Cached.java Thu Jun 22 05:20:29 2006
@@ -70,7 +70,9 @@
if (bean == null)
return;
- bean.LOG.info("request from " + request.getRemoteAddr());
+ if (bean.LOG.isInfoEnabled()) {
+ bean.LOG.info("request from " + request.getRemoteAddr());
+ }
Hit hit = new Hit(Integer.parseInt(request.getParameter("idx")),
Integer.parseInt(request.getParameter("id")));
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/tools/DmozParser.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/tools/DmozParser.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/tools/DmozParser.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/tools/DmozParser.java Thu Jun 22 05:20:29 2006
@@ -180,7 +180,7 @@
// Inc the number of pages, insert the page, and
// possibly print status.
//
- LOG.info(curURL);
+ if (LOG.isInfoEnabled()) { LOG.info(curURL); }
pages++;
//
@@ -208,14 +208,16 @@
* When parsing begins
*/
public void startDocument() {
- LOG.info("Begin parse");
+ if (LOG.isInfoEnabled()) { LOG.info("Begin parse"); }
}
/**
* When parsing ends
*/
public void endDocument() {
- LOG.info("Completed parse. Found " + pages + " pages.");
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Completed parse. Found " + pages + " pages.");
+ }
}
/**
@@ -236,25 +238,32 @@
* Emit the exception message
*/
public void error(SAXParseException spe) {
- LOG.fatal("Error: " + spe.toString() + ": " + spe.getMessage());
- spe.printStackTrace(LogUtil.getFatalStream(LOG));
+ if (LOG.isFatalEnabled()) {
+ LOG.fatal("Error: " + spe.toString() + ": " + spe.getMessage());
+ spe.printStackTrace(LogUtil.getFatalStream(LOG));
+ }
}
/**
* Emit the exception message, with line numbers
*/
public void fatalError(SAXParseException spe) {
- LOG.fatal("Fatal err: " + spe.toString() + ": " + spe.getMessage());
- LOG.fatal("Last known line is " + location.getLineNumber() + ", column " + location.getColumnNumber());
- spe.printStackTrace(LogUtil.getFatalStream(LOG));
+ if (LOG.isFatalEnabled()) {
+ LOG.fatal("Fatal err: " + spe.toString() + ": " + spe.getMessage());
+ LOG.fatal("Last known line is " + location.getLineNumber() +
+ ", column " + location.getColumnNumber());
+ spe.printStackTrace(LogUtil.getFatalStream(LOG));
+ }
}
/**
* Emit exception warning message
*/
public void warning(SAXParseException spe) {
- LOG.warn("Warning: " + spe.toString() + ": " + spe.getMessage());
- spe.printStackTrace(LogUtil.getFatalStream(LOG));
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Warning: " + spe.toString() + ": " + spe.getMessage());
+ spe.printStackTrace(LogUtil.getWarnStream(LOG));
+ }
}
}
@@ -279,7 +288,7 @@
skew, topicPattern);
reader.setContentHandler(rp);
reader.setErrorHandler(rp);
- LOG.info("skew = " + rp.hashSkew);
+ if (LOG.isInfoEnabled()) { LOG.info("skew = " + rp.hashSkew); }
//
// Open filtered text stream. The UTF8Filter makes sure that
@@ -291,8 +300,10 @@
InputSource is = new InputSource(in);
reader.parse(is);
} catch (Exception e) {
- LOG.fatal(e.toString());
- e.printStackTrace(LogUtil.getFatalStream(LOG));
+ if (LOG.isFatalEnabled()) {
+ LOG.fatal(e.toString());
+ e.printStackTrace(LogUtil.getFatalStream(LOG));
+ }
System.exit(0);
} finally {
in.close();
@@ -310,8 +321,10 @@
}
}
catch (Exception e) {
- LOG.fatal(e.toString());
- e.printStackTrace(LogUtil.getFatalStream(LOG));
+ if (LOG.isFatalEnabled()) {
+ LOG.fatal(e.toString());
+ e.printStackTrace(LogUtil.getFatalStream(LOG));
+ }
System.exit(0);
} finally {
in.close();
@@ -372,7 +385,9 @@
}
regExp = regExp.concat((String) topics.get(j));
regExp = regExp.concat(").*");
- LOG.info("Topic selection pattern = " + regExp);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Topic selection pattern = " + regExp);
+ }
topicPattern = Pattern.compile(regExp);
}
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/tools/PruneIndexTool.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/tools/PruneIndexTool.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/tools/PruneIndexTool.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/tools/PruneIndexTool.java Thu Jun 22 05:20:29 2006
@@ -189,7 +189,9 @@
output.flush();
output.close();
} catch (Exception e) {
- LOG.warn("Error closing: " + e.getMessage());
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Error closing: " + e.getMessage());
+ }
}
}
@@ -257,7 +259,9 @@
}
if (!dryrun) {
IndexReader.unlock(dir);
- LOG.debug(" - had to unlock index in " + dir);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(" - had to unlock index in " + dir);
+ }
}
}
reader = IndexReader.open(dir);
@@ -270,26 +274,35 @@
dir = FSDirectory.getDirectory(indexDirs[i], false);
if (IndexReader.isLocked(dir)) {
if (!unlock) {
- LOG.warn(dr + "Index " + indexDirs[i] + " is locked. Skipping...");
+ if (LOG.isWarnEnabled()) {
+ LOG.warn(dr + "Index " + indexDirs[i] + " is locked. Skipping...");
+ }
continue;
}
if (!dryrun) {
IndexReader.unlock(dir);
- LOG.debug(" - had to unlock index in " + dir);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(" - had to unlock index in " + dir);
+ }
}
}
IndexReader r = IndexReader.open(dir);
indexes.add(r);
numIdx++;
} catch (Exception e) {
- LOG.warn(dr + "Invalid index in " + indexDirs[i] + " - skipping...");
+ if (LOG.isWarnEnabled()) {
+ LOG.warn(dr + "Invalid index in " + indexDirs[i] + " - skipping...");
+ }
}
}
if (indexes.size() == 0) throw new Exception("No input indexes.");
IndexReader[] readers = (IndexReader[])indexes.toArray(new IndexReader[0]);
reader = new MultiReader(readers);
}
- LOG.info(dr + "Opened " + numIdx + " index(es) with total " + reader.numDocs() + " documents.");
+ if (LOG.isInfoEnabled()) {
+ LOG.info(dr + "Opened " + numIdx + " index(es) with total " +
+ reader.numDocs() + " documents.");
+ }
searcher = new IndexSearcher(reader);
}
@@ -322,19 +335,27 @@
AllHitsCollector ahc = new AllHitsCollector(bits);
boolean doDelete = false;
for (int i = 0; i < queries.length; i++) {
- LOG.info(dr + "Processing query: " + queries[i].toString());
+ if (LOG.isInfoEnabled()) {
+ LOG.info(dr + "Processing query: " + queries[i].toString());
+ }
bits.clear();
try {
searcher.search(queries[i], ahc);
} catch (IOException e) {
- LOG.warn(dr + " - failed: " + e.getMessage());
+ if (LOG.isWarnEnabled()) {
+ LOG.warn(dr + " - failed: " + e.getMessage());
+ }
continue;
}
if (bits.cardinality() == 0) {
- LOG.info(dr + " - no matching documents.");
+ if (LOG.isInfoEnabled()) {
+ LOG.info(dr + " - no matching documents.");
+ }
continue;
}
- LOG.info(dr + " - found " + bits.cardinality() + " document(s).");
+ if (LOG.isInfoEnabled()) {
+ LOG.info(dr + " - found " + bits.cardinality() + " document(s).");
+ }
// Now delete all matching documents
int docNum = -1, start = 0, cnt = 0;
// probably faster than looping sequentially through all index values?
@@ -355,11 +376,15 @@
cnt++;
}
} catch (Exception e) {
- LOG.warn(dr + " - failed to delete doc #" + docNum);
+ if (LOG.isWarnEnabled()) {
+ LOG.warn(dr + " - failed to delete doc #" + docNum);
+ }
}
start = docNum + 1;
}
- LOG.info(dr + " - deleted " + cnt + " document(s).");
+ if (LOG.isInfoEnabled()) {
+ LOG.info(dr + " - deleted " + cnt + " document(s).");
+ }
}
// close checkers
if (checkers != null) {
@@ -370,20 +395,22 @@
try {
reader.close();
} catch (IOException e) {
- LOG.warn(dr + "Exception when closing reader(s): " + e.getMessage());
+ if (LOG.isWarnEnabled()) {
+ LOG.warn(dr + "Exception when closing reader(s): " + e.getMessage());
+ }
}
}
public static void main(String[] args) throws Exception {
if (args.length == 0) {
usage();
- LOG.fatal("Missing arguments");
+ if (LOG.isFatalEnabled()) { LOG.fatal("Missing arguments"); }
return;
}
File idx = new File(args[0]);
if (!idx.isDirectory()) {
usage();
- LOG.fatal("Not a directory: " + idx);
+ if (LOG.isFatalEnabled()) { LOG.fatal("Not a directory: " + idx); }
return;
}
Vector paths = new Vector();
@@ -398,7 +425,7 @@
});
if (dirs == null || dirs.length == 0) {
usage();
- LOG.fatal("No indexes in " + idx);
+ if (LOG.isFatalEnabled()) { LOG.fatal("No indexes in " + idx); }
return;
}
for (int i = 0; i < dirs.length; i++) {
@@ -409,7 +436,9 @@
}
if (paths.size() == 0) {
usage();
- LOG.fatal("No indexes in " + idx + " or its subdirs.");
+ if (LOG.isFatalEnabled()) {
+ LOG.fatal("No indexes in " + idx + " or its subdirs.");
+ }
return;
}
}
@@ -432,7 +461,9 @@
dryrun = true;
} else {
usage();
- LOG.fatal("Unrecognized option: " + args[i]);
+ if (LOG.isFatalEnabled()) {
+ LOG.fatal("Unrecognized option: " + args[i]);
+ }
return;
}
}
@@ -465,20 +496,26 @@
is = conf.getConfResourceAsInputStream(qPath);
}
if (is == null) {
- LOG.fatal("Can't load queries from " + qPath);
+ if (LOG.isFatalEnabled()) {
+ LOG.fatal("Can't load queries from " + qPath);
+ }
return;
}
try {
queries = parseQueries(is);
} catch (Exception e) {
- LOG.fatal("Error parsing queries: " + e.getMessage());
+ if (LOG.isFatalEnabled()) {
+ LOG.fatal("Error parsing queries: " + e.getMessage());
+ }
return;
}
try {
PruneIndexTool pit = new PruneIndexTool(indexes, queries, checkers, force, dryrun);
pit.run();
} catch (Exception e) {
- LOG.fatal("Error running PruneIndexTool: " + e.getMessage());
+ if (LOG.isFatalEnabled()) {
+ LOG.fatal("Error running PruneIndexTool: " + e.getMessage());
+ }
return;
}
}
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/util/LogUtil.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/util/LogUtil.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/LogUtil.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/util/LogUtil.java Thu Jun 22 05:20:29 2006
@@ -51,7 +51,9 @@
ERROR = Log.class.getMethod("error", new Class[] { Object.class });
FATAL = Log.class.getMethod("fatal", new Class[] { Object.class });
} catch(Exception e) {
- LOG.error("Cannot init log methods", e);
+ if (LOG.isErrorEnabled()) {
+ LOG.error("Cannot init log methods", e);
+ }
}
}
@@ -99,7 +101,9 @@
try {
method.invoke(logger, new Object[] { toString().trim() });
} catch (Exception e) {
- LOG.fatal("Cannot log with method [" + method + "]", e);
+ if (LOG.isFatalEnabled()) {
+ LOG.fatal("Cannot log with method [" + method + "]", e);
+ }
}
reset();
scan = 0;
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/util/ThreadPool.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/util/ThreadPool.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/ThreadPool.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/util/ThreadPool.java Thu Jun 22 05:20:29 2006
@@ -74,7 +74,9 @@
t.start();
}
Log l = LogFactory.getLog("org.apache.nutch.util");
- l.debug("ThreadPool created with " + numThreads + " threads.");
+ if (l.isDebugEnabled()) {
+ l.debug("ThreadPool created with " + numThreads + " threads.");
+ }
}
/**
@@ -130,6 +132,8 @@
public void shutdown() {
running = false;
Log l = LogFactory.getLog("org.apache.nutch.util");
- l.debug("ThreadPool shutting down.");
+ if (l.isDebugEnabled()) {
+ l.debug("ThreadPool shutting down.");
+ }
}
}
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/util/mime/MimeTypesReader.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/util/mime/MimeTypesReader.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/mime/MimeTypesReader.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/util/mime/MimeTypesReader.java Thu Jun 22 05:20:29 2006
@@ -68,7 +68,9 @@
Document document = builder.parse(new InputSource(stream));
types = visit(document);
} catch (Exception e) {
- logger.warn(e.toString() + " while loading mime-types");
+ if (logger.isWarnEnabled()) {
+ logger.warn(e.toString() + " while loading mime-types");
+ }
types = new MimeType[0];
}
return types;
Modified: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/Clusterer.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/Clusterer.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/Clusterer.java (original)
+++ lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/Clusterer.java Thu Jun 22 05:20:29 2006
@@ -128,13 +128,19 @@
try {
Language lang = AllKnownLanguages.getLanguageForIsoCode(lcode);
if (lang == null) {
- logger.warn("Language not supported in Carrot2: " + lcode);
+ if (logger.isWarnEnabled()) {
+ logger.warn("Language not supported in Carrot2: " + lcode);
+ }
} else {
languageList.add(lang);
- logger.debug("Language loaded: " + lcode);
+ if (logger.isDebugEnabled()) {
+ logger.debug("Language loaded: " + lcode);
+ }
}
} catch (Throwable t) {
- logger.warn("Language could not be loaded: " + lcode, t);
+ if (logger.isWarnEnabled()) {
+ logger.warn("Language could not be loaded: " + lcode, t);
+ }
}
}
return new LingoLocalFilterComponent(
@@ -222,8 +228,10 @@
this.languages = conf.getStrings(CONF_PROP_LANGUAGES);
}
- logger.info("Default language: " + defaultLanguage);
- logger.info("Enabled languages: " + Arrays.asList(languages));
+ if (logger.isInfoEnabled()) {
+ logger.info("Default language: " + defaultLanguage);
+ logger.info("Enabled languages: " + Arrays.asList(languages));
+ }
initialize();
}
Modified: lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCDeleteUnlicensedTool.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCDeleteUnlicensedTool.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCDeleteUnlicensedTool.java (original)
+++ lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCDeleteUnlicensedTool.java Thu Jun 22 05:20:29 2006
@@ -95,7 +95,9 @@
CCDeleteUnlicensedTool dd = new CCDeleteUnlicensedTool(readers);
int count = dd.deleteUnlicensed();
- LOG.info("CC: deleted "+count+" out of "+maxDoc);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("CC: deleted "+count+" out of "+maxDoc);
+ }
dd.close();
}
}
Modified: lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java Thu Jun 22 05:20:29 2006
@@ -56,7 +56,9 @@
// index the license
String licenseUrl = metadata.get(CreativeCommons.LICENSE_URL);
if (licenseUrl != null) {
- LOG.info("CC: indexing " + licenseUrl + " for: " + url.toString());
+ if (LOG.isInfoEnabled()) {
+ LOG.info("CC: indexing " + licenseUrl + " for: " + url.toString());
+ }
// add the entire license as cc:license=xxx
addFeature(doc, "license=" + licenseUrl);
@@ -99,7 +101,9 @@
addFeature(doc, feature);
}
} catch (MalformedURLException e) {
- LOG.warn("CC: failed to parse url: " + urlString + " : " + e);
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("CC: failed to parse url: " + urlString + " : " + e);
+ }
}
}
Modified: lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java Thu Jun 22 05:20:29 2006
@@ -76,13 +76,17 @@
// add license to metadata
if (licenseUrl != null) {
- LOG.info("CC: found "+licenseUrl+" in "+licenseLocation+" of "+base);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("CC: found "+licenseUrl+" in "+licenseLocation+" of "+base);
+ }
metadata.add(CreativeCommons.LICENSE_URL, licenseUrl);
metadata.add(CreativeCommons.LICENSE_LOCATION, licenseLocation);
}
if (walker.workType != null) {
- LOG.info("CC: found "+walker.workType+" in "+base);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("CC: found "+walker.workType+" in "+base);
+ }
metadata.add(CreativeCommons.WORK_TYPE, walker.workType);
}
@@ -175,7 +179,9 @@
DocumentBuilder parser = FACTORY.newDocumentBuilder();
doc = parser.parse(new InputSource(new StringReader(comment)));
} catch (Exception e) {
- LOG.warn("CC: Failed to parse RDF in "+base+": "+e);
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("CC: Failed to parse RDF in "+base+": "+e);
+ }
//e.printStackTrace();
return;
}
@@ -183,7 +189,7 @@
// check that root is rdf:RDF
NodeList roots = doc.getElementsByTagNameNS(RDF_NS, "RDF");
if (roots.getLength() != 1) {
- LOG.warn("CC: No RDF root in "+base);
+ if (LOG.isWarnEnabled()) { LOG.warn("CC: No RDF root in "+base); }
return;
}
Element rdf = (Element)roots.item(0);
@@ -217,7 +223,9 @@
// add object and predicate to metadata
// metadata.put(object, predicate);
- // LOG.info("CC: found: "+predicate+"="+object);
+ // if (LOG.isInfoEnabled()) {
+ // LOG.info("CC: found: "+predicate+"="+object);
+ // }
}
}
Modified: lucene/nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java Thu Jun 22 05:20:29 2006
@@ -75,7 +75,9 @@
doc.add(new Field("anchor", anchors[i], Field.Store.NO, Field.Index.TOKENIZED));
}
} catch (IOException ioe) {
- LOG.warn("BasicIndexingFilter: can't get anchors for " + url.toString());
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("BasicIndexingFilter: can't get anchors for " + url.toString());
+ }
}
// title
Modified: lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java Thu Jun 22 05:20:29 2006
@@ -155,9 +155,13 @@
"dd.MM.yyyy zzz"
});
time = parsedDate.getTime();
- // LOG.warn(url + ": parsed date: " + date +" to:"+time);
+ // if (LOG.isWarnEnabled()) {
+ // LOG.warn(url + ": parsed date: " + date +" to:"+time);
+ // }
} catch (Exception e2) {
- LOG.warn(url + ": can't parse erroneous date: " + date);
+ if (LOG.isWarnEnabled()) {
+ LOG.warn(url + ": can't parse erroneous date: " + date);
+ }
}
}
return time;
@@ -194,7 +198,7 @@
try {
mimeType = new MimeType(contentType);
} catch (MimeTypeException e) {
- LOG.warn(url + e.toString());
+ if (LOG.isWarnEnabled()) { LOG.warn(url + e.toString()); }
mimeType = null;
}
}
Modified: lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java (original)
+++ lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java Thu Jun 22 05:20:29 2006
@@ -68,7 +68,7 @@
}
}
} catch (Exception e) {
- LOG.fatal(e.toString());
+ if (LOG.isFatalEnabled()) { LOG.fatal(e.toString()); }
}
}
Modified: lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIdentifier.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIdentifier.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIdentifier.java (original)
+++ lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIdentifier.java Thu Jun 22 05:20:29 2006
@@ -121,11 +121,13 @@
p.load(this.getClass().getResourceAsStream("langmappings.properties"));
Enumeration alllanguages = p.keys();
-
- LOG.info(new StringBuffer()
- .append("Language identifier configuration [")
- .append(minLength).append("-").append(maxLength)
- .append("/").append(analyzeLength).append("]").toString());
+
+ if (LOG.isInfoEnabled()) {
+ LOG.info(new StringBuffer()
+ .append("Language identifier configuration [")
+ .append(minLength).append("-").append(maxLength)
+ .append("/").append(analyzeLength).append("]").toString());
+ }
StringBuffer list = new StringBuffer("Language identifier plugin supports:");
HashMap tmpIdx = new HashMap();
@@ -155,7 +157,7 @@
list.append(" " + lang + "(" + ngrams.size() + ")");
is.close();
} catch (IOException e1) {
- LOG.fatal(e1.toString());
+ if (LOG.isFatalEnabled()) { LOG.fatal(e1.toString()); }
}
}
}
@@ -169,11 +171,11 @@
ngramsIdx.put(entry.getSeq(), array);
}
}
- LOG.info(list.toString());
+ if (LOG.isInfoEnabled()) { LOG.info(list.toString()); }
// Create the suspect profile
suspect = new NGramProfile("suspect", minLength, maxLength);
} catch (Exception e) {
- LOG.fatal(e.toString());
+ if (LOG.isFatalEnabled()) { LOG.fatal(e.toString()); }
}
}
Modified: lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/NGramProfile.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/NGramProfile.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/NGramProfile.java (original)
+++ lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/NGramProfile.java Thu Jun 22 05:20:29 2006
@@ -319,7 +319,7 @@
}
}
} catch (Exception e) {
- LOG.fatal(e.toString());
+ if (LOG.isFatalEnabled()) { LOG.fatal(e.toString()); }
}
return sum;
}
@@ -523,7 +523,7 @@
}
} catch (Exception e) {
- LOG.fatal("Caught an exception:" + e);
+ if (LOG.isFatalEnabled()) { LOG.fatal("Caught an exception:" + e); }
}
}
Modified: lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java (original)
+++ lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java Thu Jun 22 05:20:29 2006
@@ -178,7 +178,9 @@
}
} catch (Throwable e) {
// XXX Maybe bogus: assume this is allowed.
- logger.trace("Exception checking robot rules for " + url + ": " + e);
+ if (logger.isTraceEnabled()) {
+ logger.trace("Exception checking robot rules for " + url + ": " + e);
+ }
}
String host = blockAddr(u);
@@ -231,10 +233,10 @@
// handle this in the higher layer.
return new ProtocolOutput(c, new ProtocolStatus(protocolStatusCode, u));
} else if (code == 400) { // bad request, mark as GONE
- logger.trace("400 Bad request: " + u);
+ if (logger.isTraceEnabled()) { logger.trace("400 Bad request: " + u); }
return new ProtocolOutput(c, new ProtocolStatus(ProtocolStatus.GONE, u));
} else if (code == 401) { // requires authorization, but no valid auth provided.
- logger.trace("401 Authentication Required");
+ if (logger.isTraceEnabled()) { logger.trace("401 Authentication Required"); }
return new ProtocolOutput(c, new ProtocolStatus(ProtocolStatus.ACCESS_DENIED, "Authentication required: "
+ urlString));
} else if (code == 404) {
@@ -392,8 +394,12 @@
String agentURL,
String agentEmail) {
- if ( (agentName == null) || (agentName.trim().length() == 0) )
- LOGGER.fatal("No User-Agent string set (http.agent.name)!");
+ if ( (agentName == null) || (agentName.trim().length() == 0) ) {
+ // TODO : NUTCH-258
+ if (LOGGER.isFatalEnabled()) {
+ LOGGER.fatal("No User-Agent string set (http.agent.name)!");
+ }
+ }
StringBuffer buf= new StringBuffer();
@@ -428,17 +434,20 @@
}
protected void logConf() {
- logger.info("http.proxy.host = " + proxyHost);
- logger.info("http.proxy.port = " + proxyPort);
- logger.info("http.timeout = " + timeout);
- logger.info("http.content.limit = " + maxContent);
- logger.info("http.agent = " + userAgent);
- logger.info("fetcher.server.delay = " + serverDelay);
- logger.info("http.max.delays = " + maxDelays);
+ if (logger.isInfoEnabled()) {
+ logger.info("http.proxy.host = " + proxyHost);
+ logger.info("http.proxy.port = " + proxyPort);
+ logger.info("http.timeout = " + timeout);
+ logger.info("http.content.limit = " + maxContent);
+ logger.info("http.agent = " + userAgent);
+ logger.info("fetcher.server.delay = " + serverDelay);
+ logger.info("http.max.delays = " + maxDelays);
+ }
}
public byte[] processGzipEncoded(byte[] compressed, URL url) throws IOException {
- LOGGER.trace("uncompressing....");
+
+ if (LOGGER.isTraceEnabled()) { LOGGER.trace("uncompressing...."); }
byte[] content = GZIPUtils.unzipBestEffort(compressed, getMaxContent());
Modified: lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java (original)
+++ lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java Thu Jun 22 05:20:29 2006
@@ -206,11 +206,15 @@
//
if (agents.size() == 0) {
agents.add(agentName);
- LOG.fatal("No agents listed in 'http.robots.agents' property!");
+ if (LOG.isFatalEnabled()) {
+ LOG.fatal("No agents listed in 'http.robots.agents' property!");
+ }
} else if (!((String)agents.get(0)).equalsIgnoreCase(agentName)) {
agents.add(0, agentName);
- LOG.fatal("Agent we advertise (" + agentName
- + ") not listed first in 'http.robots.agents' property!");
+ if (LOG.isFatalEnabled()) {
+ LOG.fatal("Agent we advertise (" + agentName
+ + ") not listed first in 'http.robots.agents' property!");
+ }
}
setRobotNames((String[]) agents.toArray(new String[agents.size()]));
}
@@ -320,7 +324,9 @@
try {
path= URLDecoder.decode(path, CHARACTER_ENCODING);
} catch (Exception e) {
- LOG.warn("error parsing robots rules- can't decode path: " + path);
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("error parsing robots rules- can't decode path: " + path);
+ }
}
if (path.length() == 0) { // "empty rule"
@@ -388,7 +394,7 @@
RobotRuleSet robotRules = (RobotRuleSet)CACHE.get(host);
if (robotRules == null) { // cache miss
- LOG.trace("cache miss " + url);
+ if (LOG.isTraceEnabled()) { LOG.trace("cache miss " + url); }
try {
Response response = http.getResponse(new URL(url, "/robots.txt"),
new CrawlDatum(), true);
@@ -400,7 +406,9 @@
else
robotRules = EMPTY_RULES; // use default rules
} catch (Throwable t) {
- LOG.info("Couldn't get robots.txt for " + url + ": " + t.toString());
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Couldn't get robots.txt for " + url + ": " + t.toString());
+ }
robotRules = EMPTY_RULES;
}
Modified: lucene/nutch/trunk/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/RegexURLFilterBase.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/RegexURLFilterBase.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/RegexURLFilterBase.java (original)
+++ lucene/nutch/trunk/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/RegexURLFilterBase.java Thu Jun 22 05:20:29 2006
@@ -134,12 +134,12 @@
String file = getRulesFile(conf);
Reader reader = conf.getConfResourceAsReader(file);
if (reader == null) {
- LOG.fatal("Can't find resource: " + file);
+ if (LOG.isFatalEnabled()) { LOG.fatal("Can't find resource: " + file); }
} else {
try {
rules = readRulesFile(reader);
} catch (IOException e) {
- LOG.fatal(e.getMessage());
+ if (LOG.isFatalEnabled()) { LOG.fatal(e.getMessage()); }
//TODO mb@media-style.com: throw Exception? Because broken api.
throw new RuntimeException(e.getMessage(), e);
}
@@ -187,7 +187,7 @@
}
String regex = line.substring(1);
- LOG.trace("Adding rule [" + regex + "]");
+ if (LOG.isTraceEnabled()) { LOG.trace("Adding rule [" + regex + "]"); }
RegexRule rule = createRule(sign, regex);
rules.add(rule);
}
Modified: lucene/nutch/trunk/src/plugin/ontology/src/java/org/apache/nutch/ontology/jena/OntologyImpl.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/ontology/src/java/org/apache/nutch/ontology/jena/OntologyImpl.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/ontology/src/java/org/apache/nutch/ontology/jena/OntologyImpl.java (original)
+++ lucene/nutch/trunk/src/plugin/ontology/src/java/org/apache/nutch/ontology/jena/OntologyImpl.java Thu Jun 22 05:20:29 2006
@@ -18,6 +18,7 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.ontology.*;
+import org.apache.nutch.util.LogUtil;
import org.apache.nutch.util.NutchConfiguration;
import com.hp.hpl.jena.ontology.Individual;
@@ -71,7 +72,7 @@
//only initialize all the static variables
//if first time called to this ontology constructor
if (ontology == null) {
- LOG.info( "creating new ontology");
+ if (LOG.isInfoEnabled()) { LOG.info( "creating new ontology"); }
parser = new OwlParser();
ontology = this;
}
@@ -101,14 +102,13 @@
private void load (Object m, String url) {
try {
- LOG.info( "reading "+url);
+ if (LOG.isInfoEnabled()) { LOG.info( "reading "+url); }
((OntModel)m).read(url);
} catch (Exception e) {
- LOG.fatal("failed on attempting to read ontology "+url);
- LOG.fatal(e.getMessage());
- StackTraceElement[] traces = e.getStackTrace();
- for (int i=0; i<traces.length; i++) {
- LOG.fatal(traces[i].toString());
+ if (LOG.isFatalEnabled()) {
+ LOG.fatal("failed on attempting to read ontology "+url);
+ LOG.fatal(e.getMessage());
+ e.printStackTrace(LogUtil.getFatalStream(LOG));
}
}
}
@@ -332,11 +332,11 @@
String urls = conf.get("extension.ontology.urls");
if (urls==null || urls.trim().equals("")) {
- LOG.fatal("No ontology url found.");
+ if (LOG.isFatalEnabled()) { LOG.fatal("No ontology url found."); }
return;
}
ontology.load(urls.split("\\s+"));
- LOG.info( "created new ontology");
+ if (LOG.isInfoEnabled()) { LOG.info( "created new ontology"); }
for (Iterator i = getParser().rootClasses( getModel() );
i.hasNext(); ) {
Modified: lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java Thu Jun 22 05:20:29 2006
@@ -126,7 +126,9 @@
metadata.set(Metadata.ORIGINAL_CHAR_ENCODING, encoding);
if ((encoding = StringUtil.resolveEncodingAlias(encoding)) != null) {
metadata.set(Metadata.CHAR_ENCODING_FOR_CONVERSION, encoding);
- LOG.trace(base + ": setting encoding to " + encoding);
+ if (LOG.isTraceEnabled()) {
+ LOG.trace(base + ": setting encoding to " + encoding);
+ }
}
}
@@ -137,7 +139,9 @@
metadata.set(Metadata.ORIGINAL_CHAR_ENCODING, encoding);
if ((encoding = StringUtil.resolveEncodingAlias(encoding)) != null) {
metadata.set(Metadata.CHAR_ENCODING_FOR_CONVERSION, encoding);
- LOG.trace(base + ": setting encoding to " + encoding);
+ if (LOG.isTraceEnabled()) {
+ LOG.trace(base + ": setting encoding to " + encoding);
+ }
}
}
}
@@ -151,10 +155,12 @@
// same share)
encoding = defaultCharEncoding;
metadata.set(Metadata.CHAR_ENCODING_FOR_CONVERSION, defaultCharEncoding);
- LOG.trace(base + ": falling back to " + defaultCharEncoding);
+ if (LOG.isTraceEnabled()) {
+ LOG.trace(base + ": falling back to " + defaultCharEncoding);
+ }
}
input.setEncoding(encoding);
- LOG.trace("Parsing...");
+ if (LOG.isTraceEnabled()) { LOG.trace("Parsing..."); }
root = parse(input);
} catch (IOException e) {
return new ParseStatus(e).getEmptyParse(getConf());
@@ -169,15 +175,17 @@
// get meta directives
HTMLMetaProcessor.getMetaTags(metaTags, root, base);
- LOG.trace("Meta tags for " + base + ": " + metaTags.toString());
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Meta tags for " + base + ": " + metaTags.toString());
+ }
// check meta directives
if (!metaTags.getNoIndex()) { // okay to index
StringBuffer sb = new StringBuffer();
- LOG.trace("Getting text...");
+ if (LOG.isTraceEnabled()) { LOG.trace("Getting text..."); }
utils.getText(sb, root); // extract text
text = sb.toString();
sb.setLength(0);
- LOG.trace("Getting title...");
+ if (LOG.isTraceEnabled()) { LOG.trace("Getting title..."); }
utils.getTitle(sb, root); // extract title
title = sb.toString().trim();
}
@@ -185,10 +193,12 @@
if (!metaTags.getNoFollow()) { // okay to follow links
ArrayList l = new ArrayList(); // extract outlinks
URL baseTag = utils.getBase(root);
- LOG.trace("Getting links...");
+ if (LOG.isTraceEnabled()) { LOG.trace("Getting links..."); }
utils.getOutlinks(baseTag!=null?baseTag:base, l, root);
outlinks = (Outlink[])l.toArray(new Outlink[l.size()]);
- LOG.trace("found "+outlinks.length+" outlinks in "+content.getUrl());
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("found "+outlinks.length+" outlinks in "+content.getUrl());
+ }
}
if (!metaTags.getNoCache()) { // okay to cache
@@ -256,7 +266,9 @@
frag = doc.createDocumentFragment();
parser.parse(input, frag);
if (!frag.hasChildNodes()) break;
- LOG.info(" - new frag, " + frag.getChildNodes().getLength() + " nodes.");
+ if (LOG.isInfoEnabled()) {
+ LOG.info(" - new frag, " + frag.getChildNodes().getLength() + " nodes.");
+ }
res.appendChild(frag);
}
} catch (Exception x) { x.printStackTrace(LogUtil.getWarnStream(LOG));};
Modified: lucene/nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java Thu Jun 22 05:20:29 2006
@@ -87,7 +87,9 @@
if (i > 0) script.append('\n');
script.append(nn.item(i).getNodeValue());
}
- //LOG.info("script: language=" + lang + ", text: " + script.toString());
+ // if (LOG.isInfoEnabled()) {
+ // LOG.info("script: language=" + lang + ", text: " + script.toString());
+ // }
Outlink[] links = getJSLinks(script.toString(), base, base);
if (links != null && links.length > 0) outlinks.addAll(Arrays.asList(links));
// no other children of interest here, go one level up.
@@ -164,7 +166,7 @@
try {
baseURL = new URL(base);
} catch (Exception e) {
- LOG.error("getJSLinks", e);
+ if (LOG.isErrorEnabled()) { LOG.error("getJSLinks", e); }
}
try {
@@ -189,20 +191,22 @@
url = result.group(2);
PatternMatcherInput input1 = new PatternMatcherInput(url);
if (!matcher1.matches(input1, pattern1)) {
- //LOG.trace(" - invalid '" + url + "'");
+ //if (LOG.isTraceEnabled()) { LOG.trace(" - invalid '" + url + "'"); }
continue;
}
if (url.startsWith("www.")) {
url = "http://" + url;
} else url = new URL(baseURL, url).toString();
url = url.replaceAll("&amp;", "&");
- LOG.trace(" - outlink from JS: '" + url + "'");
+ if (LOG.isTraceEnabled()) {
+ LOG.trace(" - outlink from JS: '" + url + "'");
+ }
outlinks.add(new Outlink(url, anchor, getConf()));
}
} catch (Exception ex) {
// if it is a malformed URL we just throw it away and continue with
// extraction.
- LOG.error("getJSLinks", ex);
+ if (LOG.isErrorEnabled()) { LOG.error("getJSLinks", ex); }
}
final Outlink[] retval;
Modified: lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/ContentReaderListener.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/ContentReaderListener.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/ContentReaderListener.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/ContentReaderListener.java Thu Jun 22 05:20:29 2006
@@ -65,8 +65,10 @@
if (event == null || event.getName() == null
|| !event.getName().startsWith(PPTConstants.POWERPOINT_DOCUMENT)) {
- LOG.warn("Stream not processed. It is not a PowerPoint document: : "
- + event.getName());
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Stream not processed. It is not a PowerPoint document: : "
+ + event.getName());
+ }
return;
}
@@ -108,14 +110,22 @@
containerTextBox = extractTextBoxes(containerTextBox, offset,
pptdata, offsetPD);
} else if (PPTConstants.PPT_ATOM_DRAWINGGROUP == type) {
- // LOG.trace("PPT_DRAWINGGROUP_ATOM ignored: " + type);
+ // if (LOG.isTraceEnabled()) {
+ // LOG.trace("PPT_DRAWINGGROUP_ATOM ignored: " + type);
+ // }
} else if (PPTConstants.PPT_ATOM_TEXTBYTE == type) {
- // LOG.trace("PPT_TEXTBYTE_ATOM ignored: " + type);
+ // if (LOG.isTraceEnabled()) {
+ // LOG.trace("PPT_TEXTBYTE_ATOM ignored: " + type);
+ // }
} else if (PPTConstants.PPT_ATOM_TEXTCHAR == type) {
- // LOG.trace("PPT_TEXTCHAR_ATOM ignored: " + type);
+ // if (LOG.isTraceEnabled()) {
+ // LOG.trace("PPT_TEXTCHAR_ATOM ignored: " + type);
+ // }
} else {
// no action
- // LOG.trace("type not handled: " + type);
+ // if (LOG.isTraceEnabled()) {
+ // LOG.trace("type not handled: " + type);
+ // }
}
}
@@ -123,7 +133,7 @@
offsetPD);
if (slides.size() == 0) {
- LOG.info("No slides extracted!");
+ if (LOG.isInfoEnabled()) { LOG.info("No slides extracted!"); }
} else {
Slide slide = (Slide) slides.get(slides.size() - 1);
@@ -158,7 +168,7 @@
}
} catch (Throwable ex) {
// because of not killing complete crawling all Throwables are catched.
- LOG.error("processPOIFSReaderEvent", ex);
+ if (LOG.isErrorEnabled()) { LOG.error("processPOIFSReaderEvent", ex); }
}
}
@@ -205,7 +215,7 @@
if (currentID == PPTConstants.PPT_MASTERSLIDE) {
// Ignore Master Slide objects
- LOG.trace("Ignore master slide.");
+ if (LOG.isTraceEnabled()) { LOG.trace("Ignore master slide."); }
i++;
continue;
}
@@ -226,8 +236,10 @@
*/
if ((offsetPD - 20) != recordSize) {
// TODO something wrong? Probably an OLE-Object, which we ignore.
- LOG.debug("offsetPD - 20=" + (offsetPD - 20) + " recordsize="
- + recordSize);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("offsetPD - 20=" + (offsetPD - 20) + " recordsize="
+ + recordSize);
+ }
} else {
for (int startPos = i + 8; startPos < offsetPD - 20
&& startPos < recordSize; startPos++) { // && startPos <
@@ -290,22 +302,24 @@
} else {
// ignored
- // LOG.trace("Ignored atom type: " + type);
+ // if (LOG.isTraceEnabled()) {
+ // LOG.trace("Ignored atom type: " + type);
+ // }
}
} catch (Throwable e) {
- LOG.error("extractTextBoxes", e);
+ if (LOG.isErrorEnabled()) { LOG.error("extractTextBoxes", e); }
break;
}
}
}
} else {
- /*
- * Record type is ignored
- */
- // LOG.trace("Ignored record type: " + type);
+ // Record type is ignored
+ // if (LOG.isTraceEnabled()) {
+ // LOG.trace("Ignored record type: " + type);
+ // }
}
} catch (Throwable ee) {
- LOG.error("extractClientTextBoxes", ee);
+ if (LOG.isErrorEnabled()) { LOG.error("extractClientTextBoxes", ee); }
break;
}
}
@@ -355,8 +369,8 @@
byte value = pptdata[(int) ii + 2];
outStream.write(value);
} catch (ArrayIndexOutOfBoundsException ex) {
- LOG.trace("size=" + pptdata.length);
- LOG.error("extractSlides", ex);
+ if (LOG.isTraceEnabled()) { LOG.trace("size=" + pptdata.length); }
+ if (LOG.isErrorEnabled()) { LOG.error("extractSlides", ex); }
}
}
@@ -401,11 +415,13 @@
/*
* Diagram records are ignored
*/
- LOG.trace("Drawing Groups are ignored.");
+ if (LOG.isTraceEnabled()) { LOG.trace("Drawing Groups are ignored."); }
break;
} else {
// ignored
- // LOG.trace("Unhandled atomType: " + atomType);
+ // if (LOG.isTraceEnabled()) {
+ // LOG.trace("Unhandled atomType: " + atomType);
+ // }
}
}
Modified: lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/PPTExtractor.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/PPTExtractor.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/PPTExtractor.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/PPTExtractor.java Thu Jun 22 05:20:29 2006
@@ -47,7 +47,7 @@
input.reset();
if (input.available() > 0) {
this.reader.read(input);
- } else {
+ } else if (LOG.isWarnEnabled()) {
LOG.warn("Input <=0 :" + input.available());
}
return (this.text != null) ? text.toString() : null;
Modified: lucene/nutch/trunk/src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java Thu Jun 22 05:20:29 2006
@@ -125,8 +125,10 @@
return new ParseStatus(ParseStatus.FAILED,
"Can't decrypt document - invalid password. " + e).getEmptyParse(getConf());
} catch (Exception e) { // run time exception
- LOG.warn("General exception in PDF parser: "+e.getMessage());
- e.printStackTrace(LogUtil.getWarnStream(LOG));
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("General exception in PDF parser: "+e.getMessage());
+ e.printStackTrace(LogUtil.getWarnStream(LOG));
+ }
return new ParseStatus(ParseStatus.FAILED,
"Can't be handled as pdf document. " + e).getEmptyParse(getConf());
} finally {
Modified: lucene/nutch/trunk/src/plugin/parse-rss/src/java/org/apache/nutch/parse/rss/RSSParser.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-rss/src/java/org/apache/nutch/parse/rss/RSSParser.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-rss/src/java/org/apache/nutch/parse/rss/RSSParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-rss/src/java/org/apache/nutch/parse/rss/RSSParser.java Thu Jun 22 05:20:29 2006
@@ -95,8 +95,10 @@
theRSSChannels = ((FeedParserListenerImpl) listener).getChannels();
} catch (Exception e) { // run time exception
- e.printStackTrace(LogUtil.getWarnStream(LOG));
- LOG.trace("nutch:parse-rss:RSSParser Exception: " + e.getMessage());
+ if (LOG.isWarnEnabled()) {
+ e.printStackTrace(LogUtil.getWarnStream(LOG));
+ LOG.warn("nutch:parse-rss:RSSParser Exception: " + e.getMessage());
+ }
return new ParseStatus(ParseStatus.FAILED,
"Can't be handled as rss document. " + e).getEmptyParse(getConf());
}
@@ -130,10 +132,11 @@
theOutlinks.add(new Outlink(r.getLink(), "", getConf()));
}
} catch (MalformedURLException e) {
- LOG.info("nutch:parse-rss:RSSParser Exception: MalformedURL: "
- + r.getLink()
- + ": Attempting to continue processing outlinks");
- e.printStackTrace(LogUtil.getWarnStream(LOG));
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("MalformedURL: " + r.getLink());
+ LOG.warn("Attempting to continue processing outlinks");
+ e.printStackTrace(LogUtil.getWarnStream(LOG));
+ }
continue;
}
}
@@ -160,10 +163,11 @@
theOutlinks.add(new Outlink(whichLink, "", getConf()));
}
} catch (MalformedURLException e) {
- LOG.info("nutch:parse-rss:RSSParser Exception: MalformedURL: "
- + whichLink
- + ": Attempting to continue processing outlinks");
- e.printStackTrace(LogUtil.getWarnStream(LOG));
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("MalformedURL: " + whichLink);
+ LOG.warn("Attempting to continue processing outlinks");
+ e.printStackTrace(LogUtil.getWarnStream(LOG));
+ }
continue;
}
}
@@ -172,18 +176,24 @@
}
- LOG.trace("nutch:parse-rss:getParse:indexText=" + indexText);
- LOG.trace("nutch:parse-rss:getParse:contentTitle=" + contentTitle);
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("nutch:parse-rss:getParse:indexText=" + indexText);
+ LOG.trace("nutch:parse-rss:getParse:contentTitle=" + contentTitle);
+ }
- } else {
+ } else if (LOG.isTraceEnabled()) {
LOG.trace("nutch:parse-rss:Error:getParse: No RSS Channels recorded!");
}
// format the outlinks
Outlink[] outlinks = (Outlink[]) theOutlinks.toArray(new Outlink[theOutlinks.size()]);
- LOG.trace("nutch:parse-rss:getParse:found " + outlinks.length + " outlinks");
- // LOG.info("Outlinks: "+outlinks);
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("nutch:parse-rss:getParse:found " + outlinks.length + " outlinks");
+ }
+ // if (LOG.isInfoEnabled()) {
+ // LOG.info("Outlinks: "+outlinks);
+ // }
ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS,
contentTitle.toString(), outlinks, content.getMetadata());
Modified: lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java Thu Jun 22 05:20:29 2006
@@ -62,7 +62,7 @@
try {
final String contentLen = content.getMetadata().get(Response.CONTENT_LENGTH);
final int len = Integer.parseInt(contentLen);
- LOG.debug("ziplen: " + len);
+ if (LOG.isDebugEnabled()) { LOG.debug("ziplen: " + len); }
final byte[] contentInBytes = content.getContent();
final ByteArrayInputStream bainput = new ByteArrayInputStream(
contentInBytes);
@@ -101,7 +101,7 @@
content.getMetadata());
parseData.setConf(this.conf);
- LOG.trace("Zip file parsed sucessfully !!");
+ if (LOG.isTraceEnabled()) { LOG.trace("Zip file parsed sucessfully !!"); }
return new ParseImpl(resultText, parseData);
}
Modified: lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java Thu Jun 22 05:20:29 2006
@@ -106,8 +106,9 @@
resultText += entry.getName() + " " + parse.getText() + " ";
} catch (ParseException e) {
-
- LOG.info("fetch okay, but can't parse " + fname + ", reason: " + e.getMessage());
+ if (LOG.isInfoEnabled()) {
+ LOG.info("fetch okay, but can't parse " + fname + ", reason: " + e.getMessage());
+ }
}
}
}
Modified: lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java Thu Jun 22 05:20:29 2006
@@ -101,7 +101,9 @@
}
if (url.getPath() != url.getFile()) {
- File.LOG.warn("url.getPath() != url.getFile(): " + url);
+ if (File.LOG.isWarnEnabled()) {
+ File.LOG.warn("url.getPath() != url.getFile(): " + url);
+ }
}
String path = "".equals(url.getPath()) ? "/" : url.getPath();
@@ -179,8 +181,11 @@
&& (n = is.read(this.content, offset, len-offset)) >= 0) {
offset += n;
}
- if (offset < len) // keep whatever already have, but issue a warning
- File.LOG.warn("not enough bytes read from file: "+f.getPath());
+ if (offset < len) { // keep whatever already have, but issue a warning
+ if (File.LOG.isWarnEnabled()) {
+ File.LOG.warn("not enough bytes read from file: "+f.getPath());
+ }
+ }
is.close();
// set headers
Modified: lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java Thu Jun 22 05:20:29 2006
@@ -28,6 +28,7 @@
import org.apache.nutch.protocol.Content;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.net.protocols.Response;
+import org.apache.nutch.util.LogUtil;
import org.apache.hadoop.conf.Configuration;
@@ -92,15 +93,20 @@
if (!"ftp".equals(url.getProtocol()))
throw new FtpException("Not a ftp url:" + url);
- if (url.getPath() != url.getFile())
- Ftp.LOG.warn("url.getPath() != url.getFile(): " + url);
+ if (url.getPath() != url.getFile()) {
+ if (Ftp.LOG.isWarnEnabled()) {
+ Ftp.LOG.warn("url.getPath() != url.getFile(): " + url);
+ }
+ }
String path = "".equals(url.getPath()) ? "/" : url.getPath();
try {
if (ftp.followTalk) {
- Ftp.LOG.info("fetching "+url);
+ if (Ftp.LOG.isInfoEnabled()) {
+ Ftp.LOG.info("fetching "+url);
+ }
} else {
if (Ftp.LOG.isTraceEnabled()) {
Ftp.LOG.trace("fetching "+url);
@@ -113,14 +119,17 @@
// should start anew.
if (ftp.client != null && ftp.keepConnection
&& ftp.renewalTime < System.currentTimeMillis()) {
- Ftp.LOG.info("delete client because idled too long");
+ if (Ftp.LOG.isInfoEnabled()) {
+ Ftp.LOG.info("delete client because idled too long");
+ }
ftp.client = null;
}
// start anew if needed
if (ftp.client == null) {
- if (ftp.followTalk)
+ if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
Ftp.LOG.info("start client");
+ }
// the real client
ftp.client = new Client();
// when to renew, take the lesser
@@ -142,9 +151,10 @@
if (ftp.client.isConnected()) {
InetAddress remoteAddress = ftp.client.getRemoteAddress();
if (!addr.equals(remoteAddress)) {
- if (ftp.followTalk)
+ if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
Ftp.LOG.info("disconnect from "+remoteAddress
+" before connect to "+addr);
+ }
// quit from current site
ftp.client.logout();
ftp.client.disconnect();
@@ -154,20 +164,24 @@
// connect to current site if needed
if (!ftp.client.isConnected()) {
- if (ftp.followTalk)
+ if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
Ftp.LOG.info("connect to "+addr);
+ }
ftp.client.connect(addr);
if (!FTPReply.isPositiveCompletion(ftp.client.getReplyCode())) {
ftp.client.disconnect();
- Ftp.LOG.warn("ftp.client.connect() failed: "
- + addr + " " + ftp.client.getReplyString());
+ if (Ftp.LOG.isWarnEnabled()) {
+ Ftp.LOG.warn("ftp.client.connect() failed: "
+ + addr + " " + ftp.client.getReplyString());
+ }
this.code = 500; // http Internal Server Error
return;
}
- if (ftp.followTalk)
+ if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
Ftp.LOG.info("log into "+addr);
+ }
if (!ftp.client.login(ftp.userName, ftp.passWord)) {
// login failed.
@@ -176,7 +190,9 @@
// but throw exception, which then will be handled by caller
// (not dealt with here at all) .
ftp.client.disconnect();
- Ftp.LOG.warn("ftp.client.login() failed: "+addr);
+ if (Ftp.LOG.isWarnEnabled()) {
+ Ftp.LOG.warn("ftp.client.login() failed: "+addr);
+ }
this.code = 401; // http Unauthorized
return;
}
@@ -185,13 +201,16 @@
if (!ftp.client.setFileType(FTP.BINARY_FILE_TYPE)) {
ftp.client.logout();
ftp.client.disconnect();
- Ftp.LOG.warn("ftp.client.setFileType() failed: "+addr);
+ if (Ftp.LOG.isWarnEnabled()) {
+ Ftp.LOG.warn("ftp.client.setFileType() failed: "+addr);
+ }
this.code = 500; // http Internal Server Error
return;
}
- if (ftp.followTalk)
+ if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
Ftp.LOG.info("set parser for "+addr);
+ }
// SYST is valid only after login
try {
@@ -203,18 +222,24 @@
ftp.parser = (new DefaultFTPFileEntryParserFactory())
.createFileEntryParser(parserKey);
} catch (FtpExceptionBadSystResponse e) {
- Ftp.LOG.warn("ftp.client.getSystemName() failed: "+addr+" "+e);
+ if (Ftp.LOG.isWarnEnabled()) {
+ Ftp.LOG.warn("ftp.client.getSystemName() failed: "+addr+" "+e);
+ }
ftp.parser = null;
} catch (ParserInitializationException e) {
// ParserInitializationException is RuntimeException defined in
// org.apache.commons.net.ftp.parser.ParserInitializationException
- Ftp.LOG.warn("createFileEntryParser() failed. "+addr+" "+e);
+ if (Ftp.LOG.isWarnEnabled()) {
+ Ftp.LOG.warn("createFileEntryParser() failed. "+addr+" "+e);
+ }
ftp.parser = null;
} finally {
if (ftp.parser == null) {
// do not log as severe, otherwise
// FetcherThread/RequestScheduler will abort
- Ftp.LOG.warn("ftp.parser is null: "+addr);
+ if (Ftp.LOG.isWarnEnabled()) {
+ Ftp.LOG.warn("ftp.parser is null: "+addr);
+ }
ftp.client.logout();
ftp.client.disconnect();
this.code = 500; // http Internal Server Error
@@ -223,8 +248,9 @@
}
} else {
- if (ftp.followTalk)
+ if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
Ftp.LOG.info("use existing connection");
+ }
}
this.content = null;
@@ -239,30 +265,32 @@
if (ftp.client != null && ftp.keepConnection) {
ftp.renewalTime = System.currentTimeMillis()
+ ((ftp.timeout<ftp.serverTimeout) ? ftp.timeout : ftp.serverTimeout);
- if (ftp.followTalk)
+ if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
Ftp.LOG.info("reset renewalTime to "
+ftp.httpDateFormat.toString(ftp.renewalTime));
+ }
}
// getDirAsHttpResponse() or getFileAsHttpResponse() above
// may have deleted ftp.client
if (ftp.client != null && !ftp.keepConnection) {
- if (ftp.followTalk)
+ if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
Ftp.LOG.info("disconnect from "+addr);
+ }
ftp.client.logout();
ftp.client.disconnect();
}
} catch (Exception e) {
- ftp.LOG.warn(""+e);
- StackTraceElement stes[] = e.getStackTrace();
- for (int i=0; i<stes.length; i++) {
- ftp.LOG.warn(" "+stes[i].toString());
+ if (ftp.LOG.isWarnEnabled()) {
+ ftp.LOG.warn(""+e);
+ e.printStackTrace(LogUtil.getWarnStream(ftp.LOG));
}
// for any un-foreseen exception (run time exception or not),
// do ultimate clean and leave ftp.client for garbage collection
- if (ftp.followTalk)
+ if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
Ftp.LOG.info("delete client due to exception");
+ }
ftp.client = null;
// or do explicit garbage collection?
// System.gc();
@@ -313,17 +341,20 @@
// control connection is off, clean up
// ftp.client.disconnect();
- if (ftp.followTalk)
+ if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
Ftp.LOG.info("delete client because server cut off control channel: "+e);
+ }
ftp.client = null;
// in case this FtpExceptionControlClosedByForcedDataClose is
// thrown by retrieveList() (not retrieveFile()) above,
if (os == null) { // indicating throwing by retrieveList()
//throw new FtpException("fail to get attibutes: "+path);
- Ftp.LOG.warn(
- "Please try larger maxContentLength for ftp.client.retrieveList(). "
- + e);
+ if (Ftp.LOG.isWarnEnabled()) {
+ Ftp.LOG.warn(
+ "Please try larger maxContentLength for ftp.client.retrieveList(). "
+ + e);
+ }
// in a way, this is our request fault
this.code = 400; // http Bad request
return;
@@ -360,10 +391,12 @@
} catch (FtpExceptionUnknownForcedDataClose e) {
// Please note control channel is still live.
// in a way, this is our request fault
- Ftp.LOG.warn(
- "Unrecognized reply after forced close of data channel. "
- + "If this is acceptable, please modify Client.java accordingly. "
- + e);
+ if (Ftp.LOG.isWarnEnabled()) {
+ Ftp.LOG.warn(
+ "Unrecognized reply after forced close of data channel. "
+ + "If this is acceptable, please modify Client.java accordingly. "
+ + e);
+ }
this.code = 400; // http Bad Request
}
@@ -403,8 +436,9 @@
// control connection is off, clean up
// ftp.client.disconnect();
- if (ftp.followTalk)
+ if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
Ftp.LOG.info("delete client because server cut off control channel: "+e);
+ }
ftp.client = null;
this.content = list2html(list, path, "/".equals(path) ? false : true);
@@ -424,13 +458,15 @@
} catch (FtpExceptionUnknownForcedDataClose e) {
// Please note control channel is still live.
// in a way, this is our request fault
- Ftp.LOG.warn(
- "Unrecognized reply after forced close of data channel. "
- + "If this is acceptable, please modify Client.java accordingly. "
- + e);
+ if (Ftp.LOG.isWarnEnabled()) {
+ Ftp.LOG.warn(
+ "Unrecognized reply after forced close of data channel. "
+ + "If this is acceptable, please modify Client.java accordingly. "
+ + e);
+ }
this.code = 400; // http Bad Request
} catch (FtpExceptionCanNotHaveDataConnection e) {
- Ftp.LOG.warn(""+ e);
+ if (Ftp.LOG.isWarnEnabled()) { Ftp.LOG.warn(""+ e); }
this.code = 500; // http Iternal Server Error
}
Modified: lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/PrintCommandListener.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/PrintCommandListener.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/PrintCommandListener.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/PrintCommandListener.java Thu Jun 22 05:20:29 2006
@@ -43,7 +43,9 @@
try {
__logIt(event);
} catch (IOException e) {
- __logger.info("PrintCommandListener.protocolCommandSent(): "+e);
+ if (__logger.isInfoEnabled()) {
+ __logger.info("PrintCommandListener.protocolCommandSent(): "+e);
+ }
}
}
@@ -51,11 +53,14 @@
try {
__logIt(event);
} catch (IOException e) {
- __logger.info("PrintCommandListener.protocolReplyReceived(): "+e);
+ if (__logger.isInfoEnabled()) {
+ __logger.info("PrintCommandListener.protocolReplyReceived(): "+e);
+ }
}
}
private void __logIt(ProtocolCommandEvent event) throws IOException {
+ if (!__logger.isInfoEnabled()) { return; }
BufferedReader br =
new BufferedReader(new StringReader(event.getMessage()));
String line;
Modified: lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java Thu Jun 22 05:20:29 2006
@@ -114,7 +114,7 @@
String userAgent = http.getUserAgent();
if ((userAgent == null) || (userAgent.length() == 0)) {
- Http.LOG.fatal("User-agent is not set!");
+ if (Http.LOG.isFatalEnabled()) { Http.LOG.fatal("User-agent is not set!"); }
} else {
reqStr.append("User-Agent: ");
reqStr.append(userAgent);
@@ -230,19 +230,21 @@
ByteArrayOutputStream out = new ByteArrayOutputStream(Http.BUFFER_SIZE);
while (!doneChunks) {
- Http.LOG.trace("Http: starting chunk");
+ if (Http.LOG.isTraceEnabled()) {
+ Http.LOG.trace("Http: starting chunk");
+ }
readLine(in, line, false);
String chunkLenStr;
- // LOG.trace("chunk-header: '" + line + "'");
+ // if (LOG.isTraceEnabled()) { LOG.trace("chunk-header: '" + line + "'"); }
int pos= line.indexOf(";");
if (pos < 0) {
chunkLenStr= line.toString();
} else {
chunkLenStr= line.substring(0, pos);
- // LOG.trace("got chunk-ext: " + line.substring(pos+1));
+ // if (LOG.isTraceEnabled()) { LOG.trace("got chunk-ext: " + line.substring(pos+1)); }
}
chunkLenStr= chunkLenStr.trim();
int chunkLen;
@@ -276,7 +278,7 @@
// DANGER!!! Will printed GZIPed stuff right to your
// terminal!
- // LOG.trace("read: " + new String(bytes, 0, len));
+ // if (LOG.isTraceEnabled()) { LOG.trace("read: " + new String(bytes, 0, len)); }
out.write(bytes, 0, len);
chunkBytesRead+= len;
Modified: lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java Thu Jun 22 05:20:29 2006
@@ -45,7 +45,7 @@
context.init(null, new TrustManager[] { new DummyX509TrustManager(null) }, null);
return context;
} catch (Exception e) {
- LOG.error(e.getMessage(), e);
+ if (LOG.isErrorEnabled()) { LOG.error(e.getMessage(), e); }
throw new HttpClientError(e.toString());
}
}
Modified: lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java Thu Jun 22 05:20:29 2006
@@ -135,8 +135,10 @@
Credentials ntCreds = new NTCredentials(ntlmUsername, ntlmPassword, ntlmHost, ntlmDomain);
client.getState().setCredentials(new AuthScope(ntlmHost, AuthScope.ANY_PORT), ntCreds);
- LOG.info("Added NTLM credentials for " + ntlmUsername);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Added NTLM credentials for " + ntlmUsername);
+ }
}
- LOG.info("Configured Client");
+ if (LOG.isInfoEnabled()) { LOG.info("Configured Client"); }
}
}
Modified: lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java Thu Jun 22 05:20:29 2006
@@ -99,7 +99,9 @@
}
}
if (challenge == null) {
- LOG.trace("Authentication challenge is null");
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Authentication challenge is null");
+ }
return null;
}
@@ -110,8 +112,10 @@
if (challengeString.equals("NTLM")) {
challengeString="Basic realm=techweb";
}
-
- LOG.trace("Checking challengeString=" + challengeString);
+
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Checking challengeString=" + challengeString);
+ }
auth = HttpBasicAuthentication.getAuthentication(challengeString, conf);
if (auth != null) return auth;
Modified: lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java?rev=416346&r1=416345&r2=416346&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java Thu Jun 22 05:20:29 2006
@@ -57,14 +57,17 @@
setConf(conf);
this.challenge = challenge;
- LOG.trace("BasicAuthentication challenge is " + challenge);
credentials = new ArrayList();
String username = this.conf.get("http.auth.basic." + challenge + ".user");
- LOG.trace("BasicAuthentication username=" + username);
String password = this.conf.get("http.auth.basic." + challenge + ".password");
- LOG.trace("BasicAuthentication password=" + password);
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("BasicAuthentication challenge is " + challenge);
+ LOG.trace("BasicAuthentication username=" + username);
+ LOG.trace("BasicAuthentication password=" + password);
+ }
+
if (username == null) {
throw new HttpAuthenticationException("Username for " + challenge + " is null");
}
@@ -75,7 +78,9 @@
byte[] credBytes = (username + ":" + password).getBytes();
credentials.add("Authorization: Basic " + new String(Base64.encodeBase64(credBytes)));
- LOG.trace("Basic credentials: " + credentials);
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Basic credentials: " + credentials);
+ }
}
@@ -145,7 +150,9 @@
try {
newAuth = new HttpBasicAuthentication(realm, conf);
} catch (HttpAuthenticationException hae) {
- LOG.trace("HttpBasicAuthentication failed for " + challenge);
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("HttpBasicAuthentication failed for " + challenge);
+ }
}
authMap.put(realm, newAuth);
return newAuth;