You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by je...@apache.org on 2006/06/12 22:51:45 UTC
svn commit: r413742 [3/3] - in /lucene/nutch/trunk: ./ bin/ conf/
src/java/org/apache/nutch/analysis/ src/java/org/apache/nutch/clustering/
src/java/org/apache/nutch/crawl/ src/java/org/apache/nutch/fetcher/
src/java/org/apache/nutch/indexer/ src/java/...
Modified: lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/ContentReaderListener.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/ContentReaderListener.java?rev=413742&r1=413741&r2=413742&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/ContentReaderListener.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/ContentReaderListener.java Mon Jun 12 13:51:40 2006
@@ -19,9 +19,10 @@
import java.util.Hashtable;
import java.util.List;
import java.util.Vector;
-import java.util.logging.Logger;
-import org.apache.hadoop.util.LogFormatter;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
import org.apache.poi.hdf.extractor.Utils;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
@@ -40,8 +41,7 @@
*/
class ContentReaderListener implements POIFSReaderListener {
- private static final Logger LOG = LogFormatter
- .getLogger(ContentReaderListener.class.getName());
+ private static final Log LOG = LogFactory.getLog(ContentReaderListener.class);
/** Buffer holding the content of the file */
protected final transient StringBuffer buf;
@@ -65,8 +65,8 @@
if (event == null || event.getName() == null
|| !event.getName().startsWith(PPTConstants.POWERPOINT_DOCUMENT)) {
- LOG.warning("Stream not processed. It is not a PowerPoint document: : "
- + event.getName());
+ LOG.warn("Stream not processed. It is not a PowerPoint document: : "
+ + event.getName());
return;
}
@@ -108,14 +108,14 @@
containerTextBox = extractTextBoxes(containerTextBox, offset,
pptdata, offsetPD);
} else if (PPTConstants.PPT_ATOM_DRAWINGGROUP == type) {
- // LOG.finest("PPT_DRAWINGGROUP_ATOM ignored: " + type);
+ // LOG.trace("PPT_DRAWINGGROUP_ATOM ignored: " + type);
} else if (PPTConstants.PPT_ATOM_TEXTBYTE == type) {
- // LOG.finest("PPT_TEXTBYTE_ATOM ignored: " + type);
+ // LOG.trace("PPT_TEXTBYTE_ATOM ignored: " + type);
} else if (PPTConstants.PPT_ATOM_TEXTCHAR == type) {
- // LOG.finest("PPT_TEXTCHAR_ATOM ignored: " + type);
+ // LOG.trace("PPT_TEXTCHAR_ATOM ignored: " + type);
} else {
// no action
- // LOG.finest("type not handled: " + type);
+ // LOG.trace("type not handled: " + type);
}
}
@@ -158,7 +158,7 @@
}
} catch (Throwable ex) {
// because of not killing complete crawling all Throwables are catched.
- LOG.throwing(this.getClass().getName(), "processPOIFSReaderEvent", ex);
+ LOG.error("processPOIFSReaderEvent", ex);
}
}
@@ -205,7 +205,7 @@
if (currentID == PPTConstants.PPT_MASTERSLIDE) {
// Ignore Master Slide objects
- LOG.finest("Ignore master slide.");
+ LOG.trace("Ignore master slide.");
i++;
continue;
}
@@ -226,7 +226,7 @@
*/
if ((offsetPD - 20) != recordSize) {
// TODO something wrong? Probably an OLE-Object, which we ignore.
- LOG.finer("offsetPD - 20=" + (offsetPD - 20) + " recordsize="
+ LOG.debug("offsetPD - 20=" + (offsetPD - 20) + " recordsize="
+ recordSize);
} else {
for (int startPos = i + 8; startPos < offsetPD - 20
@@ -290,10 +290,10 @@
} else {
// ignored
- // LOG.finest("Ignored atom type: " + type);
+ // LOG.trace("Ignored atom type: " + type);
}
} catch (Throwable e) {
- LOG.throwing(this.getClass().getName(), "extractTextBoxes", e);
+ LOG.error("extractTextBoxes", e);
break;
}
}
@@ -302,10 +302,10 @@
/*
* Record type is ignored
*/
- // LOG.finest("Ignored record type: " + type);
+ // LOG.trace("Ignored record type: " + type);
}
} catch (Throwable ee) {
- LOG.throwing(this.getClass().getName(), "extractClientTextBoxes", ee);
+ LOG.error("extractClientTextBoxes", ee);
break;
}
}
@@ -355,8 +355,8 @@
byte value = pptdata[(int) ii + 2];
outStream.write(value);
} catch (ArrayIndexOutOfBoundsException ex) {
- LOG.finest("size=" + pptdata.length);
- LOG.throwing(this.getClass().getName(), "extractSlides", ex);
+ LOG.trace("size=" + pptdata.length);
+ LOG.error("extractSlides", ex);
}
}
@@ -401,11 +401,11 @@
/*
* Diagram records are ignored
*/
- LOG.finest("Drawing Groups are ignored.");
+ LOG.trace("Drawing Groups are ignored.");
break;
} else {
// ignored
- // LOG.finest("Unhandled atomType: " + atomType);
+ // LOG.trace("Unhandled atomType: " + atomType);
}
}
Modified: lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/PPTExtractor.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/PPTExtractor.java?rev=413742&r1=413741&r2=413742&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/PPTExtractor.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/PPTExtractor.java Mon Jun 12 13:51:40 2006
@@ -48,7 +48,7 @@
if (input.available() > 0) {
this.reader.read(input);
} else {
- LOG.warning("Input <=0 :" + input.available());
+ LOG.warn("Input <=0 :" + input.available());
}
return (this.text != null) ? text.toString() : null;
}
Modified: lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/test/org/apache/nutch/parse/mspowerpoint/TestMSPowerPointParser.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/test/org/apache/nutch/parse/mspowerpoint/TestMSPowerPointParser.java?rev=413742&r1=413741&r2=413742&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/test/org/apache/nutch/parse/mspowerpoint/TestMSPowerPointParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/test/org/apache/nutch/parse/mspowerpoint/TestMSPowerPointParser.java Mon Jun 12 13:51:40 2006
@@ -23,17 +23,18 @@
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
-import java.util.logging.Logger;
import junit.framework.TestCase;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseData;
import org.apache.nutch.parse.ParseUtil;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.Protocol;
import org.apache.nutch.protocol.ProtocolFactory;
-import org.apache.hadoop.util.LogFormatter;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.hadoop.io.UTF8;
@@ -54,8 +55,7 @@
* @version 1.0
*/
public class TestMSPowerPointParser extends TestCase {
- private static final Logger LOG = LogFormatter
- .getLogger(TestMSPowerPointParser.class.getName());
+ private static final Log LOG = LogFactory.getLog(TestMSPowerPointParser.class);
private static final String CHARSET = "UTF-8";
Modified: lucene/nutch/trunk/src/plugin/parse-oo/src/java/org/apache/nutch/parse/oo/OOParser.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-oo/src/java/org/apache/nutch/parse/oo/OOParser.java?rev=413742&r1=413741&r2=413742&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-oo/src/java/org/apache/nutch/parse/oo/OOParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-oo/src/java/org/apache/nutch/parse/oo/OOParser.java Mon Jun 12 13:51:40 2006
@@ -20,14 +20,16 @@
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.List;
-import java.util.logging.Logger;
import java.util.zip.*;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.util.LogFormatter;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.parse.*;
import org.apache.nutch.protocol.Content;
+import org.apache.nutch.util.LogUtil;
import org.apache.nutch.util.NutchConfiguration;
import org.jaxen.*;
import org.jaxen.jdom.JDOMXPath;
@@ -42,8 +44,7 @@
* @author Andrzej Bialecki
*/
public class OOParser implements Parser {
- public static final Logger LOG =
- LogFormatter.getLogger(OOParser.class.getName());
+ public static final Log LOG = LogFactory.getLog(OOParser.class);
private Configuration conf;
@@ -84,7 +85,7 @@
}
zis.close();
} catch (Exception e) { // run time exception
- e.printStackTrace();
+ e.printStackTrace(LogUtil.getWarnStream(LOG));
return new ParseStatus(ParseStatus.FAILED,
"Can't be handled as OO document. " + e).getEmptyParse(conf);
}
@@ -215,4 +216,4 @@
osw.close();
*/
}
-}
\ No newline at end of file
+}
Modified: lucene/nutch/trunk/src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java?rev=413742&r1=413741&r2=413742&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java Mon Jun 12 13:51:40 2006
@@ -25,10 +25,13 @@
import org.pdfbox.exceptions.CryptographyException;
import org.pdfbox.exceptions.InvalidPasswordException;
+// Commons Logging imports
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
import org.apache.nutch.protocol.Content;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.net.protocols.Response;
-import org.apache.hadoop.util.LogFormatter;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.parse.ParseStatus;
import org.apache.nutch.parse.Parser;
@@ -37,12 +40,11 @@
import org.apache.nutch.parse.ParseImpl;
import org.apache.nutch.parse.Outlink;
import org.apache.nutch.parse.OutlinkExtractor;
+import org.apache.nutch.util.LogUtil;
import java.text.SimpleDateFormat;
import java.util.Calendar;
-import java.util.logging.Logger;
-
import java.io.ByteArrayInputStream;
import java.io.IOException;
@@ -60,29 +62,9 @@
*********************************************/
public class PdfParser implements Parser {
- public static final Logger LOG =
- LogFormatter.getLogger("org.apache.nutch.parse.pdf");
+ public static final Log LOG = LogFactory.getLog("org.apache.nutch.parse.pdf");
private Configuration conf;
- public PdfParser () {
- // redirect org.apache.log4j.Logger to java's native logger, in order
- // to, at least, suppress annoying log4j warnings.
- // Note on 20040614 by Xing:
- // log4j is used by pdfbox. This snippet'd better be moved
- // to a common place shared by all parsers that use log4j.
- org.apache.log4j.Logger rootLogger =
- org.apache.log4j.Logger.getRootLogger();
-
- rootLogger.setLevel(org.apache.log4j.Level.INFO);
-
- org.apache.log4j.Appender appender = new org.apache.log4j.WriterAppender(
- new org.apache.log4j.SimpleLayout(),
- org.apache.hadoop.util.LogFormatter.getLogStream(
- this.LOG, java.util.logging.Level.INFO));
-
- rootLogger.addAppender(appender);
- }
-
public Parse getParse(Content content) {
// in memory representation of pdf file
@@ -143,8 +125,8 @@
return new ParseStatus(ParseStatus.FAILED,
"Can't decrypt document - invalid password. " + e).getEmptyParse(getConf());
} catch (Exception e) { // run time exception
- LOG.warning("General exception in PDF parser: "+e.getMessage());
- e.printStackTrace();
+ LOG.warn("General exception in PDF parser: "+e.getMessage());
+ e.printStackTrace(LogUtil.getWarnStream(LOG));
return new ParseStatus(ParseStatus.FAILED,
"Can't be handled as pdf document. " + e).getEmptyParse(getConf());
} finally {
Modified: lucene/nutch/trunk/src/plugin/parse-rss/src/java/org/apache/nutch/parse/rss/RSSParser.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-rss/src/java/org/apache/nutch/parse/rss/RSSParser.java?rev=413742&r1=413741&r2=413742&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-rss/src/java/org/apache/nutch/parse/rss/RSSParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-rss/src/java/org/apache/nutch/parse/rss/RSSParser.java Mon Jun 12 13:51:40 2006
@@ -19,14 +19,15 @@
// JDK imports
import java.io.ByteArrayInputStream;
import java.net.MalformedURLException;
-import java.util.logging.Logger;
import java.util.List;
import java.util.Vector;
-import java.util.logging.Level;
+
+// Commons Logging imports
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
// Hadoop imports
import org.apache.hadoop.io.UTF8;
-import org.apache.hadoop.util.LogFormatter;
import org.apache.hadoop.conf.Configuration;
// Nutch imports
@@ -42,6 +43,7 @@
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.Protocol;
import org.apache.nutch.protocol.ProtocolFactory;
+import org.apache.nutch.util.LogUtil;
import org.apache.nutch.util.NutchConfiguration;
// RSS parsing imports
@@ -60,37 +62,11 @@
* </p>
*/
public class RSSParser implements Parser {
- public static final Logger LOG = LogFormatter
- .getLogger("org.apache.nutch.parse.rss");
+ public static final Log LOG = LogFactory.getLog("org.apache.nutch.parse.rss");
private Configuration conf;
/**
* <p>
- * Default Constructor
- * </p>
- */
- public RSSParser() {
-
- // redirect org.apache.log4j.Logger to java's native logger, in order
- // to, at least, suppress annoying log4j warnings.
- // Note on 20040614 by Xing:
- // log4j is used by pdfbox. This snippet'd better be moved
- // to a common place shared by all parsers that use log4j.
- org.apache.log4j.Logger rootLogger = org.apache.log4j.Logger
- .getRootLogger();
-
- rootLogger.setLevel(org.apache.log4j.Level.INFO);
-
- org.apache.log4j.Appender appender = new org.apache.log4j.WriterAppender(
- new org.apache.log4j.SimpleLayout(),
- org.apache.hadoop.util.LogFormatter.getLogStream(this.LOG,
- java.util.logging.Level.INFO));
-
- rootLogger.addAppender(appender);
- }
-
- /**
- * <p>
* Implementation method, parses the RSS content, and then returns a
* {@link ParseImpl}.
* </p>
@@ -119,8 +95,8 @@
theRSSChannels = ((FeedParserListenerImpl) listener).getChannels();
} catch (Exception e) { // run time exception
- e.printStackTrace();
- LOG.fine("nutch:parse-rss:RSSParser Exception: " + e.getMessage());
+ e.printStackTrace(LogUtil.getWarnStream(LOG));
+ LOG.trace("nutch:parse-rss:RSSParser Exception: " + e.getMessage());
return new ParseStatus(ParseStatus.FAILED,
"Can't be handled as rss document. " + e).getEmptyParse(getConf());
}
@@ -157,7 +133,7 @@
LOG.info("nutch:parse-rss:RSSParser Exception: MalformedURL: "
+ r.getLink()
+ ": Attempting to continue processing outlinks");
- e.printStackTrace();
+ e.printStackTrace(LogUtil.getWarnStream(LOG));
continue;
}
}
@@ -187,7 +163,7 @@
LOG.info("nutch:parse-rss:RSSParser Exception: MalformedURL: "
+ whichLink
+ ": Attempting to continue processing outlinks");
- e.printStackTrace();
+ e.printStackTrace(LogUtil.getWarnStream(LOG));
continue;
}
}
@@ -196,17 +172,17 @@
}
- LOG.fine("nutch:parse-rss:getParse:indexText=" + indexText);
- LOG.fine("nutch:parse-rss:getParse:contentTitle=" + contentTitle);
+ LOG.trace("nutch:parse-rss:getParse:indexText=" + indexText);
+ LOG.trace("nutch:parse-rss:getParse:contentTitle=" + contentTitle);
} else {
- LOG.fine("nutch:parse-rss:Error:getParse: No RSS Channels recorded!");
+ LOG.trace("nutch:parse-rss:Error:getParse: No RSS Channels recorded!");
}
// format the outlinks
Outlink[] outlinks = (Outlink[]) theOutlinks.toArray(new Outlink[theOutlinks.size()]);
- LOG.fine("nutch:parse-rss:getParse:found " + outlinks.length + " outlinks");
+ LOG.trace("nutch:parse-rss:getParse:found " + outlinks.length + " outlinks");
// LOG.info("Outlinks: "+outlinks);
ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS,
@@ -224,7 +200,7 @@
}
public static void main(String[] args) throws Exception {
- LOG.setLevel(Level.FINE);
+ //LOG.setLevel(Level.FINE);
String url = args[0];
Configuration conf = NutchConfiguration.create();
RSSParser parser = new RSSParser();
Modified: lucene/nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java?rev=413742&r1=413741&r2=413742&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java Mon Jun 12 13:51:40 2006
@@ -19,16 +19,18 @@
import java.io.FileInputStream;
import java.io.IOException;
import java.util.*;
-import java.util.logging.Logger;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.net.protocols.Response;
import org.apache.nutch.parse.*;
import org.apache.nutch.protocol.Content;
+import org.apache.nutch.util.LogUtil;
+import org.apache.nutch.util.NutchConfiguration;
-import org.apache.hadoop.util.LogFormatter;
import org.apache.hadoop.conf.Configuration;
-import org.apache.nutch.util.NutchConfiguration;
import com.anotherbigidea.flash.interfaces.*;
import com.anotherbigidea.flash.readers.*;
@@ -44,7 +46,7 @@
* @author Andrzej Bialecki
*/
public class SWFParser implements Parser {
- public static final Logger LOG = LogFormatter.getLogger("org.apache.nutch.parse.swf");
+ public static final Log LOG = LogFactory.getLog("org.apache.nutch.parse.swf");
private Configuration conf = null;
@@ -99,7 +101,7 @@
outlinks.add(olinks[i]);
}
} catch (Exception e) { // run time exception
- e.printStackTrace();
+ e.printStackTrace(LogUtil.getErrorStream(LOG));
return new ParseStatus(ParseStatus.FAILED, "Can't be handled as SWF document. " + e).getEmptyParse(conf);
} finally {}
if (text == null) text = "";
Modified: lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java?rev=413742&r1=413741&r2=413742&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java Mon Jun 12 13:51:40 2006
@@ -19,10 +19,12 @@
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.util.Properties;
-import java.util.logging.Logger;
import java.util.ArrayList;
import java.util.List;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.net.protocols.Response;
import org.apache.nutch.parse.Outlink;
@@ -32,7 +34,6 @@
import org.apache.nutch.parse.ParseStatus;
import org.apache.nutch.parse.Parser;
import org.apache.nutch.protocol.Content;
-import org.apache.hadoop.util.LogFormatter;
import org.apache.hadoop.conf.Configuration;
/**
@@ -43,8 +44,7 @@
*/
public class ZipParser implements Parser {
- private static final Logger LOG = LogFormatter.getLogger(ZipParser.class
- .getName());
+ private static final Log LOG = LogFactory.getLog(ZipParser.class);
private Configuration conf;
/** Creates a new instance of ZipParser */
@@ -62,7 +62,7 @@
try {
final String contentLen = content.getMetadata().get(Response.CONTENT_LENGTH);
final int len = Integer.parseInt(contentLen);
- System.out.println("ziplen: " + len);
+ LOG.debug("ziplen: " + len);
final byte[] contentInBytes = content.getContent();
final ByteArrayInputStream bainput = new ByteArrayInputStream(
contentInBytes);
@@ -101,7 +101,7 @@
content.getMetadata());
parseData.setConf(this.conf);
- LOG.finest("Zip file parsed sucessfully !!");
+ LOG.trace("Zip file parsed sucessfully !!");
return new ParseImpl(resultText, parseData);
}
Modified: lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java?rev=413742&r1=413741&r2=413742&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java Mon Jun 12 13:51:40 2006
@@ -20,11 +20,17 @@
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
-import java.util.logging.Logger;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import java.net.URL;
+// Commons Logging imports
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+// Hadoop imports
+import org.apache.hadoop.conf.Configuration;
+
// Nutch imports
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.net.protocols.Response;
@@ -34,8 +40,6 @@
import org.apache.nutch.parse.ParseException;
import org.apache.nutch.parse.Outlink;
import org.apache.nutch.protocol.Content;
-import org.apache.hadoop.util.LogFormatter;
-import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.util.mime.MimeTypes;
@@ -48,7 +52,7 @@
/** Get the MimeTypes resolver instance. */
private MimeTypes MIME;
- public static final Logger LOG = LogFormatter.getLogger(ZipTextExtractor.class.getName());
+ public static final Log LOG = LogFactory.getLog(ZipTextExtractor.class);
private Configuration conf;
Modified: lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java?rev=413742&r1=413741&r2=413742&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java Mon Jun 12 13:51:40 2006
@@ -16,6 +16,8 @@
package org.apache.nutch.protocol.file;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.hadoop.io.UTF8;
@@ -23,7 +25,6 @@
import org.apache.nutch.net.protocols.HttpDateFormat;
import org.apache.nutch.net.protocols.Response;
-import org.apache.hadoop.util.LogFormatter;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.protocol.Content;
@@ -31,9 +32,6 @@
import org.apache.nutch.protocol.ProtocolOutput;
import org.apache.nutch.protocol.ProtocolStatus;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
import java.net.URL;
/************************************
@@ -46,8 +44,7 @@
***********************************/
public class File implements Protocol {
- public static final Logger LOG =
- LogFormatter.getLogger("org.apache.nutch.protocol.file.File");
+ public static final Log LOG = LogFactory.getLog(File.class);
static final int MAX_REDIRECTS = 5;
@@ -91,8 +88,9 @@
throw new FileException("Too many redirects: " + url);
u = new URL(response.getHeader("Location"));
redirects++;
- if (LOG.isLoggable(Level.FINE))
- LOG.fine("redirect to " + u);
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("redirect to " + u);
+ }
} else { // convert to exception
throw new FileError(code);
@@ -141,7 +139,7 @@
file.setMaxContentLength(maxContentLength);
// set log level
- LOG.setLevel(Level.parse((new String(logLevel)).toUpperCase()));
+ //LOG.setLevel(Level.parse((new String(logLevel)).toUpperCase()));
Content content = file.getProtocolOutput(new UTF8(urlString), new CrawlDatum()).getContent();
Modified: lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java?rev=413742&r1=413741&r2=413742&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java Mon Jun 12 13:51:40 2006
@@ -20,7 +20,6 @@
import java.net.URL;
import java.util.Date;
import java.util.TreeMap;
-import java.util.logging.Level;
import java.io.IOException;
// Nutch imports
@@ -97,11 +96,13 @@
if (!"file".equals(url.getProtocol()))
throw new FileException("Not a file url:" + url);
- if (File.LOG.isLoggable(Level.FINE))
- File.LOG.fine("fetching " + url);
+ if (File.LOG.isTraceEnabled()) {
+ File.LOG.trace("fetching " + url);
+ }
- if (url.getPath() != url.getFile())
- File.LOG.warning("url.getPath() != url.getFile(): " + url);
+ if (url.getPath() != url.getFile()) {
+ File.LOG.warn("url.getPath() != url.getFile(): " + url);
+ }
String path = "".equals(url.getPath()) ? "/" : url.getPath();
@@ -179,7 +180,7 @@
offset += n;
}
if (offset < len) // keep whatever already have, but issue a warning
- File.LOG.warning("not enough bytes read from file: "+f.getPath());
+ File.LOG.warn("not enough bytes read from file: "+f.getPath());
is.close();
// set headers
Modified: lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java?rev=413742&r1=413741&r2=413742&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java Mon Jun 12 13:51:40 2006
@@ -16,6 +16,8 @@
package org.apache.nutch.protocol.ftp;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.commons.net.ftp.FTPFileEntryParser;
@@ -24,7 +26,6 @@
import org.apache.nutch.net.protocols.HttpDateFormat;
import org.apache.nutch.net.protocols.Response;
-import org.apache.hadoop.util.LogFormatter;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.protocol.Content;
@@ -32,9 +33,6 @@
import org.apache.nutch.protocol.ProtocolOutput;
import org.apache.nutch.protocol.ProtocolStatus;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
import java.net.URL;
import java.io.IOException;
@@ -49,8 +47,7 @@
***********************************/
public class Ftp implements Protocol {
- public static final Logger LOG =
- LogFormatter.getLogger("org.apache.nutch.protocol.ftp.Ftp");
+ public static final Log LOG = LogFactory.getLog(Ftp.class);
static final int BUFFER_SIZE = 16384; // 16*1024 = 16384
@@ -135,9 +132,9 @@
throw new FtpException("Too many redirects: " + url);
u = new URL(response.getHeader("Location"));
redirects++;
- if (LOG.isLoggable(Level.FINE))
- LOG.fine("redirect to " + u);
-
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("redirect to " + u);
+ }
} else { // convert to exception
throw new FtpError(code);
}
@@ -208,7 +205,7 @@
ftp.setMaxContentLength(maxContentLength);
// set log level
- LOG.setLevel(Level.parse((new String(logLevel)).toUpperCase()));
+ //LOG.setLevel(Level.parse((new String(logLevel)).toUpperCase()));
Content content = ftp.getProtocolOutput(new UTF8(urlString), new CrawlDatum()).getContent();
Modified: lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java?rev=413742&r1=413741&r2=413742&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java Mon Jun 12 13:51:40 2006
@@ -36,7 +36,6 @@
import java.util.List;
import java.util.LinkedList;
-import java.util.logging.Level;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
@@ -94,7 +93,7 @@
throw new FtpException("Not a ftp url:" + url);
if (url.getPath() != url.getFile())
- Ftp.LOG.warning("url.getPath() != url.getFile(): " + url);
+ Ftp.LOG.warn("url.getPath() != url.getFile(): " + url);
String path = "".equals(url.getPath()) ? "/" : url.getPath();
@@ -103,8 +102,9 @@
if (ftp.followTalk) {
Ftp.LOG.info("fetching "+url);
} else {
- if (Ftp.LOG.isLoggable(Level.FINE))
- Ftp.LOG.fine("fetching "+url);
+ if (Ftp.LOG.isTraceEnabled()) {
+ Ftp.LOG.trace("fetching "+url);
+ }
}
InetAddress addr = InetAddress.getByName(url.getHost());
@@ -160,7 +160,7 @@
ftp.client.connect(addr);
if (!FTPReply.isPositiveCompletion(ftp.client.getReplyCode())) {
ftp.client.disconnect();
- Ftp.LOG.warning("ftp.client.connect() failed: "
+ Ftp.LOG.warn("ftp.client.connect() failed: "
+ addr + " " + ftp.client.getReplyString());
this.code = 500; // http Internal Server Error
return;
@@ -176,7 +176,7 @@
// but throw exception, which then will be handled by caller
// (not dealt with here at all) .
ftp.client.disconnect();
- Ftp.LOG.warning("ftp.client.login() failed: "+addr);
+ Ftp.LOG.warn("ftp.client.login() failed: "+addr);
this.code = 401; // http Unauthorized
return;
}
@@ -185,7 +185,7 @@
if (!ftp.client.setFileType(FTP.BINARY_FILE_TYPE)) {
ftp.client.logout();
ftp.client.disconnect();
- Ftp.LOG.warning("ftp.client.setFileType() failed: "+addr);
+ Ftp.LOG.warn("ftp.client.setFileType() failed: "+addr);
this.code = 500; // http Internal Server Error
return;
}
@@ -203,18 +203,18 @@
ftp.parser = (new DefaultFTPFileEntryParserFactory())
.createFileEntryParser(parserKey);
} catch (FtpExceptionBadSystResponse e) {
- Ftp.LOG.warning("ftp.client.getSystemName() failed: "+addr+" "+e);
+ Ftp.LOG.warn("ftp.client.getSystemName() failed: "+addr+" "+e);
ftp.parser = null;
} catch (ParserInitializationException e) {
// ParserInitializationException is RuntimeException defined in
// org.apache.commons.net.ftp.parser.ParserInitializationException
- Ftp.LOG.warning("createFileEntryParser() failed. "+addr+" "+e);
+ Ftp.LOG.warn("createFileEntryParser() failed. "+addr+" "+e);
ftp.parser = null;
} finally {
if (ftp.parser == null) {
// do not log as severe, otherwise
// FetcherThread/RequestScheduler will abort
- Ftp.LOG.warning("ftp.parser is null: "+addr);
+ Ftp.LOG.warn("ftp.parser is null: "+addr);
ftp.client.logout();
ftp.client.disconnect();
this.code = 500; // http Internal Server Error
@@ -254,10 +254,10 @@
}
} catch (Exception e) {
- ftp.LOG.warning(""+e);
+ ftp.LOG.warn(""+e);
StackTraceElement stes[] = e.getStackTrace();
for (int i=0; i<stes.length; i++) {
- ftp.LOG.warning(" "+stes[i].toString());
+ ftp.LOG.warn(" "+stes[i].toString());
}
// for any un-foreseen exception (run time exception or not),
// do ultimate clean and leave ftp.client for garbage collection
@@ -321,7 +321,7 @@
// thrown by retrieveList() (not retrieveFile()) above,
if (os == null) { // indicating throwing by retrieveList()
//throw new FtpException("fail to get attibutes: "+path);
- Ftp.LOG.warning(
+ Ftp.LOG.warn(
"Please try larger maxContentLength for ftp.client.retrieveList(). "
+ e);
// in a way, this is our request fault
@@ -360,7 +360,7 @@
} catch (FtpExceptionUnknownForcedDataClose e) {
// Please note control channel is still live.
// in a way, this is our request fault
- Ftp.LOG.warning(
+ Ftp.LOG.warn(
"Unrecognized reply after forced close of data channel. "
+ "If this is acceptable, please modify Client.java accordingly. "
+ e);
@@ -424,13 +424,13 @@
} catch (FtpExceptionUnknownForcedDataClose e) {
// Please note control channel is still live.
// in a way, this is our request fault
- Ftp.LOG.warning(
+ Ftp.LOG.warn(
"Unrecognized reply after forced close of data channel. "
+ "If this is acceptable, please modify Client.java accordingly. "
+ e);
this.code = 400; // http Bad Request
} catch (FtpExceptionCanNotHaveDataConnection e) {
- Ftp.LOG.warning(""+ e);
+ Ftp.LOG.warn(""+ e);
this.code = 500; // http Iternal Server Error
}
Modified: lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/PrintCommandListener.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/PrintCommandListener.java?rev=413742&r1=413741&r2=413742&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/PrintCommandListener.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/PrintCommandListener.java Mon Jun 12 13:51:40 2006
@@ -20,7 +20,8 @@
import java.io.StringReader;
import java.io.IOException;
-import java.util.logging.Logger;
+import org.apache.commons.logging.Log;
+
import org.apache.commons.net.ProtocolCommandEvent;
import org.apache.commons.net.ProtocolCommandListener;
@@ -31,9 +32,9 @@
***/
public class PrintCommandListener implements ProtocolCommandListener
{
- private Logger __logger;
+ private Log __logger;
- public PrintCommandListener(Logger logger)
+ public PrintCommandListener(Log logger)
{
__logger = logger;
}
Modified: lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java?rev=413742&r1=413741&r2=413742&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java Mon Jun 12 13:51:40 2006
@@ -18,23 +18,25 @@
// JDK imports
import java.io.IOException;
import java.net.URL;
-import java.util.logging.Level;
-import java.util.logging.Logger;
+
+// Commons Logging imports
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+// Hadoop imports
+import org.apache.hadoop.conf.Configuration;
// Nutch imports
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.net.protocols.Response;
import org.apache.nutch.protocol.ProtocolException;
import org.apache.nutch.protocol.http.api.HttpBase;
-import org.apache.hadoop.util.LogFormatter;
-import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.util.NutchConfiguration;
public class Http extends HttpBase {
- public static final Logger LOG =
- LogFormatter.getLogger(Http.class.getName());
+ public static final Log LOG = LogFactory.getLog(Http.class);
public Http() {
@@ -43,11 +45,11 @@
public void setConf(Configuration conf) {
super.setConf(conf);
- Level logLevel = Level.WARNING;
- if (conf.getBoolean("http.verbose", false)) {
- logLevel = Level.FINE;
- }
- LOG.setLevel(logLevel);
+// Level logLevel = Level.WARNING;
+// if (conf.getBoolean("http.verbose", false)) {
+// logLevel = Level.FINE;
+// }
+// LOG.setLevel(logLevel);
}
public static void main(String[] args) throws Exception {
Modified: lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java?rev=413742&r1=413741&r2=413742&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java Mon Jun 12 13:51:40 2006
@@ -26,7 +26,6 @@
import java.net.InetSocketAddress;
import java.net.Socket;
import java.net.URL;
-import java.util.logging.Level;
// Nutch imports
import org.apache.nutch.crawl.CrawlDatum;
@@ -35,6 +34,7 @@
import org.apache.nutch.protocol.ProtocolException;
import org.apache.nutch.protocol.http.api.HttpBase;
import org.apache.nutch.protocol.http.api.HttpException;
+import org.apache.nutch.util.LogUtil;
/** An HTTP response. */
@@ -60,8 +60,9 @@
if (!"http".equals(url.getProtocol()))
throw new HttpException("Not an HTTP url:" + url);
- if (Http.LOG.isLoggable(Level.FINE))
- Http.LOG.fine("fetching " + url);
+ if (Http.LOG.isTraceEnabled()) {
+ Http.LOG.trace("fetching " + url);
+ }
String path = "".equals(url.getFile()) ? "/" : url.getFile();
@@ -113,7 +114,7 @@
String userAgent = http.getUserAgent();
if ((userAgent == null) || (userAgent.length() == 0)) {
- Http.LOG.severe("User-agent is not set!");
+ Http.LOG.fatal("User-agent is not set!");
} else {
reqStr.append("User-Agent: ");
reqStr.append(userAgent);
@@ -148,8 +149,9 @@
if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) {
content = http.processGzipEncoded(content, url);
} else {
- if (Http.LOG.isLoggable(Level.FINE))
- Http.LOG.fine("fetched " + content.length + " bytes from " + url);
+ if (Http.LOG.isTraceEnabled()) {
+ Http.LOG.trace("fetched " + content.length + " bytes from " + url);
+ }
}
} finally {
@@ -228,19 +230,19 @@
ByteArrayOutputStream out = new ByteArrayOutputStream(Http.BUFFER_SIZE);
while (!doneChunks) {
- Http.LOG.fine("Http: starting chunk");
+ Http.LOG.trace("Http: starting chunk");
readLine(in, line, false);
String chunkLenStr;
- // LOG.fine("chunk-header: '" + line + "'");
+ // LOG.trace("chunk-header: '" + line + "'");
int pos= line.indexOf(";");
if (pos < 0) {
chunkLenStr= line.toString();
} else {
chunkLenStr= line.substring(0, pos);
- // LOG.fine("got chunk-ext: " + line.substring(pos+1));
+ // LOG.trace("got chunk-ext: " + line.substring(pos+1));
}
chunkLenStr= chunkLenStr.trim();
int chunkLen;
@@ -274,7 +276,7 @@
// DANGER!!! Will printed GZIPed stuff right to your
// terminal!
- // LOG.fine("read: " + new String(bytes, 0, len));
+ // LOG.trace("read: " + new String(bytes, 0, len));
out.write(bytes, 0, len);
chunkBytesRead+= len;
@@ -370,7 +372,7 @@
processHeaderLine(line);
} catch (Exception e) {
// fixme:
- e.printStackTrace();
+ e.printStackTrace(LogUtil.getErrorStream(Http.LOG));
}
return;
}
Modified: lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java?rev=413742&r1=413741&r2=413742&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java Mon Jun 12 13:51:40 2006
@@ -19,8 +19,10 @@
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
-import java.util.logging.Level;
-import java.util.logging.Logger;
+
+// Commons Logging imports
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
// HTTP Client imports
import org.apache.commons.httpclient.Credentials;
@@ -38,14 +40,13 @@
import org.apache.nutch.net.protocols.Response;
import org.apache.nutch.protocol.ProtocolException;
import org.apache.nutch.protocol.http.api.HttpBase;
-import org.apache.hadoop.util.LogFormatter;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.util.NutchConfiguration;
public class Http extends HttpBase {
- public static final Logger LOG = LogFormatter.getLogger("org.apache.nutch.net.Http");
+ public static final Log LOG = LogFactory.getLog(Http.class);
private static MultiThreadedHttpConnectionManager connectionManager =
new MultiThreadedHttpConnectionManager();
@@ -76,13 +77,13 @@
this.ntlmPassword = conf.get("http.auth.ntlm.password", "");
this.ntlmDomain = conf.get("http.auth.ntlm.domain", "");
this.ntlmHost = conf.get("http.auth.ntlm.host", "");
- Level logLevel = Level.WARNING;
- if (conf.getBoolean("http.verbose", false)) {
- logLevel = Level.FINE;
- }
- LOG.setLevel(logLevel);
- Logger.getLogger("org.apache.commons.httpclient.HttpMethodDirector")
- .setLevel(logLevel);
+ //Level logLevel = Level.WARNING;
+ //if (conf.getBoolean("http.verbose", false)) {
+ // logLevel = Level.FINE;
+ //}
+ //LOG.setLevel(logLevel);
+ //Logger.getLogger("org.apache.commons.httpclient.HttpMethodDirector")
+ // .setLevel(logLevel);
configureClient();
}
Modified: lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java?rev=413742&r1=413741&r2=413742&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java Mon Jun 12 13:51:40 2006
@@ -3,20 +3,25 @@
package org.apache.nutch.protocol.httpclient;
+// JDK imports
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import org.apache.nutch.metadata.Metadata;
+// Commons Logging imports
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.util.LogFormatter;
+// Hadoop imports
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configurable;
+// Nutch imports
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.util.LogUtil;
+
/**
* Provides the Http protocol implementation
@@ -38,8 +43,7 @@
*/
public static final String WWW_AUTHENTICATE = "WWW-Authenticate";
- public static final Logger LOG =
- LogFormatter.getLogger(HttpAuthenticationFactory.class.getName());
+ public static final Log LOG = LogFactory.getLog(HttpAuthenticationFactory.class);
private static Map auths = new TreeMap();
@@ -57,11 +61,11 @@
public void setConf(Configuration conf) {
this.conf = conf;
- if (conf.getBoolean("http.auth.verbose", false)) {
- LOG.setLevel(Level.FINE);
- } else {
- LOG.setLevel(Level.WARNING);
- }
+ //if (conf.getBoolean("http.auth.verbose", false)) {
+ // LOG.setLevel(Level.FINE);
+ //} else {
+ // LOG.setLevel(Level.WARNING);
+ //}
}
public Configuration getConf() {
@@ -95,7 +99,7 @@
}
}
if (challenge == null) {
- LOG.fine("Authentication challenge is null");
+ LOG.trace("Authentication challenge is null");
return null;
}
@@ -107,14 +111,14 @@
challengeString="Basic realm=techweb";
}
- LOG.fine("Checking challengeString=" + challengeString);
+ LOG.trace("Checking challengeString=" + challengeString);
auth = HttpBasicAuthentication.getAuthentication(challengeString, conf);
if (auth != null) return auth;
//TODO Add additional Authentication lookups here
}
} catch (Exception e) {
- e.printStackTrace();
+ e.printStackTrace(LogUtil.getErrorStream(LOG));
}
return null;
}
Modified: lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java?rev=413742&r1=413741&r2=413742&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java Mon Jun 12 13:51:40 2006
@@ -3,21 +3,26 @@
package org.apache.nutch.protocol.httpclient;
+// JDK imports
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
-import java.util.logging.Level;
-import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+// Commons Codec imports
import org.apache.commons.codec.binary.Base64;
-import org.apache.hadoop.util.LogFormatter;
+// Commons Logging imports
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+// Hadoop imports
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configurable;
+
/**
* Implementation of RFC 2617 Basic Authentication. Usernames and passwords are stored
* in standard Nutch configuration files using the following properties:
@@ -28,8 +33,7 @@
*/
public class HttpBasicAuthentication implements HttpAuthentication, Configurable {
- public static final Logger LOG =
- LogFormatter.getLogger(HttpBasicAuthentication.class.getName());
+ public static final Log LOG = LogFactory.getLog(HttpBasicAuthentication.class);
private static Pattern basic = Pattern.compile("[bB][aA][sS][iI][cC] [rR][eE][aA][lL][mM]=\"(\\w*)\"");
@@ -53,13 +57,13 @@
setConf(conf);
this.challenge = challenge;
- LOG.fine("BasicAuthentication challenge is " + challenge);
+ LOG.trace("BasicAuthentication challenge is " + challenge);
credentials = new ArrayList();
String username = this.conf.get("http.auth.basic." + challenge + ".user");
- LOG.fine("BasicAuthentication username=" + username);
+ LOG.trace("BasicAuthentication username=" + username);
String password = this.conf.get("http.auth.basic." + challenge + ".password");
- LOG.fine("BasicAuthentication password=" + password);
+ LOG.trace("BasicAuthentication password=" + password);
if (username == null) {
throw new HttpAuthenticationException("Username for " + challenge + " is null");
@@ -71,7 +75,7 @@
byte[] credBytes = (username + ":" + password).getBytes();
credentials.add("Authorization: Basic " + new String(Base64.encodeBase64(credBytes)));
- LOG.fine("Basic credentials: " + credentials);
+ LOG.trace("Basic credentials: " + credentials);
}
@@ -81,11 +85,11 @@
public void setConf(Configuration conf) {
this.conf = conf;
- if (conf.getBoolean("http.auth.verbose", false)) {
- LOG.setLevel(Level.FINE);
- } else {
- LOG.setLevel(Level.WARNING);
- }
+ //if (conf.getBoolean("http.auth.verbose", false)) {
+ // LOG.setLevel(Level.FINE);
+ //} else {
+ // LOG.setLevel(Level.WARNING);
+ //}
}
public Configuration getConf() {
@@ -141,7 +145,7 @@
try {
newAuth = new HttpBasicAuthentication(realm, conf);
} catch (HttpAuthenticationException hae) {
- LOG.fine("HttpBasicAuthentication failed for " + challenge);
+ LOG.trace("HttpBasicAuthentication failed for " + challenge);
}
authMap.put(realm, newAuth);
return newAuth;
Modified: lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java?rev=413742&r1=413741&r2=413742&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java Mon Jun 12 13:51:40 2006
@@ -22,6 +22,10 @@
import java.net.URL;
import java.util.Date;
+// Commons Logging imports
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
// HTTP Client imports
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpVersion;
@@ -34,6 +38,7 @@
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.net.protocols.Response;
import org.apache.nutch.protocol.http.api.HttpBase;
+import org.apache.nutch.util.LogUtil;
/**
@@ -41,6 +46,8 @@
*/
public class HttpResponse implements Response {
+ public final static Log LOG = LogFactory.getLog(HttpResponse.class);
+
private URL url;
private String orig;
@@ -121,7 +128,7 @@
}
}
} catch (org.apache.commons.httpclient.ProtocolException pe) {
- pe.printStackTrace();
+ pe.printStackTrace(LogUtil.getErrorStream(LOG));
get.releaseConnection();
throw new IOException(pe.toString());
} finally {
Modified: lucene/nutch/trunk/src/plugin/query-more/src/java/org/apache/nutch/searcher/more/DateQueryFilter.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/query-more/src/java/org/apache/nutch/searcher/more/DateQueryFilter.java?rev=413742&r1=413741&r2=413742&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/query-more/src/java/org/apache/nutch/searcher/more/DateQueryFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/query-more/src/java/org/apache/nutch/searcher/more/DateQueryFilter.java Mon Jun 12 13:51:40 2006
@@ -21,9 +21,11 @@
import org.apache.nutch.searcher.QueryFilter;
import org.apache.nutch.searcher.QueryException;
-import org.apache.hadoop.util.LogFormatter;
import org.apache.hadoop.conf.Configuration;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.RangeQuery;
@@ -32,7 +34,6 @@
import java.util.regex.Pattern;
import java.util.regex.Matcher;
-import java.util.logging.Logger;
/**
* Handles "date:" query clauses, causing them to search the field "date"
@@ -42,8 +43,7 @@
*/
public class DateQueryFilter implements QueryFilter {
- public static final Logger LOG
- = LogFormatter.getLogger(DateQueryFilter.class.getName());
+ public static final Log LOG = LogFactory.getLog(DateQueryFilter.class);
private static final String FIELD_NAME = "date";
Modified: lucene/nutch/trunk/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/OPICScoringFilter.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/OPICScoringFilter.java?rev=413742&r1=413741&r2=413742&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/OPICScoringFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/OPICScoringFilter.java Mon Jun 12 13:51:40 2006
@@ -18,6 +18,10 @@
import java.util.List;
+// Commons Logging imports
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.UTF8;
import org.apache.lucene.document.Document;
@@ -30,6 +34,7 @@
import org.apache.nutch.protocol.Content;
import org.apache.nutch.scoring.ScoringFilter;
import org.apache.nutch.scoring.ScoringFilterException;
+import org.apache.nutch.util.LogUtil;
/**
* This plugin implements a variant of an Online Page Importance Computation
@@ -42,6 +47,9 @@
* @author Andrzej Bialecki
*/
public class OPICScoringFilter implements ScoringFilter {
+
+ private final static Log LOG = LogFactory.getLog(OPICScoringFilter.class);
+
private Configuration conf;
private float scoreInjected;
private float scorePower;
@@ -96,7 +104,7 @@
try {
score = Float.parseFloat(scoreString);
} catch (Exception e) {
- e.printStackTrace();
+ e.printStackTrace(LogUtil.getWarnStream(LOG));
}
}
if (countFiltered) {
Modified: lucene/nutch/trunk/src/plugin/subcollection/src/java/org/apache/nutch/collection/CollectionManager.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/subcollection/src/java/org/apache/nutch/collection/CollectionManager.java?rev=413742&r1=413741&r2=413742&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/subcollection/src/java/org/apache/nutch/collection/CollectionManager.java (original)
+++ lucene/nutch/trunk/src/plugin/subcollection/src/java/org/apache/nutch/collection/CollectionManager.java Mon Jun 12 13:51:40 2006
@@ -25,11 +25,14 @@
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
-import java.util.logging.Logger;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.nutch.util.DomUtil;
+import org.apache.nutch.util.LogUtil;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.xerces.dom.DocumentImpl;
import org.w3c.dom.Document;
@@ -40,8 +43,7 @@
public static final String DEFAULT_FILE_NAME = "subcollections.xml";
- static final Logger LOG = org.apache.hadoop.util.LogFormatter.getLogger(CollectionManager.class
- .getName());
+ static final Log LOG = LogFactory.getLog(CollectionManager.class);
transient Map collectionMap = new HashMap();
@@ -70,8 +72,8 @@
getConf().get("subcollections.config", DEFAULT_FILE_NAME));
parse(input);
} catch (Exception e) {
- LOG.info("Error occured:" + e);
- e.printStackTrace(System.out);
+ LOG.warn("Error occurred:" + e);
+ e.printStackTrace(LogUtil.getWarnStream(LOG));
}
}
@@ -168,7 +170,7 @@
collections += " " + subCol.name;
}
}
- LOG.fine("subcollections:" + collections);
+ LOG.trace("subcollections:" + collections);
return collections;
}
Modified: lucene/nutch/trunk/src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/SubcollectionIndexingFilter.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/SubcollectionIndexingFilter.java?rev=413742&r1=413741&r2=413742&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/SubcollectionIndexingFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/SubcollectionIndexingFilter.java Mon Jun 12 13:51:40 2006
@@ -18,10 +18,12 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.UTF8;
-import org.apache.hadoop.util.LogFormatter;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
import org.apache.nutch.parse.Parse;
import org.apache.nutch.util.NutchConfiguration;
@@ -32,7 +34,6 @@
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.Inlinks;
-import java.util.logging.Logger;
public class SubcollectionIndexingFilter extends Configured implements IndexingFilter {
@@ -52,8 +53,7 @@
/**
* Logger
*/
- public static final Logger LOG = LogFormatter
- .getLogger(SubcollectionIndexingFilter.class.getName());
+ public static final Log LOG = LogFactory.getLog(SubcollectionIndexingFilter.class);
/**
* "Mark" document to be a part of subcollection
Modified: lucene/nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java?rev=413742&r1=413741&r2=413742&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java Mon Jun 12 13:51:40 2006
@@ -18,8 +18,10 @@
package org.apache.nutch.urlfilter.prefix;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.util.LogFormatter;
import org.apache.nutch.net.*;
import org.apache.nutch.util.PrefixStringMatcher;
@@ -36,7 +38,6 @@
import java.util.List;
import java.util.ArrayList;
-import java.util.logging.Logger;
/**
* Filters URLs based on a file of URL prefixes. The file is named by
@@ -48,8 +49,7 @@
*/
public class PrefixURLFilter implements URLFilter {
- private static final Logger LOG =
- LogFormatter.getLogger(PrefixURLFilter.class.getName());
+ private static final Log LOG = LogFactory.getLog(PrefixURLFilter.class);
// read in attribute "file" of this plugin.
private static String attributeFile = null;
@@ -134,7 +134,7 @@
LOG.info("Attribute \"file\" is defined for plugin " + pluginName
+ " as " + attributeFile);
} else {
- // LOG.warning("Attribute \"file\" is not defined in plugin.xml for
+ // LOG.warn("Attribute \"file\" is not defined in plugin.xml for
// plugin "+pluginName);
}
@@ -150,7 +150,7 @@
try {
trie = readConfigurationFile(reader);
} catch (IOException e) {
- LOG.severe(e.getMessage());
+ LOG.fatal(e.getMessage());
// TODO mb@media-style.com: throw Exception? Because broken api.
throw new RuntimeException(e.getMessage(), e);
}
Modified: lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java?rev=413742&r1=413741&r2=413742&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java Mon Jun 12 13:51:40 2006
@@ -17,7 +17,6 @@
package org.apache.nutch.urlfilter.suffix;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.util.LogFormatter;
import org.apache.nutch.net.*;
import org.apache.nutch.util.NutchConfiguration;
@@ -27,6 +26,9 @@
import org.apache.nutch.plugin.Extension;
import org.apache.nutch.plugin.PluginRepository;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
import java.io.Reader;
import java.io.FileReader;
import java.io.BufferedReader;
@@ -35,7 +37,6 @@
import java.util.List;
import java.util.ArrayList;
-import java.util.logging.Logger;
/**
* Filters URLs based on a file of URL suffixes. The file is named by
@@ -118,7 +119,7 @@
*/
public class SuffixURLFilter implements URLFilter {
- private static final Logger LOG = LogFormatter.getLogger(SuffixURLFilter.class.getName());
+ private static final Log LOG = LogFactory.getLog(SuffixURLFilter.class);
// read in attribute "file" of this plugin.
private String attributeFile = null;
@@ -158,7 +159,7 @@
// handle missing config file
if (reader == null) {
- LOG.warning("Missing urlfilter.suffix.file, all URLs will be rejected!");
+ LOG.warn("Missing urlfilter.suffix.file, all URLs will be rejected!");
suffixes = new SuffixStringMatcher(new String[0]);
modeAccept = false;
ignoreCase = false;
@@ -241,7 +242,7 @@
if (attributeFile != null) {
LOG.info("Attribute \"file\" is defined for plugin " + pluginName + " as " + attributeFile);
} else {
- // LOG.warning("Attribute \"file\" is not defined in plugin.xml for
+ // LOG.warn("Attribute \"file\" is not defined in plugin.xml for
// plugin "+pluginName);
}
@@ -253,7 +254,7 @@
try {
readConfigurationFile(reader);
} catch (IOException e) {
- LOG.severe(e.getMessage());
+ LOG.fatal(e.getMessage());
throw new RuntimeException(e.getMessage(), e);
}
}
@@ -277,4 +278,4 @@
public void setIgnoreCase(boolean ignoreCase) {
this.ignoreCase = ignoreCase;
}
-}
\ No newline at end of file
+}
Added: lucene/nutch/trunk/src/test/log4j.properties
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/test/log4j.properties?rev=413742&view=auto
==============================================================================
--- lucene/nutch/trunk/src/test/log4j.properties (added)
+++ lucene/nutch/trunk/src/test/log4j.properties Mon Jun 12 13:51:40 2006
@@ -0,0 +1,7 @@
+# log4j configuration used during build and unit tests
+
+log4j.rootLogger=info,stdout
+log4j.threshold=ALL
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
Propchange: lucene/nutch/trunk/src/test/log4j.properties
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/nutch/trunk/src/web/log4j.properties
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/web/log4j.properties?rev=413742&view=auto
==============================================================================
--- lucene/nutch/trunk/src/web/log4j.properties (added)
+++ lucene/nutch/trunk/src/web/log4j.properties Mon Jun 12 13:51:40 2006
@@ -0,0 +1,7 @@
+# log4j configuration used by the front-end container
+
+log4j.rootLogger=info,stdout
+log4j.threshold=ALL
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n
Propchange: lucene/nutch/trunk/src/web/log4j.properties
------------------------------------------------------------------------------
svn:eol-style = native