You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by je...@apache.org on 2006/03/24 00:21:05 UTC
svn commit: r388293 - /lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java
Author: jerome
Date: Thu Mar 23 15:21:03 2006
New Revision: 388293
URL: http://svn.apache.org/viewcvs?rev=388293&view=rev
Log:
Set the configuration on the parser used in the main method to fix NullPointerExceptions (NPEs).
Modified:
lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java
Modified: lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java?rev=388293&r1=388292&r2=388293&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java Thu Mar 23 15:21:03 2006
@@ -269,9 +269,11 @@
byte[] bytes = new byte[(int)file.length()];
DataInputStream in = new DataInputStream(new FileInputStream(file));
in.readFully(bytes);
- Parse parse = new HtmlParser().getParse(
- new Content(url, url, bytes, "text/html", new Metadata(),
- NutchConfiguration.create()));
+ Configuration conf = NutchConfiguration.create();
+ HtmlParser parser = new HtmlParser();
+ parser.setConf(conf);
+ Parse parse = parser.getParse(
+ new Content(url, url, bytes, "text/html", new Metadata(), conf));
System.out.println("data: "+parse.getData());
System.out.println("text: "+parse.getText());