You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by je...@apache.org on 2006/03/24 00:21:05 UTC

svn commit: r388293 - /lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java

Author: jerome
Date: Thu Mar 23 15:21:03 2006
New Revision: 388293

URL: http://svn.apache.org/viewcvs?rev=388293&view=rev
Log:
Set the configuration on the HtmlParser instance used in the main method to fix NullPointerExceptions (NPEs)

Modified:
    lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java

Modified: lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java?rev=388293&r1=388292&r2=388293&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java Thu Mar 23 15:21:03 2006
@@ -269,9 +269,11 @@
     byte[] bytes = new byte[(int)file.length()];
     DataInputStream in = new DataInputStream(new FileInputStream(file));
     in.readFully(bytes);
-    Parse parse = new HtmlParser().getParse(
-            new Content(url, url, bytes, "text/html", new Metadata(),
-                        NutchConfiguration.create()));
+    Configuration conf = NutchConfiguration.create();
+    HtmlParser parser = new HtmlParser();
+    parser.setConf(conf);
+    Parse parse = parser.getParse(
+            new Content(url, url, bytes, "text/html", new Metadata(), conf));
     System.out.println("data: "+parse.getData());
 
     System.out.println("text: "+parse.getText());