You are viewing a plain text version of this content. The canonical version is the original message in the commits@nutch.apache.org mailing list archive.
Posted to commits@nutch.apache.org by si...@apache.org on 2009/03/10 08:07:23 UTC

svn commit: r752000 - in /lucene/nutch/trunk: CHANGES.txt src/java/org/apache/nutch/util/DomUtil.java src/plugin/build.xml src/plugin/subcollection/src/test/org/apache/nutch/collection/TestSubcollection.java

Author: siren
Date: Tue Mar 10 07:07:22 2009
New Revision: 752000

URL: http://svn.apache.org/viewvc?rev=752000&view=rev
Log:
NUTCH-715 - Subcollection plugin doesn't work with default subcollections.xml file. Contributed by Dmitry Lihachev

Modified:
    lucene/nutch/trunk/CHANGES.txt
    lucene/nutch/trunk/src/java/org/apache/nutch/util/DomUtil.java
    lucene/nutch/trunk/src/plugin/build.xml
    lucene/nutch/trunk/src/plugin/subcollection/src/test/org/apache/nutch/collection/TestSubcollection.java

Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=752000&r1=751999&r2=752000&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Tue Mar 10 07:07:22 2009
@@ -378,6 +378,9 @@
 
 142. NUTCH-684 - Dedup support for Solr. (dogacan)
 
+143. NUTCH-715 - Subcollection plugin doesn't work with default
+     subcollections.xml file (Dmitry Lihachev via siren)
+
 Release 0.9 - 2007-04-02
 
  1. Changed log4j confiquration to log to stdout on commandline

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/util/DomUtil.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/util/DomUtil.java?rev=752000&r1=751999&r2=752000&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/DomUtil.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/util/DomUtil.java Tue Mar 10 07:07:22 2009
@@ -60,7 +60,11 @@
       input = new InputSource(is);
       input.setEncoding("UTF-8");
       parser.parse(input);
-      element = (Element) parser.getDocument().getChildNodes().item(0);
+      int i = 0;
+      while (! (parser.getDocument().getChildNodes().item(i) instanceof Element)) {
+       i++;
+      } 
+      element = (Element)parser.getDocument().getChildNodes().item(i);
     } catch (FileNotFoundException e) {
       e.printStackTrace(LogUtil.getWarnStream(LOG));
     } catch (SAXException e) {

Modified: lucene/nutch/trunk/src/plugin/build.xml
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/build.xml?rev=752000&r1=751999&r2=752000&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/build.xml (original)
+++ lucene/nutch/trunk/src/plugin/build.xml Tue Mar 10 07:07:22 2009
@@ -112,6 +112,7 @@
      <ant dir="parse-swf" target="test"/>
      <ant dir="parse-zip" target="test"/>
      <ant dir="query-url" target="test"/>
+     <ant dir="subcollection" target="test"/>
      <ant dir="urlfilter-automaton" target="test"/>
      <ant dir="urlfilter-domain" target="test" />
      <ant dir="urlfilter-regex" target="test"/>

Modified: lucene/nutch/trunk/src/plugin/subcollection/src/test/org/apache/nutch/collection/TestSubcollection.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/subcollection/src/test/org/apache/nutch/collection/TestSubcollection.java?rev=752000&r1=751999&r2=752000&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/subcollection/src/test/org/apache/nutch/collection/TestSubcollection.java (original)
+++ lucene/nutch/trunk/src/plugin/subcollection/src/test/org/apache/nutch/collection/TestSubcollection.java Tue Mar 10 07:07:22 2009
@@ -49,6 +49,7 @@
   public void testInput(){
     StringBuffer xml=new StringBuffer();
     xml.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
+    xml.append("<!-- just a comment -->");
     xml.append("<subcollections>");
     xml.append("<subcollection>");
     xml.append("<name>nutch collection</name>");