You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by si...@apache.org on 2009/03/10 08:07:23 UTC
svn commit: r752000 - in /lucene/nutch/trunk: CHANGES.txt
src/java/org/apache/nutch/util/DomUtil.java src/plugin/build.xml
src/plugin/subcollection/src/test/org/apache/nutch/collection/TestSubcollection.java
Author: siren
Date: Tue Mar 10 07:07:22 2009
New Revision: 752000
URL: http://svn.apache.org/viewvc?rev=752000&view=rev
Log:
NUTCH-715 - Subcollection plugin doesn't work with default subcollections.xml file. Contributed by Dmitry Lihachev
Modified:
lucene/nutch/trunk/CHANGES.txt
lucene/nutch/trunk/src/java/org/apache/nutch/util/DomUtil.java
lucene/nutch/trunk/src/plugin/build.xml
lucene/nutch/trunk/src/plugin/subcollection/src/test/org/apache/nutch/collection/TestSubcollection.java
Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=752000&r1=751999&r2=752000&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Tue Mar 10 07:07:22 2009
@@ -378,6 +378,9 @@
142. NUTCH-684 - Dedup support for Solr. (dogacan)
+143. NUTCH-715 - Subcollection plugin doesn't work with default
+ subcollections.xml file (Dmitry Lihachev via siren)
+
Release 0.9 - 2007-04-02
1. Changed log4j configuration to log to stdout on commandline
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/util/DomUtil.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/util/DomUtil.java?rev=752000&r1=751999&r2=752000&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/DomUtil.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/util/DomUtil.java Tue Mar 10 07:07:22 2009
@@ -60,7 +60,11 @@
input = new InputSource(is);
input.setEncoding("UTF-8");
parser.parse(input);
- element = (Element) parser.getDocument().getChildNodes().item(0);
+ int i = 0;
+ while (! (parser.getDocument().getChildNodes().item(i) instanceof Element)) {
+ i++;
+ }
+ element = (Element)parser.getDocument().getChildNodes().item(i);
} catch (FileNotFoundException e) {
e.printStackTrace(LogUtil.getWarnStream(LOG));
} catch (SAXException e) {
Modified: lucene/nutch/trunk/src/plugin/build.xml
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/build.xml?rev=752000&r1=751999&r2=752000&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/build.xml (original)
+++ lucene/nutch/trunk/src/plugin/build.xml Tue Mar 10 07:07:22 2009
@@ -112,6 +112,7 @@
<ant dir="parse-swf" target="test"/>
<ant dir="parse-zip" target="test"/>
<ant dir="query-url" target="test"/>
+ <ant dir="subcollection" target="test"/>
<ant dir="urlfilter-automaton" target="test"/>
<ant dir="urlfilter-domain" target="test" />
<ant dir="urlfilter-regex" target="test"/>
Modified: lucene/nutch/trunk/src/plugin/subcollection/src/test/org/apache/nutch/collection/TestSubcollection.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/subcollection/src/test/org/apache/nutch/collection/TestSubcollection.java?rev=752000&r1=751999&r2=752000&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/subcollection/src/test/org/apache/nutch/collection/TestSubcollection.java (original)
+++ lucene/nutch/trunk/src/plugin/subcollection/src/test/org/apache/nutch/collection/TestSubcollection.java Tue Mar 10 07:07:22 2009
@@ -49,6 +49,7 @@
public void testInput(){
StringBuffer xml=new StringBuffer();
xml.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
+ xml.append("<!-- just a comment -->");
xml.append("<subcollections>");
xml.append("<subcollection>");
xml.append("<name>nutch collection</name>");