You are viewing a plain text version of this content. The hyperlink to the canonical version was not preserved in this plain-text copy.
Posted to solr-commits@lucene.apache.org by sh...@apache.org on 2009/02/02 12:30:23 UTC
svn commit: r739962 - in /lucene/solr/trunk/contrib/dataimporthandler:
CHANGES.txt
src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java
src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java
Author: shalin
Date: Mon Feb 2 11:30:18 2009
New Revision: 739962
URL: http://svn.apache.org/viewvc?rev=739962&view=rev
Log:
SOLR-999 -- XPathRecordReader fails on XMLs with nodes mixed with CDATA content
Modified:
lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt
lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java
lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java
Modified: lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt?rev=739962&r1=739961&r2=739962&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt (original)
+++ lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt Mon Feb 2 11:30:18 2009
@@ -106,6 +106,9 @@
13. SOLR-985: Fix thread-safety issue with TemplateString for concurrent imports with multiple cores.
(Ryuuichi Kumai via shalin)
+14. SOLR-999: XPathRecordReader fails on XMLs with nodes mixed with CDATA content.
+ (Fergus McMenemie, Noble Paul via shalin)
+
Documentation
----------------------
Modified: lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java?rev=739962&r1=739961&r2=739962&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java (original)
+++ lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java Mon Feb 2 11:30:18 2009
@@ -162,19 +162,20 @@
skipNextEvent = true;
String text = parser.getText();
event = parser.next();
- while (event == CDATA || event == CHARACTERS || event == SPACE) {
- text = text + parser.getText();
+
+ while (true) {
+ if(event == CDATA || event == CHARACTERS || event == SPACE) {
+ text = text + parser.getText();
+ } else if(event == START_ELEMENT) {
+ handleStartElement(parser, childrenFound, handler, values, stack, recordStarted);
+ } else {
+ break;
+ }
event = parser.next();
}
putText(values, text, fieldName, multiValued);
} else if (event == START_ELEMENT) {
- Node n = getMatchingChild(parser);
- if (n != null) {
- childrenFound.add(n);
- n.parse(parser, handler, values, stack, recordStarted);
- } else {
- skipTag(parser);
- }
+ handleStartElement(parser, childrenFound, handler, values, stack, recordStarted);
}
}
} finally {
@@ -193,6 +194,19 @@
}
}
+ private void handleStartElement(XMLStreamReader parser, Set<Node> childrenFound,
+ Handler handler, Map<String, Object> values,
+ Stack<Set<String>> stack, boolean recordStarted)
+ throws IOException, XMLStreamException {
+ Node n = getMatchingChild(parser);
+ if (n != null) {
+ childrenFound.add(n);
+ n.parse(parser, handler, values, stack, recordStarted);
+ } else {
+ skipTag(parser);
+ }
+ }
+
private Node getMatchingChild(XMLStreamReader parser) {
if (childNodes == null)
return null;
Modified: lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java?rev=739962&r1=739961&r2=739962&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java (original)
+++ lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java Mon Feb 2 11:30:18 2009
@@ -25,9 +25,7 @@
import java.util.Map;
/**
- * <p>
- * Test for XPathRecordReader
- * </p>
+ * <p> Test for XPathRecordReader </p>
*
* @version $Id$
* @since solr 1.3
@@ -136,6 +134,28 @@
}
@Test
+ public void mixedContent() {
+ String xml = "<xhtml:p xmlns:xhtml=\"http://xhtml.com/\" >This text is \n" +
+ " <xhtml:b>bold</xhtml:b> and this text is \n" +
+ " <xhtml:u>underlined</xhtml:u>!\n" +
+ "</xhtml:p>";
+ XPathRecordReader rr = new XPathRecordReader("/p");
+ rr.addField("p", "/p", true);
+ rr.addField("b", "/p/b", true);
+ rr.addField("u", "/p/u", true);
+ List<Map<String, Object>> l = rr.getAllRecords(new StringReader(xml));
+ Map<String, Object> row = l.get(0);
+
+ Assert.assertEquals("bold", ((List) row.get("b")).get(0));
+ Assert.assertEquals("underlined", ((List) row.get("u")).get(0));
+ String p = (String) ((List) row.get("p")).get(0);
+ Assert.assertTrue(p.contains("This text is"));
+ Assert.assertTrue(p.contains("and this text is"));
+ Assert.assertTrue(p.contains("!"));
+
+ }
+
+ @Test
public void elems2LevelWithAttrib() {
String xml = "<root>\n" + "\t<a>\n" + "\t <b k=\"x\">\n"
+ "\t <x>x0</x>\n" + "\t <y>y0</y>\n" + "\t </b>\n"