You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by sh...@apache.org on 2009/02/26 13:41:08 UTC

svn commit: r748117 - in /lucene/solr/trunk/contrib/dataimporthandler: CHANGES.txt src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java

Author: shalin
Date: Thu Feb 26 12:41:08 2009
New Revision: 748117

URL: http://svn.apache.org/viewvc?rev=748117&view=rev
Log:
SOLR-1040 -- XPathEntityProcessor fails with an xpath containing a forward slash in an attribute selector's value

Modified:
    lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt
    lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java
    lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java

Modified: lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt?rev=748117&r1=748116&r2=748117&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt (original)
+++ lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt Thu Feb 26 12:41:08 2009
@@ -155,6 +155,9 @@
 19.SOLR-1037: DIH should not add null values in a row returned by EntityProcessor to documents.
               (shalin)
 
+20.SOLR-1040: XPathEntityProcessor fails with an xpath like /feed/entry/link[@type='text/html']/@href
+              (Noble Paul via shalin)
+
 Documentation
 ----------------------
 

Modified: lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java?rev=748117&r1=748116&r2=748117&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java (original)
+++ lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java Thu Feb 26 12:41:08 2009
@@ -67,7 +67,7 @@
 
   private void addField0(String xpath, String name, boolean multiValued,
                          boolean isRecord, int flags) {
-    List<String> paths = new LinkedList<String>(Arrays.asList(xpath.split("/")));
+    List<String> paths = splitEscapeQuote(xpath);
     if ("".equals(paths.get(0).trim()))
       paths.remove(0);
     rootNode.build(paths, name, multiValued, isRecord, flags);
@@ -367,6 +367,30 @@
     return result;
   }
 
+  /**
+   * Used for handling cases where there is a slash '/' character
+   * inside the attribute value e.g. x[@html='text/html']. We need to split
+   * by '/' excluding the '/' which is a part of the attribute's value.
+   */
+  private static List<String> splitEscapeQuote(String str) {
+    List<String> result = new LinkedList<String>();
+    String[] ss = str.split("/");
+    for (int i = 0; i < ss.length; i++) {
+      if (ss[i].length() == 0 && result.size() == 0) continue;
+      StringBuilder sb = new StringBuilder();
+      int quoteCount = 0;
+      while (true) {
+        sb.append(ss[i]);
+        for (int j = 0; j < ss[i].length(); j++) if (ss[i].charAt(j) == '\'') quoteCount++;
+        if ((quoteCount % 2) == 0) break;
+        i++;
+        sb.append("/");
+      }
+      result.add(sb.toString());
+    }
+    return result;
+  }
+
   static XMLInputFactory factory = XMLInputFactory.newInstance();
   static{
     factory.setProperty(XMLInputFactory.IS_VALIDATING , Boolean.FALSE); 

Modified: lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java?rev=748117&r1=748116&r2=748117&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java (original)
+++ lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java Thu Feb 26 12:41:08 2009
@@ -227,6 +227,19 @@
   }
 
   @Test
+  public void  attribValWithSlash() {
+    String xml = "<root><b>\n" +
+            "  <a x=\"a/b\" h=\"hello-A\"/>  \n" +
+            "</b></root>";
+    XPathRecordReader rr = new XPathRecordReader("/root/b");
+    rr.addField("x", "/root/b/a[@x='a/b']/@h", false);
+    List<Map<String, Object>> l = rr.getAllRecords(new StringReader(xml));
+    Assert.assertEquals(1, l.size());
+    Map<String, Object> m = l.get(0);
+    Assert.assertEquals("hello-A", m.get("x"));    
+  }
+
+  @Test
   public void another() {
     String xml = "<root>\n"
             + "       <contenido id=\"10097\" idioma=\"cat\">\n"