You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by to...@apache.org on 2014/06/06 17:43:59 UTC

svn commit: r1600935 - in /jackrabbit/oak/trunk/oak-solr-core/src: main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/ test/java/org/apache/jackrabbit/oak/jcr/query/ test/java/org/apache/jackrabbit/oak/plugins/index/solr/query/

Author: tommaso
Date: Fri Jun  6 15:43:59 2014
New Revision: 1600935

URL: http://svn.apache.org/r1600935
Log:
OAK-1835 - Solr index support for relative properties

Modified:
    jackrabbit/oak/trunk/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java
    jackrabbit/oak/trunk/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryFulltextTest.java
    jackrabbit/oak/trunk/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrIndexQueryTest.java

Modified: jackrabbit/oak/trunk/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java?rev=1600935&r1=1600934&r2=1600935&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java (original)
+++ jackrabbit/oak/trunk/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java Fri Jun  6 15:43:59 2014
@@ -16,13 +16,23 @@
  */
 package org.apache.jackrabbit.oak.plugins.index.solr.query;
 
+import java.io.IOException;
 import java.util.Collection;
+import java.util.Collections;
+import java.util.Deque;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
 import javax.annotation.CheckForNull;
 
+import com.google.common.collect.AbstractIterator;
+import com.google.common.collect.Queues;
+import com.google.common.collect.Sets;
 import org.apache.jackrabbit.JcrConstants;
 import org.apache.jackrabbit.oak.api.PropertyValue;
 import org.apache.jackrabbit.oak.plugins.index.aggregate.NodeAggregator;
 import org.apache.jackrabbit.oak.plugins.index.solr.configuration.OakSolrConfiguration;
+import org.apache.jackrabbit.oak.query.QueryEngineSettings;
 import org.apache.jackrabbit.oak.query.QueryImpl;
 import org.apache.jackrabbit.oak.query.fulltext.FullTextAnd;
 import org.apache.jackrabbit.oak.query.fulltext.FullTextExpression;
@@ -30,6 +40,7 @@ import org.apache.jackrabbit.oak.query.f
 import org.apache.jackrabbit.oak.query.fulltext.FullTextTerm;
 import org.apache.jackrabbit.oak.query.fulltext.FullTextVisitor;
 import org.apache.jackrabbit.oak.spi.query.Cursor;
+import org.apache.jackrabbit.oak.spi.query.Cursors;
 import org.apache.jackrabbit.oak.spi.query.Filter;
 import org.apache.jackrabbit.oak.spi.query.IndexRow;
 import org.apache.jackrabbit.oak.spi.query.PropertyValues;
@@ -38,14 +49,15 @@ import org.apache.jackrabbit.oak.spi.que
 import org.apache.jackrabbit.oak.spi.state.NodeState;
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.SolrServer;
-import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.client.solrj.response.QueryResponse;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static org.apache.jackrabbit.oak.commons.PathUtils.getAncestorPath;
+import static org.apache.jackrabbit.oak.commons.PathUtils.getDepth;
 import static org.apache.jackrabbit.oak.commons.PathUtils.getName;
+import static org.apache.jackrabbit.oak.commons.PathUtils.getParentPath;
 
 /**
  * A Solr based {@link QueryIndex}
@@ -107,8 +119,9 @@ public class SolrQueryIndex implements F
 
         StringBuilder queryBuilder = new StringBuilder();
 
-        if (filter.getFullTextConstraint() != null) {
-            queryBuilder.append(getFullTextQuery(filter.getFullTextConstraint()));
+        FullTextExpression ft = filter.getFullTextConstraint();
+        if (ft != null) {
+            queryBuilder.append(getFullTextQuery(ft));
             queryBuilder.append(' ');
         } else if (filter.getFulltextConditions() != null) {
             Collection<String> fulltextConditions = filter.getFulltextConditions();
@@ -324,6 +337,39 @@ public class SolrQueryIndex implements F
         return fullTextString.toString();
     }
 
+    /**
+     * Get the set of relative paths of a full-text condition. For example, for
+     * the condition "contains(a/b, 'hello') and contains(c/d, 'world')", the set
+     * { "a", "c" } is returned. If there are no relative properties, then one
+     * entry is returned (the empty string). If there is no expression, then an
+     * empty set is returned.
+     *
+     * @param ft the full-text expression
+     * @return the set of relative paths (possibly empty)
+     */
+    private static Set<String> getRelativePaths(FullTextExpression ft) {
+        final HashSet<String> relPaths = new HashSet<String>();
+        ft.accept(new FullTextVisitor.FullTextVisitorBase() {
+
+            @Override
+            public boolean visit(FullTextTerm term) {
+                String p = term.getPropertyName();
+                if (p == null) {
+                    relPaths.add("");
+                } else if (p.startsWith("../") || p.startsWith("./")) {
+                    throw new IllegalArgumentException("Relative parent is not supported:" + p);
+                } else if (getDepth(p) > 1) {
+                    String parent = getParentPath(p);
+                    relPaths.add(parent);
+                } else {
+                    relPaths.add("");
+                }
+                return true;
+            }
+        });
+        return relPaths;
+    }
+
     private boolean isSupportedHttpRequest(String nativeQueryString) {
         // the query string starts with ${supported-handler.selector}?
         return nativeQueryString.matches("(mlt|query|select|get)\\\\?.*");
@@ -331,7 +377,7 @@ public class SolrQueryIndex implements F
 
     private void setDefaults(SolrQuery solrQuery) {
         solrQuery.setParam("q.op", "AND");
-        solrQuery.setParam("fl", "* score");
+        solrQuery.setParam("fl", configuration.getPathField() + " score");
         String catchAllField = configuration.getCatchAllField();
         if (catchAllField != null && catchAllField.length() > 0) {
             solrQuery.setParam("df", catchAllField);
@@ -367,107 +413,197 @@ public class SolrQueryIndex implements F
     }
 
     @Override
-    public Cursor query(Filter filter, NodeState root) {
-        if (log.isDebugEnabled()) {
-            log.debug("converting filter {}", filter);
-        }
+    public Cursor query(final Filter filter, NodeState root) {
         Cursor cursor;
         try {
-            SolrQuery query = getQuery(filter);
-            if (log.isDebugEnabled()) {
-                log.debug("sending query {}", query);
-            }
-            QueryResponse queryResponse = solrServer.query(query);
-            if (log.isDebugEnabled()) {
-                log.debug("getting response {}", queryResponse);
-            }
-            cursor = new SolrCursor(queryResponse, query);
+            final Set<String> relPaths = filter.getFullTextConstraint() != null ? getRelativePaths(filter.getFullTextConstraint()) : Collections.<String>emptySet();
+            final String parent = relPaths.size() == 0 ? "" : relPaths.iterator().next();
+
+            final int parentDepth = getDepth(parent);
+
+
+            cursor = new SolrRowCursor(new AbstractIterator<SolrResultRow>() {
+                private final Set<String> seenPaths = Sets.newHashSet();
+                private final Deque<SolrResultRow> queue = Queues.newArrayDeque();
+                private SolrDocument lastDoc;
+                public int offset = 0;
+
+                @Override
+                protected SolrResultRow computeNext() {
+                    while (!queue.isEmpty() || loadDocs()) {
+                        return queue.remove();
+                    }
+                    return endOfData();
+                }
+
+                private SolrResultRow convertToRow(SolrDocument doc) throws IOException {
+                    String path = String.valueOf(doc.getFieldValue(configuration.getPathField()));
+                    if (path != null) {
+                        if ("".equals(path)) {
+                            path = "/";
+                        }
+                        if (!parent.isEmpty()) {
+                            path = getAncestorPath(path, parentDepth);
+                            // avoid duplicate entries
+                            if (seenPaths.contains(path)) {
+                                return null;
+                            }
+                            seenPaths.add(path);
+                        }
+
+                        float score = 0f;
+                        Object scoreObj = doc.get("score");
+                        if (scoreObj != null) {
+                            score = (Float) scoreObj;
+                        }
+                        return new SolrResultRow(path, score, doc);
+                    }
+                    return null;
+                }
+
+                /**
+                 * Loads the Solr documents in batches
+                 * @return true if any document is loaded
+                 */
+                private boolean loadDocs() {
+                    SolrDocument lastDocToRecord = null;
+
+                    try {
+                        if (log.isDebugEnabled()) {
+                            log.debug("converting filter {}", filter);
+                        }
+                        SolrQuery query = getQuery(filter);
+                        if (lastDoc != null) {
+                            offset++;
+                            int newOffset = offset * configuration.getRows();
+                            query.setParam("start", String.valueOf(newOffset));
+                        }
+                        if (log.isDebugEnabled()) {
+                            log.debug("sending query {}", query);
+                        }
+                        SolrDocumentList docs = solrServer.query(query).getResults();
+
+                        if (log.isDebugEnabled()) {
+                            log.debug("getting docs {}", docs);
+                        }
+
+                        for (SolrDocument doc : docs) {
+                            SolrResultRow row = convertToRow(doc);
+                            if (row != null) {
+                                queue.add(row);
+                            }
+                            lastDocToRecord = doc;
+                        }
+                    } catch (Exception e) {
+                        if (log.isWarnEnabled()) {
+                            log.warn("query via {} failed.", solrServer, e);
+                        }
+                    }
+                    if (lastDocToRecord != null) {
+                        this.lastDoc = lastDocToRecord;
+                    }
+
+                    return !queue.isEmpty();
+                }
+
+            }, filter.getQueryEngineSettings());
         } catch (Exception e) {
             throw new RuntimeException(e);
         }
         return cursor;
     }
 
+    static class SolrResultRow {
+        final String path;
+        final double score;
+        SolrDocument doc;
+
+        SolrResultRow(String path, double score) {
+            this.path = path;
+            this.score = score;
+        }
 
-    private class SolrCursor implements Cursor {
+        SolrResultRow(String path, double score, SolrDocument doc) {
+            this.path = path;
+            this.score = score;
+            this.doc = doc;
+        }
+
+        @Override
+        public String toString() {
+            return String.format("%s (%1.2f)", path, score);
+        }
+    }
+
+    /**
+     * A cursor over Solr results. The result includes the path and the jcr:score pseudo-property as returned by Solr,
+     * plus, optionally, the stored field values if a {@link org.apache.solr.common.SolrDocument} is included in the
+     * {@link org.apache.jackrabbit.oak.plugins.index.solr.query.SolrQueryIndex.SolrResultRow}.
+     */
+    static class SolrRowCursor implements Cursor {
 
-        private SolrDocumentList results;
+        private final Cursor pathCursor;
+        SolrResultRow currentRow;
 
-        private SolrQuery query;
+        SolrRowCursor(final Iterator<SolrResultRow> it, QueryEngineSettings settings) {
+            Iterator<String> pathIterator = new Iterator<String>() {
 
-        private int counter;
-        private int offset;
+                @Override
+                public boolean hasNext() {
+                    return it.hasNext();
+                }
 
-        public SolrCursor(QueryResponse queryResponse, SolrQuery query) {
-            this.results = queryResponse.getResults();
-            this.counter = 0;
-            this.offset = 0;
-            this.query = query;
+                @Override
+                public String next() {
+                    currentRow = it.next();
+                    return currentRow.path;
+                }
+
+                @Override
+                public void remove() {
+                    it.remove();
+                }
+
+            };
+            pathCursor = new Cursors.PathCursor(pathIterator, true, settings);
         }
 
+
         @Override
         public boolean hasNext() {
-            return results != null && offset + counter < results.getNumFound();
+            return pathCursor.hasNext();
         }
 
         @Override
         public void remove() {
-            results.remove(counter);
+            pathCursor.remove();
         }
 
+        @Override
         public IndexRow next() {
-            if (counter < results.size() || updateResults()) {
-                final SolrDocument doc = results.get(counter);
-                counter++;
-                return new IndexRow() {
-                    @Override
-                    public String getPath() {
-                        return String.valueOf(doc.getFieldValue(
-                                configuration.getPathField()));
-                    }
-
-                    @Override
-                    public PropertyValue getValue(String columnName) {
-                        if (QueryImpl.JCR_SCORE.equals(columnName)) {
-                            float score = 0f;
-                            Object scoreObj = doc.get("score");
-                            if (scoreObj != null) {
-                                score = (Float) scoreObj;
-                            }
-                            return PropertyValues.newDouble((double) score);
-                        }
-                        Object o = doc.getFieldValue(columnName);
-                        return o == null ? null : PropertyValues.newString(o.toString());
-                    }
+            final IndexRow pathRow = pathCursor.next();
+            return new IndexRow() {
 
-                };
-            } else {
-                return null;
-            }
-        }
+                @Override
+                public String getPath() {
+                    return pathRow.getPath();
+                }
 
-        private boolean updateResults() {
-            int newOffset = offset + results.size();
-            query.setParam("start", String.valueOf(newOffset));
-            try {
-                QueryResponse queryResponse = solrServer.query(query);
-                SolrDocumentList localResults = queryResponse.getResults();
-                boolean hasMoreResults = localResults.size() > 0;
-                if (hasMoreResults) {
-                    counter = 0;
-                    offset = newOffset;
-                    results = localResults;
-                } else {
-                    query.setParam("start", String.valueOf(offset));
+                @Override
+                public PropertyValue getValue(String columnName) {
+                    // overlay the score
+                    if (QueryImpl.JCR_SCORE.equals(columnName)) {
+                        return PropertyValues.newDouble(currentRow.score);
+                    }
+                    // TODO : make inclusion of doc configurable
+                    return currentRow.doc != null ? PropertyValues.newString(
+                            String.valueOf(currentRow.doc.getFieldValue(columnName))) : null;
                 }
-                return hasMoreResults;
-            } catch (SolrServerException e) {
-                throw new RuntimeException("error retrieving paged results", e);
-            }
+
+            };
         }
     }
 
-
     @Override
     @CheckForNull
     public NodeAggregator getNodeAggregator() {

Modified: jackrabbit/oak/trunk/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryFulltextTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryFulltextTest.java?rev=1600935&r1=1600934&r2=1600935&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryFulltextTest.java (original)
+++ jackrabbit/oak/trunk/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryFulltextTest.java Fri Jun  6 15:43:59 2014
@@ -88,24 +88,23 @@ public class QueryFulltextTest extends A
 
         q = qm.createQuery(sql2, Query.JCR_SQL2);
         assertEquals("/testroot/node2, /testroot/node3", getResult(q.execute(), "path"));
-//
-//        sql2 = "select [jcr:path] as [path] from [nt:base] "
-//                + "where contains([node1/text], 'hello') order by [jcr:path]";
-//        q = qm.createQuery(sql2, Query.JCR_SQL2);
-//        assertEquals("/testroot", getResult(q.execute(), "path"));
-//
-//        sql2 = "select [jcr:path] as [path] from [nt:base] "
-//                + "where contains([node2/text], 'hello OR hallo') order by [jcr:path]";
-//        q = qm.createQuery(sql2, Query.JCR_SQL2);
-//        assertEquals("/testroot", getResult(q.execute(), "path"));
 
-        // TODO OAK-890
-        // sql2 = "select [jcr:path] as [path] from [nt:base] "
-        // + "where contains([node1/text], 'hello') "
-        // + "and contains([node2/text], 'hallo') "
-        // + "order by [jcr:path]";
-        // q = qm.createQuery(sql2, Query.JCR_SQL2);
-        // assertEquals("/testroot", getResult(q.execute(), "path"));
+        sql2 = "select [jcr:path] as [path] from [nt:base] "
+                + "where contains([node1/text], 'hello') order by [jcr:path]";
+        q = qm.createQuery(sql2, Query.JCR_SQL2);
+        assertEquals("/testroot", getResult(q.execute(), "path"));
+
+        sql2 = "select [jcr:path] as [path] from [nt:base] "
+                + "where contains([node2/text], 'hello OR hallo') order by [jcr:path]";
+        q = qm.createQuery(sql2, Query.JCR_SQL2);
+        assertEquals("/testroot", getResult(q.execute(), "path"));
+
+        sql2 = "select [jcr:path] as [path] from [nt:base] "
+        + "where contains([node1/text], 'hello') "
+        + "and contains([node2/text], 'hallo') "
+        + "order by [jcr:path]";
+        q = qm.createQuery(sql2, Query.JCR_SQL2);
+        assertEquals("/testroot", getResult(q.execute(), "path"));
     }
 
     static String getResult(QueryResult result, String propertyName) throws RepositoryException {

Modified: jackrabbit/oak/trunk/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrIndexQueryTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrIndexQueryTest.java?rev=1600935&r1=1600934&r2=1600935&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrIndexQueryTest.java (original)
+++ jackrabbit/oak/trunk/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrIndexQueryTest.java Fri Jun  6 15:43:59 2014
@@ -294,4 +294,133 @@ public class SolrIndexQueryTest extends 
         root.commit();
         assertQuery("//*[jcr:contains(., '美女')]", "xpath", ImmutableList.of(one.getPath()));
     }
+
+    @Test
+    public void testCompositeRepExcerpt() throws Exception {
+        String sqlQuery = "select [jcr:path], [jcr:score], [rep:excerpt] from [nt:base] as a " +
+                "where (contains([jcr:content/*], 'square') or contains([jcr:content/jcr:title], 'square')" +
+                " or contains([jcr:content/jcr:description], 'square')) and isdescendantnode(a, '/test') " +
+                "order by [jcr:score] desc";
+        Tree tree = root.getTree("/");
+        Tree test = tree.addChild("test");
+        Tree child = test.addChild("child");
+        Tree a = child.addChild("a");
+        a.setProperty("jcr:title", "Hello World, today square is nice");
+        Tree b = child.addChild("b");
+        b.setProperty("jcr:description", "Cheers World, today weather is squary nice");
+        Tree c = child.addChild("c");
+        c.setProperty("jcr:title", "Halo Welt, today sky is square");
+        root.commit();
+
+        Iterator<String> strings = executeQuery(sqlQuery, "JCR-SQL2").iterator();
+        assertTrue(strings.hasNext());
+        assertTrue(strings.next().startsWith("/test/child,"));
+        assertFalse(strings.hasNext());
+    }
+
+    @Test
+    public void contains() throws Exception {
+        String h = "Hello" + System.currentTimeMillis();
+        String w = "World" + System.currentTimeMillis();
+
+        Tree test = root.getTree("/").addChild("test");
+        test.addChild("a").setProperty("name", asList(h, w), STRINGS);
+        test.addChild("b").setProperty("name", h);
+        root.commit();
+
+        // query 'hello'
+        StringBuffer stmt = new StringBuffer();
+        stmt.append("/jcr:root//*[jcr:contains(., '").append(h);
+        stmt.append("')]");
+        assertQuery(stmt.toString(), "xpath",
+                ImmutableList.of("/test/a", "/test/b"));
+
+        // query 'world'
+        stmt = new StringBuffer();
+        stmt.append("/jcr:root//*[jcr:contains(., '").append(w);
+        stmt.append("')]");
+        assertQuery(stmt.toString(), "xpath", ImmutableList.of("/test/a"));
+
+    }
+
+    @Test
+    @Ignore("depends on chosen text_general tokenizer")
+    public void containsDash() throws Exception {
+        Tree test = root.getTree("/").addChild("test");
+        test.addChild("a").setProperty("name", "hello-wor");
+        test.addChild("b").setProperty("name", "hello-world");
+        test.addChild("c").setProperty("name", "hello");
+        root.commit();
+
+        assertQuery("/jcr:root//*[jcr:contains(., 'hello-wor*')]", "xpath",
+                ImmutableList.of("/test/a", "/test/b"));
+        assertQuery("/jcr:root//*[jcr:contains(., '*hello-wor*')]", "xpath",
+                ImmutableList.of("/test/a", "/test/b"));
+
+    }
+
+    @Test
+    public void multiPhraseQuery() throws Exception {
+        Tree test = root.getTree("/").addChild("test");
+        test.addChild("a").setProperty("dc:format", "type:application/pdf");
+        root.commit();
+
+        assertQuery(
+                "/jcr:root//*[jcr:contains(@dc:format, 'type:appli*')]",
+                "xpath", ImmutableList.of("/test/a"));
+
+    }
+
+    @Test
+    public void containsPath() throws Exception {
+
+        Tree test = root.getTree("/").addChild("test");
+        test.addChild("a").setProperty("name", "/parent/child/node");
+        root.commit();
+
+        StringBuffer stmt = new StringBuffer();
+        stmt.append("//*[jcr:contains(., '/parent/child')]");
+        assertQuery(stmt.toString(), "xpath", ImmutableList.of("/test/a"));
+
+    }
+
+    @Test
+    public void containsPathNum() throws Exception {
+
+        Tree test = root.getTree("/").addChild("test");
+        Tree a = test.addChild("a");
+        a.setProperty("name", "/segment1/segment2/segment3");
+        root.commit();
+
+        StringBuffer stmt = new StringBuffer();
+        stmt.append("//*[jcr:contains(., '/segment1/segment2')]");
+        assertQuery(stmt.toString(), "xpath", ImmutableList.of("/test/a"));
+
+    }
+
+    /**
+     * OAK-1208 property existence constraints break queries
+     */
+    @Test
+    public void testOAK1208() throws Exception {
+        Tree t = root.getTree("/").addChild("containsWithMultipleOr");
+        Tree one = t.addChild("one");
+        one.setProperty("p", "dam/smartcollection");
+        one.setProperty("t", "media");
+
+        Tree two = t.addChild("two");
+        two.setProperty("p", "dam/collection");
+        two.setProperty("t", "media");
+
+        Tree three = t.addChild("three");
+        three.setProperty("p", "dam/hits");
+        three.setProperty("t", "media");
+
+        root.commit();
+
+        StringBuffer stmt = new StringBuffer();
+        stmt.append("//*[jcr:contains(., 'media') and (@p = 'dam/smartcollection' or @p = 'dam/collection') ]");
+        assertQuery(stmt.toString(), "xpath",
+                ImmutableList.of(one.getPath(), two.getPath()));
+    }
 }