You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by al...@apache.org on 2013/06/12 10:24:29 UTC

svn commit: r1492109 - in /jackrabbit/oak/trunk: oak-core/src/main/java/org/apache/jackrabbit/oak/query/ oak-jcr/ oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/

Author: alexparvulescu
Date: Wed Jun 12 08:24:28 2013
New Revision: 1492109

URL: http://svn.apache.org/r1492109
Log:
OAK-318 Excerpt support
 - introduced a simple excerpt provider that injects an excerpt if it is needed
 - enabled parts of the excerpt tests in jackrabbit

Added:
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/SimpleExcerptProvider.java   (with props)
Modified:
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/Query.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java
    jackrabbit/oak/trunk/oak-jcr/pom.xml
    jackrabbit/oak/trunk/oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryJcrTest.java

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/Query.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/Query.java?rev=1492109&r1=1492108&r2=1492109&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/Query.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/Query.java Wed Jun 12 08:24:28 2013
@@ -582,7 +582,7 @@ public class Query {
                 return i;
             }
         }
-        throw new IllegalArgumentException("Column not found: " + columnName);
+        return -1;
     }
 
     public PropertyValue getBindVariableValue(String bindVariableName) {

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java?rev=1492109&r1=1492108&r2=1492109&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java Wed Jun 12 08:24:28 2013
@@ -16,10 +16,12 @@
  */
 package org.apache.jackrabbit.oak.query;
 
+import org.apache.jackrabbit.JcrConstants;
 import org.apache.jackrabbit.oak.api.PropertyValue;
 import org.apache.jackrabbit.oak.api.ResultRow;
 import org.apache.jackrabbit.oak.query.ast.ColumnImpl;
 import org.apache.jackrabbit.oak.query.ast.SelectorImpl;
+import org.apache.jackrabbit.oak.spi.query.PropertyValues;
 
 /**
  * A query result row that keeps all data (for this row only) in memory.
@@ -59,7 +61,23 @@ public class ResultRowImpl implements Re
 
     @Override
     public PropertyValue getValue(String columnName) {
-        return values[query.getColumnIndex(columnName)];
+        int index = query.getColumnIndex(columnName);
+        if (index >= 0) {
+            return values[index];
+        }
+        if (JcrConstants.JCR_PATH.equals(columnName)) {
+            return PropertyValues.newString(getPath());
+        }
+        if (columnName.startsWith(Query.REP_EXCERPT)) {
+            String ex = new SimpleExcerptProvider().getExcerpt(getPath(),
+                    columnName, query, true);
+            // a missing excerpt results in a null value for this column
+            if (ex != null) {
+                return PropertyValues.newString(ex);
+            }
+            return null;
+        }
+        throw new IllegalArgumentException("Column not found: " + columnName);
     }
 
     @Override

Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/SimpleExcerptProvider.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/SimpleExcerptProvider.java?rev=1492109&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/SimpleExcerptProvider.java (added)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/SimpleExcerptProvider.java Wed Jun 12 08:24:28 2013
@@ -0,0 +1,283 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.query;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.jackrabbit.oak.api.PropertyState;
+import org.apache.jackrabbit.oak.api.Tree;
+import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.commons.PathUtils;
+import org.apache.jackrabbit.oak.query.ast.AndImpl;
+import org.apache.jackrabbit.oak.query.ast.ConstraintImpl;
+import org.apache.jackrabbit.oak.query.ast.FullTextSearchImpl;
+import org.apache.jackrabbit.oak.query.ast.LiteralImpl;
+import static org.apache.jackrabbit.util.Text.encodeIllegalXMLCharacters;
+
+/**
+ * Creates simple HTML excerpts for query result rows: the string properties
+ * of the result node are concatenated and, on request, the terms of the
+ * fulltext condition of the query are wrapped in {@code <strong>} tags.
+ */
+public class SimpleExcerptProvider {
+
+    /** Column name that requests an excerpt over all string properties. */
+    private static final String REP_EXCERPT_FN = "rep:excerpt(.)";
+
+    /** Text longer than this is cut in excerpts that are not highlighted. */
+    private static final int MAX_FRAGMENT_SIZE = 150;
+
+    /**
+     * Creates the excerpt for one result row.
+     *
+     * @param path the path of the result node, may be {@code null}
+     * @param columnName the excerpt column, for example
+     *            {@code rep:excerpt(.)} or {@code rep:excerpt(title)}; if it
+     *            contains a slash, the parent path is resolved as a child
+     *            path of the result node
+     * @param query the query, used to read the tree and the constraint
+     * @param highlight whether the fulltext search terms are highlighted
+     * @return the excerpt markup, or {@code null} if the path is
+     *         {@code null} or the node (or the child node) does not exist
+     */
+    public String getExcerpt(String path, String columnName, Query query,
+            boolean highlight) {
+        if (path == null) {
+            return null;
+        }
+        Tree t = query.getTree(path);
+        if (t == null || !t.exists()) {
+            return null;
+        }
+        String filter;
+        if (columnName.contains("/")) {
+            for (String p : PathUtils.elements(PathUtils
+                    .getParentPath(columnName))) {
+                if (t.hasChild(p)) {
+                    t = t.getChild(p);
+                } else {
+                    return null;
+                }
+            }
+            filter = extractExcerptProperty(PathUtils.getName(columnName));
+        } else {
+            filter = extractExcerptProperty(columnName);
+        }
+
+        StringBuilder text = new StringBuilder();
+        String separator = "";
+        for (PropertyState p : t.getProperties()) {
+            if (p.getType().tag() == Type.STRING.tag()
+                    && (filter == null || filter.equalsIgnoreCase(p.getName()))) {
+                // separate every single value, so that the values of a
+                // multi-valued property are not glued together
+                for (String v : p.getValue(Type.STRINGS)) {
+                    text.append(separator);
+                    separator = " ";
+                    text.append(v);
+                }
+            }
+        }
+        String searchToken = extractFulltext(query.getConstraint());
+        if (highlight && searchToken != null) {
+            return highlight(text, searchToken);
+        }
+        return noHighlight(text);
+    }
+
+    /**
+     * Extracts the name of the property the excerpt is restricted to, that
+     * is, the text between the parentheses of the column name.
+     *
+     * @param column the excerpt column (without parent path)
+     * @return the property name, or {@code null} to use all properties
+     */
+    private static String extractExcerptProperty(String column) {
+        // most frequent case first
+        if (REP_EXCERPT_FN.equalsIgnoreCase(column)) {
+            return null;
+        }
+        int start = column.indexOf('(') + 1;
+        int end = column.indexOf(')');
+        if (end < start) {
+            // missing or misplaced closing parenthesis: use all properties
+            // instead of failing with a StringIndexOutOfBoundsException
+            return null;
+        }
+        return column.substring(start, end);
+    }
+
+    /**
+     * Finds the literal fulltext search expression of a constraint, looking
+     * into both sides of "and" conditions.
+     *
+     * @param c the constraint, may be {@code null}
+     * @return the search expression, or {@code null} if none was found
+     */
+    private static String extractFulltext(ConstraintImpl c) {
+        if (c instanceof FullTextSearchImpl) {
+            FullTextSearchImpl f = (FullTextSearchImpl) c;
+            if (f.getFullTextSearchExpression() instanceof LiteralImpl) {
+                LiteralImpl l = (LiteralImpl) f.getFullTextSearchExpression();
+                return l.getLiteralValue().getValue(Type.STRING);
+            }
+            return null;
+        }
+        if (c instanceof AndImpl) {
+            AndImpl a = (AndImpl) c;
+            String t = extractFulltext(a.getConstraint1());
+            if (t == null) {
+                return extractFulltext(a.getConstraint2());
+            }
+            return t;
+        }
+        return null;
+    }
+
+    /**
+     * Splits a search expression into tokens. Tokens are separated by
+     * spaces; text enclosed in single or double quotes forms one token.
+     *
+     * @param in the search expression
+     * @return the list of non-empty tokens, without the quotes
+     */
+    private static List<String> tokenize(String in) {
+        List<String> out = new ArrayList<String>();
+        StringBuilder token = new StringBuilder();
+        boolean quote = false;
+        for (int i = 0; i < in.length();) {
+            final int c = in.codePointAt(i);
+            switch (c) {
+            case ' ':
+                if (quote) {
+                    token.append(' ');
+                } else if (token.length() > 0) {
+                    out.add(token.toString());
+                    token = new StringBuilder();
+                }
+                break;
+            case '"':
+            case '\'':
+                if (quote) {
+                    quote = false;
+                    if (token.length() > 0) {
+                        out.add(token.toString());
+                        token = new StringBuilder();
+                    }
+                } else {
+                    quote = true;
+                }
+                break;
+            default:
+                token.appendCodePoint(c);
+            }
+            i += Character.charCount(c);
+        }
+        if (token.length() > 0) {
+            out.add(token.toString());
+        }
+        return out;
+    }
+
+    /**
+     * Creates an excerpt without highlighting. Text longer than
+     * {@link #MAX_FRAGMENT_SIZE} is cut, at the last space up to the limit
+     * if there is one, and " ..." is appended.
+     *
+     * @param text the text of the node; it is modified if it is too long
+     * @return the excerpt markup
+     */
+    private static String noHighlight(StringBuilder text) {
+        if (text.length() > MAX_FRAGMENT_SIZE) {
+            int lastSpace = text.lastIndexOf(" ", MAX_FRAGMENT_SIZE);
+            if (lastSpace != -1) {
+                text.setLength(lastSpace);
+            } else {
+                text.setLength(MAX_FRAGMENT_SIZE);
+            }
+            text.append(" ...");
+        }
+        StringBuilder excerpt = new StringBuilder("<div><span>");
+        excerpt.append(encodeIllegalXMLCharacters(text.toString()));
+        excerpt.append("</span></div>");
+        return excerpt.toString();
+    }
+
+    /**
+     * Creates an excerpt in which every occurrence of a search token is
+     * wrapped in {@code <strong>} tags. The text is not shortened.
+     *
+     * @param text the text of the node
+     * @param searchToken the fulltext search expression
+     * @return the excerpt markup
+     */
+    private static String highlight(StringBuilder text, String searchToken) {
+        List<String> tokens = tokenize(searchToken);
+        text = new StringBuilder(encodeIllegalXMLCharacters(text.toString()));
+        for (String token : tokens) {
+            text = replaceAll(text, token, "<strong>", "</strong>");
+        }
+
+        StringBuilder excerpt = new StringBuilder("<div><span>");
+        excerpt.append(text.toString());
+        excerpt.append("</span></div>");
+        return excerpt.toString();
+    }
+
+    /**
+     * Surrounds every occurrence of the token with the given markers. If
+     * the token ends with "*", the match is extended up to the next space
+     * (or the end of the text).
+     *
+     * @param in the text, modified in place
+     * @param token the token to search for
+     * @param start the marker inserted before each match
+     * @param end the marker inserted after each match
+     * @return the modified text (the same instance as {@code in})
+     */
+    private static StringBuilder replaceAll(StringBuilder in, String token,
+            String start, String end) {
+        boolean isLike = false;
+        if (token.endsWith("*")) {
+            token = token.substring(0, token.length() - 1);
+            isLike = true;
+        }
+        if (token.length() == 0) {
+            // an empty token (search term "*") would match at every
+            // position, also at the very end, and the loop would never stop
+            return in;
+        }
+        int index = in.indexOf(token);
+        while (index != -1) {
+            int endIndex = index + token.length();
+            if (isLike) {
+                int nextSpace = in.indexOf(" ", endIndex);
+                if (nextSpace != -1) {
+                    endIndex = nextSpace;
+                } else {
+                    endIndex = in.length();
+                }
+            }
+            String replacement = start + in.substring(index, endIndex) + end;
+            in.replace(index, endIndex, replacement);
+            // continue right behind the replacement that was just inserted
+            index = in.indexOf(token, index + replacement.length());
+        }
+        return in;
+    }
+}

Propchange: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/SimpleExcerptProvider.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: jackrabbit/oak/trunk/oak-jcr/pom.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-jcr/pom.xml?rev=1492109&r1=1492108&r2=1492109&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-jcr/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-jcr/pom.xml Wed Jun 12 08:24:28 2013
@@ -297,6 +297,13 @@
 
       org.apache.jackrabbit.oak.jcr.security.user.MemberNodeImportTest <!-- OAK-414, OAK-482 -->
       org.apache.jackrabbit.oak.jcr.security.user.UserImportTest#testImportGroupIntoUsersTree <!-- OAK-821 -->
+
+      org.apache.jackrabbit.core.query.ExcerptTest#testMoreTextDotsAtEnd                      <!-- OAK-318 -->
+      org.apache.jackrabbit.core.query.ExcerptTest#testMoreTextDotsAtStart                    <!-- OAK-318 -->
+      org.apache.jackrabbit.core.query.ExcerptTest#testMoreTextDotsAtStartAndEnd              <!-- OAK-318 -->
+      org.apache.jackrabbit.core.query.ExcerptTest#testPunctuationStartsFragment              <!-- OAK-318 -->
+      org.apache.jackrabbit.core.query.ExcerptTest#testPunctuationStartsFragmentEndsWithDots  <!-- OAK-318 -->
+      org.apache.jackrabbit.core.query.ExcerptTest#testPreferPhrase                           <!-- OAK-318 -->
     </known.issues>
   </properties>
 

Modified: jackrabbit/oak/trunk/oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryJcrTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryJcrTest.java?rev=1492109&r1=1492108&r2=1492109&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryJcrTest.java (original)
+++ jackrabbit/oak/trunk/oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryJcrTest.java Wed Jun 12 08:24:28 2013
@@ -20,6 +20,7 @@ import junit.framework.Test;
 import junit.framework.TestCase;
 import junit.framework.TestSuite;
 
+import org.apache.jackrabbit.core.query.ExcerptTest;
 import org.apache.jackrabbit.core.query.FulltextQueryTest;
 import org.apache.jackrabbit.core.query.FulltextSQL2QueryTest;
 import org.apache.jackrabbit.core.query.JoinTest;
@@ -52,12 +53,12 @@ public class QueryJcrTest extends TestCa
         suite.addTestSuite(SQL2OffsetLimitTest.class);
         suite.addTestSuite(LimitAndOffsetTest.class);
         suite.addTestSuite(OrderByTest.class);
+        suite.addTestSuite(ExcerptTest.class);
 
         // FAILURES
         //
         // suite.addTestSuite(QueryResultTest.class); // OAK-484
         // suite.addTestSuite(ParentNodeTest.class); // OAK-309
-        // suite.addTestSuite(ExcerptTest.class); // OAK-318
         // suite.addTestSuite(SimilarQueryTest.class); // OAK-319
         // suite.addTestSuite(DerefTest.class); // OAK-321
         // suite.addTestSuite(XPathAxisTest.class); // OAK-322