You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by th...@apache.org on 2018/04/26 07:52:54 UTC

svn commit: r1830170 - in /jackrabbit/oak/trunk: oak-doc/src/site/markdown/query/ oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/ oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/

Author: thomasm
Date: Thu Apr 26 07:52:54 2018
New Revision: 1830170

URL: http://svn.apache.org/viewvc?rev=1830170&view=rev
Log:
OAK-7437 SimpleExcerptProvider highlighting should be case insensitive

Modified:
    jackrabbit/oak/trunk/oak-doc/src/site/markdown/query/query-engine.md
    jackrabbit/oak/trunk/oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryFulltextTest.java
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/OakStreamingIndexFile.java

Modified: jackrabbit/oak/trunk/oak-doc/src/site/markdown/query/query-engine.md
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-doc/src/site/markdown/query/query-engine.md?rev=1830170&r1=1830169&r2=1830170&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-doc/src/site/markdown/query/query-engine.md (original)
+++ jackrabbit/oak/trunk/oak-doc/src/site/markdown/query/query-engine.md Thu Apr 26 07:52:54 2018
@@ -338,25 +338,46 @@ New applications should not rely on this
 
 ### Excerpts and Highlighting
 
-If excerpts and highlighting is needed, then queries should contains the "excerpt" property, as follows:
+The Lucene index can be configured to provide excerpts and highlighting.
+See <a href="lucene.html#Property_Definitions">useInExcerpt</a> for details
+on how to configure excerpt generation.
 
-    /jcr:root/content//*[jcr:contains(., 'test')]/(rep:excerpt())
+For queries to use those excerpts, the query needs to use the Lucene index where 
+this is configured. The query also needs to contain the "excerpt" property, as follows:
+
+    /jcr:root/content//*[jcr:contains(., 'test')]/(rep:excerpt(.))
     
-If the excerpt is read using `row.getValue("rep:excerpt(.)")`, 
-then excerpt and highlighting features of Lucene are used.
-That is, if a Lucene index is configured for the query, and excerpts are generated
-(see <a href="lucene.html#Property_Definitions">useInExcerpt</a>).
-
-On the other hand, if the excerpt is requested for properties
-that are not specified in the query, as in `row.getValue("rep:excerpt(@title)")`, 
-or if the query doesn't contain the excerpt property, for example
+The excerpt is then read using the JCR API call `row.getValue("rep:excerpt(.)")`.
+
+Since Oak version 1.10 (OAK-7151), optionally a property name can be specified in the query:
+
+    /jcr:root/content//*[jcr:contains(., 'test')]/(rep:excerpt(@jcr:title) | rep:excerpt())
+
+The excerpt for the title is then read using `row.getValue("rep:excerpt(@jcr:title)")`,
+and the excerpt for the node using (as before) `row.getValue("rep:excerpt(.)")`.
+
+#### SimpleExcerptProvider
+
+The SimpleExcerptProvider is a fallback mechanism for excerpts and highlighting. 
+This mechanism has many limitations, and is generally not recommended.
+The SimpleExcerptProvider ignores the index configuration,
+and especially highlighting is very limited 
+(stopwords are ignored, and highlighting is case sensitive).
+
+The SimpleExcerptProvider is used when reading an excerpt 
+if the query doesn't contain an excerpt property, as in:
 
     /jcr:root/content//*[jcr:contains(., 'test')]
 
-then the SimpleExcerptProvider utility is used, 
-which generates excerpt from the content, and does highlighting.
-Using the SimpleExcerptProvider is not recommended; specially highlighting is limited 
-(eg. the index configuration is ignored, stopwords are ignored, and highlighting is case sensitive).
+The SimpleExcerptProvider is also used if an excerpt is requested
+for a property that is not specified in the query. For example,
+when using `row.getValue("rep:excerpt(@title)")`, but the query does not contain
+this property as an excerpt property, as in:
+
+    /jcr:root/content//*[jcr:contains(., 'test')]/(rep:excerpt(.))
+    
+The SimpleExcerptProvider is also used for queries that don't use
+a Lucene index, or if the query uses a Lucene index, but excerpts are not configured there.
 
 ### Native Queries
 

Modified: jackrabbit/oak/trunk/oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryFulltextTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryFulltextTest.java?rev=1830170&r1=1830169&r2=1830170&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryFulltextTest.java (original)
+++ jackrabbit/oak/trunk/oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryFulltextTest.java Thu Apr 26 07:52:54 2018
@@ -42,7 +42,7 @@ public class QueryFulltextTest extends A
     public QueryFulltextTest(NodeStoreFixture fixture) {
         super(fixture);
     }
-    
+
     @Test
     public void excerpt() throws Exception {
         Session session = getAdminSession();
@@ -60,9 +60,9 @@ public class QueryFulltextTest extends A
         RowIterator it;
         Row row;
         String s;
-        
+
         String xpath = "//*[jcr:contains(., 'hello')]/rep:excerpt(.) order by @jcr:path";
-        
+
         q = qm.createQuery(xpath, "xpath");
         it = q.execute().getRows();
         row = it.nextRow();
@@ -73,10 +73,9 @@ public class QueryFulltextTest extends A
         row = it.nextRow();
         path = row.getPath();
         s = row.getValue("rep:excerpt(.)").getString();
-        // TODO is this expected?
-        assertTrue(path + ":" + s + " (3)", s.indexOf("Hello World") >= 0);
+        assertTrue(path + ":" + s + " (3)", s.indexOf("<strong>Hello</strong> World") >= 0);
         assertTrue(path + ":" + s + " (4)", s.indexOf("Description") >= 0);
-        
+
         xpath = "//*[jcr:contains(., 'hello')]/rep:excerpt(.) order by @jcr:path";
 
         q = qm.createQuery(xpath, "xpath");
@@ -89,11 +88,10 @@ public class QueryFulltextTest extends A
         row = it.nextRow();
         path = row.getPath();
         s = row.getValue("rep:excerpt(text)").getString();
-        // TODO is this expected?
-        assertTrue(path + ":" + s + " (7)", s.indexOf("Hello World") >= 0);
+        assertTrue(path + ":" + s + " (7)", s.indexOf("<strong>Hello</strong> World") >= 0);
         assertTrue(path + ":" + s + " (8)", s.indexOf("Description") < 0);
     }
-    
+
     @Test
     public void fulltextOrWithinText() throws Exception {
         Session session = getAdminSession();
@@ -106,18 +104,18 @@ public class QueryFulltextTest extends A
         Node n3 = testRootNode.addNode("node3");
         n3.setProperty("text", "hello hallo");
         session.save();
-       
-        String sql2 = "select [jcr:path] as [path] from [nt:base] " + 
+
+        String sql2 = "select [jcr:path] as [path] from [nt:base] " +
                 "where contains([text], 'hello OR hallo') order by [jcr:path]";
-        
+
         Query q;
-        
+
         q = qm.createQuery("explain " + sql2, Query.JCR_SQL2);
 
-        assertEquals("[nt:base] as [nt:base] /* traverse \"*\" " + 
+        assertEquals("[nt:base] as [nt:base] /* traverse \"*\" " +
                 "where contains([nt:base].[text], 'hello OR hallo') */",
                 getResult(q.execute(), "plan"));
-        
+
         // verify the result
         // uppercase "OR" mean logical "or"
         q = qm.createQuery(sql2, Query.JCR_SQL2);
@@ -125,14 +123,14 @@ public class QueryFulltextTest extends A
                 getResult(q.execute(), "path"));
 
         // lowercase "or" mean search for the term "or"
-        sql2 = "select [jcr:path] as [path] from [nt:base] " + 
+        sql2 = "select [jcr:path] as [path] from [nt:base] " +
                 "where contains([text], 'hello or hallo') order by [jcr:path]";
         q = qm.createQuery(sql2, Query.JCR_SQL2);
-        assertEquals("", 
+        assertEquals("",
                 getResult(q.execute(), "path"));
 
     }
-    
+
     static String getResult(QueryResult result, String propertyName) throws RepositoryException {
         StringBuilder buff = new StringBuilder();
         RowIterator it = result.getRows();
@@ -144,5 +142,5 @@ public class QueryFulltextTest extends A
         }
         return buff.toString();
     }
-    
+
 }

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/OakStreamingIndexFile.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/OakStreamingIndexFile.java?rev=1830170&r1=1830169&r2=1830170&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/OakStreamingIndexFile.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/OakStreamingIndexFile.java Thu Apr 26 07:52:54 2018
@@ -195,7 +195,7 @@ class OakStreamingIndexFile implements O
             if (blobInputStream == null) {
                 position = pos;
             } else if (pos < position) {
-                LOG.warn("Seeking back on streaming index file {}. Current position {}, requested position {}." +
+                LOG.warn("Seeking back on streaming index file {}. Current position {}, requested position {}. " +
                                 "Please make sure that CopyOnRead and prefetch of index files are enabled.",
                         getName(), position(), pos);