You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by th...@apache.org on 2018/04/26 07:52:54 UTC
svn commit: r1830170 - in /jackrabbit/oak/trunk:
oak-doc/src/site/markdown/query/
oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/
oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/
Author: thomasm
Date: Thu Apr 26 07:52:54 2018
New Revision: 1830170
URL: http://svn.apache.org/viewvc?rev=1830170&view=rev
Log:
OAK-7437 SimpleExcerptProvider highlighting should be case insensitive
Modified:
jackrabbit/oak/trunk/oak-doc/src/site/markdown/query/query-engine.md
jackrabbit/oak/trunk/oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryFulltextTest.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/OakStreamingIndexFile.java
Modified: jackrabbit/oak/trunk/oak-doc/src/site/markdown/query/query-engine.md
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-doc/src/site/markdown/query/query-engine.md?rev=1830170&r1=1830169&r2=1830170&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-doc/src/site/markdown/query/query-engine.md (original)
+++ jackrabbit/oak/trunk/oak-doc/src/site/markdown/query/query-engine.md Thu Apr 26 07:52:54 2018
@@ -338,25 +338,46 @@ New applications should not rely on this
### Excerpts and Highlighting
-If excerpts and highlighting is needed, then queries should contains the "excerpt" property, as follows:
+The Lucene index can be configured to provide excerpts and highlighting.
+See <a href="lucene.html#Property_Definitions">useInExcerpt</a> for details
+on how to configure excerpt generation.
- /jcr:root/content//*[jcr:contains(., 'test')]/(rep:excerpt())
+For queries to use those excerpts, the query needs to use the Lucene index where
+this is configured. The query also needs to contain the "excerpt" property, as follows:
+
+ /jcr:root/content//*[jcr:contains(., 'test')]/(rep:excerpt(.))
-If the excerpt is read using `row.getValue("rep:excerpt(.)")`,
-then excerpt and highlighting features of Lucene are used.
-That is, if a Lucene index is configured for the query, and excerpts are generated
-(see <a href="lucene.html#Property_Definitions">useInExcerpt</a>).
-
-On the other hand, if the excerpt is requested for properties
-that are not specified in the query, as in `row.getValue("rep:excerpt(@title)")`,
-or if the query doesn't contain the excerpt property, for example
+The excerpt is then read using the JCR API call `row.getValue("rep:excerpt(.)")`.
+
+Since Oak version 1.10 (OAK-7151), optionally a property name can be specified in the query:
+
+ /jcr:root/content//*[jcr:contains(., 'test')]/(rep:excerpt(@jcr:title) | rep:excerpt())
+
+The excerpt for the title is then read using `row.getValue("rep:excerpt(@jcr:title)")`,
+and the excerpt for the node using (as before) `row.getValue("rep:excerpt(.)")`.
+
+#### SimpleExcerptProvider
+
+The SimpleExcerptProvider is a fallback mechanism for excerpts and highlighting.
+This mechanism has many limitations, and is generally not recommended.
+The SimpleExcerptProvider ignores the index configuration,
+and highlighting in particular is very limited
+(stopwords are ignored, and highlighting is case sensitive).
+
+The SimpleExcerptProvider is used when reading an excerpt
+if the query doesn't contain an excerpt property, as in:
/jcr:root/content//*[jcr:contains(., 'test')]
-then the SimpleExcerptProvider utility is used,
-which generates excerpt from the content, and does highlighting.
-Using the SimpleExcerptProvider is not recommended; specially highlighting is limited
-(eg. the index configuration is ignored, stopwords are ignored, and highlighting is case sensitive).
+The SimpleExcerptProvider is also used if an excerpt is requested
+for a property that is not specified in the query. For example,
+when using `row.getValue("rep:excerpt(@title)")`, but the query does not contain
+this property as an excerpt property, as in:
+
+ /jcr:root/content//*[jcr:contains(., 'test')]/(rep:excerpt(.))
+
+The SimpleExcerptProvider is also used for queries that don't use
+a Lucene index, or if the query uses a Lucene index, but excerpts are not configured there.
### Native Queries
Modified: jackrabbit/oak/trunk/oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryFulltextTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryFulltextTest.java?rev=1830170&r1=1830169&r2=1830170&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryFulltextTest.java (original)
+++ jackrabbit/oak/trunk/oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryFulltextTest.java Thu Apr 26 07:52:54 2018
@@ -42,7 +42,7 @@ public class QueryFulltextTest extends A
public QueryFulltextTest(NodeStoreFixture fixture) {
super(fixture);
}
-
+
@Test
public void excerpt() throws Exception {
Session session = getAdminSession();
@@ -60,9 +60,9 @@ public class QueryFulltextTest extends A
RowIterator it;
Row row;
String s;
-
+
String xpath = "//*[jcr:contains(., 'hello')]/rep:excerpt(.) order by @jcr:path";
-
+
q = qm.createQuery(xpath, "xpath");
it = q.execute().getRows();
row = it.nextRow();
@@ -73,10 +73,9 @@ public class QueryFulltextTest extends A
row = it.nextRow();
path = row.getPath();
s = row.getValue("rep:excerpt(.)").getString();
- // TODO is this expected?
- assertTrue(path + ":" + s + " (3)", s.indexOf("Hello World") >= 0);
+ assertTrue(path + ":" + s + " (3)", s.indexOf("<strong>Hello</strong> World") >= 0);
assertTrue(path + ":" + s + " (4)", s.indexOf("Description") >= 0);
-
+
xpath = "//*[jcr:contains(., 'hello')]/rep:excerpt(.) order by @jcr:path";
q = qm.createQuery(xpath, "xpath");
@@ -89,11 +88,10 @@ public class QueryFulltextTest extends A
row = it.nextRow();
path = row.getPath();
s = row.getValue("rep:excerpt(text)").getString();
- // TODO is this expected?
- assertTrue(path + ":" + s + " (7)", s.indexOf("Hello World") >= 0);
+ assertTrue(path + ":" + s + " (7)", s.indexOf("<strong>Hello</strong> World") >= 0);
assertTrue(path + ":" + s + " (8)", s.indexOf("Description") < 0);
}
-
+
@Test
public void fulltextOrWithinText() throws Exception {
Session session = getAdminSession();
@@ -106,18 +104,18 @@ public class QueryFulltextTest extends A
Node n3 = testRootNode.addNode("node3");
n3.setProperty("text", "hello hallo");
session.save();
-
- String sql2 = "select [jcr:path] as [path] from [nt:base] " +
+
+ String sql2 = "select [jcr:path] as [path] from [nt:base] " +
"where contains([text], 'hello OR hallo') order by [jcr:path]";
-
+
Query q;
-
+
q = qm.createQuery("explain " + sql2, Query.JCR_SQL2);
- assertEquals("[nt:base] as [nt:base] /* traverse \"*\" " +
+ assertEquals("[nt:base] as [nt:base] /* traverse \"*\" " +
"where contains([nt:base].[text], 'hello OR hallo') */",
getResult(q.execute(), "plan"));
-
+
// verify the result
// uppercase "OR" mean logical "or"
q = qm.createQuery(sql2, Query.JCR_SQL2);
@@ -125,14 +123,14 @@ public class QueryFulltextTest extends A
getResult(q.execute(), "path"));
// lowercase "or" mean search for the term "or"
- sql2 = "select [jcr:path] as [path] from [nt:base] " +
+ sql2 = "select [jcr:path] as [path] from [nt:base] " +
"where contains([text], 'hello or hallo') order by [jcr:path]";
q = qm.createQuery(sql2, Query.JCR_SQL2);
- assertEquals("",
+ assertEquals("",
getResult(q.execute(), "path"));
}
-
+
static String getResult(QueryResult result, String propertyName) throws RepositoryException {
StringBuilder buff = new StringBuilder();
RowIterator it = result.getRows();
@@ -144,5 +142,5 @@ public class QueryFulltextTest extends A
}
return buff.toString();
}
-
+
}
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/OakStreamingIndexFile.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/OakStreamingIndexFile.java?rev=1830170&r1=1830169&r2=1830170&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/OakStreamingIndexFile.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/OakStreamingIndexFile.java Thu Apr 26 07:52:54 2018
@@ -195,7 +195,7 @@ class OakStreamingIndexFile implements O
if (blobInputStream == null) {
position = pos;
} else if (pos < position) {
- LOG.warn("Seeking back on streaming index file {}. Current position {}, requested position {}." +
+ LOG.warn("Seeking back on streaming index file {}. Current position {}, requested position {}. " +
"Please make sure that CopyOnRead and prefetch of index files are enabled.",
getName(), position(), pos);