You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by ju...@apache.org on 2011/09/23 12:43:39 UTC
svn commit: r1174642 - in /jackrabbit/branches/2.0: ./
jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/
jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/
jackrabbit-jcr-servlet/
Author: jukka
Date: Fri Sep 23 10:43:39 2011
New Revision: 1174642
URL: http://svn.apache.org/viewvc?rev=1174642&view=rev
Log:
2.0: Merged revision 1174530 (JCR-3077, JCR-3075)
Modified:
jackrabbit/branches/2.0/ (props changed)
jackrabbit/branches/2.0/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractExcerpt.java
jackrabbit/branches/2.0/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHighlighter.java
jackrabbit/branches/2.0/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/WeightedHighlighter.java
jackrabbit/branches/2.0/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/ExcerptTest.java
jackrabbit/branches/2.0/jackrabbit-jcr-servlet/ (props changed)
Propchange: jackrabbit/branches/2.0/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Fri Sep 23 10:43:39 2011
@@ -1,7 +1,7 @@
/jackrabbit/branches/1.5:794012,794100,794102
-/jackrabbit/branches/2.1:955309,955314,982266,982277,982505,998310,1025933,1025957,1025962,1025964,1025981,1025985,1025990,1025995,1025999,1026002,1038594,1038599,1039335,1040102,1089463,1127002
-/jackrabbit/branches/2.2:1089453,1126998
+/jackrabbit/branches/2.1:955309,955314,982266,982277,982505,998310,1025933,1025957,1025962,1025964,1025981,1025985,1025990,1025995,1025999,1026002,1038594,1038599,1039335,1040102,1089463,1127002,1174630
+/jackrabbit/branches/2.2:1089453,1126998,1173250,1173706
/jackrabbit/sandbox/JCR-1456:774917-886178
/jackrabbit/sandbox/JCR-2170:812417-816332
/jackrabbit/sandbox/tripod-JCR-2209:795441-795863
-/jackrabbit/trunk:891595,891629,892253,892263,894150-894151,896408,896513,896532,896857,896870,896876,896908,896940,896942-896943,896969,896977,897071,897836,897842,897858,897935,897983,897992-897993,897996,898002,898042,898267,898325,898540,898677,898699,898701,898715,898872,899102,899181,899391,899393-899394,899583,899594,899643,900305,900310,900314,900453,900702,900736,900762-900763,900767,900782,901095,901122,901139,901144,901170,901176,901191,901193,901196,901216,901228,901285,902058,902062,926324,928888,936668,955222,955229,955307,955852,965539,995411-995412,996810,999298-999299,999965,1000947,1001707,1002065-1002066,1002084,1002101-1002102,1002168,1002170,1002589,1002608,1002657,1002729,1003423,1003470,1003542,1003773,1004182,1004184,1004223-1004224,1004652,1005057,1005112,1036117,1036336-1036337,1038201,1039064,1040090,1087304,1089436,1104027
+/jackrabbit/trunk:891595,891629,892253,892263,894150-894151,896408,896513,896532,896857,896870,896876,896908,896940,896942-896943,896969,896977,897071,897836,897842,897858,897935,897983,897992-897993,897996,898002,898042,898267,898325,898540,898677,898699,898701,898715,898872,899102,899181,899391,899393-899394,899583,899594,899643,900305,900310,900314,900453,900702,900736,900762-900763,900767,900782,901095,901122,901139,901144,901170,901176,901191,901193,901196,901216,901228,901285,902058,902062,926324,928888,936668,955222,955229,955307,955852,965539,995411-995412,996810,999298-999299,999965,1000947,1001707,1002065-1002066,1002084,1002101-1002102,1002168,1002170,1002589,1002608,1002657,1002729,1003423,1003470,1003542,1003773,1004182,1004184,1004223-1004224,1004652,1005057,1005112,1036117,1036336-1036337,1038201,1039064,1040090,1087304,1089436,1104027,1173196
Modified: jackrabbit/branches/2.0/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractExcerpt.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/2.0/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractExcerpt.java?rev=1174642&r1=1174641&r2=1174642&view=diff
==============================================================================
--- jackrabbit/branches/2.0/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractExcerpt.java (original)
+++ jackrabbit/branches/2.0/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractExcerpt.java Fri Sep 23 10:43:39 2011
@@ -16,29 +16,32 @@
*/
package org.apache.jackrabbit.core.query.lucene;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.apache.lucene.search.Query;
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.LinkedHashSet;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.apache.jackrabbit.core.id.NodeId;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.index.TermVectorOffsetInfo;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Token;
-import org.apache.jackrabbit.core.id.NodeId;
-
-import java.io.IOException;
-import java.io.StringReader;
-import java.io.Reader;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.TreeMap;
-import java.util.SortedMap;
-import java.util.Arrays;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Query;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* <code>AbstractExcerpt</code> implements base functionality for an excerpt
@@ -176,10 +179,32 @@ public abstract class AbstractExcerpt im
/**
* @return the extracted terms from the query.
*/
- protected final Set<Term> getQueryTerms() {
- Set<Term> extractedTerms = new HashSet<Term>();
- Set<Term> relevantTerms = new HashSet<Term>();
- query.extractTerms(extractedTerms);
+ protected final Set<Term[]> getQueryTerms() {
+ Set<Term[]> relevantTerms = new HashSet<Term[]>();
+ getQueryTerms(query, relevantTerms);
+ return relevantTerms;
+ }
+
+ private static void getQueryTerms(Query q, Set<Term[]> relevantTerms) {
+ if (q instanceof BooleanQuery) {
+ final BooleanQuery bq = (BooleanQuery) q;
+ for (BooleanClause clause : bq.getClauses()) {
+ getQueryTerms(clause.getQuery(), relevantTerms);
+ }
+ return;
+ }
+ //need to preserve insertion order
+ Set<Term> extractedTerms = new LinkedHashSet<Term>();
+ q.extractTerms(extractedTerms);
+ Set<Term> filteredTerms = filterRelevantTerms(extractedTerms);
+ if (!filteredTerms.isEmpty()) {
+ relevantTerms.add(filteredTerms.toArray(new Term[] {}));
+ }
+ }
+
+ private static Set<Term> filterRelevantTerms(Set<Term> extractedTerms) {
+ //need to preserve insertion order
+ Set<Term> relevantTerms = new LinkedHashSet<Term>();
// only keep terms for fulltext fields
for (Term t : extractedTerms) {
if (t.field().equals(FieldNames.FULLTEXT)) {
Modified: jackrabbit/branches/2.0/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHighlighter.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/2.0/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHighlighter.java?rev=1174642&r1=1174641&r2=1174642&view=diff
==============================================================================
--- jackrabbit/branches/2.0/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHighlighter.java (original)
+++ jackrabbit/branches/2.0/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHighlighter.java Fri Sep 23 10:43:39 2011
@@ -19,17 +19,19 @@ package org.apache.jackrabbit.core.query
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.Set;
import java.util.Arrays;
import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import org.apache.jackrabbit.util.Text;
import org.apache.lucene.document.Field;
+import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.index.TermVectorOffsetInfo;
-import org.apache.lucene.index.Term;
-import org.apache.jackrabbit.util.Text;
/**
* This is an adapted version of the <code>FulltextHighlighter</code> posted in
@@ -93,7 +95,7 @@ public class DefaultHighlighter {
* highlighted
*/
public static String highlight(TermPositionVector tvec,
- Set<Term> queryTerms,
+ Set<Term[]> queryTerms,
String text,
String excerptStart,
String excerptEnd,
@@ -120,7 +122,7 @@ public class DefaultHighlighter {
* highlighted
*/
public static String highlight(TermPositionVector tvec,
- Set<Term> queryTerms,
+ Set<Term[]> queryTerms,
String text,
int maxFragments,
int surround)
@@ -134,7 +136,7 @@ public class DefaultHighlighter {
* @see #highlight(TermPositionVector, Set, String, String, String, String, String, String, String, int, int)
*/
protected String doHighlight(TermPositionVector tvec,
- Set<Term> queryTerms,
+ Set<Term[]> queryTerms,
String text,
String excerptStart,
String excerptEnd,
@@ -144,21 +146,102 @@ public class DefaultHighlighter {
String hlEnd,
int maxFragments,
int surround) throws IOException {
- String[] terms = new String[queryTerms.size()];
- Iterator<Term> it = queryTerms.iterator();
- for (int i = 0; it.hasNext(); i++) {
- terms[i] = it.next().text();
- }
- List<TermVectorOffsetInfo> list = new ArrayList<TermVectorOffsetInfo>();
- int[] tvecindexes = tvec.indexesOf(terms, 0, terms.length);
- for (int tvecindex : tvecindexes) {
- TermVectorOffsetInfo[] termoffsets = tvec.getOffsets(tvecindex);
- list.addAll(Arrays.asList(termoffsets));
+
+ List<TermVectorOffsetInfo> termOffsetInfo = new ArrayList<TermVectorOffsetInfo>();
+
+ Iterator<Term[]> it = queryTerms.iterator();
+ while (it.hasNext()) {
+ Term[] qt = it.next();
+ final int qtLen = qt.length;
+ if (qt == null || qtLen == 0) {
+ continue;
+ }
+ String[] qtText = new String[qtLen];
+ for (int i = 0; i < qtLen; i++) {
+ qtText[i] = qt[i].text();
+ }
+ int[] tvecindexes = tvec.indexesOf(qtText, 0, qtText.length);
+ Map<Integer, TermVectorOffsetInfo[]> localTermOffsetInfo = new HashMap<Integer, TermVectorOffsetInfo[]>();
+ for (int tvecindex : tvecindexes) {
+ TermVectorOffsetInfo[] termoffsets = tvec.getOffsets(tvecindex);
+ if (termoffsets == null || termoffsets.length == 0) {
+ continue;
+ }
+ localTermOffsetInfo.put(tvecindex, termoffsets);
+ }
+
+ // to keep the order of the keys, use tvecindexes,
+ // if a term is not found tvecindexes[] = -1
+ // when dealing with multiple terms that have to exist, just check
+ // if the first one is there
+ if (tvecindexes.length > 0 && tvecindexes[0] >= 0) {
+ // we have to build one interval TermVectorOffsetInfo for each
+ // hit;
+ List<TermVectorOffsetInfo> intervalTermOffsetInfo = new ArrayList<TermVectorOffsetInfo>();
+
+ // pick all the first key's hits as interval start
+ TermVectorOffsetInfo[] firstKeyTermOffsets = localTermOffsetInfo
+ .get(tvecindexes[0]);
+ Arrays.sort(firstKeyTermOffsets,
+ new TermVectorOffsetInfoSorter());
+ intervalTermOffsetInfo.addAll(Arrays
+ .asList(firstKeyTermOffsets));
+
+ // check if each key is part of an interval, if not, it is
+ // dropped from the list
+ for (int i = 1; i < tvecindexes.length; i++) {
+ final Integer key = tvecindexes[i];
+ TermVectorOffsetInfo[] termoffsets = localTermOffsetInfo
+ .get(key);
+ if (termoffsets == null) {
+ continue;
+ }
+ Arrays.sort(termoffsets, new TermVectorOffsetInfoSorter());
+
+ Iterator<TermVectorOffsetInfo> intervalIterator = intervalTermOffsetInfo
+ .iterator();
+
+ int index = 0;
+ while (intervalIterator.hasNext()) {
+ TermVectorOffsetInfo intervalOI = intervalIterator
+ .next();
+ if (index >= termoffsets.length) {
+ intervalIterator.remove();
+ continue;
+ }
+ boolean matchSearch = true;
+ boolean matchFound = false;
+ while (matchSearch) {
+ TermVectorOffsetInfo localOI = termoffsets[index];
+ // check interval match
+ // CJK languages will have the tokens from the PhraseQuery glued together (see LUCENE-2458)
+ int diff = localOI.getStartOffset()
+ - intervalOI.getEndOffset();
+ // TODO we'll probably have to remove 'diff == 0'
+ // after upgrading to lucene 3.1
+ if (diff == 1 || diff == 0) {
+ intervalOI.setEndOffset(localOI.getEndOffset());
+ matchSearch = false;
+ matchFound = true;
+ }
+ index++;
+ if (index >= termoffsets.length) {
+ matchSearch = false;
+ }
+ }
+ if (!matchFound) {
+ index--;
+ intervalIterator.remove();
+ }
+ }
+ }
+ termOffsetInfo.addAll(intervalTermOffsetInfo);
+ }
}
- TermVectorOffsetInfo[] offsets = list.toArray(new TermVectorOffsetInfo[list.size()]);
+ TermVectorOffsetInfo[] offsets = termOffsetInfo.toArray(new TermVectorOffsetInfo[termOffsetInfo.size()]);
// sort offsets
- if (terms.length > 1) {
+ if (offsets != null && offsets.length > 1) {
Arrays.sort(offsets, new TermVectorOffsetInfoSorter());
}
@@ -248,8 +331,8 @@ public class DefaultHighlighter {
if (skippedChars > surround) {
skippedChars = surround;
}
- sb.append(Text.encodeIllegalXMLCharacters(
- new String(cbuf, 0, surround - skippedChars)));
+ sb.append(escape(new String(cbuf, 0, surround
+ - skippedChars)));
sb.append(fragmentEnd);
}
}
@@ -296,8 +379,8 @@ public class DefaultHighlighter {
if (!sentenceStart) {
sb.append("... ");
}
- sb.append(Text.encodeIllegalXMLCharacters(
- new String(cbuf, skippedChars, cbuf.length - skippedChars)));
+ sb.append(escape(new String(cbuf, skippedChars, cbuf.length
+ - skippedChars)));
// iterate terms
for (Iterator iter = fi.iterator(); iter.hasNext();) {
@@ -307,7 +390,7 @@ public class DefaultHighlighter {
cbuf = new char[nextStart - pos];
int charsRead = reader.read(cbuf, 0, nextStart - pos);
pos += (nextStart - pos);
- sb.append(cbuf, 0, charsRead);
+ sb.append(escape(new String(cbuf, 0, charsRead)));
}
sb.append(hlStart);
nextStart = ti.getEndOffset();
@@ -315,7 +398,7 @@ public class DefaultHighlighter {
cbuf = new char[nextStart - pos];
reader.read(cbuf, 0, nextStart - pos);
pos += (nextStart - pos);
- sb.append(cbuf);
+ sb.append(escape(new String(cbuf)));
sb.append(hlEnd);
}
}
@@ -341,8 +424,8 @@ public class DefaultHighlighter {
} else {
skippedChars = 0;
}
- sb.append(Text.encodeIllegalXMLCharacters(
- new String(cbuf, 0, EOF ? skip : (surround - skippedChars))));
+ sb.append(escape(new String(cbuf, 0, EOF ? skip
+ : (surround - skippedChars))));
if (!EOF) {
char lastChar = sb.charAt(sb.length() - 1);
if (lastChar != '.' && lastChar != '!' && lastChar != '?') {
@@ -362,7 +445,7 @@ public class DefaultHighlighter {
* @param text the text.
* @param excerptStart the excerpt start.
* @param excerptEnd the excerpt end.
- * @param fragmentStart the fragement start.
+ * @param fragmentStart the fragment start.
* @param fragmentEnd the fragment end.
* @param maxLength the maximum length of the fragment.
* @return a default excerpt.
@@ -391,10 +474,24 @@ public class DefaultHighlighter {
}
}
}
- excerpt.append(Text.encodeIllegalXMLCharacters(tmp.toString()));
+ excerpt.append(escape(tmp.toString()));
excerpt.append(fragmentEnd).append(excerptEnd);
return excerpt.toString();
}
+
+
+ /**
+ * Escapes input text suitable for the output format.
+ * <p>
+ * By default does XML-escaping. Can be overridden for
+ * other formats.
+ *
+ * @param input raw text.
+ * @return text suitably escaped.
+ */
+ protected String escape(String input) {
+ return Text.encodeIllegalXMLCharacters(input);
+ }
private static class FragmentInfo {
List<TermVectorOffsetInfo> offsetInfosList;
Modified: jackrabbit/branches/2.0/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/WeightedHighlighter.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/2.0/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/WeightedHighlighter.java?rev=1174642&r1=1174641&r2=1174642&view=diff
==============================================================================
--- jackrabbit/branches/2.0/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/WeightedHighlighter.java (original)
+++ jackrabbit/branches/2.0/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/WeightedHighlighter.java Fri Sep 23 10:43:39 2011
@@ -77,7 +77,7 @@ public class WeightedHighlighter extends
* highlighted
*/
public static String highlight(TermPositionVector tvec,
- Set<Term> queryTerms,
+ Set<Term[]> queryTerms,
String text,
String excerptStart,
String excerptEnd,
@@ -103,7 +103,7 @@ public class WeightedHighlighter extends
* highlighted
*/
public static String highlight(TermPositionVector tvec,
- Set<Term> queryTerms,
+ Set<Term[]> queryTerms,
String text,
int maxFragments,
int surround) throws IOException {
@@ -194,14 +194,17 @@ public class WeightedHighlighter extends
TermVectorOffsetInfo oi = (TermVectorOffsetInfo) fIt.next();
if (lastOffsetInfo != null) {
// fill in text between terms
- sb.append(text.substring(lastOffsetInfo.getEndOffset(), oi.getStartOffset()));
+ sb.append(escape(text.substring(
+ lastOffsetInfo.getEndOffset(), oi.getStartOffset())));
}
sb.append(hlStart);
- sb.append(text.substring(oi.getStartOffset(), oi.getEndOffset()));
+ sb.append(escape(text.substring(oi.getStartOffset(),
+ oi.getEndOffset())));
sb.append(hlEnd);
lastOffsetInfo = oi;
}
- limit = Math.min(text.length(), fi.getStartOffset() - len + (surround * 2));
+ limit = Math.min(text.length(), fi.getStartOffset() - len
+ + (surround * 2));
endFragment(sb, text, fi.getEndOffset(), limit);
sb.append(fragmentEnd);
}
@@ -222,10 +225,10 @@ public class WeightedHighlighter extends
* @return the length of the start fragment that was appended to
* <code>sb</code>.
*/
- private static int startFragment(StringBuffer sb, String text, int offset, int limit) {
+ private int startFragment(StringBuffer sb, String text, int offset, int limit) {
if (limit == 0) {
// append all
- sb.append(text.substring(0, offset));
+ sb.append(escape(text.substring(0, offset)));
return offset;
}
String intro = "... ";
@@ -241,7 +244,7 @@ public class WeightedHighlighter extends
}
}
}
- sb.append(intro).append(text.substring(start, offset));
+ sb.append(intro).append(escape(text.substring(start, offset)));
return offset - start;
}
@@ -255,10 +258,10 @@ public class WeightedHighlighter extends
* @param offset the end offset of the last matching term in the fragment.
* @param limit do not go further than <code>limit</code>.
*/
- private static void endFragment(StringBuffer sb, String text, int offset, int limit) {
+ private void endFragment(StringBuffer sb, String text, int offset, int limit) {
if (limit == text.length()) {
// append all
- sb.append(text.substring(offset));
+ sb.append(escape(text.substring(offset)));
return;
}
int end = offset;
@@ -268,7 +271,7 @@ public class WeightedHighlighter extends
end = i;
}
}
- sb.append(text.substring(offset, end)).append(" ...");
+ sb.append(escape(text.substring(offset, end))).append(" ...");
}
private static class FragmentInfo {
Modified: jackrabbit/branches/2.0/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/ExcerptTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/2.0/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/ExcerptTest.java?rev=1174642&r1=1174641&r2=1174642&view=diff
==============================================================================
--- jackrabbit/branches/2.0/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/ExcerptTest.java (original)
+++ jackrabbit/branches/2.0/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/ExcerptTest.java Fri Sep 23 10:43:39 2011
@@ -16,9 +16,9 @@
*/
package org.apache.jackrabbit.core.query;
+import javax.jcr.Node;
import javax.jcr.RepositoryException;
import javax.jcr.Value;
-import javax.jcr.Node;
import javax.jcr.query.QueryResult;
import javax.jcr.query.Row;
import javax.jcr.query.RowIterator;
@@ -104,6 +104,10 @@ public class ExcerptTest extends Abstrac
"apache jackrabbit");
}
+ /**
+ * Verifies character encoding on a node property that does not contain any
+ * excerpt info
+ */
public void testEncodeIllegalCharsNoHighlights() throws RepositoryException {
String text = "bla <strong>bla</strong> bla";
String excerpt = createExcerpt("bla <strong>bla</strong> bla");
@@ -116,9 +120,126 @@ public class ExcerptTest extends Abstrac
QueryResult result = executeQuery(stmt);
RowIterator rows = result.getRows();
assertEquals(1, rows.getSize());
- assertEquals(excerpt, rows.nextRow().getValue("rep:excerpt(text)").getString());
+ String ex = rows.nextRow().getValue("rep:excerpt(text)").getString();
+ assertEquals("Expected " + excerpt + ", but got ", excerpt, ex);
}
+ /**
+ * Verifies character encoding on a node property that contains excerpt info
+ */
+ public void testEncodeIllegalCharsHighlights() throws RepositoryException {
+ checkExcerpt("bla <strong>bla</strong> foo",
+ "bla <strong>bla</strong> <strong>foo</strong>",
+ "foo");
+ }
+
+ /**
+ * test for https://issues.apache.org/jira/browse/JCR-3077
+ *
+ * when given a quoted phrase, the excerpt should evaluate it whole as a
+ * token (not break it down)
+ *
+ */
+ public void testQuotedPhrase() throws RepositoryException {
+ checkExcerpt("one two three four",
+ "one <strong>two three</strong> four", "\"two three\"");
+ }
+
+ /**
+ * Verifies excerpt generation on a node property that does not contain any
+ * excerpt info for a quoted phrase
+ */
+ public void testQuotedPhraseNoMatch() throws RepositoryException {
+ String text = "one two three four";
+ String excerpt = createExcerpt("one two three four");
+ String terms = "\"five six\"";
+
+ Node n = testRootNode.addNode(nodeName1);
+ n.setProperty("text", text);
+ n.setProperty("other", terms);
+ superuser.save();
+
+ String stmt = getStatement(terms);
+ QueryResult result = executeQuery(stmt);
+ RowIterator rows = result.getRows();
+ assertEquals(1, rows.getSize());
+ String ex = rows.nextRow().getValue("rep:excerpt(text)").getString();
+ assertEquals("Expected " + excerpt + ", but got ", excerpt, ex);
+ }
+
+ /**
+ *
+ * Verifies excerpt generation on a node property that contains the exact
+ * quoted phrase but with scrambled words.
+ *
+ * More clearly it actually checks that the order of tokens is respected for
+ * a quoted phrase.
+ */
+ public void testQuotedPhraseNoMatchScrambled() throws RepositoryException {
+ String text = "one two three four";
+ String excerpt = createExcerpt("one two three four");
+ String terms = "\"three two\"";
+
+ Node n = testRootNode.addNode(nodeName1);
+ n.setProperty("text", text);
+ n.setProperty("other", terms);
+ superuser.save();
+
+ String stmt = getStatement(terms);
+ QueryResult result = executeQuery(stmt);
+ RowIterator rows = result.getRows();
+ assertEquals(1, rows.getSize());
+ String ex = rows.nextRow().getValue("rep:excerpt(text)").getString();
+ assertEquals("Expected " + excerpt + ", but got ", excerpt, ex);
+ }
+
+ /**
+ * Verifies excerpt generation on a node property that does not contain the
+ * exact quoted phrase, but contains fragments of it.
+ *
+ */
+ public void testQuotedPhraseNoMatchGap() throws RepositoryException {
+ String text = "one two three four";
+ String excerpt = createExcerpt("one two three four");
+ String terms = "\"two four\"";
+
+ Node n = testRootNode.addNode(nodeName1);
+ n.setProperty("text", text);
+ n.setProperty("other", terms);
+ superuser.save();
+
+ String stmt = getStatement(terms);
+ QueryResult result = executeQuery(stmt);
+ RowIterator rows = result.getRows();
+ assertEquals(1, rows.getSize());
+ String ex = rows.nextRow().getValue("rep:excerpt(text)").getString();
+ assertEquals("Expected " + excerpt + ", but got ", excerpt, ex);
+ }
+
+ /**
+ * test for https://issues.apache.org/jira/browse/JCR-3077
+ *
+ * JA search acts as a PhraseQuery, thanks to LUCENE-2458, so it should be
+ * covered by the quoted phrase tests (testQuotedPhrase*).
+ *
+ */
+ public void testHighlightJa() throws RepositoryException {
+
+ // http://translate.google.com/#auto|en|%E3%82%B3%E3%83%B3%E3%83%86%E3%83%B3%E3%83%88
+ String jContent = "\u30b3\u30fe\u30c6\u30f3\u30c8";
+ // http://translate.google.com/#auto|en|%E3%83%86%E3%82%B9%E3%83%88
+ String jTest = "\u30c6\u30b9\u30c8";
+
+ String content = "some text with japanese: " + jContent + " (content)"
+ + " and " + jTest + " (test).";
+
+ // expected excerpt; note this may change if excerpt providers change
+ String expectedExcerpt = "some text with japanese: " + jContent
+ + " (content) and <strong>" + jTest + "</strong> (test).";
+ checkExcerpt(content, expectedExcerpt, jTest);
+ }
+
+
private void checkExcerpt(String text, String fragmentText, String terms)
throws RepositoryException {
String excerpt = createExcerpt(fragmentText);
@@ -137,7 +258,7 @@ public class ExcerptTest extends Abstrac
private void createTestData(String text) throws RepositoryException {
Node n = testRootNode.addNode(nodeName1);
n.setProperty("text", text);
- testRootNode.save();
+ superuser.save();
}
private String getExcerpt(Row row) throws RepositoryException {
Propchange: jackrabbit/branches/2.0/jackrabbit-jcr-servlet/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Fri Sep 23 10:43:39 2011
@@ -1 +1 @@
-/jackrabbit/branches/2.1/jackrabbit-jcr-servlet:1089463,1127002
+/jackrabbit/branches/2.1/jackrabbit-jcr-servlet:1089463,1127002,1174630