You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by mr...@apache.org on 2007/05/11 12:08:38 UTC

svn commit: r537155 - in /jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene: AbstractExcerpt.java DefaultHTMLExcerpt.java DefaultHighlighter.java DefaultXMLExcerpt.java ExcerptProvider.java SearchIndex.java

Author: mreutegg
Date: Fri May 11 03:08:36 2007
New Revision: 537155

URL: http://svn.apache.org/viewvc?view=rev&rev=537155
Log:
JCR-907: Create HTML excerpt provider

Added:
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractExcerpt.java   (with props)
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHTMLExcerpt.java   (with props)
Modified:
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHighlighter.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultXMLExcerpt.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ExcerptProvider.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java

Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractExcerpt.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractExcerpt.java?view=auto&rev=537155
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractExcerpt.java (added)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractExcerpt.java Fri May 11 03:08:36 2007
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.lucene;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.TermFreqVector;
+import org.apache.lucene.index.TermPositionVector;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.jackrabbit.core.NodeId;
+
+import java.io.IOException;
+import java.util.Set;
+import java.util.HashSet;
+import java.util.Iterator;
+
+/**
+ * <code>AbstractExcerpt</code> implements base functionality for an excerpt
+ * provider.
+ */
+public abstract class AbstractExcerpt implements ExcerptProvider {
+
+    /**
+     * Logger instance for this class.
+     */
+    private static final Logger log = LoggerFactory.getLogger(AbstractExcerpt.class);
+
+    /**
+     * The search index.
+     */
+    protected SearchIndex index;
+
+    /**
+     * The current query.
+     */
+    protected Query query;
+
+    /**
+     * Indicates whether the query was already rewritten against a reader.
+     */
+    private boolean rewritten = false;
+
+    /**
+     * {@inheritDoc}
+     */
+    public void init(Query query, SearchIndex index) throws IOException {
+        this.index = index;
+        this.query = query;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public String getExcerpt(NodeId id, int maxFragments, int maxFragmentSize)
+            throws IOException {
+        IndexReader reader = index.getIndexReader();
+        try {
+            if (!rewritten) {
+                query = query.rewrite(reader);
+                rewritten = true;
+            }
+            Term idTerm = new Term(FieldNames.UUID, id.getUUID().toString());
+            TermDocs tDocs = reader.termDocs(idTerm);
+            int docNumber;
+            Document doc;
+            try {
+                if (tDocs.next()) {
+                    docNumber = tDocs.doc();
+                    doc = reader.document(docNumber);
+                } else {
+                    // node not found in index
+                    return null;
+                }
+            } finally {
+                tDocs.close();
+            }
+            Field[] fields = doc.getFields(FieldNames.FULLTEXT);
+            if (fields == null || fields.length == 0) {
+                log.debug("Fulltext field not stored, using {}",
+                        SimpleExcerptProvider.class.getName());
+                SimpleExcerptProvider exProvider = new SimpleExcerptProvider();
+                exProvider.init(query, index);
+                return exProvider.getExcerpt(id, maxFragments, maxFragmentSize);
+            }
+            StringBuffer text = new StringBuffer();
+            String separator = "";
+            for (int i = 0; i < fields.length; i++) {
+                text.append(separator);
+                text.append(fields[i].stringValue());
+                // this is a hack! in general multiple fields with the same
+                // name are handled properly, that is, offset and position are
+                // calculated correctly. there is one case, however, where
+                // the offset gets wrong:
+                // if a term text ends with characters that are considered noise
+                // then the offset of the next field will be off by the number
+                // of noise characters.
+                // therefore we delete noise characters at the end of the text.
+                // this process is required for all but the last field
+                if (i < fields.length - 1) {
+                    for (int j = text.length() - 1; j >= 0; j--) {
+                        if (Character.isLetterOrDigit(text.charAt(j))) {
+                            break;
+                        } else {
+                            text.deleteCharAt(j);
+                        }
+                    }
+                }
+                separator = " ";
+            }
+            TermFreqVector tfv = reader.getTermFreqVector(
+                    docNumber, FieldNames.FULLTEXT);
+            if (tfv instanceof TermPositionVector) {
+                return createExcerpt((TermPositionVector) tfv, text.toString(),
+                        maxFragments, maxFragmentSize);
+            } else {
+                log.debug("No TermPositionVector on Fulltext field, using {}",
+                        SimpleExcerptProvider.class.getName());
+                SimpleExcerptProvider exProvider = new SimpleExcerptProvider();
+                exProvider.init(query, index);
+                return exProvider.getExcerpt(id, maxFragments, maxFragmentSize);
+            }
+        } finally {
+            reader.close();
+        }
+    }
+
+    /**
+     * Creates an excerpt for the given <code>text</code> using token offset
+     * information provided by <code>tpv</code>.
+     *
+     * @param tpv             the term position vector for the fulltext field.
+     * @param text            the original text.
+     * @param maxFragments    the maximum number of fragments to create.
+     * @param maxFragmentSize the maximum number of characters in a fragment.
+     * @return the excerpt; its format is defined by the implementation.
+     * @throws IOException if an error occurs while creating the excerpt.
+     */
+    protected abstract String createExcerpt(TermPositionVector tpv,
+                                            String text,
+                                            int maxFragments,
+                                            int maxFragmentSize)
+            throws IOException;
+
+    /**
+     * @return the fulltext terms extracted from the query.
+     */
+    protected final Set getQueryTerms() {
+        Set extractedTerms = new HashSet();
+        Set relevantTerms = new HashSet();
+        query.extractTerms(extractedTerms);
+        // only keep terms for fulltext fields
+        for (Iterator it = extractedTerms.iterator(); it.hasNext(); ) {
+            Term t = (Term) it.next();
+            if (t.field().equals(FieldNames.FULLTEXT)) {
+                relevantTerms.add(t);
+            } else {
+                int idx = t.field().indexOf(FieldNames.FULLTEXT_PREFIX);
+                if (idx != -1) {
+                    relevantTerms.add(new Term(FieldNames.FULLTEXT, t.text()));
+                }
+            }
+        }
+        return relevantTerms;
+    }
+}

Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractExcerpt.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHTMLExcerpt.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHTMLExcerpt.java?view=auto&rev=537155
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHTMLExcerpt.java (added)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHTMLExcerpt.java Fri May 11 03:08:36 2007
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.lucene;
+
+import org.apache.lucene.index.TermPositionVector;
+
+import java.io.IOException;
+
+/**
+ * <code>DefaultHTMLExcerpt</code> creates an HTML excerpt with the following
+ * format:
+ * <pre>
+ * &lt;div>
+ *     &lt;span>&lt;strong>Jackrabbit&lt;/strong> implements both the mandatory XPath and optional SQL &lt;strong>query&lt;/strong> syntax.&lt;/span>
+ *     &lt;span>Before parsing the XPath &lt;strong>query&lt;/strong> in &lt;strong>Jackrabbit&lt;/strong>, the statement is surrounded&lt;/span>
+ * &lt;/div>
+ * </pre>
+ */
+public class DefaultHTMLExcerpt extends AbstractExcerpt {
+
+    /**
+     * {@inheritDoc}
+     */
+    protected String createExcerpt(TermPositionVector tpv, String text,
+            int maxFragments, int maxFragmentSize) throws IOException {
+        // the highlighter takes the number of chars surrounding a match
+        final int surround = maxFragmentSize / 2;
+        return DefaultHighlighter.highlight(tpv, getQueryTerms(), text,
+                "<div>", "</div>", "<span>", "</span>", "<strong>", "</strong>",
+                maxFragments, surround);
+    }
+}

Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHTMLExcerpt.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHighlighter.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHighlighter.java?view=diff&rev=537155&r1=537154&r2=537155
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHighlighter.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHighlighter.java Fri May 11 03:08:36 2007
@@ -62,8 +62,6 @@
 
     public static final String END_FRAGMENT_SEPARATOR = "</fragment>";
 
-    public static final String EMPTY_EXCERPT = "<excerpt/>";
-
     private DefaultHighlighter() {
     }
 
@@ -89,24 +87,34 @@
     }
 
     /**
-     * @param tvec         the term position vector for this hit
-     * @param queryTerms   the query terms.
-     * @param text         the original text that was used to create the tokens.
-     * @param prepend      the string used to prepend a highlighted token, for
-     *                     example <tt>&quot;&lt;b&gt;&quot;</tt>
-     * @param append       the string used to append a highlighted token, for
-     *                     example <tt>&quot;&lt;/b&gt;&quot;</tt>
-     * @param maxFragments the maximum number of fragments
-     * @param surround     the maximum number of chars surrounding a highlighted
-     *                     token
+     * @param tvec          the term position vector for this hit
+     * @param queryTerms    the query terms.
+     * @param text          the original text that was used to create the
+     *                      tokens.
+     * @param excerptStart  this string is prepended to the excerpt
+     * @param excerptEnd    this string is appended to the excerpt
+     * @param fragmentStart this string is prepended to every fragment
+     * @param fragmentEnd   this string is appended to the end of every
+     *                      fragment.
+     * @param hlStart       the string used to prepend a highlighted token, for
+     *                      example <tt>&quot;&lt;b&gt;&quot;</tt>
+     * @param hlEnd         the string used to append a highlighted token, for
+     *                      example <tt>&quot;&lt;/b&gt;&quot;</tt>
+     * @param maxFragments  the maximum number of fragments
+     * @param surround      the maximum number of chars surrounding a
+     *                      highlighted token
      * @return a String with text fragments where tokens from the query are
      *         highlighted
      */
     public static String highlight(TermPositionVector tvec,
                                    Set queryTerms,
                                    String text,
-                                   String prepend,
-                                   String append,
+                                   String excerptStart,
+                                   String excerptEnd,
+                                   String fragmentStart,
+                                   String fragmentEnd,
+                                   String hlStart,
+                                   String hlEnd,
                                    int maxFragments,
                                    int surround)
             throws IOException {
@@ -130,20 +138,52 @@
             java.util.Arrays.sort(offsets, new TermVectorOffsetInfoSorter());
         }
 
-        return mergeFragments(offsets, new StringReader(text), prepend,
+        return mergeFragments(offsets, new StringReader(text), excerptStart,
+                excerptEnd, fragmentStart, fragmentEnd, hlStart, hlEnd,
+                maxFragments, surround);
+    }
+
+    /**
+     * @param tvec         the term position vector for this hit
+     * @param queryTerms   the query terms.
+     * @param text         the original text that was used to create the tokens.
+     * @param prepend      the string used to prepend a highlighted token, for
+     *                     example <tt>&quot;&lt;b&gt;&quot;</tt>
+     * @param append       the string used to append a highlighted token, for
+     *                     example <tt>&quot;&lt;/b&gt;&quot;</tt>
+     * @param maxFragments the maximum number of fragments
+     * @param surround     the maximum number of chars surrounding a highlighted
+     *                     token
+     * @return a String with text fragments where tokens from the query are
+     *         highlighted
+     */
+    public static String highlight(TermPositionVector tvec,
+                                   Set queryTerms,
+                                   String text,
+                                   String prepend,
+                                   String append,
+                                   int maxFragments,
+                                   int surround)
+            throws IOException {
+        return highlight(tvec, queryTerms, text, START_EXCERPT, END_EXCERPT,
+                START_FRAGMENT_SEPARATOR, END_FRAGMENT_SEPARATOR, prepend,
                 append, maxFragments, surround);
     }
 
     private static String mergeFragments(TermVectorOffsetInfo[] offsets,
                                          StringReader reader,
-                                         String prefix,
-                                         String suffix,
+                                         String excerptStart,
+                                         String excerptEnd,
+                                         String fragmentStart,
+                                         String fragmentEnd,
+                                         String hlStart,
+                                         String hlEnd,
                                          int maxFragments,
                                          int surround)
             throws IOException {
         if (offsets == null || offsets.length == 0) {
             // nothing to highlight
-            return EMPTY_EXCERPT;
+            return excerptStart + excerptEnd;
         }
         int lastOffset = offsets.length; // Math.min(10, offsets.length); // 10 terms is plenty?
         ArrayList fragmentInfoList = new ArrayList();
@@ -170,7 +210,7 @@
         java.util.Collections.sort(bestFragmentsList, new FragmentInfoPositionSorter());
 
         // merge #maxFragments fragments
-        StringBuffer sb = new StringBuffer(START_EXCERPT);
+        StringBuffer sb = new StringBuffer(excerptStart);
         int pos = 0;
         char[] cbuf;
         int skip;
@@ -201,7 +241,7 @@
                     }
                     sb.append(Text.encodeIllegalXMLCharacters(
                             new String(cbuf, 0, surround - skippedChars)));
-                    sb.append(END_FRAGMENT_SEPARATOR);
+                    sb.append(fragmentEnd);
                 }
             }
 
@@ -219,7 +259,7 @@
             firstWhitespace = skippedChars;
             reader.read(cbuf, 0, nextStart - pos);
             pos += (nextStart - pos);
-            sb.append(START_FRAGMENT_SEPARATOR);
+            sb.append(fragmentStart);
             // find last period followed by whitespace
             if (cbuf.length > 0) {
                 for (; skippedChars >= 0; skippedChars--) {
@@ -260,14 +300,14 @@
                     pos += (nextStart - pos);
                     sb.append(cbuf, 0, charsRead);
                 }
-                sb.append(prefix);
+                sb.append(hlStart);
                 nextStart = ti.getEndOffset();
                 // print term
                 cbuf = new char[nextStart - pos];
                 reader.read(cbuf, 0, nextStart - pos);
                 pos += (nextStart - pos);
                 sb.append(cbuf);
-                sb.append(suffix);
+                sb.append(hlEnd);
             }
         }
         if (pos != 0) {
@@ -298,10 +338,10 @@
                 if (lastChar != '.' && lastChar != '!' && lastChar != '?') {
                     sb.append(" ...");
                 }
-                sb.append(END_FRAGMENT_SEPARATOR);
+                sb.append(fragmentEnd);
             }
         }
-        sb.append(END_EXCERPT);
+        sb.append(excerptEnd);
         return sb.toString();
     }
 

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultXMLExcerpt.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultXMLExcerpt.java?view=diff&rev=537155&r1=537154&r2=537155
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultXMLExcerpt.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultXMLExcerpt.java Fri May 11 03:08:36 2007
@@ -16,166 +16,33 @@
  */
 package org.apache.jackrabbit.core.query.lucene;
 
-import org.apache.jackrabbit.core.NodeId;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermDocs;
-import org.apache.lucene.index.TermFreqVector;
 import org.apache.lucene.index.TermPositionVector;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.Iterator;
 
 /**
- * <code>DefaultXMLExcerpt</code> implements an ExcerptProvider.
+ * <code>DefaultXMLExcerpt</code> creates an XML excerpt of a matching node.
+ * <br/>
+ * E.g. if you search for 'jackrabbit' and 'query' you may get the following
+ * result for a node:
+ * <pre>
+ * &lt;excerpt>
+ *     &lt;fragment>&lt;highlight>Jackrabbit&lt;/highlight> implements both the mandatory XPath and optional SQL &lt;highlight>query&lt;/highlight> syntax.&lt;/fragment>
+ *     &lt;fragment>Before parsing the XPath &lt;highlight>query&lt;/highlight> in &lt;highlight>Jackrabbit&lt;/highlight>, the statement is surrounded&lt;/fragment>
+ * &lt;/excerpt>
+ * </pre>
  */
-class DefaultXMLExcerpt implements ExcerptProvider {
-
-    /**
-     * Logger instance for this class.
-     */
-    private static final Logger log = LoggerFactory.getLogger(DefaultXMLExcerpt.class);
-
-    /**
-     * The search index.
-     */
-    private SearchIndex index;
-
-    /**
-     * The current query.
-     */
-    private Query query;
-
-    /**
-     * Indicates whether the query is already rewritten.
-     */
-    private boolean rewritten = false;
-
-    /**
-     * {@inheritDoc}
-     */
-    public void init(Query query, SearchIndex index) throws IOException {
-        this.index = index;
-        this.query = query;
-    }
+class DefaultXMLExcerpt extends AbstractExcerpt {
 
     /**
      * {@inheritDoc}
      */
-    public String getExcerpt(NodeId id, int maxFragments, int maxFragmentSize)
-            throws IOException {
-        IndexReader reader = index.getIndexReader();
-        try {
-            if (!rewritten) {
-                query = query.rewrite(reader);
-                rewritten = true;
-            }
-            Term idTerm = new Term(FieldNames.UUID, id.getUUID().toString());
-            TermDocs tDocs = reader.termDocs(idTerm);
-            int docNumber;
-            Document doc;
-            try {
-                if (tDocs.next()) {
-                    docNumber = tDocs.doc();
-                    doc = reader.document(docNumber);
-                } else {
-                    // node not found in index
-                    return null;
-                }
-            } finally {
-                tDocs.close();
-            }
-            Field[] fields = doc.getFields(FieldNames.FULLTEXT);
-            if (fields == null) {
-                log.debug("Fulltext field not stored, using {}",
-                        SimpleExcerptProvider.class.getName());
-                SimpleExcerptProvider exProvider = new SimpleExcerptProvider();
-                exProvider.init(query, index);
-                return exProvider.getExcerpt(id, maxFragments, maxFragmentSize);
-            }
-            StringBuffer text = new StringBuffer();
-            String separator = "";
-            for (int i = 0; i < fields.length; i++) {
-                text.append(separator);
-                text.append(fields[i].stringValue());
-                // this is a hack! in general multiple fields with the same
-                // name are handled properly, that is, offset and position is
-                // calculated correctly. there is one case however where
-                // the offset gets wrong:
-                // if a term text ends with characters that are considered noise
-                // then the offset of the next field will be off by the number
-                // of noise characters.
-                // therefore we delete noise characters at the end of the text.
-                // this process is required for all but the last field
-                if (i < fields.length - 1) {
-                    for (int j = text.length() - 1; j >= 0; j--) {
-                        if (Character.isLetterOrDigit(text.charAt(j))) {
-                            break;
-                        } else {
-                            text.deleteCharAt(j);
-                        }
-                    }
-                }
-                separator = " ";
-            }
-            TermFreqVector tfv = reader.getTermFreqVector(
-                    docNumber, FieldNames.FULLTEXT);
-            if (tfv instanceof TermPositionVector) {
-                return createExcerpt((TermPositionVector) tfv, text.toString(),
-                        maxFragments, maxFragmentSize);
-            } else {
-                log.debug("No TermPositionVector on Fulltext field, using {}",
-                        SimpleExcerptProvider.class.getName());
-                SimpleExcerptProvider exProvider = new SimpleExcerptProvider();
-                exProvider.init(query, index);
-                return exProvider.getExcerpt(id, maxFragments, maxFragmentSize);
-            }
-        } finally {
-            reader.close();
-        }
-    }
-
-    /**
-     * Creates an excerpt for the given <code>text</code> using token offset
-     * information provided by <code>tpv</code>.
-     *
-     * @param tpv             the term position vector for the fulltext field.
-     * @param text            the original text.
-     * @param maxFragments    the maximum number of fragments to create.
-     * @param maxFragmentSize the maximum number of characters in a fragment.
-     * @return the xml excerpt.
-     * @throws IOException if an error occurs while creating the excerpt.
-     */
-    private String createExcerpt(TermPositionVector tpv,
+    protected String createExcerpt(TermPositionVector tpv,
                                  String text,
                                  int maxFragments,
                                  int maxFragmentSize)
             throws IOException {
-
-        Set extractedTerms = new HashSet();
-        Set relevantTerms = new HashSet();
-        query.extractTerms(extractedTerms);
-        // only keep terms for fulltext fields
-        for (Iterator it = extractedTerms.iterator(); it.hasNext(); ) {
-            Term t = (Term) it.next();
-            if (t.field().equals(FieldNames.FULLTEXT)) {
-                relevantTerms.add(t);
-            } else {
-                int idx = t.field().indexOf(FieldNames.FULLTEXT_PREFIX);
-                if (idx != -1) {
-                    relevantTerms.add(new Term(FieldNames.FULLTEXT, t.text()));
-                }
-            }
-        }
-
-        return DefaultHighlighter.highlight(tpv, relevantTerms, text,
+        return DefaultHighlighter.highlight(tpv, getQueryTerms(), text,
                 "<highlight>", "</highlight>", maxFragments, maxFragmentSize / 2);
     }
 }

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ExcerptProvider.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ExcerptProvider.java?view=diff&rev=537155&r1=537154&r2=537155
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ExcerptProvider.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ExcerptProvider.java Fri May 11 03:08:36 2007
@@ -23,16 +23,8 @@
 import java.io.IOException;
 
 /**
- * <code>ExcerptProvider</code> defines an interface to get an XML excerpt
- * of a matching node.<br/>
- * E.g. if you search for 'jackrabbit' and 'query' you may get the following
- * result for a node:
- * <pre>
- * &lt;excerpt>
- *     &lt;fragment>&lt;highlight>Jackrabbit&lt;/highlight> implements both the mandatory XPath and optional SQL &lt;highlight>query&lt;/highlight> syntax.&lt;/fragment>
- *     &lt;fragment>Before parsing the XPath &lt;highlight>query&lt;/highlight> in &lt;highlight>Jackrabbit&lt;/highlight>, the statement is surrounded&lt;/fragment>
- * &lt;/excerpt>
- * </pre>
+ * <code>ExcerptProvider</code> defines an interface to create an excerpt for
+ * a matching node. The format of the excerpt is implementation specific. 
  */
 public interface ExcerptProvider {
 

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java?view=diff&rev=537155&r1=537154&r2=537155
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java Fri May 11 03:08:36 2007
@@ -251,7 +251,7 @@
     /**
      * The excerpt provider class. Implements {@link ExcerptProvider}.
      */
-    private Class excerptProviderClass = DefaultXMLExcerpt.class;
+    private Class excerptProviderClass = DefaultHTMLExcerpt.class;
 
     /**
      * The path to the indexing configuration file.