You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by nw...@apache.org on 2011/12/17 22:55:21 UTC

[lucy-commits] svn commit: r1220281 - in /incubator/lucy/trunk: core/Lucy/Search/ core/Lucy/Test/Highlight/ perl/ perl/lib/Lucy/ perl/t/ perl/t/binding/ perl/t/core/

Author: nwellnhof
Date: Sat Dec 17 21:55:20 2011
New Revision: 1220281

URL: http://svn.apache.org/viewvc?rev=1220281&view=rev
Log:
LUCY-200 Port most of the highlighter tests to C

Split t/303-highlighter.t into core and binding tests

Added:
    incubator/lucy/trunk/core/Lucy/Test/Highlight/
    incubator/lucy/trunk/core/Lucy/Test/Highlight/TestHighlighter.c
    incubator/lucy/trunk/core/Lucy/Test/Highlight/TestHighlighter.cfh
    incubator/lucy/trunk/perl/t/binding/303-highlighter.t
    incubator/lucy/trunk/perl/t/core/303-highlighter.t
Removed:
    incubator/lucy/trunk/perl/t/303-highlighter.t
Modified:
    incubator/lucy/trunk/core/Lucy/Search/Span.c
    incubator/lucy/trunk/core/Lucy/Search/Span.cfh
    incubator/lucy/trunk/perl/MANIFEST
    incubator/lucy/trunk/perl/lib/Lucy/Test.pm

Modified: incubator/lucy/trunk/core/Lucy/Search/Span.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Search/Span.c?rev=1220281&r1=1220280&r2=1220281&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Search/Span.c (original)
+++ incubator/lucy/trunk/core/Lucy/Search/Span.c Sat Dec 17 21:55:20 2011
@@ -64,6 +64,17 @@ Span_set_weight(Span *self, float weight
     self->weight = weight;
 }
 
+bool_t
+Span_equals(Span *self, Obj *other) {
+    Span *twin = (Span*)other;
+    if (self == twin)                 { return true; }
+    if (!Obj_Is_A(other, SPAN))       { return false; }
+    if (self->offset != twin->offset) { return false; }
+    if (self->length != twin->length) { return false; }
+    if (self->weight != twin->weight) { return false; }
+    return true;
+}
+
 int32_t
 Span_compare_to(Span *self, Obj *other) {
     Span *competitor = (Span*)CERTIFY(other, SPAN);

Modified: incubator/lucy/trunk/core/Lucy/Search/Span.cfh
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Search/Span.cfh?rev=1220281&r1=1220280&r2=1220281&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Search/Span.cfh (original)
+++ incubator/lucy/trunk/core/Lucy/Search/Span.cfh Sat Dec 17 21:55:20 2011
@@ -74,6 +74,9 @@ class Lucy::Search::Span inherits Lucy::
     public void
     Set_Weight(Span *self, float weight);
 
+    public bool_t
+    Equals(Span *self, Obj *other);
+
     public int32_t
     Compare_To(Span *self, Obj *other);
 }

Added: incubator/lucy/trunk/core/Lucy/Test/Highlight/TestHighlighter.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Test/Highlight/TestHighlighter.c?rev=1220281&view=auto
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Test/Highlight/TestHighlighter.c (added)
+++ incubator/lucy/trunk/core/Lucy/Test/Highlight/TestHighlighter.c Sat Dec 17 21:55:20 2011
@@ -0,0 +1,492 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define C_LUCY_TESTHIGHLIGHTER
+#include "Lucy/Util/ToolSet.h"
+
+#include "Lucy/Test.h"
+#include "Lucy/Test/Highlight/TestHighlighter.h"
+#include "Lucy/Highlight/Highlighter.h"
+
+#include "Lucy/Analysis/RegexTokenizer.h"
+#include "Lucy/Document/Doc.h"
+#include "Lucy/Document/HitDoc.h"
+#include "Lucy/Highlight/HeatMap.h"
+#include "Lucy/Index/Indexer.h"
+#include "Lucy/Plan/FullTextType.h"
+#include "Lucy/Plan/Schema.h"
+#include "Lucy/Search/Hits.h"
+#include "Lucy/Search/IndexSearcher.h"
+#include "Lucy/Search/Span.h"
+#include "Lucy/Search/TermQuery.h"
+#include "Lucy/Store/RAMFolder.h"
+
+#define PHI      "\xCE\xA6"
+#define ELLIPSIS "\xE2\x80\xA6"
+
+#define TEST_STRING \
+    "1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 " \
+    "1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 " \
+    "1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 " \
+    "1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 " \
+    PHI " a b c d x y z h i j k " \
+    "6 7 8 9 0 6 7 8 9 0 6 7 8 9 0 6 7 8 9 0 6 7 8 9 0 " \
+    "6 7 8 9 0 6 7 8 9 0 6 7 8 9 0 6 7 8 9 0 6 7 8 9 0 " \
+    "6 7 8 9 0 6 7 8 9 0 6 7 8 9 0 6 7 8 9 0 6 7 8 9 0 " \
+    "6 7 8 9 0 6 7 8 9 0 6 7 8 9 0 6 7 8 9 0 6 7 8 9 0 " \
+    "6 7 8 9 0 6 7 8 9 0 6 7 8 9 0 6 7 8 9 0 6 7 8 9 0 "
+#define TEST_STRING_LEN 425
+
+static void
+test_Find_Best_Fragment(TestBatch *batch, Searcher *searcher, Obj *query) {
+    CharBuf *content = (CharBuf*)ZCB_WRAP_STR("content", 7);
+    Highlighter *highlighter = Highlighter_new(searcher, query, content, 3);
+    ViewCharBuf *target = (ViewCharBuf*)ZCB_BLANK();
+
+    VArray *spans = VA_new(1);
+    VA_Push(spans, (Obj*)Span_new(2, 1, 1.0f));
+    HeatMap *heat_map = HeatMap_new(spans, 133);
+    DECREF(spans);
+    CharBuf *field_val = (CharBuf *)ZCB_WRAP_STR("a " PHI " " PHI " b c", 11);
+    int32_t top = Highlighter_Find_Best_Fragment(highlighter, field_val,
+                                                 target, heat_map);
+    TEST_TRUE(batch,
+              CB_Equals_Str((CharBuf *)target, PHI " " PHI " b", 7),
+              "Find_Best_Fragment");
+    TEST_TRUE(batch,
+              top == 2,
+              "correct offset returned by Find_Best_Fragment");
+    field_val = (CharBuf *)ZCB_WRAP_STR("aa" PHI, 4);
+    top = Highlighter_Find_Best_Fragment(highlighter, field_val,
+                                         target, heat_map);
+    TEST_TRUE(batch,
+              CB_Equals_Str((CharBuf *)target, "aa" PHI, 4),
+              "Find_Best_Fragment returns whole field when field is short");
+    TEST_TRUE(batch,
+              top == 0,
+              "correct offset");
+    DECREF(heat_map);
+
+    spans = VA_new(1);
+    VA_Push(spans, (Obj*)Span_new(6, 2, 1.0f));
+    heat_map = HeatMap_new(spans, 133);
+    DECREF(spans);
+    field_val = (CharBuf *)ZCB_WRAP_STR("aaaab" PHI PHI, 9);
+    top = Highlighter_Find_Best_Fragment(highlighter, field_val,
+                                         target, heat_map);
+    TEST_TRUE(batch,
+              CB_Equals_Str((CharBuf *)target, "b" PHI PHI, 5),
+              "Find_Best_Fragment shifts left to deal with overrun");
+    TEST_TRUE(batch,
+              top == 4,
+              "correct offset");
+    DECREF(heat_map);
+
+    spans = VA_new(1);
+    VA_Push(spans, (Obj*)Span_new(0, 1, 1.0f));
+    heat_map = HeatMap_new(spans, 133);
+    DECREF(spans);
+    field_val = (CharBuf *)ZCB_WRAP_STR("a" PHI "bcde", 7);
+    top = Highlighter_Find_Best_Fragment(highlighter, field_val,
+                                         target, heat_map);
+    TEST_TRUE(batch,
+              CB_Equals_Str((CharBuf *)target, "a" PHI "bcd", 6),
+              "Find_Best_Fragment start at field beginning");
+    TEST_TRUE(batch,
+              top == 0,
+              "correct offset");
+    DECREF(heat_map);
+
+    DECREF(highlighter);
+}
+
+static void
+test_Raw_Excerpt(TestBatch *batch, Searcher *searcher, Obj *query) {
+    CharBuf *content = (CharBuf*)ZCB_WRAP_STR("content", 7);
+    Highlighter *highlighter = Highlighter_new(searcher, query, content, 6);
+
+    CharBuf *field_val   = (CharBuf *)ZCB_WRAP_STR("Ook.  Urk.  Ick.  ", 18);
+    CharBuf *fragment    = (CharBuf *)ZCB_WRAP_STR("Ook.  Urk.", 10);
+    CharBuf *raw_excerpt = CB_new(0);
+    VArray *spans = VA_new(1);
+    VA_Push(spans, (Obj*)Span_new(0, 18, 1.0f));
+    HeatMap *heat_map = HeatMap_new(spans, 133);
+    DECREF(spans);
+    VArray *sentences = VA_new(2);
+    VA_Push(sentences, (Obj*)Span_new(0, 4, 0.0f));
+    VA_Push(sentences, (Obj*)Span_new(6, 4, 0.0f));
+    int32_t top = Highlighter_Raw_Excerpt(highlighter, field_val, fragment,
+                                          raw_excerpt, 0, heat_map, sentences);
+    TEST_TRUE(batch,
+              CB_Equals_Str(raw_excerpt, "Ook.", 4),
+              "Raw_Excerpt at top");
+    TEST_TRUE(batch,
+              top == 0,
+              "top still 0");
+    DECREF(sentences);
+    DECREF(raw_excerpt);
+
+    fragment    = (CharBuf *)ZCB_WRAP_STR(".  Urk.  I", 10);
+    raw_excerpt = CB_new(0);
+    sentences   = VA_new(2);
+    VA_Push(sentences, (Obj*)Span_new(6, 4, 0.0f));
+    VA_Push(sentences, (Obj*)Span_new(12, 4, 0.0f));
+    top = Highlighter_Raw_Excerpt(highlighter, field_val, fragment,
+                                  raw_excerpt, 3, heat_map, sentences);
+    TEST_TRUE(batch,
+              CB_Equals_Str(raw_excerpt, "Urk.", 4),
+              "Raw_Excerpt in middle, with 2 bounds");
+    TEST_TRUE(batch,
+              top == 6,
+              "top in the middle modified by Raw_Excerpt");
+    DECREF(sentences);
+    DECREF(heat_map);
+    DECREF(raw_excerpt);
+
+    field_val   = (CharBuf *)ZCB_WRAP_STR("Ook urk ick i.", 14);
+    fragment    = (CharBuf *)ZCB_WRAP_STR("ick i.", 6);
+    raw_excerpt = CB_new(0);
+    spans       = VA_new(1);
+    VA_Push(spans, (Obj*)Span_new(0, 14, 1.0f));
+    heat_map = HeatMap_new(spans, 133);
+    DECREF(spans);
+    sentences = VA_new(1);
+    VA_Push(sentences, (Obj*)Span_new(0, 14, 0.0f));
+    top = Highlighter_Raw_Excerpt(highlighter, field_val, fragment,
+                                  raw_excerpt, 8, heat_map, sentences);
+    TEST_TRUE(batch,
+              CB_Equals_Str(raw_excerpt, ELLIPSIS " i.", 6),
+              "Ellipsis at top");
+    TEST_TRUE(batch,
+              top == 10,
+              "top correct when leading ellipsis inserted");
+    DECREF(sentences);
+    DECREF(heat_map);
+    DECREF(raw_excerpt);
+
+    field_val   = (CharBuf *)ZCB_WRAP_STR("Urk.  Iz no good.", 17);
+    fragment    = (CharBuf *)ZCB_WRAP_STR("  Iz no go", 10);
+    raw_excerpt = CB_new(0);
+    spans       = VA_new(1);
+    VA_Push(spans, (Obj*)Span_new(0, 17, 1.0f));
+    heat_map = HeatMap_new(spans, 133);
+    DECREF(spans);
+    sentences = VA_new(1);
+    VA_Push(sentences, (Obj*)Span_new(6, 11, 0.0f));
+    top = Highlighter_Raw_Excerpt(highlighter, field_val, fragment,
+                                  raw_excerpt, 4, heat_map, sentences);
+    TEST_TRUE(batch,
+              CB_Equals_Str(raw_excerpt, "Iz no" ELLIPSIS, 8),
+              "Ellipsis at end");
+    TEST_TRUE(batch,
+              top == 6,
+              "top trimmed");
+    DECREF(sentences);
+    DECREF(heat_map);
+    DECREF(raw_excerpt);
+
+    DECREF(highlighter);
+}
+
+static void
+test_Highlight_Excerpt(TestBatch *batch, Searcher *searcher, Obj *query) {
+    CharBuf *content = (CharBuf*)ZCB_WRAP_STR("content", 7);
+    Highlighter *highlighter = Highlighter_new(searcher, query, content, 3);
+
+    VArray *spans = VA_new(1);
+    VA_Push(spans, (Obj*)Span_new(2, 1, 0.0f));
+    CharBuf *raw_excerpt = (CharBuf *)ZCB_WRAP_STR("a b c", 5);
+    CharBuf *highlighted = CB_new(0);
+    Highlighter_Highlight_Excerpt(highlighter, spans, raw_excerpt,
+                                  highlighted, 0);
+    TEST_TRUE(batch,
+              CB_Equals_Str(highlighted, "a <strong>b</strong> c", 22),
+              "basic Highlight_Excerpt");
+    DECREF(highlighted);
+    DECREF(spans);
+
+    spans = VA_new(2);
+    VA_Push(spans, (Obj*)Span_new(0, 1, 1.0f));
+    VA_Push(spans, (Obj*)Span_new(10, 10, 1.0f));
+    raw_excerpt = (CharBuf *)ZCB_WRAP_STR(PHI, 2);
+    highlighted = CB_new(0);
+    Highlighter_Highlight_Excerpt(highlighter, spans, raw_excerpt,
+                                  highlighted, 0);
+    TEST_TRUE(batch,
+              CB_Equals_Str(highlighted, "<strong>&#934;</strong>", 23),
+              "don't surround spans off end of raw excerpt.");
+    DECREF(highlighted);
+    DECREF(spans);
+
+    spans = VA_new(1);
+    VA_Push(spans, (Obj*)Span_new(3, 1, 1.0f));
+    raw_excerpt = (CharBuf *)ZCB_WRAP_STR(PHI " " PHI " " PHI, 8);
+    highlighted = CB_new(0);
+    Highlighter_Highlight_Excerpt(highlighter, spans, raw_excerpt,
+                                  highlighted, 1);
+    TEST_TRUE(batch,
+              CB_Equals_Str(highlighted,
+                            "&#934; <strong>&#934;</strong> &#934;", 37),
+              "Highlight_Excerpt pays attention to offset");
+    DECREF(highlighted);
+    DECREF(spans);
+
+    spans = VA_new(4);
+    VA_Push(spans, (Obj*)Span_new(2, 10, 1.0f));
+    VA_Push(spans, (Obj*)Span_new(2,  4, 1.0f));
+    VA_Push(spans, (Obj*)Span_new(8,  9, 1.0f));
+    VA_Push(spans, (Obj*)Span_new(8,  4, 1.0f));
+    raw_excerpt = (CharBuf *)ZCB_WRAP_STR(PHI " Oook. Urk. Ick. " PHI, 21);
+    highlighted = CB_new(0);
+    Highlighter_Highlight_Excerpt(highlighter, spans, raw_excerpt,
+                                  highlighted, 0);
+    TEST_TRUE(batch,
+              CB_Equals_Str(highlighted,
+                            "&#934; <strong>Oook. Urk. Ick.</strong> &#934;",
+                            46),
+              "Highlight_Excerpt works with overlapping spans");
+    DECREF(highlighted);
+    DECREF(spans);
+
+    DECREF(highlighter);
+}
+
+static void
+test_Create_Excerpt(TestBatch *batch, Searcher *searcher, Obj *query,
+                    Hits *hits) {
+    CharBuf *content = (CharBuf*)ZCB_WRAP_STR("content", 7);
+    Highlighter *highlighter = Highlighter_new(searcher, query, content, 200);
+
+    HitDoc *hit = Hits_Next(hits);
+    CharBuf *excerpt = Highlighter_Create_Excerpt(highlighter, hit);
+    TEST_TRUE(batch,
+              CB_Find_Str(excerpt,
+                          "<strong>&#934;</strong> a b c d <strong>x y z</strong>",
+                          54) >= 0,
+              "highlighter tagged phrase and single term");
+    DECREF(excerpt);
+
+    CharBuf *pre_tag = (CharBuf*)ZCB_WRAP_STR("\x1B[1m", 4);
+    Highlighter_Set_Pre_Tag(highlighter, pre_tag);
+    CharBuf *post_tag = (CharBuf*)ZCB_WRAP_STR("\x1B[0m", 4);
+    Highlighter_Set_Post_Tag(highlighter, post_tag);
+    excerpt = Highlighter_Create_Excerpt(highlighter, hit);
+    TEST_TRUE(batch,
+              CB_Find_Str(excerpt,
+                          "\x1B[1m&#934;\x1B[0m a b c d \x1B[1mx y z\x1B[0m",
+                          36) >= 0,
+              "set_pre_tag and set_post_tag");
+    DECREF(excerpt);
+    DECREF(hit);
+
+    hit = Hits_Next(hits);
+    excerpt = Highlighter_Create_Excerpt(highlighter, hit);
+    TEST_TRUE(batch,
+              CB_Find_Str(excerpt, "x", 1) >= 0,
+              "excerpt field with partial hit doesn't cause highlighter freakout");
+    DECREF(excerpt);
+    DECREF(hit);
+    DECREF(highlighter);
+
+    query = (Obj*)ZCB_WRAP_STR("x \"x y z\" AND b", 15);
+    hits = Searcher_Hits(searcher, query, 0, 10, NULL);
+    highlighter = Highlighter_new(searcher, query, content, 200);
+    hit = Hits_Next(hits);
+    excerpt = Highlighter_Create_Excerpt(highlighter, hit);
+    TEST_TRUE(batch,
+              CB_Find_Str(excerpt,
+                          "<strong>b</strong> c d <strong>x y z</strong>",
+                          45) >= 0,
+              "query with same word in both phrase and term doesn't cause freakout");
+    DECREF(excerpt);
+    DECREF(hit);
+    DECREF(highlighter);
+    DECREF(hits);
+
+    query = (Obj*)ZCB_WRAP_STR("blind", 5);
+    hits = Searcher_Hits(searcher, query, 0, 10, NULL);
+    highlighter = Highlighter_new(searcher, query, content, 200);
+    hit = Hits_Next(hits);
+    excerpt = Highlighter_Create_Excerpt(highlighter, hit);
+    TEST_TRUE(batch,
+              CB_Find_Str(excerpt, "&quot;", 6) >= 0,
+              "HTML entity encoded properly");
+    DECREF(excerpt);
+    DECREF(hit);
+    DECREF(highlighter);
+    DECREF(hits);
+
+    query = (Obj*)ZCB_WRAP_STR("why", 3);
+    hits = Searcher_Hits(searcher, query, 0, 10, NULL);
+    highlighter = Highlighter_new(searcher, query, content, 200);
+    hit = Hits_Next(hits);
+    excerpt = Highlighter_Create_Excerpt(highlighter, hit);
+    TEST_TRUE(batch,
+              CB_Find_Str(excerpt, "&#934;", 6) == -1,
+              "no ellipsis for short excerpt");
+    DECREF(excerpt);
+    DECREF(hit);
+    DECREF(highlighter);
+    DECREF(hits);
+
+    Obj *term = (Obj*)ZCB_WRAP_STR("x", 1);
+    query = (Obj*)TermQuery_new(content, term);
+    hits = Searcher_Hits(searcher, query, 0, 10, NULL);
+    hit = Hits_Next(hits);
+    highlighter = Highlighter_new(searcher, query, content, 200);
+    excerpt = Highlighter_Create_Excerpt(highlighter, hit);
+    TEST_TRUE(batch,
+              CB_Find_Str(excerpt, "strong", 5) >= 0,
+              "specify field highlights correct field...");
+    DECREF(excerpt);
+    DECREF(highlighter);
+    CharBuf *alt = (CharBuf*)ZCB_WRAP_STR("alt", 3);
+    highlighter = Highlighter_new(searcher, query, alt, 200);
+    excerpt = Highlighter_Create_Excerpt(highlighter, hit);
+    TEST_TRUE(batch,
+              CB_Find_Str(excerpt, "strong", 5) == -1,
+              "... but not another field");
+    DECREF(excerpt);
+    DECREF(highlighter);
+    DECREF(hit);
+    DECREF(hits);
+}
+
+static void
+test_Find_Sentences(TestBatch *batch, Searcher *searcher, Obj *query) {
+    CharBuf *content = (CharBuf*)ZCB_WRAP_STR("content", 7);
+    Highlighter *highlighter = Highlighter_new(searcher, query, content, 200);
+    CharBuf *text = (CharBuf*)ZCB_WRAP_STR(
+                        "This is a sentence. This is a sentence. This is a sentence. "
+                        "This is a sentence. This is a sentence. This is a sentence. "
+                        "This is a sentence. This is a sentence. This is a sentence. "
+                        "This is a sentence. This is a sentence. This is a sentence. "
+                        "This is a sentence. This is a sentence. This is a sentence. ",
+                        300);
+
+    VArray *got = Highlighter_Find_Sentences(highlighter, text, 101, 50);
+    VArray *wanted = VA_new(2);
+    VA_push(wanted, (Obj*)Span_new(120, 19, 0.0f));
+    VA_push(wanted, (Obj*)Span_new(140, 19, 0.0f));
+    TEST_TRUE(batch,
+              VA_Equals(got, (Obj*)wanted),
+              "find_sentences with explicit args");
+    DECREF(wanted);
+    DECREF(got);
+
+    got = Highlighter_Find_Sentences(highlighter, text, 101, 4);
+    TEST_TRUE(batch,
+              VA_Get_Size(got) == 0,
+              "find_sentences with explicit args, finding nothing");
+    DECREF(got);
+
+    got = Highlighter_Find_Sentences(highlighter, text, 0, 0);
+    wanted = VA_new(15);
+    for (int i = 0; i < 15; ++i) {
+        VA_push(wanted, (Obj*)Span_new(i * 20, 19, 0.0f));
+    }
+    TEST_TRUE(batch,
+              VA_Equals(got, (Obj*)wanted),
+              "find_sentences with default offset and length");
+    DECREF(wanted);
+    DECREF(got);
+
+    text = (CharBuf*)ZCB_WRAP_STR(" Foo", 4);
+    got = Highlighter_Find_Sentences(highlighter, text, 0, 0);
+    wanted = VA_new(1);
+    VA_push(wanted, (Obj*)Span_new(1, 3, 0.0f));
+    TEST_TRUE(batch,
+              VA_Equals(got, (Obj*)wanted),
+              "Skip leading whitespace but get first sentence");
+    DECREF(wanted);
+    DECREF(got);
+
+    DECREF(highlighter);
+}
+
+static void
+test_highlighting(TestBatch *batch) {
+    Schema *schema = Schema_new();
+    RegexTokenizer *tokenizer = RegexTokenizer_new(NULL);
+    FullTextType *plain_type = FullTextType_new((Analyzer*)tokenizer);
+    FullTextType_Set_Highlightable(plain_type, true);
+    FullTextType *dunked_type = FullTextType_new((Analyzer*)tokenizer);
+    FullTextType_Set_Highlightable(dunked_type, true);
+    FullTextType_Set_Boost(dunked_type, 0.1f);
+    CharBuf *content = (CharBuf*)ZCB_WRAP_STR("content", 7);
+    Schema_Spec_Field(schema, content, (FieldType*)plain_type);
+    CharBuf *alt = (CharBuf*)ZCB_WRAP_STR("alt", 3);
+    Schema_Spec_Field(schema, alt, (FieldType*)dunked_type);
+    DECREF(plain_type);
+    DECREF(dunked_type);
+    DECREF(tokenizer);
+
+    RAMFolder *folder = RAMFolder_new(NULL);
+    Indexer *indexer = Indexer_new(schema, (Obj*)folder, NULL, 0);
+
+    Doc *doc = Doc_new(NULL, 0);
+    CharBuf *string = (CharBuf *)ZCB_WRAP_STR(TEST_STRING, TEST_STRING_LEN);
+    Doc_Store(doc, content, (Obj*)string);
+    Indexer_Add_Doc(indexer, doc, 1.0f);
+    DECREF(doc);
+
+    doc = Doc_new(NULL, 0);
+    string = (CharBuf *)ZCB_WRAP_STR("\"I see,\" said the blind man.", 28);
+    Doc_Store(doc, content, (Obj*)string);
+    Indexer_Add_Doc(indexer, doc, 1.0f);
+    DECREF(doc);
+
+    doc = Doc_new(NULL, 0);
+    string = (CharBuf *)ZCB_WRAP_STR("x but not why or 2ee", 20);
+    Doc_Store(doc, content, (Obj*)string);
+    string = (CharBuf *)ZCB_WRAP_STR(TEST_STRING
+                                     " and extra stuff so it scores lower",
+                                     TEST_STRING_LEN + 35);
+    Doc_Store(doc, alt, (Obj*)string);
+    Indexer_Add_Doc(indexer, doc, 1.0f);
+    DECREF(doc);
+
+    Indexer_Commit(indexer);
+    DECREF(indexer);
+
+    Searcher *searcher = (Searcher*)IxSearcher_new((Obj*)folder);
+    Obj *query = (Obj*)ZCB_WRAP_STR("\"x y z\" AND " PHI, 14);
+    Hits *hits = Searcher_Hits(searcher, query, 0, 10, NULL);
+
+    test_Find_Best_Fragment(batch, searcher, query);
+    test_Raw_Excerpt(batch, searcher, query);
+    test_Highlight_Excerpt(batch, searcher, query);
+    test_Create_Excerpt(batch, searcher, query, hits);
+    test_Find_Sentences(batch, searcher, query);
+
+    DECREF(hits);
+    DECREF(searcher);
+    DECREF(folder);
+    DECREF(schema);
+}
+
+void
+TestHighlighter_run_tests() {
+    TestBatch *batch = TestBatch_new(32);
+
+    TestBatch_Plan(batch);
+
+    test_highlighting(batch);
+
+    DECREF(batch);
+}
+
+

Added: incubator/lucy/trunk/core/Lucy/Test/Highlight/TestHighlighter.cfh
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Test/Highlight/TestHighlighter.cfh?rev=1220281&view=auto
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Test/Highlight/TestHighlighter.cfh (added)
+++ incubator/lucy/trunk/core/Lucy/Test/Highlight/TestHighlighter.cfh Sat Dec 17 21:55:20 2011
@@ -0,0 +1,24 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+parcel Lucy;
+
+inert class Lucy::Test::Highlight::TestHighlighter {
+    inert void
+    run_tests();
+}
+
+

Modified: incubator/lucy/trunk/perl/MANIFEST
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/MANIFEST?rev=1220281&r1=1220280&r2=1220281&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/MANIFEST (original)
+++ incubator/lucy/trunk/perl/MANIFEST Sat Dec 17 21:55:20 2011
@@ -265,7 +265,6 @@ t/221-sort_writer.t
 t/224-lex_reader.t
 t/233-background_merger.t
 t/302-many_fields.t
-t/303-highlighter.t
 t/304-verify_utf8.t
 t/305-indexer.t
 t/306-dynamic_schema.t
@@ -326,6 +325,7 @@ t/binding/156-snowball_stemmer.t
 t/binding/157-normalizer.t
 t/binding/158-standard_tokenizer.t
 t/binding/206-snapshot.t
+t/binding/303-highlighter.t
 t/binding/506-collector.t
 t/binding/702-sample.t
 t/binding/800-stack.t
@@ -388,6 +388,7 @@ t/core/231-blob_type.t
 t/core/232-numeric_type.t
 t/core/234-field_type.t
 t/core/301-segment.t
+t/core/303-highlighter.t
 t/core/501-termquery.t
 t/core/502-phrasequery.t
 t/core/515-range_query.t

Modified: incubator/lucy/trunk/perl/lib/Lucy/Test.pm
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/lib/Lucy/Test.pm?rev=1220281&r1=1220280&r2=1220281&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/lib/Lucy/Test.pm (original)
+++ incubator/lucy/trunk/perl/lib/Lucy/Test.pm Sat Dec 17 21:55:20 2011
@@ -252,6 +252,10 @@ PPCODE:
     else if (strEQ(package, "TestVArray")) {
         lucy_TestVArray_run_tests();
     }
+    // Lucy::Highlight
+    else if (strEQ(package, "TestHighlighter")) {
+        lucy_TestHighlighter_run_tests();
+    }
     else {
         THROW(LUCY_ERR, "Unknown test id: %s", package);
     }

Added: incubator/lucy/trunk/perl/t/binding/303-highlighter.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/binding/303-highlighter.t?rev=1220281&view=auto
==============================================================================
--- incubator/lucy/trunk/perl/t/binding/303-highlighter.t (added)
+++ incubator/lucy/trunk/perl/t/binding/303-highlighter.t Sat Dec 17 21:55:20 2011
@@ -0,0 +1,132 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+use strict;
+use warnings;
+use lib 'buildlib';
+
+package MySchema;
+use base qw( Lucy::Plan::Schema );
+use Lucy::Analysis::RegexTokenizer;
+
+sub new {
+    my $class      = shift;
+    my $self       = $class->SUPER::new(@_);
+    my $tokenizer  = Lucy::Analysis::RegexTokenizer->new;
+    my $plain_type = Lucy::Plan::FullTextType->new(
+        analyzer      => $tokenizer,
+        highlightable => 1,
+    );
+    my $dunked_type = Lucy::Plan::FullTextType->new(
+        analyzer      => $tokenizer,
+        highlightable => 1,
+        boost         => 0.1,
+    );
+    $self->spec_field( name => 'content', type => $plain_type );
+    $self->spec_field( name => 'alt',     type => $dunked_type );
+    return $self;
+}
+
+package MyHighlighter;
+use base qw( Lucy::Highlight::Highlighter );
+
+sub encode {
+    my ( $self, $text ) = @_;
+    $text =~ s/blind/wise/;
+    return $text;
+}
+
+sub highlight {
+    my ( $self, $text ) = @_;
+    return "*$text*";
+}
+
+package main;
+
+use Test::More tests => 6;
+use Lucy::Test;
+
+binmode( STDOUT, ":utf8" );
+
+my $phi         = "\x{03a6}";
+my $encoded_phi = "&#934;";
+
+my $string = '1 2 3 4 5 ' x 20;    # 200 characters
+$string .= "$phi a b c d x y z h i j k ";
+$string .= '6 7 8 9 0 ' x 20;
+my $with_quotes = '"I see," said the blind man.';
+
+my $folder  = Lucy::Store::RAMFolder->new;
+my $indexer = Lucy::Index::Indexer->new(
+    index  => $folder,
+    schema => MySchema->new,
+);
+
+$indexer->add_doc( { content => $_ } ) for ( $string, $with_quotes );
+$indexer->add_doc(
+    {   content => "x but not why or 2ee",
+        alt     => $string . " and extra stuff so it scores lower",
+    }
+);
+$indexer->commit;
+
+my $searcher = Lucy::Search::IndexSearcher->new( index => $folder );
+
+my $q    = qq|"x y z" AND $phi|;
+my $hits = $searcher->hits( query => $q );
+my $hl   = Lucy::Highlight::Highlighter->new(
+    searcher => $searcher,
+    query    => $q,
+    field    => 'content',
+);
+
+my $hit     = $hits->next;
+my $excerpt = $hl->create_excerpt($hit);
+like( $excerpt, qr#<strong>x y z</strong>#, "highlighter tagged the phrase" );
+like(
+    $excerpt,
+    qr#<strong>$encoded_phi</strong>#i,
+    "highlighter tagged the single term"
+);
+
+$hl->set_pre_tag("\e[1m");
+$hl->set_post_tag("\e[0m");
+like(
+    $hl->create_excerpt($hit),
+    qr#\e\[1m$encoded_phi\e\[0m#i, "set_pre_tag and set_post_tag",
+);
+
+$q = $searcher->glean_query("foo");
+my $compiler = $q->make_compiler( searcher => $searcher );
+$hl = Lucy::Highlight::Highlighter->new(
+    searcher => $searcher,
+    query    => $compiler,
+    field    => 'content',
+);
+is( $$compiler, ${ $hl->get_query },
+    "Highlighter accepts Compiler as Query" );
+is( $$compiler, ${ $hl->get_compiler },
+    "Highlighter uses supplied Compiler" );
+
+$hl = MyHighlighter->new(
+    searcher => $searcher,
+    query    => "blind",
+    field    => 'content',
+);
+$hits = $searcher->hits( query => 'blind' );
+$hit = $hits->next;
+like( $hl->create_excerpt($hit),
+    qr/\*wise\*/, "override both Encode() and Highlight()" );
+

Added: incubator/lucy/trunk/perl/t/core/303-highlighter.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/core/303-highlighter.t?rev=1220281&view=auto
==============================================================================
--- incubator/lucy/trunk/perl/t/core/303-highlighter.t (added)
+++ incubator/lucy/trunk/perl/t/core/303-highlighter.t Sat Dec 17 21:55:20 2011
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+use strict;
+use warnings;
+
+use Lucy::Test;
+Lucy::Test::run_tests("TestHighlighter");
+