You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by nw...@apache.org on 2011/12/17 22:55:21 UTC
[lucy-commits] svn commit: r1220281 - in /incubator/lucy/trunk: core/Lucy/Search/
core/Lucy/Test/Highlight/ perl/ perl/lib/Lucy/ perl/t/ perl/t/binding/
perl/t/core/
Author: nwellnhof
Date: Sat Dec 17 21:55:20 2011
New Revision: 1220281
URL: http://svn.apache.org/viewvc?rev=1220281&view=rev
Log:
LUCY-200 Port most of the highlighter tests to C
Split t/303-highlighter.t into core and binding tests
Added:
incubator/lucy/trunk/core/Lucy/Test/Highlight/
incubator/lucy/trunk/core/Lucy/Test/Highlight/TestHighlighter.c
incubator/lucy/trunk/core/Lucy/Test/Highlight/TestHighlighter.cfh
incubator/lucy/trunk/perl/t/binding/303-highlighter.t
incubator/lucy/trunk/perl/t/core/303-highlighter.t
Removed:
incubator/lucy/trunk/perl/t/303-highlighter.t
Modified:
incubator/lucy/trunk/core/Lucy/Search/Span.c
incubator/lucy/trunk/core/Lucy/Search/Span.cfh
incubator/lucy/trunk/perl/MANIFEST
incubator/lucy/trunk/perl/lib/Lucy/Test.pm
Modified: incubator/lucy/trunk/core/Lucy/Search/Span.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Search/Span.c?rev=1220281&r1=1220280&r2=1220281&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Search/Span.c (original)
+++ incubator/lucy/trunk/core/Lucy/Search/Span.c Sat Dec 17 21:55:20 2011
@@ -64,6 +64,17 @@ Span_set_weight(Span *self, float weight
self->weight = weight;
}
+bool_t
+Span_equals(Span *self, Obj *other) {
+    /* `other` viewed as a Span.  Only the pointer value is used until the
+     * type check below has passed. */
+    Span *candidate = (Span*)other;
+
+    /* The very same object is trivially equal. */
+    if (candidate == self) { return true; }
+
+    /* Anything that is not a Span can never be equal to one. */
+    if (!Obj_Is_A(other, SPAN)) { return false; }
+
+    /* Equality requires offset, length and weight to all agree. */
+    if (self->offset == candidate->offset
+        && self->length == candidate->length
+        && self->weight == candidate->weight
+       ) {
+        return true;
+    }
+    return false;
+}
+
int32_t
Span_compare_to(Span *self, Obj *other) {
Span *competitor = (Span*)CERTIFY(other, SPAN);
Modified: incubator/lucy/trunk/core/Lucy/Search/Span.cfh
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Search/Span.cfh?rev=1220281&r1=1220280&r2=1220281&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Search/Span.cfh (original)
+++ incubator/lucy/trunk/core/Lucy/Search/Span.cfh Sat Dec 17 21:55:20 2011
@@ -74,6 +74,9 @@ class Lucy::Search::Span inherits Lucy::
public void
Set_Weight(Span *self, float weight);
+ public bool_t
+ Equals(Span *self, Obj *other);
+
public int32_t
Compare_To(Span *self, Obj *other);
}
Added: incubator/lucy/trunk/core/Lucy/Test/Highlight/TestHighlighter.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Test/Highlight/TestHighlighter.c?rev=1220281&view=auto
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Test/Highlight/TestHighlighter.c (added)
+++ incubator/lucy/trunk/core/Lucy/Test/Highlight/TestHighlighter.c Sat Dec 17 21:55:20 2011
@@ -0,0 +1,492 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define C_LUCY_TESTHIGHLIGHTER
+#include "Lucy/Util/ToolSet.h"
+
+#include "Lucy/Test.h"
+#include "Lucy/Test/Highlight/TestHighlighter.h"
+#include "Lucy/Highlight/Highlighter.h"
+
+#include "Lucy/Analysis/RegexTokenizer.h"
+#include "Lucy/Document/Doc.h"
+#include "Lucy/Document/HitDoc.h"
+#include "Lucy/Highlight/HeatMap.h"
+#include "Lucy/Index/Indexer.h"
+#include "Lucy/Plan/FullTextType.h"
+#include "Lucy/Plan/Schema.h"
+#include "Lucy/Search/Hits.h"
+#include "Lucy/Search/IndexSearcher.h"
+#include "Lucy/Search/Span.h"
+#include "Lucy/Search/TermQuery.h"
+#include "Lucy/Store/RAMFolder.h"
+
+/* UTF-8 byte sequences for U+03A6 (GREEK CAPITAL LETTER PHI) and U+2026
+ * (HORIZONTAL ELLIPSIS). */
+#define PHI "\xCE\xA6"
+#define ELLIPSIS "\xE2\x80\xA6"
+
+/* Body of the main test document: 20 groups of "1 2 3 4 5 " (200 bytes),
+ * then PHI and the words the queries look for (25 bytes), then 20 groups of
+ * "6 7 8 9 0 " (200 bytes) -- 425 bytes in total.  This mirrors the string
+ * built with `x 20` in perl/t/binding/303-highlighter.t.
+ */
+#define TEST_STRING \
+    "1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 " \
+    "1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 " \
+    "1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 " \
+    "1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 " \
+    PHI " a b c d x y z h i j k " \
+    "6 7 8 9 0 6 7 8 9 0 6 7 8 9 0 6 7 8 9 0 6 7 8 9 0 " \
+    "6 7 8 9 0 6 7 8 9 0 6 7 8 9 0 6 7 8 9 0 6 7 8 9 0 " \
+    "6 7 8 9 0 6 7 8 9 0 6 7 8 9 0 6 7 8 9 0 6 7 8 9 0 " \
+    "6 7 8 9 0 6 7 8 9 0 6 7 8 9 0 6 7 8 9 0 6 7 8 9 0 "
+
+/* Byte length of TEST_STRING without the terminating NUL.  Computed from the
+ * literal so the two can never disagree; a mismatch makes
+ * `TEST_STRING " suffix"` wrapped with `TEST_STRING_LEN + n` cut the string
+ * off in the wrong place.
+ */
+#define TEST_STRING_LEN (sizeof(TEST_STRING) - 1)
+
+/* Exercise Highlighter_Find_Best_Fragment() against hand-built heat maps.
+ *
+ * Each scenario builds a HeatMap from a single Span, runs the method on a
+ * short field value and checks both the fragment written to `fragment` and
+ * the returned offset.  `searcher` and `query` are only needed to construct
+ * the Highlighter.
+ */
+static void
+test_Find_Best_Fragment(TestBatch *batch, Searcher *searcher, Obj *query) {
+    CharBuf     *field_name = (CharBuf*)ZCB_WRAP_STR("content", 7);
+    Highlighter *hl         = Highlighter_new(searcher, query, field_name, 3);
+    ViewCharBuf *fragment   = (ViewCharBuf*)ZCB_BLANK();
+    VArray      *hot_spans  = NULL;
+    HeatMap     *heat       = NULL;
+    CharBuf     *text       = NULL;
+    int32_t      start      = 0;
+
+    /* Scenarios 1 and 2 share a heat map built from Span(2, 1, 1.0). */
+    hot_spans = VA_new(1);
+    VA_Push(hot_spans, (Obj*)Span_new(2, 1, 1.0f));
+    heat = HeatMap_new(hot_spans, 133);
+    DECREF(hot_spans);
+
+    /* Scenario 1: the field is longer than the fragment. */
+    text  = (CharBuf*)ZCB_WRAP_STR("a " PHI " " PHI " b c", 11);
+    start = Highlighter_Find_Best_Fragment(hl, text, fragment, heat);
+    TEST_TRUE(batch,
+              CB_Equals_Str((CharBuf*)fragment, PHI " " PHI " b", 7),
+              "Find_Best_Fragment");
+    TEST_TRUE(batch, start == 2,
+              "correct offset returned by Find_Best_Fragment");
+
+    /* Scenario 2: a short field comes back whole. */
+    text  = (CharBuf*)ZCB_WRAP_STR("aa" PHI, 4);
+    start = Highlighter_Find_Best_Fragment(hl, text, fragment, heat);
+    TEST_TRUE(batch,
+              CB_Equals_Str((CharBuf*)fragment, "aa" PHI, 4),
+              "Find_Best_Fragment returns whole field when field is short");
+    TEST_TRUE(batch, start == 0, "correct offset");
+    DECREF(heat);
+
+    /* Scenario 3: Span(6, 2, 1.0) near the end of the field. */
+    hot_spans = VA_new(1);
+    VA_Push(hot_spans, (Obj*)Span_new(6, 2, 1.0f));
+    heat = HeatMap_new(hot_spans, 133);
+    DECREF(hot_spans);
+    text  = (CharBuf*)ZCB_WRAP_STR("aaaab" PHI PHI, 9);
+    start = Highlighter_Find_Best_Fragment(hl, text, fragment, heat);
+    TEST_TRUE(batch,
+              CB_Equals_Str((CharBuf*)fragment, "b" PHI PHI, 5),
+              "Find_Best_Fragment shifts left to deal with overrun");
+    TEST_TRUE(batch, start == 4, "correct offset");
+    DECREF(heat);
+
+    /* Scenario 4: Span(0, 1, 1.0) at the very start of the field. */
+    hot_spans = VA_new(1);
+    VA_Push(hot_spans, (Obj*)Span_new(0, 1, 1.0f));
+    heat = HeatMap_new(hot_spans, 133);
+    DECREF(hot_spans);
+    text  = (CharBuf*)ZCB_WRAP_STR("a" PHI "bcde", 7);
+    start = Highlighter_Find_Best_Fragment(hl, text, fragment, heat);
+    TEST_TRUE(batch,
+              CB_Equals_Str((CharBuf*)fragment, "a" PHI "bcd", 6),
+              "Find_Best_Fragment start at field beginning");
+    TEST_TRUE(batch, start == 0, "correct offset");
+    DECREF(heat);
+
+    DECREF(hl);
+}
+
+/* Exercise Highlighter_Raw_Excerpt() with hand-built heat maps and sentence
+ * boundaries.
+ *
+ * Sentences in the fixtures are separated by TWO spaces.  The byte lengths
+ * passed to ZCB_WRAP_STR (18, 10, 17, ...) and the sentence Spans at offsets
+ * 0, 6 and 12 depend on that spacing; with single spaces the declared
+ * lengths would run past the end of the literals.
+ */
+static void
+test_Raw_Excerpt(TestBatch *batch, Searcher *searcher, Obj *query) {
+    CharBuf *content = (CharBuf*)ZCB_WRAP_STR("content", 7);
+    Highlighter *highlighter = Highlighter_new(searcher, query, content, 6);
+
+    /* Fragment starts at the top of the field. */
+    CharBuf *field_val = (CharBuf *)ZCB_WRAP_STR("Ook.  Urk.  Ick.  ", 18);
+    CharBuf *fragment = (CharBuf *)ZCB_WRAP_STR("Ook.  Urk.", 10);
+    CharBuf *raw_excerpt = CB_new(0);
+    VArray *spans = VA_new(1);
+    VA_Push(spans, (Obj*)Span_new(0, 18, 1.0f));
+    HeatMap *heat_map = HeatMap_new(spans, 133);
+    DECREF(spans);
+    VArray *sentences = VA_new(2);
+    VA_Push(sentences, (Obj*)Span_new(0, 4, 0.0f));
+    VA_Push(sentences, (Obj*)Span_new(6, 4, 0.0f));
+    int32_t top = Highlighter_Raw_Excerpt(highlighter, field_val, fragment,
+                                          raw_excerpt, 0, heat_map, sentences);
+    TEST_TRUE(batch,
+              CB_Equals_Str(raw_excerpt, "Ook.", 4),
+              "Raw_Excerpt at top");
+    TEST_TRUE(batch,
+              top == 0,
+              "top still 0");
+    DECREF(sentences);
+    DECREF(raw_excerpt);
+
+    /* Fragment starts at byte 3 of the same field, with a sentence boundary
+     * on each side of "Urk.". */
+    fragment = (CharBuf *)ZCB_WRAP_STR(".  Urk.  I", 10);
+    raw_excerpt = CB_new(0);
+    sentences = VA_new(2);
+    VA_Push(sentences, (Obj*)Span_new(6, 4, 0.0f));
+    VA_Push(sentences, (Obj*)Span_new(12, 4, 0.0f));
+    top = Highlighter_Raw_Excerpt(highlighter, field_val, fragment,
+                                  raw_excerpt, 3, heat_map, sentences);
+    TEST_TRUE(batch,
+              CB_Equals_Str(raw_excerpt, "Urk.", 4),
+              "Raw_Excerpt in middle, with 2 bounds");
+    TEST_TRUE(batch,
+              top == 6,
+              "top in the middle modified by Raw_Excerpt");
+    DECREF(sentences);
+    DECREF(heat_map);
+    DECREF(raw_excerpt);
+
+    /* One long sentence; the fragment starts mid-sentence, so a leading
+     * ellipsis is expected. */
+    field_val = (CharBuf *)ZCB_WRAP_STR("Ook urk ick i.", 14);
+    fragment = (CharBuf *)ZCB_WRAP_STR("ick i.", 6);
+    raw_excerpt = CB_new(0);
+    spans = VA_new(1);
+    VA_Push(spans, (Obj*)Span_new(0, 14, 1.0f));
+    heat_map = HeatMap_new(spans, 133);
+    DECREF(spans);
+    sentences = VA_new(1);
+    VA_Push(sentences, (Obj*)Span_new(0, 14, 0.0f));
+    top = Highlighter_Raw_Excerpt(highlighter, field_val, fragment,
+                                  raw_excerpt, 8, heat_map, sentences);
+    TEST_TRUE(batch,
+              CB_Equals_Str(raw_excerpt, ELLIPSIS " i.", 6),
+              "Ellipsis at top");
+    TEST_TRUE(batch,
+              top == 10,
+              "top correct when leading ellipsis inserted");
+    DECREF(sentences);
+    DECREF(heat_map);
+    DECREF(raw_excerpt);
+
+    /* The fragment ends before the sentence does, so a trailing ellipsis is
+     * expected. */
+    field_val = (CharBuf *)ZCB_WRAP_STR("Urk.  Iz no good.", 17);
+    fragment = (CharBuf *)ZCB_WRAP_STR("  Iz no go", 10);
+    raw_excerpt = CB_new(0);
+    spans = VA_new(1);
+    VA_Push(spans, (Obj*)Span_new(0, 17, 1.0f));
+    heat_map = HeatMap_new(spans, 133);
+    DECREF(spans);
+    sentences = VA_new(1);
+    VA_Push(sentences, (Obj*)Span_new(6, 11, 0.0f));
+    top = Highlighter_Raw_Excerpt(highlighter, field_val, fragment,
+                                  raw_excerpt, 4, heat_map, sentences);
+    TEST_TRUE(batch,
+              CB_Equals_Str(raw_excerpt, "Iz no" ELLIPSIS, 8),
+              "Ellipsis at end");
+    TEST_TRUE(batch,
+              top == 6,
+              "top trimmed");
+    DECREF(sentences);
+    DECREF(heat_map);
+    DECREF(raw_excerpt);
+
+    DECREF(highlighter);
+}
+
+/* Exercise Highlighter_Highlight_Excerpt(): given Spans and a raw excerpt,
+ * check the tagged output written to `highlighted`.
+ *
+ * The expected strings spell PHI as the numeric character reference
+ * "&#934;" (6 bytes).  The byte counts passed to CB_Equals_Str (23, 37, 46)
+ * only add up with that spelling, not with a raw 2-byte UTF-8 PHI.
+ */
+static void
+test_Highlight_Excerpt(TestBatch *batch, Searcher *searcher, Obj *query) {
+    CharBuf *content = (CharBuf*)ZCB_WRAP_STR("content", 7);
+    Highlighter *highlighter = Highlighter_new(searcher, query, content, 3);
+
+    /* Plain ASCII excerpt with a single span over "b". */
+    VArray *spans = VA_new(1);
+    VA_Push(spans, (Obj*)Span_new(2, 1, 0.0f));
+    CharBuf *raw_excerpt = (CharBuf *)ZCB_WRAP_STR("a b c", 5);
+    CharBuf *highlighted = CB_new(0);
+    Highlighter_Highlight_Excerpt(highlighter, spans, raw_excerpt,
+                                  highlighted, 0);
+    TEST_TRUE(batch,
+              CB_Equals_Str(highlighted, "a <strong>b</strong> c", 22),
+              "basic Highlight_Excerpt");
+    DECREF(highlighted);
+    DECREF(spans);
+
+    /* The second span lies entirely beyond the one-character excerpt. */
+    spans = VA_new(2);
+    VA_Push(spans, (Obj*)Span_new(0, 1, 1.0f));
+    VA_Push(spans, (Obj*)Span_new(10, 10, 1.0f));
+    raw_excerpt = (CharBuf *)ZCB_WRAP_STR(PHI, 2);
+    highlighted = CB_new(0);
+    Highlighter_Highlight_Excerpt(highlighter, spans, raw_excerpt,
+                                  highlighted, 0);
+    TEST_TRUE(batch,
+              CB_Equals_Str(highlighted, "<strong>&#934;</strong>", 23),
+              "don't surround spans off end of raw excerpt.");
+    DECREF(highlighted);
+    DECREF(spans);
+
+    /* Non-zero last argument: the span position is not relative to the
+     * start of the excerpt. */
+    spans = VA_new(1);
+    VA_Push(spans, (Obj*)Span_new(3, 1, 1.0f));
+    raw_excerpt = (CharBuf *)ZCB_WRAP_STR(PHI " " PHI " " PHI, 8);
+    highlighted = CB_new(0);
+    Highlighter_Highlight_Excerpt(highlighter, spans, raw_excerpt,
+                                  highlighted, 1);
+    TEST_TRUE(batch,
+              CB_Equals_Str(highlighted,
+                            "&#934; <strong>&#934;</strong> &#934;", 37),
+              "Highlight_Excerpt pays attention to offset");
+    DECREF(highlighted);
+    DECREF(spans);
+
+    /* Four overlapping spans are expected to produce one tagged run. */
+    spans = VA_new(4);
+    VA_Push(spans, (Obj*)Span_new(2, 10, 1.0f));
+    VA_Push(spans, (Obj*)Span_new(2, 4, 1.0f));
+    VA_Push(spans, (Obj*)Span_new(8, 9, 1.0f));
+    VA_Push(spans, (Obj*)Span_new(8, 4, 1.0f));
+    raw_excerpt = (CharBuf *)ZCB_WRAP_STR(PHI " Oook. Urk. Ick. " PHI, 21);
+    highlighted = CB_new(0);
+    Highlighter_Highlight_Excerpt(highlighter, spans, raw_excerpt,
+                                  highlighted, 0);
+    TEST_TRUE(batch,
+              CB_Equals_Str(highlighted,
+                            "&#934; <strong>Oook. Urk. Ick.</strong> &#934;",
+                            46),
+              "Highlight_Excerpt works with overlapping spans");
+    DECREF(highlighted);
+    DECREF(spans);
+
+    DECREF(highlighter);
+}
+
+/* End-to-end checks of Highlighter_Create_Excerpt() against the index built
+ * by test_highlighting().
+ *
+ * `hits` must hold the results of `query`; the first two hits are consumed
+ * here.  The caller keeps ownership of `hits`, `searcher` and `query`.  This
+ * function releases the Hits objects and the TermQuery it creates itself.
+ *
+ * Expected excerpts spell non-ASCII and markup characters as entities
+ * ("&#934;", "&quot;").  The byte lengths passed to CB_Find_Str (54, 36, 6)
+ * only add up with those spellings.
+ */
+static void
+test_Create_Excerpt(TestBatch *batch, Searcher *searcher, Obj *query,
+                    Hits *hits) {
+    CharBuf *content = (CharBuf*)ZCB_WRAP_STR("content", 7);
+    Highlighter *highlighter = Highlighter_new(searcher, query, content, 200);
+
+    HitDoc *hit = Hits_Next(hits);
+    CharBuf *excerpt = Highlighter_Create_Excerpt(highlighter, hit);
+    TEST_TRUE(batch,
+              CB_Find_Str(excerpt,
+                          "<strong>&#934;</strong> a b c d <strong>x y z</strong>",
+                          54) >= 0,
+              "highlighter tagged phrase and single term");
+    DECREF(excerpt);
+
+    /* Swap the default tags for ANSI escape sequences. */
+    CharBuf *pre_tag = (CharBuf*)ZCB_WRAP_STR("\x1B[1m", 4);
+    Highlighter_Set_Pre_Tag(highlighter, pre_tag);
+    CharBuf *post_tag = (CharBuf*)ZCB_WRAP_STR("\x1B[0m", 4);
+    Highlighter_Set_Post_Tag(highlighter, post_tag);
+    excerpt = Highlighter_Create_Excerpt(highlighter, hit);
+    TEST_TRUE(batch,
+              CB_Find_Str(excerpt,
+                          "\x1B[1m&#934;\x1B[0m a b c d \x1B[1mx y z\x1B[0m",
+                          36) >= 0,
+              "set_pre_tag and set_post_tag");
+    DECREF(excerpt);
+    DECREF(hit);
+
+    hit = Hits_Next(hits);
+    excerpt = Highlighter_Create_Excerpt(highlighter, hit);
+    TEST_TRUE(batch,
+              CB_Find_Str(excerpt, "x", 1) >= 0,
+              "excerpt field with partial hit doesn't cause highlighter freakout");
+    DECREF(excerpt);
+    DECREF(hit);
+    DECREF(highlighter);
+
+    /* From here on `query` and `hits` are local: the parameters are reused
+     * as scratch variables and every Hits object created is released. */
+    query = (Obj*)ZCB_WRAP_STR("x \"x y z\" AND b", 15);
+    hits = Searcher_Hits(searcher, query, 0, 10, NULL);
+    highlighter = Highlighter_new(searcher, query, content, 200);
+    hit = Hits_Next(hits);
+    excerpt = Highlighter_Create_Excerpt(highlighter, hit);
+    TEST_TRUE(batch,
+              CB_Find_Str(excerpt,
+                          "<strong>b</strong> c d <strong>x y z</strong>",
+                          45) >= 0,
+              "query with same word in both phrase and term doesn't cause freakout");
+    DECREF(excerpt);
+    DECREF(hit);
+    DECREF(highlighter);
+    DECREF(hits);
+
+    query = (Obj*)ZCB_WRAP_STR("blind", 5);
+    hits = Searcher_Hits(searcher, query, 0, 10, NULL);
+    highlighter = Highlighter_new(searcher, query, content, 200);
+    hit = Hits_Next(hits);
+    excerpt = Highlighter_Create_Excerpt(highlighter, hit);
+    TEST_TRUE(batch,
+              CB_Find_Str(excerpt, "&quot;", 6) >= 0,
+              "HTML entity encoded properly");
+    DECREF(excerpt);
+    DECREF(hit);
+    DECREF(highlighter);
+    DECREF(hits);
+
+    query = (Obj*)ZCB_WRAP_STR("why", 3);
+    hits = Searcher_Hits(searcher, query, 0, 10, NULL);
+    highlighter = Highlighter_new(searcher, query, content, 200);
+    hit = Hits_Next(hits);
+    excerpt = Highlighter_Create_Excerpt(highlighter, hit);
+    /* Look for the ellipsis (U+2026, i.e. "&#8230;" once entity-encoded).
+     * Searching for PHI here would pass trivially, because the matching
+     * document contains no PHI at all.
+     * NOTE(review): assumes the ellipsis is entity-encoded the same way PHI
+     * is -- confirm against Highlighter's Encode(). */
+    TEST_TRUE(batch,
+              CB_Find_Str(excerpt, "&#8230;", 7) == -1,
+              "no ellipsis for short excerpt");
+    DECREF(excerpt);
+    DECREF(hit);
+    DECREF(highlighter);
+    DECREF(hits);
+
+    /* A TermQuery restricted to "content" must highlight that field only. */
+    Obj *term = (Obj*)ZCB_WRAP_STR("x", 1);
+    query = (Obj*)TermQuery_new(content, term);
+    hits = Searcher_Hits(searcher, query, 0, 10, NULL);
+    hit = Hits_Next(hits);
+    highlighter = Highlighter_new(searcher, query, content, 200);
+    excerpt = Highlighter_Create_Excerpt(highlighter, hit);
+    TEST_TRUE(batch,
+              CB_Find_Str(excerpt, "strong", 6) >= 0,
+              "specify field highlights correct field...");
+    DECREF(excerpt);
+    DECREF(highlighter);
+    CharBuf *alt = (CharBuf*)ZCB_WRAP_STR("alt", 3);
+    highlighter = Highlighter_new(searcher, query, alt, 200);
+    excerpt = Highlighter_Create_Excerpt(highlighter, hit);
+    TEST_TRUE(batch,
+              CB_Find_Str(excerpt, "strong", 6) == -1,
+              "... but not another field");
+    DECREF(excerpt);
+    DECREF(highlighter);
+    DECREF(hit);
+    DECREF(hits);
+    /* The TermQuery came from a constructor, unlike the stack-wrapped query
+     * strings above, so it has to be released explicitly. */
+    DECREF(query);
+}
+
+/* Exercise Highlighter_Find_Sentences() on a text made of 15 copies of
+ * "This is a sentence. " (20 bytes each, 300 bytes in total) and on a short
+ * string with leading whitespace.
+ *
+ * Expected results are built as VArrays of Spans and compared with
+ * VA_Equals().  Elements are appended with the VA_Push method macro, the
+ * same way every other test in this file does it.
+ */
+static void
+test_Find_Sentences(TestBatch *batch, Searcher *searcher, Obj *query) {
+    CharBuf *content = (CharBuf*)ZCB_WRAP_STR("content", 7);
+    Highlighter *highlighter = Highlighter_new(searcher, query, content, 200);
+    CharBuf *text = (CharBuf*)ZCB_WRAP_STR(
+                        "This is a sentence. This is a sentence. This is a sentence. "
+                        "This is a sentence. This is a sentence. This is a sentence. "
+                        "This is a sentence. This is a sentence. This is a sentence. "
+                        "This is a sentence. This is a sentence. This is a sentence. "
+                        "This is a sentence. This is a sentence. This is a sentence. ",
+                        300);
+
+    /* Window (101, 50): only the sentences at 120 and 140 are expected. */
+    VArray *got = Highlighter_Find_Sentences(highlighter, text, 101, 50);
+    VArray *wanted = VA_new(2);
+    VA_Push(wanted, (Obj*)Span_new(120, 19, 0.0f));
+    VA_Push(wanted, (Obj*)Span_new(140, 19, 0.0f));
+    TEST_TRUE(batch,
+              VA_Equals(got, (Obj*)wanted),
+              "find_sentences with explicit args");
+    DECREF(wanted);
+    DECREF(got);
+
+    /* Window (101, 4): nothing is expected. */
+    got = Highlighter_Find_Sentences(highlighter, text, 101, 4);
+    TEST_TRUE(batch,
+              VA_Get_Size(got) == 0,
+              "find_sentences with explicit args, finding nothing");
+    DECREF(got);
+
+    /* (0, 0): all 15 sentences, one every 20 bytes, are expected. */
+    got = Highlighter_Find_Sentences(highlighter, text, 0, 0);
+    wanted = VA_new(15);
+    for (int i = 0; i < 15; ++i) {
+        VA_Push(wanted, (Obj*)Span_new(i * 20, 19, 0.0f));
+    }
+    TEST_TRUE(batch,
+              VA_Equals(got, (Obj*)wanted),
+              "find_sentences with default offset and length");
+    DECREF(wanted);
+    DECREF(got);
+
+    text = (CharBuf*)ZCB_WRAP_STR(" Foo", 4);
+    got = Highlighter_Find_Sentences(highlighter, text, 0, 0);
+    wanted = VA_new(1);
+    VA_Push(wanted, (Obj*)Span_new(1, 3, 0.0f));
+    TEST_TRUE(batch,
+              VA_Equals(got, (Obj*)wanted),
+              "Skip leading whitespace but get first sentence");
+    DECREF(wanted);
+    DECREF(got);
+
+    DECREF(highlighter);
+}
+
+/* Build a three-document in-memory index and run every highlighter test
+ * against it.
+ *
+ * Schema: two highlightable full-text fields sharing one RegexTokenizer,
+ * "content" and "alt"; the "alt" type has its boost set to 0.1.
+ */
+static void
+test_highlighting(TestBatch *batch) {
+    /* --- schema --------------------------------------------------------- */
+    Schema         *schema       = Schema_new();
+    RegexTokenizer *analyzer     = RegexTokenizer_new(NULL);
+    FullTextType   *content_type = FullTextType_new((Analyzer*)analyzer);
+    FullTextType_Set_Highlightable(content_type, true);
+    FullTextType   *alt_type     = FullTextType_new((Analyzer*)analyzer);
+    FullTextType_Set_Highlightable(alt_type, true);
+    FullTextType_Set_Boost(alt_type, 0.1f);
+
+    CharBuf *content_field = (CharBuf*)ZCB_WRAP_STR("content", 7);
+    Schema_Spec_Field(schema, content_field, (FieldType*)content_type);
+    CharBuf *alt_field = (CharBuf*)ZCB_WRAP_STR("alt", 3);
+    Schema_Spec_Field(schema, alt_field, (FieldType*)alt_type);
+    DECREF(content_type);
+    DECREF(alt_type);
+    DECREF(analyzer);
+
+    /* --- index ---------------------------------------------------------- */
+    RAMFolder *ram_folder = RAMFolder_new(NULL);
+    Indexer   *indexer    = Indexer_new(schema, (Obj*)ram_folder, NULL, 0);
+
+    /* Document 1: the long numeric string with PHI and "x y z" inside. */
+    Doc     *doc   = Doc_new(NULL, 0);
+    CharBuf *value = (CharBuf*)ZCB_WRAP_STR(TEST_STRING, TEST_STRING_LEN);
+    Doc_Store(doc, content_field, (Obj*)value);
+    Indexer_Add_Doc(indexer, doc, 1.0f);
+    DECREF(doc);
+
+    /* Document 2: text containing double quotes. */
+    doc   = Doc_new(NULL, 0);
+    value = (CharBuf*)ZCB_WRAP_STR("\"I see,\" said the blind man.", 28);
+    Doc_Store(doc, content_field, (Obj*)value);
+    Indexer_Add_Doc(indexer, doc, 1.0f);
+    DECREF(doc);
+
+    /* Document 3: short "content", long "alt". */
+    doc   = Doc_new(NULL, 0);
+    value = (CharBuf*)ZCB_WRAP_STR("x but not why or 2ee", 20);
+    Doc_Store(doc, content_field, (Obj*)value);
+    value = (CharBuf*)ZCB_WRAP_STR(TEST_STRING
+                                   " and extra stuff so it scores lower",
+                                   TEST_STRING_LEN + 35);
+    Doc_Store(doc, alt_field, (Obj*)value);
+    Indexer_Add_Doc(indexer, doc, 1.0f);
+    DECREF(doc);
+
+    Indexer_Commit(indexer);
+    DECREF(indexer);
+
+    /* --- search and run the individual tests ---------------------------- */
+    Searcher *searcher = (Searcher*)IxSearcher_new((Obj*)ram_folder);
+    Obj      *query    = (Obj*)ZCB_WRAP_STR("\"x y z\" AND " PHI, 14);
+    Hits     *hits     = Searcher_Hits(searcher, query, 0, 10, NULL);
+
+    test_Find_Best_Fragment(batch, searcher, query);
+    test_Raw_Excerpt(batch, searcher, query);
+    test_Highlight_Excerpt(batch, searcher, query);
+    test_Create_Excerpt(batch, searcher, query, hits);
+    test_Find_Sentences(batch, searcher, query);
+
+    DECREF(hits);
+    DECREF(searcher);
+    DECREF(ram_folder);
+    DECREF(schema);
+}
+
+/* Entry point for the highlighter test suite: plans the batch, runs
+ * test_highlighting() and releases the batch.
+ */
+void
+TestHighlighter_run_tests() {
+    /* 32 is the number of TEST_TRUE assertions reached through
+     * test_highlighting() (8 + 8 + 4 + 8 + 4). */
+    TestBatch *runner = TestBatch_new(32);
+    TestBatch_Plan(runner);
+    test_highlighting(runner);
+    DECREF(runner);
+}
+
+
Added: incubator/lucy/trunk/core/Lucy/Test/Highlight/TestHighlighter.cfh
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Test/Highlight/TestHighlighter.cfh?rev=1220281&view=auto
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Test/Highlight/TestHighlighter.cfh (added)
+++ incubator/lucy/trunk/core/Lucy/Test/Highlight/TestHighlighter.cfh Sat Dec 17 21:55:20 2011
@@ -0,0 +1,24 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+parcel Lucy;
+
+/* Declares the entry point of the highlighter tests.  run_tests() is
+ * implemented in TestHighlighter.c as TestHighlighter_run_tests() and is
+ * reached from Perl through Lucy::Test::run_tests("TestHighlighter").
+ */
+inert class Lucy::Test::Highlight::TestHighlighter {
+ inert void
+ run_tests();
+}
+
+
Modified: incubator/lucy/trunk/perl/MANIFEST
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/MANIFEST?rev=1220281&r1=1220280&r2=1220281&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/MANIFEST (original)
+++ incubator/lucy/trunk/perl/MANIFEST Sat Dec 17 21:55:20 2011
@@ -265,7 +265,6 @@ t/221-sort_writer.t
t/224-lex_reader.t
t/233-background_merger.t
t/302-many_fields.t
-t/303-highlighter.t
t/304-verify_utf8.t
t/305-indexer.t
t/306-dynamic_schema.t
@@ -326,6 +325,7 @@ t/binding/156-snowball_stemmer.t
t/binding/157-normalizer.t
t/binding/158-standard_tokenizer.t
t/binding/206-snapshot.t
+t/binding/303-highlighter.t
t/binding/506-collector.t
t/binding/702-sample.t
t/binding/800-stack.t
@@ -388,6 +388,7 @@ t/core/231-blob_type.t
t/core/232-numeric_type.t
t/core/234-field_type.t
t/core/301-segment.t
+t/core/303-highlighter.t
t/core/501-termquery.t
t/core/502-phrasequery.t
t/core/515-range_query.t
Modified: incubator/lucy/trunk/perl/lib/Lucy/Test.pm
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/lib/Lucy/Test.pm?rev=1220281&r1=1220280&r2=1220281&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/lib/Lucy/Test.pm (original)
+++ incubator/lucy/trunk/perl/lib/Lucy/Test.pm Sat Dec 17 21:55:20 2011
@@ -252,6 +252,10 @@ PPCODE:
else if (strEQ(package, "TestVArray")) {
lucy_TestVArray_run_tests();
}
+ // Lucy::Highlight
+ else if (strEQ(package, "TestHighlighter")) {
+ lucy_TestHighlighter_run_tests();
+ }
else {
THROW(LUCY_ERR, "Unknown test id: %s", package);
}
Added: incubator/lucy/trunk/perl/t/binding/303-highlighter.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/binding/303-highlighter.t?rev=1220281&view=auto
==============================================================================
--- incubator/lucy/trunk/perl/t/binding/303-highlighter.t (added)
+++ incubator/lucy/trunk/perl/t/binding/303-highlighter.t Sat Dec 17 21:55:20 2011
@@ -0,0 +1,132 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+use strict;
+use warnings;
+use lib 'buildlib';
+
+package MySchema;
+use base qw( Lucy::Plan::Schema );
+use Lucy::Analysis::RegexTokenizer;
+
+# Schema with two highlightable full-text fields that share one tokenizer:
+# 'content' (default boost) and 'alt' (boost 0.1).
+sub new {
+    my ( $class, @args ) = @_;
+    my $schema   = $class->SUPER::new(@args);
+    my $analyzer = Lucy::Analysis::RegexTokenizer->new;
+
+    # Options common to both field types.
+    my %shared = ( analyzer => $analyzer, highlightable => 1 );
+
+    my $content_type = Lucy::Plan::FullTextType->new(%shared);
+    my $alt_type = Lucy::Plan::FullTextType->new( %shared, boost => 0.1 );
+
+    $schema->spec_field( name => 'content', type => $content_type );
+    $schema->spec_field( name => 'alt',     type => $alt_type );
+    return $schema;
+}
+
+package MyHighlighter;
+use base qw( Lucy::Highlight::Highlighter );
+
+# Highlighter subclass used to check that both hooks can be overridden from
+# Perl.
+
+# Replaces the first "blind" in the text with "wise" and returns the result.
+sub encode {
+    my ( undef, $text ) = @_;
+    $text =~ s/blind/wise/;
+    return $text;
+}
+
+# Wraps the text in asterisks.
+sub highlight {
+    my ( undef, $text ) = @_;
+    return '*' . $text . '*';
+}
+
+package main;
+
+use Test::More tests => 6;
+use Lucy::Test;
+
+binmode( STDOUT, ":utf8" );
+
+# $phi goes into the indexed text and the query.  $encoded_phi is how the
+# same character is matched in an excerpt, where it is expected as a numeric
+# character reference rather than a raw character.
+my $phi = "\x{03a6}";
+my $encoded_phi = "&#934;";
+
+my $string = '1 2 3 4 5 ' x 20; # 200 characters
+$string .= "$phi a b c d x y z h i j k ";
+$string .= '6 7 8 9 0 ' x 20;
+my $with_quotes = '"I see," said the blind man.';
+
+my $folder = Lucy::Store::RAMFolder->new;
+my $indexer = Lucy::Index::Indexer->new(
+    index => $folder,
+    schema => MySchema->new,
+);
+
+$indexer->add_doc( { content => $_ } ) for ( $string, $with_quotes );
+$indexer->add_doc(
+    { content => "x but not why or 2ee",
+        alt => $string . " and extra stuff so it scores lower",
+    }
+);
+$indexer->commit;
+
+my $searcher = Lucy::Search::IndexSearcher->new( index => $folder );
+
+my $q = qq|"x y z" AND $phi|;
+my $hits = $searcher->hits( query => $q );
+my $hl = Lucy::Highlight::Highlighter->new(
+    searcher => $searcher,
+    query => $q,
+    field => 'content',
+);
+
+my $hit = $hits->next;
+my $excerpt = $hl->create_excerpt($hit);
+like( $excerpt, qr#<strong>x y z</strong>#, "highlighter tagged the phrase" );
+like(
+    $excerpt,
+    qr#<strong>$encoded_phi</strong>#i,
+    "highlighter tagged the single term"
+);
+
+$hl->set_pre_tag("\e[1m");
+$hl->set_post_tag("\e[0m");
+like(
+    $hl->create_excerpt($hit),
+    qr#\e\[1m$encoded_phi\e\[0m#i, "set_pre_tag and set_post_tag",
+);
+
+# A Compiler may be passed where a Query is expected; the Highlighter must
+# hand back that very object from both accessors.
+$q = $searcher->glean_query("foo");
+my $compiler = $q->make_compiler( searcher => $searcher );
+$hl = Lucy::Highlight::Highlighter->new(
+    searcher => $searcher,
+    query => $compiler,
+    field => 'content',
+);
+is( $$compiler, ${ $hl->get_query },
+    "Highlighter accepts Compiler as Query" );
+is( $$compiler, ${ $hl->get_compiler },
+    "Highlighter uses supplied Compiler" );
+
+# MyHighlighter overrides encode() and highlight(); both overrides must show
+# up in the excerpt.
+$hl = MyHighlighter->new(
+    searcher => $searcher,
+    query => "blind",
+    field => 'content',
+);
+$hits = $searcher->hits( query => 'blind' );
+$hit = $hits->next;
+like( $hl->create_excerpt($hit),
+    qr/\*wise\*/, "override both Encode() and Highlight()" );
+
Added: incubator/lucy/trunk/perl/t/core/303-highlighter.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/core/303-highlighter.t?rev=1220281&view=auto
==============================================================================
--- incubator/lucy/trunk/perl/t/core/303-highlighter.t (added)
+++ incubator/lucy/trunk/perl/t/core/303-highlighter.t Sat Dec 17 21:55:20 2011
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+use strict;
+use warnings;
+
+use Lucy::Test;
+# Run the C-level highlighter tests (TestHighlighter.c) through the
+# Lucy::Test dispatcher.
+Lucy::Test::run_tests('TestHighlighter');
+