You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by nw...@apache.org on 2012/01/12 10:23:27 UTC
[lucy-commits] svn commit: r1230449 - in /incubator/lucy/branches/0.3/core/Lucy:
Highlight/Highlighter.c Test/Highlight/TestHighlighter.c
Author: nwellnhof
Date: Thu Jan 12 09:23:27 2012
New Revision: 1230449
URL: http://svn.apache.org/viewvc?rev=1230449&view=rev
Log:
LUCY-199 Quick fix
Truncate words longer than excerpt_len
Modified:
incubator/lucy/branches/0.3/core/Lucy/Highlight/Highlighter.c
incubator/lucy/branches/0.3/core/Lucy/Test/Highlight/TestHighlighter.c
Modified: incubator/lucy/branches/0.3/core/Lucy/Highlight/Highlighter.c
URL: http://svn.apache.org/viewvc/incubator/lucy/branches/0.3/core/Lucy/Highlight/Highlighter.c?rev=1230449&r1=1230448&r2=1230449&view=diff
==============================================================================
--- incubator/lucy/branches/0.3/core/Lucy/Highlight/Highlighter.c (original)
+++ incubator/lucy/branches/0.3/core/Lucy/Highlight/Highlighter.c Thu Jan 12 09:23:27 2012
@@ -379,6 +379,11 @@ Highlighter_raw_excerpt(Highlighter *sel
// not need to make room.
this_excerpt_len += ELLIPSIS_LEN;
+ // Remember original position
+ ZombieCharBuf *orig_temp = ZCB_WRAP((CharBuf*)field_val);
+ int32_t orig_start = start;
+ int32_t orig_len = this_excerpt_len;
+
// Move the start back one in case the character right before the
// excerpt starts is whitespace.
if (start) {
@@ -404,6 +409,20 @@ Highlighter_raw_excerpt(Highlighter *sel
}
} while (ZCB_Get_Size(temp));
+ if (ZCB_Get_Size(temp) == 0) {
+ // The word is longer than excerpt_length. Go back to the original
+ // position and truncate the word instead.
+ temp = orig_temp;
+ start = orig_start;
+ this_excerpt_len = orig_len;
+ int32_t diff = this_excerpt_len - self->excerpt_length;
+ if (diff > 0) {
+ ZCB_Nip(temp, diff);
+ start += diff;
+ this_excerpt_len -= diff;
+ }
+ }
+
ZCB_Truncate(temp, self->excerpt_length - ELLIPSIS_LEN);
CB_Cat_Char(raw_excerpt, ELLIPSIS_CODE_POINT);
CB_Cat_Char(raw_excerpt, ' ');
@@ -416,6 +435,11 @@ Highlighter_raw_excerpt(Highlighter *sel
CB_Truncate(raw_excerpt, end - start);
}
else {
+ // Remember original excerpt
+ CharBuf *orig_raw_excerpt = CB_Clone(raw_excerpt);
+ // A prepended ellipsis plus space takes 4 bytes; stop chopping there
+ int32_t min_size = found_starting_edge ? 0 : 4;
+
do {
uint32_t code_point = CB_Code_Point_From(raw_excerpt, 1);
CB_Chop(raw_excerpt, 1);
@@ -438,8 +462,18 @@ Highlighter_raw_excerpt(Highlighter *sel
break;
}
- } while (CB_Get_Size(raw_excerpt));
+ } while (CB_Get_Size(raw_excerpt) > min_size);
+
+ if (CB_Get_Size(raw_excerpt) == min_size) {
+ // The word is longer than excerpt_length. Restore the original
+ // excerpt and truncate the word instead.
+ CB_Mimic(raw_excerpt, (Obj*)orig_raw_excerpt);
+ CB_Chop(raw_excerpt, 1);
+ }
+
CB_Cat_Char(raw_excerpt, ELLIPSIS_CODE_POINT);
+
+ DECREF(orig_raw_excerpt);
}
return start;
Modified: incubator/lucy/branches/0.3/core/Lucy/Test/Highlight/TestHighlighter.c
URL: http://svn.apache.org/viewvc/incubator/lucy/branches/0.3/core/Lucy/Test/Highlight/TestHighlighter.c?rev=1230449&r1=1230448&r2=1230449&view=diff
==============================================================================
--- incubator/lucy/branches/0.3/core/Lucy/Test/Highlight/TestHighlighter.c (original)
+++ incubator/lucy/branches/0.3/core/Lucy/Test/Highlight/TestHighlighter.c Thu Jan 12 09:23:27 2012
@@ -198,6 +198,40 @@ test_Raw_Excerpt(TestBatch *batch, Searc
DECREF(heat_map);
DECREF(raw_excerpt);
+ // Words longer than excerpt len
+
+ field_val = (CharBuf *)ZCB_WRAP_STR("abc/def/ghi/jkl/mno", 19);
+ sentences = VA_new(1);
+ VA_Push(sentences, (Obj*)Span_new(0, 19, 0.0f));
+
+ raw_excerpt = CB_new(0);
+ spans = VA_new(1);
+ VA_Push(spans, (Obj*)Span_new(0, 3, 1.0f));
+ heat_map = HeatMap_new(spans, 133);
+ DECREF(spans);
+ top = Highlighter_Raw_Excerpt(highlighter, field_val, field_val,
+ raw_excerpt, 0, heat_map, sentences);
+ TEST_TRUE(batch,
+ CB_Equals_Str(raw_excerpt, "abc/d" ELLIPSIS, 8),
+ "Long word");
+ DECREF(heat_map);
+ DECREF(raw_excerpt);
+
+ raw_excerpt = CB_new(0);
+ spans = VA_new(1);
+ VA_Push(spans, (Obj*)Span_new(8, 3, 1.0f));
+ heat_map = HeatMap_new(spans, 133);
+ DECREF(spans);
+ top = Highlighter_Raw_Excerpt(highlighter, field_val, field_val,
+ raw_excerpt, 0, heat_map, sentences);
+ TEST_TRUE(batch,
+ CB_Equals_Str(raw_excerpt, ELLIPSIS " c/d" ELLIPSIS, 10),
+ "Long word");
+ DECREF(heat_map);
+ DECREF(raw_excerpt);
+
+ DECREF(sentences);
+
DECREF(highlighter);
}
@@ -481,7 +515,7 @@ test_highlighting(TestBatch *batch) {
void
TestHighlighter_run_tests() {
- TestBatch *batch = TestBatch_new(32);
+ TestBatch *batch = TestBatch_new(34);
TestBatch_Plan(batch);