You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by nw...@apache.org on 2012/01/12 09:53:05 UTC

[lucy-commits] svn commit: r1230443 - in /incubator/lucy/trunk/core/Lucy: Highlight/Highlighter.c Test/Highlight/TestHighlighter.c

Author: nwellnhof
Date: Thu Jan 12 08:53:04 2012
New Revision: 1230443

URL: http://svn.apache.org/viewvc?rev=1230443&view=rev
Log:
LUCY-199 Quick fix

Truncate words longer than excerpt_len

Modified:
    incubator/lucy/trunk/core/Lucy/Highlight/Highlighter.c
    incubator/lucy/trunk/core/Lucy/Test/Highlight/TestHighlighter.c

Modified: incubator/lucy/trunk/core/Lucy/Highlight/Highlighter.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Highlight/Highlighter.c?rev=1230443&r1=1230442&r2=1230443&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Highlight/Highlighter.c (original)
+++ incubator/lucy/trunk/core/Lucy/Highlight/Highlighter.c Thu Jan 12 08:53:04 2012
@@ -379,6 +379,11 @@ Highlighter_raw_excerpt(Highlighter *sel
         // not need to make room.
         this_excerpt_len += ELLIPSIS_LEN;
 
+        // Remember original position
+        ZombieCharBuf *orig_temp = ZCB_WRAP((CharBuf*)field_val);
+        int32_t orig_start = start;
+        int32_t orig_len   = this_excerpt_len;
+
         // Move the start back one in case the character right before the
         // excerpt starts is whitespace.
         if (start) {
@@ -404,6 +409,20 @@ Highlighter_raw_excerpt(Highlighter *sel
             }
         } while (ZCB_Get_Size(temp));
 
+        if (ZCB_Get_Size(temp) == 0) {
+            // Word is longer than excerpt_length. Reset to the original
+            // position, truncating the word.
+            temp             = orig_temp;
+            start            = orig_start;
+            this_excerpt_len = orig_len;
+            int32_t diff = this_excerpt_len - self->excerpt_length;
+            if (diff > 0) {
+                ZCB_Nip(temp, diff);
+                start            += diff;
+                this_excerpt_len -= diff;
+            }
+        }
+
         ZCB_Truncate(temp, self->excerpt_length - ELLIPSIS_LEN);
         CB_Cat_Char(raw_excerpt, ELLIPSIS_CODE_POINT);
         CB_Cat_Char(raw_excerpt, ' ');
@@ -416,6 +435,11 @@ Highlighter_raw_excerpt(Highlighter *sel
         CB_Truncate(raw_excerpt, end - start);
     }
     else {
+        // Remember original excerpt
+        CharBuf *orig_raw_excerpt = CB_Clone(raw_excerpt);
+        // Keep a prepended ellipsis intact (presumably 4 = ellipsis + space)
+        int32_t min_size = found_starting_edge ? 0 : 4;
+
         do {
             uint32_t code_point = CB_Code_Point_From(raw_excerpt, 1);
             CB_Chop(raw_excerpt, 1);
@@ -438,8 +462,18 @@ Highlighter_raw_excerpt(Highlighter *sel
 
                 break;
             }
-        } while (CB_Get_Size(raw_excerpt));
+        } while (CB_Get_Size(raw_excerpt) > min_size);
+
+        if (CB_Get_Size(raw_excerpt) == min_size) {
+            // Word is longer than excerpt_length. Reset to the original
+            // excerpt, truncating the word.
+            CB_Mimic(raw_excerpt, (Obj*)orig_raw_excerpt);
+            CB_Chop(raw_excerpt, 1);
+        }
+
         CB_Cat_Char(raw_excerpt, ELLIPSIS_CODE_POINT);
+
+        DECREF(orig_raw_excerpt);
     }
 
     return start;

Modified: incubator/lucy/trunk/core/Lucy/Test/Highlight/TestHighlighter.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Test/Highlight/TestHighlighter.c?rev=1230443&r1=1230442&r2=1230443&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Test/Highlight/TestHighlighter.c (original)
+++ incubator/lucy/trunk/core/Lucy/Test/Highlight/TestHighlighter.c Thu Jan 12 08:53:04 2012
@@ -198,6 +198,40 @@ test_Raw_Excerpt(TestBatch *batch, Searc
     DECREF(heat_map);
     DECREF(raw_excerpt);
 
+    // Words longer than excerpt len
+
+    field_val   = (CharBuf *)ZCB_WRAP_STR("abc/def/ghi/jkl/mno", 19);
+    sentences = VA_new(1);
+    VA_Push(sentences, (Obj*)Span_new(0, 19, 0.0f));
+
+    raw_excerpt = CB_new(0);
+    spans       = VA_new(1);
+    VA_Push(spans, (Obj*)Span_new(0, 3, 1.0f));
+    heat_map = HeatMap_new(spans, 133);
+    DECREF(spans);
+    top = Highlighter_Raw_Excerpt(highlighter, field_val, field_val,
+                                  raw_excerpt, 0, heat_map, sentences);
+    TEST_TRUE(batch,
+              CB_Equals_Str(raw_excerpt, "abc/d" ELLIPSIS, 8),
+              "Long word");
+    DECREF(heat_map);
+    DECREF(raw_excerpt);
+
+    raw_excerpt = CB_new(0);
+    spans       = VA_new(1);
+    VA_Push(spans, (Obj*)Span_new(8, 3, 1.0f));
+    heat_map = HeatMap_new(spans, 133);
+    DECREF(spans);
+    top = Highlighter_Raw_Excerpt(highlighter, field_val, field_val,
+                                  raw_excerpt, 0, heat_map, sentences);
+    TEST_TRUE(batch,
+              CB_Equals_Str(raw_excerpt, ELLIPSIS " c/d" ELLIPSIS, 10),
+              "Long word");
+    DECREF(heat_map);
+    DECREF(raw_excerpt);
+
+    DECREF(sentences);
+
     DECREF(highlighter);
 }
 
@@ -481,7 +515,7 @@ test_highlighting(TestBatch *batch) {
 
 void
 TestHighlighter_run_tests() {
-    TestBatch *batch = TestBatch_new(32);
+    TestBatch *batch = TestBatch_new(34);
 
     TestBatch_Plan(batch);