You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by nw...@apache.org on 2011/12/06 01:42:12 UTC
[lucy-commits] svn commit: r1210723 -
/incubator/lucy/branches/LUCY-196-uax-tokenizer/core/Lucy/Test/Analysis/TestStandardTokenizer.c
Author: nwellnhof
Date: Tue Dec 6 00:42:11 2011
New Revision: 1210723
URL: http://svn.apache.org/viewvc?rev=1210723&view=rev
Log:
Make sure to test 4-byte UTF-8 characters
Modified:
incubator/lucy/branches/LUCY-196-uax-tokenizer/core/Lucy/Test/Analysis/TestStandardTokenizer.c
Modified: incubator/lucy/branches/LUCY-196-uax-tokenizer/core/Lucy/Test/Analysis/TestStandardTokenizer.c
URL: http://svn.apache.org/viewvc/incubator/lucy/branches/LUCY-196-uax-tokenizer/core/Lucy/Test/Analysis/TestStandardTokenizer.c?rev=1210723&r1=1210722&r2=1210723&view=diff
==============================================================================
--- incubator/lucy/branches/LUCY-196-uax-tokenizer/core/Lucy/Test/Analysis/TestStandardTokenizer.c (original)
+++ incubator/lucy/branches/LUCY-196-uax-tokenizer/core/Lucy/Test/Analysis/TestStandardTokenizer.c Tue Dec 6 00:42:11 2011
@@ -46,9 +46,10 @@ test_tokenizer(TestBatch *batch) {
":"
"1,02\xC2\xADZ4.38"
"\xE0\xB8\x81\xC2\xAD\xC2\xAD"
- "\xE0\xB8\x82"
+ "\xF0\xA0\x80\x80"
+ "a"
"/",
- 33);
+ 35);
VArray *got = StandardTokenizer_Split(tokenizer, (CharBuf*)word);
CharBuf *token = (CharBuf*)VA_Fetch(got, 0);
TEST_TRUE(batch,
@@ -72,7 +73,13 @@ test_tokenizer(TestBatch *batch) {
TEST_TRUE(batch,
token
&& CB_Is_A(token, CHARBUF)
- && CB_Equals_Str(token, "\xE0\xB8\x82", 3),
+ && CB_Equals_Str(token, "\xF0\xA0\x80\x80", 4),
+ "Token: %s", CB_Get_Ptr8(token));
+ token = (CharBuf*)VA_Fetch(got, 4);
+ TEST_TRUE(batch,
+ token
+ && CB_Is_A(token, CHARBUF)
+ && CB_Equals_Str(token, "a", 1),
"Token: %s", CB_Get_Ptr8(token));
DECREF(got);
DECREF(tokenizer);
@@ -80,7 +87,7 @@ test_tokenizer(TestBatch *batch) {
void
TestStandardTokenizer_run_tests() {
- TestBatch *batch = TestBatch_new(5);
+ TestBatch *batch = TestBatch_new(6);
TestBatch_Plan(batch);