You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by nw...@apache.org on 2011/12/05 22:01:26 UTC

[lucy-commits] svn commit: r1210626 - in /incubator/lucy/branches/LUCY-196-uax-tokenizer: core/Lucy/Test/Analysis/TestStandardTokenizer.c core/Lucy/Test/Analysis/TestStandardTokenizer.cfh perl/lib/Lucy/Test.pm perl/t/core/158-standard-tokenizer.t

Author: nwellnhof
Date: Mon Dec  5 21:01:26 2011
New Revision: 1210626

URL: http://svn.apache.org/viewvc?rev=1210626&view=rev
Log:
Add tests for StandardTokenizer

Added:
    incubator/lucy/branches/LUCY-196-uax-tokenizer/core/Lucy/Test/Analysis/TestStandardTokenizer.c
    incubator/lucy/branches/LUCY-196-uax-tokenizer/core/Lucy/Test/Analysis/TestStandardTokenizer.cfh
    incubator/lucy/branches/LUCY-196-uax-tokenizer/perl/t/core/158-standard-tokenizer.t
Modified:
    incubator/lucy/branches/LUCY-196-uax-tokenizer/perl/lib/Lucy/Test.pm

Added: incubator/lucy/branches/LUCY-196-uax-tokenizer/core/Lucy/Test/Analysis/TestStandardTokenizer.c
URL: http://svn.apache.org/viewvc/incubator/lucy/branches/LUCY-196-uax-tokenizer/core/Lucy/Test/Analysis/TestStandardTokenizer.c?rev=1210626&view=auto
==============================================================================
--- incubator/lucy/branches/LUCY-196-uax-tokenizer/core/Lucy/Test/Analysis/TestStandardTokenizer.c (added)
+++ incubator/lucy/branches/LUCY-196-uax-tokenizer/core/Lucy/Test/Analysis/TestStandardTokenizer.c Mon Dec  5 21:01:26 2011
@@ -0,0 +1,93 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define C_LUCY_TESTSTANDARDTOKENIZER
+#include "Lucy/Util/ToolSet.h"
+
+#include "Lucy/Test.h"
+#include "Lucy/Test/Analysis/TestStandardTokenizer.h"
+#include "Lucy/Analysis/StandardTokenizer.h"
+
+
+/* Dump a new StandardTokenizer, Load the dump back, and test that the
+ * loaded object Equals the one it came from. */
+static void
+test_Dump_Load_and_Equals(TestBatch *batch) {
+    StandardTokenizer *original = StandardTokenizer_new();
+    Obj               *dumped   = StandardTokenizer_Dump(original);
+    StandardTokenizer *restored
+        = (StandardTokenizer*)StandardTokenizer_Load(original, dumped);
+    TEST_TRUE(batch, StandardTokenizer_Equals(original, (Obj*)restored),
+              "Dump => Load round trip");
+    DECREF(original);
+    DECREF(dumped);
+    DECREF(restored);
+}
+
+/* Check that element `tick` of `tokens` is a CharBuf equal to the `size`
+ * bytes at `expected`; emits exactly one test result.
+ *
+ * The diagnostic text is read from the token only after it has passed the
+ * NULL and Is_A checks.  Fetching it unconditionally would call
+ * CB_Get_Ptr8 on a missing (NULL) token while building the message,
+ * instead of letting the check be reported as a failed test. */
+static void
+S_test_token(TestBatch *batch, VArray *tokens, uint32_t tick,
+             const char *expected, size_t size) {
+    CharBuf    *token = (CharBuf*)VA_Fetch(tokens, tick);
+    int         valid = token && CB_Is_A(token, CHARBUF);
+    const char *shown = valid ? (char*)CB_Get_Ptr8(token) : "[no token]";
+    TEST_TRUE(batch, valid && CB_Equals_Str(token, expected, size),
+              "Token: %s", shown);
+}
+
+/* Split a string mixing punctuation, a combining circumflex (U+0302), soft
+ * hyphens (U+00AD) and two Thai letters, then check the first four tokens. */
+static void
+test_tokenizer(TestBatch *batch) {
+    StandardTokenizer *tokenizer = StandardTokenizer_new();
+    ZombieCharBuf *word = ZCB_WRAP_STR(
+        " ."
+        "tha\xCC\x82t's"
+        ":"
+        "1,02\xC2\xADZ4.38"
+        "\xE0\xB8\x81\xC2\xAD\xC2\xAD"
+        "\xE0\xB8\x82"
+        "/",
+        33);  /* total byte length of the concatenated literal above */
+    VArray *got = StandardTokenizer_Split(tokenizer, (CharBuf*)word);
+
+    S_test_token(batch, got, 0, "tha\xcc\x82t's", 8);
+    S_test_token(batch, got, 1, "1,02\xC2\xADZ4.38", 11);
+    S_test_token(batch, got, 2, "\xE0\xB8\x81\xC2\xAD\xC2\xAD", 7);
+    S_test_token(batch, got, 3, "\xE0\xB8\x82", 3);
+    DECREF(got);
+    DECREF(tokenizer);
+}
+
+/* Entry point: create a batch for 5 tests (1 Dump/Load round trip plus 4
+ * token checks), call TestBatch_Plan, run both test functions, DECREF it. */
+void
+TestStandardTokenizer_run_tests() {
+    TestBatch *runner = TestBatch_new(5);
+    TestBatch_Plan(runner);
+
+    test_Dump_Load_and_Equals(runner);
+    test_tokenizer(runner);
+    DECREF(runner);
+}
+
+

Added: incubator/lucy/branches/LUCY-196-uax-tokenizer/core/Lucy/Test/Analysis/TestStandardTokenizer.cfh
URL: http://svn.apache.org/viewvc/incubator/lucy/branches/LUCY-196-uax-tokenizer/core/Lucy/Test/Analysis/TestStandardTokenizer.cfh?rev=1210626&view=auto
==============================================================================
--- incubator/lucy/branches/LUCY-196-uax-tokenizer/core/Lucy/Test/Analysis/TestStandardTokenizer.cfh (added)
+++ incubator/lucy/branches/LUCY-196-uax-tokenizer/core/Lucy/Test/Analysis/TestStandardTokenizer.cfh Mon Dec  5 21:01:26 2011
@@ -0,0 +1,24 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+parcel Lucy;
+/* Test class for StandardTokenizer; run_tests() is defined in TestStandardTokenizer.c. */
+inert class Lucy::Test::Analysis::TestStandardTokenizer {
+    inert void
+    run_tests();
+}
+
+

Modified: incubator/lucy/branches/LUCY-196-uax-tokenizer/perl/lib/Lucy/Test.pm
URL: http://svn.apache.org/viewvc/incubator/lucy/branches/LUCY-196-uax-tokenizer/perl/lib/Lucy/Test.pm?rev=1210626&r1=1210625&r2=1210626&view=diff
==============================================================================
--- incubator/lucy/branches/LUCY-196-uax-tokenizer/perl/lib/Lucy/Test.pm (original)
+++ incubator/lucy/branches/LUCY-196-uax-tokenizer/perl/lib/Lucy/Test.pm Mon Dec  5 21:01:26 2011
@@ -78,6 +78,9 @@ PPCODE:
     else if (strEQ(package, "TestRegexTokenizer")) {
         lucy_TestRegexTokenizer_run_tests();
     }
+    else if (strEQ(package, "TestStandardTokenizer")) {
+        lucy_TestStandardTokenizer_run_tests();
+    }
     // Lucy::Object
     else if (strEQ(package, "TestObj")) {
         lucy_TestObj_run_tests();

Added: incubator/lucy/branches/LUCY-196-uax-tokenizer/perl/t/core/158-standard-tokenizer.t
URL: http://svn.apache.org/viewvc/incubator/lucy/branches/LUCY-196-uax-tokenizer/perl/t/core/158-standard-tokenizer.t?rev=1210626&view=auto
==============================================================================
--- incubator/lucy/branches/LUCY-196-uax-tokenizer/perl/t/core/158-standard-tokenizer.t (added)
+++ incubator/lucy/branches/LUCY-196-uax-tokenizer/perl/t/core/158-standard-tokenizer.t Mon Dec  5 21:01:26 2011
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+use strict;
+use warnings;
+# Run the "TestStandardTokenizer" package through Lucy::Test's run_tests dispatcher.
+use Lucy::Test;
+Lucy::Test::run_tests("TestStandardTokenizer");
+