You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by ma...@apache.org on 2010/06/11 01:54:53 UTC

svn commit: r953505 - in /lucene/lucy/trunk: core/Lucy/Plan/TextType.bp core/Lucy/Plan/TextType.c core/Lucy/Test/Plan/TestTextType.bp core/Lucy/Test/Plan/TestTextType.c perl/lib/Lucy/Test.pm perl/t/core/230-text_type.t

Author: marvin
Date: Thu Jun 10 23:54:52 2010
New Revision: 953505

URL: http://svn.apache.org/viewvc?rev=953505&view=rev
Log:
Add Lucy::Plan::TextType. (LUCY-106)

Added:
    lucene/lucy/trunk/core/Lucy/Plan/TextType.bp   (with props)
    lucene/lucy/trunk/core/Lucy/Plan/TextType.c   (with props)
    lucene/lucy/trunk/core/Lucy/Test/Plan/TestTextType.bp   (with props)
    lucene/lucy/trunk/core/Lucy/Test/Plan/TestTextType.c   (with props)
    lucene/lucy/trunk/perl/t/core/230-text_type.t   (with props)
Modified:
    lucene/lucy/trunk/perl/lib/Lucy/Test.pm

Added: lucene/lucy/trunk/core/Lucy/Plan/TextType.bp
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/core/Lucy/Plan/TextType.bp?rev=953505&view=auto
==============================================================================
--- lucene/lucy/trunk/core/Lucy/Plan/TextType.bp (added)
+++ lucene/lucy/trunk/core/Lucy/Plan/TextType.bp Thu Jun 10 23:54:52 2010
@@ -0,0 +1,97 @@
+parcel Lucy;
+
+/** Text field type.
+ * 
+ * Lucy::Plan::TextType is an implementation of
+ * L<Lucy::Plan::FieldType> tuned for Unicode string values.
+ *
+ * It is typical to use TextType fields with an
+ * L<Analyzer|Lucy::Analysis::Analyzer> to tokenize and normalize the
+ * text so that it may be searched for individual "words".  If no Analyzer is
+ * supplied to TextType's constructor, the complete field value will be
+ * treated as a single exact-match token.
+ */
+class Lucy::Plan::TextType extends Lucy::Plan::FieldType 
+    : dumpable {
+
+    bool_t      highlightable;
+    Analyzer   *analyzer;
+
+    /** 
+     * @param analyzer An Analyzer, which will optionally be used to tokenize
+     * and normalize field values.
+     * @param similarity A Similarity.
+     * @param boost floating point per-field boost.
+     * @param indexed boolean indicating whether the field should be indexed.
+     * @param stored boolean indicating whether the field should be stored.
+     * @param sortable boolean indicating whether the field should be sortable.
+     * @param highlightable boolean indicating whether the field should be
+     * highlightable.
+     */
+    public inert TextType* 
+    init(TextType *self, Analyzer *analyzer = NULL, 
+         Similarity *similarity = NULL);
+
+    inert TextType* 
+    init2(TextType *self, Analyzer *analyzer = NULL, 
+          Similarity *similarity = NULL, float boost = 1.0,
+          bool_t indexed = true, bool_t stored = true, 
+          bool_t sortable = false, bool_t highlightable = false);
+
+    public inert incremented TextType* 
+    new(Analyzer *analyzer, Similarity *similarity = NULL);
+
+    public void
+    Destroy(TextType *self);
+
+    /** Indicate whether to store data required by
+     * L<Lucy::Highlight::Highlighter> for excerpt selection and search
+     * term highlighting. 
+     */ 
+    public void
+    Set_Highlightable(TextType *self, bool_t highlightable);
+
+    /** Accessor for "highlightable" property. 
+     */
+    public bool_t
+    Highlightable(TextType *self);
+
+    /** Accessor for "analyzer" member.  May be NULL if none was supplied.
+     */
+    public nullable Analyzer*
+    Get_Analyzer(TextType *self);
+
+    incremented CharBuf*
+    Make_Blank(TextType *self);
+
+    uint8_t
+    Scalar_Type_ID(TextType *self);
+
+    incremented Hash*
+    Dump_For_Schema(TextType *self);
+
+    public incremented Hash*
+    Dump(TextType *self);
+
+    public incremented TextType*
+    Load(TextType *self, Obj *dump);
+
+    public bool_t
+    Equals(TextType *self, Obj *other);
+} 
+
+/* Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+

Propchange: lucene/lucy/trunk/core/Lucy/Plan/TextType.bp
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/lucy/trunk/core/Lucy/Plan/TextType.c
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/core/Lucy/Plan/TextType.c?rev=953505&view=auto
==============================================================================
--- lucene/lucy/trunk/core/Lucy/Plan/TextType.c (added)
+++ lucene/lucy/trunk/core/Lucy/Plan/TextType.c Thu Jun 10 23:54:52 2010
@@ -0,0 +1,220 @@
+#define C_LUCY_TEXTTYPE
+#define C_LUCY_TEXTTERMSTEPPER
+#include "Lucy/Util/ToolSet.h"
+
+#include "Lucy/Plan/TextType.h"
+#include "Lucy/Analysis/Analyzer.h"
+#include "Lucy/Index/Similarity.h"
+#include "Lucy/Index/Similarity/LuceneSimilarity.h"
+#include "Lucy/Store/InStream.h"
+#include "Lucy/Store/OutStream.h"
+#include "Lucy/Util/StringHelper.h"
+
+// Constructor: make an object from the TEXTTYPE vtable, then delegate to init().
+TextType* TextType_new(Analyzer *analyzer, Similarity *similarity)
+{
+    TextType *fresh = (TextType*)VTable_Make_Obj(TEXTTYPE);
+    return TextType_init(fresh, analyzer, similarity);
+}
+
+TextType*
+TextType_init(TextType *self, Analyzer *analyzer, Similarity *similarity)
+{   // Defaults: boost 1.0, indexed, stored, not sortable, not highlightable.
+    return TextType_init2(self, analyzer, similarity,
+        1.0, true, true, false, false);
+}
+
+TextType*
+TextType_init2(TextType *self, Analyzer *analyzer, Similarity *similarity,
+               float boost, bool_t indexed, bool_t stored, bool_t sortable,
+               bool_t highlightable)
+{
+    // Forward the settings FType_init2() accepts; keep the other two here.
+    FType_init2((FieldType*)self, similarity, boost, indexed, stored, sortable);
+    self->analyzer      = (Analyzer*)INCREF(analyzer); // released in destroy()
+    self->highlightable = highlightable;
+    return self;
+}
+
+// Destructor: release the analyzer reference, then hand off via SUPER_DESTROY.
+void TextType_destroy(TextType *self)
+{
+    DECREF(self->analyzer);
+    SUPER_DESTROY(self, TEXTTYPE);
+}
+
+// Setter for the "highlightable" flag.  The value is stored as given and no
+// other member is touched.
+void
+TextType_set_highlightable(TextType *self, bool_t highlightable)
+{ self->highlightable = highlightable; }
+
+// Plain accessors: each returns the stored member without modification.
+Analyzer* TextType_get_analyzer(TextType *self) { return self->analyzer; }
+// (Get_Analyzer is declared nullable, so the value above may be NULL.)
+bool_t TextType_highlightable(TextType *self) { return self->highlightable; }
+
+// Produce the blank value for a text field: a CharBuf from CB_new(0).
+CharBuf* TextType_make_blank(TextType *self)
+{
+    UNUSED_VAR(self);  // no per-instance state is consulted
+    return CB_new(0);
+}
+
+// Report the scalar type identifier for text fields: always Obj_TEXT.
+uint8_t TextType_scalar_type_id(TextType *self)
+{
+    UNUSED_VAR(self);  // the answer does not depend on the instance
+    return Obj_TEXT;
+}
+
+bool_t
+TextType_equals(TextType *self, Obj *other)
+{
+    TextType *twin = (TextType*)other;
+    TextType_equals_t parent_equals
+        = (TextType_equals_t)SUPER_METHOD(TEXTTYPE, TextType, Equals);
+
+    // Screen out NULL, identity, and foreign classes before comparing.
+    if (other == NULL)              { return false; }
+    if (twin == self)               { return true; }
+    if (!Obj_Is_A(other, TEXTTYPE)) { return false; }
+
+    // The parent's Equals() runs first, then the two members added here.
+    if (!parent_equals(self, other))                  { return false; }
+    if (!self->highlightable != !twin->highlightable) { return false; }
+    if (!self->analyzer != !twin->analyzer)           { return false; }
+    return self->analyzer == NULL
+        || Analyzer_Equals(self->analyzer, (Obj*)twin->analyzer);
+}
+
+Hash*
+TextType_dump_for_schema(TextType *self)
+{
+    // Always tag the dump as "text"; every other key is written only when
+    // the setting departs from what TextType_init() passes to init2().
+    Hash *out = Hash_new(0);
+    Hash_Store_Str(out, "type", 4, (Obj*)CB_newf("text"));
+    if (self->boost != 1.0) {
+        Obj *boost_val = (Obj*)CB_newf("%f64", self->boost);
+        Hash_Store_Str(out, "boost", 5, boost_val);
+    }
+    if (!self->indexed) {
+        Hash_Store_Str(out, "indexed", 7, (Obj*)CB_newf("0"));
+    }
+    if (!self->stored) {
+        Hash_Store_Str(out, "stored", 6, (Obj*)CB_newf("0"));
+    }
+    if (self->sortable) {
+        Hash_Store_Str(out, "sortable", 8, (Obj*)CB_newf("1"));
+    }
+    if (self->highlightable) {
+        Hash_Store_Str(out, "highlightable", 13, (Obj*)CB_newf("1"));
+    }
+    return out;
+}
+
+Hash*
+TextType_dump(TextType *self)
+{
+    // Schema dump plus "_class", analyzer and similarity; minus "type".
+    Hash *out = TextType_Dump_For_Schema(self);
+    Hash_Store_Str(out, "_class", 6,
+        (Obj*)CB_Clone(TextType_Get_Class_Name(self)));
+    if (self->analyzer != NULL) {
+        Hash_Store_Str(out, "analyzer", 8, (Obj*)Analyzer_Dump(self->analyzer));
+    }
+    Hash_Store_Str(out, "similarity", 10, (Obj*)Sim_Dump(self->sim));
+    DECREF(Hash_Delete_Str(out, "type", 4));
+    return out;
+}
+
+TextType*
+TextType_load(TextType *self, Obj *dump)
+{   // Rebuild a TextType from a hash produced by Dump() or Dump_For_Schema().
+    UNUSED_VAR(self); // nothing is read from self; the result comes from dump
+    Hash *source = (Hash*)CERTIFY(dump, HASH);
+    CharBuf *class_name = (CharBuf*)Hash_Fetch_Str(source, "_class", 6);
+    CharBuf *type_str   = (CharBuf*)Hash_Fetch_Str(source, "type", 4);
+    VTable *vtable = NULL;
+    if (class_name && Obj_Is_A((Obj*)class_name, CHARBUF)) { // "_class" wins
+         vtable = VTable_singleton(class_name, NULL);
+    }
+    else if (type_str && Obj_Is_A((Obj*)type_str, CHARBUF)) { 
+        if (   CB_Equals_Str(type_str, "text", 4)
+            || CB_Equals_Str(type_str, "fulltext", 8)
+            || CB_Equals_Str(type_str, "string", 6)
+        ) {
+            vtable = TEXTTYPE;
+        }
+    }
+    if (!vtable) {
+        THROW(ERR, "Unknown class or type");
+    }
+    TextType *loaded = (TextType*)VTable_Make_Obj(vtable);
+
+    // Extract boost, defaulting to 1.0 when the dump omits it.
+    Obj *boost_dump = Hash_Fetch_Str(source, "boost", 5);
+    float boost = boost_dump ? (float)Obj_To_F64(boost_dump) : 1.0f;
+
+    // Find boolean properties; absent keys fall back to the init() defaults.
+    Obj *indexed_dump = Hash_Fetch_Str(source, "indexed", 7);
+    Obj *stored_dump  = Hash_Fetch_Str(source, "stored", 6);
+    Obj *sort_dump    = Hash_Fetch_Str(source, "sortable", 8);
+    Obj *hl_dump      = Hash_Fetch_Str(source, "highlightable", 13);
+    bool_t indexed  = indexed_dump ? (bool_t)Obj_To_I64(indexed_dump) : true;
+    bool_t stored   = stored_dump  ? (bool_t)Obj_To_I64(stored_dump)  : true;
+    bool_t sortable = sort_dump    ? (bool_t)Obj_To_I64(sort_dump)    : false;
+    bool_t hl       = hl_dump      ? (bool_t)Obj_To_I64(hl_dump)      : false;
+
+    // Extract an Analyzer if the dump has one; otherwise it stays NULL.
+    Obj *analyzer_dump = Hash_Fetch_Str(source, "analyzer", 8);
+    Analyzer *analyzer = NULL;
+    if (analyzer_dump) {
+        if (Obj_Is_A(analyzer_dump, ANALYZER)) {
+            // Schema munged the dump and installed a shared analyzer.
+            analyzer = (Analyzer*)INCREF(analyzer_dump);
+        }
+        else {
+            analyzer = (Analyzer*)Obj_Load(analyzer_dump, analyzer_dump);
+        }
+        CERTIFY(analyzer, ANALYZER);
+    }
+
+    // Extract a Similarity using the same live-object-or-dump rule.
+    Similarity *sim = NULL;
+    Obj *sim_dump = Hash_Fetch_Str(source, "similarity", 10);
+    if (sim_dump) {
+        if (Obj_Is_A(sim_dump, SIMILARITY)) {
+            // Schema munged the dump and installed a shared sim.
+            sim = (Similarity*)INCREF(sim_dump);
+        }
+        else {
+            sim = (Similarity*)CERTIFY(
+                Obj_Load(sim_dump, sim_dump), SIMILARITY);
+        }
+    }
+
+    TextType_init2(loaded, analyzer, sim, boost, indexed, stored,
+        sortable, hl);
+    // Release the local references now that init2() has been given both.
+    DECREF(sim);
+    DECREF(analyzer);
+    return loaded;
+}
+
+/* Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+

Propchange: lucene/lucy/trunk/core/Lucy/Plan/TextType.c
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/lucy/trunk/core/Lucy/Test/Plan/TestTextType.bp
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/core/Lucy/Test/Plan/TestTextType.bp?rev=953505&view=auto
==============================================================================
--- lucene/lucy/trunk/core/Lucy/Test/Plan/TestTextType.bp (added)
+++ lucene/lucy/trunk/core/Lucy/Test/Plan/TestTextType.bp Thu Jun 10 23:54:52 2010
@@ -0,0 +1,22 @@
+parcel Lucy;
+/** Test batch for Lucy::Plan::TextType (Equals, Dump and Load). */
+inert class Lucy::Test::Plan::TestTextType {
+    inert void
+    run_tests();
+}
+
+/* Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+

Propchange: lucene/lucy/trunk/core/Lucy/Test/Plan/TestTextType.bp
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/lucy/trunk/core/Lucy/Test/Plan/TestTextType.c
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/core/Lucy/Test/Plan/TestTextType.c?rev=953505&view=auto
==============================================================================
--- lucene/lucy/trunk/core/Lucy/Test/Plan/TestTextType.c (added)
+++ lucene/lucy/trunk/core/Lucy/Test/Plan/TestTextType.c Thu Jun 10 23:54:52 2010
@@ -0,0 +1,104 @@
+#include "Lucy/Util/ToolSet.h"
+
+#include "Lucy/Test.h"
+#include "Lucy/Test/Plan/TestTextType.h"
+#include "Lucy/Test/Plan/TestFieldType.h"
+#include "Lucy/Test/Index/Similarity/DummySimilarity.h"
+#include "Lucy/Plan/TextType.h"
+#include "Lucy/Test/Analysis/DummyAnalyzer.h"
+
+static void
+test_Equals(TestBatch *batch)
+{
+    Analyzer *base_analyzer = (Analyzer*)DummyAnalyzer_new(1);
+    TextType *subject       = TextType_new(base_analyzer, NULL);
+    // A NULL argument never matches.
+    ASSERT_FALSE(batch, TextType_Equals(subject, NULL),
+        "Equals() false with NULL");
+    // Neither does a field type that is not a TextType.
+    FieldType *foreign = (FieldType*)DummyFieldType_new(NULL);
+    ASSERT_FALSE(batch, TextType_Equals(subject, (Obj*)foreign),
+        "Equals() false with non-TextType");
+    DECREF(foreign);
+    // Same class, but built around a DummyAnalyzer made with another argument.
+    Analyzer *other_analyzer = (Analyzer*)DummyAnalyzer_new(2);
+    TextType *with_other     = TextType_new(other_analyzer, NULL);
+    ASSERT_FALSE(batch, TextType_Equals(subject, (Obj*)with_other),
+        "Equals() false with different Analyzer");
+    DECREF(with_other);
+    DECREF(other_analyzer);
+    // Same analyzer, but with the highlightable flag switched on.
+    TextType *highlighted = TextType_new(base_analyzer, NULL);
+    TextType_Set_Highlightable(highlighted, true);
+    ASSERT_FALSE(batch, TextType_Equals(subject, (Obj*)highlighted),
+        "Equals() false with highlightable => true");
+    DECREF(highlighted);
+
+    DECREF(subject);
+    DECREF(base_analyzer);
+}
+
+static void
+test_Dump_and_Load(TestBatch *batch)
+{
+    Analyzer   *dummy_analyzer = (Analyzer*)DummyAnalyzer_new(1);
+    Similarity *dummy_sim      = (Similarity*)DummySim_new(1);
+    TextType   *original       = TextType_new(dummy_analyzer, dummy_sim);
+
+    // Flip every flag away from its default so a lossy Dump cannot pass.
+    TextType_Set_Highlightable(original, true);
+    TextType_Set_Indexed(original, false);
+    TextType_Set_Stored(original, false);
+    TextType_Set_Sortable(original, true);
+
+    {   // Round trip 1: the full Dump(), revived through Obj_Load().
+        Obj *full_dump = (Obj*)TextType_Dump(original);
+        Obj *revived   = Obj_Load(full_dump, full_dump);
+        ASSERT_TRUE(batch, TextType_Equals(original, revived),
+            "Dump => Load round trip");
+        DECREF(full_dump);
+        DECREF(revived);
+    }
+
+    {   // Round trip 2: the schema dump, after installing live analyzer and
+        // similarity objects (Schema_Load() normally does this internally).
+        Hash *schema_dump = TextType_Dump_For_Schema(original);
+        Hash_Store_Str(schema_dump, "analyzer", 8, INCREF(dummy_analyzer));
+        Hash_Store_Str(schema_dump, "similarity", 10, INCREF(dummy_sim));
+        TextType *revived = TextType_load(NULL, (Obj*)schema_dump);
+        ASSERT_TRUE(batch, TextType_Equals(original, (Obj*)revived),
+            "Dump_For_Schema => Load round trip");
+        DECREF(schema_dump);
+        DECREF(revived);
+    }
+
+    DECREF(original);
+    DECREF(dummy_sim);
+    DECREF(dummy_analyzer);
+}
+
+// Entry point: plan 6 assertions (4 in test_Equals, 2 in test_Dump_and_Load).
+void TestTextType_run_tests()
+{
+    TestBatch *plan = TestBatch_new(6);
+    TestBatch_Plan(plan);
+    test_Equals(plan);
+    test_Dump_and_Load(plan);
+    DECREF(plan);
+}
+
+/* Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+

Propchange: lucene/lucy/trunk/core/Lucy/Test/Plan/TestTextType.c
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/lucy/trunk/perl/lib/Lucy/Test.pm
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/perl/lib/Lucy/Test.pm?rev=953505&r1=953504&r2=953505&view=diff
==============================================================================
--- lucene/lucy/trunk/perl/lib/Lucy/Test.pm (original)
+++ lucene/lucy/trunk/perl/lib/Lucy/Test.pm Thu Jun 10 23:54:52 2010
@@ -53,6 +53,9 @@ PPCODE:
     else if (strEQ(package, "TestFieldType")) {
         lucy_TestFType_run_tests();
     }
+    else if (strEQ(package, "TestTextType")) {
+        lucy_TestTextType_run_tests();
+    }
     // Lucy::Store 
     else if (strEQ(package, "TestCompoundFileReader")) {
         lucy_TestCFReader_run_tests();

Added: lucene/lucy/trunk/perl/t/core/230-text_type.t
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/perl/t/core/230-text_type.t?rev=953505&view=auto
==============================================================================
--- lucene/lucy/trunk/perl/t/core/230-text_type.t (added)
+++ lucene/lucy/trunk/perl/t/core/230-text_type.t Thu Jun 10 23:54:52 2010
@@ -0,0 +1,6 @@
+use strict;
+use warnings;
+# Run the C test batch that Lucy::Test dispatches under "TestTextType".
+use Lucy::Test;
+Lucy::Test::run_tests("TestTextType");
+

Propchange: lucene/lucy/trunk/perl/t/core/230-text_type.t
------------------------------------------------------------------------------
    svn:eol-style = native