You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by ma...@apache.org on 2010/06/11 01:54:53 UTC
svn commit: r953505 - in /lucene/lucy/trunk: core/Lucy/Plan/TextType.bp
core/Lucy/Plan/TextType.c core/Lucy/Test/Plan/TestTextType.bp
core/Lucy/Test/Plan/TestTextType.c perl/lib/Lucy/Test.pm
perl/t/core/230-text_type.t
Author: marvin
Date: Thu Jun 10 23:54:52 2010
New Revision: 953505
URL: http://svn.apache.org/viewvc?rev=953505&view=rev
Log:
Add Lucy::Plan::TextType. (LUCY-106)
Added:
lucene/lucy/trunk/core/Lucy/Plan/TextType.bp (with props)
lucene/lucy/trunk/core/Lucy/Plan/TextType.c (with props)
lucene/lucy/trunk/core/Lucy/Test/Plan/TestTextType.bp (with props)
lucene/lucy/trunk/core/Lucy/Test/Plan/TestTextType.c (with props)
lucene/lucy/trunk/perl/t/core/230-text_type.t (with props)
Modified:
lucene/lucy/trunk/perl/lib/Lucy/Test.pm
Added: lucene/lucy/trunk/core/Lucy/Plan/TextType.bp
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/core/Lucy/Plan/TextType.bp?rev=953505&view=auto
==============================================================================
--- lucene/lucy/trunk/core/Lucy/Plan/TextType.bp (added)
+++ lucene/lucy/trunk/core/Lucy/Plan/TextType.bp Thu Jun 10 23:54:52 2010
@@ -0,0 +1,97 @@
+parcel Lucy;
+
+/** Text field type.
+ *
+ * Lucy::Plan::TextType is an implementation of
+ * L<Lucy::Plan::FieldType> tuned for Unicode string values.
+ *
+ * It is typical to use TextType fields with an
+ * L<Analyzer|Lucy::Analysis::Analyzer> to tokenize and normalize the
+ * text so that it may be searched for individual "words".  If no Analyzer is
+ * supplied to TextType's constructor, the complete field value will be
+ * treated as a single exact-match token.
+ */
+class Lucy::Plan::TextType extends Lucy::Plan::FieldType
+    : dumpable {
+
+    bool_t    highlightable;
+    Analyzer *analyzer;
+
+    /** Initialize a TextType using the default settings: boost of 1.0,
+     * indexed, stored, not sortable and not highlightable.
+     *
+     * NOTE(review): only <code>analyzer</code> and <code>similarity</code>
+     * are arguments of init() itself; the remaining parameters listed below
+     * are accepted by init2().  Presumably they are documented here for the
+     * benefit of host-language constructors -- confirm.
+     *
+     * @param analyzer An Analyzer, which will optionally be used to tokenize
+     * and normalize field values.
+     * @param similarity A Similarity.
+     * @param boost floating point per-field boost.
+     * @param indexed boolean indicating whether the field should be indexed.
+     * @param stored boolean indicating whether the field should be stored.
+     * @param sortable boolean indicating whether the field should be sortable.
+     * @param highlightable boolean indicating whether the field should be
+     * highlightable.
+     */
+    public inert TextType*
+    init(TextType *self, Analyzer *analyzer = NULL,
+         Similarity *similarity = NULL);
+
+    /** Initialize a TextType with every setting specified explicitly.
+     *
+     * @param analyzer An optional Analyzer.
+     * @param similarity A Similarity.
+     * @param boost floating point per-field boost.
+     * @param indexed boolean indicating whether the field should be indexed.
+     * @param stored boolean indicating whether the field should be stored.
+     * @param sortable boolean indicating whether the field should be sortable.
+     * @param highlightable boolean indicating whether the field should be
+     * highlightable.
+     */
+    inert TextType*
+    init2(TextType *self, Analyzer *analyzer = NULL,
+          Similarity *similarity = NULL, float boost = 1.0,
+          bool_t indexed = true, bool_t stored = true,
+          bool_t sortable = false, bool_t highlightable = false);
+
+    /** Allocate a new TextType and initialize it via init().  As with
+     * init(), the Analyzer is optional.
+     */
+    public inert incremented TextType*
+    new(Analyzer *analyzer = NULL, Similarity *similarity = NULL);
+
+    public void
+    Destroy(TextType *self);
+
+    /** Indicate whether to store data required by
+     * L<Lucy::Highlight::Highlighter> for excerpt selection and search
+     * term highlighting.
+     */
+    public void
+    Set_Highlightable(TextType *self, bool_t highlightable);
+
+    /** Accessor for "highlightable" property.
+     */
+    public bool_t
+    Highlightable(TextType *self);
+
+    /** Accessor for "analyzer" member.  May be NULL.
+     */
+    public nullable Analyzer*
+    Get_Analyzer(TextType *self);
+
+    /** Return a new, empty CharBuf.
+     */
+    incremented CharBuf*
+    Make_Blank(TextType *self);
+
+    /** Return the scalar type identifier for text (Obj_TEXT).
+     */
+    uint8_t
+    Scalar_Type_ID(TextType *self);
+
+    /** Produce an abbreviated dump: a "type" entry of "text" plus only those
+     * settings which differ from their defaults.  Analyzer and Similarity
+     * are not included.
+     */
+    incremented Hash*
+    Dump_For_Schema(TextType *self);
+
+    /** Produce a complete dump, keyed by "_class" rather than "type", which
+     * also includes the Analyzer (if any) and the Similarity.
+     */
+    public incremented Hash*
+    Dump(TextType *self);
+
+    /** Create a TextType from a dump produced by either Dump() or
+     * Dump_For_Schema().
+     */
+    public incremented TextType*
+    Load(TextType *self, Obj *dump);
+
+    public bool_t
+    Equals(TextType *self, Obj *other);
+}
+
+/* Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
Propchange: lucene/lucy/trunk/core/Lucy/Plan/TextType.bp
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/lucy/trunk/core/Lucy/Plan/TextType.c
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/core/Lucy/Plan/TextType.c?rev=953505&view=auto
==============================================================================
--- lucene/lucy/trunk/core/Lucy/Plan/TextType.c (added)
+++ lucene/lucy/trunk/core/Lucy/Plan/TextType.c Thu Jun 10 23:54:52 2010
@@ -0,0 +1,220 @@
+#define C_LUCY_TEXTTYPE
+#define C_LUCY_TEXTTERMSTEPPER
+#include "Lucy/Util/ToolSet.h"
+
+#include "Lucy/Plan/TextType.h"
+#include "Lucy/Analysis/Analyzer.h"
+#include "Lucy/Index/Similarity.h"
+#include "Lucy/Index/Similarity/LuceneSimilarity.h"
+#include "Lucy/Store/InStream.h"
+#include "Lucy/Store/OutStream.h"
+#include "Lucy/Util/StringHelper.h"
+
+/* Allocate a blank TextType and run the default initializer on it. */
+TextType*
+TextType_new(Analyzer *analyzer, Similarity *similarity)
+{
+    TextType *blank = (TextType*)VTable_Make_Obj(TEXTTYPE);
+    TextType *self  = TextType_init(blank, analyzer, similarity);
+    return self;
+}
+
+/* Initialize with the default settings by delegating to TextType_init2:
+ * boost 1.0, indexed, stored, not sortable, not highlightable.
+ */
+TextType*
+TextType_init(TextType *self, Analyzer *analyzer, Similarity *similarity)
+{
+    const float  default_boost         = 1.0;
+    const bool_t default_indexed       = true;
+    const bool_t default_stored        = true;
+    const bool_t default_sortable      = false;
+    const bool_t default_highlightable = false;
+
+    return TextType_init2(self, analyzer, similarity, default_boost,
+                          default_indexed, default_stored, default_sortable,
+                          default_highlightable);
+}
+
+/* Full initializer: passes the generic field settings to FType_init2, then
+ * records the members specific to TextType.
+ */
+TextType*
+TextType_init2(TextType *self, Analyzer *analyzer, Similarity *similarity,
+               float boost, bool_t indexed, bool_t stored, bool_t sortable,
+               bool_t highlightable)
+{
+    FieldType *base = (FieldType*)self;
+    FType_init2(base, similarity, boost, indexed, stored, sortable);
+
+    // Hold our own reference to the analyzer; TextType_destroy releases it.
+    self->analyzer      = (Analyzer*)INCREF(analyzer);
+    self->highlightable = highlightable;
+
+    return self;
+}
+
+// Destructor: drop the reference held on the analyzer (taken in
+// TextType_init2), then chain up via SUPER_DESTROY.
+void
+TextType_destroy(TextType *self)
+{
+ DECREF(self->analyzer);
+ SUPER_DESTROY(self, TEXTTYPE);
+}
+
+// Setter for the "highlightable" member; the value is stored as given.
+void
+TextType_set_highlightable(TextType *self, bool_t highlightable)
+{
+ self->highlightable = highlightable;
+}
+
+/* Accessor for the "analyzer" member.  The caller does not receive a new
+ * reference.
+ */
+Analyzer*
+TextType_get_analyzer(TextType *self)
+{
+    return self->analyzer;
+}
+/* Accessor for the "highlightable" member. */
+bool_t
+TextType_highlightable(TextType *self)
+{
+    return self->highlightable;
+}
+
+/* Return a new CharBuf created with CB_new(0).  The object itself is not
+ * consulted.
+ */
+CharBuf*
+TextType_make_blank(TextType *self)
+{
+    CharBuf *blank = CB_new(0);
+    UNUSED_VAR(self);
+    return blank;
+}
+
+/* Every TextType reports the same scalar type: Obj_TEXT. */
+uint8_t
+TextType_scalar_type_id(TextType *self)
+{
+    const uint8_t type_id = Obj_TEXT;
+    UNUSED_VAR(self);
+    return type_id;
+}
+
+/* Two TextTypes are equal when the parent class's Equals accepts them, their
+ * "highlightable" flags agree, and their analyzers are either both absent or
+ * both present and equal according to Analyzer_Equals.
+ */
+bool_t
+TextType_equals(TextType *self, Obj *other)
+{
+    TextType *twin = (TextType*)other;
+    TextType_equals_t parent_equals = (TextType_equals_t)SUPER_METHOD(
+        TEXTTYPE, TextType, Equals);
+
+    // Cheap rejections and the identity shortcut come first.
+    if (other == NULL)               { return false; }
+    if (twin == self)                { return true; }
+    if (!Obj_Is_A(other, TEXTTYPE))  { return false; }
+    if (!parent_equals(self, other)) { return false; }
+
+    // Compare the flags by truthiness rather than by raw value.
+    if (!self->highlightable != !twin->highlightable) { return false; }
+
+    // With no analyzer on our side, the twin must lack one as well.
+    if (self->analyzer == NULL) {
+        return twin->analyzer == NULL ? true : false;
+    }
+    if (twin->analyzer == NULL) {
+        return false;
+    }
+    if (Analyzer_Equals(self->analyzer, (Obj*)twin->analyzer)) {
+        return true;
+    }
+    return false;
+}
+
+/* Build the abbreviated dump: a "type" entry of "text", plus an entry for
+ * each setting whose value differs from its default.  Analyzer and
+ * Similarity are not part of this dump.
+ */
+Hash*
+TextType_dump_for_schema(TextType *self)
+{
+    Hash *dump = Hash_new(0);
+    Obj  *type_name = (Obj*)CB_newf("text");
+    Hash_Store_Str(dump, "type", 4, type_name);
+
+    // Defaults are implied by absence, so only overrides get recorded.
+    if (self->boost != 1.0) {
+        Obj *boost_val = (Obj*)CB_newf("%f64", self->boost);
+        Hash_Store_Str(dump, "boost", 5, boost_val);
+    }
+    if (!self->indexed) {
+        Obj *off = (Obj*)CB_newf("0");
+        Hash_Store_Str(dump, "indexed", 7, off);
+    }
+    if (!self->stored) {
+        Obj *off = (Obj*)CB_newf("0");
+        Hash_Store_Str(dump, "stored", 6, off);
+    }
+    if (self->sortable) {
+        Obj *on = (Obj*)CB_newf("1");
+        Hash_Store_Str(dump, "sortable", 8, on);
+    }
+    if (self->highlightable) {
+        Obj *on = (Obj*)CB_newf("1");
+        Hash_Store_Str(dump, "highlightable", 13, on);
+    }
+
+    return dump;
+}
+
+/* Build the complete dump.  Starts from the Dump_For_Schema output, adds a
+ * "_class" entry holding a copy of the class name, adds dumps of the
+ * analyzer and similarity when present, and removes the "type" entry.
+ */
+Hash*
+TextType_dump(TextType *self)
+{
+    Hash *dump = TextType_Dump_For_Schema(self);
+    Hash_Store_Str(dump, "_class", 6,
+        (Obj*)CB_Clone(TextType_Get_Class_Name(self)));
+    if (self->analyzer) {
+        Hash_Store_Str(dump, "analyzer", 8,
+            (Obj*)Analyzer_Dump(self->analyzer));
+    }
+    // Guard against a missing Similarity in the same way as the analyzer:
+    // the constructors accept similarity = NULL, and TextType_load copes
+    // with a dump lacking a "similarity" entry.
+    // NOTE(review): FType_init2 may always install a Similarity, in which
+    // case this check is merely defensive -- confirm.
+    if (self->sim) {
+        Hash_Store_Str(dump, "similarity", 10, (Obj*)Sim_Dump(self->sim));
+    }
+    // "_class" supersedes the abbreviated "type" entry.
+    DECREF(Hash_Delete_Str(dump, "type", 4));
+    return dump;
+}
+
+/* Create a TextType from a dump Hash.
+ *
+ * The class to instantiate is taken from the "_class" entry if present;
+ * otherwise a "type" entry of "text", "fulltext" or "string" selects
+ * TextType itself.  Settings missing from the dump take their defaults
+ * (boost 1.0, indexed, stored, not sortable, not highlightable).  The
+ * "analyzer" and "similarity" entries may hold either a dump to be loaded
+ * or an already-instantiated object.
+ *
+ * Throws if neither a usable "_class" nor a recognized "type" is found.
+ * The `self` argument is not used.
+ */
+TextType*
+TextType_load(TextType *self, Obj *dump)
+{
+    UNUSED_VAR(self);
+    Hash *source = (Hash*)CERTIFY(dump, HASH);
+    CharBuf *class_name = (CharBuf*)Hash_Fetch_Str(source, "_class", 6);
+    CharBuf *type_str = (CharBuf*)Hash_Fetch_Str(source, "type", 4);
+    VTable *vtable = NULL;
+    if (class_name && Obj_Is_A((Obj*)class_name, CHARBUF)) {
+        vtable = VTable_singleton(class_name, NULL);
+    }
+    else if (type_str && Obj_Is_A((Obj*)type_str, CHARBUF)) {
+        if (   CB_Equals_Str(type_str, "text", 4)
+            || CB_Equals_Str(type_str, "fulltext", 8)
+            || CB_Equals_Str(type_str, "string", 6)
+        ) {
+            vtable = TEXTTYPE;
+        }
+    }
+    if (!vtable) {
+        THROW(ERR, "Unknown class or type");
+    }
+
+    // Extract boost.
+    Obj *boost_dump = Hash_Fetch_Str(source, "boost", 5);
+    float boost = boost_dump ? (float)Obj_To_F64(boost_dump) : 1.0f;
+
+    // Find boolean properties.  Test against zero instead of casting the
+    // 64-bit value to bool_t, so that the result cannot depend on how a
+    // narrowing conversion treats large non-zero values.
+    Obj *indexed_dump = Hash_Fetch_Str(source, "indexed", 7);
+    Obj *stored_dump  = Hash_Fetch_Str(source, "stored", 6);
+    Obj *sort_dump    = Hash_Fetch_Str(source, "sortable", 8);
+    Obj *hl_dump      = Hash_Fetch_Str(source, "highlightable", 13);
+    bool_t indexed  = indexed_dump ? (Obj_To_I64(indexed_dump) != 0) : true;
+    bool_t stored   = stored_dump  ? (Obj_To_I64(stored_dump) != 0)  : true;
+    bool_t sortable = sort_dump    ? (Obj_To_I64(sort_dump) != 0)    : false;
+    bool_t hl       = hl_dump      ? (Obj_To_I64(hl_dump) != 0)      : false;
+
+    // Extract an Analyzer.
+    Obj *analyzer_dump = Hash_Fetch_Str(source, "analyzer", 8);
+    Analyzer *analyzer = NULL;
+    if (analyzer_dump) {
+        if (Obj_Is_A(analyzer_dump, ANALYZER)) {
+            // Schema munged the dump and installed a shared analyzer.
+            analyzer = (Analyzer*)INCREF(analyzer_dump);
+        }
+        else {
+            analyzer = (Analyzer*)Obj_Load(analyzer_dump, analyzer_dump);
+        }
+        CERTIFY(analyzer, ANALYZER);
+    }
+
+    // Extract a Similarity.
+    Similarity *sim = NULL;
+    Obj *sim_dump = Hash_Fetch_Str(source, "similarity", 10);
+    if (sim_dump) {
+        if (Obj_Is_A(sim_dump, SIMILARITY)) {
+            // Schema munged the dump and installed a shared sim.
+            sim = (Similarity*)INCREF(sim_dump);
+        }
+        else {
+            sim = (Similarity*)CERTIFY(
+                Obj_Load(sim_dump, sim_dump), SIMILARITY);
+        }
+    }
+
+    // Allocate the object only once every input has been parsed, so that a
+    // failure raised by one of the steps above does not leave behind an
+    // allocated but uninitialized TextType.
+    TextType *loaded = (TextType*)VTable_Make_Obj(vtable);
+    TextType_init2(loaded, analyzer, sim, boost, indexed, stored,
+        sortable, hl);
+
+    // TextType_init2 took its own reference to the analyzer; release the
+    // local references.
+    DECREF(sim);
+    DECREF(analyzer);
+    return loaded;
+}
+
+/* Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
Propchange: lucene/lucy/trunk/core/Lucy/Plan/TextType.c
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/lucy/trunk/core/Lucy/Test/Plan/TestTextType.bp
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/core/Lucy/Test/Plan/TestTextType.bp?rev=953505&view=auto
==============================================================================
--- lucene/lucy/trunk/core/Lucy/Test/Plan/TestTextType.bp (added)
+++ lucene/lucy/trunk/core/Lucy/Test/Plan/TestTextType.bp Thu Jun 10 23:54:52 2010
@@ -0,0 +1,22 @@
+parcel Lucy;
+
+/** Test harness for Lucy::Plan::TextType.
+ */
+inert class Lucy::Test::Plan::TestTextType {
+ /** Run the TextType test batch.
+  */
+ inert void
+ run_tests();
+}
+
+/* Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
Propchange: lucene/lucy/trunk/core/Lucy/Test/Plan/TestTextType.bp
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/lucy/trunk/core/Lucy/Test/Plan/TestTextType.c
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/core/Lucy/Test/Plan/TestTextType.c?rev=953505&view=auto
==============================================================================
--- lucene/lucy/trunk/core/Lucy/Test/Plan/TestTextType.c (added)
+++ lucene/lucy/trunk/core/Lucy/Test/Plan/TestTextType.c Thu Jun 10 23:54:52 2010
@@ -0,0 +1,104 @@
+#include "Lucy/Util/ToolSet.h"
+
+#include "Lucy/Test.h"
+#include "Lucy/Test/Plan/TestTextType.h"
+#include "Lucy/Test/Plan/TestFieldType.h"
+#include "Lucy/Test/Index/Similarity/DummySimilarity.h"
+#include "Lucy/Plan/TextType.h"
+#include "Lucy/Test/Analysis/DummyAnalyzer.h"
+
+/* Verify that TextType_Equals returns false for NULL, for a non-TextType
+ * field type, for a differing Analyzer and for a differing "highlightable"
+ * setting.
+ */
+static void
+test_Equals(TestBatch *batch)
+{
+    Analyzer *base_analyzer = (Analyzer*)DummyAnalyzer_new(1);
+    TextType *subject       = TextType_new(base_analyzer, NULL);
+
+    // NULL never compares equal.
+    ASSERT_FALSE(batch, TextType_Equals(subject, NULL),
+                 "Equals() false with NULL");
+
+    // An object which is not a TextType.
+    {
+        FieldType *stranger = (FieldType*)DummyFieldType_new(NULL);
+        ASSERT_FALSE(batch, TextType_Equals(subject, (Obj*)stranger),
+                     "Equals() false with non-TextType");
+        DECREF(stranger);
+    }
+
+    // Same class, but built around a different Analyzer.
+    {
+        Analyzer *other_analyzer = (Analyzer*)DummyAnalyzer_new(2);
+        TextType *other_type     = TextType_new(other_analyzer, NULL);
+        ASSERT_FALSE(batch, TextType_Equals(subject, (Obj*)other_type),
+                     "Equals() false with different Analyzer");
+        DECREF(other_type);
+        DECREF(other_analyzer);
+    }
+
+    // Same Analyzer, but with highlighting switched on.
+    {
+        TextType *highlighted = TextType_new(base_analyzer, NULL);
+        TextType_Set_Highlightable(highlighted, true);
+        ASSERT_FALSE(batch, TextType_Equals(subject, (Obj*)highlighted),
+                     "Equals() false with highlightable => true");
+        DECREF(highlighted);
+    }
+
+    DECREF(subject);
+    DECREF(base_analyzer);
+}
+
+/* Verify that a TextType survives both the Dump => Load and the
+ * Dump_For_Schema => Load round trips.
+ */
+static void
+test_Dump_and_Load(TestBatch *batch)
+{
+    Analyzer   *analyzer = (Analyzer*)DummyAnalyzer_new(1);
+    Similarity *sim      = (Similarity*)DummySim_new(1);
+    TextType   *original = TextType_new(analyzer, sim);
+
+    // Flip every boolean setting away from its default, so that a dump
+    // which drops any of them yields an unequal object.
+    TextType_Set_Highlightable(original, true);
+    TextType_Set_Indexed(original, false);
+    TextType_Set_Stored(original, false);
+    TextType_Set_Sortable(original, true);
+
+    // Round trip through the complete dump.
+    Obj *full_dump   = (Obj*)TextType_Dump(original);
+    Obj *full_loaded = Obj_Load(full_dump, full_dump);
+    ASSERT_TRUE(batch, TextType_Equals(original, full_loaded),
+                "Dump => Load round trip");
+    DECREF(full_dump);
+    DECREF(full_loaded);
+
+    // Round trip through the abbreviated dump.
+    // (These steps are normally performed by Schema_Load() internally.)
+    Obj *schema_dump = (Obj*)TextType_Dump_For_Schema(original);
+    Hash_Store_Str((Hash*)schema_dump, "analyzer", 8, INCREF(analyzer));
+    Hash_Store_Str((Hash*)schema_dump, "similarity", 10, INCREF(sim));
+    TextType *schema_loaded = (TextType*)TextType_load(NULL, schema_dump);
+    ASSERT_TRUE(batch, TextType_Equals(original, (Obj*)schema_loaded),
+                "Dump_For_Schema => Load round trip");
+    DECREF(schema_dump);
+    DECREF(schema_loaded);
+
+    DECREF(original);
+    DECREF(sim);
+    DECREF(analyzer);
+}
+
+// Entry point for the TextType test batch.
+void
+TestTextType_run_tests()
+{
+ // 6 planned tests: 4 assertions in test_Equals plus 2 in
+ // test_Dump_and_Load.
+ TestBatch *batch = TestBatch_new(6);
+ TestBatch_Plan(batch);
+ test_Equals(batch);
+ test_Dump_and_Load(batch);
+ DECREF(batch);
+}
+
+/* Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
Propchange: lucene/lucy/trunk/core/Lucy/Test/Plan/TestTextType.c
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/lucy/trunk/perl/lib/Lucy/Test.pm
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/perl/lib/Lucy/Test.pm?rev=953505&r1=953504&r2=953505&view=diff
==============================================================================
--- lucene/lucy/trunk/perl/lib/Lucy/Test.pm (original)
+++ lucene/lucy/trunk/perl/lib/Lucy/Test.pm Thu Jun 10 23:54:52 2010
@@ -53,6 +53,9 @@ PPCODE:
else if (strEQ(package, "TestFieldType")) {
lucy_TestFType_run_tests();
}
+ else if (strEQ(package, "TestTextType")) {
+ lucy_TestTextType_run_tests();
+ }
// Lucy::Store
else if (strEQ(package, "TestCompoundFileReader")) {
lucy_TestCFReader_run_tests();
Added: lucene/lucy/trunk/perl/t/core/230-text_type.t
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/perl/t/core/230-text_type.t?rev=953505&view=auto
==============================================================================
--- lucene/lucy/trunk/perl/t/core/230-text_type.t (added)
+++ lucene/lucy/trunk/perl/t/core/230-text_type.t Thu Jun 10 23:54:52 2010
@@ -0,0 +1,6 @@
+# Run the core "TestTextType" test batch via Lucy::Test.
+use strict;
+use warnings;
+
+use Lucy::Test;
+Lucy::Test::run_tests("TestTextType");
+
Propchange: lucene/lucy/trunk/perl/t/core/230-text_type.t
------------------------------------------------------------------------------
svn:eol-style = native