You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by ka...@apache.org on 2017/02/24 02:33:59 UTC

lucy git commit: Add test showing custom analyzer

Repository: lucy
Updated Branches:
  refs/heads/master 040bb290f -> 488d6f4cf


Add test showing custom analyzer

**Why**: A discussion on the user mailing list revealed that our Analyzer
docs needed bolstering. This test demonstrates the simple custom-analyzer
use case.

Ref https://lists.apache.org/thread.html/7e0f446bcccddf8faaffbb4fbb5a5265016a06f736f30757f0ee35af@%3Cuser.lucy.apache.org%3E


Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/488d6f4c
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/488d6f4c
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/488d6f4c

Branch: refs/heads/master
Commit: 488d6f4cfbf23faf79c52de193fc50de6152236e
Parents: 040bb29
Author: Peter Karman <ka...@peknet.com>
Authored: Thu Feb 23 20:30:51 2017 -0600
Committer: Peter Karman <ka...@peknet.com>
Committed: Thu Feb 23 20:30:51 2017 -0600

----------------------------------------------------------------------
 perl/t/240-custom-analyzer.t | 69 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucy/blob/488d6f4c/perl/t/240-custom-analyzer.t
----------------------------------------------------------------------
diff --git a/perl/t/240-custom-analyzer.t b/perl/t/240-custom-analyzer.t
new file mode 100644
index 0000000..bdda0c8
--- /dev/null
+++ b/perl/t/240-custom-analyzer.t
@@ -0,0 +1,69 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+use strict;
+use warnings;
+
+use Test::More tests => 3;
+use Lucy;
+
+package TestAnalyzer;    # minimal custom analyzer: transform() is a no-op
+use base qw( Lucy::Analysis::Analyzer );
+sub transform { my ( $self, $input ) = @_; return $input; }
+sub equals { my ( $self, $other ) = @_; return $other->isa(__PACKAGE__); }
+
+package main;
+use Encode qw( _utf8_on );
+use Lucy::Test;
+
+sub new_schema {
+    # Three fields share one TestAnalyzer; only 'unstored' sets stored => 0.
+    my $schema   = Lucy::Plan::Schema->new;
+    my $analyzer = TestAnalyzer->new;
+    my %common   = ( analyzer => $analyzer );
+    my $stored   = Lucy::Plan::FullTextType->new(%common);
+    my $unstored = Lucy::Plan::FullTextType->new( %common, stored => 0 );
+    my @fields = ( text => $stored, unstored => $unstored, empty => $stored );
+    while ( my ( $name, $type ) = splice @fields, 0, 2 ) {
+        $schema->spec_field( name => $name, type => $type );
+    }
+    return $schema;
+}
+
+# This valid UTF-8 string includes skull and crossbones and a null byte.  It
+# is written as raw bytes; _utf8_on() then flags it as UTF-8 in place.
+my $val = "a b c \xe2\x98\xA0 \0a";
+_utf8_on($val);
+
+my $folder = Lucy::Store::RAMFolder->new;
+
+# Index the same document three times; every pass builds a fresh schema and
+# opens a new Indexer on the one shared in-memory folder.
+for my $try ( 1 .. 3 ) {
+    my %indexer_args = (
+        index  => $folder,
+        schema => new_schema(),
+        create => 1,
+    );
+    my $doc = {
+        text     => $val,
+        unstored => $val,
+        empty    => '',
+    };
+    ok( my $indexer = Lucy::Index::Indexer->new(%indexer_args),
+        "create indexer $try" );
+    $indexer->add_doc($doc);
+    $indexer->commit;
+}