You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by ma...@apache.org on 2011/03/12 00:35:37 UTC

[lucy-commits] svn commit: r1080795 - in /incubator/lucy/trunk: clownfish/lib/Clownfish/Binding/ core/Lucy/Analysis/ core/Lucy/Test/ core/Lucy/Test/Analysis/ core/Lucy/Test/Plan/ devel/benchmarks/indexers/ perl/lib/ perl/lib/Lucy/ perl/lib/Lucy/Analysis/ perl/lib/Luc...

Author: marvin
Date: Fri Mar 11 23:35:35 2011
New Revision: 1080795

URL: http://svn.apache.org/viewvc?rev=1080795&view=rev
Log:
LUCY-132 -- regex_tokenizer.patch
Rename Lucy::Analysis::Tokenizer to Lucy::Analysis::RegexTokenizer.

Added:
    incubator/lucy/trunk/core/Lucy/Analysis/RegexTokenizer.c
      - copied, changed from r1078955, incubator/lucy/trunk/core/Lucy/Analysis/Tokenizer.c
    incubator/lucy/trunk/core/Lucy/Analysis/RegexTokenizer.cfh
      - copied, changed from r1078955, incubator/lucy/trunk/core/Lucy/Analysis/Tokenizer.cfh
    incubator/lucy/trunk/core/Lucy/Test/Analysis/TestRegexTokenizer.c
      - copied, changed from r1078955, incubator/lucy/trunk/core/Lucy/Test/Analysis/TestTokenizer.c
    incubator/lucy/trunk/core/Lucy/Test/Analysis/TestRegexTokenizer.cfh
      - copied, changed from r1078955, incubator/lucy/trunk/core/Lucy/Test/Analysis/TestTokenizer.cfh
    incubator/lucy/trunk/perl/lib/Lucy/Analysis/RegexTokenizer.pm
      - copied, changed from r1078955, incubator/lucy/trunk/perl/lib/Lucy/Analysis/Tokenizer.pm
    incubator/lucy/trunk/perl/t/154-regex_tokenizer.t
      - copied, changed from r1078955, incubator/lucy/trunk/perl/t/154-tokenizer.t
    incubator/lucy/trunk/perl/t/core/154-regex_tokenizer.t
      - copied, changed from r1078955, incubator/lucy/trunk/perl/t/core/154-tokenizer.t
Removed:
    incubator/lucy/trunk/core/Lucy/Analysis/Tokenizer.c
    incubator/lucy/trunk/core/Lucy/Analysis/Tokenizer.cfh
    incubator/lucy/trunk/core/Lucy/Test/Analysis/TestTokenizer.c
    incubator/lucy/trunk/core/Lucy/Test/Analysis/TestTokenizer.cfh
    incubator/lucy/trunk/perl/lib/Lucy/Analysis/Tokenizer.pm
    incubator/lucy/trunk/perl/t/154-tokenizer.t
    incubator/lucy/trunk/perl/t/core/154-tokenizer.t
Modified:
    incubator/lucy/trunk/clownfish/lib/Clownfish/Binding/Perl.pm
    incubator/lucy/trunk/core/Lucy/Analysis/Analyzer.cfh
    incubator/lucy/trunk/core/Lucy/Analysis/PolyAnalyzer.c
    incubator/lucy/trunk/core/Lucy/Analysis/PolyAnalyzer.cfh
    incubator/lucy/trunk/core/Lucy/Test/Analysis/TestPolyAnalyzer.c
    incubator/lucy/trunk/core/Lucy/Test/Plan/TestBlobType.c
    incubator/lucy/trunk/core/Lucy/Test/Plan/TestFullTextType.c
    incubator/lucy/trunk/core/Lucy/Test/TestQueryParserLogic.c
    incubator/lucy/trunk/core/Lucy/Test/TestSchema.c
    incubator/lucy/trunk/devel/benchmarks/indexers/BenchmarkingIndexer.pm
    incubator/lucy/trunk/perl/lib/Lucy.pm
    incubator/lucy/trunk/perl/lib/Lucy/Analysis/PolyAnalyzer.pm
    incubator/lucy/trunk/perl/lib/Lucy/Docs/Cookbook/CustomQuery.pod
    incubator/lucy/trunk/perl/lib/Lucy/Docs/Tutorial/Analysis.pod
    incubator/lucy/trunk/perl/lib/Lucy/Test.pm
    incubator/lucy/trunk/perl/t/155-snowball_stop_filter.t
    incubator/lucy/trunk/perl/t/156-snowball_stemmer.t
    incubator/lucy/trunk/perl/t/213-segment_merging.t
    incubator/lucy/trunk/perl/t/214-spec_field.t
    incubator/lucy/trunk/perl/t/215-term_vectors.t
    incubator/lucy/trunk/perl/t/218-del_merging.t
    incubator/lucy/trunk/perl/t/219-byte_buf_doc.t
    incubator/lucy/trunk/perl/t/220-zlib_doc.t
    incubator/lucy/trunk/perl/t/221-sort_writer.t
    incubator/lucy/trunk/perl/t/302-many_fields.t
    incubator/lucy/trunk/perl/t/303-highlighter.t
    incubator/lucy/trunk/perl/t/304-verify_utf8.t
    incubator/lucy/trunk/perl/t/311-hl_selection.t
    incubator/lucy/trunk/perl/t/400-match_posting.t
    incubator/lucy/trunk/perl/t/504-similarity.t
    incubator/lucy/trunk/perl/t/510-remote_search.t
    incubator/lucy/trunk/perl/t/511-sort_spec.t
    incubator/lucy/trunk/perl/t/515-range_query.t
    incubator/lucy/trunk/perl/t/601-queryparser.t
    incubator/lucy/trunk/perl/t/602-boosts.t
    incubator/lucy/trunk/perl/t/604-simple_search.t
    incubator/lucy/trunk/perl/t/605-store_pos_boost.t
    incubator/lucy/trunk/perl/t/607-queryparser_multi_field.t
    incubator/lucy/trunk/perl/t/611-queryparser_syntax.t
    incubator/lucy/trunk/perl/xs/Lucy/Analysis/Tokenizer.c

Modified: incubator/lucy/trunk/clownfish/lib/Clownfish/Binding/Perl.pm
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/clownfish/lib/Clownfish/Binding/Perl.pm?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/clownfish/lib/Clownfish/Binding/Perl.pm (original)
+++ incubator/lucy/trunk/clownfish/lib/Clownfish/Binding/Perl.pm Fri Mar 11 23:35:35 2011
@@ -338,7 +338,7 @@ my %ks_compat = (
         [qw( KinoSearch::Plan::BlobType KinoSearch::FieldType::BlobType )],
     'Lucy::Analysis::PolyAnalyzer' =>
         [qw( KinoSearch::Analysis::PolyAnalyzer )],
-    'Lucy::Analysis::Tokenizer'  => [qw( KinoSearch::Analysis::Tokenizer )],
+    'Lucy::Analysis::RegexTokenizer'  => [qw( KinoSearch::Analysis::Tokenizer )],
     'Lucy::Analysis::CaseFolder' => [
         qw( KinoSearch::Analysis::CaseFolder KinoSearch::Analysis::LCNormalizer )
     ],

Modified: incubator/lucy/trunk/core/Lucy/Analysis/Analyzer.cfh
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Analysis/Analyzer.cfh?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Analysis/Analyzer.cfh (original)
+++ incubator/lucy/trunk/core/Lucy/Analysis/Analyzer.cfh Fri Mar 11 23:35:35 2011
@@ -20,7 +20,7 @@ parcel Lucy;
  *
  * An Analyzer is a filter which processes text, transforming it from one form
  * into another.  For instance, an analyzer might break up a long text into
- * smaller pieces (L<Tokenizer|Lucy::Analysis::Tokenizer>), or it might
+ * smaller pieces (L<RegexTokenizer|Lucy::Analysis::RegexTokenizer>), or it might
  * perform case folding to facilitate case-insensitive search
  * (L<CaseFolder|Lucy::Analysis::CaseFolder>).
  */

Modified: incubator/lucy/trunk/core/Lucy/Analysis/PolyAnalyzer.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Analysis/PolyAnalyzer.c?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Analysis/PolyAnalyzer.c (original)
+++ incubator/lucy/trunk/core/Lucy/Analysis/PolyAnalyzer.c Fri Mar 11 23:35:35 2011
@@ -22,7 +22,7 @@
 #include "Lucy/Analysis/Token.h"
 #include "Lucy/Analysis/Inversion.h"
 #include "Lucy/Analysis/SnowballStemmer.h"
-#include "Lucy/Analysis/Tokenizer.h"
+#include "Lucy/Analysis/RegexTokenizer.h"
 
 PolyAnalyzer*
 PolyAnalyzer_new(const CharBuf *language, VArray *analyzers)
@@ -45,7 +45,7 @@ PolyAnalyzer_init(PolyAnalyzer *self, co
     else if (language) {
         self->analyzers = VA_new(3);
         VA_Push(self->analyzers, (Obj*)CaseFolder_new());
-        VA_Push(self->analyzers, (Obj*)Tokenizer_new(NULL));
+        VA_Push(self->analyzers, (Obj*)RegexTokenizer_new(NULL));
         VA_Push(self->analyzers, (Obj*)SnowStemmer_new(language));
     }
     else {

Modified: incubator/lucy/trunk/core/Lucy/Analysis/PolyAnalyzer.cfh
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Analysis/PolyAnalyzer.cfh?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Analysis/PolyAnalyzer.cfh (original)
+++ incubator/lucy/trunk/core/Lucy/Analysis/PolyAnalyzer.cfh Fri Mar 11 23:35:35 2011
@@ -23,7 +23,7 @@ parcel Lucy;
  * either provide the Analyzers yourself, or you can specify a supported
  * language, in which case a PolyAnalyzer consisting of a
  * L<CaseFolder|Lucy::Analysis::CaseFolder>, a
- * L<Tokenizer|Lucy::Analysis::Tokenizer>, and a
+ * L<RegexTokenizer|Lucy::Analysis::RegexTokenizer>, and a
  * L<SnowballStemmer|Lucy::Analysis::SnowballStemmer> will be generated for you.  
  * 
  * Supported languages:
@@ -55,7 +55,7 @@ class Lucy::Analysis::PolyAnalyzer 
     /** 
      * @param language An ISO code from the list of supported languages.
      * @param analyzers An array of Analyzers.  The order of the analyzers
-     * matters.  Don't put a SnowballStemmer before a Tokenizer (can't stem whole
+     * matters.  Don't put a SnowballStemmer before a RegexTokenizer (can't stem whole
      * documents or paragraphs -- just individual words), or a SnowballStopFilter
      * after a SnowballStemmer (stemmed words, e.g. "themselv", will not appear in a
      * stoplist).  In general, the sequence should be: normalize, tokenize,

Copied: incubator/lucy/trunk/core/Lucy/Analysis/RegexTokenizer.c (from r1078955, incubator/lucy/trunk/core/Lucy/Analysis/Tokenizer.c)
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Analysis/RegexTokenizer.c?p2=incubator/lucy/trunk/core/Lucy/Analysis/RegexTokenizer.c&p1=incubator/lucy/trunk/core/Lucy/Analysis/Tokenizer.c&r1=1078955&r2=1080795&rev=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Analysis/Tokenizer.c (original)
+++ incubator/lucy/trunk/core/Lucy/Analysis/RegexTokenizer.c Fri Mar 11 23:35:35 2011
@@ -14,71 +14,72 @@
  * limitations under the License.
  */
 
-#define C_LUCY_TOKENIZER
+#define C_LUCY_REGEXTOKENIZER
 #define C_LUCY_TOKEN
 #include "Lucy/Util/ToolSet.h"
 
-#include "Lucy/Analysis/Tokenizer.h"
+#include "Lucy/Analysis/RegexTokenizer.h"
 #include "Lucy/Analysis/Token.h"
 #include "Lucy/Analysis/Inversion.h"
 
-Tokenizer*
-Tokenizer_new(const CharBuf *pattern)
+RegexTokenizer*
+RegexTokenizer_new(const CharBuf *pattern)
 {
-    Tokenizer *self = (Tokenizer*)VTable_Make_Obj(TOKENIZER);
-    return Tokenizer_init(self, pattern);
+    RegexTokenizer *self = (RegexTokenizer*)VTable_Make_Obj(REGEXTOKENIZER);
+    return RegexTokenizer_init(self, pattern);
 }
 
 Inversion*
-Tokenizer_transform(Tokenizer *self, Inversion *inversion)
+RegexTokenizer_transform(RegexTokenizer *self, Inversion *inversion)
 {
     Inversion *new_inversion = Inversion_new(NULL);
     Token *token;
 
     while (NULL != (token = Inversion_Next(inversion))) {
-        Tokenizer_Tokenize_Str(self, token->text, token->len, new_inversion);
+        RegexTokenizer_Tokenize_Str(self, token->text, token->len, 
+                                    new_inversion);
     }
 
     return new_inversion;
 }
 
 Inversion*
-Tokenizer_transform_text(Tokenizer *self, CharBuf *text)
+RegexTokenizer_transform_text(RegexTokenizer *self, CharBuf *text)
 {
     Inversion *new_inversion = Inversion_new(NULL);
-    Tokenizer_Tokenize_Str(self, (char*)CB_Get_Ptr8(text), CB_Get_Size(text), 
-        new_inversion);
+    RegexTokenizer_Tokenize_Str(self, (char*)CB_Get_Ptr8(text), 
+        CB_Get_Size(text), new_inversion);
     return new_inversion;
 }
 
 Obj*
-Tokenizer_dump(Tokenizer *self)
+RegexTokenizer_dump(RegexTokenizer *self)
 {
-    Tokenizer_dump_t super_dump
-        = (Tokenizer_dump_t)SUPER_METHOD(TOKENIZER, Tokenizer, Dump);
+    RegexTokenizer_dump_t super_dump
+        = (RegexTokenizer_dump_t)SUPER_METHOD(REGEXTOKENIZER, RegexTokenizer, Dump);
     Hash *dump = (Hash*)CERTIFY(super_dump(self), HASH);
     Hash_Store_Str(dump, "pattern", 7, CB_Dump(self->pattern));
     return (Obj*)dump;
 }
 
-Tokenizer*
-Tokenizer_load(Tokenizer *self, Obj *dump)
+RegexTokenizer*
+RegexTokenizer_load(RegexTokenizer *self, Obj *dump)
 {
     Hash *source = (Hash*)CERTIFY(dump, HASH);
-    Tokenizer_load_t super_load 
-        = (Tokenizer_load_t)SUPER_METHOD(TOKENIZER, Tokenizer, Load);
-    Tokenizer *loaded = super_load(self, dump);
+    RegexTokenizer_load_t super_load 
+        = (RegexTokenizer_load_t)SUPER_METHOD(REGEXTOKENIZER, RegexTokenizer, Load);
+    RegexTokenizer *loaded = super_load(self, dump);
     CharBuf *pattern = (CharBuf*)CERTIFY(
         Hash_Fetch_Str(source, "pattern", 7), CHARBUF);
-    return Tokenizer_init(loaded, pattern);
+    return RegexTokenizer_init(loaded, pattern);
 }
 
 bool_t
-Tokenizer_equals(Tokenizer *self, Obj *other)
+RegexTokenizer_equals(RegexTokenizer *self, Obj *other)
 {
-    Tokenizer *const evil_twin = (Tokenizer*)other;
+    RegexTokenizer *const evil_twin = (RegexTokenizer*)other;
     if (evil_twin == self) return true;
-    if (!Obj_Is_A(other, TOKENIZER)) return false;
+    if (!Obj_Is_A(other, REGEXTOKENIZER)) return false;
     if (!CB_Equals(evil_twin->pattern, (Obj*)self->pattern)) return false;
     return true;
 }

Copied: incubator/lucy/trunk/core/Lucy/Analysis/RegexTokenizer.cfh (from r1078955, incubator/lucy/trunk/core/Lucy/Analysis/Tokenizer.cfh)
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Analysis/RegexTokenizer.cfh?p2=incubator/lucy/trunk/core/Lucy/Analysis/RegexTokenizer.cfh&p1=incubator/lucy/trunk/core/Lucy/Analysis/Tokenizer.cfh&r1=1078955&r2=1080795&rev=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Analysis/Tokenizer.cfh (original)
+++ incubator/lucy/trunk/core/Lucy/Analysis/RegexTokenizer.cfh Fri Mar 11 23:35:35 2011
@@ -22,7 +22,7 @@ parcel Lucy;
  * array of "tokens".  For instance, the string "three blind mice" might be
  * tokenized into "three", "blind", "mice".
  * 
- * Lucy::Analysis::Tokenizer decides where it should break up the text
+ * Lucy::Analysis::RegexTokenizer decides where it should break up the text
  * based on a regular expression compiled from a supplied <code>pattern</code>
  * matching one token.  If our source string is...
  * 
@@ -47,13 +47,13 @@ parcel Lucy;
  * ... the difference being that the word character tokenizer skips over
  * punctuation as well as whitespace when determining token boundaries.
  */
-class Lucy::Analysis::Tokenizer 
+class Lucy::Analysis::RegexTokenizer 
     inherits Lucy::Analysis::Analyzer {
 
     CharBuf *pattern;
     void    *token_re;
 
-    inert incremented Tokenizer* 
+    inert incremented RegexTokenizer* 
     new(const CharBuf *pattern = NULL);
 
     /**
@@ -62,39 +62,39 @@ class Lucy::Analysis::Tokenizer 
      * <code>\w+(?:[\x{2019}']\w+)*</code>, which matches "it's" as well as
      * "it" and "O'Henry's" as well as "Henry".
      */
-    public inert Tokenizer* 
-    init(Tokenizer *self, const CharBuf *pattern = NULL);
+    public inert RegexTokenizer* 
+    init(RegexTokenizer *self, const CharBuf *pattern = NULL);
 
     public incremented Inversion*
-    Transform(Tokenizer *self, Inversion *inversion);
+    Transform(RegexTokenizer *self, Inversion *inversion);
 
     public incremented Inversion*
-    Transform_Text(Tokenizer *self, CharBuf *text);
+    Transform_Text(RegexTokenizer *self, CharBuf *text);
 
     /** Tokenize the supplied string and add any Tokens generated to the
      * supplied Inversion.
      */
     void
-    Tokenize_Str(Tokenizer *self, const char *text, size_t len, 
+    Tokenize_Str(RegexTokenizer *self, const char *text, size_t len, 
                  Inversion *inversion);
 
     /** Set the compiled regular expression for matching a token.  Also sets
      * <code>pattern</code> as a side effect.
      */
     void
-    Set_Token_RE(Tokenizer *self, void *token_re);
+    Set_Token_RE(RegexTokenizer *self, void *token_re);
 
     public incremented Obj*
-    Dump(Tokenizer *self);
+    Dump(RegexTokenizer *self);
 
-    public incremented Tokenizer*
-    Load(Tokenizer *self, Obj *dump);
+    public incremented RegexTokenizer*
+    Load(RegexTokenizer *self, Obj *dump);
 
     public bool_t
-    Equals(Tokenizer *self, Obj *other);
+    Equals(RegexTokenizer *self, Obj *other);
 
     public void
-    Destroy(Tokenizer *self);
+    Destroy(RegexTokenizer *self);
 }
 
 

Modified: incubator/lucy/trunk/core/Lucy/Test/Analysis/TestPolyAnalyzer.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Test/Analysis/TestPolyAnalyzer.c?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Test/Analysis/TestPolyAnalyzer.c (original)
+++ incubator/lucy/trunk/core/Lucy/Test/Analysis/TestPolyAnalyzer.c Fri Mar 11 23:35:35 2011
@@ -24,7 +24,7 @@
 #include "Lucy/Analysis/CaseFolder.h"
 #include "Lucy/Analysis/SnowballStopFilter.h"
 #include "Lucy/Analysis/SnowballStemmer.h"
-#include "Lucy/Analysis/Tokenizer.h"
+#include "Lucy/Analysis/RegexTokenizer.h"
 
 static void
 test_Dump_Load_and_Equals(TestBatch *batch)
@@ -60,7 +60,7 @@ test_analysis(TestBatch *batch)
     CharBuf            *EN          = (CharBuf*)ZCB_WRAP_STR("en", 2);
     CharBuf            *source_text = CB_newf("Eats, shoots and leaves.");
     CaseFolder         *case_folder = CaseFolder_new();
-    Tokenizer          *tokenizer   = Tokenizer_new(NULL);
+    RegexTokenizer     *tokenizer   = RegexTokenizer_new(NULL);
     SnowballStopFilter *stopfilter  = SnowStop_new(EN, NULL);
     SnowballStemmer    *stemmer     = SnowStemmer_new(EN);
 
@@ -100,7 +100,7 @@ test_analysis(TestBatch *batch)
         VA_Push(expected, (Obj*)CB_newf("and"));
         VA_Push(expected, (Obj*)CB_newf("leaves"));
         TestUtils_test_analyzer(batch, (Analyzer*)polyanalyzer, source_text,
-            expected, "With Tokenizer");
+            expected, "With RegexTokenizer");
         DECREF(expected);
         DECREF(polyanalyzer);
         DECREF(analyzers);

Copied: incubator/lucy/trunk/core/Lucy/Test/Analysis/TestRegexTokenizer.c (from r1078955, incubator/lucy/trunk/core/Lucy/Test/Analysis/TestTokenizer.c)
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Test/Analysis/TestRegexTokenizer.c?p2=incubator/lucy/trunk/core/Lucy/Test/Analysis/TestRegexTokenizer.c&p1=incubator/lucy/trunk/core/Lucy/Test/Analysis/TestTokenizer.c&r1=1078955&r2=1080795&rev=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Test/Analysis/TestTokenizer.c (original)
+++ incubator/lucy/trunk/core/Lucy/Test/Analysis/TestRegexTokenizer.c Fri Mar 11 23:35:35 2011
@@ -14,12 +14,12 @@
  * limitations under the License.
  */
 
-#define C_LUCY_TESTTOKENIZER
+#define C_LUCY_TESTREGEXTOKENIZER
 #include "Lucy/Util/ToolSet.h"
 
 #include "Lucy/Test.h"
-#include "Lucy/Test/Analysis/TestTokenizer.h"
-#include "Lucy/Analysis/Tokenizer.h"
+#include "Lucy/Test/Analysis/TestRegexTokenizer.h"
+#include "Lucy/Analysis/RegexTokenizer.h"
 
 
 static void
@@ -27,22 +27,22 @@ test_Dump_Load_and_Equals(TestBatch *bat
 {
     ZombieCharBuf *word_char_pattern  = ZCB_WRAP_STR("\\w+", 3);  
     ZombieCharBuf *whitespace_pattern = ZCB_WRAP_STR("\\S+", 3);
-    Tokenizer *word_char_tokenizer =
-        Tokenizer_new((CharBuf*)word_char_pattern);
-    Tokenizer *whitespace_tokenizer =
-        Tokenizer_new((CharBuf*)whitespace_pattern);
-    Obj *word_char_dump  = Tokenizer_Dump(word_char_tokenizer);
-    Obj *whitespace_dump = Tokenizer_Dump(whitespace_tokenizer);
-    Tokenizer *word_char_clone 
-        = Tokenizer_Load(whitespace_tokenizer, word_char_dump);
-    Tokenizer *whitespace_clone 
-        = Tokenizer_Load(whitespace_tokenizer, whitespace_dump);
+    RegexTokenizer *word_char_tokenizer =
+        RegexTokenizer_new((CharBuf*)word_char_pattern);
+    RegexTokenizer *whitespace_tokenizer =
+        RegexTokenizer_new((CharBuf*)whitespace_pattern);
+    Obj *word_char_dump  = RegexTokenizer_Dump(word_char_tokenizer);
+    Obj *whitespace_dump = RegexTokenizer_Dump(whitespace_tokenizer);
+    RegexTokenizer *word_char_clone 
+        = RegexTokenizer_Load(whitespace_tokenizer, word_char_dump);
+    RegexTokenizer *whitespace_clone 
+        = RegexTokenizer_Load(whitespace_tokenizer, whitespace_dump);
 
-    TEST_FALSE(batch, Tokenizer_Equals(word_char_tokenizer,
+    TEST_FALSE(batch, RegexTokenizer_Equals(word_char_tokenizer,
         (Obj*)whitespace_tokenizer), "Equals() false with different pattern");
-    TEST_TRUE(batch, Tokenizer_Equals(word_char_tokenizer,
+    TEST_TRUE(batch, RegexTokenizer_Equals(word_char_tokenizer,
         (Obj*)word_char_clone), "Dump => Load round trip");
-    TEST_TRUE(batch, Tokenizer_Equals(whitespace_tokenizer,
+    TEST_TRUE(batch, RegexTokenizer_Equals(whitespace_tokenizer,
         (Obj*)whitespace_clone), "Dump => Load round trip");
 
     DECREF(word_char_tokenizer);
@@ -54,7 +54,7 @@ test_Dump_Load_and_Equals(TestBatch *bat
 }
 
 void
-TestTokenizer_run_tests()
+TestRegexTokenizer_run_tests()
 {
     TestBatch *batch = TestBatch_new(3);
 

Copied: incubator/lucy/trunk/core/Lucy/Test/Analysis/TestRegexTokenizer.cfh (from r1078955, incubator/lucy/trunk/core/Lucy/Test/Analysis/TestTokenizer.cfh)
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Test/Analysis/TestRegexTokenizer.cfh?p2=incubator/lucy/trunk/core/Lucy/Test/Analysis/TestRegexTokenizer.cfh&p1=incubator/lucy/trunk/core/Lucy/Test/Analysis/TestTokenizer.cfh&r1=1078955&r2=1080795&rev=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Test/Analysis/TestTokenizer.cfh (original)
+++ incubator/lucy/trunk/core/Lucy/Test/Analysis/TestRegexTokenizer.cfh Fri Mar 11 23:35:35 2011
@@ -16,7 +16,7 @@
 
 parcel Lucy;
 
-inert class Lucy::Test::Analysis::TestTokenizer {
+inert class Lucy::Test::Analysis::TestRegexTokenizer {
     inert void
     run_tests();
 }

Modified: incubator/lucy/trunk/core/Lucy/Test/Plan/TestBlobType.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Test/Plan/TestBlobType.c?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Test/Plan/TestBlobType.c (original)
+++ incubator/lucy/trunk/core/Lucy/Test/Plan/TestBlobType.c Fri Mar 11 23:35:35 2011
@@ -21,7 +21,7 @@
 #include "Lucy/Test/Plan/TestBlobType.h"
 #include "Lucy/Test/TestUtils.h"
 #include "Lucy/Plan/BlobType.h"
-#include "Lucy/Analysis/Tokenizer.h"
+#include "Lucy/Analysis/RegexTokenizer.h"
 
 static void
 test_Dump_Load_and_Equals(TestBatch *batch)

Modified: incubator/lucy/trunk/core/Lucy/Test/Plan/TestFullTextType.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Test/Plan/TestFullTextType.c?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Test/Plan/TestFullTextType.c (original)
+++ incubator/lucy/trunk/core/Lucy/Test/Plan/TestFullTextType.c Fri Mar 11 23:35:35 2011
@@ -22,22 +22,22 @@
 #include "Lucy/Test/TestUtils.h"
 #include "Lucy/Plan/FullTextType.h"
 #include "Lucy/Analysis/CaseFolder.h"
-#include "Lucy/Analysis/Tokenizer.h"
+#include "Lucy/Analysis/RegexTokenizer.h"
 
 static void
 test_Dump_Load_and_Equals(TestBatch *batch)
 {
-    Tokenizer    *tokenizer     = Tokenizer_new(NULL);
-    CaseFolder   *case_folder   = CaseFolder_new();
-    FullTextType *type          = FullTextType_new((Analyzer*)tokenizer);
-    FullTextType *other         = FullTextType_new((Analyzer*)case_folder);
-    FullTextType *boost_differs = FullTextType_new((Analyzer*)tokenizer);
-    FullTextType *not_indexed   = FullTextType_new((Analyzer*)tokenizer);
-    FullTextType *not_stored    = FullTextType_new((Analyzer*)tokenizer);
-    FullTextType *highlightable = FullTextType_new((Analyzer*)tokenizer);
-    Obj          *dump          = (Obj*)FullTextType_Dump(type);
-    Obj          *clone         = Obj_Load(dump, dump);
-    Obj          *another_dump  = (Obj*)FullTextType_Dump_For_Schema(type);
+    RegexTokenizer *tokenizer     = RegexTokenizer_new(NULL);
+    CaseFolder     *case_folder   = CaseFolder_new();
+    FullTextType   *type          = FullTextType_new((Analyzer*)tokenizer);
+    FullTextType   *other         = FullTextType_new((Analyzer*)case_folder);
+    FullTextType   *boost_differs = FullTextType_new((Analyzer*)tokenizer);
+    FullTextType   *not_indexed   = FullTextType_new((Analyzer*)tokenizer);
+    FullTextType   *not_stored    = FullTextType_new((Analyzer*)tokenizer);
+    FullTextType   *highlightable = FullTextType_new((Analyzer*)tokenizer);
+    Obj            *dump          = (Obj*)FullTextType_Dump(type);
+    Obj            *clone         = Obj_Load(dump, dump);
+    Obj            *another_dump  = (Obj*)FullTextType_Dump_For_Schema(type);
 
     FullTextType_Set_Boost(boost_differs, 1.5);
     FullTextType_Set_Indexed(not_indexed, false);
@@ -80,10 +80,10 @@ test_Dump_Load_and_Equals(TestBatch *bat
 static void
 test_Compare_Values(TestBatch *batch)
 {
-    Tokenizer     *tokenizer = Tokenizer_new(NULL);
-    FullTextType  *type      = FullTextType_new((Analyzer*)tokenizer);
-    ZombieCharBuf *a         = ZCB_WRAP_STR("a", 1);
-    ZombieCharBuf *b         = ZCB_WRAP_STR("b", 1);
+    RegexTokenizer *tokenizer = RegexTokenizer_new(NULL);
+    FullTextType   *type      = FullTextType_new((Analyzer*)tokenizer);
+    ZombieCharBuf  *a         = ZCB_WRAP_STR("a", 1);
+    ZombieCharBuf  *b         = ZCB_WRAP_STR("b", 1);
 
     TEST_TRUE(batch, 
         FullTextType_Compare_Values(type, (Obj*)a, (Obj*)b) < 0,

Modified: incubator/lucy/trunk/core/Lucy/Test/TestQueryParserLogic.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Test/TestQueryParserLogic.c?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Test/TestQueryParserLogic.c (original)
+++ incubator/lucy/trunk/core/Lucy/Test/TestQueryParserLogic.c Fri Mar 11 23:35:35 2011
@@ -26,7 +26,7 @@
 #include "Lucy/Test/TestSchema.h"
 #include "Lucy/Test/TestUtils.h"
 #include "Lucy/Analysis/Analyzer.h"
-#include "Lucy/Analysis/Tokenizer.h"
+#include "Lucy/Analysis/RegexTokenizer.h"
 #include "Lucy/Document/Doc.h"
 #include "Lucy/Index/Indexer.h"
 #include "Lucy/Search/Hits.h"

Modified: incubator/lucy/trunk/core/Lucy/Test/TestSchema.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Test/TestSchema.c?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Test/TestSchema.c (original)
+++ incubator/lucy/trunk/core/Lucy/Test/TestSchema.c Fri Mar 11 23:35:35 2011
@@ -21,7 +21,7 @@
 #include "Lucy/Test/Plan/TestArchitecture.h"
 #include "Lucy/Test/TestSchema.h"
 #include "Lucy/Analysis/CaseFolder.h"
-#include "Lucy/Analysis/Tokenizer.h"
+#include "Lucy/Analysis/RegexTokenizer.h"
 #include "Lucy/Plan/FullTextType.h"
 #include "Lucy/Plan/Architecture.h"
 
@@ -35,7 +35,7 @@ TestSchema_new()
 TestSchema*
 TestSchema_init(TestSchema *self)
 {
-    Tokenizer *tokenizer = Tokenizer_new(NULL);
+    RegexTokenizer *tokenizer = RegexTokenizer_new(NULL);
     FullTextType *type = FullTextType_new((Analyzer*)tokenizer);
 
     Schema_init((Schema*)self);

Modified: incubator/lucy/trunk/devel/benchmarks/indexers/BenchmarkingIndexer.pm
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/devel/benchmarks/indexers/BenchmarkingIndexer.pm?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/devel/benchmarks/indexers/BenchmarkingIndexer.pm (original)
+++ incubator/lucy/trunk/devel/benchmarks/indexers/BenchmarkingIndexer.pm Fri Mar 11 23:35:35 2011
@@ -122,13 +122,13 @@ END_REPORT
 }
 
 package BenchSchema::WhiteSpaceTokenizer;
-use base qw( Lucy::Analysis::Tokenizer );
+use base qw( Lucy::Analysis::RegexTokenizer );
 
 sub new { return shift->SUPER::new( pattern => '\S+' ) }
 
 package BenchSchema;
 use base qw( Lucy::Plan::Schema );
-use Lucy::Analysis::Tokenizer;
+use Lucy::Analysis::RegexTokenizer;
 
 sub new {
     my $self = shift->SUPER::new;

Modified: incubator/lucy/trunk/perl/lib/Lucy.pm
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/lib/Lucy.pm?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/lib/Lucy.pm (original)
+++ incubator/lucy/trunk/perl/lib/Lucy.pm Fri Mar 11 23:35:35 2011
@@ -100,7 +100,7 @@ sub error {$Lucy::Object::Err::error}
 }
 
 {
-    package Lucy::Analysis::Tokenizer;
+    package Lucy::Analysis::RegexTokenizer;
 
     sub compile_token_re { return qr/$_[1]/ }
 

Modified: incubator/lucy/trunk/perl/lib/Lucy/Analysis/PolyAnalyzer.pm
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/lib/Lucy/Analysis/PolyAnalyzer.pm?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/lib/Lucy/Analysis/PolyAnalyzer.pm (original)
+++ incubator/lucy/trunk/perl/lib/Lucy/Analysis/PolyAnalyzer.pm Fri Mar 11 23:35:35 2011
@@ -42,7 +42,7 @@ my $constructor = <<'END_CONSTRUCTOR';
     # or...
 
     my $case_folder  = Lucy::Analysis::CaseFolder->new;
-    my $tokenizer    = Lucy::Analysis::Tokenizer->new;
+    my $tokenizer    = Lucy::Analysis::RegexTokenizer->new;
     my $stemmer      = Lucy::Analysis::SnowballStemmer->new( language => 'en' );
     my $polyanalyzer = Lucy::Analysis::PolyAnalyzer->new(
         analyzers => [ $case_folder, $whitespace_tokenizer, $stemmer, ], );

Copied: incubator/lucy/trunk/perl/lib/Lucy/Analysis/RegexTokenizer.pm (from r1078955, incubator/lucy/trunk/perl/lib/Lucy/Analysis/Tokenizer.pm)
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/lib/Lucy/Analysis/RegexTokenizer.pm?p2=incubator/lucy/trunk/perl/lib/Lucy/Analysis/RegexTokenizer.pm&p1=incubator/lucy/trunk/perl/lib/Lucy/Analysis/Tokenizer.pm&r1=1078955&r2=1080795&rev=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/lib/Lucy/Analysis/Tokenizer.pm (original)
+++ incubator/lucy/trunk/perl/lib/Lucy/Analysis/RegexTokenizer.pm Fri Mar 11 23:35:35 2011
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-package Lucy::Analysis::Tokenizer;
+package Lucy::Analysis::RegexTokenizer;
 use Lucy;
 
 1;
@@ -24,14 +24,14 @@ __BINDING__
 
 my $synopsis = <<'END_SYNOPSIS';
     my $whitespace_tokenizer
-        = Lucy::Analysis::Tokenizer->new( pattern => '\S+' );
+        = Lucy::Analysis::RegexTokenizer->new( pattern => '\S+' );
 
     # or...
     my $word_char_tokenizer
-        = Lucy::Analysis::Tokenizer->new( pattern => '\w+' );
+        = Lucy::Analysis::RegexTokenizer->new( pattern => '\w+' );
 
     # or...
-    my $apostrophising_tokenizer = Lucy::Analysis::Tokenizer->new;
+    my $apostrophising_tokenizer = Lucy::Analysis::RegexTokenizer->new;
 
     # Then... once you have a tokenizer, put it into a PolyAnalyzer:
     my $polyanalyzer = Lucy::Analysis::PolyAnalyzer->new(
@@ -39,14 +39,14 @@ my $synopsis = <<'END_SYNOPSIS';
 END_SYNOPSIS
 
 my $constructor = <<'END_CONSTRUCTOR';
-    my $word_char_tokenizer = Lucy::Analysis::Tokenizer->new(
+    my $word_char_tokenizer = Lucy::Analysis::RegexTokenizer->new(
         pattern => '\w+',    # required
     );
 END_CONSTRUCTOR
 
 Clownfish::Binding::Perl::Class->register(
     parcel            => "Lucy",
-    class_name        => "Lucy::Analysis::Tokenizer",
+    class_name        => "Lucy::Analysis::RegexTokenizer",
     bind_constructors => ["_new"],
     make_pod          => {
         constructor => { sample => $constructor },

Modified: incubator/lucy/trunk/perl/lib/Lucy/Docs/Cookbook/CustomQuery.pod
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/lib/Lucy/Docs/Cookbook/CustomQuery.pod?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/lib/Lucy/Docs/Cookbook/CustomQuery.pod (original)
+++ incubator/lucy/trunk/perl/lib/Lucy/Docs/Cookbook/CustomQuery.pod Fri Mar 11 23:35:35 2011
@@ -305,7 +305,7 @@ prefix conflation -- is not perfectly co
     # Polyanalyzer with no SnowballStemmer.
     my $analyzer = Lucy::Analysis::PolyAnalyzer->new(
         analyzers => [
-            Lucy::Analysis::Tokenizer->new,
+            Lucy::Analysis::RegexTokenizer->new,
             Lucy::Analysis::CaseFolder->new,
         ],
     );

Modified: incubator/lucy/trunk/perl/lib/Lucy/Docs/Tutorial/Analysis.pod
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/lib/Lucy/Docs/Tutorial/Analysis.pod?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/lib/Lucy/Docs/Tutorial/Analysis.pod (original)
+++ incubator/lucy/trunk/perl/lib/Lucy/Docs/Tutorial/Analysis.pod Fri Mar 11 23:35:35 2011
@@ -19,9 +19,9 @@ Lucy::Docs::Tutorial::Analysis - How to 
 
 =head1 DESCRIPTION
 
-Try swapping out the PolyAnalyzer in our Schema for a Tokenizer:
+Try swapping out the PolyAnalyzer in our Schema for a RegexTokenizer:
 
-    my $tokenizer = Lucy::Analysis::Tokenizer->new;
+    my $tokenizer = Lucy::Analysis::RegexTokenizer->new;
     my $type = Lucy::Plan::FullTextType->new(
         analyzer => $tokenizer,
     );
@@ -30,13 +30,13 @@ Search for C<senate>, C<Senate>, and C<S
 change and re-indexing.
 
 Under PolyAnalyzer, the results are identical for all three searches, but
-under Tokenizer, searches are case-sensitive, and the result sets for
+under RegexTokenizer, searches are case-sensitive, and the result sets for
 C<Senate> and C<Senator> are distinct.
 
 =head2 PolyAnalyzer
 
 What's happening is that PolyAnalyzer is performing more aggressive processing
-than Tokenizer.  In addition to tokenizing, it's also converting all text to
+than RegexTokenizer.  In addition to tokenizing, it's also converting all text to
 lower case so that searches are case-insensitive, and using a "stemming"
 algorithm to reduce related words to a common stem (C<senat>, in this case).
 
@@ -45,7 +45,7 @@ In this case, it's three-in-one, since s
 C<< language => 'en' >> is equivalent to this snippet:
 
     my $case_folder  = Lucy::Analysis::CaseFolder->new;
-    my $tokenizer    = Lucy::Analysis::Tokenizer->new;
+    my $tokenizer    = Lucy::Analysis::RegexTokenizer->new;
     my $stemmer      = Lucy::Analysis::SnowballStemmer->new( language => 'en' );
     my $polyanalyzer = Lucy::Analysis::PolyAnalyzer->new(
         analyzers => [ $case_folder, $tokenizer, $stemmer ], 

Modified: incubator/lucy/trunk/perl/lib/Lucy/Test.pm
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/lib/Lucy/Test.pm?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/lib/Lucy/Test.pm (original)
+++ incubator/lucy/trunk/perl/lib/Lucy/Test.pm Fri Mar 11 23:35:35 2011
@@ -58,8 +58,8 @@ PPCODE:
     else if (strEQ(package, "TestSnowStemmer")) {
         lucy_TestSnowStemmer_run_tests();
     }
-    else if (strEQ(package, "TestTokenizer")) {
-        lucy_TestTokenizer_run_tests();
+    else if (strEQ(package, "TestRegexTokenizer")) {
+        lucy_TestRegexTokenizer_run_tests();
     }
     // Lucy::Object 
     else if (strEQ(package, "TestObj")) {

Copied: incubator/lucy/trunk/perl/t/154-regex_tokenizer.t (from r1078955, incubator/lucy/trunk/perl/t/154-tokenizer.t)
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/154-regex_tokenizer.t?p2=incubator/lucy/trunk/perl/t/154-regex_tokenizer.t&p1=incubator/lucy/trunk/perl/t/154-tokenizer.t&r1=1078955&r2=1080795&rev=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/154-tokenizer.t (original)
+++ incubator/lucy/trunk/perl/t/154-regex_tokenizer.t Fri Mar 11 23:35:35 2011
@@ -19,9 +19,9 @@ use warnings;
 use Test::More tests => 15;
 use Lucy::Test;
 
-my $tokenizer   = Lucy::Analysis::Tokenizer->new;
-my $other       = Lucy::Analysis::Tokenizer->new( pattern => '\w+' );
-my $yet_another = Lucy::Analysis::Tokenizer->new( pattern => '\w+' );
+my $tokenizer   = Lucy::Analysis::RegexTokenizer->new;
+my $other       = Lucy::Analysis::RegexTokenizer->new( pattern => '\w+' );
+my $yet_another = Lucy::Analysis::RegexTokenizer->new( pattern => '\w+' );
 ok( $other->equals($yet_another), "Equals" );
 ok( !$tokenizer->equals($other),  "different patterns foil Equals" );
 
@@ -41,7 +41,7 @@ is_deeply( \@token_texts, [qw( a b c )],
 is_deeply( \@start_offsets, [ 0, 2, 4, ], "correctstart offsets" );
 is_deeply( \@end_offsets,   [ 1, 3, 5, ], "correct end offsets" );
 
-$tokenizer = Lucy::Analysis::Tokenizer->new( pattern => '.' );
+$tokenizer = Lucy::Analysis::RegexTokenizer->new( pattern => '.' );
 $inversion = Lucy::Analysis::Inversion->new( text    => "a b c" );
 $inversion = $tokenizer->transform($inversion);
 
@@ -73,25 +73,25 @@ is_deeply(
     "no freakout when fed multiple tokens"
 );
 
-$tokenizer = Lucy::Analysis::Tokenizer->new( token_re => qr/../ );
+$tokenizer = Lucy::Analysis::RegexTokenizer->new( token_re => qr/../ );
 is_deeply( $tokenizer->split('aabbcc'),
     [qw( aa bb cc )], "back compat with token_re argument" );
 
 eval {
     my $toke
-        = Lucy::Analysis::Tokenizer->new(
+        = Lucy::Analysis::RegexTokenizer->new(
         pattern => '\\p{Carp::confess}' );
 };
 like( $@, qr/\\p/, "\\p forbidden in pattern" );
 
 eval {
     my $toke
-        = Lucy::Analysis::Tokenizer->new(
+        = Lucy::Analysis::RegexTokenizer->new(
         pattern => '\\P{Carp::confess}' );
 };
 like( $@, qr/\\P/, "\\P forbidden in pattern" );
 
-$tokenizer = Lucy::Analysis::Tokenizer->new( pattern => '\\w+' );
+$tokenizer = Lucy::Analysis::RegexTokenizer->new( pattern => '\\w+' );
 my $dump = $tokenizer->dump;
 $dump->{pattern} = "\\p{Carp::confess}";
 eval { $tokenizer->load($dump) };

Modified: incubator/lucy/trunk/perl/t/155-snowball_stop_filter.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/155-snowball_stop_filter.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/155-snowball_stop_filter.t (original)
+++ incubator/lucy/trunk/perl/t/155-snowball_stop_filter.t Fri Mar 11 23:35:35 2011
@@ -23,7 +23,7 @@ use Lucy::Test::TestUtils qw( test_analy
 my $stopfilter = Lucy::Analysis::SnowballStopFilter->new( language => 'en' );
 test_analyzer( $stopfilter, 'the', [], "single stopword stopalized" );
 
-my $tokenizer    = Lucy::Analysis::Tokenizer->new;
+my $tokenizer    = Lucy::Analysis::RegexTokenizer->new;
 my $polyanalyzer = Lucy::Analysis::PolyAnalyzer->new(
     analyzers => [ $tokenizer, $stopfilter ], );
 test_analyzer( $polyanalyzer, 'i am the walrus',

Modified: incubator/lucy/trunk/perl/t/156-snowball_stemmer.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/156-snowball_stemmer.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/156-snowball_stemmer.t (original)
+++ incubator/lucy/trunk/perl/t/156-snowball_stemmer.t Fri Mar 11 23:35:35 2011
@@ -24,7 +24,7 @@ my $stemmer = Lucy::Analysis::SnowballSt
 test_analyzer( $stemmer, 'ponies', ['poni'], "single word stemmed" );
 test_analyzer( $stemmer, 'pony',   ['poni'], "stem, not just truncate" );
 
-my $tokenizer    = Lucy::Analysis::Tokenizer->new;
+my $tokenizer    = Lucy::Analysis::RegexTokenizer->new;
 my $polyanalyzer = Lucy::Analysis::PolyAnalyzer->new(
     analyzers => [ $tokenizer, $stemmer ], );
 test_analyzer(

Modified: incubator/lucy/trunk/perl/t/213-segment_merging.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/213-segment_merging.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/213-segment_merging.t (original)
+++ incubator/lucy/trunk/perl/t/213-segment_merging.t Fri Mar 11 23:35:35 2011
@@ -36,7 +36,7 @@ use base qw( Lucy::Test::TestSchema );
 sub new {
     my $self = shift->SUPER::new(@_);
     my $type = Lucy::Plan::FullTextType->new(
-        analyzer      => Lucy::Analysis::Tokenizer->new,
+        analyzer      => Lucy::Analysis::RegexTokenizer->new,
         highlightable => 1,
     );
     $self->spec_field( name => 'content', type => $type );

Modified: incubator/lucy/trunk/perl/t/214-spec_field.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/214-spec_field.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/214-spec_field.t (original)
+++ incubator/lucy/trunk/perl/t/214-spec_field.t Fri Mar 11 23:35:35 2011
@@ -27,7 +27,7 @@ use base qw( Lucy::Plan::Schema );
 
 sub new {
     my $self      = shift->SUPER::new(@_);
-    my $tokenizer = Lucy::Analysis::Tokenizer->new;
+    my $tokenizer = Lucy::Analysis::RegexTokenizer->new;
     my $polyanalyzer
         = Lucy::Analysis::PolyAnalyzer->new( language => 'en' );
     my $plain

Modified: incubator/lucy/trunk/perl/t/215-term_vectors.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/215-term_vectors.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/215-term_vectors.t (original)
+++ incubator/lucy/trunk/perl/t/215-term_vectors.t Fri Mar 11 23:35:35 2011
@@ -25,7 +25,7 @@ use base qw( Lucy::Plan::Schema );
 sub new {
     my $self = shift->SUPER::new(@_);
     my $type = Lucy::Plan::FullTextType->new(
-        analyzer      => Lucy::Analysis::Tokenizer->new,
+        analyzer      => Lucy::Analysis::RegexTokenizer->new,
         highlightable => 1,
     );
     $self->spec_field( name => 'content', type => $type );

Modified: incubator/lucy/trunk/perl/t/218-del_merging.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/218-del_merging.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/218-del_merging.t (original)
+++ incubator/lucy/trunk/perl/t/218-del_merging.t Fri Mar 11 23:35:35 2011
@@ -32,7 +32,7 @@ use base 'Lucy::Plan::Schema';
 sub new {
     my $self = shift->SUPER::new(@_);
     my $type = Lucy::Plan::FullTextType->new(
-        analyzer => Lucy::Analysis::Tokenizer->new, );
+        analyzer => Lucy::Analysis::RegexTokenizer->new, );
     $self->spec_field( name => 'foo', type => $type );
     $self->spec_field( name => 'bar', type => $type );
     return $self;

Modified: incubator/lucy/trunk/perl/t/219-byte_buf_doc.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/219-byte_buf_doc.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/219-byte_buf_doc.t (original)
+++ incubator/lucy/trunk/perl/t/219-byte_buf_doc.t Fri Mar 11 23:35:35 2011
@@ -63,7 +63,7 @@ sub architecture { MyArchitecture->new }
 
 sub new {
     my $self      = shift->SUPER::new(@_);
-    my $tokenizer = Lucy::Analysis::Tokenizer->new;
+    my $tokenizer = Lucy::Analysis::RegexTokenizer->new;
     my $type = Lucy::Plan::FullTextType->new( analyzer => $tokenizer );
     $self->spec_field( name => 'value', type => $type );
     return $self;

Modified: incubator/lucy/trunk/perl/t/220-zlib_doc.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/220-zlib_doc.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/220-zlib_doc.t (original)
+++ incubator/lucy/trunk/perl/t/220-zlib_doc.t Fri Mar 11 23:35:35 2011
@@ -59,7 +59,7 @@ sub architecture { MyArchitecture->new }
 
 sub new {
     my $self      = shift->SUPER::new(@_);
-    my $tokenizer = Lucy::Analysis::Tokenizer->new;
+    my $tokenizer = Lucy::Analysis::RegexTokenizer->new;
     my $main_type
         = Lucy::Plan::FullTextType->new( analyzer => $tokenizer );
     my $unstored_type = Lucy::Plan::FullTextType->new(

Modified: incubator/lucy/trunk/perl/t/221-sort_writer.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/221-sort_writer.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/221-sort_writer.t (original)
+++ incubator/lucy/trunk/perl/t/221-sort_writer.t Fri Mar 11 23:35:35 2011
@@ -30,7 +30,7 @@ use base qw( Lucy::Plan::Schema );
 sub new {
     my $self          = shift->SUPER::new(@_);
     my $fulltext_type = Lucy::Plan::FullTextType->new(
-        analyzer => Lucy::Analysis::Tokenizer->new,
+        analyzer => Lucy::Analysis::RegexTokenizer->new,
         sortable => 1,
     );
     my $string_type = Lucy::Plan::StringType->new( sortable => 1 );

Modified: incubator/lucy/trunk/perl/t/302-many_fields.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/302-many_fields.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/302-many_fields.t (original)
+++ incubator/lucy/trunk/perl/t/302-many_fields.t Fri Mar 11 23:35:35 2011
@@ -18,7 +18,7 @@ use warnings;
 
 package MySchema;
 use base qw( Lucy::Plan::Schema );
-use Lucy::Analysis::Tokenizer;
+use Lucy::Analysis::RegexTokenizer;
 
 our %fields = ();
 
@@ -29,7 +29,7 @@ use Lucy::Test;
 
 my $schema = MySchema->new;
 my $type   = Lucy::Plan::FullTextType->new(
-    analyzer => Lucy::Analysis::Tokenizer->new, );
+    analyzer => Lucy::Analysis::RegexTokenizer->new, );
 
 for my $num_fields ( 1 .. 10 ) {
     # Build an index with $num_fields fields, and the same content in each.

Modified: incubator/lucy/trunk/perl/t/303-highlighter.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/303-highlighter.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/303-highlighter.t (original)
+++ incubator/lucy/trunk/perl/t/303-highlighter.t Fri Mar 11 23:35:35 2011
@@ -19,12 +19,12 @@ use lib 'buildlib';
 
 package MySchema;
 use base qw( Lucy::Plan::Schema );
-use Lucy::Analysis::Tokenizer;
+use Lucy::Analysis::RegexTokenizer;
 
 sub new {
     my $class      = shift;
     my $self       = $class->SUPER::new(@_);
-    my $tokenizer  = Lucy::Analysis::Tokenizer->new;
+    my $tokenizer  = Lucy::Analysis::RegexTokenizer->new;
     my $plain_type = Lucy::Plan::FullTextType->new(
         analyzer      => $tokenizer,
         highlightable => 1,

Modified: incubator/lucy/trunk/perl/t/304-verify_utf8.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/304-verify_utf8.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/304-verify_utf8.t (original)
+++ incubator/lucy/trunk/perl/t/304-verify_utf8.t Fri Mar 11 23:35:35 2011
@@ -19,11 +19,11 @@ use lib 'buildlib';
 
 package MySchema;
 use base qw( Lucy::Plan::Schema );
-use Lucy::Analysis::Tokenizer;
+use Lucy::Analysis::RegexTokenizer;
 
 sub new {
     my $self = shift->SUPER::new(@_);
-    my $analyzer = Lucy::Analysis::Tokenizer->new( pattern => '\S+' );
+    my $analyzer = Lucy::Analysis::RegexTokenizer->new( pattern => '\S+' );
     my $type = Lucy::Plan::FullTextType->new( analyzer => $analyzer, );
     $self->spec_field( name => 'content', type => $type );
     return $self;

Modified: incubator/lucy/trunk/perl/t/311-hl_selection.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/311-hl_selection.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/311-hl_selection.t (original)
+++ incubator/lucy/trunk/perl/t/311-hl_selection.t Fri Mar 11 23:35:35 2011
@@ -23,7 +23,7 @@ my $schema = Lucy::Plan::Schema->new;
 $schema->spec_field(
     name => 'content',
     type => Lucy::Plan::FullTextType->new(
-        analyzer      => Lucy::Analysis::Tokenizer->new,
+        analyzer      => Lucy::Analysis::RegexTokenizer->new,
         highlightable => 1,
     ),
 );

Modified: incubator/lucy/trunk/perl/t/400-match_posting.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/400-match_posting.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/400-match_posting.t (original)
+++ incubator/lucy/trunk/perl/t/400-match_posting.t Fri Mar 11 23:35:35 2011
@@ -34,12 +34,12 @@ sub make_similarity { MatchOnlySim->new 
 
 package MatchSchema;
 use base qw( Lucy::Plan::Schema );
-use Lucy::Analysis::Tokenizer;
+use Lucy::Analysis::RegexTokenizer;
 
 sub new {
     my $self = shift->SUPER::new(@_);
     my $type = MatchSchema::MatchOnly->new(
-        analyzer => Lucy::Analysis::Tokenizer->new );
+        analyzer => Lucy::Analysis::RegexTokenizer->new );
     $self->spec_field( name => 'content', type => $type );
     return $self;
 }

Modified: incubator/lucy/trunk/perl/t/504-similarity.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/504-similarity.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/504-similarity.t (original)
+++ incubator/lucy/trunk/perl/t/504-similarity.t Fri Mar 11 23:35:35 2011
@@ -24,11 +24,11 @@ sub make_similarity { LucyX::Index::Long
 
 package MySchema;
 use base qw( Lucy::Plan::Schema );
-use Lucy::Analysis::Tokenizer;
+use Lucy::Analysis::RegexTokenizer;
 
 sub new {
     my $self     = shift->SUPER::new(@_);
-    my $analyzer = Lucy::Analysis::Tokenizer->new;
+    my $analyzer = Lucy::Analysis::RegexTokenizer->new;
     my $plain_type
         = Lucy::Plan::FullTextType->new( analyzer => $analyzer, );
     my $long_field_type

Modified: incubator/lucy/trunk/perl/t/510-remote_search.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/510-remote_search.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/510-remote_search.t (original)
+++ incubator/lucy/trunk/perl/t/510-remote_search.t Fri Mar 11 23:35:35 2011
@@ -32,12 +32,12 @@ BEGIN {
 
 package SortSchema;
 use base qw( Lucy::Plan::Schema );
-use Lucy::Analysis::Tokenizer;
+use Lucy::Analysis::RegexTokenizer;
 
 sub new {
     my $self       = shift->SUPER::new(@_);
     my $plain_type = Lucy::Plan::FullTextType->new(
-        analyzer => Lucy::Analysis::Tokenizer->new );
+        analyzer => Lucy::Analysis::RegexTokenizer->new );
     my $string_type = Lucy::Plan::StringType->new( sortable => 1 );
     $self->spec_field( name => 'content', type => $plain_type );
     $self->spec_field( name => 'number',  type => $string_type );

Modified: incubator/lucy/trunk/perl/t/511-sort_spec.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/511-sort_spec.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/511-sort_spec.t (original)
+++ incubator/lucy/trunk/perl/t/511-sort_spec.t Fri Mar 11 23:35:35 2011
@@ -38,7 +38,7 @@ use base qw( Lucy::Plan::Schema );
 sub new {
     my $self       = shift->SUPER::new(@_);
     my $unsortable = Lucy::Plan::FullTextType->new(
-        analyzer => Lucy::Analysis::Tokenizer->new, );
+        analyzer => Lucy::Analysis::RegexTokenizer->new, );
     my $string_type = Lucy::Plan::StringType->new( sortable => 1 );
     my $int32_type = Lucy::Plan::Int32Type->new(
         indexed  => 0,

Modified: incubator/lucy/trunk/perl/t/515-range_query.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/515-range_query.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/515-range_query.t (original)
+++ incubator/lucy/trunk/perl/t/515-range_query.t Fri Mar 11 23:35:35 2011
@@ -24,7 +24,7 @@ use Lucy::Test;
 
 package RangeSchema;
 use base qw( Lucy::Plan::Schema );
-use Lucy::Analysis::Tokenizer;
+use Lucy::Analysis::RegexTokenizer;
 
 sub new {
     my $self = shift->SUPER::new(@_);

Modified: incubator/lucy/trunk/perl/t/601-queryparser.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/601-queryparser.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/601-queryparser.t (original)
+++ incubator/lucy/trunk/perl/t/601-queryparser.t Fri Mar 11 23:35:35 2011
@@ -21,11 +21,11 @@ use Lucy::Test;
 
 package PlainSchema;
 use base qw( Lucy::Plan::Schema );
-use Lucy::Analysis::Tokenizer;
+use Lucy::Analysis::RegexTokenizer;
 
 sub new {
     my $self = shift->SUPER::new(@_);
-    my $tokenizer = Lucy::Analysis::Tokenizer->new( pattern => '\S+' );
+    my $tokenizer = Lucy::Analysis::RegexTokenizer->new( pattern => '\S+' );
     my $type = Lucy::Plan::FullTextType->new( analyzer => $tokenizer, );
     $self->spec_field( name => 'content', type => $type );
     return $self;
@@ -37,7 +37,7 @@ use base qw( Lucy::Plan::Schema );
 sub new {
     my $self = shift->SUPER::new(@_);
     my $whitespace_tokenizer
-        = Lucy::Analysis::Tokenizer->new( token_re => qr/\S+/ );
+        = Lucy::Analysis::RegexTokenizer->new( token_re => qr/\S+/ );
     my $stopfilter
         = Lucy::Analysis::SnowballStopFilter->new( stoplist => { x => 1 } );
     my $polyanalyzer = Lucy::Analysis::PolyAnalyzer->new(

Modified: incubator/lucy/trunk/perl/t/602-boosts.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/602-boosts.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/602-boosts.t (original)
+++ incubator/lucy/trunk/perl/t/602-boosts.t Fri Mar 11 23:35:35 2011
@@ -25,7 +25,7 @@ use base qw( Lucy::Plan::Schema );
 sub new {
     my $self = shift->SUPER::new(@_);
     my $type = Lucy::Plan::FullTextType->new(
-        analyzer => Lucy::Analysis::Tokenizer->new );
+        analyzer => Lucy::Analysis::RegexTokenizer->new );
     $self->spec_field( name => 'content',  type => $type );
     $self->spec_field( name => 'category', type => $type );
     return $self;
@@ -36,7 +36,7 @@ use base qw( Lucy::Plan::Schema );
 
 sub new {
     my $self      = shift->SUPER::new(@_);
-    my $tokenizer = Lucy::Analysis::Tokenizer->new;
+    my $tokenizer = Lucy::Analysis::RegexTokenizer->new;
     my $plain_type
         = Lucy::Plan::FullTextType->new( analyzer => $tokenizer );
     my $boosted_type = Lucy::Plan::FullTextType->new(

Modified: incubator/lucy/trunk/perl/t/604-simple_search.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/604-simple_search.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/604-simple_search.t (original)
+++ incubator/lucy/trunk/perl/t/604-simple_search.t Fri Mar 11 23:35:35 2011
@@ -18,12 +18,12 @@ use warnings;
 
 package MySchema;
 use base qw( Lucy::Plan::Schema );
-use Lucy::Analysis::Tokenizer;
+use Lucy::Analysis::RegexTokenizer;
 
 sub new {
     my $self = shift->SUPER::new(@_);
     my $type = Lucy::Plan::FullTextType->new(
-        analyzer => Lucy::Analysis::Tokenizer->new, );
+        analyzer => Lucy::Analysis::RegexTokenizer->new, );
     $self->spec_field( name => 'title', type => $type );
     $self->spec_field( name => 'body',  type => $type );
     return $self;
@@ -56,7 +56,7 @@ $indexer->commit;
 
 my $searcher = Lucy::Search::IndexSearcher->new( index => $folder );
 
-my $tokenizer = Lucy::Analysis::Tokenizer->new;
+my $tokenizer = Lucy::Analysis::RegexTokenizer->new;
 my $or_parser = Lucy::Search::QueryParser->new(
     schema   => $schema,
     analyzer => $tokenizer,

Modified: incubator/lucy/trunk/perl/t/605-store_pos_boost.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/605-store_pos_boost.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/605-store_pos_boost.t (original)
+++ incubator/lucy/trunk/perl/t/605-store_pos_boost.t Fri Mar 11 23:35:35 2011
@@ -17,7 +17,7 @@ use strict;
 use warnings;
 use lib 'buildlib';
 
-package MyTokenizer;
+package MyRegexTokenizer;
 use base qw( Lucy::Analysis::Analyzer );
 use Lucy::Analysis::Inversion;
 
@@ -68,14 +68,14 @@ sub make_similarity { RichSim->new }
 
 package MySchema;
 use base qw( Lucy::Plan::Schema );
-use Lucy::Analysis::Tokenizer;
+use Lucy::Analysis::RegexTokenizer;
 
 sub new {
     my $self       = shift->SUPER::new(@_);
     my $plain_type = Lucy::Plan::FullTextType->new(
-        analyzer => Lucy::Analysis::Tokenizer->new );
+        analyzer => Lucy::Analysis::RegexTokenizer->new );
     my $boosted_type
-        = MySchema::boosted->new( analyzer => MyTokenizer->new, );
+        = MySchema::boosted->new( analyzer => MyRegexTokenizer->new, );
     $self->spec_field( name => 'plain',   type => $plain_type );
     $self->spec_field( name => 'boosted', type => $boosted_type );
     return $self;

Modified: incubator/lucy/trunk/perl/t/607-queryparser_multi_field.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/607-queryparser_multi_field.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/607-queryparser_multi_field.t (original)
+++ incubator/lucy/trunk/perl/t/607-queryparser_multi_field.t Fri Mar 11 23:35:35 2011
@@ -19,12 +19,12 @@ use lib 'buildlib';
 
 package MultiFieldSchema;
 use base qw( Lucy::Plan::Schema );
-use Lucy::Analysis::Tokenizer;
+use Lucy::Analysis::RegexTokenizer;
 
 sub new {
     my $self       = shift->SUPER::new(@_);
     my $plain_type = Lucy::Plan::FullTextType->new(
-        analyzer => Lucy::Analysis::Tokenizer->new );
+        analyzer => Lucy::Analysis::RegexTokenizer->new );
     my $not_analyzed_type = Lucy::Plan::StringType->new;
     $self->spec_field( name => 'a', type => $plain_type );
     $self->spec_field( name => 'b', type => $plain_type );

Modified: incubator/lucy/trunk/perl/t/611-queryparser_syntax.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/611-queryparser_syntax.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/611-queryparser_syntax.t (original)
+++ incubator/lucy/trunk/perl/t/611-queryparser_syntax.t Fri Mar 11 23:35:35 2011
@@ -24,9 +24,9 @@ use base qw( Lucy::Plan::Schema );
 
 sub new {
     my $self = shift->SUPER::new(@_);
-    my $tokenizer = Lucy::Analysis::Tokenizer->new( pattern => '\S+' );
+    my $tokenizer = Lucy::Analysis::RegexTokenizer->new( pattern => '\S+' );
     my $wordchar_tokenizer
-        = Lucy::Analysis::Tokenizer->new( pattern => '\w+', );
+        = Lucy::Analysis::RegexTokenizer->new( pattern => '\w+', );
     my $stopfilter
         = Lucy::Analysis::SnowballStopFilter->new( stoplist => { x => 1 } );
     my $fancy_analyzer = Lucy::Analysis::PolyAnalyzer->new(

Copied: incubator/lucy/trunk/perl/t/core/154-regex_tokenizer.t (from r1078955, incubator/lucy/trunk/perl/t/core/154-tokenizer.t)
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/core/154-regex_tokenizer.t?p2=incubator/lucy/trunk/perl/t/core/154-regex_tokenizer.t&p1=incubator/lucy/trunk/perl/t/core/154-tokenizer.t&r1=1078955&r2=1080795&rev=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/core/154-tokenizer.t (original)
+++ incubator/lucy/trunk/perl/t/core/154-regex_tokenizer.t Fri Mar 11 23:35:35 2011
@@ -17,5 +17,5 @@ use strict;
 use warnings;
 
 use Lucy::Test;
-Lucy::Test::run_tests("TestTokenizer");
+Lucy::Test::run_tests("TestRegexTokenizer");
 

Modified: incubator/lucy/trunk/perl/xs/Lucy/Analysis/Tokenizer.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/xs/Lucy/Analysis/Tokenizer.c?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/xs/Lucy/Analysis/Tokenizer.c (original)
+++ incubator/lucy/trunk/perl/xs/Lucy/Analysis/Tokenizer.c Fri Mar 11 23:35:35 2011
@@ -14,11 +14,11 @@
  * limitations under the License.
  */
 
-#define C_LUCY_TOKENIZER
+#define C_LUCY_REGEXTOKENIZER
 #define C_LUCY_TOKEN
 #include "XSBind.h"
 
-#include "Lucy/Analysis/Tokenizer.h"
+#include "Lucy/Analysis/RegexTokenizer.h"
 #include "Lucy/Analysis/Token.h"
 #include "Lucy/Analysis/Inversion.h"
 #include "Lucy/Object/Host.h"
@@ -26,13 +26,14 @@
 #include "Lucy/Util/StringHelper.h"
 
 static void
-S_set_token_re_but_not_pattern(lucy_Tokenizer *self, void *token_re);
+S_set_token_re_but_not_pattern(lucy_RegexTokenizer *self, void *token_re);
 
 static void
-S_set_pattern_from_token_re(lucy_Tokenizer *self, void *token_re);
+S_set_pattern_from_token_re(lucy_RegexTokenizer *self, void *token_re);
 
-lucy_Tokenizer*
-lucy_Tokenizer_init(lucy_Tokenizer *self, const lucy_CharBuf *pattern)
+lucy_RegexTokenizer*
+lucy_RegexTokenizer_init(lucy_RegexTokenizer *self,
+                         const lucy_CharBuf *pattern)
 {
     SV    *token_re_sv;
 
@@ -53,7 +54,7 @@ lucy_Tokenizer_init(lucy_Tokenizer *self
     }
 
     // Acquire a compiled regex engine for matching one token. 
-    token_re_sv = (SV*)lucy_Host_callback_host(LUCY_TOKENIZER,
+    token_re_sv = (SV*)lucy_Host_callback_host(LUCY_REGEXTOKENIZER,
         "compile_token_re", 1, CFISH_ARG_STR("pattern", self->pattern));
     S_set_token_re_but_not_pattern(self, SvRV(token_re_sv));
 
@@ -61,7 +62,7 @@ lucy_Tokenizer_init(lucy_Tokenizer *self
 }
 
 static void
-S_set_token_re_but_not_pattern(lucy_Tokenizer *self, void *token_re)
+S_set_token_re_but_not_pattern(lucy_RegexTokenizer *self, void *token_re)
 {
     MAGIC *magic = NULL;
     REGEXP *rx;
@@ -84,7 +85,7 @@ S_set_token_re_but_not_pattern(lucy_Toke
 }
 
 static void
-S_set_pattern_from_token_re(lucy_Tokenizer *self, void *token_re)
+S_set_pattern_from_token_re(lucy_RegexTokenizer *self, void *token_re)
 {
     SV *rv = newRV((SV*)token_re);
     STRLEN len = 0;
@@ -94,7 +95,7 @@ S_set_pattern_from_token_re(lucy_Tokeniz
 }
 
 void
-lucy_Tokenizer_set_token_re(lucy_Tokenizer *self, void *token_re)
+lucy_RegexTokenizer_set_token_re(lucy_RegexTokenizer *self, void *token_re)
 {
     S_set_token_re_but_not_pattern(self, token_re);
     // Set pattern as a side effect. 
@@ -102,16 +103,17 @@ lucy_Tokenizer_set_token_re(lucy_Tokeniz
 }
 
 void
-lucy_Tokenizer_destroy(lucy_Tokenizer *self)
+lucy_RegexTokenizer_destroy(lucy_RegexTokenizer *self)
 {
     LUCY_DECREF(self->pattern);
     ReREFCNT_dec(((REGEXP*)self->token_re));
-    LUCY_SUPER_DESTROY(self, LUCY_TOKENIZER);
+    LUCY_SUPER_DESTROY(self, LUCY_REGEXTOKENIZER);
 }
 
 void
-lucy_Tokenizer_tokenize_str(lucy_Tokenizer *self, const char *string, 
-                            size_t string_len, lucy_Inversion *inversion)
+lucy_RegexTokenizer_tokenize_str(lucy_RegexTokenizer *self,
+                                 const char *string, size_t string_len, 
+                                 lucy_Inversion *inversion)
 {
     uint32_t   num_code_points = 0;
     SV        *wrapper    = sv_newmortal();