You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by ma...@apache.org on 2011/03/12 00:35:37 UTC
[lucy-commits] svn commit: r1080795 - in /incubator/lucy/trunk:
clownfish/lib/Clownfish/Binding/ core/Lucy/Analysis/ core/Lucy/Test/
core/Lucy/Test/Analysis/ core/Lucy/Test/Plan/ devel/benchmarks/indexers/
perl/lib/ perl/lib/Lucy/ perl/lib/Lucy/Analysis/ perl/lib/Luc...
Author: marvin
Date: Fri Mar 11 23:35:35 2011
New Revision: 1080795
URL: http://svn.apache.org/viewvc?rev=1080795&view=rev
Log:
LUCY-132 -- regex_tokenizer.patch
Rename Lucy::Analysis::Tokenizer to Lucy::Analysis::RegexTokenizer.
Added:
incubator/lucy/trunk/core/Lucy/Analysis/RegexTokenizer.c
- copied, changed from r1078955, incubator/lucy/trunk/core/Lucy/Analysis/Tokenizer.c
incubator/lucy/trunk/core/Lucy/Analysis/RegexTokenizer.cfh
- copied, changed from r1078955, incubator/lucy/trunk/core/Lucy/Analysis/Tokenizer.cfh
incubator/lucy/trunk/core/Lucy/Test/Analysis/TestRegexTokenizer.c
- copied, changed from r1078955, incubator/lucy/trunk/core/Lucy/Test/Analysis/TestTokenizer.c
incubator/lucy/trunk/core/Lucy/Test/Analysis/TestRegexTokenizer.cfh
- copied, changed from r1078955, incubator/lucy/trunk/core/Lucy/Test/Analysis/TestTokenizer.cfh
incubator/lucy/trunk/perl/lib/Lucy/Analysis/RegexTokenizer.pm
- copied, changed from r1078955, incubator/lucy/trunk/perl/lib/Lucy/Analysis/Tokenizer.pm
incubator/lucy/trunk/perl/t/154-regex_tokenizer.t
- copied, changed from r1078955, incubator/lucy/trunk/perl/t/154-tokenizer.t
incubator/lucy/trunk/perl/t/core/154-regex_tokenizer.t
- copied, changed from r1078955, incubator/lucy/trunk/perl/t/core/154-tokenizer.t
Removed:
incubator/lucy/trunk/core/Lucy/Analysis/Tokenizer.c
incubator/lucy/trunk/core/Lucy/Analysis/Tokenizer.cfh
incubator/lucy/trunk/core/Lucy/Test/Analysis/TestTokenizer.c
incubator/lucy/trunk/core/Lucy/Test/Analysis/TestTokenizer.cfh
incubator/lucy/trunk/perl/lib/Lucy/Analysis/Tokenizer.pm
incubator/lucy/trunk/perl/t/154-tokenizer.t
incubator/lucy/trunk/perl/t/core/154-tokenizer.t
Modified:
incubator/lucy/trunk/clownfish/lib/Clownfish/Binding/Perl.pm
incubator/lucy/trunk/core/Lucy/Analysis/Analyzer.cfh
incubator/lucy/trunk/core/Lucy/Analysis/PolyAnalyzer.c
incubator/lucy/trunk/core/Lucy/Analysis/PolyAnalyzer.cfh
incubator/lucy/trunk/core/Lucy/Test/Analysis/TestPolyAnalyzer.c
incubator/lucy/trunk/core/Lucy/Test/Plan/TestBlobType.c
incubator/lucy/trunk/core/Lucy/Test/Plan/TestFullTextType.c
incubator/lucy/trunk/core/Lucy/Test/TestQueryParserLogic.c
incubator/lucy/trunk/core/Lucy/Test/TestSchema.c
incubator/lucy/trunk/devel/benchmarks/indexers/BenchmarkingIndexer.pm
incubator/lucy/trunk/perl/lib/Lucy.pm
incubator/lucy/trunk/perl/lib/Lucy/Analysis/PolyAnalyzer.pm
incubator/lucy/trunk/perl/lib/Lucy/Docs/Cookbook/CustomQuery.pod
incubator/lucy/trunk/perl/lib/Lucy/Docs/Tutorial/Analysis.pod
incubator/lucy/trunk/perl/lib/Lucy/Test.pm
incubator/lucy/trunk/perl/t/155-snowball_stop_filter.t
incubator/lucy/trunk/perl/t/156-snowball_stemmer.t
incubator/lucy/trunk/perl/t/213-segment_merging.t
incubator/lucy/trunk/perl/t/214-spec_field.t
incubator/lucy/trunk/perl/t/215-term_vectors.t
incubator/lucy/trunk/perl/t/218-del_merging.t
incubator/lucy/trunk/perl/t/219-byte_buf_doc.t
incubator/lucy/trunk/perl/t/220-zlib_doc.t
incubator/lucy/trunk/perl/t/221-sort_writer.t
incubator/lucy/trunk/perl/t/302-many_fields.t
incubator/lucy/trunk/perl/t/303-highlighter.t
incubator/lucy/trunk/perl/t/304-verify_utf8.t
incubator/lucy/trunk/perl/t/311-hl_selection.t
incubator/lucy/trunk/perl/t/400-match_posting.t
incubator/lucy/trunk/perl/t/504-similarity.t
incubator/lucy/trunk/perl/t/510-remote_search.t
incubator/lucy/trunk/perl/t/511-sort_spec.t
incubator/lucy/trunk/perl/t/515-range_query.t
incubator/lucy/trunk/perl/t/601-queryparser.t
incubator/lucy/trunk/perl/t/602-boosts.t
incubator/lucy/trunk/perl/t/604-simple_search.t
incubator/lucy/trunk/perl/t/605-store_pos_boost.t
incubator/lucy/trunk/perl/t/607-queryparser_multi_field.t
incubator/lucy/trunk/perl/t/611-queryparser_syntax.t
incubator/lucy/trunk/perl/xs/Lucy/Analysis/Tokenizer.c
Modified: incubator/lucy/trunk/clownfish/lib/Clownfish/Binding/Perl.pm
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/clownfish/lib/Clownfish/Binding/Perl.pm?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/clownfish/lib/Clownfish/Binding/Perl.pm (original)
+++ incubator/lucy/trunk/clownfish/lib/Clownfish/Binding/Perl.pm Fri Mar 11 23:35:35 2011
@@ -338,7 +338,7 @@ my %ks_compat = (
[qw( KinoSearch::Plan::BlobType KinoSearch::FieldType::BlobType )],
'Lucy::Analysis::PolyAnalyzer' =>
[qw( KinoSearch::Analysis::PolyAnalyzer )],
- 'Lucy::Analysis::Tokenizer' => [qw( KinoSearch::Analysis::Tokenizer )],
+ 'Lucy::Analysis::RegexTokenizer' => [qw( KinoSearch::Analysis::Tokenizer )],
'Lucy::Analysis::CaseFolder' => [
qw( KinoSearch::Analysis::CaseFolder KinoSearch::Analysis::LCNormalizer )
],
Modified: incubator/lucy/trunk/core/Lucy/Analysis/Analyzer.cfh
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Analysis/Analyzer.cfh?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Analysis/Analyzer.cfh (original)
+++ incubator/lucy/trunk/core/Lucy/Analysis/Analyzer.cfh Fri Mar 11 23:35:35 2011
@@ -20,7 +20,7 @@ parcel Lucy;
*
* An Analyzer is a filter which processes text, transforming it from one form
* into another. For instance, an analyzer might break up a long text into
- * smaller pieces (L<Tokenizer|Lucy::Analysis::Tokenizer>), or it might
+ * smaller pieces (L<RegexTokenizer|Lucy::Analysis::RegexTokenizer>), or it might
* perform case folding to facilitate case-insensitive search
* (L<CaseFolder|Lucy::Analysis::CaseFolder>).
*/
Modified: incubator/lucy/trunk/core/Lucy/Analysis/PolyAnalyzer.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Analysis/PolyAnalyzer.c?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Analysis/PolyAnalyzer.c (original)
+++ incubator/lucy/trunk/core/Lucy/Analysis/PolyAnalyzer.c Fri Mar 11 23:35:35 2011
@@ -22,7 +22,7 @@
#include "Lucy/Analysis/Token.h"
#include "Lucy/Analysis/Inversion.h"
#include "Lucy/Analysis/SnowballStemmer.h"
-#include "Lucy/Analysis/Tokenizer.h"
+#include "Lucy/Analysis/RegexTokenizer.h"
PolyAnalyzer*
PolyAnalyzer_new(const CharBuf *language, VArray *analyzers)
@@ -45,7 +45,7 @@ PolyAnalyzer_init(PolyAnalyzer *self, co
else if (language) {
self->analyzers = VA_new(3);
VA_Push(self->analyzers, (Obj*)CaseFolder_new());
- VA_Push(self->analyzers, (Obj*)Tokenizer_new(NULL));
+ VA_Push(self->analyzers, (Obj*)RegexTokenizer_new(NULL));
VA_Push(self->analyzers, (Obj*)SnowStemmer_new(language));
}
else {
Modified: incubator/lucy/trunk/core/Lucy/Analysis/PolyAnalyzer.cfh
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Analysis/PolyAnalyzer.cfh?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Analysis/PolyAnalyzer.cfh (original)
+++ incubator/lucy/trunk/core/Lucy/Analysis/PolyAnalyzer.cfh Fri Mar 11 23:35:35 2011
@@ -23,7 +23,7 @@ parcel Lucy;
* either provide the Analyzers yourself, or you can specify a supported
* language, in which case a PolyAnalyzer consisting of a
* L<CaseFolder|Lucy::Analysis::CaseFolder>, a
- * L<Tokenizer|Lucy::Analysis::Tokenizer>, and a
+ * L<RegexTokenizer|Lucy::Analysis::RegexTokenizer>, and a
* L<SnowballStemmer|Lucy::Analysis::SnowballStemmer> will be generated for you.
*
* Supported languages:
@@ -55,7 +55,7 @@ class Lucy::Analysis::PolyAnalyzer
/**
* @param language An ISO code from the list of supported languages.
* @param analyzers An array of Analyzers. The order of the analyzers
- * matters. Don't put a SnowballStemmer before a Tokenizer (can't stem whole
+ * matters. Don't put a SnowballStemmer before a RegexTokenizer (can't stem whole
* documents or paragraphs -- just individual words), or a SnowballStopFilter
* after a SnowballStemmer (stemmed words, e.g. "themselv", will not appear in a
* stoplist). In general, the sequence should be: normalize, tokenize,
Copied: incubator/lucy/trunk/core/Lucy/Analysis/RegexTokenizer.c (from r1078955, incubator/lucy/trunk/core/Lucy/Analysis/Tokenizer.c)
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Analysis/RegexTokenizer.c?p2=incubator/lucy/trunk/core/Lucy/Analysis/RegexTokenizer.c&p1=incubator/lucy/trunk/core/Lucy/Analysis/Tokenizer.c&r1=1078955&r2=1080795&rev=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Analysis/Tokenizer.c (original)
+++ incubator/lucy/trunk/core/Lucy/Analysis/RegexTokenizer.c Fri Mar 11 23:35:35 2011
@@ -14,71 +14,72 @@
* limitations under the License.
*/
-#define C_LUCY_TOKENIZER
+#define C_LUCY_REGEXTOKENIZER
#define C_LUCY_TOKEN
#include "Lucy/Util/ToolSet.h"
-#include "Lucy/Analysis/Tokenizer.h"
+#include "Lucy/Analysis/RegexTokenizer.h"
#include "Lucy/Analysis/Token.h"
#include "Lucy/Analysis/Inversion.h"
-Tokenizer*
-Tokenizer_new(const CharBuf *pattern)
+RegexTokenizer*
+RegexTokenizer_new(const CharBuf *pattern)
{
- Tokenizer *self = (Tokenizer*)VTable_Make_Obj(TOKENIZER);
- return Tokenizer_init(self, pattern);
+ RegexTokenizer *self = (RegexTokenizer*)VTable_Make_Obj(REGEXTOKENIZER);
+ return RegexTokenizer_init(self, pattern);
}
Inversion*
-Tokenizer_transform(Tokenizer *self, Inversion *inversion)
+RegexTokenizer_transform(RegexTokenizer *self, Inversion *inversion)
{
Inversion *new_inversion = Inversion_new(NULL);
Token *token;
while (NULL != (token = Inversion_Next(inversion))) {
- Tokenizer_Tokenize_Str(self, token->text, token->len, new_inversion);
+ RegexTokenizer_Tokenize_Str(self, token->text, token->len,
+ new_inversion);
}
return new_inversion;
}
Inversion*
-Tokenizer_transform_text(Tokenizer *self, CharBuf *text)
+RegexTokenizer_transform_text(RegexTokenizer *self, CharBuf *text)
{
Inversion *new_inversion = Inversion_new(NULL);
- Tokenizer_Tokenize_Str(self, (char*)CB_Get_Ptr8(text), CB_Get_Size(text),
- new_inversion);
+ RegexTokenizer_Tokenize_Str(self, (char*)CB_Get_Ptr8(text),
+ CB_Get_Size(text), new_inversion);
return new_inversion;
}
Obj*
-Tokenizer_dump(Tokenizer *self)
+RegexTokenizer_dump(RegexTokenizer *self)
{
- Tokenizer_dump_t super_dump
- = (Tokenizer_dump_t)SUPER_METHOD(TOKENIZER, Tokenizer, Dump);
+ RegexTokenizer_dump_t super_dump
+ = (RegexTokenizer_dump_t)SUPER_METHOD(REGEXTOKENIZER, RegexTokenizer, Dump);
Hash *dump = (Hash*)CERTIFY(super_dump(self), HASH);
Hash_Store_Str(dump, "pattern", 7, CB_Dump(self->pattern));
return (Obj*)dump;
}
-Tokenizer*
-Tokenizer_load(Tokenizer *self, Obj *dump)
+RegexTokenizer*
+RegexTokenizer_load(RegexTokenizer *self, Obj *dump)
{
Hash *source = (Hash*)CERTIFY(dump, HASH);
- Tokenizer_load_t super_load
- = (Tokenizer_load_t)SUPER_METHOD(TOKENIZER, Tokenizer, Load);
- Tokenizer *loaded = super_load(self, dump);
+ RegexTokenizer_load_t super_load
+ = (RegexTokenizer_load_t)SUPER_METHOD(REGEXTOKENIZER, RegexTokenizer, Load);
+ RegexTokenizer *loaded = super_load(self, dump);
CharBuf *pattern = (CharBuf*)CERTIFY(
Hash_Fetch_Str(source, "pattern", 7), CHARBUF);
- return Tokenizer_init(loaded, pattern);
+ return RegexTokenizer_init(loaded, pattern);
}
bool_t
-Tokenizer_equals(Tokenizer *self, Obj *other)
+RegexTokenizer_equals(RegexTokenizer *self, Obj *other)
{
- Tokenizer *const evil_twin = (Tokenizer*)other;
+ RegexTokenizer *const evil_twin = (RegexTokenizer*)other;
if (evil_twin == self) return true;
- if (!Obj_Is_A(other, TOKENIZER)) return false;
+ if (!Obj_Is_A(other, REGEXTOKENIZER)) return false;
if (!CB_Equals(evil_twin->pattern, (Obj*)self->pattern)) return false;
return true;
}
Copied: incubator/lucy/trunk/core/Lucy/Analysis/RegexTokenizer.cfh (from r1078955, incubator/lucy/trunk/core/Lucy/Analysis/Tokenizer.cfh)
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Analysis/RegexTokenizer.cfh?p2=incubator/lucy/trunk/core/Lucy/Analysis/RegexTokenizer.cfh&p1=incubator/lucy/trunk/core/Lucy/Analysis/Tokenizer.cfh&r1=1078955&r2=1080795&rev=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Analysis/Tokenizer.cfh (original)
+++ incubator/lucy/trunk/core/Lucy/Analysis/RegexTokenizer.cfh Fri Mar 11 23:35:35 2011
@@ -22,7 +22,7 @@ parcel Lucy;
* array of "tokens". For instance, the string "three blind mice" might be
* tokenized into "three", "blind", "mice".
*
- * Lucy::Analysis::Tokenizer decides where it should break up the text
+ * Lucy::Analysis::RegexTokenizer decides where it should break up the text
* based on a regular expression compiled from a supplied <code>pattern</code>
* matching one token. If our source string is...
*
@@ -47,13 +47,13 @@ parcel Lucy;
* ... the difference being that the word character tokenizer skips over
* punctuation as well as whitespace when determining token boundaries.
*/
-class Lucy::Analysis::Tokenizer
+class Lucy::Analysis::RegexTokenizer
inherits Lucy::Analysis::Analyzer {
CharBuf *pattern;
void *token_re;
- inert incremented Tokenizer*
+ inert incremented RegexTokenizer*
new(const CharBuf *pattern = NULL);
/**
@@ -62,39 +62,39 @@ class Lucy::Analysis::Tokenizer
* <code>\w+(?:[\x{2019}']\w+)*</code>, which matches "it's" as well as
* "it" and "O'Henry's" as well as "Henry".
*/
- public inert Tokenizer*
- init(Tokenizer *self, const CharBuf *pattern = NULL);
+ public inert RegexTokenizer*
+ init(RegexTokenizer *self, const CharBuf *pattern = NULL);
public incremented Inversion*
- Transform(Tokenizer *self, Inversion *inversion);
+ Transform(RegexTokenizer *self, Inversion *inversion);
public incremented Inversion*
- Transform_Text(Tokenizer *self, CharBuf *text);
+ Transform_Text(RegexTokenizer *self, CharBuf *text);
/** Tokenize the supplied string and add any Tokens generated to the
* supplied Inversion.
*/
void
- Tokenize_Str(Tokenizer *self, const char *text, size_t len,
+ Tokenize_Str(RegexTokenizer *self, const char *text, size_t len,
Inversion *inversion);
/** Set the compiled regular expression for matching a token. Also sets
* <code>pattern</code> as a side effect.
*/
void
- Set_Token_RE(Tokenizer *self, void *token_re);
+ Set_Token_RE(RegexTokenizer *self, void *token_re);
public incremented Obj*
- Dump(Tokenizer *self);
+ Dump(RegexTokenizer *self);
- public incremented Tokenizer*
- Load(Tokenizer *self, Obj *dump);
+ public incremented RegexTokenizer*
+ Load(RegexTokenizer *self, Obj *dump);
public bool_t
- Equals(Tokenizer *self, Obj *other);
+ Equals(RegexTokenizer *self, Obj *other);
public void
- Destroy(Tokenizer *self);
+ Destroy(RegexTokenizer *self);
}
Modified: incubator/lucy/trunk/core/Lucy/Test/Analysis/TestPolyAnalyzer.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Test/Analysis/TestPolyAnalyzer.c?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Test/Analysis/TestPolyAnalyzer.c (original)
+++ incubator/lucy/trunk/core/Lucy/Test/Analysis/TestPolyAnalyzer.c Fri Mar 11 23:35:35 2011
@@ -24,7 +24,7 @@
#include "Lucy/Analysis/CaseFolder.h"
#include "Lucy/Analysis/SnowballStopFilter.h"
#include "Lucy/Analysis/SnowballStemmer.h"
-#include "Lucy/Analysis/Tokenizer.h"
+#include "Lucy/Analysis/RegexTokenizer.h"
static void
test_Dump_Load_and_Equals(TestBatch *batch)
@@ -60,7 +60,7 @@ test_analysis(TestBatch *batch)
CharBuf *EN = (CharBuf*)ZCB_WRAP_STR("en", 2);
CharBuf *source_text = CB_newf("Eats, shoots and leaves.");
CaseFolder *case_folder = CaseFolder_new();
- Tokenizer *tokenizer = Tokenizer_new(NULL);
+ RegexTokenizer *tokenizer = RegexTokenizer_new(NULL);
SnowballStopFilter *stopfilter = SnowStop_new(EN, NULL);
SnowballStemmer *stemmer = SnowStemmer_new(EN);
@@ -100,7 +100,7 @@ test_analysis(TestBatch *batch)
VA_Push(expected, (Obj*)CB_newf("and"));
VA_Push(expected, (Obj*)CB_newf("leaves"));
TestUtils_test_analyzer(batch, (Analyzer*)polyanalyzer, source_text,
- expected, "With Tokenizer");
+ expected, "With RegexTokenizer");
DECREF(expected);
DECREF(polyanalyzer);
DECREF(analyzers);
Copied: incubator/lucy/trunk/core/Lucy/Test/Analysis/TestRegexTokenizer.c (from r1078955, incubator/lucy/trunk/core/Lucy/Test/Analysis/TestTokenizer.c)
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Test/Analysis/TestRegexTokenizer.c?p2=incubator/lucy/trunk/core/Lucy/Test/Analysis/TestRegexTokenizer.c&p1=incubator/lucy/trunk/core/Lucy/Test/Analysis/TestTokenizer.c&r1=1078955&r2=1080795&rev=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Test/Analysis/TestTokenizer.c (original)
+++ incubator/lucy/trunk/core/Lucy/Test/Analysis/TestRegexTokenizer.c Fri Mar 11 23:35:35 2011
@@ -14,12 +14,12 @@
* limitations under the License.
*/
-#define C_LUCY_TESTTOKENIZER
+#define C_LUCY_TESTREGEXTOKENIZER
#include "Lucy/Util/ToolSet.h"
#include "Lucy/Test.h"
-#include "Lucy/Test/Analysis/TestTokenizer.h"
-#include "Lucy/Analysis/Tokenizer.h"
+#include "Lucy/Test/Analysis/TestRegexTokenizer.h"
+#include "Lucy/Analysis/RegexTokenizer.h"
static void
@@ -27,22 +27,22 @@ test_Dump_Load_and_Equals(TestBatch *bat
{
ZombieCharBuf *word_char_pattern = ZCB_WRAP_STR("\\w+", 3);
ZombieCharBuf *whitespace_pattern = ZCB_WRAP_STR("\\S+", 3);
- Tokenizer *word_char_tokenizer =
- Tokenizer_new((CharBuf*)word_char_pattern);
- Tokenizer *whitespace_tokenizer =
- Tokenizer_new((CharBuf*)whitespace_pattern);
- Obj *word_char_dump = Tokenizer_Dump(word_char_tokenizer);
- Obj *whitespace_dump = Tokenizer_Dump(whitespace_tokenizer);
- Tokenizer *word_char_clone
- = Tokenizer_Load(whitespace_tokenizer, word_char_dump);
- Tokenizer *whitespace_clone
- = Tokenizer_Load(whitespace_tokenizer, whitespace_dump);
+ RegexTokenizer *word_char_tokenizer =
+ RegexTokenizer_new((CharBuf*)word_char_pattern);
+ RegexTokenizer *whitespace_tokenizer =
+ RegexTokenizer_new((CharBuf*)whitespace_pattern);
+ Obj *word_char_dump = RegexTokenizer_Dump(word_char_tokenizer);
+ Obj *whitespace_dump = RegexTokenizer_Dump(whitespace_tokenizer);
+ RegexTokenizer *word_char_clone
+ = RegexTokenizer_Load(whitespace_tokenizer, word_char_dump);
+ RegexTokenizer *whitespace_clone
+ = RegexTokenizer_Load(whitespace_tokenizer, whitespace_dump);
- TEST_FALSE(batch, Tokenizer_Equals(word_char_tokenizer,
+ TEST_FALSE(batch, RegexTokenizer_Equals(word_char_tokenizer,
(Obj*)whitespace_tokenizer), "Equals() false with different pattern");
- TEST_TRUE(batch, Tokenizer_Equals(word_char_tokenizer,
+ TEST_TRUE(batch, RegexTokenizer_Equals(word_char_tokenizer,
(Obj*)word_char_clone), "Dump => Load round trip");
- TEST_TRUE(batch, Tokenizer_Equals(whitespace_tokenizer,
+ TEST_TRUE(batch, RegexTokenizer_Equals(whitespace_tokenizer,
(Obj*)whitespace_clone), "Dump => Load round trip");
DECREF(word_char_tokenizer);
@@ -54,7 +54,7 @@ test_Dump_Load_and_Equals(TestBatch *bat
}
void
-TestTokenizer_run_tests()
+TestRegexTokenizer_run_tests()
{
TestBatch *batch = TestBatch_new(3);
Copied: incubator/lucy/trunk/core/Lucy/Test/Analysis/TestRegexTokenizer.cfh (from r1078955, incubator/lucy/trunk/core/Lucy/Test/Analysis/TestTokenizer.cfh)
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Test/Analysis/TestRegexTokenizer.cfh?p2=incubator/lucy/trunk/core/Lucy/Test/Analysis/TestRegexTokenizer.cfh&p1=incubator/lucy/trunk/core/Lucy/Test/Analysis/TestTokenizer.cfh&r1=1078955&r2=1080795&rev=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Test/Analysis/TestTokenizer.cfh (original)
+++ incubator/lucy/trunk/core/Lucy/Test/Analysis/TestRegexTokenizer.cfh Fri Mar 11 23:35:35 2011
@@ -16,7 +16,7 @@
parcel Lucy;
-inert class Lucy::Test::Analysis::TestTokenizer {
+inert class Lucy::Test::Analysis::TestRegexTokenizer {
inert void
run_tests();
}
Modified: incubator/lucy/trunk/core/Lucy/Test/Plan/TestBlobType.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Test/Plan/TestBlobType.c?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Test/Plan/TestBlobType.c (original)
+++ incubator/lucy/trunk/core/Lucy/Test/Plan/TestBlobType.c Fri Mar 11 23:35:35 2011
@@ -21,7 +21,7 @@
#include "Lucy/Test/Plan/TestBlobType.h"
#include "Lucy/Test/TestUtils.h"
#include "Lucy/Plan/BlobType.h"
-#include "Lucy/Analysis/Tokenizer.h"
+#include "Lucy/Analysis/RegexTokenizer.h"
static void
test_Dump_Load_and_Equals(TestBatch *batch)
Modified: incubator/lucy/trunk/core/Lucy/Test/Plan/TestFullTextType.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Test/Plan/TestFullTextType.c?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Test/Plan/TestFullTextType.c (original)
+++ incubator/lucy/trunk/core/Lucy/Test/Plan/TestFullTextType.c Fri Mar 11 23:35:35 2011
@@ -22,22 +22,22 @@
#include "Lucy/Test/TestUtils.h"
#include "Lucy/Plan/FullTextType.h"
#include "Lucy/Analysis/CaseFolder.h"
-#include "Lucy/Analysis/Tokenizer.h"
+#include "Lucy/Analysis/RegexTokenizer.h"
static void
test_Dump_Load_and_Equals(TestBatch *batch)
{
- Tokenizer *tokenizer = Tokenizer_new(NULL);
- CaseFolder *case_folder = CaseFolder_new();
- FullTextType *type = FullTextType_new((Analyzer*)tokenizer);
- FullTextType *other = FullTextType_new((Analyzer*)case_folder);
- FullTextType *boost_differs = FullTextType_new((Analyzer*)tokenizer);
- FullTextType *not_indexed = FullTextType_new((Analyzer*)tokenizer);
- FullTextType *not_stored = FullTextType_new((Analyzer*)tokenizer);
- FullTextType *highlightable = FullTextType_new((Analyzer*)tokenizer);
- Obj *dump = (Obj*)FullTextType_Dump(type);
- Obj *clone = Obj_Load(dump, dump);
- Obj *another_dump = (Obj*)FullTextType_Dump_For_Schema(type);
+ RegexTokenizer *tokenizer = RegexTokenizer_new(NULL);
+ CaseFolder *case_folder = CaseFolder_new();
+ FullTextType *type = FullTextType_new((Analyzer*)tokenizer);
+ FullTextType *other = FullTextType_new((Analyzer*)case_folder);
+ FullTextType *boost_differs = FullTextType_new((Analyzer*)tokenizer);
+ FullTextType *not_indexed = FullTextType_new((Analyzer*)tokenizer);
+ FullTextType *not_stored = FullTextType_new((Analyzer*)tokenizer);
+ FullTextType *highlightable = FullTextType_new((Analyzer*)tokenizer);
+ Obj *dump = (Obj*)FullTextType_Dump(type);
+ Obj *clone = Obj_Load(dump, dump);
+ Obj *another_dump = (Obj*)FullTextType_Dump_For_Schema(type);
FullTextType_Set_Boost(boost_differs, 1.5);
FullTextType_Set_Indexed(not_indexed, false);
@@ -80,10 +80,10 @@ test_Dump_Load_and_Equals(TestBatch *bat
static void
test_Compare_Values(TestBatch *batch)
{
- Tokenizer *tokenizer = Tokenizer_new(NULL);
- FullTextType *type = FullTextType_new((Analyzer*)tokenizer);
- ZombieCharBuf *a = ZCB_WRAP_STR("a", 1);
- ZombieCharBuf *b = ZCB_WRAP_STR("b", 1);
+ RegexTokenizer *tokenizer = RegexTokenizer_new(NULL);
+ FullTextType *type = FullTextType_new((Analyzer*)tokenizer);
+ ZombieCharBuf *a = ZCB_WRAP_STR("a", 1);
+ ZombieCharBuf *b = ZCB_WRAP_STR("b", 1);
TEST_TRUE(batch,
FullTextType_Compare_Values(type, (Obj*)a, (Obj*)b) < 0,
Modified: incubator/lucy/trunk/core/Lucy/Test/TestQueryParserLogic.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Test/TestQueryParserLogic.c?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Test/TestQueryParserLogic.c (original)
+++ incubator/lucy/trunk/core/Lucy/Test/TestQueryParserLogic.c Fri Mar 11 23:35:35 2011
@@ -26,7 +26,7 @@
#include "Lucy/Test/TestSchema.h"
#include "Lucy/Test/TestUtils.h"
#include "Lucy/Analysis/Analyzer.h"
-#include "Lucy/Analysis/Tokenizer.h"
+#include "Lucy/Analysis/RegexTokenizer.h"
#include "Lucy/Document/Doc.h"
#include "Lucy/Index/Indexer.h"
#include "Lucy/Search/Hits.h"
Modified: incubator/lucy/trunk/core/Lucy/Test/TestSchema.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Test/TestSchema.c?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Test/TestSchema.c (original)
+++ incubator/lucy/trunk/core/Lucy/Test/TestSchema.c Fri Mar 11 23:35:35 2011
@@ -21,7 +21,7 @@
#include "Lucy/Test/Plan/TestArchitecture.h"
#include "Lucy/Test/TestSchema.h"
#include "Lucy/Analysis/CaseFolder.h"
-#include "Lucy/Analysis/Tokenizer.h"
+#include "Lucy/Analysis/RegexTokenizer.h"
#include "Lucy/Plan/FullTextType.h"
#include "Lucy/Plan/Architecture.h"
@@ -35,7 +35,7 @@ TestSchema_new()
TestSchema*
TestSchema_init(TestSchema *self)
{
- Tokenizer *tokenizer = Tokenizer_new(NULL);
+ RegexTokenizer *tokenizer = RegexTokenizer_new(NULL);
FullTextType *type = FullTextType_new((Analyzer*)tokenizer);
Schema_init((Schema*)self);
Modified: incubator/lucy/trunk/devel/benchmarks/indexers/BenchmarkingIndexer.pm
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/devel/benchmarks/indexers/BenchmarkingIndexer.pm?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/devel/benchmarks/indexers/BenchmarkingIndexer.pm (original)
+++ incubator/lucy/trunk/devel/benchmarks/indexers/BenchmarkingIndexer.pm Fri Mar 11 23:35:35 2011
@@ -122,13 +122,13 @@ END_REPORT
}
package BenchSchema::WhiteSpaceTokenizer;
-use base qw( Lucy::Analysis::Tokenizer );
+use base qw( Lucy::Analysis::RegexTokenizer );
sub new { return shift->SUPER::new( pattern => '\S+' ) }
package BenchSchema;
use base qw( Lucy::Plan::Schema );
-use Lucy::Analysis::Tokenizer;
+use Lucy::Analysis::RegexTokenizer;
sub new {
my $self = shift->SUPER::new;
Modified: incubator/lucy/trunk/perl/lib/Lucy.pm
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/lib/Lucy.pm?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/lib/Lucy.pm (original)
+++ incubator/lucy/trunk/perl/lib/Lucy.pm Fri Mar 11 23:35:35 2011
@@ -100,7 +100,7 @@ sub error {$Lucy::Object::Err::error}
}
{
- package Lucy::Analysis::Tokenizer;
+ package Lucy::Analysis::RegexTokenizer;
sub compile_token_re { return qr/$_[1]/ }
Modified: incubator/lucy/trunk/perl/lib/Lucy/Analysis/PolyAnalyzer.pm
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/lib/Lucy/Analysis/PolyAnalyzer.pm?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/lib/Lucy/Analysis/PolyAnalyzer.pm (original)
+++ incubator/lucy/trunk/perl/lib/Lucy/Analysis/PolyAnalyzer.pm Fri Mar 11 23:35:35 2011
@@ -42,7 +42,7 @@ my $constructor = <<'END_CONSTRUCTOR';
# or...
my $case_folder = Lucy::Analysis::CaseFolder->new;
- my $tokenizer = Lucy::Analysis::Tokenizer->new;
+ my $tokenizer = Lucy::Analysis::RegexTokenizer->new;
my $stemmer = Lucy::Analysis::SnowballStemmer->new( language => 'en' );
my $polyanalyzer = Lucy::Analysis::PolyAnalyzer->new(
analyzers => [ $case_folder, $whitespace_tokenizer, $stemmer, ], );
Copied: incubator/lucy/trunk/perl/lib/Lucy/Analysis/RegexTokenizer.pm (from r1078955, incubator/lucy/trunk/perl/lib/Lucy/Analysis/Tokenizer.pm)
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/lib/Lucy/Analysis/RegexTokenizer.pm?p2=incubator/lucy/trunk/perl/lib/Lucy/Analysis/RegexTokenizer.pm&p1=incubator/lucy/trunk/perl/lib/Lucy/Analysis/Tokenizer.pm&r1=1078955&r2=1080795&rev=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/lib/Lucy/Analysis/Tokenizer.pm (original)
+++ incubator/lucy/trunk/perl/lib/Lucy/Analysis/RegexTokenizer.pm Fri Mar 11 23:35:35 2011
@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-package Lucy::Analysis::Tokenizer;
+package Lucy::Analysis::RegexTokenizer;
use Lucy;
1;
@@ -24,14 +24,14 @@ __BINDING__
my $synopsis = <<'END_SYNOPSIS';
my $whitespace_tokenizer
- = Lucy::Analysis::Tokenizer->new( pattern => '\S+' );
+ = Lucy::Analysis::RegexTokenizer->new( pattern => '\S+' );
# or...
my $word_char_tokenizer
- = Lucy::Analysis::Tokenizer->new( pattern => '\w+' );
+ = Lucy::Analysis::RegexTokenizer->new( pattern => '\w+' );
# or...
- my $apostrophising_tokenizer = Lucy::Analysis::Tokenizer->new;
+ my $apostrophising_tokenizer = Lucy::Analysis::RegexTokenizer->new;
# Then... once you have a tokenizer, put it into a PolyAnalyzer:
my $polyanalyzer = Lucy::Analysis::PolyAnalyzer->new(
@@ -39,14 +39,14 @@ my $synopsis = <<'END_SYNOPSIS';
END_SYNOPSIS
my $constructor = <<'END_CONSTRUCTOR';
- my $word_char_tokenizer = Lucy::Analysis::Tokenizer->new(
+ my $word_char_tokenizer = Lucy::Analysis::RegexTokenizer->new(
pattern => '\w+', # required
);
END_CONSTRUCTOR
Clownfish::Binding::Perl::Class->register(
parcel => "Lucy",
- class_name => "Lucy::Analysis::Tokenizer",
+ class_name => "Lucy::Analysis::RegexTokenizer",
bind_constructors => ["_new"],
make_pod => {
constructor => { sample => $constructor },
Modified: incubator/lucy/trunk/perl/lib/Lucy/Docs/Cookbook/CustomQuery.pod
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/lib/Lucy/Docs/Cookbook/CustomQuery.pod?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/lib/Lucy/Docs/Cookbook/CustomQuery.pod (original)
+++ incubator/lucy/trunk/perl/lib/Lucy/Docs/Cookbook/CustomQuery.pod Fri Mar 11 23:35:35 2011
@@ -305,7 +305,7 @@ prefix conflation -- is not perfectly co
# Polyanalyzer with no SnowballStemmer.
my $analyzer = Lucy::Analysis::PolyAnalyzer->new(
analyzers => [
- Lucy::Analysis::Tokenizer->new,
+ Lucy::Analysis::RegexTokenizer->new,
Lucy::Analysis::CaseFolder->new,
],
);
Modified: incubator/lucy/trunk/perl/lib/Lucy/Docs/Tutorial/Analysis.pod
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/lib/Lucy/Docs/Tutorial/Analysis.pod?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/lib/Lucy/Docs/Tutorial/Analysis.pod (original)
+++ incubator/lucy/trunk/perl/lib/Lucy/Docs/Tutorial/Analysis.pod Fri Mar 11 23:35:35 2011
@@ -19,9 +19,9 @@ Lucy::Docs::Tutorial::Analysis - How to
=head1 DESCRIPTION
-Try swapping out the PolyAnalyzer in our Schema for a Tokenizer:
+Try swapping out the PolyAnalyzer in our Schema for a RegexTokenizer:
- my $tokenizer = Lucy::Analysis::Tokenizer->new;
+ my $tokenizer = Lucy::Analysis::RegexTokenizer->new;
my $type = Lucy::Plan::FullTextType->new(
analyzer => $tokenizer,
);
@@ -30,13 +30,13 @@ Search for C<senate>, C<Senate>, and C<S
change and re-indexing.
Under PolyAnalyzer, the results are identical for all three searches, but
-under Tokenizer, searches are case-sensitive, and the result sets for
+under RegexTokenizer, searches are case-sensitive, and the result sets for
C<Senate> and C<Senator> are distinct.
=head2 PolyAnalyzer
What's happening is that PolyAnalyzer is performing more aggressive processing
-than Tokenizer. In addition to tokenizing, it's also converting all text to
+than RegexTokenizer. In addition to tokenizing, it's also converting all text to
lower case so that searches are case-insensitive, and using a "stemming"
algorithm to reduce related words to a common stem (C<senat>, in this case).
@@ -45,7 +45,7 @@ In this case, it's three-in-one, since s
C<< language => 'en' >> is equivalent to this snippet:
my $case_folder = Lucy::Analysis::CaseFolder->new;
- my $tokenizer = Lucy::Analysis::Tokenizer->new;
+ my $tokenizer = Lucy::Analysis::RegexTokenizer->new;
my $stemmer = Lucy::Analysis::SnowballStemmer->new( language => 'en' );
my $polyanalyzer = Lucy::Analysis::PolyAnalyzer->new(
analyzers => [ $case_folder, $tokenizer, $stemmer ],
Modified: incubator/lucy/trunk/perl/lib/Lucy/Test.pm
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/lib/Lucy/Test.pm?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/lib/Lucy/Test.pm (original)
+++ incubator/lucy/trunk/perl/lib/Lucy/Test.pm Fri Mar 11 23:35:35 2011
@@ -58,8 +58,8 @@ PPCODE:
else if (strEQ(package, "TestSnowStemmer")) {
lucy_TestSnowStemmer_run_tests();
}
- else if (strEQ(package, "TestTokenizer")) {
- lucy_TestTokenizer_run_tests();
+ else if (strEQ(package, "TestRegexTokenizer")) {
+ lucy_TestRegexTokenizer_run_tests();
}
// Lucy::Object
else if (strEQ(package, "TestObj")) {
Copied: incubator/lucy/trunk/perl/t/154-regex_tokenizer.t (from r1078955, incubator/lucy/trunk/perl/t/154-tokenizer.t)
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/154-regex_tokenizer.t?p2=incubator/lucy/trunk/perl/t/154-regex_tokenizer.t&p1=incubator/lucy/trunk/perl/t/154-tokenizer.t&r1=1078955&r2=1080795&rev=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/154-tokenizer.t (original)
+++ incubator/lucy/trunk/perl/t/154-regex_tokenizer.t Fri Mar 11 23:35:35 2011
@@ -19,9 +19,9 @@ use warnings;
use Test::More tests => 15;
use Lucy::Test;
-my $tokenizer = Lucy::Analysis::Tokenizer->new;
-my $other = Lucy::Analysis::Tokenizer->new( pattern => '\w+' );
-my $yet_another = Lucy::Analysis::Tokenizer->new( pattern => '\w+' );
+my $tokenizer = Lucy::Analysis::RegexTokenizer->new;
+my $other = Lucy::Analysis::RegexTokenizer->new( pattern => '\w+' );
+my $yet_another = Lucy::Analysis::RegexTokenizer->new( pattern => '\w+' );
ok( $other->equals($yet_another), "Equals" );
ok( !$tokenizer->equals($other), "different patterns foil Equals" );
@@ -41,7 +41,7 @@ is_deeply( \@token_texts, [qw( a b c )],
is_deeply( \@start_offsets, [ 0, 2, 4, ], "correctstart offsets" );
is_deeply( \@end_offsets, [ 1, 3, 5, ], "correct end offsets" );
-$tokenizer = Lucy::Analysis::Tokenizer->new( pattern => '.' );
+$tokenizer = Lucy::Analysis::RegexTokenizer->new( pattern => '.' );
$inversion = Lucy::Analysis::Inversion->new( text => "a b c" );
$inversion = $tokenizer->transform($inversion);
@@ -73,25 +73,25 @@ is_deeply(
"no freakout when fed multiple tokens"
);
-$tokenizer = Lucy::Analysis::Tokenizer->new( token_re => qr/../ );
+$tokenizer = Lucy::Analysis::RegexTokenizer->new( token_re => qr/../ );
is_deeply( $tokenizer->split('aabbcc'),
[qw( aa bb cc )], "back compat with token_re argument" );
eval {
my $toke
- = Lucy::Analysis::Tokenizer->new(
+ = Lucy::Analysis::RegexTokenizer->new(
pattern => '\\p{Carp::confess}' );
};
like( $@, qr/\\p/, "\\p forbidden in pattern" );
eval {
my $toke
- = Lucy::Analysis::Tokenizer->new(
+ = Lucy::Analysis::RegexTokenizer->new(
pattern => '\\P{Carp::confess}' );
};
like( $@, qr/\\P/, "\\P forbidden in pattern" );
-$tokenizer = Lucy::Analysis::Tokenizer->new( pattern => '\\w+' );
+$tokenizer = Lucy::Analysis::RegexTokenizer->new( pattern => '\\w+' );
my $dump = $tokenizer->dump;
$dump->{pattern} = "\\p{Carp::confess}";
eval { $tokenizer->load($dump) };
Modified: incubator/lucy/trunk/perl/t/155-snowball_stop_filter.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/155-snowball_stop_filter.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/155-snowball_stop_filter.t (original)
+++ incubator/lucy/trunk/perl/t/155-snowball_stop_filter.t Fri Mar 11 23:35:35 2011
@@ -23,7 +23,7 @@ use Lucy::Test::TestUtils qw( test_analy
my $stopfilter = Lucy::Analysis::SnowballStopFilter->new( language => 'en' );
test_analyzer( $stopfilter, 'the', [], "single stopword stopalized" );
-my $tokenizer = Lucy::Analysis::Tokenizer->new;
+my $tokenizer = Lucy::Analysis::RegexTokenizer->new;
my $polyanalyzer = Lucy::Analysis::PolyAnalyzer->new(
analyzers => [ $tokenizer, $stopfilter ], );
test_analyzer( $polyanalyzer, 'i am the walrus',
Modified: incubator/lucy/trunk/perl/t/156-snowball_stemmer.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/156-snowball_stemmer.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/156-snowball_stemmer.t (original)
+++ incubator/lucy/trunk/perl/t/156-snowball_stemmer.t Fri Mar 11 23:35:35 2011
@@ -24,7 +24,7 @@ my $stemmer = Lucy::Analysis::SnowballSt
test_analyzer( $stemmer, 'ponies', ['poni'], "single word stemmed" );
test_analyzer( $stemmer, 'pony', ['poni'], "stem, not just truncate" );
-my $tokenizer = Lucy::Analysis::Tokenizer->new;
+my $tokenizer = Lucy::Analysis::RegexTokenizer->new;
my $polyanalyzer = Lucy::Analysis::PolyAnalyzer->new(
analyzers => [ $tokenizer, $stemmer ], );
test_analyzer(
Modified: incubator/lucy/trunk/perl/t/213-segment_merging.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/213-segment_merging.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/213-segment_merging.t (original)
+++ incubator/lucy/trunk/perl/t/213-segment_merging.t Fri Mar 11 23:35:35 2011
@@ -36,7 +36,7 @@ use base qw( Lucy::Test::TestSchema );
sub new {
my $self = shift->SUPER::new(@_);
my $type = Lucy::Plan::FullTextType->new(
- analyzer => Lucy::Analysis::Tokenizer->new,
+ analyzer => Lucy::Analysis::RegexTokenizer->new,
highlightable => 1,
);
$self->spec_field( name => 'content', type => $type );
Modified: incubator/lucy/trunk/perl/t/214-spec_field.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/214-spec_field.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/214-spec_field.t (original)
+++ incubator/lucy/trunk/perl/t/214-spec_field.t Fri Mar 11 23:35:35 2011
@@ -27,7 +27,7 @@ use base qw( Lucy::Plan::Schema );
sub new {
my $self = shift->SUPER::new(@_);
- my $tokenizer = Lucy::Analysis::Tokenizer->new;
+ my $tokenizer = Lucy::Analysis::RegexTokenizer->new;
my $polyanalyzer
= Lucy::Analysis::PolyAnalyzer->new( language => 'en' );
my $plain
Modified: incubator/lucy/trunk/perl/t/215-term_vectors.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/215-term_vectors.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/215-term_vectors.t (original)
+++ incubator/lucy/trunk/perl/t/215-term_vectors.t Fri Mar 11 23:35:35 2011
@@ -25,7 +25,7 @@ use base qw( Lucy::Plan::Schema );
sub new {
my $self = shift->SUPER::new(@_);
my $type = Lucy::Plan::FullTextType->new(
- analyzer => Lucy::Analysis::Tokenizer->new,
+ analyzer => Lucy::Analysis::RegexTokenizer->new,
highlightable => 1,
);
$self->spec_field( name => 'content', type => $type );
Modified: incubator/lucy/trunk/perl/t/218-del_merging.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/218-del_merging.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/218-del_merging.t (original)
+++ incubator/lucy/trunk/perl/t/218-del_merging.t Fri Mar 11 23:35:35 2011
@@ -32,7 +32,7 @@ use base 'Lucy::Plan::Schema';
sub new {
my $self = shift->SUPER::new(@_);
my $type = Lucy::Plan::FullTextType->new(
- analyzer => Lucy::Analysis::Tokenizer->new, );
+ analyzer => Lucy::Analysis::RegexTokenizer->new, );
$self->spec_field( name => 'foo', type => $type );
$self->spec_field( name => 'bar', type => $type );
return $self;
Modified: incubator/lucy/trunk/perl/t/219-byte_buf_doc.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/219-byte_buf_doc.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/219-byte_buf_doc.t (original)
+++ incubator/lucy/trunk/perl/t/219-byte_buf_doc.t Fri Mar 11 23:35:35 2011
@@ -63,7 +63,7 @@ sub architecture { MyArchitecture->new }
sub new {
my $self = shift->SUPER::new(@_);
- my $tokenizer = Lucy::Analysis::Tokenizer->new;
+ my $tokenizer = Lucy::Analysis::RegexTokenizer->new;
my $type = Lucy::Plan::FullTextType->new( analyzer => $tokenizer );
$self->spec_field( name => 'value', type => $type );
return $self;
Modified: incubator/lucy/trunk/perl/t/220-zlib_doc.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/220-zlib_doc.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/220-zlib_doc.t (original)
+++ incubator/lucy/trunk/perl/t/220-zlib_doc.t Fri Mar 11 23:35:35 2011
@@ -59,7 +59,7 @@ sub architecture { MyArchitecture->new }
sub new {
my $self = shift->SUPER::new(@_);
- my $tokenizer = Lucy::Analysis::Tokenizer->new;
+ my $tokenizer = Lucy::Analysis::RegexTokenizer->new;
my $main_type
= Lucy::Plan::FullTextType->new( analyzer => $tokenizer );
my $unstored_type = Lucy::Plan::FullTextType->new(
Modified: incubator/lucy/trunk/perl/t/221-sort_writer.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/221-sort_writer.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/221-sort_writer.t (original)
+++ incubator/lucy/trunk/perl/t/221-sort_writer.t Fri Mar 11 23:35:35 2011
@@ -30,7 +30,7 @@ use base qw( Lucy::Plan::Schema );
sub new {
my $self = shift->SUPER::new(@_);
my $fulltext_type = Lucy::Plan::FullTextType->new(
- analyzer => Lucy::Analysis::Tokenizer->new,
+ analyzer => Lucy::Analysis::RegexTokenizer->new,
sortable => 1,
);
my $string_type = Lucy::Plan::StringType->new( sortable => 1 );
Modified: incubator/lucy/trunk/perl/t/302-many_fields.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/302-many_fields.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/302-many_fields.t (original)
+++ incubator/lucy/trunk/perl/t/302-many_fields.t Fri Mar 11 23:35:35 2011
@@ -18,7 +18,7 @@ use warnings;
package MySchema;
use base qw( Lucy::Plan::Schema );
-use Lucy::Analysis::Tokenizer;
+use Lucy::Analysis::RegexTokenizer;
our %fields = ();
@@ -29,7 +29,7 @@ use Lucy::Test;
my $schema = MySchema->new;
my $type = Lucy::Plan::FullTextType->new(
- analyzer => Lucy::Analysis::Tokenizer->new, );
+ analyzer => Lucy::Analysis::RegexTokenizer->new, );
for my $num_fields ( 1 .. 10 ) {
# Build an index with $num_fields fields, and the same content in each.
Modified: incubator/lucy/trunk/perl/t/303-highlighter.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/303-highlighter.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/303-highlighter.t (original)
+++ incubator/lucy/trunk/perl/t/303-highlighter.t Fri Mar 11 23:35:35 2011
@@ -19,12 +19,12 @@ use lib 'buildlib';
package MySchema;
use base qw( Lucy::Plan::Schema );
-use Lucy::Analysis::Tokenizer;
+use Lucy::Analysis::RegexTokenizer;
sub new {
my $class = shift;
my $self = $class->SUPER::new(@_);
- my $tokenizer = Lucy::Analysis::Tokenizer->new;
+ my $tokenizer = Lucy::Analysis::RegexTokenizer->new;
my $plain_type = Lucy::Plan::FullTextType->new(
analyzer => $tokenizer,
highlightable => 1,
Modified: incubator/lucy/trunk/perl/t/304-verify_utf8.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/304-verify_utf8.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/304-verify_utf8.t (original)
+++ incubator/lucy/trunk/perl/t/304-verify_utf8.t Fri Mar 11 23:35:35 2011
@@ -19,11 +19,11 @@ use lib 'buildlib';
package MySchema;
use base qw( Lucy::Plan::Schema );
-use Lucy::Analysis::Tokenizer;
+use Lucy::Analysis::RegexTokenizer;
sub new {
my $self = shift->SUPER::new(@_);
- my $analyzer = Lucy::Analysis::Tokenizer->new( pattern => '\S+' );
+ my $analyzer = Lucy::Analysis::RegexTokenizer->new( pattern => '\S+' );
my $type = Lucy::Plan::FullTextType->new( analyzer => $analyzer, );
$self->spec_field( name => 'content', type => $type );
return $self;
Modified: incubator/lucy/trunk/perl/t/311-hl_selection.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/311-hl_selection.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/311-hl_selection.t (original)
+++ incubator/lucy/trunk/perl/t/311-hl_selection.t Fri Mar 11 23:35:35 2011
@@ -23,7 +23,7 @@ my $schema = Lucy::Plan::Schema->new;
$schema->spec_field(
name => 'content',
type => Lucy::Plan::FullTextType->new(
- analyzer => Lucy::Analysis::Tokenizer->new,
+ analyzer => Lucy::Analysis::RegexTokenizer->new,
highlightable => 1,
),
);
Modified: incubator/lucy/trunk/perl/t/400-match_posting.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/400-match_posting.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/400-match_posting.t (original)
+++ incubator/lucy/trunk/perl/t/400-match_posting.t Fri Mar 11 23:35:35 2011
@@ -34,12 +34,12 @@ sub make_similarity { MatchOnlySim->new
package MatchSchema;
use base qw( Lucy::Plan::Schema );
-use Lucy::Analysis::Tokenizer;
+use Lucy::Analysis::RegexTokenizer;
sub new {
my $self = shift->SUPER::new(@_);
my $type = MatchSchema::MatchOnly->new(
- analyzer => Lucy::Analysis::Tokenizer->new );
+ analyzer => Lucy::Analysis::RegexTokenizer->new );
$self->spec_field( name => 'content', type => $type );
return $self;
}
Modified: incubator/lucy/trunk/perl/t/504-similarity.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/504-similarity.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/504-similarity.t (original)
+++ incubator/lucy/trunk/perl/t/504-similarity.t Fri Mar 11 23:35:35 2011
@@ -24,11 +24,11 @@ sub make_similarity { LucyX::Index::Long
package MySchema;
use base qw( Lucy::Plan::Schema );
-use Lucy::Analysis::Tokenizer;
+use Lucy::Analysis::RegexTokenizer;
sub new {
my $self = shift->SUPER::new(@_);
- my $analyzer = Lucy::Analysis::Tokenizer->new;
+ my $analyzer = Lucy::Analysis::RegexTokenizer->new;
my $plain_type
= Lucy::Plan::FullTextType->new( analyzer => $analyzer, );
my $long_field_type
Modified: incubator/lucy/trunk/perl/t/510-remote_search.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/510-remote_search.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/510-remote_search.t (original)
+++ incubator/lucy/trunk/perl/t/510-remote_search.t Fri Mar 11 23:35:35 2011
@@ -32,12 +32,12 @@ BEGIN {
package SortSchema;
use base qw( Lucy::Plan::Schema );
-use Lucy::Analysis::Tokenizer;
+use Lucy::Analysis::RegexTokenizer;
sub new {
my $self = shift->SUPER::new(@_);
my $plain_type = Lucy::Plan::FullTextType->new(
- analyzer => Lucy::Analysis::Tokenizer->new );
+ analyzer => Lucy::Analysis::RegexTokenizer->new );
my $string_type = Lucy::Plan::StringType->new( sortable => 1 );
$self->spec_field( name => 'content', type => $plain_type );
$self->spec_field( name => 'number', type => $string_type );
Modified: incubator/lucy/trunk/perl/t/511-sort_spec.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/511-sort_spec.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/511-sort_spec.t (original)
+++ incubator/lucy/trunk/perl/t/511-sort_spec.t Fri Mar 11 23:35:35 2011
@@ -38,7 +38,7 @@ use base qw( Lucy::Plan::Schema );
sub new {
my $self = shift->SUPER::new(@_);
my $unsortable = Lucy::Plan::FullTextType->new(
- analyzer => Lucy::Analysis::Tokenizer->new, );
+ analyzer => Lucy::Analysis::RegexTokenizer->new, );
my $string_type = Lucy::Plan::StringType->new( sortable => 1 );
my $int32_type = Lucy::Plan::Int32Type->new(
indexed => 0,
Modified: incubator/lucy/trunk/perl/t/515-range_query.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/515-range_query.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/515-range_query.t (original)
+++ incubator/lucy/trunk/perl/t/515-range_query.t Fri Mar 11 23:35:35 2011
@@ -24,7 +24,7 @@ use Lucy::Test;
package RangeSchema;
use base qw( Lucy::Plan::Schema );
-use Lucy::Analysis::Tokenizer;
+use Lucy::Analysis::RegexTokenizer;
sub new {
my $self = shift->SUPER::new(@_);
Modified: incubator/lucy/trunk/perl/t/601-queryparser.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/601-queryparser.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/601-queryparser.t (original)
+++ incubator/lucy/trunk/perl/t/601-queryparser.t Fri Mar 11 23:35:35 2011
@@ -21,11 +21,11 @@ use Lucy::Test;
package PlainSchema;
use base qw( Lucy::Plan::Schema );
-use Lucy::Analysis::Tokenizer;
+use Lucy::Analysis::RegexTokenizer;
sub new {
my $self = shift->SUPER::new(@_);
- my $tokenizer = Lucy::Analysis::Tokenizer->new( pattern => '\S+' );
+ my $tokenizer = Lucy::Analysis::RegexTokenizer->new( pattern => '\S+' );
my $type = Lucy::Plan::FullTextType->new( analyzer => $tokenizer, );
$self->spec_field( name => 'content', type => $type );
return $self;
@@ -37,7 +37,7 @@ use base qw( Lucy::Plan::Schema );
sub new {
my $self = shift->SUPER::new(@_);
my $whitespace_tokenizer
- = Lucy::Analysis::Tokenizer->new( token_re => qr/\S+/ );
+ = Lucy::Analysis::RegexTokenizer->new( token_re => qr/\S+/ );
my $stopfilter
= Lucy::Analysis::SnowballStopFilter->new( stoplist => { x => 1 } );
my $polyanalyzer = Lucy::Analysis::PolyAnalyzer->new(
Modified: incubator/lucy/trunk/perl/t/602-boosts.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/602-boosts.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/602-boosts.t (original)
+++ incubator/lucy/trunk/perl/t/602-boosts.t Fri Mar 11 23:35:35 2011
@@ -25,7 +25,7 @@ use base qw( Lucy::Plan::Schema );
sub new {
my $self = shift->SUPER::new(@_);
my $type = Lucy::Plan::FullTextType->new(
- analyzer => Lucy::Analysis::Tokenizer->new );
+ analyzer => Lucy::Analysis::RegexTokenizer->new );
$self->spec_field( name => 'content', type => $type );
$self->spec_field( name => 'category', type => $type );
return $self;
@@ -36,7 +36,7 @@ use base qw( Lucy::Plan::Schema );
sub new {
my $self = shift->SUPER::new(@_);
- my $tokenizer = Lucy::Analysis::Tokenizer->new;
+ my $tokenizer = Lucy::Analysis::RegexTokenizer->new;
my $plain_type
= Lucy::Plan::FullTextType->new( analyzer => $tokenizer );
my $boosted_type = Lucy::Plan::FullTextType->new(
Modified: incubator/lucy/trunk/perl/t/604-simple_search.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/604-simple_search.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/604-simple_search.t (original)
+++ incubator/lucy/trunk/perl/t/604-simple_search.t Fri Mar 11 23:35:35 2011
@@ -18,12 +18,12 @@ use warnings;
package MySchema;
use base qw( Lucy::Plan::Schema );
-use Lucy::Analysis::Tokenizer;
+use Lucy::Analysis::RegexTokenizer;
sub new {
my $self = shift->SUPER::new(@_);
my $type = Lucy::Plan::FullTextType->new(
- analyzer => Lucy::Analysis::Tokenizer->new, );
+ analyzer => Lucy::Analysis::RegexTokenizer->new, );
$self->spec_field( name => 'title', type => $type );
$self->spec_field( name => 'body', type => $type );
return $self;
@@ -56,7 +56,7 @@ $indexer->commit;
my $searcher = Lucy::Search::IndexSearcher->new( index => $folder );
-my $tokenizer = Lucy::Analysis::Tokenizer->new;
+my $tokenizer = Lucy::Analysis::RegexTokenizer->new;
my $or_parser = Lucy::Search::QueryParser->new(
schema => $schema,
analyzer => $tokenizer,
Modified: incubator/lucy/trunk/perl/t/605-store_pos_boost.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/605-store_pos_boost.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/605-store_pos_boost.t (original)
+++ incubator/lucy/trunk/perl/t/605-store_pos_boost.t Fri Mar 11 23:35:35 2011
@@ -17,7 +17,7 @@ use strict;
use warnings;
use lib 'buildlib';
-package MyTokenizer;
+package MyRegexTokenizer;
use base qw( Lucy::Analysis::Analyzer );
use Lucy::Analysis::Inversion;
@@ -68,14 +68,14 @@ sub make_similarity { RichSim->new }
package MySchema;
use base qw( Lucy::Plan::Schema );
-use Lucy::Analysis::Tokenizer;
+use Lucy::Analysis::RegexTokenizer;
sub new {
my $self = shift->SUPER::new(@_);
my $plain_type = Lucy::Plan::FullTextType->new(
- analyzer => Lucy::Analysis::Tokenizer->new );
+ analyzer => Lucy::Analysis::RegexTokenizer->new );
my $boosted_type
- = MySchema::boosted->new( analyzer => MyTokenizer->new, );
+ = MySchema::boosted->new( analyzer => MyRegexTokenizer->new, );
$self->spec_field( name => 'plain', type => $plain_type );
$self->spec_field( name => 'boosted', type => $boosted_type );
return $self;
Modified: incubator/lucy/trunk/perl/t/607-queryparser_multi_field.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/607-queryparser_multi_field.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/607-queryparser_multi_field.t (original)
+++ incubator/lucy/trunk/perl/t/607-queryparser_multi_field.t Fri Mar 11 23:35:35 2011
@@ -19,12 +19,12 @@ use lib 'buildlib';
package MultiFieldSchema;
use base qw( Lucy::Plan::Schema );
-use Lucy::Analysis::Tokenizer;
+use Lucy::Analysis::RegexTokenizer;
sub new {
my $self = shift->SUPER::new(@_);
my $plain_type = Lucy::Plan::FullTextType->new(
- analyzer => Lucy::Analysis::Tokenizer->new );
+ analyzer => Lucy::Analysis::RegexTokenizer->new );
my $not_analyzed_type = Lucy::Plan::StringType->new;
$self->spec_field( name => 'a', type => $plain_type );
$self->spec_field( name => 'b', type => $plain_type );
Modified: incubator/lucy/trunk/perl/t/611-queryparser_syntax.t
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/611-queryparser_syntax.t?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/611-queryparser_syntax.t (original)
+++ incubator/lucy/trunk/perl/t/611-queryparser_syntax.t Fri Mar 11 23:35:35 2011
@@ -24,9 +24,9 @@ use base qw( Lucy::Plan::Schema );
sub new {
my $self = shift->SUPER::new(@_);
- my $tokenizer = Lucy::Analysis::Tokenizer->new( pattern => '\S+' );
+ my $tokenizer = Lucy::Analysis::RegexTokenizer->new( pattern => '\S+' );
my $wordchar_tokenizer
- = Lucy::Analysis::Tokenizer->new( pattern => '\w+', );
+ = Lucy::Analysis::RegexTokenizer->new( pattern => '\w+', );
my $stopfilter
= Lucy::Analysis::SnowballStopFilter->new( stoplist => { x => 1 } );
my $fancy_analyzer = Lucy::Analysis::PolyAnalyzer->new(
Copied: incubator/lucy/trunk/perl/t/core/154-regex_tokenizer.t (from r1078955, incubator/lucy/trunk/perl/t/core/154-tokenizer.t)
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/t/core/154-regex_tokenizer.t?p2=incubator/lucy/trunk/perl/t/core/154-regex_tokenizer.t&p1=incubator/lucy/trunk/perl/t/core/154-tokenizer.t&r1=1078955&r2=1080795&rev=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/t/core/154-tokenizer.t (original)
+++ incubator/lucy/trunk/perl/t/core/154-regex_tokenizer.t Fri Mar 11 23:35:35 2011
@@ -17,5 +17,5 @@ use strict;
use warnings;
use Lucy::Test;
-Lucy::Test::run_tests("TestTokenizer");
+Lucy::Test::run_tests("TestRegexTokenizer");
Modified: incubator/lucy/trunk/perl/xs/Lucy/Analysis/Tokenizer.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/xs/Lucy/Analysis/Tokenizer.c?rev=1080795&r1=1080794&r2=1080795&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/xs/Lucy/Analysis/Tokenizer.c (original)
+++ incubator/lucy/trunk/perl/xs/Lucy/Analysis/Tokenizer.c Fri Mar 11 23:35:35 2011
@@ -14,11 +14,11 @@
* limitations under the License.
*/
-#define C_LUCY_TOKENIZER
+#define C_LUCY_REGEXTOKENIZER
#define C_LUCY_TOKEN
#include "XSBind.h"
-#include "Lucy/Analysis/Tokenizer.h"
+#include "Lucy/Analysis/RegexTokenizer.h"
#include "Lucy/Analysis/Token.h"
#include "Lucy/Analysis/Inversion.h"
#include "Lucy/Object/Host.h"
@@ -26,13 +26,14 @@
#include "Lucy/Util/StringHelper.h"
static void
-S_set_token_re_but_not_pattern(lucy_Tokenizer *self, void *token_re);
+S_set_token_re_but_not_pattern(lucy_RegexTokenizer *self, void *token_re);
static void
-S_set_pattern_from_token_re(lucy_Tokenizer *self, void *token_re);
+S_set_pattern_from_token_re(lucy_RegexTokenizer *self, void *token_re);
-lucy_Tokenizer*
-lucy_Tokenizer_init(lucy_Tokenizer *self, const lucy_CharBuf *pattern)
+lucy_RegexTokenizer*
+lucy_RegexTokenizer_init(lucy_RegexTokenizer *self,
+ const lucy_CharBuf *pattern)
{
SV *token_re_sv;
@@ -53,7 +54,7 @@ lucy_Tokenizer_init(lucy_Tokenizer *self
}
// Acquire a compiled regex engine for matching one token.
- token_re_sv = (SV*)lucy_Host_callback_host(LUCY_TOKENIZER,
+ token_re_sv = (SV*)lucy_Host_callback_host(LUCY_REGEXTOKENIZER,
"compile_token_re", 1, CFISH_ARG_STR("pattern", self->pattern));
S_set_token_re_but_not_pattern(self, SvRV(token_re_sv));
@@ -61,7 +62,7 @@ lucy_Tokenizer_init(lucy_Tokenizer *self
}
static void
-S_set_token_re_but_not_pattern(lucy_Tokenizer *self, void *token_re)
+S_set_token_re_but_not_pattern(lucy_RegexTokenizer *self, void *token_re)
{
MAGIC *magic = NULL;
REGEXP *rx;
@@ -84,7 +85,7 @@ S_set_token_re_but_not_pattern(lucy_Toke
}
static void
-S_set_pattern_from_token_re(lucy_Tokenizer *self, void *token_re)
+S_set_pattern_from_token_re(lucy_RegexTokenizer *self, void *token_re)
{
SV *rv = newRV((SV*)token_re);
STRLEN len = 0;
@@ -94,7 +95,7 @@ S_set_pattern_from_token_re(lucy_Tokeniz
}
void
-lucy_Tokenizer_set_token_re(lucy_Tokenizer *self, void *token_re)
+lucy_RegexTokenizer_set_token_re(lucy_RegexTokenizer *self, void *token_re)
{
S_set_token_re_but_not_pattern(self, token_re);
// Set pattern as a side effect.
@@ -102,16 +103,17 @@ lucy_Tokenizer_set_token_re(lucy_Tokeniz
}
void
-lucy_Tokenizer_destroy(lucy_Tokenizer *self)
+lucy_RegexTokenizer_destroy(lucy_RegexTokenizer *self)
{
LUCY_DECREF(self->pattern);
ReREFCNT_dec(((REGEXP*)self->token_re));
- LUCY_SUPER_DESTROY(self, LUCY_TOKENIZER);
+ LUCY_SUPER_DESTROY(self, LUCY_REGEXTOKENIZER);
}
void
-lucy_Tokenizer_tokenize_str(lucy_Tokenizer *self, const char *string,
- size_t string_len, lucy_Inversion *inversion)
+lucy_RegexTokenizer_tokenize_str(lucy_RegexTokenizer *self,
+ const char *string, size_t string_len,
+ lucy_Inversion *inversion)
{
uint32_t num_code_points = 0;
SV *wrapper = sv_newmortal();