You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by ma...@apache.org on 2013/07/17 16:12:23 UTC
[lucy-commits] [03/34] git commit: refs/heads/master - Migrate Lucy's analysis
classes to IVARS.
Migrate Lucy's analysis classes to IVARS.
Change all of Lucy's analysis classes to access instance variables via an
IVARS struct rather than via `self`.
Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/68352179
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/68352179
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/68352179
Branch: refs/heads/master
Commit: 683521798756a052cd279e55d1d3eac811cb4823
Parents: ec90e66
Author: Marvin Humphrey <ma...@rectangular.com>
Authored: Thu Jun 27 13:59:37 2013 -0700
Committer: Marvin Humphrey <ma...@rectangular.com>
Committed: Tue Jul 16 15:50:06 2013 -0700
----------------------------------------------------------------------
core/Lucy/Analysis/Analyzer.c | 5 +-
core/Lucy/Analysis/CaseFolder.c | 16 ++--
core/Lucy/Analysis/EasyAnalyzer.c | 44 ++++++----
core/Lucy/Analysis/Inversion.c | 124 ++++++++++++++-------------
core/Lucy/Analysis/Normalizer.c | 39 +++++----
core/Lucy/Analysis/PolyAnalyzer.c | 30 ++++---
core/Lucy/Analysis/RegexTokenizer.c | 15 ++--
core/Lucy/Analysis/SnowballStemmer.c | 43 ++++++----
core/Lucy/Analysis/SnowballStopFilter.c | 24 ++++--
core/Lucy/Analysis/StandardTokenizer.c | 8 +-
core/Lucy/Analysis/Token.c | 54 ++++++------
11 files changed, 227 insertions(+), 175 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucy/blob/68352179/core/Lucy/Analysis/Analyzer.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Analysis/Analyzer.c b/core/Lucy/Analysis/Analyzer.c
index 2ba60e5..ab23fdc 100644
--- a/core/Lucy/Analysis/Analyzer.c
+++ b/core/Lucy/Analysis/Analyzer.c
@@ -47,7 +47,10 @@ Analyzer_split(Analyzer *self, CharBuf *text) {
Token *token;
while ((token = Inversion_Next(inversion)) != NULL) {
- VA_Push(out, (Obj*)CB_new_from_trusted_utf8(token->text, token->len));
+ TokenIVARS *const token_ivars = Token_IVARS(token);
+ CharBuf *string
+ = CB_new_from_trusted_utf8(token_ivars->text, token_ivars->len);
+ VA_Push(out, (Obj*)string);
}
DECREF(inversion);
http://git-wip-us.apache.org/repos/asf/lucy/blob/68352179/core/Lucy/Analysis/CaseFolder.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Analysis/CaseFolder.c b/core/Lucy/Analysis/CaseFolder.c
index 313d0cd..9330caf 100644
--- a/core/Lucy/Analysis/CaseFolder.c
+++ b/core/Lucy/Analysis/CaseFolder.c
@@ -30,31 +30,33 @@ CaseFolder_new() {
CaseFolder*
CaseFolder_init(CaseFolder *self) {
Analyzer_init((Analyzer*)self);
- self->normalizer = Normalizer_new(NULL, true, false);
+ CaseFolderIVARS *const ivars = CaseFolder_IVARS(self);
+ ivars->normalizer = Normalizer_new(NULL, true, false);
return self;
}
void
CaseFolder_destroy(CaseFolder *self) {
- DECREF(self->normalizer);
+ CaseFolderIVARS *const ivars = CaseFolder_IVARS(self);
+ DECREF(ivars->normalizer);
SUPER_DESTROY(self, CASEFOLDER);
}
Inversion*
CaseFolder_transform(CaseFolder *self, Inversion *inversion) {
- return Normalizer_Transform(self->normalizer, inversion);
+ CaseFolderIVARS *const ivars = CaseFolder_IVARS(self);
+ return Normalizer_Transform(ivars->normalizer, inversion);
}
Inversion*
CaseFolder_transform_text(CaseFolder *self, CharBuf *text) {
- return Normalizer_Transform_Text(self->normalizer, text);
+ CaseFolderIVARS *const ivars = CaseFolder_IVARS(self);
+ return Normalizer_Transform_Text(ivars->normalizer, text);
}
bool
CaseFolder_equals(CaseFolder *self, Obj *other) {
- CaseFolder *const twin = (CaseFolder*)other;
- if (twin == self) { return true; }
- UNUSED_VAR(self);
+ if ((CaseFolder*)other == self) { return true; }
if (!Obj_Is_A(other, CASEFOLDER)) { return false; }
return true;
}
http://git-wip-us.apache.org/repos/asf/lucy/blob/68352179/core/Lucy/Analysis/EasyAnalyzer.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Analysis/EasyAnalyzer.c b/core/Lucy/Analysis/EasyAnalyzer.c
index 8d1534c..5441fd8 100644
--- a/core/Lucy/Analysis/EasyAnalyzer.c
+++ b/core/Lucy/Analysis/EasyAnalyzer.c
@@ -32,48 +32,53 @@ EasyAnalyzer_new(const CharBuf *language) {
EasyAnalyzer*
EasyAnalyzer_init(EasyAnalyzer *self, const CharBuf *language) {
Analyzer_init((Analyzer*)self);
- self->language = CB_Clone(language);
- self->tokenizer = StandardTokenizer_new();
- self->normalizer = Normalizer_new(NULL, true, false);
- self->stemmer = SnowStemmer_new(language);
+ EasyAnalyzerIVARS *const ivars = EasyAnalyzer_IVARS(self);
+ ivars->language = CB_Clone(language);
+ ivars->tokenizer = StandardTokenizer_new();
+ ivars->normalizer = Normalizer_new(NULL, true, false);
+ ivars->stemmer = SnowStemmer_new(language);
return self;
}
void
EasyAnalyzer_destroy(EasyAnalyzer *self) {
- DECREF(self->language);
- DECREF(self->tokenizer);
- DECREF(self->normalizer);
- DECREF(self->stemmer);
+ EasyAnalyzerIVARS *const ivars = EasyAnalyzer_IVARS(self);
+ DECREF(ivars->language);
+ DECREF(ivars->tokenizer);
+ DECREF(ivars->normalizer);
+ DECREF(ivars->stemmer);
SUPER_DESTROY(self, EASYANALYZER);
}
Inversion*
EasyAnalyzer_transform(EasyAnalyzer *self, Inversion *inversion) {
- Inversion *inv1 = StandardTokenizer_Transform(self->tokenizer, inversion);
- Inversion *inv2 = Normalizer_Transform(self->normalizer, inv1);
+ EasyAnalyzerIVARS *const ivars = EasyAnalyzer_IVARS(self);
+ Inversion *inv1 = StandardTokenizer_Transform(ivars->tokenizer, inversion);
+ Inversion *inv2 = Normalizer_Transform(ivars->normalizer, inv1);
DECREF(inv1);
- inv1 = SnowStemmer_Transform(self->stemmer, inv2);
+ inv1 = SnowStemmer_Transform(ivars->stemmer, inv2);
DECREF(inv2);
return inv1;
}
Inversion*
EasyAnalyzer_transform_text(EasyAnalyzer *self, CharBuf *text) {
- Inversion *inv1 = StandardTokenizer_Transform_Text(self->tokenizer, text);
- Inversion *inv2 = Normalizer_Transform(self->normalizer, inv1);
+ EasyAnalyzerIVARS *const ivars = EasyAnalyzer_IVARS(self);
+ Inversion *inv1 = StandardTokenizer_Transform_Text(ivars->tokenizer, text);
+ Inversion *inv2 = Normalizer_Transform(ivars->normalizer, inv1);
DECREF(inv1);
- inv1 = SnowStemmer_Transform(self->stemmer, inv2);
+ inv1 = SnowStemmer_Transform(ivars->stemmer, inv2);
DECREF(inv2);
return inv1;
}
Hash*
EasyAnalyzer_dump(EasyAnalyzer *self) {
+ EasyAnalyzerIVARS *const ivars = EasyAnalyzer_IVARS(self);
EasyAnalyzer_Dump_t super_dump
= SUPER_METHOD_PTR(EASYANALYZER, Lucy_EasyAnalyzer_Dump);
Hash *dump = super_dump(self);
- Hash_Store_Str(dump, "language", 8, (Obj*)CB_Clone(self->language));
+ Hash_Store_Str(dump, "language", 8, (Obj*)CB_Clone(ivars->language));
return dump;
}
@@ -90,10 +95,11 @@ EasyAnalyzer_load(EasyAnalyzer *self, Obj *dump) {
bool
EasyAnalyzer_equals(EasyAnalyzer *self, Obj *other) {
- EasyAnalyzer *const twin = (EasyAnalyzer*)other;
- if (twin == self) { return true; }
- if (!Obj_Is_A(other, EASYANALYZER)) { return false; }
- if (!CB_Equals(twin->language, (Obj*)self->language)) { return false; }
+ if ((EasyAnalyzer*)other == self) { return true; }
+ if (!Obj_Is_A(other, EASYANALYZER)) { return false; }
+ EasyAnalyzerIVARS *const ivars = EasyAnalyzer_IVARS(self);
+ EasyAnalyzerIVARS *const ovars = EasyAnalyzer_IVARS((EasyAnalyzer*)other);
+ if (!CB_Equals(ovars->language, (Obj*)ivars->language)) { return false; }
return true;
}
http://git-wip-us.apache.org/repos/asf/lucy/blob/68352179/core/Lucy/Analysis/Inversion.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Analysis/Inversion.c b/core/Lucy/Analysis/Inversion.c
index abfede2..9a3fb34 100644
--- a/core/Lucy/Analysis/Inversion.c
+++ b/core/Lucy/Analysis/Inversion.c
@@ -24,20 +24,21 @@
// After inversion, record how many like tokens occur in each group.
static void
-S_count_clusters(Inversion *self);
+S_count_clusters(Inversion *self, InversionIVARS *ivars);
Inversion*
Inversion_new(Token *seed_token) {
Inversion *self = (Inversion*)VTable_Make_Obj(INVERSION);
+ InversionIVARS *const ivars = Inversion_IVARS(self);
// Init.
- self->cap = 16;
- self->size = 0;
- self->tokens = (Token**)CALLOCATE(self->cap, sizeof(Token*));
- self->cur = 0;
- self->inverted = false;
- self->cluster_counts = NULL;
- self->cluster_counts_size = 0;
+ ivars->cap = 16;
+ ivars->size = 0;
+ ivars->tokens = (Token**)CALLOCATE(ivars->cap, sizeof(Token*));
+ ivars->cur = 0;
+ ivars->inverted = false;
+ ivars->cluster_counts = NULL;
+ ivars->cluster_counts_size = 0;
// Process the seed token.
if (seed_token != NULL) {
@@ -49,138 +50,145 @@ Inversion_new(Token *seed_token) {
void
Inversion_destroy(Inversion *self) {
- if (self->tokens) {
- Token **tokens = self->tokens;
- Token **const limit = tokens + self->size;
+ InversionIVARS *const ivars = Inversion_IVARS(self);
+ if (ivars->tokens) {
+ Token **tokens = ivars->tokens;
+ Token **const limit = tokens + ivars->size;
for (; tokens < limit; tokens++) {
DECREF(*tokens);
}
- FREEMEM(self->tokens);
+ FREEMEM(ivars->tokens);
}
- FREEMEM(self->cluster_counts);
+ FREEMEM(ivars->cluster_counts);
SUPER_DESTROY(self, INVERSION);
}
uint32_t
Inversion_get_size(Inversion *self) {
- return self->size;
+ return Inversion_IVARS(self)->size;
}
Token*
Inversion_next(Inversion *self) {
+ InversionIVARS *const ivars = Inversion_IVARS(self);
// Kill the iteration if we're out of tokens.
- if (self->cur == self->size) {
+ if (ivars->cur == ivars->size) {
return NULL;
}
- return self->tokens[self->cur++];
+ return ivars->tokens[ivars->cur++];
}
void
Inversion_reset(Inversion *self) {
- self->cur = 0;
+ Inversion_IVARS(self)->cur = 0;
}
static void
S_grow(Inversion *self, size_t size) {
- if (size > self->cap) {
+ InversionIVARS *const ivars = Inversion_IVARS(self);
+ if (size > ivars->cap) {
uint64_t amount = size * sizeof(Token*);
// Clip rather than wrap.
if (amount > SIZE_MAX || amount < size) { amount = SIZE_MAX; }
- self->tokens = (Token**)REALLOCATE(self->tokens, (size_t)amount);
- self->cap = size;
- memset(self->tokens + self->size, 0,
- (size - self->size) * sizeof(Token*));
+ ivars->tokens = (Token**)REALLOCATE(ivars->tokens, (size_t)amount);
+ ivars->cap = size;
+ memset(ivars->tokens + ivars->size, 0,
+ (size - ivars->size) * sizeof(Token*));
}
}
void
Inversion_append(Inversion *self, Token *token) {
- if (self->inverted) {
+ InversionIVARS *const ivars = Inversion_IVARS(self);
+ if (ivars->inverted) {
THROW(ERR, "Can't append tokens after inversion");
}
- if (self->size >= self->cap) {
- size_t new_capacity = Memory_oversize(self->size + 1, sizeof(Token*));
+ if (ivars->size >= ivars->cap) {
+ size_t new_capacity = Memory_oversize(ivars->size + 1, sizeof(Token*));
S_grow(self, new_capacity);
}
- self->tokens[self->size] = token;
- self->size++;
+ ivars->tokens[ivars->size] = token;
+ ivars->size++;
}
Token**
Inversion_next_cluster(Inversion *self, uint32_t *count) {
- Token **cluster = self->tokens + self->cur;
+ InversionIVARS *const ivars = Inversion_IVARS(self);
+ Token **cluster = ivars->tokens + ivars->cur;
- if (self->cur == self->size) {
+ if (ivars->cur == ivars->size) {
*count = 0;
return NULL;
}
// Don't read past the end of the cluster counts array.
- if (!self->inverted) {
+ if (!ivars->inverted) {
THROW(ERR, "Inversion not yet inverted");
}
- if (self->cur > self->cluster_counts_size) {
+ if (ivars->cur > ivars->cluster_counts_size) {
THROW(ERR, "Tokens were added after inversion");
}
// Place cluster count in passed-in var, advance bookmark.
- *count = self->cluster_counts[self->cur];
- self->cur += *count;
+ *count = ivars->cluster_counts[ivars->cur];
+ ivars->cur += *count;
return cluster;
}
void
Inversion_invert(Inversion *self) {
- Token **tokens = self->tokens;
- Token **limit = tokens + self->size;
+ InversionIVARS *const ivars = Inversion_IVARS(self);
+ Token **tokens = ivars->tokens;
+ Token **limit = tokens + ivars->size;
int32_t token_pos = 0;
// Thwart future attempts to append.
- if (self->inverted) {
+ if (ivars->inverted) {
THROW(ERR, "Inversion has already been inverted");
}
- self->inverted = true;
+ ivars->inverted = true;
// Assign token positions.
for (; tokens < limit; tokens++) {
- Token *const cur_token = *tokens;
- cur_token->pos = token_pos;
- token_pos = (int32_t)((uint32_t)token_pos + (uint32_t)cur_token->pos_inc);
- if (token_pos < cur_token->pos) {
+ TokenIVARS *const cur_token_ivars = Token_IVARS(*tokens);
+ cur_token_ivars->pos = token_pos;
+ token_pos = (int32_t)((uint32_t)token_pos
+ + (uint32_t)cur_token_ivars->pos_inc);
+ if (token_pos < cur_token_ivars->pos) {
THROW(ERR, "Token positions out of order: %i32 %i32",
- cur_token->pos, token_pos);
+ cur_token_ivars->pos, token_pos);
}
}
// Sort the tokens lexically, and hand off to cluster counting routine.
- Sort_quicksort(self->tokens, self->size, sizeof(Token*), Token_compare,
+ Sort_quicksort(ivars->tokens, ivars->size, sizeof(Token*), Token_compare,
NULL);
- S_count_clusters(self);
+ S_count_clusters(self, ivars);
}
static void
-S_count_clusters(Inversion *self) {
- Token **tokens = self->tokens;
+S_count_clusters(Inversion *self, InversionIVARS *ivars) {
+ UNUSED_VAR(self);
+ Token **tokens = ivars->tokens;
uint32_t *counts
- = (uint32_t*)CALLOCATE(self->size + 1, sizeof(uint32_t));
+ = (uint32_t*)CALLOCATE(ivars->size + 1, sizeof(uint32_t));
// Save the cluster counts.
- self->cluster_counts_size = self->size;
- self->cluster_counts = counts;
+ ivars->cluster_counts_size = ivars->size;
+ ivars->cluster_counts = counts;
- for (uint32_t i = 0; i < self->size;) {
- Token *const base_token = tokens[i];
- char *const base_text = base_token->text;
- const size_t base_len = base_token->len;
+ for (uint32_t i = 0; i < ivars->size;) {
+ TokenIVARS *const base_token_ivars = Token_IVARS(tokens[i]);
+ char *const base_text = base_token_ivars->text;
+ const size_t base_len = base_token_ivars->len;
uint32_t j = i + 1;
// Iterate through tokens until text doesn't match.
- while (j < self->size) {
- Token *const candidate = tokens[j];
-
- if ((candidate->len == base_len)
- && (memcmp(candidate->text, base_text, base_len) == 0)
+ while (j < ivars->size) {
+ TokenIVARS *const candidate_ivars = Token_IVARS(tokens[j]);
+ if ((candidate_ivars->len == base_len)
+ && (memcmp(candidate_ivars->text, base_text, base_len) == 0)
) {
j++;
}
http://git-wip-us.apache.org/repos/asf/lucy/blob/68352179/core/Lucy/Analysis/Normalizer.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Analysis/Normalizer.c b/core/Lucy/Analysis/Normalizer.c
index 3102882..a120db3 100644
--- a/core/Lucy/Analysis/Normalizer.c
+++ b/core/Lucy/Analysis/Normalizer.c
@@ -37,6 +37,7 @@ Normalizer*
Normalizer_init(Normalizer *self, const CharBuf *form, bool case_fold,
bool strip_accents) {
int options = UTF8PROC_STABLE;
+ NormalizerIVARS *const ivars = Normalizer_IVARS(self);
if (form == NULL
|| CB_Equals_Str(form, "NFKC", 4) || CB_Equals_Str(form, "nfkc", 4)
@@ -59,7 +60,7 @@ Normalizer_init(Normalizer *self, const CharBuf *form, bool case_fold,
if (case_fold) { options |= UTF8PROC_CASEFOLD; }
if (strip_accents) { options |= UTF8PROC_STRIPMARK; }
- self->options = options;
+ ivars->options = options;
return self;
}
@@ -72,10 +73,14 @@ Normalizer_transform(Normalizer *self, Inversion *inversion) {
int32_t *buffer = static_buffer;
ssize_t bufsize = INITIAL_BUFSIZE;
Token *token;
+ NormalizerIVARS *const ivars = Normalizer_IVARS(self);
while (NULL != (token = Inversion_Next(inversion))) {
- ssize_t len = utf8proc_decompose((uint8_t*)token->text, token->len,
- buffer, bufsize, self->options);
+ TokenIVARS *const token_ivars = Token_IVARS(token);
+ ssize_t len
+ = utf8proc_decompose((uint8_t*)token_ivars->text,
+ token_ivars->len, buffer, bufsize,
+ ivars->options);
if (len > bufsize) {
// buffer too small, (re)allocate
@@ -85,22 +90,23 @@ Normalizer_transform(Normalizer *self, Inversion *inversion) {
// allocate additional INITIAL_BUFSIZE items
bufsize = len + INITIAL_BUFSIZE;
buffer = (int32_t*)MALLOCATE((bufsize + 1) * sizeof(int32_t));
- len = utf8proc_decompose((uint8_t*)token->text, token->len,
- buffer, bufsize, self->options);
+ len = utf8proc_decompose((uint8_t*)token_ivars->text,
+ token_ivars->len, buffer, bufsize,
+ ivars->options);
}
if (len < 0) {
continue;
}
- len = utf8proc_reencode(buffer, len, self->options);
+ len = utf8proc_reencode(buffer, len, ivars->options);
if (len >= 0) {
- if (len > (ssize_t)token->len) {
- FREEMEM(token->text);
- token->text = (char*)MALLOCATE(len + 1);
+ if (len > (ssize_t)token_ivars->len) {
+ FREEMEM(token_ivars->text);
+ token_ivars->text = (char*)MALLOCATE(len + 1);
}
- memcpy(token->text, buffer, len + 1);
- token->len = len;
+ memcpy(token_ivars->text, buffer, len + 1);
+ token_ivars->len = len;
}
}
@@ -117,7 +123,7 @@ Normalizer_dump(Normalizer *self) {
Normalizer_Dump_t super_dump
= SUPER_METHOD_PTR(NORMALIZER, Lucy_Normalizer_Dump);
Hash *dump = super_dump(self);
- int options = self->options;
+ int options = Normalizer_IVARS(self)->options;
CharBuf *form = options & UTF8PROC_COMPOSE ?
options & UTF8PROC_COMPAT ?
@@ -157,10 +163,11 @@ Normalizer_load(Normalizer *self, Obj *dump) {
bool
Normalizer_equals(Normalizer *self, Obj *other) {
- Normalizer *const twin = (Normalizer*)other;
- if (twin == self) { return true; }
- if (!Obj_Is_A(other, NORMALIZER)) { return false; }
- if (twin->options != self->options) { return false; }
+ if ((Normalizer*)other == self) { return true; }
+ if (!Obj_Is_A(other, NORMALIZER)) { return false; }
+ NormalizerIVARS *const ivars = Normalizer_IVARS(self);
+ NormalizerIVARS *const ovars = Normalizer_IVARS((Normalizer*)other);
+ if (ovars->options != ivars->options) { return false; }
return true;
}
http://git-wip-us.apache.org/repos/asf/lucy/blob/68352179/core/Lucy/Analysis/PolyAnalyzer.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Analysis/PolyAnalyzer.c b/core/Lucy/Analysis/PolyAnalyzer.c
index dd635c1..f755e4e 100644
--- a/core/Lucy/Analysis/PolyAnalyzer.c
+++ b/core/Lucy/Analysis/PolyAnalyzer.c
@@ -34,17 +34,19 @@ PolyAnalyzer*
PolyAnalyzer_init(PolyAnalyzer *self, const CharBuf *language,
VArray *analyzers) {
Analyzer_init((Analyzer*)self);
+ PolyAnalyzerIVARS *const ivars = PolyAnalyzer_IVARS(self);
+
if (analyzers) {
for (uint32_t i = 0, max = VA_Get_Size(analyzers); i < max; i++) {
CERTIFY(VA_Fetch(analyzers, i), ANALYZER);
}
- self->analyzers = (VArray*)INCREF(analyzers);
+ ivars->analyzers = (VArray*)INCREF(analyzers);
}
else if (language) {
- self->analyzers = VA_new(3);
- VA_Push(self->analyzers, (Obj*)CaseFolder_new());
- VA_Push(self->analyzers, (Obj*)RegexTokenizer_new(NULL));
- VA_Push(self->analyzers, (Obj*)SnowStemmer_new(language));
+ ivars->analyzers = VA_new(3);
+ VA_Push(ivars->analyzers, (Obj*)CaseFolder_new());
+ VA_Push(ivars->analyzers, (Obj*)RegexTokenizer_new(NULL));
+ VA_Push(ivars->analyzers, (Obj*)SnowStemmer_new(language));
}
else {
THROW(ERR, "Must specify either 'language' or 'analyzers'");
@@ -55,18 +57,19 @@ PolyAnalyzer_init(PolyAnalyzer *self, const CharBuf *language,
void
PolyAnalyzer_destroy(PolyAnalyzer *self) {
- DECREF(self->analyzers);
+ PolyAnalyzerIVARS *const ivars = PolyAnalyzer_IVARS(self);
+ DECREF(ivars->analyzers);
SUPER_DESTROY(self, POLYANALYZER);
}
VArray*
PolyAnalyzer_get_analyzers(PolyAnalyzer *self) {
- return self->analyzers;
+ return PolyAnalyzer_IVARS(self)->analyzers;
}
Inversion*
PolyAnalyzer_transform(PolyAnalyzer *self, Inversion *inversion) {
- VArray *const analyzers = self->analyzers;
+ VArray *const analyzers = PolyAnalyzer_IVARS(self)->analyzers;
(void)INCREF(inversion);
// Iterate through each of the analyzers in order.
@@ -82,7 +85,7 @@ PolyAnalyzer_transform(PolyAnalyzer *self, Inversion *inversion) {
Inversion*
PolyAnalyzer_transform_text(PolyAnalyzer *self, CharBuf *text) {
- VArray *const analyzers = self->analyzers;
+ VArray *const analyzers = PolyAnalyzer_IVARS(self)->analyzers;
const uint32_t num_analyzers = VA_Get_Size(analyzers);
Inversion *retval;
@@ -109,10 +112,11 @@ PolyAnalyzer_transform_text(PolyAnalyzer *self, CharBuf *text) {
bool
PolyAnalyzer_equals(PolyAnalyzer *self, Obj *other) {
- PolyAnalyzer *const twin = (PolyAnalyzer*)other;
- if (twin == self) { return true; }
- if (!Obj_Is_A(other, POLYANALYZER)) { return false; }
- if (!VA_Equals(twin->analyzers, (Obj*)self->analyzers)) { return false; }
+ if ((PolyAnalyzer*)other == self) { return true; }
+ if (!Obj_Is_A(other, POLYANALYZER)) { return false; }
+ PolyAnalyzerIVARS *const ivars = PolyAnalyzer_IVARS(self);
+ PolyAnalyzerIVARS *const ovars = PolyAnalyzer_IVARS((PolyAnalyzer*)other);
+ if (!VA_Equals(ovars->analyzers, (Obj*)ivars->analyzers)) { return false; }
return true;
}
http://git-wip-us.apache.org/repos/asf/lucy/blob/68352179/core/Lucy/Analysis/RegexTokenizer.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Analysis/RegexTokenizer.c b/core/Lucy/Analysis/RegexTokenizer.c
index 628571f..b44095e 100644
--- a/core/Lucy/Analysis/RegexTokenizer.c
+++ b/core/Lucy/Analysis/RegexTokenizer.c
@@ -34,7 +34,8 @@ RegexTokenizer_transform(RegexTokenizer *self, Inversion *inversion) {
Token *token;
while (NULL != (token = Inversion_Next(inversion))) {
- RegexTokenizer_Tokenize_Str(self, token->text, token->len,
+ TokenIVARS *const token_ivars = Token_IVARS(token);
+ RegexTokenizer_Tokenize_Str(self, token_ivars->text, token_ivars->len,
new_inversion);
}
@@ -51,10 +52,11 @@ RegexTokenizer_transform_text(RegexTokenizer *self, CharBuf *text) {
Obj*
RegexTokenizer_dump(RegexTokenizer *self) {
+ RegexTokenizerIVARS *const ivars = RegexTokenizer_IVARS(self);
RegexTokenizer_Dump_t super_dump
= SUPER_METHOD_PTR(REGEXTOKENIZER, Lucy_RegexTokenizer_Dump);
Hash *dump = (Hash*)CERTIFY(super_dump(self), HASH);
- Hash_Store_Str(dump, "pattern", 7, CB_Dump(self->pattern));
+ Hash_Store_Str(dump, "pattern", 7, CB_Dump(ivars->pattern));
return (Obj*)dump;
}
@@ -71,10 +73,11 @@ RegexTokenizer_load(RegexTokenizer *self, Obj *dump) {
bool
RegexTokenizer_equals(RegexTokenizer *self, Obj *other) {
- RegexTokenizer *const twin = (RegexTokenizer*)other;
- if (twin == self) { return true; }
- if (!Obj_Is_A(other, REGEXTOKENIZER)) { return false; }
- if (!CB_Equals(twin->pattern, (Obj*)self->pattern)) { return false; }
+ if ((RegexTokenizer*)other == self) { return true; }
+ if (!Obj_Is_A(other, REGEXTOKENIZER)) { return false; }
+ RegexTokenizerIVARS *ivars = RegexTokenizer_IVARS(self);
+ RegexTokenizerIVARS *ovars = RegexTokenizer_IVARS((RegexTokenizer*)other);
+ if (!CB_Equals(ivars->pattern, (Obj*)ovars->pattern)) { return false; }
return true;
}
http://git-wip-us.apache.org/repos/asf/lucy/blob/68352179/core/Lucy/Analysis/SnowballStemmer.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Analysis/SnowballStemmer.c b/core/Lucy/Analysis/SnowballStemmer.c
index 67be914..2229389 100644
--- a/core/Lucy/Analysis/SnowballStemmer.c
+++ b/core/Lucy/Analysis/SnowballStemmer.c
@@ -35,14 +35,15 @@ SnowballStemmer*
SnowStemmer_init(SnowballStemmer *self, const CharBuf *language) {
char lang_buf[3];
Analyzer_init((Analyzer*)self);
- self->language = CB_Clone(language);
+ SnowballStemmerIVARS *const ivars = SnowStemmer_IVARS(self);
+ ivars->language = CB_Clone(language);
// Get a Snowball stemmer. Be case-insensitive.
lang_buf[0] = tolower(CB_Code_Point_At(language, 0));
lang_buf[1] = tolower(CB_Code_Point_At(language, 1));
lang_buf[2] = '\0';
- self->snowstemmer = sb_stemmer_new(lang_buf, "UTF_8");
- if (!self->snowstemmer) {
+ ivars->snowstemmer = sb_stemmer_new(lang_buf, "UTF_8");
+ if (!ivars->snowstemmer) {
THROW(ERR, "Can't find a Snowball stemmer for %o", language);
}
@@ -51,29 +52,33 @@ SnowStemmer_init(SnowballStemmer *self, const CharBuf *language) {
void
SnowStemmer_destroy(SnowballStemmer *self) {
- if (self->snowstemmer) {
- sb_stemmer_delete((struct sb_stemmer*)self->snowstemmer);
+ SnowballStemmerIVARS *const ivars = SnowStemmer_IVARS(self);
+ if (ivars->snowstemmer) {
+ sb_stemmer_delete((struct sb_stemmer*)ivars->snowstemmer);
}
- DECREF(self->language);
+ DECREF(ivars->language);
SUPER_DESTROY(self, SNOWBALLSTEMMER);
}
Inversion*
SnowStemmer_transform(SnowballStemmer *self, Inversion *inversion) {
Token *token;
+ SnowballStemmerIVARS *const ivars = SnowStemmer_IVARS(self);
struct sb_stemmer *const snowstemmer
- = (struct sb_stemmer*)self->snowstemmer;
+ = (struct sb_stemmer*)ivars->snowstemmer;
while (NULL != (token = Inversion_Next(inversion))) {
+ TokenIVARS *const token_ivars = Token_IVARS(token);
const sb_symbol *stemmed_text
- = sb_stemmer_stem(snowstemmer, (sb_symbol*)token->text, token->len);
+ = sb_stemmer_stem(snowstemmer, (sb_symbol*)token_ivars->text,
+ token_ivars->len);
size_t len = sb_stemmer_length(snowstemmer);
- if (len > token->len) {
- FREEMEM(token->text);
- token->text = (char*)MALLOCATE(len + 1);
+ if (len > token_ivars->len) {
+ FREEMEM(token_ivars->text);
+ token_ivars->text = (char*)MALLOCATE(len + 1);
}
- memcpy(token->text, stemmed_text, len + 1);
- token->len = len;
+ memcpy(token_ivars->text, stemmed_text, len + 1);
+ token_ivars->len = len;
}
Inversion_Reset(inversion);
return (Inversion*)INCREF(inversion);
@@ -81,10 +86,11 @@ SnowStemmer_transform(SnowballStemmer *self, Inversion *inversion) {
Hash*
SnowStemmer_dump(SnowballStemmer *self) {
+ SnowballStemmerIVARS *const ivars = SnowStemmer_IVARS(self);
SnowStemmer_Dump_t super_dump
= SUPER_METHOD_PTR(SNOWBALLSTEMMER, Lucy_SnowStemmer_Dump);
Hash *dump = super_dump(self);
- Hash_Store_Str(dump, "language", 8, (Obj*)CB_Clone(self->language));
+ Hash_Store_Str(dump, "language", 8, (Obj*)CB_Clone(ivars->language));
return dump;
}
@@ -101,10 +107,11 @@ SnowStemmer_load(SnowballStemmer *self, Obj *dump) {
bool
SnowStemmer_equals(SnowballStemmer *self, Obj *other) {
- SnowballStemmer *const twin = (SnowballStemmer*)other;
- if (twin == self) { return true; }
- if (!Obj_Is_A(other, SNOWBALLSTEMMER)) { return false; }
- if (!CB_Equals(twin->language, (Obj*)self->language)) { return false; }
+ if ((SnowballStemmer*)other == self) { return true; }
+ if (!Obj_Is_A(other, SNOWBALLSTEMMER)) { return false; }
+ SnowballStemmerIVARS *ivars = SnowStemmer_IVARS(self);
+ SnowballStemmerIVARS *ovars = SnowStemmer_IVARS((SnowballStemmer*)other);
+ if (!CB_Equals(ovars->language, (Obj*)ivars->language)) { return false; }
return true;
}
http://git-wip-us.apache.org/repos/asf/lucy/blob/68352179/core/Lucy/Analysis/SnowballStopFilter.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Analysis/SnowballStopFilter.c b/core/Lucy/Analysis/SnowballStopFilter.c
index 50c995c..e0d1f63 100644
--- a/core/Lucy/Analysis/SnowballStopFilter.c
+++ b/core/Lucy/Analysis/SnowballStopFilter.c
@@ -33,14 +33,15 @@ SnowballStopFilter*
SnowStop_init(SnowballStopFilter *self, const CharBuf *language,
Hash *stoplist) {
Analyzer_init((Analyzer*)self);
+ SnowballStopFilterIVARS *const ivars = SnowStop_IVARS(self);
if (stoplist) {
if (language) { THROW(ERR, "Can't have both stoplist and language"); }
- self->stoplist = (Hash*)INCREF(stoplist);
+ ivars->stoplist = (Hash*)INCREF(stoplist);
}
else if (language) {
- self->stoplist = SnowStop_gen_stoplist(language);
- if (!self->stoplist) {
+ ivars->stoplist = SnowStop_gen_stoplist(language);
+ if (!ivars->stoplist) {
THROW(ERR, "Can't get a stoplist for '%o'", language);
}
}
@@ -53,7 +54,8 @@ SnowStop_init(SnowballStopFilter *self, const CharBuf *language,
void
SnowStop_destroy(SnowballStopFilter *self) {
- DECREF(self->stoplist);
+ SnowballStopFilterIVARS *const ivars = SnowStop_IVARS(self);
+ DECREF(ivars->stoplist);
SUPER_DESTROY(self, SNOWBALLSTOPFILTER);
}
@@ -61,10 +63,12 @@ Inversion*
SnowStop_transform(SnowballStopFilter *self, Inversion *inversion) {
Token *token;
Inversion *new_inversion = Inversion_new(NULL);
- Hash *const stoplist = self->stoplist;
+ SnowballStopFilterIVARS *const ivars = SnowStop_IVARS(self);
+ Hash *const stoplist = ivars->stoplist;
while (NULL != (token = Inversion_Next(inversion))) {
- if (!Hash_Fetch_Str(stoplist, token->text, token->len)) {
+ TokenIVARS *const token_ivars = Token_IVARS(token);
+ if (!Hash_Fetch_Str(stoplist, token_ivars->text, token_ivars->len)) {
Inversion_Append(new_inversion, (Token*)INCREF(token));
}
}
@@ -74,10 +78,12 @@ SnowStop_transform(SnowballStopFilter *self, Inversion *inversion) {
bool
SnowStop_equals(SnowballStopFilter *self, Obj *other) {
- SnowballStopFilter *const twin = (SnowballStopFilter*)other;
- if (twin == self) { return true; }
+ if ((SnowballStopFilter*)other == self) { return true; }
if (!Obj_Is_A(other, SNOWBALLSTOPFILTER)) { return false; }
- if (!Hash_Equals(twin->stoplist, (Obj*)self->stoplist)) {
+ SnowballStopFilterIVARS *const ivars = SnowStop_IVARS(self);
+ SnowballStopFilterIVARS *const ovars
+ = SnowStop_IVARS((SnowballStopFilter*)other);
+ if (!Hash_Equals(ivars->stoplist, (Obj*)ovars->stoplist)) {
return false;
}
return true;
http://git-wip-us.apache.org/repos/asf/lucy/blob/68352179/core/Lucy/Analysis/StandardTokenizer.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Analysis/StandardTokenizer.c b/core/Lucy/Analysis/StandardTokenizer.c
index 1ccd2d0..61a19a1 100644
--- a/core/Lucy/Analysis/StandardTokenizer.c
+++ b/core/Lucy/Analysis/StandardTokenizer.c
@@ -86,8 +86,9 @@ StandardTokenizer_transform(StandardTokenizer *self, Inversion *inversion) {
Token *token;
while (NULL != (token = Inversion_Next(inversion))) {
- StandardTokenizer_Tokenize_Str(self, token->text, token->len,
- new_inversion);
+ TokenIVARS *const token_ivars = Token_IVARS(token);
+ StandardTokenizer_Tokenize_Str(self, token_ivars->text,
+ token_ivars->len, new_inversion);
}
return new_inversion;
@@ -293,8 +294,7 @@ S_skip_extend_format(const char *text, size_t len, lucy_StringIter *iter) {
bool
StandardTokenizer_equals(StandardTokenizer *self, Obj *other) {
- StandardTokenizer *const twin = (StandardTokenizer*)other;
- if (twin == self) { return true; }
+ if ((StandardTokenizer*)other == self) { return true; }
if (!Obj_Is_A(other, STANDARDTOKENIZER)) { return false; }
return true;
}
http://git-wip-us.apache.org/repos/asf/lucy/blob/68352179/core/Lucy/Analysis/Token.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Analysis/Token.c b/core/Lucy/Analysis/Token.c
index 5b53043..f5fec54 100644
--- a/core/Lucy/Analysis/Token.c
+++ b/core/Lucy/Analysis/Token.c
@@ -30,34 +30,39 @@ Token_new(const char* text, size_t len, uint32_t start_offset,
Token*
Token_init(Token *self, const char* text, size_t len, uint32_t start_offset,
uint32_t end_offset, float boost, int32_t pos_inc) {
+ TokenIVARS *const ivars = Token_IVARS(self);
+
// Allocate and assign.
- self->text = (char*)MALLOCATE(len + 1);
- self->text[len] = '\0';
- memcpy(self->text, text, len);
+ ivars->text = (char*)MALLOCATE(len + 1);
+ ivars->text[len] = '\0';
+ memcpy(ivars->text, text, len);
// Assign.
- self->len = len;
- self->start_offset = start_offset;
- self->end_offset = end_offset;
- self->boost = boost;
- self->pos_inc = pos_inc;
+ ivars->len = len;
+ ivars->start_offset = start_offset;
+ ivars->end_offset = end_offset;
+ ivars->boost = boost;
+ ivars->pos_inc = pos_inc;
// Init.
- self->pos = -1;
+ ivars->pos = -1;
return self;
}
void
Token_destroy(Token *self) {
- FREEMEM(self->text);
+ TokenIVARS *const ivars = Token_IVARS(self);
+ FREEMEM(ivars->text);
SUPER_DESTROY(self, TOKEN);
}
int
Token_compare(void *context, const void *va, const void *vb) {
- Token *const a = *((Token**)va);
- Token *const b = *((Token**)vb);
+ Token *const token_a = *((Token**)va);
+ Token *const token_b = *((Token**)vb);
+ TokenIVARS *const a = Token_IVARS(token_a);
+ TokenIVARS *const b = Token_IVARS(token_b);
size_t min_len = a->len < b->len ? a->len : b->len;
int comparison = memcmp(a->text, b->text, min_len);
UNUSED_VAR(context);
@@ -76,43 +81,44 @@ Token_compare(void *context, const void *va, const void *vb) {
uint32_t
Token_get_start_offset(Token *self) {
- return self->start_offset;
+ return Token_IVARS(self)->start_offset;
}
uint32_t
Token_get_end_offset(Token *self) {
- return self->end_offset;
+ return Token_IVARS(self)->end_offset;
}
float
Token_get_boost(Token *self) {
- return self->boost;
+ return Token_IVARS(self)->boost;
}
int32_t
Token_get_pos_inc(Token *self) {
- return self->pos_inc;
+ return Token_IVARS(self)->pos_inc;
}
char*
Token_get_text(Token *self) {
- return self->text;
+ return Token_IVARS(self)->text;
}
size_t
Token_get_len(Token *self) {
- return self->len;
+ return Token_IVARS(self)->len;
}
void
Token_set_text(Token *self, char *text, size_t len) {
- if (len > self->len) {
- FREEMEM(self->text);
- self->text = (char*)MALLOCATE(len + 1);
+ TokenIVARS *const ivars = Token_IVARS(self);
+ if (len > ivars->len) {
+ FREEMEM(ivars->text);
+ ivars->text = (char*)MALLOCATE(len + 1);
}
- memcpy(self->text, text, len);
- self->text[len] = '\0';
- self->len = len;
+ memcpy(ivars->text, text, len);
+ ivars->text[len] = '\0';
+ ivars->len = len;
}