You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by ma...@apache.org on 2012/06/30 07:42:13 UTC

[lucy-commits] svn commit: r1355639 - in /lucy/trunk/core/Lucy/Search: QueryParser.c QueryParser.cfh

Author: marvin
Date: Sat Jun 30 05:42:12 2012
New Revision: 1355639

URL: http://svn.apache.org/viewvc?rev=1355639&view=rev
Log:
Remove superseded code from QueryParser.

Modified:
    lucy/trunk/core/Lucy/Search/QueryParser.c
    lucy/trunk/core/Lucy/Search/QueryParser.cfh

Modified: lucy/trunk/core/Lucy/Search/QueryParser.c
URL: http://svn.apache.org/viewvc/lucy/trunk/core/Lucy/Search/QueryParser.c?rev=1355639&r1=1355638&r2=1355639&view=diff
==============================================================================
--- lucy/trunk/core/Lucy/Search/QueryParser.c (original)
+++ lucy/trunk/core/Lucy/Search/QueryParser.c Sat Jun 30 05:42:12 2012
@@ -56,12 +56,6 @@ static Query*
 S_parse_subquery(QueryParser *self, VArray *elems, CharBuf *default_field,
                  bool_t enclosed);
 
-static CharBuf*
-S_balance_parens_in_string(CharBuf *qstring);
-
-static VArray*
-S_parse_flat_string(QueryParser *self, CharBuf *query_string);
-
 // Drop unmatched right parens and add matching right parens at end to
 // close paren groups implicitly.
 static void
@@ -101,59 +95,6 @@ S_compose_or_queries(QueryParser *self, 
 static Query*
 S_compose_subquery(QueryParser *self, VArray *elems, bool_t enclosed);
 
-// A function that attempts to match a substring and if successful, stores the
-// begin and end of the match in the supplied pointers and returns true.
-typedef bool_t
-(*Lucy_QueryParser_Match_t)(CharBuf *input, char **begin_match,
-                            char **end_match);
-#define match_t Lucy_QueryParser_Match_t
-
-// Find a quote/end-of-string -delimited phrase.
-static bool_t
-S_match_phrase(CharBuf *input, char**begin_match, char **end_match);
-
-// Find a non-nested parenthetical group.
-static bool_t
-S_match_bool_group(CharBuf *input, char**begin_match, char **end_match);
-
-// Replace whatever match() matches with a label, storing the matched text as
-// a CharBuf in the supplied storage Hash.
-static CharBuf*
-S_extract_something(QueryParser *self, const CharBuf *query_string,
-                    CharBuf *label, Hash *extractions, match_t match);
-
-// Symbolically replace phrases in a query string.
-static CharBuf*
-S_extract_phrases(QueryParser *self, const CharBuf *query_string,
-                  Hash *extractions);
-
-// Symbolically replace parenthetical groupings in a query string.
-static CharBuf*
-S_extract_paren_groups(QueryParser *self, const CharBuf *query_string,
-                       Hash *extractions);
-
-// Consume text and possibly following whitespace, if there's a match and the
-// matching is bordered on the right by either whitespace or the end of the
-// string.
-static bool_t
-S_consume_ascii_token(ViewCharBuf *qstring, const char *ptr, size_t size);
-
-// Consume the supplied text if there's a match.
-static bool_t
-S_consume_ascii(ViewCharBuf *qstring, const char *ptr, size_t size);
-
-// Consume what looks like a field name followed by a colon.
-static bool_t
-S_consume_field(ViewCharBuf *qstring, ViewCharBuf *target);
-
-// Consume non-whitespace from qstring and store the match in target.
-static bool_t
-S_consume_non_whitespace_non_paren(ViewCharBuf *qstring, ViewCharBuf *target);
-
-#define RAND_STRING_LEN      16
-#define PHRASE_LABEL_LEN     (RAND_STRING_LEN + sizeof("_phrase") - 1)
-#define BOOL_GROUP_LABEL_LEN (RAND_STRING_LEN + sizeof("_bool_group") - 1)
-
 QueryParser*
 QParser_new(Schema *schema, Analyzer *analyzer, const CharBuf *default_boolop,
             VArray *fields) {
@@ -166,7 +107,6 @@ QParser_init(QueryParser *self, Schema *
              const CharBuf *default_boolop, VArray *fields) {
     // Init.
     self->heed_colons = false;
-    self->label_inc   = 0;
     self->lexer       = QueryLexer_new();
 
     // Assign.
@@ -209,17 +149,6 @@ QParser_init(QueryParser *self, Schema *
         THROW(ERR, "Invalid value for default_boolop: %o", self->default_boolop);
     }
 
-    // Create string labels that presumably won't appear in a search.
-    self->phrase_label     = CB_new_from_trusted_utf8("_phrase", 7);
-    self->bool_group_label = CB_new_from_trusted_utf8("_bool_group", 11);
-    CB_Grow(self->phrase_label, PHRASE_LABEL_LEN + 5);
-    CB_Grow(self->bool_group_label, BOOL_GROUP_LABEL_LEN + 5);
-    for (uint32_t i = 0; i < RAND_STRING_LEN; i++) {
-        char rand_char = (rand() % 26) + 'A';
-        CB_Cat_Trusted_Str(self->phrase_label, &rand_char, 1);
-        CB_Cat_Trusted_Str(self->bool_group_label, &rand_char, 1);
-    }
-
     return self;
 }
 
@@ -230,8 +159,6 @@ QParser_destroy(QueryParser *self) {
     DECREF(self->default_boolop);
     DECREF(self->fields);
     DECREF(self->lexer);
-    DECREF(self->phrase_label);
-    DECREF(self->bool_group_label);
     SUPER_DESTROY(self, QUERYPARSER);
 }
 
@@ -291,72 +218,6 @@ QParser_tree(QueryParser *self, const Ch
     return query;
 }
 
-static VArray*
-S_parse_flat_string(QueryParser *self, CharBuf *query_string) {
-    VArray      *parse_tree       = VA_new(0);
-    CharBuf     *qstring_copy     = CB_Clone(query_string);
-    ViewCharBuf *qstring          = (ViewCharBuf*)ZCB_WRAP(qstring_copy);
-
-    ViewCB_Trim(qstring);
-
-    ViewCharBuf *temp = (ViewCharBuf*)ZCB_BLANK();
-    while (ViewCB_Get_Size(qstring)) {
-        ParserElem *token = NULL;
-
-        if (ViewCB_Trim_Top(qstring)) {
-            // Fast-forward past whitespace.
-            continue;
-        }
-        else if (S_consume_ascii(qstring, "(", 1)) {
-            token = ParserElem_new(TOKEN_OPEN_PAREN, NULL);
-        }
-        else if (S_consume_ascii(qstring, ")", 1)) {
-            token = ParserElem_new(TOKEN_CLOSE_PAREN, NULL);
-        }
-        else if (S_consume_ascii(qstring, "+", 1)) {
-            if (ViewCB_Trim_Top(qstring)) {
-                token = ParserElem_new(TOKEN_STRING, (Obj*)CB_newf("+"));
-            }
-            else {
-                token = ParserElem_new(TOKEN_PLUS, NULL);
-            }
-        }
-        else if (S_consume_ascii(qstring, "-", 1)) {
-            if (ViewCB_Trim_Top(qstring)) {
-                token = ParserElem_new(TOKEN_STRING, (Obj*)CB_newf("-"));
-            }
-            else {
-                token = ParserElem_new(TOKEN_MINUS, NULL);
-            }
-        }
-        else if (S_consume_ascii_token(qstring, "AND", 3)) {
-            token = ParserElem_new(TOKEN_AND, NULL);
-        }
-        else if (S_consume_ascii_token(qstring, "OR", 2)) {
-            token = ParserElem_new(TOKEN_OR, NULL);
-        }
-        else if (S_consume_ascii_token(qstring, "NOT", 3)) {
-            token = ParserElem_new(TOKEN_NOT, NULL);
-        }
-        else if (self->heed_colons && S_consume_field(qstring, temp)) {
-            token = ParserElem_new(TOKEN_FIELD, (Obj*)ViewCB_Clone(temp));
-        }
-        else if (S_consume_non_whitespace_non_paren(qstring, temp)) {
-            token = ParserElem_new(TOKEN_STRING, (Obj*)ViewCB_Clone(temp));
-        }
-        else {
-            THROW(ERR, "Failed to parse '%o'", qstring);
-        }
-
-        VA_Push(parse_tree, (Obj*)token);
-    }
-
-    // Clean up.
-    DECREF(qstring_copy);
-
-    return parse_tree;
-}
-
 static void
 S_parse_subqueries(QueryParser *self, VArray *elems) {
     while (1) {
@@ -485,37 +346,6 @@ S_balance_parens(QueryParser *self, VArr
     }
 }
 
-static CharBuf*
-S_balance_parens_in_string(CharBuf *qstring) {
-    CharBuf *modified = CB_new_from_trusted_utf8("", 0);
-    ZombieCharBuf *source = ZCB_WRAP(qstring);
-
-    // Count paren balance, eliminate unbalanced right parens.
-    int64_t paren_depth = 0;
-    uint32_t code_point;
-    while (0 != (code_point = ZCB_Nip_One(source))) {
-        if (code_point == '(') {
-            paren_depth++;
-        }
-        else if (code_point == ')') {
-            if (paren_depth > 0) {
-                paren_depth--;
-            }
-            else {
-                continue;
-            }
-        }
-        CB_Cat_Char(modified, code_point);
-    }
-
-    // Insert implicit parens.
-    while (paren_depth--) {
-        CB_Cat_Char(modified, ')');
-    }
-
-    return modified;
-}
-
 static void
 S_compose_inner_queries(QueryParser *self, VArray *elems,
                         CharBuf *default_field) {
@@ -887,100 +717,6 @@ QParser_prune(QueryParser *self, Query *
     return (Query*)INCREF(query);
 }
 
-static bool_t
-S_consume_ascii(ViewCharBuf *qstring, const char *ptr, size_t len) {
-    if (ViewCB_Starts_With_Str(qstring, ptr, len)) {
-        ViewCB_Nip(qstring, len);
-        return true;
-    }
-    return false;
-}
-
-static bool_t
-S_consume_ascii_token(ViewCharBuf *qstring, const char *ptr, size_t len) {
-    if (ViewCB_Starts_With_Str(qstring, ptr, len)) {
-        if (len == ViewCB_Get_Size(qstring)
-            || StrHelp_is_whitespace(ViewCB_Code_Point_At(qstring, len))
-           ) {
-            ViewCB_Nip(qstring, len);
-            ViewCB_Trim_Top(qstring);
-            return true;
-        }
-    }
-    return false;
-}
-
-static bool_t
-S_consume_field(ViewCharBuf *qstring, ViewCharBuf *target) {
-    size_t tick = 0;
-
-    // Field name constructs must start with a letter or underscore.
-    uint32_t code_point = ViewCB_Code_Point_At(qstring, tick);
-    if (isalpha(code_point) || code_point == '_') {
-        tick++;
-    }
-    else {
-        return false;
-    }
-
-    // Only alphanumerics and underscores are allowed in field names.
-    while (1) {
-        code_point = ViewCB_Code_Point_At(qstring, tick);
-        if (isalnum(code_point) || code_point == '_') {
-            tick++;
-        }
-        else if (code_point == ':') {
-            tick++;
-            break;
-        }
-        else {
-            return false;
-        }
-    }
-
-    // Field name constructs must be followed by something sensible.
-    uint32_t lookahead = ViewCB_Code_Point_At(qstring, tick);
-    if (!(isalnum(lookahead)
-          || lookahead == '_'
-          || lookahead > 127
-          || lookahead == '"'
-          || lookahead == '('
-         )
-       ) {
-        return false;
-    }
-
-    // Consume string data.
-    ViewCB_Assign(target, (CharBuf*)qstring);
-    ViewCB_Set_Size(target, tick - 1);
-    ViewCB_Nip(qstring, tick);
-    return true;
-}
-
-static bool_t
-S_consume_non_whitespace_non_paren(ViewCharBuf *qstring, ViewCharBuf *target) {
-    uint32_t code_point = ViewCB_Code_Point_At(qstring, 0);
-    bool_t   success    = false;
-    ViewCB_Assign(target, (CharBuf*)qstring);
-    while (code_point
-        && !StrHelp_is_whitespace(code_point)
-        && code_point != '('
-        && code_point != ')'
-       ) {
-        ViewCB_Nip_One(qstring);
-        code_point = ViewCB_Code_Point_At(qstring, 0);
-        success = true;
-    }
-    if (!success) {
-        return false;
-    }
-    else {
-        uint32_t new_size = ViewCB_Get_Size(target) - ViewCB_Get_Size(qstring);
-        ViewCB_Set_Size(target, new_size);
-        return true;
-    }
-}
-
 Query*
 QParser_expand(QueryParser *self, Query *query) {
     Query *retval = NULL;
@@ -1203,113 +939,6 @@ QParser_expand_leaf(QueryParser *self, Q
     return retval;
 }
 
-static CharBuf*
-S_extract_something(QueryParser *self, const CharBuf *query_string,
-                    CharBuf *label, Hash *extractions, match_t match) {
-    CharBuf *retval          = CB_Clone(query_string);
-    size_t   qstring_size    = CB_Get_Size(query_string);
-    size_t   orig_label_size = CB_Get_Size(label);
-    char    *begin_match;
-    char    *end_match;
-
-    while (match(retval, &begin_match, &end_match)) {
-        size_t   len          = end_match - begin_match;
-        size_t   retval_size  = CB_Get_Size(retval);
-        char    *retval_buf   = (char*)CB_Get_Ptr8(retval);
-        char    *retval_end   = retval_buf + retval_size;
-        size_t   before_match = begin_match - retval_buf;
-        size_t   after_match  = retval_end - end_match;
-        CharBuf *new_retval   = CB_new(qstring_size);
-
-        // Store inner text.
-        CB_catf(label, "%u32", self->label_inc++);
-        Hash_Store(extractions, (Obj*)label,
-                   (Obj*)CB_new_from_utf8(begin_match, len));
-
-        // Splice the label into the query string.
-        CB_Cat_Str(new_retval, retval_buf, before_match);
-        CB_Cat(new_retval, label);
-        CB_Cat_Str(new_retval, " ", 1); // Extra space for safety.
-        CB_Cat_Str(new_retval, end_match, after_match);
-        DECREF(retval);
-        retval = new_retval;
-        CB_Set_Size(label, orig_label_size);
-    }
-
-    return retval;
-}
-
-static CharBuf*
-S_extract_phrases(QueryParser *self, const CharBuf *query_string,
-                  Hash *extractions) {
-    return S_extract_something(self, query_string, self->phrase_label,
-                               extractions, S_match_phrase);
-}
-
-static bool_t
-S_match_phrase(CharBuf *input, char**begin_match, char **end_match) {
-    ZombieCharBuf *iterator = ZCB_WRAP(input);
-    uint32_t code_point;
-
-    while (0 != (code_point = ZCB_Code_Point_At(iterator, 0))) {
-        if (code_point == '\\') {
-            ZCB_Nip(iterator, 2);
-            continue;
-        }
-        if (code_point == '"') {
-            *begin_match = (char*)ZCB_Get_Ptr8(iterator);
-            *end_match   = *begin_match + ZCB_Get_Size(iterator);
-            ZCB_Nip_One(iterator);
-            while (0 != (code_point = ZCB_Nip_One(iterator))) {
-                if (code_point == '\\') {
-                    ZCB_Nip_One(iterator);
-                    continue;
-                }
-                else if (code_point == '"') {
-                    *end_match = (char*)ZCB_Get_Ptr8(iterator);
-                    return true;
-                }
-            }
-            return true;
-        }
-        ZCB_Nip_One(iterator);
-    }
-    return false;
-}
-
-static CharBuf*
-S_extract_paren_groups(QueryParser *self, const CharBuf *query_string,
-                       Hash *extractions) {
-    return S_extract_something(self, query_string, self->bool_group_label,
-                               extractions, S_match_bool_group);
-}
-
-static bool_t
-S_match_bool_group(CharBuf *input, char**begin_match, char **end_match) {
-    ZombieCharBuf *iterator = ZCB_WRAP(input);
-    uint32_t code_point;
-
-    while (0 != (code_point = ZCB_Code_Point_At(iterator, 0))) {
-        if (code_point == '(') {
-FOUND_OPEN_PAREN:
-            *begin_match = (char*)ZCB_Get_Ptr8(iterator);
-            *end_match   = *begin_match + ZCB_Get_Size(iterator);
-            ZCB_Nip_One(iterator);
-            while (0 != (code_point = ZCB_Code_Point_At(iterator, 0))) {
-                if (code_point == '(') { goto FOUND_OPEN_PAREN; }
-                ZCB_Nip_One(iterator);
-                if (code_point == ')') {
-                    *end_match = (char*)ZCB_Get_Ptr8(iterator);
-                    return true;
-                }
-            }
-            return true;
-        }
-        ZCB_Nip_One(iterator);
-    }
-    return false;
-}
-
 Query*
 QParser_make_term_query(QueryParser *self, const CharBuf *field, Obj *term) {
     UNUSED_VAR(self);
@@ -1348,4 +977,3 @@ QParser_make_req_opt_query(QueryParser *
     return (Query*)ReqOptQuery_new(required_query, optional_query);
 }
 
-

Modified: lucy/trunk/core/Lucy/Search/QueryParser.cfh
URL: http://svn.apache.org/viewvc/lucy/trunk/core/Lucy/Search/QueryParser.cfh?rev=1355639&r1=1355638&r2=1355639&view=diff
==============================================================================
--- lucy/trunk/core/Lucy/Search/QueryParser.cfh (original)
+++ lucy/trunk/core/Lucy/Search/QueryParser.cfh Sat Jun 30 05:42:12 2012
@@ -48,10 +48,7 @@ public class Lucy::Search::QueryParser c
     CharBuf    *default_boolop;
     VArray     *fields;
     QueryLexer *lexer;
-    CharBuf    *phrase_label;
-    CharBuf    *bool_group_label;
     bool_t      heed_colons;
-    uint32_t    label_inc;
     bool_t      default_occur;
 
     inert incremented QueryParser*