You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2005/04/02 20:26:51 UTC

svn commit: r159787 - in incubator/lucene4c/trunk: Makefile.am include/lcn_query.h src/cmdline/main.c src/search/query.c src/search/scorer.c test/search/scorer_test.c

Author: rooneg
Date: Sat Apr  2 10:26:50 2005
New Revision: 159787

URL: http://svn.apache.org/viewcvs?view=rev&rev=159787
Log:
Start implementing more of the scoring code, and along the way move
the lcn_index_t parameter from lcn_weight_scorer to lcn_query_weight,
since it's needed there to correctly implement scoring.

Note that the results of the scorer code are likely totally wrong at
this point; I haven't even looked at them yet.  I just wanted to commit
my work in progress because the diff was getting big, and it's better
to get the interface changes I know are needed out there sooner rather
than later.

* include/lcn_query.h
  (lcn_weight_sum_of_squared_weights,
   lcn_weight_normalize): unifdef.
  (lcn_query_weight): add an index parameter.
  (lcn_weight_scorer): remove index parameter, remove out of date comment.

* src/search/query.c
  (query_weight_internal_t): add index parameter.
  (weight_ssw_internal_t,
   weight_normalize_internal_t): new typedefs.
  (weight_scorer_internal_t): remove index parameter.
  (lcn_weight_t): add index, ssw_internal and normalize_internal members.
  (term_scorer_internal): remove index parameter.
  (term_ssw_internal): new function.
  (term_normalize_internal): new function.
  (term_weight_internal): add index parameter, set up new members.
  (boolean_scorer_internal): remove index parameter.
  (boolean_ssw_internal): new function, unimplemented.
  (boolean_normalize_internal): new function, unimplemented.
  (boolean_weight_internal): add index parameter, set up new members.
  (lcn_weight_sum_of_squared_weights): new function.
  (lcn_weight_normalize): new function.
  (lcn_query_weight): add index parameter, call new scoring functions.
  (lcn_weight_scorer): remove index parameter.

* src/search/scorer.c
  (fill_scorers_array): adjust for interface changes.

* src/cmdline/main.c
  (lcn_search_cmd): ditto.

* test/search/scorer_test.c
  (test_term_scorer,
   test_boolean_scorer): account for change in interfaces.

* Makefile.am: link against libm.  This should probably be conditional,
  but that will have to wait for someone with the time/inclination to
  mess with the autoconf stuff.

Modified:
    incubator/lucene4c/trunk/Makefile.am
    incubator/lucene4c/trunk/include/lcn_query.h
    incubator/lucene4c/trunk/src/cmdline/main.c
    incubator/lucene4c/trunk/src/search/query.c
    incubator/lucene4c/trunk/src/search/scorer.c
    incubator/lucene4c/trunk/test/search/scorer_test.c

Modified: incubator/lucene4c/trunk/Makefile.am
URL: http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/Makefile.am?view=diff&r1=159786&r2=159787
==============================================================================
--- incubator/lucene4c/trunk/Makefile.am (original)
+++ incubator/lucene4c/trunk/Makefile.am Sat Apr  2 10:26:50 2005
@@ -81,7 +81,7 @@
 
 INCLUDES = -Iinclude -Itest $(LCN_APR_INCLUDES)
 
-LIBS = $(LCN_APR_LIBS)
+LIBS = $(LCN_APR_LIBS) -lm
 
 check:
 	@./test/tests

Modified: incubator/lucene4c/trunk/include/lcn_query.h
URL: http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/include/lcn_query.h?view=diff&r1=159786&r2=159787
==============================================================================
--- incubator/lucene4c/trunk/include/lcn_query.h (original)
+++ incubator/lucene4c/trunk/include/lcn_query.h Sat Apr  2 10:26:50 2005
@@ -73,7 +73,6 @@
 /** Return the value of @a weight. */
 float lcn_weight_value (lcn_weight_t *weight);
 
-#if NOTYET
 /** Return the sum of the squared weight sof contained query clauses. */
 lcn_error_t *
 lcn_weight_sum_of_squared_weights (float *sum, lcn_weight_t *weight);
@@ -82,25 +81,20 @@
 lcn_error_t * lcn_weight_normalize (lcn_weight_t *weight, float norm);
 
 /* XXX leaving out lcn_weight_explain for now... */
-#endif
 
-/** Create an @a weight from @a query. */
+/** Create an @a weight from @a query, to be run over @a index and allocated
+ * from @a pool.
+ */
 lcn_error_t *
 lcn_query_weight (lcn_weight_t **weight,
                   lcn_query_t *query,
+                  lcn_index_t *index,
                   apr_pool_t *pool);
 
-/** Return a @a scorer for @a weight run over @a index, allocated in
- * @a pool.
- *
- * @note the Java Lucene version of this stuff works on a Weight, not a
- * Query, but that's mainly because you are supposed to be able to reuse
- * a Query, so we can make that split later.
- */
+/** Return a @a scorer for @a weight, allocated in @a pool. */
 lcn_error_t *
 lcn_weight_scorer (lcn_scorer_t **scorer,
                    lcn_weight_t *weight,
-                   lcn_index_t *index,
                    apr_pool_t *pool);
 
 #ifdef __cplusplus

Modified: incubator/lucene4c/trunk/src/cmdline/main.c
URL: http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/src/cmdline/main.c?view=diff&r1=159786&r2=159787
==============================================================================
--- incubator/lucene4c/trunk/src/cmdline/main.c (original)
+++ incubator/lucene4c/trunk/src/cmdline/main.c Sat Apr  2 10:26:50 2005
@@ -195,9 +195,9 @@
                                        lcn_str_from_cstring (argv[1], pool),
                                        pool));
 
-      LCN_ERR (lcn_query_weight (&weight, query, pool));
+      LCN_ERR (lcn_query_weight (&weight, query, idx, pool));
 
-      LCN_ERR (lcn_weight_scorer (&scorer, weight, idx, pool));
+      LCN_ERR (lcn_weight_scorer (&scorer, weight, pool));
 
       LCN_ERR (print_doc_field (idx,
                                 lcn_scorer_doc (scorer),

Modified: incubator/lucene4c/trunk/src/search/query.c
URL: http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/src/search/query.c?view=diff&r1=159786&r2=159787
==============================================================================
--- incubator/lucene4c/trunk/src/search/query.c (original)
+++ incubator/lucene4c/trunk/src/search/query.c Sat Apr  2 10:26:50 2005
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include <math.h>
+
 #include "lcn_query.h"
 
 typedef enum {
@@ -23,6 +25,7 @@
 
 typedef lcn_error_t * (*query_weight_internal_t) (lcn_weight_t **weight,
                                                   lcn_query_t *query,
+                                                  lcn_index_t *index,
                                                   apr_pool_t *pool);
 
 struct lcn_query_t {
@@ -35,16 +38,24 @@
 
 typedef float (*weight_value_internal_t) (lcn_weight_t *weight);
 
+typedef lcn_error_t * (*weight_ssw_internal_t) (float *sum,
+                                                lcn_weight_t *weight);
+
+typedef lcn_error_t * (*weight_normalize_internal_t) (lcn_weight_t *weight,
+                                                      float norm);
+
 typedef lcn_error_t * (*weight_scorer_internal_t) (lcn_scorer_t **scorer,
                                                    lcn_weight_t *query,
-                                                   lcn_index_t *index,
                                                    apr_pool_t *pool);
 
 
 struct lcn_weight_t {
   lcn_query_t *query;
+  lcn_index_t *index;
 
   weight_scorer_internal_t scorer_internal;
+  weight_ssw_internal_t ssw_internal;
+  weight_normalize_internal_t normalize_internal;
   weight_value_internal_t value_internal;
 
   void *baton;
@@ -53,10 +64,10 @@
 static lcn_error_t *
 term_scorer_internal (lcn_scorer_t **scorer,
                       lcn_weight_t *weight,
-                      lcn_index_t *index,
                       apr_pool_t *pool)
 {
   lcn_term_t *term = lcn_weight_query (weight)->baton;
+  lcn_index_t *index = weight->index;
   lcn_doc_iter_t *iter;
 
   LCN_ERR (lcn_index_term_docs (&iter, index, term, pool));
@@ -81,19 +92,55 @@
   return twb->value;
 }
 
+static lcn_error_t * 
+term_ssw_internal (float *sum, lcn_weight_t *weight)
+{
+  struct term_weight_baton *twb = weight->baton;
+
+  apr_uint32_t num_docs = lcn_index_max_docs (weight->index);
+  apr_uint32_t doc_freq = 1; /* XXX get the doc freq */
+
+  twb->idf = log(num_docs /(double)(doc_freq + 1)) + 1.0;
+
+  twb->query_weight = twb->idf /* * boost */;
+
+  *sum = twb->query_weight * twb->query_weight;
+
+  return LCN_NO_ERROR;
+}
+
+static lcn_error_t * 
+term_normalize_internal (lcn_weight_t *weight, float norm)
+{
+  struct term_weight_baton *twb = weight->baton;
+
+  twb->query_norm = norm;
+
+  twb->query_weight *= norm;
+
+  twb->value = twb->query_weight * twb->idf;
+
+  return LCN_NO_ERROR;
+}
+
 static lcn_error_t *
 term_weight_internal (lcn_weight_t **weight,
                       lcn_query_t *query,
+                      lcn_index_t *index,
                       apr_pool_t *pool)
 {
+  struct term_weight_baton *twb = apr_pcalloc (pool, sizeof (*twb));
   lcn_weight_t *w = apr_pcalloc (pool, sizeof (*w));
 
   w->query = query;
 
   w->scorer_internal = term_scorer_internal;
+  w->ssw_internal = term_ssw_internal;
+  w->normalize_internal = term_normalize_internal;
   w->value_internal = term_value_internal;
+  w->index = index;
 
-  w->baton = apr_pcalloc (pool, sizeof (struct term_weight_baton));
+  w->baton = twb;
 
   *weight = w;
 
@@ -123,10 +170,10 @@
 static lcn_error_t *
 boolean_scorer_internal (lcn_scorer_t **scorer,
                          lcn_weight_t *weight,
-                         lcn_index_t *index,
                          apr_pool_t *pool)
 {
   boolean_query_baton_t *bqb = lcn_weight_query (weight)->baton;
+  lcn_index_t *index = weight->index;
 
   LCN_ERR (lcn_boolean_scorer_create (scorer,
                                       bqb->must,
@@ -144,9 +191,26 @@
   return 1.0f; /* XXX java impl returns getBoost() here... */
 }
 
+static lcn_error_t * 
+boolean_ssw_internal (float *sum, lcn_weight_t *weight)
+{
+  /* XXX implement me */
+
+  return LCN_NO_ERROR;
+}
+
+static lcn_error_t * 
+boolean_normalize_internal (lcn_weight_t *weight, float norm)
+{
+  /* XXX implement me */
+
+  return LCN_NO_ERROR;
+}
+
 static lcn_error_t *
 boolean_weight_internal (lcn_weight_t **weight,
                          lcn_query_t *query,
+                         lcn_index_t *index,
                          apr_pool_t *pool)
 {
   lcn_weight_t *w = apr_pcalloc (pool, sizeof (*w));
@@ -154,7 +218,11 @@
   w->query = query;
 
   w->scorer_internal = boolean_scorer_internal;
+  w->ssw_internal = boolean_ssw_internal;
   w->value_internal = boolean_value_internal;
+  w->normalize_internal = boolean_normalize_internal;
+
+  w->index = index;
 
   *weight = w;
 
@@ -225,16 +293,40 @@
 }
 
 lcn_error_t *
-lcn_query_weight (lcn_weight_t **weight, lcn_query_t *query, apr_pool_t *pool)
+lcn_weight_sum_of_squared_weights (float *sum, lcn_weight_t *weight)
 {
-  return query->weight_internal (weight, query, pool);
+  return weight->ssw_internal (sum, weight);
+}
+
+lcn_error_t *
+lcn_weight_normalize (lcn_weight_t *weight, float norm)
+{
+  return weight->normalize_internal (weight, norm);
+}
+
+lcn_error_t *
+lcn_query_weight (lcn_weight_t **weight,
+                  lcn_query_t *query,
+                  lcn_index_t *index,
+                  apr_pool_t *pool)
+{
+  float sum, norm;
+
+  LCN_ERR (query->weight_internal (weight, query, index, pool));
+
+  LCN_ERR (lcn_weight_sum_of_squared_weights (&sum, *weight));
+
+  norm = 1.0f; /* XXX get norm based on sum... */
+
+  LCN_ERR (lcn_weight_normalize (*weight, norm));
+
+  return LCN_NO_ERROR;
 }
 
 lcn_error_t *
 lcn_weight_scorer (lcn_scorer_t **scorer,
                    lcn_weight_t *weight,
-                   lcn_index_t *index,
                    apr_pool_t *pool)
 {
-  return weight->scorer_internal (scorer, weight, index, pool);
+  return weight->scorer_internal (scorer, weight, pool);
 }

Modified: incubator/lucene4c/trunk/src/search/scorer.c
URL: http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/src/search/scorer.c?view=diff&r1=159786&r2=159787
==============================================================================
--- incubator/lucene4c/trunk/src/search/scorer.c (original)
+++ incubator/lucene4c/trunk/src/search/scorer.c Sat Apr  2 10:26:50 2005
@@ -294,11 +294,10 @@
 
       lcn_query_t *query = APR_ARRAY_IDX (query_array, i, lcn_query_t *);
 
-      LCN_ERR (lcn_query_weight (&weight, query, pool));
+      LCN_ERR (lcn_query_weight (&weight, query, index, pool));
 
       LCN_ERR (lcn_weight_scorer (&scorer,
                                   weight,
-                                  index,
                                   pool));
 
       APR_ARRAY_PUSH (scorer_array, lcn_scorer_t *) = scorer;

Modified: incubator/lucene4c/trunk/test/search/scorer_test.c
URL: http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/test/search/scorer_test.c?view=diff&r1=159786&r2=159787
==============================================================================
--- incubator/lucene4c/trunk/test/search/scorer_test.c (original)
+++ incubator/lucene4c/trunk/test/search/scorer_test.c Sat Apr  2 10:26:50 2005
@@ -40,9 +40,9 @@
                                                            p),
                                   p));
 
-  CHK_ERR (lcn_query_weight (&weight, query, p));
+  CHK_ERR (lcn_query_weight (&weight, query, index, p));
 
-  CHK_ERR (lcn_weight_scorer (&scorer, weight, index, p));
+  CHK_ERR (lcn_weight_scorer (&scorer, weight, p));
 
   ABTS_INT_EQUAL (tc, 1, lcn_scorer_doc (scorer));
 
@@ -82,10 +82,10 @@
 
   CHK_ERR (lcn_boolean_query_create (&query, p));
 
-  CHK_ERR (lcn_query_weight (&weight, query, p));
+  CHK_ERR (lcn_query_weight (&weight, query, index, p));
 
   /* should fail if we don't have any queries added */
-  err = lcn_weight_scorer (&scorer, weight, index, p);
+  err = lcn_weight_scorer (&scorer, weight, p);
 
   ABTS_PTR_NOTNULL (tc, err);
 
@@ -101,10 +101,10 @@
 
   CHK_ERR (lcn_boolean_query_add (query, tquery, LCN_MUST));
 
-  CHK_ERR (lcn_query_weight (&weight, query, p));
+  CHK_ERR (lcn_query_weight (&weight, query, index, p));
 
   /* at this point results should be identical to just using the term query. */
-  CHK_ERR (lcn_weight_scorer (&scorer, weight, index, p));
+  CHK_ERR (lcn_weight_scorer (&scorer, weight, p));
 
   ABTS_INT_EQUAL (tc, 1, lcn_scorer_doc (scorer));
 
@@ -132,10 +132,10 @@
 
   CHK_ERR (lcn_boolean_query_add (query, tquery, LCN_MUST));
 
-  CHK_ERR (lcn_query_weight (&weight, query, p));
+  CHK_ERR (lcn_query_weight (&weight, query, index, p));
 
   /* now we should get results that contain both 'lucene' 'cutting' */
-  CHK_ERR (lcn_weight_scorer (&scorer, weight, index, p));
+  CHK_ERR (lcn_weight_scorer (&scorer, weight, p));
 
   ABTS_INT_EQUAL (tc, 40, lcn_scorer_doc (scorer));
 
@@ -175,9 +175,9 @@
 
   CHK_ERR (lcn_boolean_query_add (query, tquery, LCN_SHOULD));
 
-  CHK_ERR (lcn_query_weight (&weight, query, p));
+  CHK_ERR (lcn_query_weight (&weight, query, index, p));
 
-  CHK_ERR (lcn_weight_scorer (&scorer, weight, index, p));
+  CHK_ERR (lcn_weight_scorer (&scorer, weight, p));
 
   ABTS_INT_EQUAL (tc, 1, lcn_scorer_doc (scorer));
 
@@ -213,9 +213,9 @@
 
   CHK_ERR (lcn_boolean_query_add (query, tquery, LCN_MUST));
 
-  CHK_ERR (lcn_query_weight (&weight, query, p));
+  CHK_ERR (lcn_query_weight (&weight, query, index, p));
 
-  CHK_ERR (lcn_weight_scorer (&scorer, weight, index, p));
+  CHK_ERR (lcn_weight_scorer (&scorer, weight, p));
 
   count = 1;