You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by ma...@apache.org on 2010/06/22 08:22:36 UTC
svn commit: r956784 - in /lucene/lucy/trunk: core/Lucy/Search/ANDScorer.bp
core/Lucy/Search/ANDScorer.c perl/lib/Lucy/Search/ANDScorer.pm
perl/t/514-and_scorer.t
Author: marvin
Date: Tue Jun 22 06:22:35 2010
New Revision: 956784
URL: http://svn.apache.org/viewvc?rev=956784&view=rev
Log:
LUCY-117:
Add ANDScorer.
(and_scorer.patch)
Added:
lucene/lucy/trunk/core/Lucy/Search/ANDScorer.bp (with props)
lucene/lucy/trunk/core/Lucy/Search/ANDScorer.c (with props)
lucene/lucy/trunk/perl/lib/Lucy/Search/ANDScorer.pm (with props)
lucene/lucy/trunk/perl/t/514-and_scorer.t (with props)
Added: lucene/lucy/trunk/core/Lucy/Search/ANDScorer.bp
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/core/Lucy/Search/ANDScorer.bp?rev=956784&view=auto
==============================================================================
--- lucene/lucy/trunk/core/Lucy/Search/ANDScorer.bp (added)
+++ lucene/lucy/trunk/core/Lucy/Search/ANDScorer.bp Tue Jun 22 06:22:35 2010
@@ -0,0 +1,48 @@
+parcel Lucy;
+
+/** Intersect multiple required Matchers.
+ */
+
+class Lucy::Search::ANDScorer extends Lucy::Search::PolyMatcher {
+
+ Matcher **kids;
+ bool_t more;
+ bool_t first_time;
+
+ inert incremented ANDScorer*
+ new(VArray *children, Similarity *sim);
+
+ inert ANDScorer*
+ init(ANDScorer *self, VArray *children, Similarity *similarity);
+
+ public void
+ Destroy(ANDScorer *self);
+
+ public int32_t
+ Next(ANDScorer *self);
+
+ public int32_t
+ Advance(ANDScorer *self, int32_t target);
+
+ public float
+ Score(ANDScorer *self);
+
+ public int32_t
+ Get_Doc_ID(ANDScorer *self);
+}
+
+/* Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
Propchange: lucene/lucy/trunk/core/Lucy/Search/ANDScorer.bp
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/lucy/trunk/core/Lucy/Search/ANDScorer.c
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/core/Lucy/Search/ANDScorer.c?rev=956784&view=auto
==============================================================================
--- lucene/lucy/trunk/core/Lucy/Search/ANDScorer.c (added)
+++ lucene/lucy/trunk/core/Lucy/Search/ANDScorer.c Tue Jun 22 06:22:35 2010
@@ -0,0 +1,165 @@
+#define C_LUCY_ANDSCORER
+#include "Lucy/Util/ToolSet.h"
+
+#include "Lucy/Search/ANDScorer.h"
+#include "Lucy/Index/Similarity.h"
+
+ANDScorer*
+ANDScorer_new(VArray *children, Similarity *sim)
+{
+ ANDScorer *self = (ANDScorer*)VTable_Make_Obj(ANDSCORER);
+ return ANDScorer_init(self, children, sim);
+}
+
+ANDScorer*
+ANDScorer_init(ANDScorer *self, VArray *children, Similarity *sim)
+{
+ uint32_t i;
+
+ // Init.
+ PolyMatcher_init((PolyMatcher*)self, children, sim);
+ self->first_time = true;
+
+ // Assign.
+ self->more = self->num_kids ? true : false;
+ self->kids = (Matcher**)MALLOCATE(self->num_kids * sizeof(Matcher*));
+ for (i = 0; i < self->num_kids; i++) {
+ Matcher *child = (Matcher*)VA_Fetch(children, i);
+ self->kids[i] = child;
+ if (!Matcher_Next(child)) self->more = false;
+ }
+
+ // Derive.
+ self->matching_kids = self->num_kids;
+
+ return self;
+}
+
+void
+ANDScorer_destroy(ANDScorer *self)
+{
+ FREEMEM(self->kids);
+ SUPER_DESTROY(self, ANDSCORER);
+}
+
+int32_t
+ANDScorer_next(ANDScorer *self)
+{
+ if (self->first_time) {
+ return ANDScorer_Advance(self, 1);
+ }
+ if (self->more) {
+ const int32_t target = Matcher_Get_Doc_ID(self->kids[0]) + 1;
+ return ANDScorer_Advance(self, target);
+ }
+ else {
+ return 0;
+ }
+}
+
+int32_t
+ANDScorer_advance(ANDScorer *self, int32_t target)
+{
+ Matcher **const kids = self->kids;
+ const uint32_t num_kids = self->num_kids;
+ int32_t highest = 0;
+
+ if (!self->more) return 0;
+
+ // First step: Advance first child and use its doc as a starting point.
+ if (self->first_time) {
+ self->first_time = false;
+ }
+ else {
+ highest = Matcher_Advance(kids[0], target);
+ if (!highest) {
+ self->more = false;
+ return 0;
+ }
+ }
+
+ // Second step: reconcile.
+ while(1) {
+ uint32_t i;
+ bool_t agreement = true;
+
+ // Scoot all scorers up.
+ for (i = 0; i < num_kids; i++) {
+ Matcher *const child = kids[i];
+ int32_t candidate = Matcher_Get_Doc_ID(child);
+
+ // If this child is highest, others will need to catch up.
+ if (highest < candidate)
+ highest = candidate;
+
+ // If least doc scorers can agree on exceeds target, raise bar.
+ if (target < highest)
+ target = highest;
+
+ // Scoot this scorer up if not already at highest.
+ if (candidate < target) {
+ // This scorer is definitely the highest right now.
+ highest = Matcher_Advance(child, target);
+ if (!highest) {
+ self->more = false;
+ return 0;
+ }
+ }
+ }
+
+ // If scorers don't agree, send back through the loop.
+ for (i = 0; i < num_kids; i++) {
+ Matcher *const child = kids[i];
+ const int32_t candidate = Matcher_Get_Doc_ID(child);
+ if (candidate != highest) {
+ agreement = false;
+ break;
+ }
+ }
+
+ if (!agreement)
+ continue;
+ if (highest >= target)
+ break;
+ }
+
+ return highest;
+}
+
+int32_t
+ANDScorer_get_doc_id(ANDScorer *self)
+{
+ return Matcher_Get_Doc_ID(self->kids[0]);
+}
+
+float
+ANDScorer_score(ANDScorer *self)
+{
+ uint32_t i;
+ Matcher **const kids = self->kids;
+ float score = 0.0f;
+
+ for (i = 0; i < self->num_kids; i++) {
+ score += Matcher_Score(kids[i]);
+ }
+
+ score *= self->coord_factors[ self->matching_kids ];
+
+ return score;
+}
+
+/* Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
Propchange: lucene/lucy/trunk/core/Lucy/Search/ANDScorer.c
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/lucy/trunk/perl/lib/Lucy/Search/ANDScorer.pm
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/perl/lib/Lucy/Search/ANDScorer.pm?rev=956784&view=auto
==============================================================================
--- lucene/lucy/trunk/perl/lib/Lucy/Search/ANDScorer.pm (added)
+++ lucene/lucy/trunk/perl/lib/Lucy/Search/ANDScorer.pm Tue Jun 22 06:22:35 2010
@@ -0,0 +1,33 @@
+package Lucy::Search::ANDScorer;
+use Lucy;
+
+1;
+
+__END__
+
+__BINDING__
+
+Clownfish::Binding::Perl::Class->register(
+ parcel => "Lucy",
+ class_name => "Lucy::Search::ANDScorer",
+ bind_constructors => ["new"],
+);
+
+__COPYRIGHT__
+
+ /**
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
Propchange: lucene/lucy/trunk/perl/lib/Lucy/Search/ANDScorer.pm
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/lucy/trunk/perl/t/514-and_scorer.t
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/perl/t/514-and_scorer.t?rev=956784&view=auto
==============================================================================
--- lucene/lucy/trunk/perl/t/514-and_scorer.t (added)
+++ lucene/lucy/trunk/perl/t/514-and_scorer.t Tue Jun 22 06:22:35 2010
@@ -0,0 +1,61 @@
+use strict;
+use warnings;
+use lib 'buildlib';
+
+use Test::More skip_all => 'requires SortCollector';
+# use Test::More tests => 1362;
+use Lucy::Test;
+use LucyX::Search::MockScorer;
+use Lucy::Test::TestUtils qw( modulo_set doc_ids_from_td_coll );
+
+my $sim = Lucy::Index::Similarity::LuceneSimilarity->new;
+
+for my $interval_a ( reverse 1 .. 17 ) {
+ for my $interval_b ( reverse 10 .. 17 ) {
+ check_scorer( $interval_a, $interval_b );
+ for my $interval_c ( 30, 75 ) {
+ check_scorer( $interval_a, $interval_b, $interval_c );
+ check_scorer( $interval_c, $interval_b, $interval_a );
+ }
+ }
+}
+check_scorer(1000);
+
+sub check_scorer {
+ my @intervals = @_;
+ my @doc_id_arrays = map { modulo_set( $_, 100 ) } @intervals;
+ my @children = map {
+ LucyX::Search::MockScorer->new(
+ doc_ids => $_,
+ scores => [ (0) x scalar @$_ ],
+ )
+ } @doc_id_arrays;
+ my $and_scorer = Lucy::Search::ANDScorer->new(
+ children => \@children,
+ similarity => $sim,
+ );
+ my @expected = intersect(@doc_id_arrays);
+ my $collector
+ = Lucy::Search::Collector::SortCollector->new( wanted => 1000 );
+ $and_scorer->collect( collector => $collector );
+ is( $collector->get_total_hits,
+ scalar @expected,
+ "correct num hits @intervals"
+ );
+ my ( $by_score, $by_id ) = doc_ids_from_td_coll($collector);
+ is_deeply( $by_id, \@expected, "correct doc nums @intervals" );
+}
+
+sub intersect {
+ my @arrays = @_;
+ my @out = @{ $arrays[0] };
+ for my $array (@arrays) {
+ my %hash;
+ @hash{@$array} = (1) x @$array;
+ @out = grep { exists $hash{$_} } @out;
+ }
+ return @out;
+}
+
+# Trigger destruction.
+undef $sim;
Propchange: lucene/lucy/trunk/perl/t/514-and_scorer.t
------------------------------------------------------------------------------
svn:eol-style = native