You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by ma...@apache.org on 2010/06/22 08:22:36 UTC

svn commit: r956784 - in /lucene/lucy/trunk: core/Lucy/Search/ANDScorer.bp core/Lucy/Search/ANDScorer.c perl/lib/Lucy/Search/ANDScorer.pm perl/t/514-and_scorer.t

Author: marvin
Date: Tue Jun 22 06:22:35 2010
New Revision: 956784

URL: http://svn.apache.org/viewvc?rev=956784&view=rev
Log:
LUCY-117:
Add ANDScorer.
(and_scorer.patch)

Added:
    lucene/lucy/trunk/core/Lucy/Search/ANDScorer.bp   (with props)
    lucene/lucy/trunk/core/Lucy/Search/ANDScorer.c   (with props)
    lucene/lucy/trunk/perl/lib/Lucy/Search/ANDScorer.pm   (with props)
    lucene/lucy/trunk/perl/t/514-and_scorer.t   (with props)

Added: lucene/lucy/trunk/core/Lucy/Search/ANDScorer.bp
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/core/Lucy/Search/ANDScorer.bp?rev=956784&view=auto
==============================================================================
--- lucene/lucy/trunk/core/Lucy/Search/ANDScorer.bp (added)
+++ lucene/lucy/trunk/core/Lucy/Search/ANDScorer.bp Tue Jun 22 06:22:35 2010
@@ -0,0 +1,48 @@
+parcel Lucy;
+
+/** Intersect multiple required Matchers.
+ */
+
+class Lucy::Search::ANDScorer extends Lucy::Search::PolyMatcher {
+
+    Matcher     **kids;
+    bool_t        more;
+    bool_t        first_time;
+
+    inert incremented ANDScorer* 
+    new(VArray *children, Similarity *sim);
+
+    inert ANDScorer* 
+    init(ANDScorer *self, VArray *children, Similarity *similarity);
+
+    public void
+    Destroy(ANDScorer *self);
+
+    public int32_t
+    Next(ANDScorer *self);
+
+    public int32_t
+    Advance(ANDScorer *self, int32_t target);
+
+    public float
+    Score(ANDScorer *self);
+
+    public int32_t 
+    Get_Doc_ID(ANDScorer *self);
+}
+
+/* Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+

Propchange: lucene/lucy/trunk/core/Lucy/Search/ANDScorer.bp
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/lucy/trunk/core/Lucy/Search/ANDScorer.c
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/core/Lucy/Search/ANDScorer.c?rev=956784&view=auto
==============================================================================
--- lucene/lucy/trunk/core/Lucy/Search/ANDScorer.c (added)
+++ lucene/lucy/trunk/core/Lucy/Search/ANDScorer.c Tue Jun 22 06:22:35 2010
@@ -0,0 +1,165 @@
+#define C_LUCY_ANDSCORER
+#include "Lucy/Util/ToolSet.h"
+
+#include "Lucy/Search/ANDScorer.h"
+#include "Lucy/Index/Similarity.h"
+
+ANDScorer*
+ANDScorer_new(VArray *children, Similarity *sim) 
+{
+    ANDScorer *self = (ANDScorer*)VTable_Make_Obj(ANDSCORER);
+    return ANDScorer_init(self, children, sim);
+}
+
+ANDScorer*
+ANDScorer_init(ANDScorer *self, VArray *children, Similarity *sim) 
+{
+    uint32_t i;
+
+    // Init. 
+    PolyMatcher_init((PolyMatcher*)self, children, sim);
+    self->first_time       = true;
+
+    // Assign. 
+    self->more             = self->num_kids ? true : false;
+    self->kids             = (Matcher**)MALLOCATE(self->num_kids * sizeof(Matcher*));
+    for (i = 0; i < self->num_kids; i++) {
+        Matcher *child = (Matcher*)VA_Fetch(children, i);
+        self->kids[i] = child;
+        if (!Matcher_Next(child)) self->more = false;
+    }
+
+    // Derive. 
+    self->matching_kids = self->num_kids;
+
+    return self;
+}
+
+void
+ANDScorer_destroy(ANDScorer *self) 
+{
+    FREEMEM(self->kids);
+    SUPER_DESTROY(self, ANDSCORER);
+}
+
+int32_t
+ANDScorer_next(ANDScorer *self)
+{
+    if (self->first_time) {
+        return ANDScorer_Advance(self, 1);
+    }
+    if (self->more) {
+        const int32_t target = Matcher_Get_Doc_ID(self->kids[0]) + 1;
+        return ANDScorer_Advance(self, target);
+    }
+    else {
+        return 0;
+    }
+}
+
+int32_t
+ANDScorer_advance(ANDScorer *self, int32_t target)
+{
+    Matcher **const kids = self->kids;
+    const uint32_t  num_kids   = self->num_kids;
+    int32_t         highest    = 0;
+
+    if (!self->more) return 0;
+
+    // First step: Advance first child and use its doc as a starting point. 
+    if (self->first_time) {
+        self->first_time = false;
+    }
+    else {
+        highest = Matcher_Advance(kids[0], target);
+        if (!highest) { 
+            self->more = false;
+            return 0;
+        }
+    }
+
+    // Second step: reconcile. 
+    while(1) {
+        uint32_t i;
+        bool_t agreement = true;
+
+        // Scoot all scorers up. 
+        for (i = 0; i < num_kids; i++) {
+            Matcher *const child = kids[i];
+            int32_t candidate = Matcher_Get_Doc_ID(child);
+
+            // If this child is highest, others will need to catch up. 
+            if (highest < candidate)
+                highest = candidate;
+
+            // If least doc scorers can agree on exceeds target, raise bar. 
+            if (target < highest)
+                target = highest;
+
+            // Scoot this scorer up if not already at highest. 
+            if (candidate < target) {
+                // This scorer is definitely the highest right now. 
+                highest = Matcher_Advance(child, target);
+                if (!highest) {
+                    self->more = false;
+                    return 0;
+                }
+            }
+        }
+
+        // If scorers don't agree, send back through the loop. 
+        for (i = 0; i < num_kids; i++) {
+            Matcher *const child = kids[i];
+            const int32_t candidate = Matcher_Get_Doc_ID(child);
+            if (candidate != highest) {
+                agreement = false;
+                break;
+            }
+        }
+
+        if (!agreement)
+            continue;
+        if (highest >= target)
+            break;
+    } 
+
+    return highest;
+}
+
+int32_t
+ANDScorer_get_doc_id(ANDScorer *self)
+{
+    return Matcher_Get_Doc_ID(self->kids[0]);
+}
+
+float
+ANDScorer_score(ANDScorer *self)
+{
+    uint32_t i;
+    Matcher **const kids = self->kids;
+    float score = 0.0f;
+
+    for (i = 0; i < self->num_kids; i++) {
+        score += Matcher_Score(kids[i]);
+    }
+
+    score *= self->coord_factors[ self->matching_kids ];
+
+    return score;
+}
+
+/* Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+

Propchange: lucene/lucy/trunk/core/Lucy/Search/ANDScorer.c
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/lucy/trunk/perl/lib/Lucy/Search/ANDScorer.pm
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/perl/lib/Lucy/Search/ANDScorer.pm?rev=956784&view=auto
==============================================================================
--- lucene/lucy/trunk/perl/lib/Lucy/Search/ANDScorer.pm (added)
+++ lucene/lucy/trunk/perl/lib/Lucy/Search/ANDScorer.pm Tue Jun 22 06:22:35 2010
@@ -0,0 +1,33 @@
+package Lucy::Search::ANDScorer;
+use Lucy;
+
+1;
+
+__END__
+
+__BINDING__
+
+Clownfish::Binding::Perl::Class->register(
+    parcel            => "Lucy",
+    class_name        => "Lucy::Search::ANDScorer",
+    bind_constructors => ["new"],
+);
+
+__COPYRIGHT__
+
+    /**
+     * Copyright 2010 The Apache Software Foundation
+     *
+     * Licensed under the Apache License, Version 2.0 (the "License");
+     * you may not use this file except in compliance with the License.
+     * You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+     * implied.  See the License for the specific language governing
+     * permissions and limitations under the License.
+     */
+

Propchange: lucene/lucy/trunk/perl/lib/Lucy/Search/ANDScorer.pm
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/lucy/trunk/perl/t/514-and_scorer.t
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/perl/t/514-and_scorer.t?rev=956784&view=auto
==============================================================================
--- lucene/lucy/trunk/perl/t/514-and_scorer.t (added)
+++ lucene/lucy/trunk/perl/t/514-and_scorer.t Tue Jun 22 06:22:35 2010
@@ -0,0 +1,61 @@
+use strict;
+use warnings;
+use lib 'buildlib';
+
+use Test::More skip_all => 'requires SortCollector';
+# use Test::More tests => 1362;
+use Lucy::Test;
+use LucyX::Search::MockScorer;
+use Lucy::Test::TestUtils qw( modulo_set doc_ids_from_td_coll );
+
+my $sim = Lucy::Index::Similarity::LuceneSimilarity->new;
+
+for my $interval_a ( reverse 1 .. 17 ) {
+    for my $interval_b ( reverse 10 .. 17 ) {
+        check_scorer( $interval_a, $interval_b );
+        for my $interval_c ( 30, 75 ) {
+            check_scorer( $interval_a, $interval_b, $interval_c );
+            check_scorer( $interval_c, $interval_b, $interval_a );
+        }
+    }
+}
+check_scorer(1000);
+
+sub check_scorer {
+    my @intervals     = @_;
+    my @doc_id_arrays = map { modulo_set( $_, 100 ) } @intervals;
+    my @children      = map {
+        LucyX::Search::MockScorer->new(
+            doc_ids => $_,
+            scores  => [ (0) x scalar @$_ ],
+            )
+    } @doc_id_arrays;
+    my $and_scorer = Lucy::Search::ANDScorer->new(
+        children   => \@children,
+        similarity => $sim,
+    );
+    my @expected = intersect(@doc_id_arrays);
+    my $collector
+        = Lucy::Search::Collector::SortCollector->new( wanted => 1000 );
+    $and_scorer->collect( collector => $collector );
+    is( $collector->get_total_hits,
+        scalar @expected,
+        "correct num hits @intervals"
+    );
+    my ( $by_score, $by_id ) = doc_ids_from_td_coll($collector);
+    is_deeply( $by_id, \@expected, "correct doc nums @intervals" );
+}
+
+sub intersect {
+    my @arrays = @_;
+    my @out    = @{ $arrays[0] };
+    for my $array (@arrays) {
+        my %hash;
+        @hash{@$array} = (1) x @$array;
+        @out = grep { exists $hash{$_} } @out;
+    }
+    return @out;
+}
+
+# Trigger destruction.
+undef $sim;

Propchange: lucene/lucy/trunk/perl/t/514-and_scorer.t
------------------------------------------------------------------------------
    svn:eol-style = native