You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by gs...@apache.org on 2007/07/18 02:52:08 UTC

svn commit: r557105 - in /lucene/java/trunk: ./ src/java/org/apache/lucene/search/ src/test/org/apache/lucene/search/

Author: gsingers
Date: Tue Jul 17 17:52:07 2007
New Revision: 557105

URL: http://svn.apache.org/viewvc?view=rev&rev=557105
Log:
LUCENE-960:  Added SpanFilter mechanism that provides BitSet information and Span information in a filter.  This can be used to get position information about where the filter matched in each Document that is "on" in the filter.

Added:
    lucene/java/trunk/src/java/org/apache/lucene/search/CachingSpanFilter.java   (with props)
    lucene/java/trunk/src/java/org/apache/lucene/search/SpanFilter.java   (with props)
    lucene/java/trunk/src/java/org/apache/lucene/search/SpanFilterResult.java   (with props)
    lucene/java/trunk/src/java/org/apache/lucene/search/SpanQueryFilter.java   (with props)
    lucene/java/trunk/src/test/org/apache/lucene/search/TestSpanQueryFilter.java   (with props)
Modified:
    lucene/java/trunk/CHANGES.txt

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?view=diff&rev=557105&r1=557104&r2=557105
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Tue Jul 17 17:52:07 2007
@@ -45,6 +45,8 @@
  1. LUCENE-906: Elision filter for French.
     (Mathieu Lecarme via Otis Gospodnetic)
 
+ 2. LUCENE-960: Added a SpanQueryFilter and related classes to allow for not only filtering, but knowing where in a Document a Filter matches (Grant Ingersoll)
+
 Optimizations
 
  1. LUCENE-937: CachingTokenFilter now uses an iterator to access the 

Added: lucene/java/trunk/src/java/org/apache/lucene/search/CachingSpanFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/CachingSpanFilter.java?view=auto&rev=557105
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/CachingSpanFilter.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/CachingSpanFilter.java Tue Jul 17 17:52:07 2007
@@ -0,0 +1,84 @@
+package org.apache.lucene.search;
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import org.apache.lucene.index.IndexReader;
+
+import java.io.IOException;
+import java.util.BitSet;
+import java.util.Map;
+import java.util.WeakHashMap;
+
+/**
+ * Wraps another SpanFilter's result and caches it.  The purpose is to allow
+ * filters to simply filter, and then wrap with this class to add caching.
+ */
+public class CachingSpanFilter extends SpanFilter {
+  /** The wrapped filter whose results are cached. */
+  protected SpanFilter filter;
+
+  /**
+   * A transient Filter cache, keyed by {@link IndexReader} and created lazily on first use.
+   * To cache Filters even when using {@link org.apache.lucene.search.RemoteSearchable} use
+   * {@link org.apache.lucene.search.RemoteCachingWrapperFilter} instead.
+   */
+  protected transient Map cache;
+
+  /**
+   * @param filter Filter to cache results of
+   */
+  public CachingSpanFilter(SpanFilter filter) {
+    this.filter = filter;
+  }
+
+  /**
+   * Returns the bits of the (possibly cached) result for <code>reader</code>.
+   *
+   * @param reader the reader to filter
+   * @return the BitSet of the wrapped filter's result, or <code>null</code> if the
+   *         wrapped filter returned no result
+   * @throws IOException if the wrapped filter fails while computing the result
+   */
+  public BitSet bits(IndexReader reader) throws IOException {
+    SpanFilterResult result = getCachedResult(reader);
+    return result != null ? result.getBits() : null;
+  }
+
+  /**
+   * Looks up the result for <code>reader</code> in the cache, asking the wrapped
+   * filter to compute and store it on a miss.
+   */
+  private SpanFilterResult getCachedResult(IndexReader reader) throws IOException {
+    final Map readerCache;
+    // Create the map under a lock: without it, two first-time callers could each
+    // install their own map and then guard the shared one with different monitors.
+    synchronized (this) {
+      if (cache == null) {
+        cache = new WeakHashMap();
+      }
+      readerCache = cache;
+    }
+
+    synchronized (readerCache) {  // check cache
+      SpanFilterResult result = (SpanFilterResult) readerCache.get(reader);
+      if (result == null) {
+        result = filter.bitSpans(reader);
+        readerCache.put(reader, result);
+      }
+      return result;
+    }
+  }
+
+
+  /**
+   * Returns the (possibly cached) result of the wrapped filter for <code>reader</code>.
+   *
+   * @param reader the reader to filter
+   * @return the result produced by the wrapped filter
+   * @throws IOException if the wrapped filter fails while computing the result
+   */
+  public SpanFilterResult bitSpans(IndexReader reader) throws IOException {
+    return getCachedResult(reader);
+  }
+
+  public String toString() {
+    return "CachingSpanFilter("+filter+")";
+  }
+
+  /** Two CachingSpanFilters are equal when their wrapped filters are equal. */
+  public boolean equals(Object o) {
+    if (!(o instanceof CachingSpanFilter)) return false;
+    return this.filter.equals(((CachingSpanFilter)o).filter);
+  }
+
+  public int hashCode() {
+    return filter.hashCode() ^ 0x1117BF25;
+  }
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/search/CachingSpanFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/java/org/apache/lucene/search/SpanFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/SpanFilter.java?view=auto&rev=557105
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/SpanFilter.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/SpanFilter.java Tue Jul 17 17:52:07 2007
@@ -0,0 +1,38 @@
+package org.apache.lucene.search;
+/**
+ * Copyright 2007 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexReader;
+
+import java.io.IOException;
+
+/** Abstract base class providing a mechanism to restrict searches to a subset
+ of an index while also returning position information for the matches.
+
+ This is useful if you want to compare the positions from a SpanQuery with the positions of items in
+ a filter.  For instance, if you had a SpanFilter that marked all the occurrences of the word "foo" in documents,
+ and then you entered a new SpanQuery containing bar, you could not only filter by the word foo, but you could
+ then compare position information for post processing.
+ */
+public abstract class SpanFilter extends Filter{
+  /** Returns a SpanFilterResult whose bits are true for documents which should be permitted in
+    search results and false for those that should not, together with the Spans showing where the permitted docs match.
+   * @param reader The {@link org.apache.lucene.index.IndexReader} to load position and bitset information from
+   * @return A {@link SpanFilterResult}
+   * @throws java.io.IOException if there was an issue accessing the necessary information
+   * */
+  public abstract SpanFilterResult bitSpans(IndexReader reader) throws IOException;
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/search/SpanFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/java/org/apache/lucene/search/SpanFilterResult.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/SpanFilterResult.java?view=auto&rev=557105
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/SpanFilterResult.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/SpanFilterResult.java Tue Jul 17 17:52:07 2007
@@ -0,0 +1,117 @@
+package org.apache.lucene.search;
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.List;
+
+
+/**
+ *  The results of a SpanQueryFilter.  Wraps the BitSet and the position information from the SpanQuery
+ *
+ *<p/>
+ * NOTE: This API is still experimental and subject to change. 
+ *
+ **/
+public class SpanFilterResult {
+  private final BitSet bits;
+  private final List positions;
+
+  /**
+   * Creates a result that holds the given bits and positions as passed in (no copies are made).
+   *
+   * @param bits The bits for the Filter
+   * @param positions A List of {@link org.apache.lucene.search.SpanFilterResult.PositionInfo} objects
+   */
+  public SpanFilterResult(BitSet bits, List positions) {
+    this.bits = bits;
+    this.positions = positions;
+  }
+
+  /**
+   * @return The BitSet supplied at construction
+   */
+  public BitSet getBits() {
+    return bits;
+  }
+
+  /**
+   * The first entry in the list corresponds to the first "on" bit.
+   * Entries are increasing by document order
+   * @return A List of PositionInfo objects
+   */
+  public List getPositions() {
+    return positions;
+  }
+
+  /**
+   * The match positions recorded for a single document.
+   */
+  public static class PositionInfo {
+    private final int doc;
+    private final List positions = new ArrayList();
+
+    /**
+     * @param doc The document number these positions belong to
+     */
+    public PositionInfo(int doc) {
+      this.doc = doc;
+    }
+
+    /**
+     * Appends one match to this document's positions.
+     *
+     * @param start The start position of the match
+     * @param end The end position of the match
+     */
+    public void addPosition(int start, int end) {
+      StartEnd match = new StartEnd(start, end);
+      positions.add(match);
+    }
+
+    /**
+     * @return The document number supplied at construction
+     */
+    public int getDoc() {
+      return doc;
+    }
+
+    /**
+     *
+     * @return A List of {@link org.apache.lucene.search.SpanFilterResult.StartEnd} objects
+     */
+    public List getPositions() {
+      return positions;
+    }
+  }
+
+  /**
+   * A start/end position pair describing a single match.
+   */
+  public static class StartEnd {
+    private final int start;
+    private final int end;
+
+    /**
+     * @param start The start position of this match
+     * @param end The end position of this match
+     */
+    public StartEnd(int start, int end) {
+      this.start = start;
+      this.end = end;
+    }
+
+    /**
+     * The Start position
+     * @return The start position of this match
+     */
+    public int getStart() {
+      return start;
+    }
+
+    /**
+     *
+     * @return The end position of this match
+     */
+    public int getEnd() {
+      return end;
+    }
+  }
+}
+
+

Propchange: lucene/java/trunk/src/java/org/apache/lucene/search/SpanFilterResult.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/java/org/apache/lucene/search/SpanQueryFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/SpanQueryFilter.java?view=auto&rev=557105
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/SpanQueryFilter.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/SpanQueryFilter.java Tue Jul 17 17:52:07 2007
@@ -0,0 +1,101 @@
+package org.apache.lucene.search;
+/**
+ * Copyright 2007 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.Spans;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.List;
+
+/**
+ * Constrains search results to only match those which also match a provided
+ * query. Also provides position information about where each document matches
+ * at the cost of extra space compared with the QueryWrapperFilter.
+ * There is an added cost to this above what is stored in a {@link QueryWrapperFilter}.  Namely,
+ * the position information for each matching document is stored.
+ * <p/>
+ * This filter does not cache.  See the {@link org.apache.lucene.search.CachingSpanFilter} for a wrapper that
+ * caches.
+ *
+ *
+ * @version $Id:$
+ */
+public class SpanQueryFilter extends SpanFilter {
+  /** The query whose matches define this filter. */
+  protected SpanQuery query;
+
+  /** Leaves <code>query</code> unset; nothing in this class assigns it afterwards. */
+  protected SpanQueryFilter()
+  {
+    
+  }
+
+  /** Constructs a filter which only matches documents matching
+   * <code>query</code>.
+   * @param query The {@link org.apache.lucene.search.spans.SpanQuery} to use as the basis for the Filter.
+   */
+  public SpanQueryFilter(SpanQuery query) {
+    this.query = query;
+  }
+
+  /**
+   * Returns only the bits of {@link #bitSpans(IndexReader)}; the result is recomputed on every call.
+   *
+   * @param reader The reader to run the query against
+   * @return A BitSet with a bit set for every document the query matches
+   * @throws IOException if reading the spans fails
+   */
+  public BitSet bits(IndexReader reader) throws IOException {
+    SpanFilterResult result = bitSpans(reader);
+    return result.getBits();
+  }
+
+
+  /**
+   * Walks all spans of the query, setting the bit of each document reported and
+   * recording the start and end of every span.  A new
+   * {@link SpanFilterResult.PositionInfo} is started whenever the document reported
+   * by the spans changes.
+   *
+   * @param reader The reader to run the query against
+   * @return The bits and the per-document positions collected from the spans
+   * @throws IOException if reading the spans fails
+   */
+  public SpanFilterResult bitSpans(IndexReader reader) throws IOException {
+
+    final BitSet bits = new BitSet(reader.maxDoc());
+    Spans spans = query.getSpans(reader);
+    List tmp = new ArrayList(20);
+    int currentDoc = -1;
+    SpanFilterResult.PositionInfo currentInfo = null;
+    while (spans.next())
+    {
+      int doc = spans.doc();
+      bits.set(doc);
+      if (currentDoc != doc)
+      {
+        // First span seen for this document: open a new position holder for it.
+        currentInfo = new SpanFilterResult.PositionInfo(doc);
+        tmp.add(currentInfo);
+        currentDoc = doc;
+      }
+      currentInfo.addPosition(spans.start(), spans.end());
+    }
+    return new SpanFilterResult(bits, tmp);
+  }
+
+
+  /**
+   * @return The query this filter is based on
+   */
+  public SpanQuery getQuery() {
+    return query;
+  }
+
+  public String toString() {
+    // Report this class's own name; the previous text named QueryWrapperFilter.
+    return "SpanQueryFilter(" + query + ")";
+  }
+
+  /** Two SpanQueryFilters are equal when their queries are equal. */
+  public boolean equals(Object o) {
+    return o instanceof SpanQueryFilter && this.query.equals(((SpanQueryFilter) o).query);
+  }
+
+  public int hashCode() {
+    return query.hashCode() ^ 0x923F64B9;
+  }
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/search/SpanQueryFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/test/org/apache/lucene/search/TestSpanQueryFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestSpanQueryFilter.java?view=auto&rev=557105
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestSpanQueryFilter.java (added)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestSpanQueryFilter.java Tue Jul 17 17:52:07 2007
@@ -0,0 +1,81 @@
+package org.apache.lucene.search;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import junit.framework.TestCase;
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.English;
+
+import java.util.BitSet;
+import java.util.Iterator;
+import java.util.List;
+
+public class TestSpanQueryFilter extends TestCase {
+
+
+  public TestSpanQueryFilter(String s) {
+    super(s);
+  }
+
+  protected void setUp() {
+  }
+
+  protected void tearDown() {
+
+  }
+
+  /**
+   * Indexes 500 documents whose text is "&lt;i in English&gt; equals &lt;i in English&gt;",
+   * filters on the English word for 10, and checks that the bits and the position
+   * information returned by the filter agree with each other.
+   */
+  public void testFilterWorks() throws Exception {
+    Directory dir = new RAMDirectory();
+    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true);
+    try {
+      for (int i = 0; i < 500; i++) {
+        Document document = new Document();
+        document.add(new Field("field", English.intToEnglish(i) + " equals " + English.intToEnglish(i),
+                Field.Store.NO, Field.Index.TOKENIZED));
+        writer.addDocument(document);
+      }
+    } finally {
+      // Close the writer even if indexing fails part-way through.
+      writer.close();
+    }
+
+    IndexReader reader = IndexReader.open(dir);
+    try {
+      SpanTermQuery query = new SpanTermQuery(new Term("field", English.intToEnglish(10).trim()));
+      SpanQueryFilter filter = new SpanQueryFilter(query);
+      SpanFilterResult result = filter.bitSpans(reader);
+      BitSet bits = result.getBits();
+      assertNotNull("bits is null and it shouldn't be", bits);
+      assertTrue("tenth bit is not on", bits.get(10));
+      List spans = result.getPositions();
+      assertNotNull("spans is null and it shouldn't be", spans);
+      assertTrue("spans Size: " + spans.size() + " is not: " + bits.cardinality(), spans.size() == bits.cardinality());
+      for (Iterator iterator = spans.iterator(); iterator.hasNext();) {
+        SpanFilterResult.PositionInfo info = (SpanFilterResult.PositionInfo) iterator.next();
+        assertNotNull("info is null and it shouldn't be", info);
+        //The doc should indicate the bit is on
+        assertTrue("Bit is not on and it should be", bits.get(info.getDoc()));
+        //There should be two positions in each, since every document repeats its number on both sides of "equals"
+        assertTrue("info.getPositions() Size: " + info.getPositions().size() + " is not: " + 2, info.getPositions().size() == 2);
+      }
+    } finally {
+      // Close the reader even when an assertion above fails.
+      reader.close();
+    }
+  }
+}
\ No newline at end of file

Propchange: lucene/java/trunk/src/test/org/apache/lucene/search/TestSpanQueryFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native