You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by gs...@apache.org on 2007/07/18 02:52:08 UTC
svn commit: r557105 - in /lucene/java/trunk: ./
src/java/org/apache/lucene/search/ src/test/org/apache/lucene/search/
Author: gsingers
Date: Tue Jul 17 17:52:07 2007
New Revision: 557105
URL: http://svn.apache.org/viewvc?view=rev&rev=557105
Log:
LUCENE-960: Added SpanFilter mechanism that provides BitSet information and Span information in a filter. This can be used to get position information on where the filter matched in each Document that is "on" in the filter.
Added:
lucene/java/trunk/src/java/org/apache/lucene/search/CachingSpanFilter.java (with props)
lucene/java/trunk/src/java/org/apache/lucene/search/SpanFilter.java (with props)
lucene/java/trunk/src/java/org/apache/lucene/search/SpanFilterResult.java (with props)
lucene/java/trunk/src/java/org/apache/lucene/search/SpanQueryFilter.java (with props)
lucene/java/trunk/src/test/org/apache/lucene/search/TestSpanQueryFilter.java (with props)
Modified:
lucene/java/trunk/CHANGES.txt
Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?view=diff&rev=557105&r1=557104&r2=557105
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Tue Jul 17 17:52:07 2007
@@ -45,6 +45,8 @@
1. LUCENE-906: Elision filter for French.
(Mathieu Lecarme via Otis Gospodnetic)
+ 2. LUCENE-960: Added a SpanQueryFilter and related classes to allow for not only filtering, but knowing where in a Document a Filter matches (Grant Ingersoll)
+
Optimizations
1. LUCENE-937: CachingTokenFilter now uses an iterator to access the
Added: lucene/java/trunk/src/java/org/apache/lucene/search/CachingSpanFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/CachingSpanFilter.java?view=auto&rev=557105
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/CachingSpanFilter.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/CachingSpanFilter.java Tue Jul 17 17:52:07 2007
@@ -0,0 +1,84 @@
+package org.apache.lucene.search;
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import org.apache.lucene.index.IndexReader;
+
+import java.io.IOException;
+import java.util.BitSet;
+import java.util.Map;
+import java.util.WeakHashMap;
+
+/**
+ * Wraps another SpanFilter's result and caches it. The purpose is to allow
+ * filters to simply filter, and then wrap with this class to add caching.
+ */
+public class CachingSpanFilter extends SpanFilter {
+ protected SpanFilter filter;
+
+ /**
+ * A transient Filter cache. To cache Filters even when using {@link org.apache.lucene.search.RemoteSearchable} use
+ * {@link org.apache.lucene.search.RemoteCachingWrapperFilter} instead.
+ */
+ protected transient Map cache;
+
+ /**
+ * @param filter Filter to cache results of
+ */
+ public CachingSpanFilter(SpanFilter filter) {
+ this.filter = filter;
+ }
+
+ public BitSet bits(IndexReader reader) throws IOException {
+ SpanFilterResult result = getCachedResult(reader);
+ return result != null ? result.getBits() : null;
+ }
+
+ /** Returns the result cached for reader, computing it via the wrapped filter and storing it on a miss. */
+ private SpanFilterResult getCachedResult(IndexReader reader) throws IOException {
+   // Create the map under a lock: unguarded lazy creation lets racing threads each install their own map.
+   synchronized (this) {
+     if (cache == null) { cache = new WeakHashMap(); }
+   }
+   synchronized (cache) {  // check cache; held while computing so each reader is computed once
+     SpanFilterResult result = (SpanFilterResult) cache.get(reader);
+     if (result == null) {
+       result = filter.bitSpans(reader);
+       cache.put(reader, result);
+     }
+     return result;
+   }
+ }
+
+
+ public SpanFilterResult bitSpans(IndexReader reader) throws IOException {
+ return getCachedResult(reader);
+ }
+
+ public String toString() {
+ return "CachingSpanFilter("+filter+")";
+ }
+
+ public boolean equals(Object o) {
+ if (!(o instanceof CachingSpanFilter)) return false;
+ return this.filter.equals(((CachingSpanFilter)o).filter);
+ }
+
+ public int hashCode() {
+ return filter.hashCode() ^ 0x1117BF25;
+ }
+}
Propchange: lucene/java/trunk/src/java/org/apache/lucene/search/CachingSpanFilter.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/java/trunk/src/java/org/apache/lucene/search/SpanFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/SpanFilter.java?view=auto&rev=557105
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/SpanFilter.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/SpanFilter.java Tue Jul 17 17:52:07 2007
@@ -0,0 +1,38 @@
+package org.apache.lucene.search;
+/**
+ * Copyright 2007 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexReader;
+
+import java.io.IOException;
+
+/** Abstract base class providing a mechanism to restrict searches to a subset
+ of an index while also maintaining and returning position information.
+
+ This is useful if you want to compare the positions from a SpanQuery with the positions of items in
+ a filter. For instance, if you had a SpanFilter that marked all the occurrences of the word "foo" in documents,
+ and then you entered a new SpanQuery containing bar, you could not only filter by the word foo, but you could
+ then compare position information for post processing.
+ */
+public abstract class SpanFilter extends Filter{
+ /** Returns a SpanFilterResult whose bits are true for documents which should be permitted in
+ search results and false for those that should not, along with Spans for where the true docs match.
+ * @param reader The {@link org.apache.lucene.index.IndexReader} to load position and bitset information from
+ * @return A {@link SpanFilterResult}
+ * @throws java.io.IOException if there was an issue accessing the necessary information
+ * */
+ public abstract SpanFilterResult bitSpans(IndexReader reader) throws IOException;
+}
Propchange: lucene/java/trunk/src/java/org/apache/lucene/search/SpanFilter.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/java/trunk/src/java/org/apache/lucene/search/SpanFilterResult.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/SpanFilterResult.java?view=auto&rev=557105
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/SpanFilterResult.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/SpanFilterResult.java Tue Jul 17 17:52:07 2007
@@ -0,0 +1,117 @@
+package org.apache.lucene.search;
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.List;
+
+
+/**
+ * The results of a SpanQueryFilter. Wraps the BitSet and the position information from the SpanQuery
+ *
+ *<p/>
+ * NOTE: This API is still experimental and subject to change.
+ *
+ **/
+public class SpanFilterResult {
+ private BitSet bits;
+ private List positions;//Spans spans;
+
+ /**
+ *
+ * @param bits The bits for the Filter
+ * @param positions A List of {@link org.apache.lucene.search.SpanFilterResult.PositionInfo} objects
+ */
+ public SpanFilterResult(BitSet bits, List positions) {
+ this.bits = bits;
+ this.positions = positions;
+ }
+
+ /**
+ * The first entry in the list corresponds to the first "on" bit.
+ * Entries are increasing by document order
+ * @return A List of PositionInfo objects
+ */
+ public List getPositions() {
+ return positions;
+ }
+
+ public BitSet getBits() {
+ return bits;
+ }
+
+
+
+ public static class PositionInfo {
+ private int doc;
+ private List positions;
+
+
+ public PositionInfo(int doc) {
+ this.doc = doc;
+ positions = new ArrayList();
+ }
+
+ public void addPosition(int start, int end)
+ {
+ positions.add(new StartEnd(start, end));
+ }
+
+ public int getDoc() {
+ return doc;
+ }
+
+ /**
+ *
+ * @return A List of {@link org.apache.lucene.search.SpanFilterResult.StartEnd} objects
+ */
+ public List getPositions() {
+ return positions;
+ }
+ }
+
+ public static class StartEnd
+ {
+ private int start;
+ private int end;
+
+
+ public StartEnd(int start, int end) {
+ this.start = start;
+ this.end = end;
+ }
+
+ /**
+ *
+ * @return The end position of this match
+ */
+ public int getEnd() {
+ return end;
+ }
+
+ /**
+ * The Start position
+ * @return The start position of this match
+ */
+ public int getStart() {
+ return start;
+ }
+
+ }
+}
+
+
Propchange: lucene/java/trunk/src/java/org/apache/lucene/search/SpanFilterResult.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/java/trunk/src/java/org/apache/lucene/search/SpanQueryFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/SpanQueryFilter.java?view=auto&rev=557105
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/SpanQueryFilter.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/SpanQueryFilter.java Tue Jul 17 17:52:07 2007
@@ -0,0 +1,101 @@
+package org.apache.lucene.search;
+/**
+ * Copyright 2007 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.Spans;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.List;
+
+/**
+ * Constrains search results to only match those which also match a provided
+ * query. Also provides position information about where each document matches
+ * at the cost of extra space compared with the QueryWrapperFilter.
+ * There is an added cost to this above what is stored in a {@link QueryWrapperFilter}. Namely,
+ * the position information for each matching document is stored.
+ * <p/>
+ * This filter does not cache. See the {@link org.apache.lucene.search.CachingSpanFilter} for a wrapper that
+ * caches.
+ *
+ *
+ * @version $Id:$
+ */
+public class SpanQueryFilter extends SpanFilter {
+ protected SpanQuery query;
+
+ protected SpanQueryFilter()
+ {
+
+ }
+
+ /** Constructs a filter which only matches documents matching
+ * <code>query</code>.
+ * @param query The {@link org.apache.lucene.search.spans.SpanQuery} to use as the basis for the Filter.
+ */
+ public SpanQueryFilter(SpanQuery query) {
+ this.query = query;
+ }
+
+ public BitSet bits(IndexReader reader) throws IOException {
+ SpanFilterResult result = bitSpans(reader);
+ return result.getBits();
+ }
+
+
+ public SpanFilterResult bitSpans(IndexReader reader) throws IOException {
+
+ final BitSet bits = new BitSet(reader.maxDoc());
+ Spans spans = query.getSpans(reader);
+ List tmp = new ArrayList(20);
+ int currentDoc = -1;
+ SpanFilterResult.PositionInfo currentInfo = null;
+ while (spans.next())
+ {
+ int doc = spans.doc();
+ bits.set(doc);
+ if (currentDoc != doc)
+ {
+ currentInfo = new SpanFilterResult.PositionInfo(doc);
+ tmp.add(currentInfo);
+ currentDoc = doc;
+ }
+ currentInfo.addPosition(spans.start(), spans.end());
+ }
+ return new SpanFilterResult(bits, tmp);
+ }
+
+
+ public SpanQuery getQuery() {
+ return query;
+ }
+
+ public String toString() {  // label with this class's own name
+   return "SpanQueryFilter(" + query + ")";
+ }
+
+ public boolean equals(Object o) {
+ return o instanceof SpanQueryFilter && this.query.equals(((SpanQueryFilter) o).query);
+ }
+
+ public int hashCode() {
+ return query.hashCode() ^ 0x923F64B9;
+ }
+}
Propchange: lucene/java/trunk/src/java/org/apache/lucene/search/SpanQueryFilter.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/java/trunk/src/test/org/apache/lucene/search/TestSpanQueryFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestSpanQueryFilter.java?view=auto&rev=557105
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestSpanQueryFilter.java (added)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestSpanQueryFilter.java Tue Jul 17 17:52:07 2007
@@ -0,0 +1,81 @@
+package org.apache.lucene.search;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import junit.framework.TestCase;
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.English;
+
+import java.util.BitSet;
+import java.util.Iterator;
+import java.util.List;
+
+public class TestSpanQueryFilter extends TestCase {
+
+
+ public TestSpanQueryFilter(String s) {
+ super(s);
+ }
+
+ protected void setUp() {
+ }
+
+ protected void tearDown() {
+
+ }
+
+ public void testFilterWorks() throws Exception {
+ Directory dir = new RAMDirectory();
+ IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true);
+ for (int i = 0; i < 500; i++) {
+ Document document = new Document();
+ document.add(new Field("field", English.intToEnglish(i) + " equals " + English.intToEnglish(i),
+ Field.Store.NO, Field.Index.TOKENIZED));
+ writer.addDocument(document);
+ }
+ writer.close();
+
+ IndexReader reader = IndexReader.open(dir);
+
+ SpanTermQuery query = new SpanTermQuery(new Term("field", English.intToEnglish(10).trim()));
+ SpanQueryFilter filter = new SpanQueryFilter(query);
+ SpanFilterResult result = filter.bitSpans(reader);
+ BitSet bits = result.getBits();
+ assertTrue("bits is null and it shouldn't be", bits != null);
+ assertTrue("tenth bit is not on", bits.get(10));
+ List spans = result.getPositions();
+ assertTrue("spans is null and it shouldn't be", spans != null);
+ assertTrue("spans Size: " + spans.size() + " is not: " + bits.cardinality(), spans.size() == bits.cardinality());
+ for (Iterator iterator = spans.iterator(); iterator.hasNext();) {
+ SpanFilterResult.PositionInfo info = (SpanFilterResult.PositionInfo) iterator.next();
+ assertTrue("info is null and it shouldn't be", info != null);
+ //The doc should indicate the bit is on
+ assertTrue("Bit is not on and it should be", bits.get(info.getDoc()));
+ //There should be two positions in each
+ assertTrue("info.getPositions() Size: " + info.getPositions().size() + " is not: " + 2, info.getPositions().size() == 2);
+ }
+ reader.close();
+ }
+}
\ No newline at end of file
Propchange: lucene/java/trunk/src/test/org/apache/lucene/search/TestSpanQueryFilter.java
------------------------------------------------------------------------------
svn:eol-style = native