You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2011/12/15 00:37:37 UTC
svn commit: r1214551 - in /lucene/dev/trunk/lucene/src:
java/org/apache/lucene/index/ java/org/apache/lucene/search/
java/org/apache/lucene/util/ test/org/apache/lucene/index/
test/org/apache/lucene/util/
Author: uschindler
Date: Wed Dec 14 23:37:36 2011
New Revision: 1214551
URL: http://svn.apache.org/viewvc?rev=1214551&view=rev
Log:
LUCENE-3531: Version 2 of the CachingWrapperFilter with deletes/acceptDocs caching
Removed:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/WeakIdentityHashMap.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestWeakIdentityHashMap.java
Modified:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java?rev=1214551&r1=1214550&r2=1214551&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java Wed Dec 14 23:37:36 2011
@@ -388,15 +388,24 @@ public class FilterIndexReader extends I
return in.fields();
}
- /** If the subclass of FilteredIndexReader modifies the
- * contents of the FieldCache, you must override this
- * method to provide a different key */
+ /** {@inheritDoc}
+ * <p>If the subclass of FilteredIndexReader modifies the
+ * contents (but not liveDocs) of the index, you must override this
+ * method to provide a different key. */
@Override
public Object getCoreCacheKey() {
return in.getCoreCacheKey();
}
- /** {@inheritDoc} */
+ /** {@inheritDoc}
+ * <p>If the subclass of FilteredIndexReader modifies the
+ * liveDocs, you must override this
+ * method to provide a different key. */
+ @Override
+ public Object getCombinedCoreAndDeletesKey() {
+ return in.getCombinedCoreAndDeletesKey();
+ }
+
@Override
public String toString() {
final StringBuilder buffer = new StringBuilder("FilterReader(");
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java?rev=1214551&r1=1214550&r2=1214551&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java Wed Dec 14 23:37:36 2011
@@ -1100,13 +1100,24 @@ public abstract class IndexReader implem
*/
public abstract ReaderContext getTopReaderContext();
- /** Expert */
+ /** Expert: Returns a key for this IndexReader, so FieldCache/CachingWrapperFilter can find
+ * it again.
+ * This key must not have equals()/hashCode() methods, so "equals" means "identical". */
public Object getCoreCacheKey() {
// Don't can ensureOpen since FC calls this (to evict)
// on close
return this;
}
+ /** Expert: Returns a key for this IndexReader that also includes deletions,
+ * so FieldCache/CachingWrapperFilter can find it again.
+ * This key must not have equals()/hashCode() methods, so "equals" means "identical". */
+ public Object getCombinedCoreAndDeletesKey() {
+ // Don't can ensureOpen since FC calls this (to evict)
+ // on close
+ return this;
+ }
+
/** Returns the number of unique terms (across all fields)
* in this reader.
*
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentReader.java?rev=1214551&r1=1214550&r2=1214551&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentReader.java Wed Dec 14 23:37:36 2011
@@ -47,7 +47,8 @@ public final class SegmentReader extends
final CloseableThreadLocal<StoredFieldsReader> fieldsReaderLocal = new FieldsReaderLocal();
final CloseableThreadLocal<TermVectorsReader> termVectorsLocal = new CloseableThreadLocal<TermVectorsReader>();
- volatile BitVector liveDocs;
+ volatile BitVector liveDocs = null;
+ volatile Object combinedCoreAndDeletesKey;
AtomicInteger liveDocsRef = null;
boolean hasChanges = false;
private boolean liveDocsDirty = false;
@@ -159,8 +160,11 @@ public final class SegmentReader extends
if (liveDocs.size() != si.docCount) {
throw new CorruptIndexException("document count mismatch: deleted docs count " + liveDocs.size() + " vs segment doc count " + si.docCount + " segment=" + si.name);
}
- } else
+ } else {
assert si.getDelCount() == 0;
+ }
+ // we need a key reflecting actual deletes (if existent or not):
+ combinedCoreAndDeletesKey = new Object();
}
@Override
@@ -411,6 +415,11 @@ public final class SegmentReader extends
}
@Override
+ public Object getCombinedCoreAndDeletesKey() {
+ return combinedCoreAndDeletesKey;
+ }
+
+ @Override
public int getTermInfosIndexDivisor() {
return core.termsIndexDivisor;
}
@@ -465,6 +474,7 @@ public final class SegmentReader extends
core.incRef();
clone.core = core;
clone.pendingDeleteCount = pendingDeleteCount;
+ clone.combinedCoreAndDeletesKey = combinedCoreAndDeletesKey;
if (!openReadOnly && hasChanges) {
// My pending changes transfer to the new reader
@@ -592,6 +602,9 @@ public final class SegmentReader extends
liveDocsRef = new AtomicInteger(1);
oldRef.decrementAndGet();
}
+ // we need a key reflecting actual deletes (if existent or not):
+ combinedCoreAndDeletesKey = new Object();
+ // liveDocs are now dirty:
liveDocsDirty = true;
if (liveDocs.getAndClear(docNum)) {
pendingDeleteCount++;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java?rev=1214551&r1=1214550&r2=1214551&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java Wed Dec 14 23:37:36 2011
@@ -18,17 +18,17 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
-import java.lang.ref.SoftReference;
+import java.util.Collections;
+import java.util.Map;
import java.util.WeakHashMap;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.WeakIdentityHashMap;
/**
- * Wraps another filter's result and caches it. The purpose is to allow
+ * Wraps another {@link Filter}'s result and caches it. The purpose is to allow
* filters to simply filter, and then wrap with this class
* to add caching.
*/
@@ -37,43 +37,33 @@ public class CachingWrapperFilter extend
// specify the actual readers key or something similar to indicate on which
// level of the readers hierarchy it should be cached.
private final Filter filter;
- private final FilterCache cache = new FilterCache();
+ private final Map<Object,DocIdSet> cache = Collections.synchronizedMap(new WeakHashMap<Object,DocIdSet>());
private final boolean recacheDeletes;
- private static class FilterCache {
- private final WeakHashMap<Object,WeakIdentityHashMap<Bits,SoftReference<DocIdSet>>> cache =
- new WeakHashMap<Object,WeakIdentityHashMap<Bits,SoftReference<DocIdSet>>>();
-
- public synchronized DocIdSet get(IndexReader reader, Bits acceptDocs) throws IOException {
- final Object coreKey = reader.getCoreCacheKey();
- WeakIdentityHashMap<Bits,SoftReference<DocIdSet>> innerCache = cache.get(coreKey);
- if (innerCache == null) {
- innerCache = new WeakIdentityHashMap<Bits,SoftReference<DocIdSet>>();
- cache.put(coreKey, innerCache);
- }
-
- final SoftReference<DocIdSet> innerRef = innerCache.get(acceptDocs);
- return innerRef == null ? null : innerRef.get();
- }
-
- public synchronized void put(IndexReader reader, Bits acceptDocs, DocIdSet value) {
- cache.get(reader.getCoreCacheKey()).put(acceptDocs, new SoftReference<DocIdSet>(value));
- }
- }
-
/** Wraps another filter's result and caches it.
+ * Deletions are not cached and AND'd in on the fly, see
+ * {@link #CachingWrapperFilter(Filter,boolean)} for an explanation.
+ * This constructor is recommended for often changing indexes.
* @param filter Filter to cache results of
+ * @see #CachingWrapperFilter(Filter,boolean)
*/
public CachingWrapperFilter(Filter filter) {
this(filter, false);
}
- /** Wraps another filter's result and caches it. If
- * recacheDeletes is true, then new deletes (for example
- * after {@link IndexReader#openIfChanged}) will be AND'd
- * and cached again.
+ /** Wraps another filter's result and caches it. If
+ * {@code recacheDeletes} is {@code true}, then new deletes (for example
+ * after {@link IndexReader#openIfChanged}) will cause the filter
+ * {@link DocIdSet} to be recached.
*
- * @param filter Filter to cache results of
+ * <p>If your index changes seldom, it is recommended to use {@code recacheDeletes=true},
+ * as recaching will only occur when the index is reopened.
+ * For near-real-time indexes or indexes that are often
+ * reopened with (e.g., {@link IndexReader#openIfChanged} is used), you should
+ * pass {@code recacheDeletes=false}. This will cache the filter results omitting
+ * deletions and will AND them in while scoring.
+ * @param filter Filter to cache results of
+ * @param recacheDeletes if deletions on the underlying index should recache
*/
public CachingWrapperFilter(Filter filter, boolean recacheDeletes) {
this.filter = filter;
@@ -84,7 +74,7 @@ public class CachingWrapperFilter extend
* by the wrapped Filter.
* <p>This implementation returns the given {@link DocIdSet}, if {@link DocIdSet#isCacheable}
* returns <code>true</code>, else it copies the {@link DocIdSetIterator} into
- * an {@link FixedBitSet}.
+ * a {@link FixedBitSet}.
*/
protected DocIdSet docIdSetToCache(DocIdSet docIdSet, IndexReader reader) throws IOException {
if (docIdSet == null) {
@@ -116,26 +106,31 @@ public class CachingWrapperFilter extend
// Only cache if incoming acceptDocs is == live docs;
// if Lucene passes in more interesting acceptDocs in
- // the future we don't want to over-cache:
- final boolean doCacheSubAcceptDocs = recacheDeletes && acceptDocs == reader.getLiveDocs();
-
- final Bits subAcceptDocs;
- if (doCacheSubAcceptDocs) {
- subAcceptDocs = acceptDocs;
+ // the future (@UweSays: it already does when you chain FilteredQuery) we don't want to over-cache:
+ final Bits liveDocs = reader.getLiveDocs();
+ final boolean doCacheAcceptDocs = (recacheDeletes && acceptDocs == liveDocs);
+
+ final Object key;
+ final Bits cacheAcceptDocs;
+ if (doCacheAcceptDocs) {
+ assert acceptDocs == liveDocs;
+ key = reader.getCombinedCoreAndDeletesKey();
+ cacheAcceptDocs = acceptDocs;
} else {
- subAcceptDocs = null;
+ key = reader.getCoreCacheKey();
+ cacheAcceptDocs = null;
}
- DocIdSet docIdSet = cache.get(reader, subAcceptDocs);
+ DocIdSet docIdSet = cache.get(key);
if (docIdSet != null) {
hitCount++;
} else {
missCount++;
- docIdSet = docIdSetToCache(filter.getDocIdSet(context, subAcceptDocs), reader);
- cache.put(reader, subAcceptDocs, docIdSet);
+ docIdSet = docIdSetToCache(filter.getDocIdSet(context, cacheAcceptDocs), reader);
+ cache.put(key, docIdSet);
}
- if (doCacheSubAcceptDocs) {
+ if (doCacheAcceptDocs) {
return docIdSet;
} else {
return BitsFilteredDocIdSet.wrap(docIdSet, acceptDocs);
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java?rev=1214551&r1=1214550&r2=1214551&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java Wed Dec 14 23:37:36 2011
@@ -78,14 +78,44 @@ public class TestSegmentReader extends L
DocHelper.setupDoc(docToDelete);
SegmentInfo info = DocHelper.writeDoc(random, dir, docToDelete);
SegmentReader deleteReader = SegmentReader.getRW(info, true, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random));
- assertTrue(deleteReader != null);
- assertTrue(deleteReader.numDocs() == 1);
+ assertNotNull(deleteReader);
+ assertEquals(1, deleteReader.numDocs());
+ final Object combKey = deleteReader.getCombinedCoreAndDeletesKey();
+ final Object coreKey = deleteReader.getCoreCacheKey();
+ assertNotNull(combKey);
+ assertNotNull(coreKey);
+ assertNotSame(combKey, coreKey);
+
+ SegmentReader clone1 = (SegmentReader) deleteReader.clone();
+ assertSame(coreKey, clone1.getCoreCacheKey());
+ assertSame(combKey, clone1.getCombinedCoreAndDeletesKey());
+
deleteReader.deleteDocument(0);
+ final Object newCombKey = deleteReader.getCombinedCoreAndDeletesKey();
+ assertNotNull(newCombKey);
+ assertNotSame(combKey, newCombKey);
+ assertSame(coreKey, deleteReader.getCoreCacheKey());
assertFalse(deleteReader.getLiveDocs().get(0));
- assertTrue(deleteReader.hasDeletions() == true);
+ assertTrue(deleteReader.hasDeletions());
assertTrue(deleteReader.numDocs() == 0);
+
+ SegmentReader clone2 = (SegmentReader) deleteReader.clone();
+ assertSame(coreKey, clone2.getCoreCacheKey());
+ assertSame(newCombKey, clone2.getCombinedCoreAndDeletesKey());
+ assertFalse(clone2.getLiveDocs().get(0));
+ assertTrue(clone2.hasDeletions());
+ assertEquals(0, clone2.numDocs());
+ clone2.close();
+
+ assertSame(coreKey, clone1.getCoreCacheKey());
+ assertSame(combKey, clone1.getCombinedCoreAndDeletesKey());
+ assertNull(clone1.getLiveDocs());
+ assertFalse(clone1.hasDeletions());
+ assertEquals(1, clone2.numDocs());
+ clone1.close();
+
deleteReader.close();
- }
+ }
public void testGetFieldNameVariations() {
Collection<String> result = reader.getFieldNames(IndexReader.FieldOption.ALL);