You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by Uwe Schindler <uw...@thetaphi.de> on 2010/09/26 06:13:37 UTC
RE: svn commit: r1001303 [1/2] - in /lucene/dev/trunk/lucene: ./ src/java/org/apache/lucene/search/ src/java/org/apache/lucene/search/cache/ src/java/org/apache/lucene/util/ src/test/org/apache/lucene/search/cache/
Additionally, we could create a branch (like the one for realtime) to develop such things. Then we could do RTC, as the real commit happens when the branch is merged back to trunk.
But it's no problem; I am fine with it at the moment. :-)
-----
Uwe Schindler
H.-H.-Meier-Allee 63, D-28213 Bremen
http://www.thetaphi.de
eMail: uwe@thetaphi.de
> -----Original Message-----
> From: ryan@apache.org [mailto:ryan@apache.org]
> Sent: Saturday, September 25, 2010 12:33 PM
> To: commits@lucene.apache.org
> Subject: svn commit: r1001303 [1/2] - in /lucene/dev/trunk/lucene: ./
> src/java/org/apache/lucene/search/
> src/java/org/apache/lucene/search/cache/ src/java/org/apache/lucene/util/
> src/test/org/apache/lucene/search/cache/
>
> Author: ryan
> Date: Sat Sep 25 19:32:37 2010
> New Revision: 1001303
>
> URL: http://svn.apache.org/viewvc?rev=1001303&view=rev
> Log:
> LUCENE-2649: Objects in the FieldCache can optionally store valid Bits
>
> Apologies for 'CTR' rather than 'RTC' -- we can always revert if I jumped the
> gun!
>
> Added:
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/
>
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/ByteValue
> sCreator.java
>
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/CachedArr
> ay.java
>
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/CachedArr
> ayCreator.java
>
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTerms
> Creator.java
>
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsI
> ndexCreator.java
>
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DoubleVal
> uesCreator.java
>
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/EntryCreat
> or.java
>
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/EntryCreat
> orWithOptions.java
>
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/EntryKey.j
> ava
>
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/FloatValue
> sCreator.java
>
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/IntValuesC
> reator.java
>
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/LongValue
> sCreator.java
>
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/ShortValue
> sCreator.java
>
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/SimpleEntr
> yKey.java
> lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/cache/
>
> lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/cache/TestEntryC
> reators.java
> Modified:
> lucene/dev/trunk/lucene/CHANGES.txt
>
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCache.java
>
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.j
> ava
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/Bits.java
>
> Modified: lucene/dev/trunk/lucene/CHANGES.txt
> URL:
> http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1001
> 303&r1=1001302&r2=1001303&view=diff
> ================================================================
> ==============
> --- lucene/dev/trunk/lucene/CHANGES.txt (original)
> +++ lucene/dev/trunk/lucene/CHANGES.txt Sat Sep 25 19:32:37 2010
> @@ -231,6 +231,9 @@ New features
>
> * LUCENE-2648: PackedInts.Iterator now supports to advance by more than a
> single ordinal. (Simon Willnauer)
> +
> +* LUCENE-2649: Objects in the FieldCache can optionally store Bits
> + that mark which docs have real values in the native[] (ryan)
>
> Optimizations
>
>
> Modified:
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCache.java
> URL:
> http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lu
> cene/search/FieldCache.java?rev=1001303&r1=1001302&r2=1001303&view=di
> ff
> ================================================================
> ==============
> ---
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCache.java
> (original)
> +++
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCache.java
> Sat Sep 25 19:32:37 2010
> @@ -19,6 +19,8 @@ package org.apache.lucene.search;
>
> import org.apache.lucene.index.IndexReader;
> import org.apache.lucene.index.TermsEnum;
> +import org.apache.lucene.search.cache.EntryCreator;
> +import org.apache.lucene.search.cache.CachedArray.*;
> import org.apache.lucene.util.NumericUtils;
> import org.apache.lucene.util.RamUsageEstimator;
> import org.apache.lucene.util.BytesRef;
> @@ -47,6 +49,14 @@ public interface FieldCache {
> }
>
> /**
> + * Hack: When thrown from a Parser (NUMERIC_UTILS_* ones), this stops
> + * processing terms and returns the current FieldCache
> + * array.
> + */
> + public static final class StopFillCacheException extends RuntimeException {
> + }
> +
> + /**
> * Marker interface as super-interface to all parsers. It
> * is used to specify a custom parser to {@link
> * SortField#SortField(String, FieldCache.Parser)}.
> @@ -314,6 +324,19 @@ public interface FieldCache {
> public byte[] getBytes (IndexReader reader, String field, ByteParser parser)
> throws IOException;
>
> + /** Checks the internal cache for an appropriate entry, and if none is found,
> + * reads the terms in <code>field</code> as bytes and returns an array of
> + * size <code>reader.maxDoc()</code> of the value each document has in
> the
> + * given field.
> + * @param reader Used to get field values.
> + * @param field Which field contains the bytes.
> + * @param creator Used to make the ByteValues
> + * @return The values in the given field for each document.
> + * @throws IOException If any error occurs.
> + */
> + public ByteValues getBytes(IndexReader reader, String field,
> EntryCreator<ByteValues> creator ) throws IOException;
> +
> +
> /** Checks the internal cache for an appropriate entry, and if none is
> * found, reads the terms in <code>field</code> as shorts and returns an
> array
> * of size <code>reader.maxDoc()</code> of the value each document
> @@ -338,6 +361,20 @@ public interface FieldCache {
> */
> public short[] getShorts (IndexReader reader, String field, ShortParser parser)
> throws IOException;
> +
> +
> + /** Checks the internal cache for an appropriate entry, and if none is found,
> + * reads the terms in <code>field</code> as shorts and returns an array of
> + * size <code>reader.maxDoc()</code> of the value each document has in
> the
> + * given field.
> + * @param reader Used to get field values.
> + * @param field Which field contains the shorts.
> + * @param creator Computes short for string values.
> + * @return The values in the given field for each document.
> + * @throws IOException If any error occurs.
> + */
> + public ShortValues getShorts(IndexReader reader, String field,
> EntryCreator<ShortValues> creator ) throws IOException;
> +
>
> /** Checks the internal cache for an appropriate entry, and if none is
> * found, reads the terms in <code>field</code> as integers and returns an
> array
> @@ -364,6 +401,19 @@ public interface FieldCache {
> public int[] getInts (IndexReader reader, String field, IntParser parser)
> throws IOException;
>
> + /** Checks the internal cache for an appropriate entry, and if none is found,
> + * reads the terms in <code>field</code> as integers and returns an array of
> + * size <code>reader.maxDoc()</code> of the value each document has in
> the
> + * given field.
> + * @param reader Used to get field values.
> + * @param field Which field contains the integers.
> + * @param creator Computes integer for string values.
> + * @return The values in the given field for each document.
> + * @throws IOException If any error occurs.
> + */
> + public IntValues getInts(IndexReader reader, String field,
> EntryCreator<IntValues> creator ) throws IOException;
> +
> +
> /** Checks the internal cache for an appropriate entry, and if
> * none is found, reads the terms in <code>field</code> as floats and returns
> an array
> * of size <code>reader.maxDoc()</code> of the value each document
> @@ -388,6 +438,19 @@ public interface FieldCache {
> */
> public float[] getFloats (IndexReader reader, String field,
> FloatParser parser) throws IOException;
> +
> + /** Checks the internal cache for an appropriate entry, and if
> + * none is found, reads the terms in <code>field</code> as floats and returns
> an array
> + * of size <code>reader.maxDoc()</code> of the value each document
> + * has in the given field.
> + * @param reader Used to get field values.
> + * @param field Which field contains the floats.
> + * @param creator Computes float for string values.
> + * @return The values in the given field for each document.
> + * @throws IOException If any error occurs.
> + */
> + public FloatValues getFloats(IndexReader reader, String field,
> EntryCreator<FloatValues> creator ) throws IOException;
> +
>
> /**
> * Checks the internal cache for an appropriate entry, and if none is
> @@ -418,6 +481,20 @@ public interface FieldCache {
> public long[] getLongs(IndexReader reader, String field, LongParser parser)
> throws IOException;
>
> + /**
> + * Checks the internal cache for an appropriate entry, and if none is found,
> + * reads the terms in <code>field</code> as longs and returns an array of
> + * size <code>reader.maxDoc()</code> of the value each document has in
> the
> + * given field.
> + *
> + * @param reader Used to get field values.
> + * @param field Which field contains the longs.
> + * @param creator Computes integer for string values.
> + * @return The values in the given field for each document.
> + * @throws IOException If any error occurs.
> + */
> + public LongValues getLongs(IndexReader reader, String field,
> EntryCreator<LongValues> creator ) throws IOException;
> +
>
> /**
> * Checks the internal cache for an appropriate entry, and if none is
> @@ -448,6 +525,21 @@ public interface FieldCache {
> public double[] getDoubles(IndexReader reader, String field, DoubleParser
> parser)
> throws IOException;
>
> + /**
> + * Checks the internal cache for an appropriate entry, and if none is found,
> + * reads the terms in <code>field</code> as doubles and returns an array of
> + * size <code>reader.maxDoc()</code> of the value each document has in
> the
> + * given field.
> + *
> + * @param reader Used to get field values.
> + * @param field Which field contains the doubles.
> + * @param creator Computes integer for string values.
> + * @return The values in the given field for each document.
> + * @throws IOException If any error occurs.
> + */
> + public DoubleValues getDoubles(IndexReader reader, String field,
> EntryCreator<DoubleValues> creator ) throws IOException;
> +
> +
> /** Returned by {@link #getTerms} */
> public abstract static class DocTerms {
> /** The BytesRef argument must not be null; the method
>
> Modified:
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.j
> ava
> URL:
> http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lu
> cene/search/FieldCacheImpl.java?rev=1001303&r1=1001302&r2=1001303&vie
> w=diff
> ================================================================
> ==============
> ---
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.j
> ava (original)
> +++
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.j
> ava Sat Sep 25 19:32:37 2010
> @@ -19,27 +19,43 @@ package org.apache.lucene.search;
>
> import java.io.IOException;
> import java.io.PrintStream;
> -import java.util.*;
> -
> -import org.apache.lucene.index.*;
> -import org.apache.lucene.util.PagedBytes;
> -import org.apache.lucene.util.packed.PackedInts;
> -import org.apache.lucene.util.packed.GrowableWriter;
> -import org.apache.lucene.util.BytesRef;
> -import org.apache.lucene.util.ArrayUtil;
> -import org.apache.lucene.util.Bits;
> -import org.apache.lucene.util.StringHelper;
> +import java.util.ArrayList;
> +import java.util.HashMap;
> +import java.util.List;
> +import java.util.Map;
> +import java.util.WeakHashMap;
> +
> +import org.apache.lucene.index.IndexReader;
> +import org.apache.lucene.search.cache.ByteValuesCreator;
> +import org.apache.lucene.search.cache.DocTermsCreator;
> +import org.apache.lucene.search.cache.DocTermsIndexCreator;
> +import org.apache.lucene.search.cache.DoubleValuesCreator;
> +import org.apache.lucene.search.cache.EntryCreator;
> +import org.apache.lucene.search.cache.FloatValuesCreator;
> +import org.apache.lucene.search.cache.IntValuesCreator;
> +import org.apache.lucene.search.cache.LongValuesCreator;
> +import org.apache.lucene.search.cache.ShortValuesCreator;
> +import org.apache.lucene.search.cache.CachedArray.ByteValues;
> +import org.apache.lucene.search.cache.CachedArray.DoubleValues;
> +import org.apache.lucene.search.cache.CachedArray.FloatValues;
> +import org.apache.lucene.search.cache.CachedArray.IntValues;
> +import org.apache.lucene.search.cache.CachedArray.LongValues;
> +import org.apache.lucene.search.cache.CachedArray.ShortValues;
> import org.apache.lucene.util.FieldCacheSanityChecker;
> +import org.apache.lucene.util.StringHelper;
>
> /**
> * Expert: The default cache implementation, storing all values in memory.
> * A WeakHashMap is used for storage.
> *
> * <p>Created: May 19, 2004 4:40:36 PM
> - *
> + *
> + * @lucene.internal -- this is now public so that the tests can use reflection
> + * to call methods. It will likely be removed without (much) notice.
> + *
> * @since lucene 1.4
> */
> -class FieldCacheImpl implements FieldCache {
> +public class FieldCacheImpl implements FieldCache { // Made Public so that
>
> private Map<Class<?>,Cache> caches;
> FieldCacheImpl() {
> @@ -47,16 +63,16 @@ class FieldCacheImpl implements FieldCac
> }
> private synchronized void init() {
> caches = new HashMap<Class<?>,Cache>(7);
> - caches.put(Byte.TYPE, new ByteCache(this));
> - caches.put(Short.TYPE, new ShortCache(this));
> - caches.put(Integer.TYPE, new IntCache(this));
> - caches.put(Float.TYPE, new FloatCache(this));
> - caches.put(Long.TYPE, new LongCache(this));
> - caches.put(Double.TYPE, new DoubleCache(this));
> - caches.put(DocTermsIndex.class, new DocTermsIndexCache(this));
> - caches.put(DocTerms.class, new DocTermsCache(this));
> + caches.put(Byte.TYPE, new Cache<ByteValues>(this));
> + caches.put(Short.TYPE, new Cache<ShortValues>(this));
> + caches.put(Integer.TYPE, new Cache<IntValues>(this));
> + caches.put(Float.TYPE, new Cache<FloatValues>(this));
> + caches.put(Long.TYPE, new Cache<LongValues>(this));
> + caches.put(Double.TYPE, new Cache<DoubleValues>(this));
> + caches.put(DocTermsIndex.class, new Cache<DocTermsIndex>(this));
> + caches.put(DocTerms.class, new Cache<DocTerms>(this));
> }
> -
> +
> public synchronized void purgeAllCaches() {
> init();
> }
> @@ -70,17 +86,15 @@ class FieldCacheImpl implements FieldCac
> public synchronized CacheEntry[] getCacheEntries() {
> List<CacheEntry> result = new ArrayList<CacheEntry>(17);
> for(final Map.Entry<Class<?>,Cache> cacheEntry: caches.entrySet()) {
> - final Cache cache = cacheEntry.getValue();
> + final Cache<?> cache = cacheEntry.getValue();
> final Class<?> cacheType = cacheEntry.getKey();
> synchronized(cache.readerCache) {
> - for (final Map.Entry<Object,Map<Entry, Object>> readerCacheEntry :
> cache.readerCache.entrySet()) {
> - final Object readerKey = readerCacheEntry.getKey();
> - if (readerKey == null) continue;
> - final Map<Entry, Object> innerCache = readerCacheEntry.getValue();
> - for (final Map.Entry<Entry, Object> mapEntry : innerCache.entrySet()) {
> - Entry entry = mapEntry.getKey();
> + for( Object readerKey : cache.readerCache.keySet() ) {
> + Map<?, Object> innerCache = cache.readerCache.get(readerKey);
> + for (final Map.Entry<?, Object> mapEntry : innerCache.entrySet()) {
> + Entry entry = (Entry)mapEntry.getKey();
> result.add(new CacheEntryImpl(readerKey, entry.field,
> - cacheType, entry.custom,
> + cacheType, entry.creator,
> mapEntry.getValue()));
> }
> }
> @@ -93,11 +107,11 @@ class FieldCacheImpl implements FieldCac
> private final Object readerKey;
> private final String fieldName;
> private final Class<?> cacheType;
> - private final Object custom;
> + private final EntryCreator custom;
> private final Object value;
> CacheEntryImpl(Object readerKey, String fieldName,
> Class<?> cacheType,
> - Object custom,
> + EntryCreator custom,
> Object value) {
> this.readerKey = readerKey;
> this.fieldName = fieldName;
> @@ -123,16 +137,8 @@ class FieldCacheImpl implements FieldCac
> public Object getValue() { return value; }
> }
>
> - /**
> - * Hack: When thrown from a Parser (NUMERIC_UTILS_* ones), this stops
> - * processing terms and returns the current FieldCache
> - * array.
> - */
> - static final class StopFillCacheException extends RuntimeException {
> - }
> -
> /** Expert: Internal cache. */
> - abstract static class Cache {
> + final static class Cache<T> {
> Cache() {
> this.wrapper = null;
> }
> @@ -143,10 +149,11 @@ class FieldCacheImpl implements FieldCac
>
> final FieldCache wrapper;
>
> - final Map<Object,Map<Entry,Object>> readerCache = new
> WeakHashMap<Object,Map<Entry,Object>>();
> -
> - protected abstract Object createValue(IndexReader reader, Entry key)
> - throws IOException;
> + final Map<Object,Map<Entry<T>,Object>> readerCache = new
> WeakHashMap<Object,Map<Entry<T>,Object>>();
> +
> + protected Object createValue(IndexReader reader, Entry entryKey) throws
> IOException {
> + return entryKey.creator.create( reader );
> + }
>
> /** Remove this reader from the cache, if present. */
> public void purge(IndexReader r) {
> @@ -156,14 +163,14 @@ class FieldCacheImpl implements FieldCac
> }
> }
>
> - public Object get(IndexReader reader, Entry key) throws IOException {
> - Map<Entry,Object> innerCache;
> + public Object get(IndexReader reader, Entry<T> key) throws IOException {
> + Map<Entry<T>,Object> innerCache;
> Object value;
> final Object readerKey = reader.getCoreCacheKey();
> synchronized (readerCache) {
> innerCache = readerCache.get(readerKey);
> if (innerCache == null) {
> - innerCache = new HashMap<Entry,Object>();
> + innerCache = new HashMap<Entry<T>,Object>();
> readerCache.put(readerKey, innerCache);
> value = null;
> } else {
> @@ -186,7 +193,7 @@ class FieldCacheImpl implements FieldCac
> // Only check if key.custom (the parser) is
> // non-null; else, we check twice for a single
> // call to FieldCache.getXXX
> - if (key.custom != null && wrapper != null) {
> + if (key.creator != null && wrapper != null) {
> final PrintStream infoStream = wrapper.getInfoStream();
> if (infoStream != null) {
> printNewInsanity(infoStream, progress.value);
> @@ -196,6 +203,11 @@ class FieldCacheImpl implements FieldCac
> return progress.value;
> }
> }
> +
> + // Validate new entries
> + if( key.creator.shouldValidate() ) {
> + key.creator.validate( (T)value, reader);
> + }
> return value;
> }
>
> @@ -218,14 +230,14 @@ class FieldCacheImpl implements FieldCac
> }
>
> /** Expert: Every composite-key in the internal cache is of this type. */
> - static class Entry {
> + static class Entry<T> {
> final String field; // which Fieldable
> - final Object custom; // which custom comparator or parser
> + final EntryCreator<T> creator; // which custom comparator or parser
>
> /** Creates one of these objects for a custom comparator/parser. */
> - Entry (String field, Object custom) {
> + Entry (String field, EntryCreator<T> custom) {
> this.field = StringHelper.intern(field);
> - this.custom = custom;
> + this.creator = custom;
> }
>
> /** Two of these are equal iff they reference the same field and type. */
> @@ -234,9 +246,9 @@ class FieldCacheImpl implements FieldCac
> if (o instanceof Entry) {
> Entry other = (Entry) o;
> if (other.field == field) {
> - if (other.custom == null) {
> - if (custom == null) return true;
> - } else if (other.custom.equals (custom)) {
> + if (other.creator == null) {
> + if (creator == null) return true;
> + } else if (other.creator.equals (creator)) {
> return true;
> }
> }
> @@ -247,769 +259,129 @@ class FieldCacheImpl implements FieldCac
> /** Composes a hashcode based on the field and type. */
> @Override
> public int hashCode() {
> - return field.hashCode() ^ (custom==null ? 0 : custom.hashCode());
> + return field.hashCode() ^ (creator==null ? 0 : creator.hashCode());
> }
> }
>
> // inherit javadocs
> public byte[] getBytes (IndexReader reader, String field) throws IOException {
> - return getBytes(reader, field, null);
> + return getBytes(reader, field, new ByteValuesCreator(field, null)).values;
> }
>
> // inherit javadocs
> - public byte[] getBytes(IndexReader reader, String field, ByteParser parser)
> - throws IOException {
> - return (byte[]) caches.get(Byte.TYPE).get(reader, new Entry(field, parser));
> + public byte[] getBytes(IndexReader reader, String field, ByteParser parser)
> throws IOException {
> + return getBytes(reader, field, new ByteValuesCreator(field, parser)).values;
> }
>
> - static final class ByteCache extends Cache {
> - ByteCache(FieldCache wrapper) {
> - super(wrapper);
> - }
> - @Override
> - protected Object createValue(IndexReader reader, Entry entryKey)
> - throws IOException {
> - Entry entry = entryKey;
> - String field = entry.field;
> - ByteParser parser = (ByteParser) entry.custom;
> - if (parser == null) {
> - return wrapper.getBytes(reader, field,
> FieldCache.DEFAULT_BYTE_PARSER);
> - }
> - final byte[] retArray = new byte[reader.maxDoc()];
> - Terms terms = MultiFields.getTerms(reader, field);
> - if (terms != null) {
> - final TermsEnum termsEnum = terms.iterator();
> - final Bits delDocs = MultiFields.getDeletedDocs(reader);
> - DocsEnum docs = null;
> - try {
> - while(true) {
> - final BytesRef term = termsEnum.next();
> - if (term == null) {
> - break;
> - }
> - final byte termval = parser.parseByte(term);
> - docs = termsEnum.docs(delDocs, docs);
> - while (true) {
> - final int docID = docs.nextDoc();
> - if (docID == DocsEnum.NO_MORE_DOCS) {
> - break;
> - }
> - retArray[docID] = termval;
> - }
> - }
> - } catch (StopFillCacheException stop) {
> - }
> - }
> - return retArray;
> - }
> + public ByteValues getBytes(IndexReader reader, String field,
> EntryCreator<ByteValues> creator ) throws IOException
> + {
> + return (ByteValues)caches.get(Byte.TYPE).get(reader, new Entry(field,
> creator));
> }
>
> // inherit javadocs
> public short[] getShorts (IndexReader reader, String field) throws IOException
> {
> - return getShorts(reader, field, null);
> + return getShorts(reader, field, new ShortValuesCreator(field,null)).values;
> }
>
> // inherit javadocs
> - public short[] getShorts(IndexReader reader, String field, ShortParser parser)
> - throws IOException {
> - return (short[]) caches.get(Short.TYPE).get(reader, new Entry(field, parser));
> + public short[] getShorts(IndexReader reader, String field, ShortParser parser)
> throws IOException {
> + return getShorts(reader, field, new
> ShortValuesCreator(field,parser)).values;
> }
>
> - static final class ShortCache extends Cache {
> - ShortCache(FieldCache wrapper) {
> - super(wrapper);
> - }
> -
> - @Override
> - protected Object createValue(IndexReader reader, Entry entryKey)
> - throws IOException {
> - Entry entry = entryKey;
> - String field = entry.field;
> - ShortParser parser = (ShortParser) entry.custom;
> - if (parser == null) {
> - return wrapper.getShorts(reader, field,
> FieldCache.DEFAULT_SHORT_PARSER);
> - }
> - final short[] retArray = new short[reader.maxDoc()];
> - Terms terms = MultiFields.getTerms(reader, field);
> - if (terms != null) {
> - final TermsEnum termsEnum = terms.iterator();
> - final Bits delDocs = MultiFields.getDeletedDocs(reader);
> - DocsEnum docs = null;
> - try {
> - while(true) {
> - final BytesRef term = termsEnum.next();
> - if (term == null) {
> - break;
> - }
> - final short termval = parser.parseShort(term);
> - docs = termsEnum.docs(delDocs, docs);
> - while (true) {
> - final int docID = docs.nextDoc();
> - if (docID == DocsEnum.NO_MORE_DOCS) {
> - break;
> - }
> - retArray[docID] = termval;
> - }
> - }
> - } catch (StopFillCacheException stop) {
> - }
> - }
> - return retArray;
> - }
> + public ShortValues getShorts(IndexReader reader, String field,
> EntryCreator<ShortValues> creator ) throws IOException
> + {
> + return (ShortValues)caches.get(Short.TYPE).get(reader, new Entry(field,
> creator));
> }
>
> // inherit javadocs
> public int[] getInts (IndexReader reader, String field) throws IOException {
> - return getInts(reader, field, null);
> + return getInts(reader, field, new IntValuesCreator( field, null )).values;
> }
>
> // inherit javadocs
> - public int[] getInts(IndexReader reader, String field, IntParser parser)
> - throws IOException {
> - return (int[]) caches.get(Integer.TYPE).get(reader, new Entry(field, parser));
> + public int[] getInts(IndexReader reader, String field, IntParser parser) throws
> IOException {
> + return getInts(reader, field, new IntValuesCreator( field, parser )).values;
> }
>
> - static final class IntCache extends Cache {
> - IntCache(FieldCache wrapper) {
> - super(wrapper);
> - }
> -
> - @Override
> - protected Object createValue(IndexReader reader, Entry entryKey)
> - throws IOException {
> - Entry entry = entryKey;
> - String field = entry.field;
> - IntParser parser = (IntParser) entry.custom;
> - if (parser == null) {
> - try {
> - return wrapper.getInts(reader, field, DEFAULT_INT_PARSER);
> - } catch (NumberFormatException ne) {
> - return wrapper.getInts(reader, field, NUMERIC_UTILS_INT_PARSER);
> - }
> - }
> - int[] retArray = null;
> -
> - Terms terms = MultiFields.getTerms(reader, field);
> - if (terms != null) {
> - final TermsEnum termsEnum = terms.iterator();
> - final Bits delDocs = MultiFields.getDeletedDocs(reader);
> - DocsEnum docs = null;
> - try {
> - while(true) {
> - final BytesRef term = termsEnum.next();
> - if (term == null) {
> - break;
> - }
> - final int termval = parser.parseInt(term);
> - if (retArray == null) {
> - // late init so numeric fields don't double allocate
> - retArray = new int[reader.maxDoc()];
> - }
> -
> - docs = termsEnum.docs(delDocs, docs);
> - while (true) {
> - final int docID = docs.nextDoc();
> - if (docID == DocsEnum.NO_MORE_DOCS) {
> - break;
> - }
> - retArray[docID] = termval;
> - }
> - }
> - } catch (StopFillCacheException stop) {
> - }
> - }
> -
> - if (retArray == null) {
> - // no values
> - retArray = new int[reader.maxDoc()];
> - }
> - return retArray;
> - }
> + public IntValues getInts(IndexReader reader, String field,
> EntryCreator<IntValues> creator ) throws IOException
> + {
> + return (IntValues)caches.get(Integer.TYPE).get(reader, new Entry(field,
> creator));
> }
> -
> -
> +
> // inherit javadocs
> - public float[] getFloats (IndexReader reader, String field)
> - throws IOException {
> - return getFloats(reader, field, null);
> + public float[] getFloats (IndexReader reader, String field) throws IOException
> {
> + return getFloats(reader, field, new FloatValuesCreator( field, null ) ).values;
> }
>
> // inherit javadocs
> - public float[] getFloats(IndexReader reader, String field, FloatParser parser)
> - throws IOException {
> -
> - return (float[]) caches.get(Float.TYPE).get(reader, new Entry(field, parser));
> + public float[] getFloats(IndexReader reader, String field, FloatParser parser)
> throws IOException {
> + return getFloats(reader, field, new FloatValuesCreator( field, parser )
> ).values;
> }
>
> - static final class FloatCache extends Cache {
> - FloatCache(FieldCache wrapper) {
> - super(wrapper);
> - }
> -
> - @Override
> - protected Object createValue(IndexReader reader, Entry entryKey)
> - throws IOException {
> - Entry entry = entryKey;
> - String field = entry.field;
> - FloatParser parser = (FloatParser) entry.custom;
> - if (parser == null) {
> - try {
> - return wrapper.getFloats(reader, field, DEFAULT_FLOAT_PARSER);
> - } catch (NumberFormatException ne) {
> - return wrapper.getFloats(reader, field,
> NUMERIC_UTILS_FLOAT_PARSER);
> - }
> - }
> - float[] retArray = null;
> -
> - Terms terms = MultiFields.getTerms(reader, field);
> - if (terms != null) {
> - final TermsEnum termsEnum = terms.iterator();
> - final Bits delDocs = MultiFields.getDeletedDocs(reader);
> - DocsEnum docs = null;
> - try {
> - while(true) {
> - final BytesRef term = termsEnum.next();
> - if (term == null) {
> - break;
> - }
> - final float termval = parser.parseFloat(term);
> - if (retArray == null) {
> - // late init so numeric fields don't double allocate
> - retArray = new float[reader.maxDoc()];
> - }
> -
> - docs = termsEnum.docs(delDocs, docs);
> - while (true) {
> - final int docID = docs.nextDoc();
> - if (docID == DocsEnum.NO_MORE_DOCS) {
> - break;
> - }
> - retArray[docID] = termval;
> - }
> - }
> - } catch (StopFillCacheException stop) {
> - }
> - }
> -
> - if (retArray == null) {
> - // no values
> - retArray = new float[reader.maxDoc()];
> - }
> - return retArray;
> - }
> + public FloatValues getFloats(IndexReader reader, String field,
> EntryCreator<FloatValues> creator ) throws IOException
> + {
> + return (FloatValues)caches.get(Float.TYPE).get(reader, new Entry(field,
> creator));
> }
>
> -
> public long[] getLongs(IndexReader reader, String field) throws IOException {
> - return getLongs(reader, field, null);
> + return getLongs(reader, field, new LongValuesCreator( field, null ) ).values;
> }
>
> // inherit javadocs
> - public long[] getLongs(IndexReader reader, String field,
> FieldCache.LongParser parser)
> - throws IOException {
> - return (long[]) caches.get(Long.TYPE).get(reader, new Entry(field, parser));
> + public long[] getLongs(IndexReader reader, String field,
> FieldCache.LongParser parser) throws IOException {
> + return getLongs(reader, field, new LongValuesCreator( field, parser )
> ).values;
> }
>
> - static final class LongCache extends Cache {
> - LongCache(FieldCache wrapper) {
> - super(wrapper);
> - }
> -
> - @Override
> - protected Object createValue(IndexReader reader, Entry entry)
> - throws IOException {
> - String field = entry.field;
> - FieldCache.LongParser parser = (FieldCache.LongParser) entry.custom;
> - if (parser == null) {
> - try {
> - return wrapper.getLongs(reader, field, DEFAULT_LONG_PARSER);
> - } catch (NumberFormatException ne) {
> - return wrapper.getLongs(reader, field,
> NUMERIC_UTILS_LONG_PARSER);
> - }
> - }
> - long[] retArray = null;
> -
> - Terms terms = MultiFields.getTerms(reader, field);
> - if (terms != null) {
> - final TermsEnum termsEnum = terms.iterator();
> - final Bits delDocs = MultiFields.getDeletedDocs(reader);
> - DocsEnum docs = null;
> - try {
> - while(true) {
> - final BytesRef term = termsEnum.next();
> - if (term == null) {
> - break;
> - }
> - final long termval = parser.parseLong(term);
> - if (retArray == null) {
> - // late init so numeric fields don't double allocate
> - retArray = new long[reader.maxDoc()];
> - }
> -
> - docs = termsEnum.docs(delDocs, docs);
> - while (true) {
> - final int docID = docs.nextDoc();
> - if (docID == DocsEnum.NO_MORE_DOCS) {
> - break;
> - }
> - retArray[docID] = termval;
> - }
> - }
> - } catch (StopFillCacheException stop) {
> - }
> - }
> -
> - if (retArray == null) {
> - // no values
> - retArray = new long[reader.maxDoc()];
> - }
> - return retArray;
> - }
> + public LongValues getLongs(IndexReader reader, String field,
> EntryCreator<LongValues> creator ) throws IOException
> + {
> + return (LongValues)caches.get(Long.TYPE).get(reader, new Entry(field,
> creator));
> }
> -
> +
> // inherit javadocs
> - public double[] getDoubles(IndexReader reader, String field)
> - throws IOException {
> - return getDoubles(reader, field, null);
> + public double[] getDoubles(IndexReader reader, String field) throws
> IOException {
> + return getDoubles(reader, field, new DoubleValuesCreator( field, null )
> ).values;
> }
>
> // inherit javadocs
> - public double[] getDoubles(IndexReader reader, String field,
> FieldCache.DoubleParser parser)
> - throws IOException {
> - return (double[]) caches.get(Double.TYPE).get(reader, new Entry(field,
> parser));
> + public double[] getDoubles(IndexReader reader, String field,
> FieldCache.DoubleParser parser) throws IOException {
> + return getDoubles(reader, field, new DoubleValuesCreator( field, parser )
> ).values;
> }
>
> - static final class DoubleCache extends Cache {
> - DoubleCache(FieldCache wrapper) {
> - super(wrapper);
> - }
> -
> - @Override
> - protected Object createValue(IndexReader reader, Entry entryKey)
> - throws IOException {
> - Entry entry = entryKey;
> - String field = entry.field;
> - FieldCache.DoubleParser parser = (FieldCache.DoubleParser)
> entry.custom;
> - if (parser == null) {
> - try {
> - return wrapper.getDoubles(reader, field, DEFAULT_DOUBLE_PARSER);
> - } catch (NumberFormatException ne) {
> - return wrapper.getDoubles(reader, field,
> NUMERIC_UTILS_DOUBLE_PARSER);
> - }
> - }
> - double[] retArray = null;
> -
> - Terms terms = MultiFields.getTerms(reader, field);
> - if (terms != null) {
> - final TermsEnum termsEnum = terms.iterator();
> - final Bits delDocs = MultiFields.getDeletedDocs(reader);
> - DocsEnum docs = null;
> - try {
> - while(true) {
> - final BytesRef term = termsEnum.next();
> - if (term == null) {
> - break;
> - }
> - final double termval = parser.parseDouble(term);
> - if (retArray == null) {
> - // late init so numeric fields don't double allocate
> - retArray = new double[reader.maxDoc()];
> - }
> -
> - docs = termsEnum.docs(delDocs, docs);
> - while (true) {
> - final int docID = docs.nextDoc();
> - if (docID == DocsEnum.NO_MORE_DOCS) {
> - break;
> - }
> - retArray[docID] = termval;
> - }
> - }
> - } catch (StopFillCacheException stop) {
> - }
> - }
> - if (retArray == null) // no values
> - retArray = new double[reader.maxDoc()];
> - return retArray;
> - }
> + public DoubleValues getDoubles(IndexReader reader, String field,
> EntryCreator<DoubleValues> creator ) throws IOException
> + {
> + return (DoubleValues)caches.get(Double.TYPE).get(reader, new Entry(field,
> creator));
> }
>
> - public static class DocTermsIndexImpl extends DocTermsIndex {
> - private final PagedBytes.Reader bytes;
> - private final PackedInts.Reader termOrdToBytesOffset;
> - private final PackedInts.Reader docToTermOrd;
> - private final int numOrd;
> -
> - public DocTermsIndexImpl(PagedBytes.Reader bytes, PackedInts.Reader
> termOrdToBytesOffset, PackedInts.Reader docToTermOrd, int numOrd) {
> - this.bytes = bytes;
> - this.docToTermOrd = docToTermOrd;
> - this.termOrdToBytesOffset = termOrdToBytesOffset;
> - this.numOrd = numOrd;
> - }
> -
> - @Override
> - public PackedInts.Reader getDocToOrd() {
> - return docToTermOrd;
> - }
> -
> - @Override
> - public int numOrd() {
> - return numOrd;
> - }
> -
> - @Override
> - public int getOrd(int docID) {
> - return (int) docToTermOrd.get(docID);
> - }
> -
> - @Override
> - public int size() {
> - return docToTermOrd.size();
> - }
> -
> - @Override
> - public BytesRef lookup(int ord, BytesRef ret) {
> - return bytes.fillUsingLengthPrefix(ret, termOrdToBytesOffset.get(ord));
> - }
> -
> - @Override
> - public TermsEnum getTermsEnum() {
> - return this.new DocTermsIndexEnum();
> - }
> -
> - class DocTermsIndexEnum extends TermsEnum {
> - int currentOrd;
> - int currentBlockNumber;
> - int end; // end position in the current block
> - final byte[][] blocks;
> - final int[] blockEnds;
> -
> - final BytesRef term = new BytesRef();
> -
> - public DocTermsIndexEnum() {
> - currentOrd = 0;
> - currentBlockNumber = 0;
> - blocks = bytes.getBlocks();
> - blockEnds = bytes.getBlockEnds();
> - currentBlockNumber = bytes.fillUsingLengthPrefix2(term,
> termOrdToBytesOffset.get(0));
> - end = blockEnds[currentBlockNumber];
> - }
> -
> - @Override
> - public SeekStatus seek(BytesRef text, boolean useCache) throws
> IOException {
> - // TODO - we can support with binary search
> - throw new UnsupportedOperationException();
> - }
> -
> - @Override
> - public SeekStatus seek(long ord) throws IOException {
> - assert(ord >= 0 && ord <= numOrd);
> - // TODO: if gap is small, could iterate from current position? Or let user
> decide that?
> - currentBlockNumber = bytes.fillUsingLengthPrefix2(term,
> termOrdToBytesOffset.get((int)ord));
> - end = blockEnds[currentBlockNumber];
> - currentOrd = (int)ord;
> - return SeekStatus.FOUND;
> - }
> -
> - @Override
> - public BytesRef next() throws IOException {
> - int start = term.offset + term.length;
> - if (start >= end) {
> - // switch byte blocks
> - if (currentBlockNumber +1 >= blocks.length) {
> - return null;
> - }
> - currentBlockNumber++;
> - term.bytes = blocks[currentBlockNumber];
> - end = blockEnds[currentBlockNumber];
> - start = 0;
> - if (end<=0) return null; // special case of empty last array
> - }
> -
> - currentOrd++;
> -
> - byte[] block = term.bytes;
> - if ((block[start] & 128) == 0) {
> - term.length = block[start];
> - term.offset = start+1;
> - } else {
> - term.length = (((int) (block[start] & 0x7f)) << 8) | (block[1+start] & 0xff);
> - term.offset = start+2;
> - }
> -
> - return term;
> - }
> -
> - @Override
> - public BytesRef term() throws IOException {
> - return term;
> - }
> -
> - @Override
> - public long ord() throws IOException {
> - return currentOrd;
> - }
> -
> - @Override
> - public int docFreq() {
> - throw new UnsupportedOperationException();
> - }
> -
> - @Override
> - public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException
> {
> - throw new UnsupportedOperationException();
> - }
> -
> - @Override
> - public DocsAndPositionsEnum docsAndPositions(Bits skipDocs,
> DocsAndPositionsEnum reuse) throws IOException {
> - throw new UnsupportedOperationException();
> - }
> -
> - @Override
> - public Comparator<BytesRef> getComparator() throws IOException {
> - throw new UnsupportedOperationException();
> - }
> - }
> - }
> -
> - private static boolean DEFAULT_FASTER_BUT_MORE_RAM = true;
> -
> - public DocTermsIndex getTermsIndex(IndexReader reader, String field)
> throws IOException {
> - return getTermsIndex(reader, field, DEFAULT_FASTER_BUT_MORE_RAM);
> + public DocTermsIndex getTermsIndex(IndexReader reader, String field)
> throws IOException {
> + return getTermsIndex(reader, field, new
> DocTermsIndexCreator<DocTermsIndex>( field ) );
> }
>
> - public DocTermsIndex getTermsIndex(IndexReader reader, String field,
> boolean fasterButMoreRAM) throws IOException {
> - return (DocTermsIndex) caches.get(DocTermsIndex.class).get(reader, new
> Entry(field, Boolean.valueOf(fasterButMoreRAM)));
> - }
> -
> - static class DocTermsIndexCache extends Cache {
> - DocTermsIndexCache(FieldCache wrapper) {
> - super(wrapper);
> - }
> -
> - @Override
> - protected Object createValue(IndexReader reader, Entry entryKey)
> - throws IOException {
> -
> - String field = StringHelper.intern(entryKey.field);
> - Terms terms = MultiFields.getTerms(reader, field);
> -
> - final boolean fasterButMoreRAM = ((Boolean)
> entryKey.custom).booleanValue();
> -
> - final PagedBytes bytes = new PagedBytes(15);
> -
> - int startBytesBPV;
> - int startTermsBPV;
> - int startNumUniqueTerms;
> -
> - int maxDoc = reader.maxDoc();
> - final int termCountHardLimit;
> - if (maxDoc == Integer.MAX_VALUE) {
> - termCountHardLimit = Integer.MAX_VALUE;
> - } else {
> - termCountHardLimit = maxDoc+1;
> - }
> -
> - if (terms != null) {
> - // Try for coarse estimate for number of bits; this
> - // should be an underestimate most of the time, which
> - // is fine -- GrowableWriter will reallocate as needed
> - long numUniqueTerms = 0;
> - try {
> - numUniqueTerms = terms.getUniqueTermCount();
> - } catch (UnsupportedOperationException uoe) {
> - numUniqueTerms = -1;
> - }
> - if (numUniqueTerms != -1) {
> -
> - if (numUniqueTerms > termCountHardLimit) {
> - // app is misusing the API (there is more than
> - // one term per doc); in this case we make best
> - // effort to load what we can (see LUCENE-2142)
> - numUniqueTerms = termCountHardLimit;
> - }
> -
> - startBytesBPV = PackedInts.bitsRequired(numUniqueTerms*4);
> - startTermsBPV = PackedInts.bitsRequired(numUniqueTerms);
> -
> - startNumUniqueTerms = (int) numUniqueTerms;
> - } else {
> - startBytesBPV = 1;
> - startTermsBPV = 1;
> - startNumUniqueTerms = 1;
> - }
> - } else {
> - startBytesBPV = 1;
> - startTermsBPV = 1;
> - startNumUniqueTerms = 1;
> - }
> -
> - GrowableWriter termOrdToBytesOffset = new
> GrowableWriter(startBytesBPV, 1+startNumUniqueTerms, fasterButMoreRAM);
> - final GrowableWriter docToTermOrd = new
> GrowableWriter(startTermsBPV, reader.maxDoc(), fasterButMoreRAM);
> -
> - // 0 is reserved for "unset"
> - bytes.copyUsingLengthPrefix(new BytesRef());
> - int termOrd = 1;
> -
> - if (terms != null) {
> - final TermsEnum termsEnum = terms.iterator();
> - final Bits delDocs = MultiFields.getDeletedDocs(reader);
> - DocsEnum docs = null;
> -
> - while(true) {
> - final BytesRef term = termsEnum.next();
> - if (term == null) {
> - break;
> - }
> - if (termOrd >= termCountHardLimit) {
> - break;
> - }
> -
> - if (termOrd == termOrdToBytesOffset.size()) {
> - // NOTE: this code only runs if the incoming
> - // reader impl doesn't implement
> - // getUniqueTermCount (which should be uncommon)
> - termOrdToBytesOffset =
> termOrdToBytesOffset.resize(ArrayUtil.oversize(1+termOrd, 1));
> - }
> - termOrdToBytesOffset.set(termOrd,
> bytes.copyUsingLengthPrefix(term));
> - docs = termsEnum.docs(delDocs, docs);
> - while (true) {
> - final int docID = docs.nextDoc();
> - if (docID == DocsEnum.NO_MORE_DOCS) {
> - break;
> - }
> - docToTermOrd.set(docID, termOrd);
> - }
> - termOrd++;
> - }
> -
> - if (termOrdToBytesOffset.size() > termOrd) {
> - termOrdToBytesOffset = termOrdToBytesOffset.resize(termOrd);
> - }
> - }
> -
> - // maybe an int-only impl?
> - return new DocTermsIndexImpl(bytes.freeze(true),
> termOrdToBytesOffset.getMutable(), docToTermOrd.getMutable(), termOrd);
> - }
> + public DocTermsIndex getTermsIndex(IndexReader reader, String field,
> boolean fasterButMoreRAM) throws IOException {
> + return getTermsIndex(reader, field, new
> DocTermsIndexCreator<DocTermsIndex>( field,
> + fasterButMoreRAM ? DocTermsIndexCreator.FASTER_BUT_MORE_RAM :
> 0 ) );
> }
>
> - private static class DocTermsImpl extends DocTerms {
> - private final PagedBytes.Reader bytes;
> - private final PackedInts.Reader docToOffset;
> -
> - public DocTermsImpl(PagedBytes.Reader bytes, PackedInts.Reader
> docToOffset) {
> - this.bytes = bytes;
> - this.docToOffset = docToOffset;
> - }
> -
> - @Override
> - public int size() {
> - return docToOffset.size();
> - }
> -
> - @Override
> - public boolean exists(int docID) {
> - return docToOffset.get(docID) == 0;
> - }
> -
> - @Override
> - public BytesRef getTerm(int docID, BytesRef ret) {
> - final int pointer = (int) docToOffset.get(docID);
> - return bytes.fillUsingLengthPrefix(ret, pointer);
> - }
> + public DocTermsIndex getTermsIndex(IndexReader reader, String field,
> EntryCreator<DocTermsIndex> creator) throws IOException
> + {
> + return (DocTermsIndex)caches.get(DocTermsIndex.class).get(reader, new
> Entry(field, creator));
> }
>
> // TODO: this if DocTermsIndex was already created, we
> // should share it...
> public DocTerms getTerms(IndexReader reader, String field) throws
> IOException {
> - return getTerms(reader, field, DEFAULT_FASTER_BUT_MORE_RAM);
> + return getTerms(reader, field, new DocTermsCreator<DocTerms>( field ) );
> }
>
> public DocTerms getTerms(IndexReader reader, String field, boolean
> fasterButMoreRAM) throws IOException {
> - return (DocTerms) caches.get(DocTerms.class).get(reader, new Entry(field,
> Boolean.valueOf(fasterButMoreRAM)));
> + return getTerms(reader, field, new DocTermsCreator<DocTerms>( field,
> + fasterButMoreRAM ? DocTermsCreator.FASTER_BUT_MORE_RAM : 0 ) );
> }
>
> - static final class DocTermsCache extends Cache {
> - DocTermsCache(FieldCache wrapper) {
> - super(wrapper);
> - }
> -
> - @Override
> - protected Object createValue(IndexReader reader, Entry entryKey)
> - throws IOException {
> -
> - String field = StringHelper.intern(entryKey.field);
> - Terms terms = MultiFields.getTerms(reader, field);
> -
> - final boolean fasterButMoreRAM = ((Boolean)
> entryKey.custom).booleanValue();
> -
> - final int termCountHardLimit = reader.maxDoc();
> -
> - // Holds the actual term data, expanded.
> - final PagedBytes bytes = new PagedBytes(15);
> -
> - int startBPV;
> -
> - if (terms != null) {
> - // Try for coarse estimate for number of bits; this
> - // should be an underestimate most of the time, which
> - // is fine -- GrowableWriter will reallocate as needed
> - long numUniqueTerms = 0;
> - try {
> - numUniqueTerms = terms.getUniqueTermCount();
> - } catch (UnsupportedOperationException uoe) {
> - numUniqueTerms = -1;
> - }
> - if (numUniqueTerms != -1) {
> - if (numUniqueTerms > termCountHardLimit) {
> - numUniqueTerms = termCountHardLimit;
> - }
> - startBPV = PackedInts.bitsRequired(numUniqueTerms*4);
> - } else {
> - startBPV = 1;
> - }
> - } else {
> - startBPV = 1;
> - }
> -
> - final GrowableWriter docToOffset = new GrowableWriter(startBPV,
> reader.maxDoc(), fasterButMoreRAM);
> -
> - // pointer==0 means not set
> - bytes.copyUsingLengthPrefix(new BytesRef());
> -
> - if (terms != null) {
> - int termCount = 0;
> - final TermsEnum termsEnum = terms.iterator();
> - final Bits delDocs = MultiFields.getDeletedDocs(reader);
> - DocsEnum docs = null;
> - while(true) {
> - if (termCount++ == termCountHardLimit) {
> - // app is misusing the API (there is more than
> - // one term per doc); in this case we make best
> - // effort to load what we can (see LUCENE-2142)
> - break;
> - }
> -
> - final BytesRef term = termsEnum.next();
> - if (term == null) {
> - break;
> - }
> - final long pointer = bytes.copyUsingLengthPrefix(term);
> - docs = termsEnum.docs(delDocs, docs);
> - while (true) {
> - final int docID = docs.nextDoc();
> - if (docID == DocsEnum.NO_MORE_DOCS) {
> - break;
> - }
> - docToOffset.set(docID, pointer);
> - }
> - }
> - }
> -
> - // maybe an int-only impl?
> - return new DocTermsImpl(bytes.freeze(true), docToOffset.getMutable());
> - }
> + public DocTerms getTerms(IndexReader reader, String field,
> EntryCreator<DocTerms> creator) throws IOException
> + {
> + return (DocTerms)caches.get(DocTerms.class).get(reader, new Entry(field,
> creator));
> }
> +
> private volatile PrintStream infoStream;
>
> public void setInfoStream(PrintStream stream) {
>
> Added:
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/ByteValue
> sCreator.java
> URL:
> http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lu
> cene/search/cache/ByteValuesCreator.java?rev=1001303&view=auto
> ================================================================
> ==============
> ---
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/ByteValue
> sCreator.java (added)
> +++
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/ByteValue
> sCreator.java Sat Sep 25 19:32:37 2010
> @@ -0,0 +1,131 @@
> +package org.apache.lucene.search.cache;
> +
> +/**
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreements. See the NOTICE file distributed with
> + * this work for additional information regarding copyright ownership.
> + * The ASF licenses this file to You under the Apache License, Version 2.0
> + * (the "License"); you may not use this file except in compliance with
> + * the License. You may obtain a copy of the License at
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +import java.io.IOException;
> +
> +import org.apache.lucene.index.DocsEnum;
> +import org.apache.lucene.index.IndexReader;
> +import org.apache.lucene.index.MultiFields;
> +import org.apache.lucene.index.Terms;
> +import org.apache.lucene.index.TermsEnum;
> +import org.apache.lucene.search.DocIdSetIterator;
> +import org.apache.lucene.search.FieldCache;
> +import org.apache.lucene.search.FieldCache.ByteParser;
> +import org.apache.lucene.search.cache.CachedArray.ByteValues;
> +import org.apache.lucene.util.Bits;
> +import org.apache.lucene.util.BytesRef;
> +import org.apache.lucene.util.OpenBitSet;
> +
> +public class ByteValuesCreator extends CachedArrayCreator<ByteValues>
> +{
> + protected ByteParser parser;
> +
> + public ByteValuesCreator( String field, ByteParser parser, int options )
> + {
> + super( field, options );
> + this.parser = parser;
> + }
> +
> + public ByteValuesCreator( String field, ByteParser parser )
> + {
> + super( field );
> + this.parser = parser;
> + }
> +
> + @Override
> + public Class getArrayType() {
> + return Byte.class;
> + }
> +
> + //--------------------------------------------------------------------------------
> + //--------------------------------------------------------------------------------
> +
> + @Override
> + public ByteValues create(IndexReader reader) throws IOException {
> + return validate( new ByteValues(), reader );
> + }
> +
> + @Override
> + public ByteValues validate(ByteValues entry, IndexReader reader) throws
> IOException {
> + boolean ok = false;
> + if( hasOption(OPTION_CACHE_VALUES) ) {
> + ok = true;
> + if( entry.values == null ) {
> + fillByteValues(entry, reader, field);
> + }
> + }
> + if( hasOption(OPTION_CACHE_BITS) ) {
> + ok = true;
> + if( entry.valid == null ) {
> + fillValidBits(entry, reader, field);
> + }
> + }
> + if( !ok ) {
> + throw new RuntimeException( "the config must cache values and/or bits"
> );
> + }
> + return entry;
> + }
> +
> + protected void fillByteValues( ByteValues vals, IndexReader reader, String
> field ) throws IOException
> + {
> + if( parser == null ) {
> + parser = FieldCache.DEFAULT_BYTE_PARSER;
> + }
> + assertSameParserAndResetCounts(vals, parser);
> +
> + Terms terms = MultiFields.getTerms(reader, field);
> + int maxDoc = reader.maxDoc();
> + vals.values = new byte[maxDoc];
> + if (terms != null) {
> + final TermsEnum termsEnum = terms.iterator();
> + final Bits delDocs = MultiFields.getDeletedDocs(reader);
> + OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? new
> OpenBitSet( maxDoc ) : null;
> + DocsEnum docs = null;
> + try {
> + while(true) {
> + final BytesRef term = termsEnum.next();
> + if (term == null) {
> + break;
> + }
> + final byte termval = parser.parseByte(term);
> + docs = termsEnum.docs(delDocs, docs);
> + while (true) {
> + final int docID = docs.nextDoc();
> + if (docID == DocIdSetIterator.NO_MORE_DOCS) {
> + break;
> + }
> + vals.values[docID] = termval;
> + vals.numDocs++;
> + if( validBits != null ) {
> + validBits.set( docID );
> + }
> + }
> + vals.numTerms++;
> + }
> + } catch (FieldCache.StopFillCacheException stop) {}
> +
> + if( vals.valid == null ) {
> + vals.valid = checkMatchAllBits( delDocs, validBits, vals.numDocs, maxDoc
> );
> + }
> + }
> + if( vals.valid == null && vals.numDocs < 1 ) {
> + vals.valid = new Bits.MatchNoBits( maxDoc );
> + }
> + }
> +}
> \ No newline at end of file
>
> Added:
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/CachedArr
> ay.java
> URL:
> http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lu
> cene/search/cache/CachedArray.java?rev=1001303&view=auto
> ================================================================
> ==============
> ---
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/CachedArr
> ay.java (added)
> +++
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/CachedArr
> ay.java Sat Sep 25 19:32:37 2010
> @@ -0,0 +1,78 @@
> +package org.apache.lucene.search.cache;
> +
> +/**
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreements. See the NOTICE file distributed with
> + * this work for additional information regarding copyright ownership.
> + * The ASF licenses this file to You under the Apache License, Version 2.0
> + * (the "License"); you may not use this file except in compliance with
> + * the License. You may obtain a copy of the License at
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +import org.apache.lucene.util.Bits;
> +
> +public abstract class CachedArray
> +{
> + public Integer parserHashCode; // a flag to make sure you don't change what
> you are asking for in subsequent requests
> + public int numDocs;
> + public int numTerms;
> +
> + /**
> + * NOTE: these Bits may have false positives for deleted documents. That is,
> + * Documents that are deleted may be marked as valid but the array value is
> not.
> + */
> + public Bits valid;
> +
> + public CachedArray() {
> + this.parserHashCode = null;
> + this.numDocs = 0;
> + this.numTerms = 0;
> + }
> +
> + /**
> + * @return the native array
> + */
> + public abstract Object getRawArray();
> +
> + //-------------------------------------------------------------
> + // Concrete Values
> + //-------------------------------------------------------------
> +
> + public static class ByteValues extends CachedArray {
> + public byte[] values = null;
> + @Override public byte[] getRawArray() { return values; }
> + };
> +
> + public static class ShortValues extends CachedArray {
> + public short[] values = null;
> + @Override public short[] getRawArray() { return values; }
> + };
> +
> + public static class IntValues extends CachedArray {
> + public int[] values = null;
> + @Override public int[] getRawArray() { return values; }
> + };
> +
> + public static class FloatValues extends CachedArray {
> + public float[] values = null;
> + @Override public float[] getRawArray() { return values; }
> + };
> +
> + public static class LongValues extends CachedArray {
> + public long[] values = null;
> + @Override public long[] getRawArray() { return values; }
> + };
> +
> + public static class DoubleValues extends CachedArray {
> + public double[] values = null;
> + @Override public double[] getRawArray() { return values; }
> + };
> +}
>
> Added:
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/CachedArr
> ayCreator.java
> URL:
> http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lu
> cene/search/cache/CachedArrayCreator.java?rev=1001303&view=auto
> ================================================================
> ==============
> ---
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/CachedArr
> ayCreator.java (added)
> +++
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/CachedArr
> ayCreator.java Sat Sep 25 19:32:37 2010
> @@ -0,0 +1,148 @@
> +package org.apache.lucene.search.cache;
> +
> +/**
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreements. See the NOTICE file distributed with
> + * this work for additional information regarding copyright ownership.
> + * The ASF licenses this file to You under the Apache License, Version 2.0
> + * (the "License"); you may not use this file except in compliance with
> + * the License. You may obtain a copy of the License at
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +import java.io.IOException;
> +
> +import org.apache.lucene.index.DocsEnum;
> +import org.apache.lucene.index.IndexReader;
> +import org.apache.lucene.index.MultiFields;
> +import org.apache.lucene.index.Terms;
> +import org.apache.lucene.index.TermsEnum;
> +import org.apache.lucene.search.DocIdSetIterator;
> +import org.apache.lucene.search.FieldCache.Parser;
> +import org.apache.lucene.util.Bits;
> +import org.apache.lucene.util.BytesRef;
> +import org.apache.lucene.util.OpenBitSet;
> +
> +public abstract class CachedArrayCreator<T extends CachedArray> extends
> EntryCreatorWithOptions<T>
> +{
> + public static final int OPTION_VALIDATE = 1;
> + public static final int OPTION_CACHE_VALUES = 2;
> + public static final int OPTION_CACHE_BITS = 4;
> +
> + // Composite Options Fields
> + public static final int CACHE_VALUES_AND_BITS = OPTION_CACHE_VALUES ^
> OPTION_CACHE_BITS;
> + public static final int CACHE_VALUES_AND_BITS_VALIDATE =
> OPTION_CACHE_VALUES ^ OPTION_CACHE_BITS ^ OPTION_VALIDATE;
> +
> + public String field;
> +
> + public CachedArrayCreator( String field )
> + {
> + super( OPTION_CACHE_VALUES ^ OPTION_VALIDATE );
> + if( field == null ) {
> + throw new IllegalArgumentException( "field can not be null" );
> + }
> + this.field = field;
> + }
> +
> + public CachedArrayCreator( String field, int flags )
> + {
> + super( flags );
> + if( field == null ) {
> + throw new IllegalArgumentException( "field can not be null" );
> + }
> + this.field = field;
> + }
> +
> + /**
> + * Note that the 'flags' are not part of the key -- subsequent calls to the cache
> + * with different options will use the same cache entry.
> + */
> + @Override
> + public EntryKey getCacheKey() {
> + return new SimpleEntryKey( CachedArray.class, getArrayType(), field );
> + //return new Integer( CachedArrayCreator.class.hashCode() ^
> getArrayType().hashCode() ^ field.hashCode() );
> + }
> +
> + /** Return the type that the array will hold */
> + public abstract Class getArrayType();
> +
> + protected void assertSameParserAndResetCounts(T value, Parser parser)
> + {
> + int parserHashCode = parser.hashCode();
> + if( value.parserHashCode != null && value.parserHashCode !=
> parserHashCode ) {
> + throw new RuntimeException( "Parser changed in subsequet call. "
> + +value.parserHashCode+" != "+parserHashCode + " :: " + parser );
> + }
> + value.parserHashCode = parserHashCode;
> + value.numDocs = value.numTerms = 0;
> + }
> +
> + /**
> + * Utility function to help check what bits are valid
> + */
> + protected Bits checkMatchAllBits( Bits deleted, OpenBitSet valid, int
> maxDocs, int numDocs )
> + {
> + if( numDocs != maxDocs ) {
> + if( hasOption( OPTION_CACHE_BITS ) ) {
> + if( deleted == null ) {
> + for( int i=0; i<maxDocs; i++ ) {
> + if( !valid.get(i) ) {
> + return valid;
> + }
> + }
> + }
> + else {
> + for( int i=0; i<maxDocs; i++ ) {
> + if( !deleted.get(i) && !valid.get(i) ) {
> + return valid;
> + }
> + }
> + }
> + }
> + else {
> + return null;
> + }
> + }
> + return new Bits.MatchAllBits( maxDocs );
> + }
> +
> + public void fillValidBits( T vals, IndexReader reader, String field ) throws
> IOException
> + {
> + vals.numDocs = vals.numTerms = 0;
> + Terms terms = MultiFields.getTerms(reader, field);
> + if (terms != null) {
> + final TermsEnum termsEnum = terms.iterator();
> + final Bits delDocs = MultiFields.getDeletedDocs(reader);
> + OpenBitSet validBits = new OpenBitSet( reader.maxDoc() );
> + DocsEnum docs = null;
> + while(true) {
> + final BytesRef term = termsEnum.next();
> + if (term == null) {
> + break;
> + }
> + docs = termsEnum.docs(delDocs, docs);
> + while (true) {
> + final int docID = docs.nextDoc();
> + if (docID == DocIdSetIterator.NO_MORE_DOCS) {
> + break;
> + }
> + validBits.set( docID );
> + vals.numDocs++;
> + }
> + vals.numTerms++;
> + }
> +
> + vals.valid = checkMatchAllBits( delDocs, validBits, vals.numDocs,
> reader.maxDoc() );
> + }
> + if( vals.numDocs < 1 ) {
> + vals.valid = new Bits.MatchNoBits( reader.maxDoc() );
> + }
> + }
> +}
>
> Added:
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTerms
> Creator.java
> URL:
> http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lu
> cene/search/cache/DocTermsCreator.java?rev=1001303&view=auto
> ================================================================
> ==============
> ---
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTerms
> Creator.java (added)
> +++
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTerms
> Creator.java Sat Sep 25 19:32:37 2010
> @@ -0,0 +1,171 @@
> +package org.apache.lucene.search.cache;
> +
> +/**
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreements. See the NOTICE file distributed with
> + * this work for additional information regarding copyright ownership.
> + * The ASF licenses this file to You under the Apache License, Version 2.0
> + * (the "License"); you may not use this file except in compliance with
> + * the License. You may obtain a copy of the License at
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +import java.io.IOException;
> +
> +import org.apache.lucene.index.DocsEnum;
> +import org.apache.lucene.index.IndexReader;
> +import org.apache.lucene.index.MultiFields;
> +import org.apache.lucene.index.Terms;
> +import org.apache.lucene.index.TermsEnum;
> +import org.apache.lucene.search.DocIdSetIterator;
> +import org.apache.lucene.search.FieldCache.DocTerms;
> +import org.apache.lucene.util.Bits;
> +import org.apache.lucene.util.BytesRef;
> +import org.apache.lucene.util.PagedBytes;
> +import org.apache.lucene.util.StringHelper;
> +import org.apache.lucene.util.packed.GrowableWriter;
> +import org.apache.lucene.util.packed.PackedInts;
> +
> +// TODO: this if DocTermsIndex was already created, we should share it...
> +public class DocTermsCreator<T extends DocTerms> extends
> EntryCreatorWithOptions<T>
> +{
> + public static final int FASTER_BUT_MORE_RAM = 2;
> +
> + public String field;
> +
> + public DocTermsCreator( String field )
> + {
> + super( FASTER_BUT_MORE_RAM ); // By default turn on
> FASTER_BUT_MORE_RAM
> + if( field == null ) {
> + throw new IllegalArgumentException( "field can not be null" );
> + }
> + this.field = field;
> + }
> +
> + public DocTermsCreator( String field, int flags )
> + {
> + super( flags );
> + if( field == null ) {
> + throw new IllegalArgumentException( "field can not be null" );
> + }
> + this.field = field;
> + }
> +
> + @Override
> + public SimpleEntryKey getCacheKey() {
> + return new SimpleEntryKey( DocTermsCreator.class, field );
> + }
> +
> + @Override
> + public T create(IndexReader reader) throws IOException {
> +
> + String field = StringHelper.intern(this.field); // TODO?? necessary?
> + Terms terms = MultiFields.getTerms(reader, field);
> +
> + final boolean fasterButMoreRAM = hasOption( FASTER_BUT_MORE_RAM );
> + final int termCountHardLimit = reader.maxDoc();
> +
> + // Holds the actual term data, expanded.
> + final PagedBytes bytes = new PagedBytes(15);
> +
> + int startBPV;
> +
> + if (terms != null) {
> + // Try for coarse estimate for number of bits; this
> + // should be an underestimate most of the time, which
> + // is fine -- GrowableWriter will reallocate as needed
> + long numUniqueTerms = 0;
> + try {
> + numUniqueTerms = terms.getUniqueTermCount();
> + } catch (UnsupportedOperationException uoe) {
> + numUniqueTerms = -1;
> + }
> + if (numUniqueTerms != -1) {
> + if (numUniqueTerms > termCountHardLimit) {
> + numUniqueTerms = termCountHardLimit;
> + }
> + startBPV = PackedInts.bitsRequired(numUniqueTerms*4);
> + } else {
> + startBPV = 1;
> + }
> + } else {
> + startBPV = 1;
> + }
> +
> + final GrowableWriter docToOffset = new GrowableWriter(startBPV,
> reader.maxDoc(), fasterButMoreRAM);
> +
> + // pointer==0 means not set
> + bytes.copyUsingLengthPrefix(new BytesRef());
> +
> + if (terms != null) {
> + int termCount = 0;
> + final TermsEnum termsEnum = terms.iterator();
> + final Bits delDocs = MultiFields.getDeletedDocs(reader);
> + DocsEnum docs = null;
> + while(true) {
> + if (termCount++ == termCountHardLimit) {
> + // app is misusing the API (there is more than
> + // one term per doc); in this case we make best
> + // effort to load what we can (see LUCENE-2142)
> + break;
> + }
> +
> + final BytesRef term = termsEnum.next();
> + if (term == null) {
> + break;
> + }
> + final long pointer = bytes.copyUsingLengthPrefix(term);
> + docs = termsEnum.docs(delDocs, docs);
> + while (true) {
> + final int docID = docs.nextDoc();
> + if (docID == DocIdSetIterator.NO_MORE_DOCS) {
> + break;
> + }
> + docToOffset.set(docID, pointer);
> + }
> + }
> + }
> +
> + // maybe an int-only impl?
> + return (T)new DocTermsImpl(bytes.freeze(true),
> docToOffset.getMutable());
> + }
> +
> + @Override
> + public T validate(T entry, IndexReader reader) throws IOException {
> + // TODO? nothing? perhaps subsequent call with
> FASTER_BUT_MORE_RAM?
> + return entry;
> + }
> +
> + private static class DocTermsImpl extends DocTerms {
> + private final PagedBytes.Reader bytes;
> + private final PackedInts.Reader docToOffset;
> +
> + public DocTermsImpl(PagedBytes.Reader bytes, PackedInts.Reader
> docToOffset) {
> + this.bytes = bytes;
> + this.docToOffset = docToOffset;
> + }
> +
> + @Override
> + public int size() {
> + return docToOffset.size();
> + }
> +
> + @Override
> + public boolean exists(int docID) {
> + return docToOffset.get(docID) == 0;
> + }
> +
> + @Override
> + public BytesRef getTerm(int docID, BytesRef ret) {
> + final int pointer = (int) docToOffset.get(docID);
> + return bytes.fillUsingLengthPrefix(ret, pointer);
> + }
> + }
> +}
>
> Added:
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsI
> ndexCreator.java
> URL:
> http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lu
> cene/search/cache/DocTermsIndexCreator.java?rev=1001303&view=auto
> ================================================================
> ==============
> ---
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsI
> ndexCreator.java (added)
> +++
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsI
> ndexCreator.java Sat Sep 25 19:32:37 2010
> @@ -0,0 +1,318 @@
> +package org.apache.lucene.search.cache;
> +
> +/**
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreements. See the NOTICE file distributed with
> + * this work for additional information regarding copyright ownership.
> + * The ASF licenses this file to You under the Apache License, Version 2.0
> + * (the "License"); you may not use this file except in compliance with
> + * the License. You may obtain a copy of the License at
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +import java.io.IOException;
> +import java.util.Comparator;
> +
> +import org.apache.lucene.index.DocsAndPositionsEnum;
> +import org.apache.lucene.index.DocsEnum;
> +import org.apache.lucene.index.IndexReader;
> +import org.apache.lucene.index.MultiFields;
> +import org.apache.lucene.index.Terms;
> +import org.apache.lucene.index.TermsEnum;
> +import org.apache.lucene.search.DocIdSetIterator;
> +import org.apache.lucene.search.FieldCache.DocTermsIndex;
> +import org.apache.lucene.util.ArrayUtil;
> +import org.apache.lucene.util.Bits;
> +import org.apache.lucene.util.BytesRef;
> +import org.apache.lucene.util.PagedBytes;
> +import org.apache.lucene.util.StringHelper;
> +import org.apache.lucene.util.packed.GrowableWriter;
> +import org.apache.lucene.util.packed.PackedInts;
> +
> +public class DocTermsIndexCreator<T extends DocTermsIndex> extends
> EntryCreatorWithOptions<T>
> +{
> + public static final int FASTER_BUT_MORE_RAM = 2;
> +
> + public String field;
> +
> + public DocTermsIndexCreator( String field )
> + {
> + super( FASTER_BUT_MORE_RAM ); // By default turn on
> FASTER_BUT_MORE_RAM
> + if( field == null ) {
> + throw new IllegalArgumentException( "field can not be null" );
> + }
> + this.field = field;
> + }
> +
> + public DocTermsIndexCreator( String field, int flags )
> + {
> + super( flags );
> + if( field == null ) {
> + throw new IllegalArgumentException( "field can not be null" );
> + }
> + this.field = field;
> + }
> +
> + @Override
> + public EntryKey getCacheKey() {
> + return new SimpleEntryKey( DocTermsIndexCreator.class, field );
> + }
> +
> + @Override
> + public T create(IndexReader reader) throws IOException
> + {
> + String field = StringHelper.intern(this.field); // TODO?? necessary?
> + Terms terms = MultiFields.getTerms(reader, field);
> +
> + final boolean fasterButMoreRAM = hasOption(FASTER_BUT_MORE_RAM);
> +
> + final PagedBytes bytes = new PagedBytes(15);
> +
> + int startBytesBPV;
> + int startTermsBPV;
> + int startNumUniqueTerms;
> +
> + int maxDoc = reader.maxDoc();
> + final int termCountHardLimit;
> + if (maxDoc == Integer.MAX_VALUE) {
> + termCountHardLimit = Integer.MAX_VALUE;
> + } else {
> + termCountHardLimit = maxDoc+1;
> + }
> +
> + if (terms != null) {
> + // Try for coarse estimate for number of bits; this
> + // should be an underestimate most of the time, which
> + // is fine -- GrowableWriter will reallocate as needed
> + long numUniqueTerms = 0;
> + try {
> + numUniqueTerms = terms.getUniqueTermCount();
> + } catch (UnsupportedOperationException uoe) {
> + numUniqueTerms = -1;
> + }
> + if (numUniqueTerms != -1) {
> +
> + if (numUniqueTerms > termCountHardLimit) {
> + // app is misusing the API (there is more than
> + // one term per doc); in this case we make best
> + // effort to load what we can (see LUCENE-2142)
> + numUniqueTerms = termCountHardLimit;
> + }
> +
> + startBytesBPV = PackedInts.bitsRequired(numUniqueTerms*4);
> + startTermsBPV = PackedInts.bitsRequired(numUniqueTerms);
> +
> + startNumUniqueTerms = (int) numUniqueTerms;
> + } else {
> + startBytesBPV = 1;
> + startTermsBPV = 1;
> + startNumUniqueTerms = 1;
> + }
> + } else {
> + startBytesBPV = 1;
> + startTermsBPV = 1;
> + startNumUniqueTerms = 1;
> + }
> +
> + GrowableWriter termOrdToBytesOffset = new
> GrowableWriter(startBytesBPV, 1+startNumUniqueTerms, fasterButMoreRAM);
> + final GrowableWriter docToTermOrd = new
> GrowableWriter(startTermsBPV, reader.maxDoc(), fasterButMoreRAM);
> +
> + // 0 is reserved for "unset"
> + bytes.copyUsingLengthPrefix(new BytesRef());
> + int termOrd = 1;
> +
> + if (terms != null) {
> + final TermsEnum termsEnum = terms.iterator();
> + final Bits delDocs = MultiFields.getDeletedDocs(reader);
> + DocsEnum docs = null;
> +
> + while(true) {
> + final BytesRef term = termsEnum.next();
> + if (term == null) {
> + break;
> + }
> + if (termOrd >= termCountHardLimit) {
> + break;
> + }
> +
> + if (termOrd == termOrdToBytesOffset.size()) {
> + // NOTE: this code only runs if the incoming
> + // reader impl doesn't implement
> + // getUniqueTermCount (which should be uncommon)
> + termOrdToBytesOffset =
> termOrdToBytesOffset.resize(ArrayUtil.oversize(1+termOrd, 1));
> + }
> + termOrdToBytesOffset.set(termOrd, bytes.copyUsingLengthPrefix(term));
> + docs = termsEnum.docs(delDocs, docs);
> + while (true) {
> + final int docID = docs.nextDoc();
> + if (docID == DocIdSetIterator.NO_MORE_DOCS) {
> + break;
> + }
> + docToTermOrd.set(docID, termOrd);
> + }
> + termOrd++;
> + }
> +
> + if (termOrdToBytesOffset.size() > termOrd) {
> + termOrdToBytesOffset = termOrdToBytesOffset.resize(termOrd);
> + }
> + }
> +
> + // maybe an int-only impl?
> + return (T)new DocTermsIndexImpl(bytes.freeze(true),
> termOrdToBytesOffset.getMutable(), docToTermOrd.getMutable(), termOrd);
> + }
> +
> + @Override
> + public T validate(T entry, IndexReader reader) throws IOException {
> + // TODO? nothing? perhaps subsequent call with
> FASTER_BUT_MORE_RAM?
> + return entry;
> + }
> +
> + //-----------------------------------------------------------------------------
> + //-----------------------------------------------------------------------------
> +
> + public static class DocTermsIndexImpl extends DocTermsIndex {
> + private final PagedBytes.Reader bytes;
> + private final PackedInts.Reader termOrdToBytesOffset;
> + private final PackedInts.Reader docToTermOrd;
> + private final int numOrd;
> +
> + public DocTermsIndexImpl(PagedBytes.Reader bytes, PackedInts.Reader
> termOrdToBytesOffset, PackedInts.Reader docToTermOrd, int numOrd) {
> + this.bytes = bytes;
> + this.docToTermOrd = docToTermOrd;
> + this.termOrdToBytesOffset = termOrdToBytesOffset;
> + this.numOrd = numOrd;
> + }
> +
> + @Override
> + public PackedInts.Reader getDocToOrd() {
> + return docToTermOrd;
> + }
> +
> + @Override
> + public int numOrd() {
> + return numOrd;
> + }
> +
> + @Override
> + public int getOrd(int docID) {
> + return (int) docToTermOrd.get(docID);
> + }
> +
> + @Override
> + public int size() {
> + return docToTermOrd.size();
> + }
> +
> + @Override
> + public BytesRef lookup(int ord, BytesRef ret) {
> + return bytes.fillUsingLengthPrefix(ret, termOrdToBytesOffset.get(ord));
> + }
> +
> + @Override
> + public TermsEnum getTermsEnum() {
> + return this.new DocTermsIndexEnum();
> + }
> +
> + class DocTermsIndexEnum extends TermsEnum {
> + int currentOrd;
> + int currentBlockNumber;
> + int end; // end position in the current block
> + final byte[][] blocks;
> + final int[] blockEnds;
> +
> + final BytesRef term = new BytesRef();
> +
> + public DocTermsIndexEnum() {
> + currentOrd = 0;
> + currentBlockNumber = 0;
> + blocks = bytes.getBlocks();
> + blockEnds = bytes.getBlockEnds();
> + currentBlockNumber = bytes.fillUsingLengthPrefix2(term,
> termOrdToBytesOffset.get(0));
> + end = blockEnds[currentBlockNumber];
> + }
> +
> + @Override
> + public SeekStatus seek(BytesRef text, boolean useCache) throws
> IOException {
> + // TODO - we can support with binary search
> + throw new UnsupportedOperationException();
> + }
> +
> + @Override
> + public SeekStatus seek(long ord) throws IOException {
> + assert(ord >= 0 && ord <= numOrd);
> + // TODO: if gap is small, could iterate from current position? Or let user
> decide that?
> + currentBlockNumber = bytes.fillUsingLengthPrefix2(term,
> termOrdToBytesOffset.get((int)ord));
> + end = blockEnds[currentBlockNumber];
> + currentOrd = (int)ord;
> + return SeekStatus.FOUND;
> + }
> +
> + @Override
> + public BytesRef next() throws IOException {
> + int start = term.offset + term.length;
> + if (start >= end) {
> + // switch byte blocks
> + if (currentBlockNumber +1 >= blocks.length) {
> + return null;
> + }
> + currentBlockNumber++;
> + term.bytes = blocks[currentBlockNumber];
> + end = blockEnds[currentBlockNumber];
> + start = 0;
> + if (end<=0) return null; // special case of empty last array
> + }
> +
> + currentOrd++;
> +
> + byte[] block = term.bytes;
> + if ((block[start] & 128) == 0) {
> + term.length = block[start];
> + term.offset = start+1;
> + } else {
> + term.length = (((block[start] & 0x7f)) << 8) | (block[1+start] & 0xff);
> + term.offset = start+2;
> + }
> +
> + return term;
> + }
> +
> + @Override
> + public BytesRef term() throws IOException {
> + return term;
> + }
> +
> + @Override
> + public long ord() throws IOException {
> + return currentOrd;
> + }
> +
> + @Override
> + public int docFreq() {
> + throw new UnsupportedOperationException();
> + }
> +
> + @Override
> + public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException
> {
> + throw new UnsupportedOperationException();
> + }
> +
> + @Override
> + public DocsAndPositionsEnum docsAndPositions(Bits skipDocs,
> DocsAndPositionsEnum reuse) throws IOException {
> + throw new UnsupportedOperationException();
> + }
> +
> + @Override
> + public Comparator<BytesRef> getComparator() throws IOException {
> + throw new UnsupportedOperationException();
> + }
> + }
> + }
> +}
>
> Added:
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DoubleVal
> uesCreator.java
> URL:
> http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lu
> cene/search/cache/DoubleValuesCreator.java?rev=1001303&view=auto
> ================================================================
> ==============
> ---
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DoubleVal
> uesCreator.java (added)
> +++
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DoubleVal
> uesCreator.java Sat Sep 25 19:32:37 2010
> @@ -0,0 +1,150 @@
> +package org.apache.lucene.search.cache;
> +
> +/**
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreements. See the NOTICE file distributed with
> + * this work for additional information regarding copyright ownership.
> + * The ASF licenses this file to You under the Apache License, Version 2.0
> + * (the "License"); you may not use this file except in compliance with
> + * the License. You may obtain a copy of the License at
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +import java.io.IOException;
> +
> +import org.apache.lucene.index.DocsEnum;
> +import org.apache.lucene.index.IndexReader;
> +import org.apache.lucene.index.MultiFields;
> +import org.apache.lucene.index.Terms;
> +import org.apache.lucene.index.TermsEnum;
> +import org.apache.lucene.search.DocIdSetIterator;
> +import org.apache.lucene.search.FieldCache;
> +import org.apache.lucene.search.FieldCache.DoubleParser;
> +import org.apache.lucene.search.cache.CachedArray.DoubleValues;
> +import org.apache.lucene.util.Bits;
> +import org.apache.lucene.util.BytesRef;
> +import org.apache.lucene.util.OpenBitSet;
> +
> +public class DoubleValuesCreator extends
> CachedArrayCreator<DoubleValues>
> +{
> + protected DoubleParser parser;
> +
> + public DoubleValuesCreator( String field, DoubleParser parser, int options )
> + {
> + super( field, options );
> + this.parser = parser;
> + }
> +
> + public DoubleValuesCreator( String field, DoubleParser parser )
> + {
> + super( field );
> + this.parser = parser;
> + }
> +
> + @Override
> + public Class getArrayType() {
> + return Double.class;
> + }
> +
> +
> + //--------------------------------------------------------------------------------
> + //--------------------------------------------------------------------------------
> +
> + @Override
> + public DoubleValues create(IndexReader reader) throws IOException {
> + return validate( new DoubleValues(), reader );
> + }
> +
> + @Override
> + public DoubleValues validate(DoubleValues entry, IndexReader reader)
> throws IOException {
> + boolean ok = false;
> + if( hasOption(OPTION_CACHE_VALUES) ) {
> + ok = true;
> + if( entry.values == null ) {
> + fillDoubleValues(entry, reader, field);
> + }
> + }
> + if( hasOption(OPTION_CACHE_BITS) ) {
> + ok = true;
> + if( entry.valid == null ) {
> + fillValidBits(entry, reader, field);
> + }
> + }
> + if( !ok ) {
> + throw new RuntimeException( "the config must cache values and/or bits"
> );
> + }
> + return entry;
> + }
> +
> + protected void fillDoubleValues( DoubleValues vals, IndexReader reader,
> String field ) throws IOException
> + {
> + if( parser == null ) {
> + try {
> + parser = FieldCache.DEFAULT_DOUBLE_PARSER;
> + fillDoubleValues( vals, reader, field );
> + return;
> + }
> + catch (NumberFormatException ne) {
> + vals.parserHashCode = null; // wipe the previous one
> + parser = FieldCache.NUMERIC_UTILS_DOUBLE_PARSER;
> + fillDoubleValues( vals, reader, field );
> + return;
> + }
> + }
> + assertSameParserAndResetCounts(vals, parser);
> +
> + Terms terms = MultiFields.getTerms(reader, field);
> + int maxDoc = reader.maxDoc();
> + vals.values = null;
> + if (terms != null) {
> + final TermsEnum termsEnum = terms.iterator();
> + final Bits delDocs = MultiFields.getDeletedDocs(reader);
> + OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? new
> OpenBitSet( maxDoc ) : null;
> + DocsEnum docs = null;
> + try {
> + while(true) {
> + final BytesRef term = termsEnum.next();
> + if (term == null) {
> + break;
> + }
> + final double termval = parser.parseDouble(term);
> + docs = termsEnum.docs(delDocs, docs);
> + while (true) {
> + final int docID = docs.nextDoc();
> + if (docID == DocIdSetIterator.NO_MORE_DOCS) {
> + break;
> + }
> + if(vals.values == null) {
> + vals.values = new double[maxDoc];
> + }
> + vals.values[docID] = termval;
> + vals.numDocs++;
> + if( validBits != null ) {
> + validBits.set( docID );
> + }
> + }
> + vals.numTerms++;
> + }
> + } catch (FieldCache.StopFillCacheException stop) {}
> +
> + if( vals.valid == null ) {
> + vals.valid = checkMatchAllBits( delDocs, validBits, vals.numDocs, maxDoc
> );
> + }
> + }
> +
> + if(vals.values == null) {
> + vals.values = new double[maxDoc];
> + }
> +
> + if( vals.valid == null && vals.numDocs < 1 ) {
> + vals.valid = new Bits.MatchNoBits( maxDoc );
> + }
> + }
> +}
> \ No newline at end of file
>
> Added:
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/EntryCreat
> or.java
> URL:
> http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lu
> cene/search/cache/EntryCreator.java?rev=1001303&view=auto
> ================================================================
> ==============
> ---
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/EntryCreat
> or.java (added)
> +++
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/EntryCreat
> or.java Sat Sep 25 19:32:37 2010
> @@ -0,0 +1,72 @@
> +package org.apache.lucene.search.cache;
> +
> +/**
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreements. See the NOTICE file distributed with
> + * this work for additional information regarding copyright ownership.
> + * The ASF licenses this file to You under the Apache License, Version 2.0
> + * (the "License"); you may not use this file except in compliance with
> + * the License. You may obtain a copy of the License at
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +import java.io.IOException;
> +import java.io.Serializable;
> +
> +import org.apache.lucene.index.IndexReader;
> +
> +/**
> + * Create Cached Values for a given key
> + *
> + * @lucene.experimental
> + */
> +public abstract class EntryCreator<T> implements Serializable
> +{
> + public abstract T create( IndexReader reader ) throws IOException;
> + public abstract T validate( T entry, IndexReader reader ) throws IOException;
> +
> + /**
> + * Indicate if a cached value should be checked before usage.
> + * This is useful if an application wants to support subsequent calls
> + * to the same cached object that may alter the cached object. If
> + * an application wants to avoid this (synchronized) check, it should
> + * return 'false'
> + *
> + * @return 'true' if the Cache should call 'validate' before returning a cached
> object
> + */
> + public boolean shouldValidate() {
> + return true;
> + }
> +
> + /**
> + * @return A key to identify valid cache entries for subsequent requests
> + */
> + public abstract EntryKey getCacheKey();
> +
> +
> + //------------------------------------------------------------------------
> + // The Following code is a hack to make things work while the
> + // EntryCreator is stored in in the FieldCache.
> + // When the FieldCache is replaced with a simpler map LUCENE-2665
> + // This can be removed
> + //------------------------------------------------------------------------
> +
> + public boolean equals(Object obj) {
> + if( obj instanceof EntryCreator ) {
> + return getCacheKey().equals( ((EntryCreator)obj).getCacheKey() );
> + }
> + return false;
> + }
> +
> + @Override
> + public int hashCode() {
> + return getCacheKey().hashCode();
> + }
> +}
>
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org