You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mv...@apache.org on 2011/08/16 19:44:16 UTC
svn commit: r1158393 - in /lucene/dev/trunk/lucene: ./
src/java/org/apache/lucene/search/ src/java/org/apache/lucene/search/cache/
src/test/org/apache/lucene/search/
Author: mvg
Date: Tue Aug 16 17:44:16 2011
New Revision: 1158393
URL: http://svn.apache.org/viewvc?rev=1158393&view=rev
Log:
LUCENE-3354: FieldCache can cache DocTermOrds.
Added:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermOrdsCreator.java
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCache.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFieldCache.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1158393&r1=1158392&r2=1158393&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Tue Aug 16 17:44:16 2011
@@ -473,6 +473,8 @@ New features
different usecases like merging, flushing and reading.
(Simon Willnauer, Mike McCandless, Varun Thacker)
+* LUCENE-3354: FieldCache can cache DocTermOrds. (Martijn van Groningen)
+
Optimizations
* LUCENE-2588: Don't store unnecessary suffixes when writing the terms
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCache.java?rev=1158393&r1=1158392&r2=1158393&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCache.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCache.java Tue Aug 16 17:44:16 2011
@@ -17,6 +17,7 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.index.DocTermOrds;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.cache.EntryCreator;
@@ -654,6 +655,18 @@ public interface FieldCache {
throws IOException;
/**
+ * Checks the internal cache for an appropriate entry, and if none is found, reads the term values
+ * in <code>field</code> and returns a {@link DocTermOrds} instance, providing a method to retrieve
+ * the terms (as ords) per document.
+ *
+ * @param reader Used to build a {@link DocTermOrds} instance
+ * @param field Which field contains the strings.
+ * @return a {@link DocTermOrds} instance
+ * @throws IOException If any error occurs.
+ */
+ public DocTermOrds getDocTermOrds(IndexReader reader, String field) throws IOException;
+
+ /**
* EXPERT: A unique Identifier/Description for each item in the FieldCache.
* Can be useful for logging/debugging.
* @lucene.experimental
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java?rev=1158393&r1=1158392&r2=1158393&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java Tue Aug 16 17:44:16 2011
@@ -17,32 +17,16 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.WeakHashMap;
-
+import org.apache.lucene.index.DocTermOrds;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.cache.ByteValuesCreator;
-import org.apache.lucene.search.cache.DocTermsCreator;
-import org.apache.lucene.search.cache.DocTermsIndexCreator;
-import org.apache.lucene.search.cache.DoubleValuesCreator;
-import org.apache.lucene.search.cache.EntryCreator;
-import org.apache.lucene.search.cache.FloatValuesCreator;
-import org.apache.lucene.search.cache.IntValuesCreator;
-import org.apache.lucene.search.cache.LongValuesCreator;
-import org.apache.lucene.search.cache.ShortValuesCreator;
-import org.apache.lucene.search.cache.CachedArray.ByteValues;
-import org.apache.lucene.search.cache.CachedArray.DoubleValues;
-import org.apache.lucene.search.cache.CachedArray.FloatValues;
-import org.apache.lucene.search.cache.CachedArray.IntValues;
-import org.apache.lucene.search.cache.CachedArray.LongValues;
-import org.apache.lucene.search.cache.CachedArray.ShortValues;
+import org.apache.lucene.search.cache.*;
+import org.apache.lucene.search.cache.CachedArray.*;
import org.apache.lucene.util.FieldCacheSanityChecker;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.*;
+
/**
* Expert: The default cache implementation, storing all values in memory.
* A WeakHashMap is used for storage.
@@ -70,6 +54,7 @@ public class FieldCacheImpl implements F
caches.put(Double.TYPE, new Cache<DoubleValues>(this));
caches.put(DocTermsIndex.class, new Cache<DocTermsIndex>(this));
caches.put(DocTerms.class, new Cache<DocTerms>(this));
+ caches.put(DocTermOrds.class, new Cache<DocTermOrds>(this));
}
public synchronized void purgeAllCaches() {
@@ -393,6 +378,11 @@ public class FieldCacheImpl implements F
return (DocTerms)caches.get(DocTerms.class).get(reader, new Entry(field, creator));
}
+ @SuppressWarnings("unchecked") // NOTE(review): presumably needed for the cast on the cache lookup below -- confirm
+ public DocTermOrds getDocTermOrds(IndexReader reader, String field) throws IOException { // looks up the entry for (reader, field) in the cache registered under DocTermOrds.class
+ return (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new Entry(field, new DocTermOrdsCreator(field, 0))); // 0 is the creator's option flag, forwarded to EntryCreatorWithOptions; its meaning is not visible here
+ }
+
private volatile PrintStream infoStream;
public void setInfoStream(PrintStream stream) {
Added: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermOrdsCreator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermOrdsCreator.java?rev=1158393&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermOrdsCreator.java (added)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermOrdsCreator.java Tue Aug 16 17:44:16 2011
@@ -0,0 +1,51 @@
+package org.apache.lucene.search.cache;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.DocTermOrds;
+import org.apache.lucene.index.IndexReader;
+
+import java.io.IOException;
+
+/**
+ * Creates {@link DocTermOrds} instances for a single field; FieldCacheImpl.getDocTermOrds wraps one in a cache Entry.
+ */
+public class DocTermOrdsCreator extends EntryCreatorWithOptions<DocTermOrds> {
+
+ private final String field; // field the DocTermOrds is built for; also used in the cache key
+ /** Stores the field name and hands {@code flag} unchanged to the superclass (its meaning is defined there, not here). */
+ public DocTermOrdsCreator(String field, int flag) {
+ super(flag);
+ this.field = field;
+ }
+ /** Builds a new {@link DocTermOrds} from the given reader for this creator's field. */
+ @Override
+ public DocTermOrds create(IndexReader reader) throws IOException {
+ return new DocTermOrds(reader, field);
+ }
+ /** Performs no re-validation: returns the given entry as-is and does not use the reader. */
+ @Override
+ public DocTermOrds validate(DocTermOrds entry, IndexReader reader) throws IOException {
+ return entry;
+ }
+ /** Returns a key built from this creator class and the field name; the flag is not passed to the key. */
+ @Override
+ public EntryKey getCacheKey() {
+ return new SimpleEntryKey(DocTermOrdsCreator.class, field);
+ }
+}
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFieldCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFieldCache.java?rev=1158393&r1=1158392&r2=1158393&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFieldCache.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFieldCache.java Tue Aug 16 17:44:16 2011
@@ -19,28 +19,33 @@ package org.apache.lucene.search;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.*;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
-import org.apache.lucene.util.BytesRef;
-import java.io.IOException;
+
import java.io.ByteArrayOutputStream;
+import java.io.IOException;
import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.LinkedHashSet;
+import java.util.List;
public class TestFieldCache extends LuceneTestCase {
protected IndexReader reader;
private int NUM_DOCS;
+ private int NUM_ORDS;
private String[] unicodeStrings;
+ private BytesRef[][] multiValued;
private Directory directory;
@Override
public void setUp() throws Exception {
super.setUp();
NUM_DOCS = atLeast(1000);
+ NUM_ORDS = atLeast(2);
directory = newDirectory();
RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
long theLong = Long.MAX_VALUE;
@@ -50,6 +55,7 @@ public class TestFieldCache extends Luce
int theInt = Integer.MAX_VALUE;
float theFloat = Float.MAX_VALUE;
unicodeStrings = new String[NUM_DOCS];
+ multiValued = new BytesRef[NUM_DOCS][NUM_ORDS];
if (VERBOSE) {
System.out.println("TEST: setUp");
}
@@ -65,21 +71,19 @@ public class TestFieldCache extends Luce
// sometimes skip the field:
if (random.nextInt(40) != 17) {
- String s = null;
- if (i > 0 && random.nextInt(3) == 1) {
- // reuse past string -- try to find one that's not null
- for(int iter=0;iter<10 && s==null;iter++) {
- s = unicodeStrings[random.nextInt(i)];
- }
- if (s == null) {
- s = _TestUtil.randomUnicodeString(random, 250);
- }
- } else {
- s = _TestUtil.randomUnicodeString(random, 250);
- }
- unicodeStrings[i] = s;
+ unicodeStrings[i] = generateString(i);
doc.add(newField("theRandomUnicodeString", unicodeStrings[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
}
+
+ // sometimes skip the field:
+ if (random.nextInt(10) != 8) {
+ for (int j = 0; j < NUM_ORDS; j++) {
+ String newValue = generateString(i);
+ multiValued[i][j] = new BytesRef(newValue);
+ doc.add(newField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
+ }
+ Arrays.sort(multiValued[i]);
+ }
writer.addDocument(doc);
}
reader = writer.getReader();
@@ -210,6 +214,47 @@ public class TestFieldCache extends Luce
// test bad field
terms = cache.getTerms(reader, "bogusfield");
+ // getDocTermOrds
+ DocTermOrds termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField");
+ TermsEnum termsEnum = termOrds.getOrdTermsEnum(reader);
+ assertSame("Second request to cache return same DocTermOrds", termOrds, cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField"));
+ DocTermOrds.TermOrdsIterator reuse = null;
+ for (int i = 0; i < NUM_DOCS; i++) {
+ reuse = termOrds.lookup(i, reuse);
+ final int[] buffer = new int[5];
+ // This will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
+ List<BytesRef> values = new ArrayList<BytesRef>(new LinkedHashSet<BytesRef>(Arrays.asList(multiValued[i])));
+ for (;;) {
+ int chunk = reuse.read(buffer);
+ if (chunk == 0) {
+ for (int ord = 0; ord < values.size(); ord++) {
+ BytesRef term = values.get(ord);
+ assertNull(String.format("Document[%d] misses field must be null. Has value %s for ord %d", i, term, ord), term);
+ }
+ break;
+ }
+
+ for(int idx=0; idx < chunk; idx++) {
+ int key = buffer[idx];
+ termsEnum.seekExact((long) key);
+ String actual = termsEnum.term().utf8ToString();
+ String expected = values.get(idx).utf8ToString();
+ if (!expected.equals(actual)) {
+ reuse = termOrds.lookup(i, reuse);
+ reuse.read(buffer);
+ }
+ assertTrue(String.format("Expected value %s for doc %d and ord %d, but was %s", expected, i, idx, actual), expected.equals(actual));
+ }
+
+ if (chunk < buffer.length) {
+ break;
+ }
+ }
+ }
+
+ // test bad field
+ termOrds = cache.getDocTermOrds(reader, "bogusfield");
+
FieldCache.DEFAULT.purge(reader);
}
@@ -223,4 +268,21 @@ public class TestFieldCache extends Luce
r.close();
dir.close();
}
+ /** Returns a string for document {@code i}: sometimes a copy of an earlier document's string, otherwise a new random unicode string. */
+ private String generateString(int i) {
+ String s = null;
+ if (i > 0 && random.nextInt(3) == 1) { // one chance in three to reuse; never for the first document, which has nothing earlier
+ // reuse past string -- try up to 10 random earlier slots to find one that's not null
+ for(int iter = 0; iter < 10 && s == null;iter++) {
+ s = unicodeStrings[random.nextInt(i)]; // a slot is null when setUp skipped the field for that document
+ }
+ if (s == null) { // every pick was null: fall back to a new random string
+ s = _TestUtil.randomUnicodeString(random, 250);
+ }
+ } else {
+ s = _TestUtil.randomUnicodeString(random, 250); // NOTE(review): 250 is presumably a maximum length -- confirm against _TestUtil
+ }
+ return s;
+ }
+
}