You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mv...@apache.org on 2011/08/16 19:44:16 UTC

svn commit: r1158393 - in /lucene/dev/trunk/lucene: ./ src/java/org/apache/lucene/search/ src/java/org/apache/lucene/search/cache/ src/test/org/apache/lucene/search/

Author: mvg
Date: Tue Aug 16 17:44:16 2011
New Revision: 1158393

URL: http://svn.apache.org/viewvc?rev=1158393&view=rev
Log:
LUCENE-3354: FieldCache can cache DocTermOrds.

Added:
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermOrdsCreator.java
Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCache.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFieldCache.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1158393&r1=1158392&r2=1158393&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Tue Aug 16 17:44:16 2011
@@ -473,6 +473,8 @@ New features
   different usecases like merging, flushing and reading.
   (Simon Willnauer, Mike McCandless, Varun Thacker)
 
+* LUCENE-3354: FieldCache can cache DocTermOrds. (Martijn van Groningen)
+
 Optimizations
 
 * LUCENE-2588: Don't store unnecessary suffixes when writing the terms

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCache.java?rev=1158393&r1=1158392&r2=1158393&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCache.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCache.java Tue Aug 16 17:44:16 2011
@@ -17,6 +17,7 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
+import org.apache.lucene.index.DocTermOrds;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.cache.EntryCreator;
@@ -654,6 +655,18 @@ public interface FieldCache {
   throws IOException;
 
   /**
+   * Returns the {@link DocTermOrds} for <code>field</code>: checks the internal cache for an appropriate
+   * entry and, only if none is found, reads the term values in <code>field</code> to build a new instance.
+   * The returned instance provides a method to retrieve the terms (as ords) per document.
+   *
+   * @param reader  Used to build the {@link DocTermOrds} instance when no cached entry is found.
+   * @param field   The field whose term values are read.
+   * @return the cached or newly built {@link DocTermOrds} instance for <code>field</code>
+   * @throws IOException  If any error occurs.
+   */
+  public DocTermOrds getDocTermOrds(IndexReader reader, String field) throws IOException;
+
+  /**
    * EXPERT: A unique Identifier/Description for each item in the FieldCache. 
    * Can be useful for logging/debugging.
    * @lucene.experimental

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java?rev=1158393&r1=1158392&r2=1158393&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java Tue Aug 16 17:44:16 2011
@@ -17,32 +17,16 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.WeakHashMap;
-
+import org.apache.lucene.index.DocTermOrds;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.cache.ByteValuesCreator;
-import org.apache.lucene.search.cache.DocTermsCreator;
-import org.apache.lucene.search.cache.DocTermsIndexCreator;
-import org.apache.lucene.search.cache.DoubleValuesCreator;
-import org.apache.lucene.search.cache.EntryCreator;
-import org.apache.lucene.search.cache.FloatValuesCreator;
-import org.apache.lucene.search.cache.IntValuesCreator;
-import org.apache.lucene.search.cache.LongValuesCreator;
-import org.apache.lucene.search.cache.ShortValuesCreator;
-import org.apache.lucene.search.cache.CachedArray.ByteValues;
-import org.apache.lucene.search.cache.CachedArray.DoubleValues;
-import org.apache.lucene.search.cache.CachedArray.FloatValues;
-import org.apache.lucene.search.cache.CachedArray.IntValues;
-import org.apache.lucene.search.cache.CachedArray.LongValues;
-import org.apache.lucene.search.cache.CachedArray.ShortValues;
+import org.apache.lucene.search.cache.*;
+import org.apache.lucene.search.cache.CachedArray.*;
 import org.apache.lucene.util.FieldCacheSanityChecker;
 
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.*;
+
 /**
  * Expert: The default cache implementation, storing all values in memory.
  * A WeakHashMap is used for storage.
@@ -70,6 +54,7 @@ public class FieldCacheImpl implements F
     caches.put(Double.TYPE, new Cache<DoubleValues>(this));
     caches.put(DocTermsIndex.class, new Cache<DocTermsIndex>(this));
     caches.put(DocTerms.class, new Cache<DocTerms>(this));
+    caches.put(DocTermOrds.class, new Cache<DocTermOrds>(this));
   }
   
   public synchronized void purgeAllCaches() {
@@ -393,6 +378,11 @@ public class FieldCacheImpl implements F
     return (DocTerms)caches.get(DocTerms.class).get(reader, new Entry(field, creator));
   }
 
+  @SuppressWarnings("unchecked")
+  public DocTermOrds getDocTermOrds(IndexReader reader, String field) throws IOException { // delegates to the Cache stored under DocTermOrds.class
+    return (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new Entry(field, new DocTermOrdsCreator(field, 0))); // NOTE(review): flag 0 presumably means "no creator options" - confirm
+  }
+
   private volatile PrintStream infoStream;
 
   public void setInfoStream(PrintStream stream) {

Added: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermOrdsCreator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermOrdsCreator.java?rev=1158393&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermOrdsCreator.java (added)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermOrdsCreator.java Tue Aug 16 17:44:16 2011
@@ -0,0 +1,51 @@
+package org.apache.lucene.search.cache;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.DocTermOrds;
+import org.apache.lucene.index.IndexReader;
+
+import java.io.IOException;
+
+/**
+ * An {@link EntryCreatorWithOptions} that creates the {@link DocTermOrds} instance for a single field.
+ */
+public class DocTermOrdsCreator extends EntryCreatorWithOptions<DocTermOrds> {
+
+  private final String field; // field handed to the DocTermOrds constructor and used in the cache key
+
+  public DocTermOrdsCreator(String field, int flag) { // flag is passed unchanged to EntryCreatorWithOptions
+    super(flag);
+    this.field = field;
+  }
+
+  @Override
+  public DocTermOrds create(IndexReader reader) throws IOException { // builds a new DocTermOrds for this field from the given reader
+    return new DocTermOrds(reader, field);
+  }
+
+  @Override
+  public DocTermOrds validate(DocTermOrds entry, IndexReader reader) throws IOException { // nothing is checked: the given entry is returned as is
+    return entry;
+  }
+
+  @Override
+  public EntryKey getCacheKey() { // key is built from this creator class and the field name only; the flag is not part of it
+    return new SimpleEntryKey(DocTermOrdsCreator.class, field);
+  }
+}

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFieldCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFieldCache.java?rev=1158393&r1=1158392&r2=1158393&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFieldCache.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFieldCache.java Tue Aug 16 17:44:16 2011
@@ -19,28 +19,33 @@ package org.apache.lucene.search;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.*;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
-import org.apache.lucene.util.BytesRef;
-import java.io.IOException;
+
 import java.io.ByteArrayOutputStream;
+import java.io.IOException;
 import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.LinkedHashSet;
+import java.util.List;
 
 public class TestFieldCache extends LuceneTestCase {
   protected IndexReader reader;
   private int NUM_DOCS;
+  private int NUM_ORDS;
   private String[] unicodeStrings;
+  private BytesRef[][] multiValued;
   private Directory directory;
 
   @Override
   public void setUp() throws Exception {
     super.setUp();
     NUM_DOCS = atLeast(1000);
+    NUM_ORDS = atLeast(2);
     directory = newDirectory();
     RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
     long theLong = Long.MAX_VALUE;
@@ -50,6 +55,7 @@ public class TestFieldCache extends Luce
     int theInt = Integer.MAX_VALUE;
     float theFloat = Float.MAX_VALUE;
     unicodeStrings = new String[NUM_DOCS];
+    multiValued = new BytesRef[NUM_DOCS][NUM_ORDS];
     if (VERBOSE) {
       System.out.println("TEST: setUp");
     }
@@ -65,21 +71,19 @@ public class TestFieldCache extends Luce
 
       // sometimes skip the field:
       if (random.nextInt(40) != 17) {
-        String s = null;
-        if (i > 0 && random.nextInt(3) == 1) {
-          // reuse past string -- try to find one that's not null
-          for(int iter=0;iter<10 && s==null;iter++) {
-            s = unicodeStrings[random.nextInt(i)];
-          }
-          if (s == null) {
-            s = _TestUtil.randomUnicodeString(random, 250);
-          }
-        } else {
-          s = _TestUtil.randomUnicodeString(random, 250);
-        }
-        unicodeStrings[i] = s;
+        unicodeStrings[i] = generateString(i);
         doc.add(newField("theRandomUnicodeString", unicodeStrings[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
       }
+
+      // sometimes skip the field:
+      if (random.nextInt(10) != 8) {
+        for (int j = 0; j < NUM_ORDS; j++) {
+          String newValue = generateString(i);
+          multiValued[i][j] = new BytesRef(newValue);
+          doc.add(newField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
+        }
+        Arrays.sort(multiValued[i]);
+      }
       writer.addDocument(doc);
     }
     reader = writer.getReader();
@@ -210,6 +214,47 @@ public class TestFieldCache extends Luce
     // test bad field
     terms = cache.getTerms(reader, "bogusfield");
 
+    // getDocTermOrds
+    DocTermOrds termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField");
+    TermsEnum termsEnum = termOrds.getOrdTermsEnum(reader);
+    assertSame("Second request to cache return same DocTermOrds", termOrds, cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField"));
+    DocTermOrds.TermOrdsIterator reuse = null;
+    for (int i = 0; i < NUM_DOCS; i++) {
+      reuse = termOrds.lookup(i, reuse);
+      final int[] buffer = new int[5];
+      // This will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
+      List<BytesRef> values = new ArrayList<BytesRef>(new LinkedHashSet<BytesRef>(Arrays.asList(multiValued[i])));
+      for (;;) {
+        int chunk = reuse.read(buffer);
+        if (chunk == 0) {
+          for (int ord = 0; ord < values.size(); ord++) {
+            BytesRef term = values.get(ord);
+            assertNull(String.format("Document[%d] misses field must be null. Has value %s for ord %d", i, term, ord), term);
+          }
+          break;
+        }
+
+        for(int idx=0; idx < chunk; idx++) {
+          int key = buffer[idx];
+          termsEnum.seekExact((long) key);
+          String actual = termsEnum.term().utf8ToString();
+          String expected = values.get(idx).utf8ToString();
+          if (!expected.equals(actual)) {
+              reuse = termOrds.lookup(i, reuse);
+              reuse.read(buffer);
+          }
+          assertTrue(String.format("Expected value %s for doc %d and ord %d, but was %s", expected, i, idx, actual), expected.equals(actual));
+        }
+
+        if (chunk < buffer.length) {
+          break;
+        }
+      }
+    }
+
+    // test bad field
+    termOrds = cache.getDocTermOrds(reader, "bogusfield");
+
     FieldCache.DEFAULT.purge(reader);
   }
 
@@ -223,4 +268,21 @@ public class TestFieldCache extends Luce
     r.close();
     dir.close();
   }
+
+  private String generateString(int i) { // returns either a value already stored in unicodeStrings[0..i) or a new random unicode string
+    String s = null;
+    if (i > 0 && random.nextInt(3) == 1) { // reuse is only possible once at least one earlier slot exists
+      // reuse past string -- try to find one that's not null
+      for(int iter = 0; iter < 10 && s == null;iter++) { // at most 10 random picks; stops at the first non-null entry
+        s = unicodeStrings[random.nextInt(i)];
+      }
+      if (s == null) { // every pick hit an empty slot: fall back to a new random string
+        s = _TestUtil.randomUnicodeString(random, 250);
+      }
+    } else {
+      s = _TestUtil.randomUnicodeString(random, 250);
+    }
+    return s;
+  }
+
 }