You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/04/05 16:08:31 UTC

svn commit: r1309866 - in /lucene/dev/trunk/lucene: ./ core/src/java/org/apache/lucene/codecs/ core/src/java/org/apache/lucene/codecs/lucene3x/ core/src/java/org/apache/lucene/util/

Author: mikemccand
Date: Thu Apr  5 14:08:30 2012
New Revision: 1309866

URL: http://svn.apache.org/viewvc?rev=1309866&view=rev
Log:
LUCENE-3932: speed up Lucene3X's loading of the terms index by pre-sizing the in-memory PackedInts based on size of the .tii file

Added:
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/MathUtil.java   (with props)
Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListWriter.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReaderIndex.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1309866&r1=1309865&r2=1309866&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Thu Apr  5 14:08:30 2012
@@ -690,6 +690,9 @@ Optimizations
 * LUCENE-3795: Replace contrib/spatial with modules/spatial.  This includes 
   a basic spatial strategy interface.  (David Smiley, Chris Male, ryan)
     
+* LUCENE-3932: Lucene3x codec loads terms index faster, by
+  pre-allocating the packed ints array based on the .tii file size
+  (Sean Bridges via Mike McCandless)
   
 Bug fixes
 

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java?rev=1309866&r1=1309865&r2=1309866&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java Thu Apr  5 14:08:30 2012
@@ -22,6 +22,7 @@ import java.util.Arrays;
 
 import org.apache.lucene.store.BufferedIndexInput;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.MathUtil;
 
 /**
  * This abstract class reads skip lists with multiple levels.
@@ -184,21 +185,9 @@ public abstract class MultiLevelSkipList
     }
   }
   
-  /** returns x == 0 ? 0 : Math.floor(Math.log(x) / Math.log(base)) */
-  static int log(int x, int base) {
-    assert base >= 2;
-    int ret = 0;
-    long n = base; // needs to be a long to avoid overflow
-    while (x >= n) {
-      n *= base;
-      ret++;
-    }
-    return ret;
-  }
-  
   /** Loads the skip levels  */
   private void loadSkipLevels() throws IOException {
-    numberOfSkipLevels = log(docCount, skipInterval[0]);
+    numberOfSkipLevels = MathUtil.log(docCount, skipInterval[0]);
     if (numberOfSkipLevels > maxNumberOfSkipLevels) {
       numberOfSkipLevels = maxNumberOfSkipLevels;
     }

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListWriter.java?rev=1309866&r1=1309865&r2=1309866&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListWriter.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListWriter.java Thu Apr  5 14:08:30 2012
@@ -21,6 +21,7 @@ import java.io.IOException;
 
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.RAMOutputStream;
+import org.apache.lucene.util.MathUtil;
 
 /**
  * This abstract class writes skip lists with multiple levels.
@@ -61,7 +62,7 @@ public abstract class MultiLevelSkipList
     this.skipInterval = skipInterval;
     
     // calculate the maximum number of skip levels for this document frequency
-    numberOfSkipLevels = MultiLevelSkipListReader.log(df, skipInterval);
+    numberOfSkipLevels = MathUtil.log(df, skipInterval);
     
     // make sure it does not exceed maxSkipLevels
     if (numberOfSkipLevels > maxSkipLevels) {

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReaderIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReaderIndex.java?rev=1309866&r1=1309865&r2=1309866&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReaderIndex.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReaderIndex.java Thu Apr  5 14:08:30 2012
@@ -25,6 +25,7 @@ import java.util.List;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.util.BitUtil;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.MathUtil;
 import org.apache.lucene.util.PagedBytes.PagedBytesDataInput;
 import org.apache.lucene.util.PagedBytes.PagedBytesDataOutput;
 import org.apache.lucene.util.PagedBytes;
@@ -72,7 +73,9 @@ class TermInfosReaderIndex {
     PagedBytes dataPagedBytes = new PagedBytes(estimatePageBits(initialSize));
     PagedBytesDataOutput dataOutput = dataPagedBytes.getDataOutput();
 
-    GrowableWriter indexToTerms = new GrowableWriter(4, indexSize, false);
+    final int bitEstimate = 1+MathUtil.log(tiiFileLength, 2);
+    GrowableWriter indexToTerms = new GrowableWriter(bitEstimate, indexSize, false);
+
     String currentField = null;
     List<String> fieldStrs = new ArrayList<String>();
     int fieldCounter = -1;

Added: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/MathUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/MathUtil.java?rev=1309866&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/MathUtil.java (added)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/MathUtil.java Thu Apr  5 14:08:30 2012
@@ -0,0 +1,36 @@
+package org.apache.lucene.util;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public final class MathUtil { // holder for static math helpers
+
+  // Private constructor: this class is never instantiated.
+  private MathUtil() {
+  }
+
+  /** Returns floor(log(x) / log(base)) for x >= 1, and 0 for any x < base (including zero and negative x). */
+  public static int log(long x, int base) {
+    assert base > 1; // precondition; only checked when assertions are enabled
+    int ret = 0;
+    while (x >= base) { // each pass strips one factor of base from x
+      x /= base; // x shrinks toward zero instead of a bound growing, so nothing here can overflow
+      ret++;
+    }
+    return ret;
+  }
+}