You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2010/01/28 12:40:17 UTC

svn commit: r904044 - in /lucene/java/trunk/src: java/org/apache/lucene/analysis/ java/org/apache/lucene/index/ java/org/apache/lucene/store/ java/org/apache/lucene/util/ test/org/apache/lucene/util/

Author: mikemccand
Date: Thu Jan 28 11:40:16 2010
New Revision: 904044

URL: http://svn.apache.org/viewvc?rev=904044&view=rev
Log:
LUCENE-2217: cut over more places where arrays grow to ArrayUtil.oversize/grow

Modified:
    lucene/java/trunk/src/java/org/apache/lucene/analysis/PorterStemmer.java
    lucene/java/trunk/src/java/org/apache/lucene/index/ByteBlockPool.java
    lucene/java/trunk/src/java/org/apache/lucene/index/CharBlockPool.java
    lucene/java/trunk/src/java/org/apache/lucene/index/MultipleTermPositions.java
    lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosWriter.java
    lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java
    lucene/java/trunk/src/java/org/apache/lucene/store/IndexInput.java
    lucene/java/trunk/src/java/org/apache/lucene/util/ArrayUtil.java
    lucene/java/trunk/src/java/org/apache/lucene/util/SortedVIntList.java
    lucene/java/trunk/src/java/org/apache/lucene/util/UnicodeUtil.java
    lucene/java/trunk/src/test/org/apache/lucene/util/TestSortedVIntList.java

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/PorterStemmer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/PorterStemmer.java?rev=904044&r1=904043&r2=904044&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/PorterStemmer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/PorterStemmer.java Thu Jan 28 11:40:16 2010
@@ -44,7 +44,12 @@
 */
 
 
-import java.io.*;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.FileInputStream;
+
+import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_CHAR;
+import org.apache.lucene.util.ArrayUtil;
 
 /**
  *
@@ -61,11 +66,10 @@
   private int i,    /* offset into b */
     j, k, k0;
   private boolean dirty = false;
-  private static final int INC = 50; /* unit of size whereby b is increased */
-  private static final int EXTRA = 1;
+  private static final int INITIAL_SIZE = 50;
 
   public PorterStemmer() {
-    b = new char[INC];
+    b = new char[INITIAL_SIZE];
     i = 0;
   }
 
@@ -81,10 +85,8 @@
    * adding characters, you can call stem(void) to process the word.
    */
   public void add(char ch) {
-    if (b.length <= i + EXTRA) {
-      char[] new_b = new char[b.length+INC];
-      System.arraycopy(b, 0, new_b, 0, b.length);
-      b = new_b;
+    if (b.length <= i) {
+      b = ArrayUtil.grow(b, i+1);
     }
     b[i++] = ch;
   }
@@ -451,8 +453,7 @@
   public boolean stem(char[] wordBuffer, int offset, int wordLen) {
     reset();
     if (b.length < wordLen) {
-      char[] new_b = new char[wordLen + EXTRA];
-      b = new_b;
+      b = new char[ArrayUtil.oversize(wordLen, NUM_BYTES_CHAR)];
     }
     System.arraycopy(wordBuffer, offset, b, 0, wordLen);
     i = wordLen;

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/ByteBlockPool.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/ByteBlockPool.java?rev=904044&r1=904043&r2=904044&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/ByteBlockPool.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/ByteBlockPool.java Thu Jan 28 11:40:16 2010
@@ -34,6 +34,9 @@
  * hit a non-zero byte. */
 
 import java.util.Arrays;
+import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF;
+import org.apache.lucene.util.ArrayUtil;
+
 
 final class ByteBlockPool {
 
@@ -83,7 +86,8 @@
 
   public void nextBuffer() {
     if (1+bufferUpto == buffers.length) {
-      byte[][] newBuffers = new byte[(int) (buffers.length*1.5)][];
+      byte[][] newBuffers = new byte[ArrayUtil.oversize(buffers.length+1,
+                                                        NUM_BYTES_OBJECT_REF)][];
       System.arraycopy(buffers, 0, newBuffers, 0, buffers.length);
       buffers = newBuffers;
     }

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/CharBlockPool.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/CharBlockPool.java?rev=904044&r1=904043&r2=904044&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/CharBlockPool.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/CharBlockPool.java Thu Jan 28 11:40:16 2010
@@ -17,6 +17,9 @@
  * limitations under the License.
  */
 
+import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF;
+import org.apache.lucene.util.ArrayUtil;
+
 final class CharBlockPool {
 
   public char[][] buffers = new char[10][];
@@ -42,7 +45,8 @@
 
   public void nextBuffer() {
     if (1+bufferUpto == buffers.length) {
-      char[][] newBuffers = new char[(int) (buffers.length*1.5)][];
+      char[][] newBuffers = new char[ArrayUtil.oversize(buffers.length+1,
+                                                        NUM_BYTES_OBJECT_REF)][];
       System.arraycopy(buffers, 0, newBuffers, 0, buffers.length);
       buffers = newBuffers;
     }

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/MultipleTermPositions.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/MultipleTermPositions.java?rev=904044&r1=904043&r2=904044&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/MultipleTermPositions.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/MultipleTermPositions.java Thu Jan 28 11:40:16 2010
@@ -17,14 +17,14 @@
  * limitations under the License.
  */
 
-import org.apache.lucene.util.PriorityQueue;
-
 import java.io.IOException;
 import java.util.Arrays;
-
 import java.util.LinkedList;
 import java.util.List;
 
+import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.ArrayUtil;
+
 /**
  * Allows you to iterate over the {@link TermPositions} for multiple {@link Term}s as
  * a single {@link TermPositions}.
@@ -83,10 +83,8 @@
     }
 
     private void growArray() {
-      int[] newArray = new int[_arraySize * 2];
-      System.arraycopy(_array, 0, newArray, 0, _arraySize);
-      _array = newArray;
-      _arraySize *= 2;
+      _array = ArrayUtil.grow(_array, _arraySize+1);
+      _arraySize = _array.length;
     }
   }
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosWriter.java?rev=904044&r1=904043&r2=904044&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosWriter.java Thu Jan 28 11:40:16 2010
@@ -22,6 +22,8 @@
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.util.ArrayUtil;
+
 
 /** This stores a monotonically increasing set of <Term, TermInfo> pairs in a
   Directory.  A TermInfos can be written once, in order.  */
@@ -207,9 +209,7 @@
     output.writeBytes(termBytes, start, length);  // write delta bytes
     output.writeVInt(fieldNumber); // write field num
     if (lastTermBytes.length < termBytesLength) {
-      byte[] newArray = new byte[(int) (termBytesLength*1.5)];
-      System.arraycopy(lastTermBytes, 0, newArray, 0, start);
-      lastTermBytes = newArray;
+      lastTermBytes = ArrayUtil.grow(lastTermBytes, termBytesLength);
     }
     System.arraycopy(termBytes, start, lastTermBytes, start, length);
     lastTermBytesLength = termBytesLength;

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java?rev=904044&r1=904043&r2=904044&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java Thu Jan 28 11:40:16 2010
@@ -20,6 +20,7 @@
 import org.apache.lucene.store.BufferedIndexInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
 
 import java.io.IOException;
 import java.util.Arrays;
@@ -460,18 +461,14 @@
       if (preUTF8) {
         // Term stored as java chars
         if (charBuffer.length < totalLength) {
-          char[] newCharBuffer = new char[(int) (1.5*totalLength)];
-          System.arraycopy(charBuffer, 0, newCharBuffer, 0, start);
-          charBuffer = newCharBuffer;
+          charBuffer = ArrayUtil.grow(charBuffer, totalLength);
         }
         tvf.readChars(charBuffer, start, deltaLength);
         term = new String(charBuffer, 0, totalLength);
       } else {
         // Term stored as utf8 bytes
         if (byteBuffer.length < totalLength) {
-          byte[] newByteBuffer = new byte[(int) (1.5*totalLength)];
-          System.arraycopy(byteBuffer, 0, newByteBuffer, 0, start);
-          byteBuffer = newByteBuffer;
+          byteBuffer = ArrayUtil.grow(byteBuffer, totalLength);
         }
         tvf.readBytes(byteBuffer, start, deltaLength);
         term = new String(byteBuffer, 0, totalLength, "UTF-8");

Modified: lucene/java/trunk/src/java/org/apache/lucene/store/IndexInput.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/store/IndexInput.java?rev=904044&r1=904043&r2=904044&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/store/IndexInput.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/store/IndexInput.java Thu Jan 28 11:40:16 2010
@@ -22,6 +22,9 @@
 import java.util.Map;
 import java.util.HashMap;
 
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.RamUsageEstimator;
+
 /** Abstract base class for input from a file in a {@link Directory}.  A
  * random-access input stream.  Used for all Lucene index input operations.
  * @see Directory
@@ -122,16 +125,18 @@
     if (preUTF8Strings)
       return readModifiedUTF8String();
     int length = readVInt();
-    if (bytes == null || length > bytes.length)
-      bytes = new byte[(int) (length*1.25)];
+    if (bytes == null || length > bytes.length) {
+      bytes = new byte[ArrayUtil.oversize(length, 1)];
+    }
     readBytes(bytes, 0, length);
     return new String(bytes, 0, length, "UTF-8");
   }
 
   private String readModifiedUTF8String() throws IOException {
     int length = readVInt();
-    if (chars == null || length > chars.length)
-      chars = new char[length];
+    if (chars == null || length > chars.length) {
+      chars = new char[ArrayUtil.oversize(length, RamUsageEstimator.NUM_BYTES_CHAR)];
+    }
     readChars(chars, 0, length);
     return new String(chars, 0, length);
   }
@@ -157,10 +162,11 @@
       else if ((b & 0xE0) != 0xE0) {
 	buffer[i] = (char)(((b & 0x1F) << 6)
 		 | (readByte() & 0x3F));
-      } else
+      } else {
 	buffer[i] = (char)(((b & 0x0F) << 12)
 		| ((readByte() & 0x3F) << 6)
 	        |  (readByte() & 0x3F));
+      }
     }
   }
 
@@ -181,10 +187,9 @@
       byte b = readByte();
       if ((b & 0x80) == 0){
         //do nothing, we only need one byte
-      }
-      else if ((b & 0xE0) != 0xE0) {
+      } else if ((b & 0xE0) != 0xE0) {
         readByte();//read an additional byte
-      } else{      
+      } else {      
         //read two additional bytes.
         readByte();
         readByte();

Modified: lucene/java/trunk/src/java/org/apache/lucene/util/ArrayUtil.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/util/ArrayUtil.java?rev=904044&r1=904043&r2=904044&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/util/ArrayUtil.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/util/ArrayUtil.java Thu Jan 28 11:40:16 2010
@@ -289,6 +289,29 @@
       return array;
   }
 
+  public static char[] grow(char[] array, int minSize) {
+    if (array.length < minSize) {
+      char[] newArray = new char[oversize(minSize, RamUsageEstimator.NUM_BYTES_CHAR)];
+      System.arraycopy(array, 0, newArray, 0, array.length);
+      return newArray;
+    } else
+      return array;
+  }
+
+  public static char[] grow(char[] array) {
+    return grow(array, 1 + array.length);
+  }
+
+  public static char[] shrink(char[] array, int targetSize) {
+    final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_CHAR);
+    if (newSize != array.length) {
+      char[] newArray = new char[newSize];
+      System.arraycopy(array, 0, newArray, 0, newSize);
+      return newArray;
+    } else
+      return array;
+  }
+
   /**
    * Returns hash of chars in range start (inclusive) to
    * end (inclusive)

Modified: lucene/java/trunk/src/java/org/apache/lucene/util/SortedVIntList.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/util/SortedVIntList.java?rev=904044&r1=904043&r2=904044&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/util/SortedVIntList.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/util/SortedVIntList.java Thu Jan 28 11:40:16 2010
@@ -128,8 +128,8 @@
       }
   
       if ((lastBytePos + MAX_BYTES_PER_INT) > bytes.length) {
-        // biggest possible int does not fit
-        resizeBytes((bytes.length * 2) + MAX_BYTES_PER_INT);
+        // Biggest possible int does not fit.
+        resizeBytes(ArrayUtil.oversize(lastBytePos + MAX_BYTES_PER_INT, 1));
       }
   
       // See org.apache.lucene.store.IndexOutput.writeVInt()

Modified: lucene/java/trunk/src/java/org/apache/lucene/util/UnicodeUtil.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/util/UnicodeUtil.java?rev=904044&r1=904043&r2=904044&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/util/UnicodeUtil.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/util/UnicodeUtil.java Thu Jan 28 11:40:16 2010
@@ -78,9 +78,7 @@
 
     public void setLength(int newLength) {
       if (result.length < newLength) {
-        byte[] newArray = new byte[(int) (1.5*newLength)];
-        System.arraycopy(result, 0, newArray, 0, length);
-        result = newArray;
+        result = ArrayUtil.grow(result, newLength);
       }
       length = newLength;
     }
@@ -93,9 +91,7 @@
 
     public void setLength(int newLength) {
       if (result.length < newLength) {
-        char[] newArray = new char[(int) (1.5*newLength)];
-        System.arraycopy(result, 0, newArray, 0, length);
-        result = newArray;
+        result = ArrayUtil.grow(result, newLength);
       }
       length = newLength;
     }
@@ -120,10 +116,7 @@
       final int code = (int) source[i++];
 
       if (upto+4 > out.length) {
-        byte[] newOut = new byte[2*out.length];
-        assert newOut.length >= upto+4;
-        System.arraycopy(out, 0, newOut, 0, upto);
-        result.result = out = newOut;
+        out = result.result = ArrayUtil.grow(out, upto+4);
       }
       if (code < 0x80)
         out[upto++] = (byte) code;
@@ -179,10 +172,7 @@
       final int code = (int) source[i++];
 
       if (upto+4 > out.length) {
-        byte[] newOut = new byte[2*out.length];
-        assert newOut.length >= upto+4;
-        System.arraycopy(out, 0, newOut, 0, upto);
-        result.result = out = newOut;
+        out = result.result = ArrayUtil.grow(out, upto+4);
       }
       if (code < 0x80)
         out[upto++] = (byte) code;
@@ -233,10 +223,7 @@
       final int code = (int) s.charAt(i);
 
       if (upto+4 > out.length) {
-        byte[] newOut = new byte[2*out.length];
-        assert newOut.length >= upto+4;
-        System.arraycopy(out, 0, newOut, 0, upto);
-        result.result = out = newOut;
+        out = result.result = ArrayUtil.grow(out, upto+4);
       }
       if (code < 0x80)
         out[upto++] = (byte) code;
@@ -283,9 +270,7 @@
     final int end = offset + length;
     char[] out = result.result;
     if (result.offsets.length <= end) {
-      int[] newOffsets = new int[2*end];
-      System.arraycopy(result.offsets, 0, newOffsets, 0, result.offsets.length);
-      result.offsets  = newOffsets;
+      result.offsets = ArrayUtil.grow(result.offsets, end+1);
     }
     final int[] offsets = result.offsets;
 
@@ -299,9 +284,7 @@
 
     // Pre-allocate for worst case 1-for-1
     if (outUpto+length >= out.length) {
-      char[] newOut = new char[2*(outUpto+length)];
-      System.arraycopy(out, 0, newOut, 0, outUpto);
-      result.result = out = newOut;
+      out = result.result = ArrayUtil.grow(out, outUpto+length+1);
     }
 
     while (upto < end) {

Modified: lucene/java/trunk/src/test/org/apache/lucene/util/TestSortedVIntList.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/util/TestSortedVIntList.java?rev=904044&r1=904043&r2=904044&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/util/TestSortedVIntList.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/util/TestSortedVIntList.java Thu Jan 28 11:40:16 2010
@@ -20,8 +20,6 @@
 import java.io.IOException;
 import java.util.BitSet;
 
-import org.apache.lucene.util.LuceneTestCase;
-
 import junit.framework.TestSuite;
 import junit.textui.TestRunner;
 
@@ -193,4 +191,11 @@
   public void test12() {
    tstIllegalArgExc(new int[] {0,1,1,2,3,5,8,0});
   }
+  public void test13Allocation() throws Exception {
+    int [] a = new int[2000]; // SortedVIntList initial byte size is 128
+    for (int i = 0; i < a.length; i++) {
+      a[i] = (107 + i) * i;
+    }
+    tstIterator(new SortedVIntList(a), a);
+  }
 }