You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2010/01/28 12:40:17 UTC
svn commit: r904044 - in /lucene/java/trunk/src:
java/org/apache/lucene/analysis/ java/org/apache/lucene/index/
java/org/apache/lucene/store/ java/org/apache/lucene/util/
test/org/apache/lucene/util/
Author: mikemccand
Date: Thu Jan 28 11:40:16 2010
New Revision: 904044
URL: http://svn.apache.org/viewvc?rev=904044&view=rev
Log:
LUCENE-2217: cut over more places where arrays grow to use ArrayUtil.oversize/grow
Modified:
lucene/java/trunk/src/java/org/apache/lucene/analysis/PorterStemmer.java
lucene/java/trunk/src/java/org/apache/lucene/index/ByteBlockPool.java
lucene/java/trunk/src/java/org/apache/lucene/index/CharBlockPool.java
lucene/java/trunk/src/java/org/apache/lucene/index/MultipleTermPositions.java
lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosWriter.java
lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java
lucene/java/trunk/src/java/org/apache/lucene/store/IndexInput.java
lucene/java/trunk/src/java/org/apache/lucene/util/ArrayUtil.java
lucene/java/trunk/src/java/org/apache/lucene/util/SortedVIntList.java
lucene/java/trunk/src/java/org/apache/lucene/util/UnicodeUtil.java
lucene/java/trunk/src/test/org/apache/lucene/util/TestSortedVIntList.java
Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/PorterStemmer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/PorterStemmer.java?rev=904044&r1=904043&r2=904044&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/PorterStemmer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/PorterStemmer.java Thu Jan 28 11:40:16 2010
@@ -44,7 +44,12 @@
*/
-import java.io.*;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.FileInputStream;
+
+import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_CHAR;
+import org.apache.lucene.util.ArrayUtil;
/**
*
@@ -61,11 +66,10 @@
private int i, /* offset into b */
j, k, k0;
private boolean dirty = false;
- private static final int INC = 50; /* unit of size whereby b is increased */
- private static final int EXTRA = 1;
+ private static final int INITIAL_SIZE = 50;
public PorterStemmer() {
- b = new char[INC];
+ b = new char[INITIAL_SIZE];
i = 0;
}
@@ -81,10 +85,8 @@
* adding characters, you can call stem(void) to process the word.
*/
public void add(char ch) {
- if (b.length <= i + EXTRA) {
- char[] new_b = new char[b.length+INC];
- System.arraycopy(b, 0, new_b, 0, b.length);
- b = new_b;
+ if (b.length <= i) {
+ b = ArrayUtil.grow(b, i+1);
}
b[i++] = ch;
}
@@ -451,8 +453,7 @@
public boolean stem(char[] wordBuffer, int offset, int wordLen) {
reset();
if (b.length < wordLen) {
- char[] new_b = new char[wordLen + EXTRA];
- b = new_b;
+ b = new char[ArrayUtil.oversize(wordLen, NUM_BYTES_CHAR)];
}
System.arraycopy(wordBuffer, offset, b, 0, wordLen);
i = wordLen;
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/ByteBlockPool.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/ByteBlockPool.java?rev=904044&r1=904043&r2=904044&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/ByteBlockPool.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/ByteBlockPool.java Thu Jan 28 11:40:16 2010
@@ -34,6 +34,9 @@
* hit a non-zero byte. */
import java.util.Arrays;
+import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF;
+import org.apache.lucene.util.ArrayUtil;
+
final class ByteBlockPool {
@@ -83,7 +86,8 @@
public void nextBuffer() {
if (1+bufferUpto == buffers.length) {
- byte[][] newBuffers = new byte[(int) (buffers.length*1.5)][];
+ byte[][] newBuffers = new byte[ArrayUtil.oversize(buffers.length+1,
+ NUM_BYTES_OBJECT_REF)][];
System.arraycopy(buffers, 0, newBuffers, 0, buffers.length);
buffers = newBuffers;
}
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/CharBlockPool.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/CharBlockPool.java?rev=904044&r1=904043&r2=904044&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/CharBlockPool.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/CharBlockPool.java Thu Jan 28 11:40:16 2010
@@ -17,6 +17,9 @@
* limitations under the License.
*/
+import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF;
+import org.apache.lucene.util.ArrayUtil;
+
final class CharBlockPool {
public char[][] buffers = new char[10][];
@@ -42,7 +45,8 @@
public void nextBuffer() {
if (1+bufferUpto == buffers.length) {
- char[][] newBuffers = new char[(int) (buffers.length*1.5)][];
+ char[][] newBuffers = new char[ArrayUtil.oversize(buffers.length+1,
+ NUM_BYTES_OBJECT_REF)][];
System.arraycopy(buffers, 0, newBuffers, 0, buffers.length);
buffers = newBuffers;
}
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/MultipleTermPositions.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/MultipleTermPositions.java?rev=904044&r1=904043&r2=904044&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/MultipleTermPositions.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/MultipleTermPositions.java Thu Jan 28 11:40:16 2010
@@ -17,14 +17,14 @@
* limitations under the License.
*/
-import org.apache.lucene.util.PriorityQueue;
-
import java.io.IOException;
import java.util.Arrays;
-
import java.util.LinkedList;
import java.util.List;
+import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.ArrayUtil;
+
/**
* Allows you to iterate over the {@link TermPositions} for multiple {@link Term}s as
* a single {@link TermPositions}.
@@ -83,10 +83,8 @@
}
private void growArray() {
- int[] newArray = new int[_arraySize * 2];
- System.arraycopy(_array, 0, newArray, 0, _arraySize);
- _array = newArray;
- _arraySize *= 2;
+ _array = ArrayUtil.grow(_array, _arraySize+1);
+ _arraySize = _array.length;
}
}
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosWriter.java?rev=904044&r1=904043&r2=904044&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosWriter.java Thu Jan 28 11:40:16 2010
@@ -22,6 +22,8 @@
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.util.ArrayUtil;
+
/** This stores a monotonically increasing set of <Term, TermInfo> pairs in a
Directory. A TermInfos can be written once, in order. */
@@ -207,9 +209,7 @@
output.writeBytes(termBytes, start, length); // write delta bytes
output.writeVInt(fieldNumber); // write field num
if (lastTermBytes.length < termBytesLength) {
- byte[] newArray = new byte[(int) (termBytesLength*1.5)];
- System.arraycopy(lastTermBytes, 0, newArray, 0, start);
- lastTermBytes = newArray;
+ lastTermBytes = ArrayUtil.grow(lastTermBytes, termBytesLength);
}
System.arraycopy(termBytes, start, lastTermBytes, start, length);
lastTermBytesLength = termBytesLength;
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java?rev=904044&r1=904043&r2=904044&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java Thu Jan 28 11:40:16 2010
@@ -20,6 +20,7 @@
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
import java.io.IOException;
import java.util.Arrays;
@@ -460,18 +461,14 @@
if (preUTF8) {
// Term stored as java chars
if (charBuffer.length < totalLength) {
- char[] newCharBuffer = new char[(int) (1.5*totalLength)];
- System.arraycopy(charBuffer, 0, newCharBuffer, 0, start);
- charBuffer = newCharBuffer;
+ charBuffer = ArrayUtil.grow(charBuffer, totalLength);
}
tvf.readChars(charBuffer, start, deltaLength);
term = new String(charBuffer, 0, totalLength);
} else {
// Term stored as utf8 bytes
if (byteBuffer.length < totalLength) {
- byte[] newByteBuffer = new byte[(int) (1.5*totalLength)];
- System.arraycopy(byteBuffer, 0, newByteBuffer, 0, start);
- byteBuffer = newByteBuffer;
+ byteBuffer = ArrayUtil.grow(byteBuffer, totalLength);
}
tvf.readBytes(byteBuffer, start, deltaLength);
term = new String(byteBuffer, 0, totalLength, "UTF-8");
Modified: lucene/java/trunk/src/java/org/apache/lucene/store/IndexInput.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/store/IndexInput.java?rev=904044&r1=904043&r2=904044&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/store/IndexInput.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/store/IndexInput.java Thu Jan 28 11:40:16 2010
@@ -22,6 +22,9 @@
import java.util.Map;
import java.util.HashMap;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.RamUsageEstimator;
+
/** Abstract base class for input from a file in a {@link Directory}. A
* random-access input stream. Used for all Lucene index input operations.
* @see Directory
@@ -122,16 +125,18 @@
if (preUTF8Strings)
return readModifiedUTF8String();
int length = readVInt();
- if (bytes == null || length > bytes.length)
- bytes = new byte[(int) (length*1.25)];
+ if (bytes == null || length > bytes.length) {
+ bytes = new byte[ArrayUtil.oversize(length, 1)];
+ }
readBytes(bytes, 0, length);
return new String(bytes, 0, length, "UTF-8");
}
private String readModifiedUTF8String() throws IOException {
int length = readVInt();
- if (chars == null || length > chars.length)
- chars = new char[length];
+ if (chars == null || length > chars.length) {
+ chars = new char[ArrayUtil.oversize(length, RamUsageEstimator.NUM_BYTES_CHAR)];
+ }
readChars(chars, 0, length);
return new String(chars, 0, length);
}
@@ -157,10 +162,11 @@
else if ((b & 0xE0) != 0xE0) {
buffer[i] = (char)(((b & 0x1F) << 6)
| (readByte() & 0x3F));
- } else
+ } else {
buffer[i] = (char)(((b & 0x0F) << 12)
| ((readByte() & 0x3F) << 6)
| (readByte() & 0x3F));
+ }
}
}
@@ -181,10 +187,9 @@
byte b = readByte();
if ((b & 0x80) == 0){
//do nothing, we only need one byte
- }
- else if ((b & 0xE0) != 0xE0) {
+ } else if ((b & 0xE0) != 0xE0) {
readByte();//read an additional byte
- } else{
+ } else {
//read two additional bytes.
readByte();
readByte();
Modified: lucene/java/trunk/src/java/org/apache/lucene/util/ArrayUtil.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/util/ArrayUtil.java?rev=904044&r1=904043&r2=904044&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/util/ArrayUtil.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/util/ArrayUtil.java Thu Jan 28 11:40:16 2010
@@ -289,6 +289,29 @@
return array;
}
+ public static char[] grow(char[] array, int minSize) {
+ if (array.length < minSize) {
+ char[] newArray = new char[oversize(minSize, RamUsageEstimator.NUM_BYTES_CHAR)];
+ System.arraycopy(array, 0, newArray, 0, array.length);
+ return newArray;
+ } else
+ return array;
+ }
+
+ public static char[] grow(char[] array) {
+ return grow(array, 1 + array.length);
+ }
+
+ public static char[] shrink(char[] array, int targetSize) {
+ final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_CHAR);
+ if (newSize != array.length) {
+ char[] newArray = new char[newSize];
+ System.arraycopy(array, 0, newArray, 0, newSize);
+ return newArray;
+ } else
+ return array;
+ }
+
/**
* Returns hash of chars in range start (inclusive) to
* end (inclusive)
Modified: lucene/java/trunk/src/java/org/apache/lucene/util/SortedVIntList.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/util/SortedVIntList.java?rev=904044&r1=904043&r2=904044&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/util/SortedVIntList.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/util/SortedVIntList.java Thu Jan 28 11:40:16 2010
@@ -128,8 +128,8 @@
}
if ((lastBytePos + MAX_BYTES_PER_INT) > bytes.length) {
- // biggest possible int does not fit
- resizeBytes((bytes.length * 2) + MAX_BYTES_PER_INT);
+ // Biggest possible int does not fit.
+ resizeBytes(ArrayUtil.oversize(lastBytePos + MAX_BYTES_PER_INT, 1));
}
// See org.apache.lucene.store.IndexOutput.writeVInt()
Modified: lucene/java/trunk/src/java/org/apache/lucene/util/UnicodeUtil.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/util/UnicodeUtil.java?rev=904044&r1=904043&r2=904044&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/util/UnicodeUtil.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/util/UnicodeUtil.java Thu Jan 28 11:40:16 2010
@@ -78,9 +78,7 @@
public void setLength(int newLength) {
if (result.length < newLength) {
- byte[] newArray = new byte[(int) (1.5*newLength)];
- System.arraycopy(result, 0, newArray, 0, length);
- result = newArray;
+ result = ArrayUtil.grow(result, newLength);
}
length = newLength;
}
@@ -93,9 +91,7 @@
public void setLength(int newLength) {
if (result.length < newLength) {
- char[] newArray = new char[(int) (1.5*newLength)];
- System.arraycopy(result, 0, newArray, 0, length);
- result = newArray;
+ result = ArrayUtil.grow(result, newLength);
}
length = newLength;
}
@@ -120,10 +116,7 @@
final int code = (int) source[i++];
if (upto+4 > out.length) {
- byte[] newOut = new byte[2*out.length];
- assert newOut.length >= upto+4;
- System.arraycopy(out, 0, newOut, 0, upto);
- result.result = out = newOut;
+ out = result.result = ArrayUtil.grow(out, upto+4);
}
if (code < 0x80)
out[upto++] = (byte) code;
@@ -179,10 +172,7 @@
final int code = (int) source[i++];
if (upto+4 > out.length) {
- byte[] newOut = new byte[2*out.length];
- assert newOut.length >= upto+4;
- System.arraycopy(out, 0, newOut, 0, upto);
- result.result = out = newOut;
+ out = result.result = ArrayUtil.grow(out, upto+4);
}
if (code < 0x80)
out[upto++] = (byte) code;
@@ -233,10 +223,7 @@
final int code = (int) s.charAt(i);
if (upto+4 > out.length) {
- byte[] newOut = new byte[2*out.length];
- assert newOut.length >= upto+4;
- System.arraycopy(out, 0, newOut, 0, upto);
- result.result = out = newOut;
+ out = result.result = ArrayUtil.grow(out, upto+4);
}
if (code < 0x80)
out[upto++] = (byte) code;
@@ -283,9 +270,7 @@
final int end = offset + length;
char[] out = result.result;
if (result.offsets.length <= end) {
- int[] newOffsets = new int[2*end];
- System.arraycopy(result.offsets, 0, newOffsets, 0, result.offsets.length);
- result.offsets = newOffsets;
+ result.offsets = ArrayUtil.grow(result.offsets, end+1);
}
final int[] offsets = result.offsets;
@@ -299,9 +284,7 @@
// Pre-allocate for worst case 1-for-1
if (outUpto+length >= out.length) {
- char[] newOut = new char[2*(outUpto+length)];
- System.arraycopy(out, 0, newOut, 0, outUpto);
- result.result = out = newOut;
+ out = result.result = ArrayUtil.grow(out, outUpto+length+1);
}
while (upto < end) {
Modified: lucene/java/trunk/src/test/org/apache/lucene/util/TestSortedVIntList.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/util/TestSortedVIntList.java?rev=904044&r1=904043&r2=904044&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/util/TestSortedVIntList.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/util/TestSortedVIntList.java Thu Jan 28 11:40:16 2010
@@ -20,8 +20,6 @@
import java.io.IOException;
import java.util.BitSet;
-import org.apache.lucene.util.LuceneTestCase;
-
import junit.framework.TestSuite;
import junit.textui.TestRunner;
@@ -193,4 +191,11 @@
public void test12() {
tstIllegalArgExc(new int[] {0,1,1,2,3,5,8,0});
}
+ public void test13Allocation() throws Exception {
+ int [] a = new int[2000]; // SortedVIntList initial byte size is 128
+ for (int i = 0; i < a.length; i++) {
+ a[i] = (107 + i) * i;
+ }
+ tstIterator(new SortedVIntList(a), a);
+ }
}