You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/11/27 19:03:27 UTC
svn commit: r1206789 - in /lucene/dev/trunk/lucene/src:
java/org/apache/lucene/util/BytesRef.java
java/org/apache/lucene/util/UnicodeUtil.java
test/org/apache/lucene/util/TestBytesRef.java
Author: rmuir
Date: Sun Nov 27 18:03:22 2011
New Revision: 1206789
URL: http://svn.apache.org/viewvc?rev=1206789&view=rev
Log:
LUCENE-3590: fix copyBytes to respect offset, remove dup'ed compareTo code, add javadocs and TODOs
Added:
lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestBytesRef.java (with props)
Modified:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/BytesRef.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/UnicodeUtil.java
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/BytesRef.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/BytesRef.java?rev=1206789&r1=1206788&r2=1206789&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/BytesRef.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/BytesRef.java Sun Nov 27 18:03:22 2011
@@ -25,6 +25,7 @@ import java.util.Comparator;
*
* @lucene.experimental */
public final class BytesRef implements Comparable<BytesRef>,Cloneable {
+ /** An empty byte array for convenience */
public static final byte[] EMPTY_BYTES = new byte[0];
/** The contents of the BytesRef. Should never be {@code null}. */
@@ -36,8 +37,9 @@ public final class BytesRef implements C
/** Length of used bytes. */
public int length;
+ /** Create a BytesRef with {@link #EMPTY_BYTES} */
public BytesRef() {
- bytes = EMPTY_BYTES;
+ this(EMPTY_BYTES);
}
/** This instance will directly reference bytes w/o making a copy.
@@ -53,20 +55,23 @@ public final class BytesRef implements C
/** This instance will directly reference bytes w/o making a copy.
* bytes should not be null */
public BytesRef(byte[] bytes) {
- assert bytes != null;
- this.bytes = bytes;
- this.offset = 0;
- this.length = bytes.length;
+ this(bytes, 0, bytes.length);
}
+ /**
+ * Create a BytesRef pointing to a new array of size <code>capacity</code>.
+ * Offset and length will both be zero.
+ */
public BytesRef(int capacity) {
this.bytes = new byte[capacity];
}
/**
- * @param text Initialize the byte[] from the UTF8 bytes
- * for the provided String. This must be well-formed
- * unicode text, with no unpaired surrogates or U+FFFF.
+ * Initialize the byte[] from the UTF8 bytes
+ * for the provided String.
+ *
+ * @param text This must be well-formed
+ * unicode text, with no unpaired surrogates.
*/
public BytesRef(CharSequence text) {
this();
@@ -79,11 +84,20 @@ public final class BytesRef implements C
* @param text Must be well-formed unicode text, with no
* unpaired surrogates or invalid UTF16 code units.
*/
+ // TODO broken if offset != 0
public void copyChars(CharSequence text) {
UnicodeUtil.UTF16toUTF8(text, 0, text.length(), this);
}
+ /**
+ * Expert: compares the bytes against another BytesRef,
+ * returning true if the bytes are equal.
+ *
+ * @param other Another BytesRef, should not be null.
+ * @lucene.internal
+ */
public boolean bytesEquals(BytesRef other) {
+ assert other != null;
if (length == other.length) {
int otherUpto = other.offset;
final byte[] otherBytes = other.bytes;
@@ -186,20 +200,24 @@ public final class BytesRef implements C
/**
* Copies the bytes from the given {@link BytesRef}
* <p>
- * NOTE: this method resets the offset to 0 and resizes the reference array
- * if needed.
+ * NOTE: if this would exceed the array size, this method creates a
+ * new reference array.
*/
public void copyBytes(BytesRef other) {
if (bytes.length < other.length) {
bytes = new byte[other.length];
+ offset = 0;
}
- System.arraycopy(other.bytes, other.offset, bytes, 0, other.length);
+ System.arraycopy(other.bytes, other.offset, bytes, offset, other.length);
length = other.length;
- offset = 0;
}
-
-
+ /**
+ * Appends the bytes from the given {@link BytesRef}
+ * <p>
+ * NOTE: if this would exceed the array size, this method creates a
+ * new reference array.
+ */
public void append(BytesRef other) {
int newLen = length + other.length;
if (bytes.length < newLen) {
@@ -212,30 +230,15 @@ public final class BytesRef implements C
length = newLen;
}
+ // TODO: stupid if existing offset is non-zero.
+ /** @lucene.internal */
public void grow(int newLength) {
bytes = ArrayUtil.grow(bytes, newLength);
}
/** Unsigned byte order comparison */
public int compareTo(BytesRef other) {
- if (this == other) return 0;
-
- final byte[] aBytes = this.bytes;
- int aUpto = this.offset;
- final byte[] bBytes = other.bytes;
- int bUpto = other.offset;
-
- final int aStop = aUpto + Math.min(this.length, other.length);
-
- while(aUpto < aStop) {
- int aByte = aBytes[aUpto++] & 0xff;
- int bByte = bBytes[bUpto++] & 0xff;
- int diff = aByte - bByte;
- if (diff != 0) return diff;
- }
-
- // One is a prefix of the other, or, they are equal:
- return this.length - other.length;
+ return utf8SortedAsUnicodeSortOrder.compare(this, other);
}
private final static Comparator<BytesRef> utf8SortedAsUnicodeSortOrder = new UTF8SortedAsUnicodeComparator();
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/UnicodeUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/UnicodeUtil.java?rev=1206789&r1=1206788&r2=1206789&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/UnicodeUtil.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/UnicodeUtil.java Sun Nov 27 18:03:22 2011
@@ -123,6 +123,7 @@ public final class UnicodeUtil {
/** Encode characters from a char[] source, starting at
* offset for length chars. Returns a hash of the resulting bytes. After encoding, result.offset will always be 0. */
+ // TODO: broken if incoming result.offset != 0
public static int UTF16toUTF8WithHash(final char[] source, final int offset, final int length, BytesRef result) {
int hash = 0;
int upto = 0;
@@ -179,6 +180,7 @@ public final class UnicodeUtil {
/** Encode characters from a char[] source, starting at
* offset for length chars. After encoding, result.offset will always be 0.
*/
+ // TODO: broken if incoming result.offset != 0
public static void UTF16toUTF8(final char[] source, final int offset, final int length, BytesRef result) {
int upto = 0;
@@ -234,6 +236,7 @@ public final class UnicodeUtil {
/** Encode characters from this String, starting at offset
* for length characters. After encoding, result.offset will always be 0.
*/
+ // TODO: broken if incoming result.offset != 0
public static void UTF16toUTF8(final CharSequence s, final int offset, final int length, BytesRef result) {
final int end = offset + length;
@@ -427,8 +430,10 @@ public final class UnicodeUtil {
return codePointCount;
}
+ // TODO: broken if incoming result.offset != 0
public static void UTF8toUTF32(final BytesRef utf8, final IntsRef utf32) {
// pre-alloc for worst case
+ // TODO: ints cannot be null, should be an assert
if (utf32.ints == null || utf32.ints.length < utf8.length) {
utf32.ints = new int[utf8.length];
}
@@ -567,6 +572,7 @@ public final class UnicodeUtil {
* can result in an ArrayOutOfBoundsException if invalid UTF-8 is passed).
* Explicit checks for valid UTF-8 are not performed.
*/
+ // TODO: broken if chars.offset != 0
public static void UTF8toUTF16(byte[] utf8, int offset, int length, CharsRef chars) {
int out_offset = chars.offset = 0;
final char[] out = chars.chars = ArrayUtil.grow(chars.chars, length);
Added: lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestBytesRef.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestBytesRef.java?rev=1206789&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestBytesRef.java (added)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestBytesRef.java Sun Nov 27 18:03:22 2011
@@ -0,0 +1,51 @@
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class TestBytesRef extends LuceneTestCase {
+ public void testEmpty() {
+ BytesRef b = new BytesRef();
+ assertEquals(BytesRef.EMPTY_BYTES, b.bytes);
+ assertEquals(0, b.offset);
+ assertEquals(0, b.length);
+ }
+
+ public void testFromBytes() {
+ byte bytes[] = new byte[] { (byte)'a', (byte)'b', (byte)'c', (byte)'d' };
+ BytesRef b = new BytesRef(bytes);
+ assertEquals(bytes, b.bytes);
+ assertEquals(0, b.offset);
+ assertEquals(4, b.length);
+
+ BytesRef b2 = new BytesRef(bytes, 1, 3);
+ assertEquals("bcd", b2.utf8ToString());
+
+ assertFalse(b.equals(b2));
+ }
+
+ public void testFromChars() {
+ for (int i = 0; i < 100; i++) {
+ String s = _TestUtil.randomUnicodeString(random);
+ String s2 = new BytesRef(s).utf8ToString();
+ assertEquals(s, s2);
+ }
+
+ // only for 4.x
+ assertEquals("\uFFFF", new BytesRef("\uFFFF").utf8ToString());
+ }
+}