You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/11/27 19:03:27 UTC

svn commit: r1206789 - in /lucene/dev/trunk/lucene/src: java/org/apache/lucene/util/BytesRef.java java/org/apache/lucene/util/UnicodeUtil.java test/org/apache/lucene/util/TestBytesRef.java

Author: rmuir
Date: Sun Nov 27 18:03:22 2011
New Revision: 1206789

URL: http://svn.apache.org/viewvc?rev=1206789&view=rev
Log:
LUCENE-3590: fix copyBytes to respect offset, remove dup'ed compareTo code, add javadocs and TODOs

Added:
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestBytesRef.java   (with props)
Modified:
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/BytesRef.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/UnicodeUtil.java

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/BytesRef.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/BytesRef.java?rev=1206789&r1=1206788&r2=1206789&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/BytesRef.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/BytesRef.java Sun Nov 27 18:03:22 2011
@@ -25,6 +25,7 @@ import java.util.Comparator;
  *
  *  @lucene.experimental */
 public final class BytesRef implements Comparable<BytesRef>,Cloneable {
+  /** An empty byte array for convenience */
   public static final byte[] EMPTY_BYTES = new byte[0]; 
 
   /** The contents of the BytesRef. Should never be {@code null}. */
@@ -36,8 +37,9 @@ public final class BytesRef implements C
   /** Length of used bytes. */
   public int length;
 
+  /** Create a BytesRef with {@link #EMPTY_BYTES} */
   public BytesRef() {
-    bytes = EMPTY_BYTES;
+    this(EMPTY_BYTES);
   }
 
   /** This instance will directly reference bytes w/o making a copy.
@@ -53,20 +55,23 @@ public final class BytesRef implements C
   /** This instance will directly reference bytes w/o making a copy.
    * bytes should not be null */
   public BytesRef(byte[] bytes) {
-    assert bytes != null;
-    this.bytes = bytes;
-    this.offset = 0;
-    this.length = bytes.length;
+    this(bytes, 0, bytes.length);
   }
 
+  /** 
+   * Create a BytesRef pointing to a new array of size <code>capacity</code>.
+   * Offset and length will both be zero.
+   */
   public BytesRef(int capacity) {
     this.bytes = new byte[capacity];
   }
 
   /**
-   * @param text Initialize the byte[] from the UTF8 bytes
-   * for the provided String.  This must be well-formed
-   * unicode text, with no unpaired surrogates or U+FFFF.
+   * Initialize the byte[] from the UTF8 bytes
+   * for the provided String.  
+   * 
+   * @param text This must be well-formed
+   * unicode text, with no unpaired surrogates.
    */
   public BytesRef(CharSequence text) {
     this();
@@ -79,11 +84,20 @@ public final class BytesRef implements C
    * @param text Must be well-formed unicode text, with no
    * unpaired surrogates or invalid UTF16 code units.
    */
+  // TODO broken if offset != 0
   public void copyChars(CharSequence text) {
     UnicodeUtil.UTF16toUTF8(text, 0, text.length(), this);
   }
   
+  /**
+   * Expert: compares the bytes against another BytesRef,
+   * returning true if the bytes are equal.
+   * 
+   * @param other Another BytesRef, should not be null.
+   * @lucene.internal
+   */
   public boolean bytesEquals(BytesRef other) {
+    assert other != null;
     if (length == other.length) {
       int otherUpto = other.offset;
       final byte[] otherBytes = other.bytes;
@@ -186,20 +200,24 @@ public final class BytesRef implements C
   /**
    * Copies the bytes from the given {@link BytesRef}
    * <p>
-   * NOTE: this method resets the offset to 0 and resizes the reference array
-   * if needed.
+   * NOTE: if this would exceed the array size, this method creates a 
+   * new reference array.
    */
   public void copyBytes(BytesRef other) {
     if (bytes.length < other.length) {
       bytes = new byte[other.length];
+      offset = 0;
     }
-    System.arraycopy(other.bytes, other.offset, bytes, 0, other.length);
+    System.arraycopy(other.bytes, other.offset, bytes, offset, other.length);
     length = other.length;
-    offset = 0;
   }
 
-
-
+  /**
+   * Appends the bytes from the given {@link BytesRef}
+   * <p>
+   * NOTE: if this would exceed the array size, this method creates a 
+   * new reference array.
+   */
   public void append(BytesRef other) {
     int newLen = length + other.length;
     if (bytes.length < newLen) {
@@ -212,30 +230,15 @@ public final class BytesRef implements C
     length = newLen;
   }
 
+  // TODO: does not take a non-zero existing offset into account.
+  /** @lucene.internal */
   public void grow(int newLength) {
     bytes = ArrayUtil.grow(bytes, newLength);
   }
 
   /** Unsigned byte order comparison */
   public int compareTo(BytesRef other) {
-    if (this == other) return 0;
-
-    final byte[] aBytes = this.bytes;
-    int aUpto = this.offset;
-    final byte[] bBytes = other.bytes;
-    int bUpto = other.offset;
-
-    final int aStop = aUpto + Math.min(this.length, other.length);
-
-    while(aUpto < aStop) {
-      int aByte = aBytes[aUpto++] & 0xff;
-      int bByte = bBytes[bUpto++] & 0xff;
-      int diff = aByte - bByte;
-      if (diff != 0) return diff;
-    }
-
-    // One is a prefix of the other, or, they are equal:
-    return this.length - other.length;
+    return utf8SortedAsUnicodeSortOrder.compare(this, other);
   }
   
   private final static Comparator<BytesRef> utf8SortedAsUnicodeSortOrder = new UTF8SortedAsUnicodeComparator();

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/UnicodeUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/UnicodeUtil.java?rev=1206789&r1=1206788&r2=1206789&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/UnicodeUtil.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/UnicodeUtil.java Sun Nov 27 18:03:22 2011
@@ -123,6 +123,7 @@ public final class UnicodeUtil {
 
   /** Encode characters from a char[] source, starting at
    *  offset for length chars.  Returns a hash of the resulting bytes.  After encoding, result.offset will always be 0. */
+  // TODO: broken if incoming result.offset != 0
   public static int UTF16toUTF8WithHash(final char[] source, final int offset, final int length, BytesRef result) {
     int hash = 0;
     int upto = 0;
@@ -179,6 +180,7 @@ public final class UnicodeUtil {
   /** Encode characters from a char[] source, starting at
    *  offset for length chars. After encoding, result.offset will always be 0.
    */
+  // TODO: broken if incoming result.offset != 0
   public static void UTF16toUTF8(final char[] source, final int offset, final int length, BytesRef result) {
 
     int upto = 0;
@@ -234,6 +236,7 @@ public final class UnicodeUtil {
   /** Encode characters from this String, starting at offset
    *  for length characters. After encoding, result.offset will always be 0.
    */
+  // TODO: broken if incoming result.offset != 0
   public static void UTF16toUTF8(final CharSequence s, final int offset, final int length, BytesRef result) {
     final int end = offset + length;
 
@@ -427,8 +430,10 @@ public final class UnicodeUtil {
     return codePointCount;
   }
 
+  // TODO: broken if incoming result.offset != 0
   public static void UTF8toUTF32(final BytesRef utf8, final IntsRef utf32) {
     // pre-alloc for worst case
+    // TODO: ints cannot be null, should be an assert
     if (utf32.ints == null || utf32.ints.length < utf8.length) {
       utf32.ints = new int[utf8.length];
     }
@@ -567,6 +572,7 @@ public final class UnicodeUtil {
   * can result in an ArrayIndexOutOfBoundsException if invalid UTF-8 is passed).
    * Explicit checks for valid UTF-8 are not performed. 
    */
+  // TODO: broken if chars.offset != 0
   public static void UTF8toUTF16(byte[] utf8, int offset, int length, CharsRef chars) {
     int out_offset = chars.offset = 0;
     final char[] out = chars.chars =  ArrayUtil.grow(chars.chars, length);

Added: lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestBytesRef.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestBytesRef.java?rev=1206789&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestBytesRef.java (added)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestBytesRef.java Sun Nov 27 18:03:22 2011
@@ -0,0 +1,51 @@
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class TestBytesRef extends LuceneTestCase {
+  public void testEmpty() {
+    BytesRef b = new BytesRef();
+    assertEquals(BytesRef.EMPTY_BYTES, b.bytes);
+    assertEquals(0, b.offset);
+    assertEquals(0, b.length);
+  }
+  
+  public void testFromBytes() {
+    byte bytes[] = new byte[] { (byte)'a', (byte)'b', (byte)'c', (byte)'d' };
+    BytesRef b = new BytesRef(bytes);
+    assertEquals(bytes, b.bytes);
+    assertEquals(0, b.offset);
+    assertEquals(4, b.length);
+    
+    BytesRef b2 = new BytesRef(bytes, 1, 3);
+    assertEquals("bcd", b2.utf8ToString());
+    
+    assertFalse(b.equals(b2));
+  }
+  
+  public void testFromChars() {
+    for (int i = 0; i < 100; i++) {
+      String s = _TestUtil.randomUnicodeString(random);
+      String s2 = new BytesRef(s).utf8ToString();
+      assertEquals(s, s2);
+    }
+    
+    // only for 4.x
+    assertEquals("\uFFFF", new BytesRef("\uFFFF").utf8ToString());
+  }
+}