You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by sm...@apache.org on 2017/07/29 00:09:58 UTC

arrow git commit: ARROW-1267: [Java] Handle zero length case in BitVector.splitAndTransfer

Repository: arrow
Updated Branches:
  refs/heads/master 3b14765e8 -> 1dd0f5f58


ARROW-1267: [Java] Handle zero length case in BitVector.splitAndTransfer

@jacques-n
@StevenMPhillips

PR for the change: https://github.com/dremio/arrow/commit/b794dfa5fe209cf8e3c17cb828964a0a0863c1f8

A new unit test has been added on top of original change.

Author: Steven Phillips <st...@dremio.com>
Author: siddharth <si...@dremio.com>

Closes #890 from siddharthteotia/ARROW-1267-PR and squashes the following commits:

89a08d9 [siddharth] Handle zero length in BitVector.splitAndTransfer
84946b7 [Steven Phillips] Handle zero length case in BitVector.splitAndTransfer()
b55c146 [Steven Phillips] Handle zero length case in BitVector.splitAndTransfer()


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/1dd0f5f5
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/1dd0f5f5
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/1dd0f5f5

Branch: refs/heads/master
Commit: 1dd0f5f580bd751d6e10879775d9441358c4fe66
Parents: 3b14765
Author: Steven Phillips <st...@dremio.com>
Authored: Fri Jul 28 17:09:54 2017 -0700
Committer: Steven Phillips <st...@dremio.com>
Committed: Fri Jul 28 17:09:54 2017 -0700

----------------------------------------------------------------------
 .../java/org/apache/arrow/vector/BitVector.java | 52 +++++++------
 .../org/apache/arrow/vector/TestBitVector.java  | 80 +++++++++++++++++++-
 2 files changed, 106 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/1dd0f5f5/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java
index 82cbd47..f34ef2c 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java
@@ -261,32 +261,34 @@ public final class BitVector extends BaseDataValueVector implements FixedWidthVe
     int firstByte = getByteIndex(startIndex);
     int byteSize = getSizeFromCount(length);
     int offset = startIndex % 8;
-    if (offset == 0) {
-      target.clear();
-      // slice
-      if (target.data != null) {
-        target.data.release();
-      }
-      target.data = data.slice(firstByte, byteSize);
-      target.data.retain(1);
-    } else {
-      // Copy data
-      // When the first bit starts from the middle of a byte (offset != 0), copy data from src BitVector.
-      // Each byte in the target is composed by a part in i-th byte, another part in (i+1)-th byte.
-      // The last byte copied to target is a bit tricky :
-      //   1) if length requires partly byte (length % 8 !=0), copy the remaining bits only.
-      //   2) otherwise, copy the last byte in the same way as to the prior bytes.
-      target.clear();
-      target.allocateNew(length);
-      // TODO maybe do this one word at a time, rather than byte?
-      for(int i = 0; i < byteSize - 1; i++) {
-        target.data.setByte(i, (((this.data.getByte(firstByte + i) & 0xFF) >>> offset) + (this.data.getByte(firstByte + i + 1) <<  (8 - offset))));
-      }
-      if (length % 8 != 0) {
-        target.data.setByte(byteSize - 1, ((this.data.getByte(firstByte + byteSize - 1) & 0xFF) >>> offset));
+    if (length > 0) {
+      if (offset == 0) {
+        target.clear();
+        // slice
+        if (target.data != null) {
+          target.data.release();
+        }
+        target.data = data.slice(firstByte, byteSize);
+        target.data.retain(1);
       } else {
-        target.data.setByte(byteSize - 1,
-            (((this.data.getByte(firstByte + byteSize - 1) & 0xFF) >>> offset) + (this.data.getByte(firstByte + byteSize) <<  (8 - offset))));
+        // Copy data
+        // When the first bit starts from the middle of a byte (offset != 0), copy data from src BitVector.
+        // Each byte in the target is composed by a part in i-th byte, another part in (i+1)-th byte.
+        // The last byte copied to target is a bit tricky :
+        //   1) if length requires partly byte (length % 8 !=0), copy the remaining bits only.
+        //   2) otherwise, copy the last byte in the same way as to the prior bytes.
+        target.clear();
+        target.allocateNew(length);
+        // TODO maybe do this one word at a time, rather than byte?
+        for (int i = 0; i < byteSize - 1; i++) {
+          target.data.setByte(i, (((this.data.getByte(firstByte + i) & 0xFF) >>> offset) + (this.data.getByte(firstByte + i + 1) << (8 - offset))));
+        }
+        if (length % 8 != 0) {
+          target.data.setByte(byteSize - 1, ((this.data.getByte(firstByte + byteSize - 1) & 0xFF) >>> offset));
+        } else {
+          target.data.setByte(byteSize - 1,
+                  (((this.data.getByte(firstByte + byteSize - 1) & 0xFF) >>> offset) + (this.data.getByte(firstByte + byteSize) << (8 - offset))));
+        }
       }
     }
     target.getMutator().setValueCount(length);

http://git-wip-us.apache.org/repos/asf/arrow/blob/1dd0f5f5/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java
----------------------------------------------------------------------
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java
index f2343c8..194b785 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java
@@ -20,6 +20,8 @@ package org.apache.arrow.vector;
 import static org.junit.Assert.assertEquals;
 
 import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.util.TransferPair;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -31,7 +33,7 @@ public class TestBitVector {
 
   @Before
   public void init() {
-    allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+    allocator = new RootAllocator(Long.MAX_VALUE);
   }
 
   @After
@@ -63,4 +65,80 @@ public class TestBitVector {
     }
   }
 
+  @Test
+  public void testSplitAndTransfer() throws Exception {
+
+    try (final BitVector sourceVector = new BitVector("bitvector", allocator)) {
+      final BitVector.Mutator sourceMutator = sourceVector.getMutator();
+      final BitVector.Accessor sourceAccessor = sourceVector.getAccessor();
+
+      sourceVector.allocateNew(40);
+
+      /* populate the bitvector -- 010101010101010101010101..... */
+      for(int i = 0; i < 40; i++) {
+        if((i & 1) ==  1) {
+          sourceMutator.set(i, 1);
+        }
+        else {
+          sourceMutator.set(i, 0);
+        }
+      }
+
+      sourceMutator.setValueCount(40);
+
+      /* check the vector output */
+      for(int i = 0; i < 40; i++) {
+        int result = sourceAccessor.get(i);
+        if((i & 1) ==  1) {
+          assertEquals(Integer.toString(1), Integer.toString(result));
+        }
+        else {
+          assertEquals(Integer.toString(0), Integer.toString(result));
+        }
+      }
+
+      final TransferPair transferPair = sourceVector.getTransferPair(allocator);
+      final BitVector toVector = (BitVector)transferPair.getTo();
+      final BitVector.Accessor toAccessor = toVector.getAccessor();
+      final BitVector.Mutator toMutator = toVector.getMutator();
+
+      /*
+       * form test cases such that we cover:
+       *
+       * (1) the start index is exactly where a particular byte starts in the source bit vector
+       * (2) the start index is randomly positioned within a byte in the source bit vector
+       *    (2.1) the length is a multiple of 8
+       *    (2.2) the length is not a multiple of 8
+       */
+      final int[][] transferLengths = {  {0, 8},     /* (1) */
+                                         {8, 10},    /* (1) */
+                                         {18, 0},    /* zero length scenario */
+                                         {18, 8},    /* (2.1) */
+                                         {26, 0},    /* zero length scenario */
+                                         {26, 14}    /* (2.2) */
+                                      };
+
+      for (final int[] transferLength : transferLengths) {
+        final int start = transferLength[0];
+        final int length = transferLength[1];
+
+        transferPair.splitAndTransfer(start, length);
+
+        /* check the toVector output after doing splitAndTransfer */
+        for (int i = 0; i < length; i++) {
+          int result = toAccessor.get(i);
+          if((i & 1) == 1) {
+            assertEquals(Integer.toString(1), Integer.toString(result));
+          }
+          else {
+            assertEquals(Integer.toString(0), Integer.toString(result));
+          }
+        }
+
+        toVector.clear();
+      }
+
+      sourceVector.close();
+    }
+  }
 }