You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ra...@apache.org on 2017/03/11 01:43:14 UTC

[1/2] incubator-carbondata git commit: use binary search to improve the performance in method setFilterdIndexToBitSet

Repository: incubator-carbondata
Updated Branches:
  refs/heads/master 6713ffa1f -> 22eccf7c6


use binary search to improve the performance in method
setFilterdIndexToBitSet

add binary range search and add test case

revert previous change

format changed code

change code format to pass check style

revert the code to use inverted index

add comments and change variables definition style


Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/bedaa59e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/bedaa59e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/bedaa59e

Branch: refs/heads/master
Commit: bedaa59ef298fdf12be4641d239b14c05368ea5b
Parents: 6713ffa
Author: mayun <ma...@10.100.56.61>
Authored: Thu Mar 9 13:13:22 2017 +0800
Committer: ravipesala <ra...@gmail.com>
Committed: Sat Mar 11 07:12:21 2017 +0530

----------------------------------------------------------------------
 .../executer/IncludeFilterExecuterImpl.java     |  47 ++-
 .../apache/carbondata/core/util/CarbonUtil.java |  94 ++++++
 .../executer/IncludeFilterExecuterImplTest.java | 299 +++++++++++++++++++
 .../carbondata/core/util/CarbonUtilTest.java    | 180 +++++++++++
 4 files changed, 596 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/bedaa59e/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java
index e640d71..732e87c 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java
@@ -116,30 +116,18 @@ public class IncludeFilterExecuterImpl implements FilterExecuter {
   private BitSet setFilterdIndexToBitSetWithColumnIndex(
       FixedLengthDimensionDataChunk dimensionColumnDataChunk, int numerOfRows) {
     BitSet bitSet = new BitSet(numerOfRows);
-    int start = 0;
-    int last = 0;
     int startIndex = 0;
     byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys();
     for (int i = 0; i < filterValues.length; i++) {
-      start = CarbonUtil
-          .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
-              filterValues[i], false);
-      if (start < 0) {
-        continue;
-      }
-      bitSet.set(dimensionColumnDataChunk.getInvertedIndex(start));
-      last = start;
-      for (int j = start + 1; j < numerOfRows; j++) {
-        if (dimensionColumnDataChunk.compareTo(j, filterValues[i]) == 0) {
-          bitSet.set(dimensionColumnDataChunk.getInvertedIndex(j));
-          last++;
-        } else {
-          break;
-        }
+      int[] rangeIndex = CarbonUtil.getRangeIndexUsingBinarySearch(dimensionColumnDataChunk,
+          startIndex, numerOfRows - 1, filterValues[i]);
+      for (int j = rangeIndex[0]; j <= rangeIndex[1]; j++) {
+
+        bitSet.set(dimensionColumnDataChunk.getInvertedIndex(j));
       }
-      startIndex = last;
-      if (startIndex >= numerOfRows) {
-        break;
+
+      if (rangeIndex[1] >= 0) {
+        startIndex = rangeIndex[1];
       }
     }
     return bitSet;
@@ -150,12 +138,23 @@ public class IncludeFilterExecuterImpl implements FilterExecuter {
     BitSet bitSet = new BitSet(numerOfRows);
     if (dimensionColumnDataChunk instanceof FixedLengthDimensionDataChunk) {
       byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys();
-      for (int k = 0; k < filterValues.length; k++) {
-        for (int j = 0; j < numerOfRows; j++) {
-          if (dimensionColumnDataChunk.compareTo(j, filterValues[k]) == 0) {
-            bitSet.set(j);
+      for (int i = 0; i < numerOfRows; i++) {
+
+        if (filterValues.length > 1) {
+          int index = CarbonUtil.binarySearch(filterValues, 0, filterValues.length - 1,
+              dimensionColumnDataChunk.getChunkData(i));
+
+          if (index >= 0) {
+            bitSet.set(i);
+          }
+        } else if (filterValues.length == 1) {
+          if (dimensionColumnDataChunk.compareTo(i, filterValues[0]) == 0) {
+            bitSet.set(i);
           }
+        } else {
+          break;
         }
+
       }
     }
     return bitSet;

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/bedaa59e/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
index 5a656e0..36e75f3 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
@@ -420,6 +420,100 @@ public final class CarbonUtil {
   }
 
   /**
+   * search a specific compareValue's range index in a sorted byte array
+   *
+   * @param dimColumnDataChunk
+   * @param low
+   * @param high
+   * @param compareValue
+   * @return the compareValue's range index in the dimColumnDataChunk
+   */
+  public static int[] getRangeIndexUsingBinarySearch(
+      FixedLengthDimensionDataChunk dimColumnDataChunk, int low, int high, byte[] compareValue) {
+
+    int[] rangeIndex = new int[2];
+    int cmpResult = 0;
+    while (high >= low) {
+      int mid = (low + high) / 2;
+      cmpResult = dimColumnDataChunk.compareTo(mid, compareValue);
+      if (cmpResult < 0) {
+        low = mid + 1;
+      } else if (cmpResult > 0) {
+        high = mid - 1;
+      } else {
+
+        int currentIndex = mid;
+        while (currentIndex - 1 >= 0
+            && dimColumnDataChunk.compareTo(currentIndex - 1, compareValue) == 0) {
+          --currentIndex;
+        }
+        rangeIndex[0] = currentIndex;
+
+        currentIndex = mid;
+        while (currentIndex + 1 <= high
+            && dimColumnDataChunk.compareTo(currentIndex + 1, compareValue) == 0) {
+          currentIndex++;
+        }
+        rangeIndex[1] = currentIndex;
+
+        return rangeIndex;
+      }
+    }
+
+    // key not found. return a not exist range
+    // rangeIndex[0] = 0;
+    rangeIndex[1] = -1;
+    return rangeIndex;
+  }
+
+  /**
+   * Checks that {@code fromIndex} and {@code toIndex} are in the range and
+   * throws an exception if they aren't.
+   */
+  private static void rangeCheck(int fromIndex, int toIndex) {
+    if (fromIndex > toIndex) {
+      throw new IllegalArgumentException("fromIndex(" + fromIndex + ") > toIndex(" + toIndex + ")");
+    }
+    if (fromIndex < 0) {
+      throw new ArrayIndexOutOfBoundsException(fromIndex);
+    }
+  }
+
+  /**
+   * search a specific key in sorted byte array
+   *
+   * @param filterValues
+   * @param low
+   * @param high
+   * @param compareValue
+   * @return the compareValue's index in the filterValues
+   */
+  public static int binarySearch(byte[][] filterValues, int low, int high,
+      byte[] compareValue) {
+
+    rangeCheck(low, high);
+
+    while (low <= high) {
+      int mid = (low + high) >>> 1;
+
+      int result = ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterValues[mid], compareValue);
+
+      if (result < 0) {
+        low = mid + 1;
+      } else if (result > 0) {
+        high = mid - 1;
+      } else {
+
+        return mid; // key found
+      }
+
+    }
+    // key not found
+    return -(low + 1);
+  }
+
+
+  /**
    * Method will identify the value which is lesser than the pivot element
    * on which range filter is been applied.
    *

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/bedaa59e/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImplTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImplTest.java b/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImplTest.java
new file mode 100644
index 0000000..b2aa855
--- /dev/null
+++ b/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImplTest.java
@@ -0,0 +1,299 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.core.scan.filter.executer;
+
+import java.util.BitSet;
+
+import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk;
+import org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk;
+import org.apache.carbondata.core.util.CarbonUtil;
+import org.junit.Before;
+import org.junit.Test;
+
+import junit.framework.TestCase;
+
+public class IncludeFilterExecuterImplTest extends TestCase {
+
+  /**
+   * @throws Exception
+   */
+  @Before
+  public void setUp() throws Exception {
+
+  }
+
+  private BitSet setFilterdIndexToBitSetNew(DimensionColumnDataChunk dimensionColumnDataChunk,
+      int numerOfRows, byte[][] filterValues) {
+    BitSet bitSet = new BitSet(numerOfRows);
+    if (dimensionColumnDataChunk instanceof FixedLengthDimensionDataChunk) {
+      // byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys();
+      for (int i = 0; i < numerOfRows; i++) {
+
+        if (filterValues.length > 1) {
+          int index = CarbonUtil.binarySearch(filterValues, 0, filterValues.length - 1,
+              dimensionColumnDataChunk.getChunkData(i));
+
+          if (index >= 0) {
+            bitSet.set(i);
+          }
+        } else if (filterValues.length == 1) {
+          if (dimensionColumnDataChunk.compareTo(i, filterValues[0]) == 0) {
+            bitSet.set(i);
+          }
+        } else {
+          break;
+        }
+
+      }
+    }
+    return bitSet;
+  }
+
+  private BitSet setFilterdIndexToBitSet(DimensionColumnDataChunk dimensionColumnDataChunk, int numerOfRows,
+      byte[][] filterValues) {
+    BitSet bitSet = new BitSet(numerOfRows);
+    if (dimensionColumnDataChunk instanceof FixedLengthDimensionDataChunk) {
+      // byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys();
+      for (int k = 0; k < filterValues.length; k++) {
+        for (int j = 0; j < numerOfRows; j++) {
+          if (dimensionColumnDataChunk.compareTo(j, filterValues[k]) == 0) {
+            bitSet.set(j);
+          }
+        }
+      }
+    }
+    return bitSet;
+  }
+
+  /**
+   * short int to byte
+   * 
+   * @param s
+   *          short int
+   * @return byte[]
+   */
+  private byte[] unsignedShortToByte2(int s) {
+    byte[] targets = new byte[2];
+    targets[0] = (byte) (s >> 8 & 0xFF);
+    targets[1] = (byte) (s & 0xFF);
+    return targets;
+  }
+
+  @Test
+  public void testPerformance() {
+
+    long oldTime = 0;
+    long newTime = 0;
+    long start;
+    int dataCnt = 120000;
+    int filterCnt = 800;
+    int queryCnt = 5;
+    int repeatCnt = 20;
+    byte[] keyWord = new byte[2];
+    FixedLengthDimensionDataChunk dimensionColumnDataChunk;
+    DimColumnExecuterFilterInfo dim = new DimColumnExecuterFilterInfo();
+
+    byte[] dataChunk = new byte[dataCnt * keyWord.length];
+    for (int i = 0; i < dataCnt; i++) {
+
+      if (i % repeatCnt == 0) {
+        repeatCnt++;
+      }
+
+      byte[] data = unsignedShortToByte2(repeatCnt);
+      dataChunk[2 * i] = data[0];
+      dataChunk[2 * i + 1] = data[1];
+
+    }
+
+    byte[][] filterKeys = new byte[filterCnt][2];
+    for (int ii = 0; ii < filterCnt; ii++) {
+      filterKeys[ii] = unsignedShortToByte2(100 + ii);
+    }
+    dim.setFilterKeys(filterKeys);
+
+    dimensionColumnDataChunk = new FixedLengthDimensionDataChunk(dataChunk, null, null,
+        dataChunk.length / keyWord.length, keyWord.length);
+
+    for (int j = 0; j < queryCnt; j++) {
+
+      start = System.currentTimeMillis();
+      BitSet bitOld = this.setFilterdIndexToBitSet(dimensionColumnDataChunk, dataCnt, filterKeys);
+      oldTime = oldTime + System.currentTimeMillis() - start;
+
+      start = System.currentTimeMillis();
+      BitSet bitNew = this.setFilterdIndexToBitSetNew((FixedLengthDimensionDataChunk) dimensionColumnDataChunk, dataCnt,
+          filterKeys);
+      newTime = newTime + System.currentTimeMillis() - start;
+
+      assertTrue(bitOld.equals(bitNew));
+
+    }
+
+    assertTrue(newTime < oldTime);
+
+    System.out.println("old code performance time: " + oldTime);
+    System.out.println("new code performance time: " + newTime);
+
+  }
+
+  private BitSet setFilterdIndexToBitSetWithColumnIndex(FixedLengthDimensionDataChunk dimensionColumnDataChunk,
+      int numerOfRows, byte[][] filterValues) {
+    BitSet bitSet = new BitSet(numerOfRows);
+    int start = 0;
+    int last = 0;
+    int startIndex = 0;
+    // byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys();
+    for (int i = 0; i < filterValues.length; i++) {
+      start = CarbonUtil.getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
+          filterValues[i], false);
+      if (start < 0) {
+        continue;
+      }
+      bitSet.set(start);
+      last = start;
+      for (int j = start + 1; j < numerOfRows; j++) {
+        if (dimensionColumnDataChunk.compareTo(j, filterValues[i]) == 0) {
+          bitSet.set(j);
+          last++;
+        } else {
+          break;
+        }
+      }
+      startIndex = last;
+      if (startIndex >= numerOfRows) {
+        break;
+      }
+    }
+    return bitSet;
+  }
+
+  private BitSet setFilterdIndexToBitSetWithColumnIndexOld(FixedLengthDimensionDataChunk dimensionColumnDataChunk,
+      int numerOfRows, byte[][] filterValues) {
+    BitSet bitSet = new BitSet(numerOfRows);
+    int start = 0;
+    int last = 0;
+    int startIndex = 0;
+    // byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys();
+    for (int i = 0; i < filterValues.length; i++) {
+      start = CarbonUtil.getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
+          filterValues[i], false);
+      if (start < 0) {
+        continue;
+      }
+      bitSet.set(start);
+      last = start;
+      for (int j = start + 1; j < numerOfRows; j++) {
+        if (dimensionColumnDataChunk.compareTo(j, filterValues[i]) == 0) {
+          bitSet.set(j);
+          last++;
+        } else {
+          break;
+        }
+      }
+      startIndex = last;
+      if (startIndex >= numerOfRows) {
+        break;
+      }
+    }
+    return bitSet;
+  }
+
+  private BitSet setFilterdIndexToBitSetWithColumnIndexNew(FixedLengthDimensionDataChunk dimensionColumnDataChunk,
+      int numerOfRows, byte[][] filterValues) {
+    BitSet bitSet = new BitSet(numerOfRows);
+    int startIndex = 0;
+    // byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys();
+    for (int i = 0; i < filterValues.length; i++) {
+      int[] rangeIndex = CarbonUtil.getRangeIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex,
+          numerOfRows - 1, filterValues[i]);
+      for (int j = rangeIndex[0]; j <= rangeIndex[1]; j++) {
+
+        bitSet.set(j);
+      }
+
+      if (rangeIndex[1] > -1) {
+        startIndex = rangeIndex[1];
+      }
+    }
+    return bitSet;
+  }
+
+  @Test
+  public void testRangBinarySearch() {
+
+    long oldTime = 0;
+    long newTime = 0;
+    long start;
+    long end;
+    int dataCnt = 120000;
+    int filterCnt = 800;
+    int queryCnt = 10000;
+    int repeatCnt = 200;
+    byte[] keyWord = new byte[2];
+    FixedLengthDimensionDataChunk dimensionColumnDataChunk;
+    DimColumnExecuterFilterInfo dim = new DimColumnExecuterFilterInfo();
+
+    byte[] dataChunk = new byte[dataCnt * keyWord.length];
+    for (int i = 0; i < dataCnt; i++) {
+
+      if (i % repeatCnt == 0) {
+        repeatCnt++;
+      }
+
+      byte[] data = unsignedShortToByte2(repeatCnt);
+      dataChunk[2 * i] = data[0];
+      dataChunk[2 * i + 1] = data[1];
+
+    }
+
+    byte[][] filterKeys = new byte[filterCnt][2];
+    for (int ii = 0; ii < filterCnt; ii++) {
+      filterKeys[ii] = unsignedShortToByte2(100 + ii);
+    }
+    dim.setFilterKeys(filterKeys);
+
+    dimensionColumnDataChunk = new FixedLengthDimensionDataChunk(dataChunk, null, null,
+        dataChunk.length / keyWord.length, keyWord.length);
+
+    // initial to run
+    BitSet bitOld = this.setFilterdIndexToBitSetWithColumnIndexOld(dimensionColumnDataChunk, dataCnt, filterKeys);
+    BitSet bitNew = this.setFilterdIndexToBitSetWithColumnIndexNew(dimensionColumnDataChunk, dataCnt, filterKeys);
+
+    // performance run
+    for (int j = 0; j < queryCnt; j++) {
+
+      start = System.currentTimeMillis();
+      bitOld = this.setFilterdIndexToBitSetWithColumnIndexOld(dimensionColumnDataChunk, dataCnt, filterKeys);
+      end = System.currentTimeMillis();
+      oldTime = oldTime + end - start;
+
+      start = System.currentTimeMillis();
+      bitNew = this.setFilterdIndexToBitSetWithColumnIndexNew(dimensionColumnDataChunk, dataCnt, filterKeys);
+      end = System.currentTimeMillis();
+      newTime = newTime + end - start;
+
+      assertTrue(bitOld.equals(bitNew));
+
+    }
+
+    System.out.println("old code performance time: " + oldTime);
+    System.out.println("new code performance time: " + newTime);
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/bedaa59e/core/src/test/java/org/apache/carbondata/core/util/CarbonUtilTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/carbondata/core/util/CarbonUtilTest.java b/core/src/test/java/org/apache/carbondata/core/util/CarbonUtilTest.java
index 647c2f8..9beaac7 100644
--- a/core/src/test/java/org/apache/carbondata/core/util/CarbonUtilTest.java
+++ b/core/src/test/java/org/apache/carbondata/core/util/CarbonUtilTest.java
@@ -764,7 +764,187 @@ public class CarbonUtilTest {
         .getFirstIndexUsingBinarySearch(fixedLengthDimensionDataChunk, 1, 3, compareValue, true);
     assertEquals(2, result);
   }
+  
+  @Test
+  public void testBinaryRangeSearch() {
+
+    byte[] dataChunk = new byte[10];
+    FixedLengthDimensionDataChunk fixedLengthDimensionDataChunk;
+    byte[] keyWord = new byte[1];
+    int[] range;
+
+    dataChunk = "abbcccddddeffgggh".getBytes();
+    byte[][] dataArr = new byte[dataChunk.length / keyWord.length][keyWord.length];
+    fixedLengthDimensionDataChunk = new FixedLengthDimensionDataChunk(dataChunk, null, null,
+        dataChunk.length / keyWord.length, keyWord.length);
+
+    for (int ii = 0; ii < dataChunk.length / keyWord.length; ii++) {
+      dataArr[ii] = fixedLengthDimensionDataChunk.getChunkData(ii);
+    }
+
+    keyWord[0] = Byte.valueOf("97");
+    int[] expectRangeIndex = new int[2];
+    expectRangeIndex[0] = 0;
+    expectRangeIndex[1] = 0;
+    assertRangeIndex(dataArr, dataChunk, fixedLengthDimensionDataChunk, keyWord, expectRangeIndex);
+
+    keyWord[0] = Byte.valueOf("104");
+    expectRangeIndex = new int[2];
+    expectRangeIndex[0] = 16;
+    expectRangeIndex[1] = 16;
+    assertRangeIndex(dataArr, dataChunk, fixedLengthDimensionDataChunk, keyWord, expectRangeIndex);
+
+    keyWord[0] = Byte.valueOf("101");
+    expectRangeIndex = new int[2];
+    expectRangeIndex[0] = 10;
+    expectRangeIndex[1] = 10;
+    assertRangeIndex(dataArr, dataChunk, fixedLengthDimensionDataChunk, keyWord, expectRangeIndex);
+
+    keyWord[0] = Byte.valueOf("99");
+    expectRangeIndex = new int[2];
+    expectRangeIndex[0] = 3;
+    expectRangeIndex[1] = 5;
+    assertRangeIndex(dataArr, dataChunk, fixedLengthDimensionDataChunk, keyWord, expectRangeIndex);
+
+    dataChunk = "ab".getBytes();
+    fixedLengthDimensionDataChunk = new FixedLengthDimensionDataChunk(dataChunk, null, null,
+        dataChunk.length / keyWord.length, keyWord.length);
+
+    keyWord[0] = Byte.valueOf("97");
+    range = CarbonUtil.getRangeIndexUsingBinarySearch(fixedLengthDimensionDataChunk, 0, dataChunk.length - 1, keyWord);
+    assertEquals(0, range[0]);
+    assertEquals(0, range[1]);
+
+    keyWord[0] = Byte.valueOf("98");
+    range = CarbonUtil.getRangeIndexUsingBinarySearch(fixedLengthDimensionDataChunk, 0, dataChunk.length - 1, keyWord);
+    assertEquals(1, range[0]);
+    assertEquals(1, range[1]);
+
+    dataChunk = "aabb".getBytes();
+    fixedLengthDimensionDataChunk = new FixedLengthDimensionDataChunk(dataChunk, null, null,
+        dataChunk.length / keyWord.length, keyWord.length);
+
+    keyWord[0] = Byte.valueOf("97");
+    range = CarbonUtil.getRangeIndexUsingBinarySearch(fixedLengthDimensionDataChunk, 0, dataChunk.length - 1, keyWord);
+    assertEquals(0, range[0]);
+    assertEquals(1, range[1]);
+
+    keyWord[0] = Byte.valueOf("98");
+    range = CarbonUtil.getRangeIndexUsingBinarySearch(fixedLengthDimensionDataChunk, 0, dataChunk.length - 1, keyWord);
+    assertEquals(2, range[0]);
+    assertEquals(3, range[1]);
+
+    dataChunk = "a".getBytes();
+    fixedLengthDimensionDataChunk = new FixedLengthDimensionDataChunk(dataChunk, null, null,
+        dataChunk.length / keyWord.length, keyWord.length);
+
+    keyWord[0] = Byte.valueOf("97");
+    range = CarbonUtil.getRangeIndexUsingBinarySearch(fixedLengthDimensionDataChunk, 0, dataChunk.length - 1, keyWord);
+    assertEquals(0, range[0]);
+    assertEquals(0, range[1]);
+
+    dataChunk = "aa".getBytes();
+    fixedLengthDimensionDataChunk = new FixedLengthDimensionDataChunk(dataChunk, null, null,
+        dataChunk.length / keyWord.length, keyWord.length);
+
+    keyWord[0] = Byte.valueOf("97");
+    range = CarbonUtil.getRangeIndexUsingBinarySearch(fixedLengthDimensionDataChunk, 0, dataChunk.length - 1, keyWord);
+    assertEquals(0, range[0]);
+    assertEquals(1, range[1]);
+
+    dataChunk = "aabbbbbbbbbbcc".getBytes();
+    fixedLengthDimensionDataChunk = new FixedLengthDimensionDataChunk(dataChunk, null, null,
+        dataChunk.length / keyWord.length, keyWord.length);
+    keyWord[0] = Byte.valueOf("98");
+    range = CarbonUtil.getRangeIndexUsingBinarySearch(fixedLengthDimensionDataChunk, 0, dataChunk.length - 1, keyWord);
+    assertEquals(2, range[0]);
+    assertEquals(11, range[1]);
+
+  }
+
+  @Test
+  public void IndexUsingBinarySearchLengthTwo() {
+
+    byte[] dataChunk = new byte[10];
+    FixedLengthDimensionDataChunk fixedLengthDimensionDataChunk;
+
+    byte[] keyWord = new byte[2];
+
+    dataChunk = "aabbbbbbbbbbcc".getBytes();
+    byte[][] dataArr = new byte[dataChunk.length / keyWord.length][keyWord.length];
+
+    fixedLengthDimensionDataChunk = new FixedLengthDimensionDataChunk(dataChunk, null, null,
+        dataChunk.length / keyWord.length, keyWord.length);
+
+    for (int ii = 0; ii < dataChunk.length / keyWord.length; ii++) {
+      dataArr[ii] = fixedLengthDimensionDataChunk.getChunkData(ii);
+    }
+
+    keyWord[0] = Byte.valueOf("98");
+    keyWord[1] = Byte.valueOf("98");
+    int[] expectRangeIndex = new int[2];
+    expectRangeIndex[0] = 1;
+    expectRangeIndex[1] = 5;
+    assertRangeIndex(dataArr, dataChunk, fixedLengthDimensionDataChunk, keyWord, expectRangeIndex);
+
+    keyWord[0] = Byte.valueOf("97");
+    keyWord[1] = Byte.valueOf("97");
+
+    expectRangeIndex = new int[2];
+    expectRangeIndex[0] = 0;
+    expectRangeIndex[1] = 0;
+    assertRangeIndex(dataArr, dataChunk, fixedLengthDimensionDataChunk, keyWord, expectRangeIndex);
+
+    keyWord[0] = Byte.valueOf("99");
+    keyWord[1] = Byte.valueOf("99");
+    expectRangeIndex = new int[2];
+    expectRangeIndex[0] = 6;
+    expectRangeIndex[1] = 6;
+    assertRangeIndex(dataArr, dataChunk, fixedLengthDimensionDataChunk, keyWord, expectRangeIndex);
+
+  }
+
+  @Test
+  public void IndexUsingBinarySearchLengthThree() {
+
+    byte[] dataChunk = new byte[10];
+    FixedLengthDimensionDataChunk fixedLengthDimensionDataChunk;
 
+    byte[] keyWord = new byte[3];
+
+    dataChunk = "aaabbbbbbbbbccc".getBytes();
+    byte[][] dataArr = new byte[dataChunk.length / keyWord.length][keyWord.length];
+
+    fixedLengthDimensionDataChunk = new FixedLengthDimensionDataChunk(dataChunk, null, null,
+        dataChunk.length / keyWord.length, keyWord.length);
+
+    for (int ii = 0; ii < dataChunk.length / keyWord.length; ii++) {
+      dataArr[ii] = fixedLengthDimensionDataChunk.getChunkData(ii);
+    }
+
+    keyWord[0] = Byte.valueOf("98");
+    keyWord[1] = Byte.valueOf("98");
+    keyWord[2] = Byte.valueOf("98");
+    int[] expectRangeIndex = new int[2];
+    expectRangeIndex[0] = 1;
+    expectRangeIndex[1] = 3;
+    assertRangeIndex(dataArr, dataChunk, fixedLengthDimensionDataChunk, keyWord, expectRangeIndex);
+
+  }
+
+  private void assertRangeIndex(byte[][] dataArr, byte[] dataChunk,
+      FixedLengthDimensionDataChunk fixedLengthDimensionDataChunk, byte[] keyWord, int[] expectRangeIndex) {
+    int[] range;
+    range = CarbonUtil.getRangeIndexUsingBinarySearch(fixedLengthDimensionDataChunk, 0,
+        (dataChunk.length - 1) / keyWord.length, keyWord);
+    assertEquals(expectRangeIndex[0], range[0]);
+    assertEquals(expectRangeIndex[1], range[1]);
+
+    int index = CarbonUtil.binarySearch(dataArr, 0, dataChunk.length / keyWord.length - 1, keyWord);
+    assertTrue(expectRangeIndex[0] <= index && index <= range[1]);
+  }
+ 	
+ 	
   @AfterClass public static void testcleanUp() {
     new File("../core/src/test/resources/testFile.txt").deleteOnExit();
     new File("../core/src/test/resources/testDatabase/levelmetadata_testTable.metadata")


[2/2] incubator-carbondata git commit: [CARBONDATA-748] use binary search improve filter on dimension has no inverted index This closes #638

Posted by ra...@apache.org.
[CARBONDATA-748] use binary search improve filter on dimension has no inverted index This closes #638


Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/22eccf7c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/22eccf7c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/22eccf7c

Branch: refs/heads/master
Commit: 22eccf7c694550a9ff478d8b1d3b4a8ddfbef8a2
Parents: 6713ffa bedaa59
Author: ravipesala <ra...@gmail.com>
Authored: Sat Mar 11 07:12:52 2017 +0530
Committer: ravipesala <ra...@gmail.com>
Committed: Sat Mar 11 07:12:52 2017 +0530

----------------------------------------------------------------------
 .../executer/IncludeFilterExecuterImpl.java     |  47 ++-
 .../apache/carbondata/core/util/CarbonUtil.java |  94 ++++++
 .../executer/IncludeFilterExecuterImplTest.java | 299 +++++++++++++++++++
 .../carbondata/core/util/CarbonUtilTest.java    | 180 +++++++++++
 4 files changed, 596 insertions(+), 24 deletions(-)
----------------------------------------------------------------------