You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by zh...@apache.org on 2018/12/31 12:46:00 UTC

[24/47] hbase git commit: HBASE-21620 Problem in scan query when using more than one column prefix filter in some cases

HBASE-21620 Problem in scan query when using more than one column prefix filter in some cases

Signed-off-by: Guanghao Zhang <zg...@apache.org>
Signed-off-by: Michael Stack <st...@apache.org>
Signed-off-by: Allan Yang <al...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/e160b5ac
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/e160b5ac
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/e160b5ac

Branch: refs/heads/HBASE-21512
Commit: e160b5ac8d82330911ea746e456ea53bf317ace8
Parents: 12786f8
Author: openinx <op...@gmail.com>
Authored: Thu Dec 20 21:04:10 2018 +0800
Committer: stack <st...@apache.org>
Committed: Fri Dec 21 15:21:53 2018 -0800

----------------------------------------------------------------------
 .../hadoop/hbase/filter/FilterListWithOR.java   | 65 ++++++++++----------
 .../hadoop/hbase/regionserver/StoreScanner.java |  2 +-
 .../hadoop/hbase/filter/TestFilterList.java     | 62 +++++++++++++++++--
 .../hbase/filter/TestFilterListOnMini.java      | 50 ++++++++++++++-
 4 files changed, 140 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/e160b5ac/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithOR.java
----------------------------------------------------------------------
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithOR.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithOR.java
index 842fdc5..ba4cd88 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithOR.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithOR.java
@@ -83,30 +83,40 @@ public class FilterListWithOR extends FilterListBase {
    * next family for RegionScanner, INCLUDE_AND_NEXT_ROW is the same. so we should pass current cell
    * to the filter, if row mismatch or row match but column family mismatch. (HBASE-18368)
    * @see org.apache.hadoop.hbase.filter.Filter.ReturnCode
+   * @param subFilter which sub-filter to calculate the return code by using previous cell and
+   *          previous return code.
+   * @param prevCell the previous cell passed to given sub-filter.
+   * @param currentCell the current cell which will pass to given sub-filter.
+   * @param prevCode the previous return code for given sub-filter.
+   * @return return code calculated by using previous cell and previous return code. null means can
+   *         not decide which return code should return, so we will pass the currentCell to
+   *         subFilter for getting currentCell's return code, and it won't impact the sub-filter's
+   *         internal states.
    */
-  private boolean shouldPassCurrentCellToFilter(Cell prevCell, Cell currentCell,
-      ReturnCode prevCode) throws IOException {
+  private ReturnCode calculateReturnCodeByPrevCellAndRC(Filter subFilter, Cell currentCell,
+      Cell prevCell, ReturnCode prevCode) throws IOException {
     if (prevCell == null || prevCode == null) {
-      return true;
+      return null;
     }
     switch (prevCode) {
     case INCLUDE:
     case SKIP:
-      return true;
+        return null;
     case SEEK_NEXT_USING_HINT:
-      Cell nextHintCell = getNextCellHint(prevCell);
-      return nextHintCell == null || this.compareCell(currentCell, nextHintCell) >= 0;
+        Cell nextHintCell = subFilter.getNextCellHint(prevCell);
+        return nextHintCell != null && compareCell(currentCell, nextHintCell) < 0
+          ? ReturnCode.SEEK_NEXT_USING_HINT : null;
     case NEXT_COL:
     case INCLUDE_AND_NEXT_COL:
-      // Once row changed, reset() will clear prevCells, so we need not to compare their rows
-      // because rows are the same here.
-      return !CellUtil.matchingColumn(prevCell, currentCell);
+        // Once row changed, reset() will clear prevCells, so we need not to compare their rows
+        // because rows are the same here.
+        return CellUtil.matchingColumn(prevCell, currentCell) ? ReturnCode.NEXT_COL : null;
     case NEXT_ROW:
     case INCLUDE_AND_SEEK_NEXT_ROW:
-      // As described above, rows are definitely the same, so we only compare the family.
-      return !CellUtil.matchingFamily(prevCell, currentCell);
+        // As described above, rows are definitely the same, so we only compare the family.
+        return CellUtil.matchingFamily(prevCell, currentCell) ? ReturnCode.NEXT_ROW : null;
     default:
-      throw new IllegalStateException("Received code is not valid.");
+        throw new IllegalStateException("Received code is not valid.");
     }
   }
 
@@ -240,7 +250,7 @@ public class FilterListWithOR extends FilterListBase {
   private void updatePrevCellList(int index, Cell currentCell, ReturnCode currentRC) {
     if (currentCell == null || currentRC == ReturnCode.INCLUDE || currentRC == ReturnCode.SKIP) {
       // If previous return code is INCLUDE or SKIP, we should always pass the next cell to the
-      // corresponding sub-filter(need not test shouldPassCurrentCellToFilter() method), So we
+      // corresponding sub-filter(need not test calculateReturnCodeByPrevCellAndRC() method), So we
       // need not save current cell to prevCellList for saving heap memory.
       prevCellList.set(index, null);
     } else {
@@ -254,28 +264,27 @@ public class FilterListWithOR extends FilterListBase {
       return ReturnCode.INCLUDE;
     }
     ReturnCode rc = null;
-    boolean everyFilterReturnHint = true;
     for (int i = 0, n = filters.size(); i < n; i++) {
       Filter filter = filters.get(i);
       subFiltersIncludedCell.set(i, false);
 
       Cell prevCell = this.prevCellList.get(i);
       ReturnCode prevCode = this.prevFilterRCList.get(i);
-      if (filter.filterAllRemaining() || !shouldPassCurrentCellToFilter(prevCell, c, prevCode)) {
-        everyFilterReturnHint = false;
+      if (filter.filterAllRemaining()) {
         continue;
       }
-
-      ReturnCode localRC = filter.filterCell(c);
+      ReturnCode localRC = calculateReturnCodeByPrevCellAndRC(filter, c, prevCell, prevCode);
+      if (localRC == null) {
+        // Can not get return code based on previous cell and previous return code. In other words,
+        // we should pass the current cell to this sub-filter to get the return code, and it won't
+        // impact the sub-filter's internal state.
+        localRC = filter.filterCell(c);
+      }
 
       // Update previous return code and previous cell for filter[i].
       updatePrevFilterRCList(i, localRC);
       updatePrevCellList(i, c, localRC);
 
-      if (localRC != ReturnCode.SEEK_NEXT_USING_HINT) {
-        everyFilterReturnHint = false;
-      }
-
       rc = mergeReturnCode(rc, localRC);
 
       // For INCLUDE* case, we need to update the transformed cell.
@@ -284,15 +293,9 @@ public class FilterListWithOR extends FilterListBase {
         subFiltersIncludedCell.set(i, true);
       }
     }
-
-    if (everyFilterReturnHint) {
-      return ReturnCode.SEEK_NEXT_USING_HINT;
-    } else if (rc == null) {
-      // Each sub-filter in filter list got true for filterAllRemaining().
-      return ReturnCode.SKIP;
-    } else {
-      return rc;
-    }
+    // Each sub-filter in filter list got true for filterAllRemaining(), if rc is null, so we should
+    // return SKIP.
+    return rc == null ? ReturnCode.SKIP : rc;
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/hbase/blob/e160b5ac/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
index 91ca592..b318950 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
@@ -690,7 +690,7 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
 
         case SEEK_NEXT_USING_HINT:
           Cell nextKV = matcher.getNextKeyHint(cell);
-          if (nextKV != null) {
+          if (nextKV != null && comparator.compare(nextKV, cell) > 0) {
             seekAsDirection(nextKV);
             NextState stateAfterSeekByHint = needToReturn(outResult);
             if (stateAfterSeekByHint != null) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/e160b5ac/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterList.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterList.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterList.java
index 28f344a..2ba98ef 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterList.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterList.java
@@ -45,6 +45,7 @@ import org.junit.Assert;
 import org.junit.ClassRule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import org.mockito.Mockito;
 
 import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
 
@@ -604,15 +605,15 @@ public class TestFilterList {
         3, Bytes.toBytes("value"));
 
     assertEquals(ReturnCode.INCLUDE_AND_NEXT_COL, filterList01.filterCell(kv1));
-    assertEquals(ReturnCode.SKIP, filterList01.filterCell(kv2));
-    assertEquals(ReturnCode.SKIP, filterList01.filterCell(kv3));
+    assertEquals(ReturnCode.NEXT_COL, filterList01.filterCell(kv2));
+    assertEquals(ReturnCode.NEXT_COL, filterList01.filterCell(kv3));
 
     FilterList filterList11 =
         new FilterList(Operator.MUST_PASS_ONE, new ColumnPaginationFilter(1, 1));
 
     assertEquals(ReturnCode.NEXT_COL, filterList11.filterCell(kv1));
-    assertEquals(ReturnCode.SKIP, filterList11.filterCell(kv2));
-    assertEquals(ReturnCode.SKIP, filterList11.filterCell(kv3));
+    assertEquals(ReturnCode.NEXT_COL, filterList11.filterCell(kv2));
+    assertEquals(ReturnCode.NEXT_COL, filterList11.filterCell(kv3));
   }
 
   @Test
@@ -631,9 +632,9 @@ public class TestFilterList {
         Bytes.toBytes("value"));
 
     assertEquals(ReturnCode.SEEK_NEXT_USING_HINT, filterList.filterCell(kv1));
-    assertEquals(ReturnCode.SKIP, filterList.filterCell(kv2));
+    assertEquals(ReturnCode.SEEK_NEXT_USING_HINT, filterList.filterCell(kv2));
     assertEquals(ReturnCode.INCLUDE_AND_NEXT_COL, filterList.filterCell(kv3));
-    assertEquals(ReturnCode.SKIP, filterList.filterCell(kv4));
+    assertEquals(ReturnCode.NEXT_COL, filterList.filterCell(kv4));
   }
 
   private static class MockFilter extends FilterBase {
@@ -1072,5 +1073,54 @@ public class TestFilterList {
     Assert.assertEquals(true, filter2.getTransformed());
     Assert.assertEquals(true, filter3.getTransformed());
   }
+
+  @Test
+  public void testFilterListWithORWhenPassingCellMismatchPreviousRC() throws IOException {
+    // Mainly test FilterListWithOR#calculateReturnCodeByPrevCellAndRC method with two sub-filters.
+    KeyValue kv1 = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("fam"), Bytes.toBytes("a"),
+        100, Bytes.toBytes("value"));
+    KeyValue kv2 = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("fam"), Bytes.toBytes("a"), 99,
+        Bytes.toBytes("value"));
+    KeyValue kv3 = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("fam"), Bytes.toBytes("b"), 1,
+        Bytes.toBytes("value"));
+    KeyValue kv4 = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("fan"), Bytes.toBytes("a"), 1,
+        Bytes.toBytes("value"));
+    Filter subFilter1 = Mockito.mock(FilterBase.class);
+    Mockito.when(subFilter1.filterCell(kv1)).thenReturn(ReturnCode.INCLUDE_AND_NEXT_COL);
+    Mockito.when(subFilter1.filterCell(kv2)).thenReturn(ReturnCode.NEXT_COL);
+    Mockito.when(subFilter1.filterCell(kv3)).thenReturn(ReturnCode.INCLUDE_AND_NEXT_COL);
+    Mockito.when(subFilter1.filterCell(kv4)).thenReturn(ReturnCode.INCLUDE_AND_NEXT_COL);
+
+    Filter subFilter2 = Mockito.mock(FilterBase.class);
+    Mockito.when(subFilter2.filterCell(kv1)).thenReturn(ReturnCode.SKIP);
+    Mockito.when(subFilter2.filterCell(kv2)).thenReturn(ReturnCode.NEXT_ROW);
+    Mockito.when(subFilter2.filterCell(kv3)).thenReturn(ReturnCode.NEXT_ROW);
+    Mockito.when(subFilter2.filterCell(kv4)).thenReturn(ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW);
+
+    Filter filterList = new FilterList(Operator.MUST_PASS_ONE, subFilter1, subFilter2);
+    Assert.assertEquals(ReturnCode.INCLUDE, filterList.filterCell(kv1));
+    Assert.assertEquals(ReturnCode.NEXT_COL, filterList.filterCell(kv2));
+    Assert.assertEquals(ReturnCode.INCLUDE_AND_NEXT_COL, filterList.filterCell(kv3));
+    Assert.assertEquals(ReturnCode.INCLUDE_AND_NEXT_COL, filterList.filterCell(kv4));
+
+    // One sub-filter will filterAllRemaining but other sub-filter will return SEEK_HINT
+    subFilter1 = Mockito.mock(FilterBase.class);
+    Mockito.when(subFilter1.filterAllRemaining()).thenReturn(true);
+    Mockito.when(subFilter1.filterCell(kv1)).thenReturn(ReturnCode.NEXT_ROW);
+
+    subFilter2 = Mockito.mock(FilterBase.class);
+    Mockito.when(subFilter2.filterCell(kv1)).thenReturn(ReturnCode.SEEK_NEXT_USING_HINT);
+    filterList = new FilterList(Operator.MUST_PASS_ONE, subFilter1, subFilter2);
+    Assert.assertEquals(ReturnCode.SEEK_NEXT_USING_HINT, filterList.filterCell(kv1));
+
+    // Two sub-filter returns SEEK_NEXT_USING_HINT, then we should return SEEK_NEXT_USING_HINT.
+    subFilter1 = Mockito.mock(FilterBase.class);
+    Mockito.when(subFilter1.filterCell(kv1)).thenReturn(ReturnCode.SEEK_NEXT_USING_HINT);
+
+    subFilter2 = Mockito.mock(FilterBase.class);
+    Mockito.when(subFilter2.filterCell(kv1)).thenReturn(ReturnCode.SEEK_NEXT_USING_HINT);
+    filterList = new FilterList(Operator.MUST_PASS_ONE, subFilter1, subFilter2);
+    Assert.assertEquals(ReturnCode.SEEK_NEXT_USING_HINT, filterList.filterCell(kv1));
+  }
 }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/e160b5ac/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterListOnMini.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterListOnMini.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterListOnMini.java
index 7967e0b..4266522 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterListOnMini.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterListOnMini.java
@@ -21,6 +21,7 @@ import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.*;
+import org.apache.hadoop.hbase.filter.FilterList.Operator;
 import org.apache.hadoop.hbase.testclassification.FilterTests;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -28,7 +29,6 @@ import org.junit.AfterClass;
 import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.ClassRule;
-import org.junit.Ignore;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
@@ -90,4 +90,52 @@ public class TestFilterListOnMini {
       Assert.assertEquals(2, rr.size());
     }
   }
+
+  /**
+   * Test case for HBASE-21620
+   */
+  @Test
+  public void testColumnPrefixFilterConcatWithOR() throws Exception {
+    TableName tn = TableName.valueOf(name.getMethodName());
+    byte[] cf1 = Bytes.toBytes("f1");
+    byte[] row = Bytes.toBytes("row");
+    byte[] value = Bytes.toBytes("value");
+    String[] columns = new String[]{
+      "1544768273917010001_lt",
+      "1544768273917010001_w_1",
+      "1544768723910010001_ca_1",
+      "1544768723910010001_lt",
+      "1544768723910010001_ut_1",
+      "1544768723910010001_w_5",
+      "1544769779710010001_lt",
+      "1544769779710010001_w_5",
+      "1544769883529010001_lt",
+      "1544769883529010001_w_5",
+      "1544769915805010001_lt",
+      "1544769915805010001_w_5",
+      "1544779883529010001_lt",
+      "1544770422942010001_lt",
+      "1544770422942010001_w_5"
+    };
+    Table table = TEST_UTIL.createTable(tn, cf1);
+    for (int i = 0; i < columns.length; i++) {
+      Put put = new Put(row).addColumn(cf1, Bytes.toBytes(columns[i]), value);
+      table.put(put);
+    }
+    Scan scan = new Scan();
+    scan.withStartRow(row).withStopRow(row, true)
+        .setFilter(new FilterList(Operator.MUST_PASS_ONE,
+            new ColumnPrefixFilter(Bytes.toBytes("1544770422942010001_")),
+            new ColumnPrefixFilter(Bytes.toBytes("1544769883529010001_"))));
+    ResultScanner scanner = table.getScanner(scan);
+    Result result;
+    int resultCount = 0;
+    int cellCount = 0;
+    while ((result = scanner.next()) != null) {
+      cellCount += result.listCells().size();
+      resultCount++;
+    }
+    Assert.assertEquals(resultCount, 1);
+    Assert.assertEquals(cellCount, 4);
+  }
 }