You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by te...@apache.org on 2013/07/04 21:59:24 UTC

svn commit: r1499851 - in /hbase/trunk: hbase-client/src/main/java/org/apache/hadoop/hbase/filter/ hbase-server/src/test/java/org/apache/hadoop/hbase/filter/

Author: tedyu
Date: Thu Jul  4 19:59:24 2013
New Revision: 1499851

URL: http://svn.apache.org/r1499851
Log:
HBASE-8847 Filter.transform() always applies unconditionally, even when combined in a FilterList (Christophe Taton)


Modified:
    hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java
    hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterList.java
    hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterList.java

Modified: hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java?rev=1499851&r1=1499850&r2=1499851&view=diff
==============================================================================
--- hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java (original)
+++ hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java Thu Jul  4 19:59:24 2013
@@ -29,17 +29,19 @@ import org.apache.hadoop.hbase.exception
 
 /**
  * Interface for row and column filters directly applied within the regionserver.
+ *
  * A filter can expect the following call sequence:
- *<ul>
- * <li>{@link #reset()}</li>
- * <li>{@link #filterAllRemaining()} -> true indicates scan is over, false, keep going on.</li>
- * <li>{@link #filterRowKey(byte[],int,int)} -> true to drop this row,
- * if false, we will also call</li>
- * <li>{@link #filterKeyValue(KeyValue)} -> true to drop this key/value</li>
- * <li>{@link #filterRow(List)} -> allows directmodification of the final list to be submitted
- * <li>{@link #filterRow()} -> last chance to drop entire row based on the sequence of
- * filterValue() calls. Eg: filter a row if it doesn't contain a specified column.
- * </li>
+ * <ul>
+ *   <li> {@link #reset()} : reset the filter state before filtering a new row. </li>
+ *   <li> {@link #filterAllRemaining()}: true means row scan is over; false means keep going. </li>
+ *   <li> {@link #filterRowKey(byte[],int,int)}: true means drop this row; false means include.</li>
+ *   <li> {@link #filterKeyValue(KeyValue)}: decides whether to include or exclude this KeyValue.
+ *        See {@link ReturnCode}. </li>
+ *   <li> {@link #transform(KeyValue)}: if the KeyValue is included, let the filter transform the
+ *        KeyValue. </li>
+ *   <li> {@link #filterRow(List)}: allows direct modification of the final list to be submitted
+ *   <li> {@link #filterRow()}: last chance to drop entire row based on the sequence of
+ *        filter calls. Eg: filter a row if it doesn't contain a specified column. </li>
  * </ul>
  *
  * Filter instances are created one per region/scan.  This abstract class replaces
@@ -47,7 +49,7 @@ import org.apache.hadoop.hbase.exception
  *
  * When implementing your own filters, consider inheriting {@link FilterBase} to help
  * you reduce boilerplate.
- * 
+ *
  * @see FilterBase
  */
 @InterfaceAudience.Public
@@ -254,4 +256,4 @@ public abstract class Filter {
    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
    */
   abstract boolean areSerializedFieldsEqual(Filter other);
-}
\ No newline at end of file
+}

Modified: hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterList.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterList.java?rev=1499851&r1=1499850&r2=1499851&view=diff
==============================================================================
--- hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterList.java (original)
+++ hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterList.java Thu Jul  4 19:59:24 2013
@@ -35,9 +35,18 @@ import com.google.protobuf.InvalidProtoc
 /**
  * Implementation of {@link Filter} that represents an ordered List of Filters
  * which will be evaluated with a specified boolean operator {@link Operator#MUST_PASS_ALL}
- * (<code>!AND</code>) or {@link Operator#MUST_PASS_ONE} (<code>!OR</code>).
+ * (<code>AND</code>) or {@link Operator#MUST_PASS_ONE} (<code>OR</code>).
  * Since you can use Filter Lists as children of Filter Lists, you can create a
  * hierarchy of filters to be evaluated.
+ *
+ * <br/>
+ * {@link Operator#MUST_PASS_ALL} evaluates lazily: evaluation stops as soon as one filter does
+ * not include the KeyValue.
+ *
+ * <br/>
+ * {@link Operator#MUST_PASS_ONE} evaluates non-lazily: all filters are always evaluated.
+ *
+ * <br/>
  * Defaults to {@link Operator#MUST_PASS_ALL}.
  * <p>TODO: Fix creation of Configuration on serialization and deserialization.
  */
@@ -56,6 +65,18 @@ public class FilterList extends Filter {
   private Operator operator = Operator.MUST_PASS_ALL;
   private List<Filter> filters = new ArrayList<Filter>();
 
+  /** Reference KeyValue used by {@link #transform(KeyValue)} for validation purpose. */
+  private KeyValue referenceKV = null;
+
+  /**
+   * When filtering a given KeyValue in {@link #filterKeyValue(KeyValue)},
+   * this stores the transformed KeyValue to be returned by {@link #transform(KeyValue)}.
+   *
+   * Individual filters transformation are applied only when the filter includes the KeyValue.
+   * Transformations are composed in the order specified by {@link #filters}.
+   */
+  private KeyValue transformedKV = null;
+
   /**
    * Constructor that takes a set of {@link Filter}s. The default operator
    * MUST_PASS_ALL is assumed.
@@ -181,15 +202,21 @@ public class FilterList extends Filter {
 
   @Override
   public KeyValue transform(KeyValue v) throws IOException {
-    KeyValue current = v;
-    for (Filter filter : filters) {
-      current = filter.transform(current);
-    }
-    return current;
+    // transform() is expected to follow an inclusive filterKeyValue() immediately:
+    if (!v.equals(this.referenceKV)) {
+      throw new IllegalStateException(
+          "Reference KeyValue: " + this.referenceKV + " does not match: " + v);
+     }
+    return this.transformedKV;
   }
 
   @Override
   public ReturnCode filterKeyValue(KeyValue v) throws IOException {
+    this.referenceKV = v;
+
+    // Accumulates successive transformation of every filter that includes the KeyValue:
+    KeyValue transformed = v;
+
     ReturnCode rc = operator == Operator.MUST_PASS_ONE?
         ReturnCode.SKIP: ReturnCode.INCLUDE;
     for (Filter filter : filters) {
@@ -203,6 +230,7 @@ public class FilterList extends Filter {
         case INCLUDE_AND_NEXT_COL:
           rc = ReturnCode.INCLUDE_AND_NEXT_COL;
         case INCLUDE:
+          transformed = filter.transform(transformed);
           continue;
         default:
           return code;
@@ -217,15 +245,16 @@ public class FilterList extends Filter {
           if (rc != ReturnCode.INCLUDE_AND_NEXT_COL) {
             rc = ReturnCode.INCLUDE;
           }
+          transformed = filter.transform(transformed);
           break;
         case INCLUDE_AND_NEXT_COL:
           rc = ReturnCode.INCLUDE_AND_NEXT_COL;
+          transformed = filter.transform(transformed);
           // must continue here to evaluate all filters
           break;
         case NEXT_ROW:
           break;
         case SKIP:
-          // continue;
           break;
         case NEXT_COL:
           break;
@@ -236,6 +265,10 @@ public class FilterList extends Filter {
         }
       }
     }
+
+    // Save the transformed KeyValue for transform():
+    this.transformedKV = transformed;
+
     return rc;
   }
 

Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterList.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterList.java?rev=1499851&r1=1499850&r2=1499851&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterList.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterList.java Thu Jul  4 19:59:24 2013
@@ -44,6 +44,8 @@ import org.apache.hadoop.hbase.util.Byte
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
+import com.google.common.collect.Lists;
+
 /**
  * Tests filter sets
  *
@@ -421,5 +423,46 @@ public class TestFilterList {
         minKeyValue));
   }
 
+  /**
+   * Tests the behavior of transform() in a hierarchical filter.
+   *
+   * transform() only applies after a filterKeyValue() whose return-code includes the KeyValue.
+   * Lazy evaluation of AND
+   */
+  @Test
+  public void testTransformMPO() throws Exception {
+    // Apply the following filter:
+    //     (family=fam AND qualifier=qual1 AND KeyOnlyFilter)
+    //  OR (family=fam AND qualifier=qual2)
+    final FilterList flist = new FilterList(Operator.MUST_PASS_ONE, Lists.<Filter>newArrayList(
+        new FilterList(Operator.MUST_PASS_ALL, Lists.<Filter>newArrayList(
+            new FamilyFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("fam"))),
+            new QualifierFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("qual1"))),
+            new KeyOnlyFilter())),
+        new FilterList(Operator.MUST_PASS_ALL, Lists.<Filter>newArrayList(
+            new FamilyFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("fam"))),
+            new QualifierFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("qual2")))))));
+
+    final KeyValue kvQual1 = new KeyValue(
+        Bytes.toBytes("row"), Bytes.toBytes("fam"), Bytes.toBytes("qual1"), Bytes.toBytes("value"));
+    final KeyValue kvQual2 = new KeyValue(
+        Bytes.toBytes("row"), Bytes.toBytes("fam"), Bytes.toBytes("qual2"), Bytes.toBytes("value"));
+    final KeyValue kvQual3 = new KeyValue(
+        Bytes.toBytes("row"), Bytes.toBytes("fam"), Bytes.toBytes("qual3"), Bytes.toBytes("value"));
+
+    // Value for fam:qual1 should be stripped:
+    assertEquals(Filter.ReturnCode.INCLUDE, flist.filterKeyValue(kvQual1));
+    final KeyValue transformedQual1 = flist.transform(kvQual1);
+    assertEquals(0, transformedQual1.getValue().length);
+
+    // Value for fam:qual2 should not be stripped:
+    assertEquals(Filter.ReturnCode.INCLUDE, flist.filterKeyValue(kvQual2));
+    final KeyValue transformedQual2 = flist.transform(kvQual2);
+    assertEquals("value", Bytes.toString(transformedQual2.getValue()));
+
+    // Other keys should be skipped:
+    assertEquals(Filter.ReturnCode.SKIP, flist.filterKeyValue(kvQual3));
+  }
+
 }