You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by la...@apache.org on 2013/07/18 22:18:06 UTC
svn commit: r1504631 - in /hbase/branches/0.94/src:
main/java/org/apache/hadoop/hbase/filter/Filter.java
main/java/org/apache/hadoop/hbase/filter/FilterList.java
test/java/org/apache/hadoop/hbase/filter/TestFilterList.java
Author: larsh
Date: Thu Jul 18 20:18:06 2013
New Revision: 1504631
URL: http://svn.apache.org/r1504631
Log:
HBASE-8847 Filter.transform() always applies unconditionally, even when combined in a FilterList (Christophe Taton)
Modified:
hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/filter/Filter.java
hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/filter/FilterList.java
hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/filter/TestFilterList.java
Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/filter/Filter.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/filter/Filter.java?rev=1504631&r1=1504630&r2=1504631&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/filter/Filter.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/filter/Filter.java Thu Jul 18 20:18:06 2013
@@ -27,17 +27,19 @@ import java.util.List;
/**
* Interface for row and column filters directly applied within the regionserver.
+ *
* A filter can expect the following call sequence:
- *<ul>
- * <li>{@link #reset()}</li>
- * <li>{@link #filterAllRemaining()} -> true indicates scan is over, false, keep going on.</li>
- * <li>{@link #filterRowKey(byte[],int,int)} -> true to drop this row,
- * if false, we will also call</li>
- * <li>{@link #filterKeyValue(KeyValue)} -> true to drop this key/value</li>
- * <li>{@link #filterRow(List)} -> allows directmodification of the final list to be submitted
- * <li>{@link #filterRow()} -> last chance to drop entire row based on the sequence of
- * filterValue() calls. Eg: filter a row if it doesn't contain a specified column.
- * </li>
+ * <ul>
+ * <li> {@link #reset()} : reset the filter state before filtering a new row. </li>
+ * <li> {@link #filterAllRemaining()}: true means row scan is over; false means keep going. </li>
+ * <li> {@link #filterRowKey(byte[],int,int)}: true means drop this row; false means include.</li>
+ * <li> {@link #filterKeyValue(KeyValue)}: decides whether to include or exclude this KeyValue.
+ * See {@link ReturnCode}. </li>
+ * <li> {@link #transform(KeyValue)}: if the KeyValue is included, let the filter transform the
+ * KeyValue. </li>
+ * <li> {@link #filterRow(List)}: allows direct modification of the final list to be submitted
+ * <li> {@link #filterRow()}: last chance to drop entire row based on the sequence of
+ * filter calls. Eg: filter a row if it doesn't contain a specified column. </li>
* </ul>
*
* Filter instances are created one per region/scan. This interface replaces
Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/filter/FilterList.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/filter/FilterList.java?rev=1504631&r1=1504630&r2=1504631&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/filter/FilterList.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/filter/FilterList.java Thu Jul 18 20:18:06 2013
@@ -35,9 +35,18 @@ import java.util.List;
/**
* Implementation of {@link Filter} that represents an ordered List of Filters
* which will be evaluated with a specified boolean operator {@link Operator#MUST_PASS_ALL}
- * (<code>!AND</code>) or {@link Operator#MUST_PASS_ONE} (<code>!OR</code>).
+ * (<code>AND</code>) or {@link Operator#MUST_PASS_ONE} (<code>OR</code>).
* Since you can use Filter Lists as children of Filter Lists, you can create a
* hierarchy of filters to be evaluated.
+ *
+ * <br/>
+ * {@link Operator#MUST_PASS_ALL} evaluates lazily: evaluation stops as soon as one filter does
+ * not include the KeyValue.
+ *
+ * <br/>
+ * {@link Operator#MUST_PASS_ONE} evaluates non-lazily: all filters are always evaluated.
+ *
+ * <br/>
* Defaults to {@link Operator#MUST_PASS_ALL}.
* <p>TODO: Fix creation of Configuration on serialization and deserialization.
*/
@@ -55,6 +64,18 @@ public class FilterList implements Filte
private Operator operator = Operator.MUST_PASS_ALL;
private List<Filter> filters = new ArrayList<Filter>();
+ /** Reference KeyValue used by {@link #transform(KeyValue)} for validation purpose. */
+ private KeyValue referenceKV = null;
+
+ /**
+ * When filtering a given KeyValue in {@link #filterKeyValue(KeyValue)},
+ * this stores the transformed KeyValue to be returned by {@link #transform(KeyValue)}.
+ *
+ * Individual filters transformation are applied only when the filter includes the KeyValue.
+ * Transformations are composed in the order specified by {@link #filters}.
+ */
+ private KeyValue transformedKV = null;
+
/**
* Default constructor, filters nothing. Required though for RPC
* deserialization.
@@ -184,15 +205,21 @@ public class FilterList implements Filte
@Override
public KeyValue transform(KeyValue v) {
- KeyValue current = v;
- for (Filter filter : filters) {
- current = filter.transform(current);
- }
- return current;
+ // transform() is expected to follow an inclusive filterKeyValue() immediately:
+ if (!v.equals(this.referenceKV)) {
+ throw new IllegalStateException(
+ "Reference KeyValue: " + this.referenceKV + " does not match: " + v);
+ }
+ return this.transformedKV;
}
@Override
public ReturnCode filterKeyValue(KeyValue v) {
+ this.referenceKV = v;
+
+ // Accumulates successive transformation of every filter that includes the KeyValue:
+ KeyValue transformed = v;
+
ReturnCode rc = operator == Operator.MUST_PASS_ONE?
ReturnCode.SKIP: ReturnCode.INCLUDE;
for (Filter filter : filters) {
@@ -206,6 +233,7 @@ public class FilterList implements Filte
case INCLUDE_AND_NEXT_COL:
rc = ReturnCode.INCLUDE_AND_NEXT_COL;
case INCLUDE:
+ transformed = filter.transform(transformed);
continue;
default:
return code;
@@ -220,16 +248,30 @@ public class FilterList implements Filte
if (rc != ReturnCode.INCLUDE_AND_NEXT_COL) {
rc = ReturnCode.INCLUDE;
}
+ transformed = filter.transform(transformed);
break;
case INCLUDE_AND_NEXT_COL:
rc = ReturnCode.INCLUDE_AND_NEXT_COL;
+ transformed = filter.transform(transformed);
// must continue here to evaluate all filters
+ break;
case NEXT_ROW:
+ break;
case SKIP:
- // continue;
+ break;
+ case NEXT_COL:
+ break;
+ case SEEK_NEXT_USING_HINT:
+ break;
+ default:
+ throw new IllegalStateException("Received code is not valid.");
}
}
}
+
+ // Save the transformed KeyValue for transform():
+ this.transformedKV = transformed;
+
return rc;
}
Modified: hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/filter/TestFilterList.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/filter/TestFilterList.java?rev=1504631&r1=1504630&r2=1504631&view=diff
==============================================================================
--- hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/filter/TestFilterList.java (original)
+++ hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/filter/TestFilterList.java Thu Jul 18 20:18:06 2013
@@ -34,10 +34,13 @@ import junit.framework.TestCase;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.SmallTests;
+import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.FilterList.Operator;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.experimental.categories.Category;
+import com.google.common.collect.Lists;
+
/**
* Tests filter sets
*
@@ -394,6 +397,46 @@ public class TestFilterList extends Test
minKeyValue));
}
+ /**
+ * Tests the behavior of transform() in a hierarchical filter.
+ *
+ * transform() only applies after a filterKeyValue() whose return-code includes the KeyValue.
+ * Lazy evaluation of AND
+ */
+ public void testTransformMPO() throws Exception {
+ // Apply the following filter:
+ // (family=fam AND qualifier=qual1 AND KeyOnlyFilter)
+ // OR (family=fam AND qualifier=qual2)
+ final FilterList flist = new FilterList(Operator.MUST_PASS_ONE, Lists.<Filter>newArrayList(
+ new FilterList(Operator.MUST_PASS_ALL, Lists.<Filter>newArrayList(
+ new FamilyFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("fam"))),
+ new QualifierFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("qual1"))),
+ new KeyOnlyFilter())),
+ new FilterList(Operator.MUST_PASS_ALL, Lists.<Filter>newArrayList(
+ new FamilyFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("fam"))),
+ new QualifierFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("qual2")))))));
+
+ final KeyValue kvQual1 = new KeyValue(
+ Bytes.toBytes("row"), Bytes.toBytes("fam"), Bytes.toBytes("qual1"), Bytes.toBytes("value"));
+ final KeyValue kvQual2 = new KeyValue(
+ Bytes.toBytes("row"), Bytes.toBytes("fam"), Bytes.toBytes("qual2"), Bytes.toBytes("value"));
+ final KeyValue kvQual3 = new KeyValue(
+ Bytes.toBytes("row"), Bytes.toBytes("fam"), Bytes.toBytes("qual3"), Bytes.toBytes("value"));
+
+ // Value for fam:qual1 should be stripped:
+ assertEquals(Filter.ReturnCode.INCLUDE, flist.filterKeyValue(kvQual1));
+ final KeyValue transformedQual1 = flist.transform(kvQual1);
+ assertEquals(0, transformedQual1.getValue().length);
+
+ // Value for fam:qual2 should not be stripped:
+ assertEquals(Filter.ReturnCode.INCLUDE, flist.filterKeyValue(kvQual2));
+ final KeyValue transformedQual2 = flist.transform(kvQual2);
+ assertEquals("value", Bytes.toString(transformedQual2.getValue()));
+
+ // Other keys should be skipped:
+ assertEquals(Filter.ReturnCode.SKIP, flist.filterKeyValue(kvQual3));
+ }
+
@org.junit.Rule
public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();