You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/11 04:15:04 UTC

svn commit: r1181511 - in /hbase/branches/0.89/src: main/java/org/apache/hadoop/hbase/filter/ColumnRangeFilter.java main/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java test/java/org/apache/hadoop/hbase/filter/TestColumnRangeFilter.java

Author: nspiegelberg
Date: Tue Oct 11 02:15:03 2011
New Revision: 1181511

URL: http://svn.apache.org/viewvc?rev=1181511&view=rev
Log:
Add ColumnRangeFilter

Summary:
Add ColumnRangeFilter. This filter is used for selecting only those keys
with columns that are between minColumn and maxColumn. For example, if
minColumn is 'an' and maxColumn is 'be', it will pass keys with columns like
'ana' and 'bad', but not keys with columns like 'bed' or 'eye'. If minColumn
is null, there is no lower bound. If maxColumn is null, there is no upper
bound.

Test Plan:
Tested using the unit test:
1. verified the columns we get are as expected.
2. verified that the desired seek behavior happened as expected.
3. tried to measure performance on the unit test with/without seek
optimization. The runtime in the unit test varies quite a bit.

Reviewed By: kannan
Reviewers: jgray, kannan, pkhemani, kranganathan
CC: jfan, kannan, gqchen, hbase@lists
Revert Plan:
OK

Differential Revision: 223913

Added:
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/filter/ColumnRangeFilter.java
    hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/filter/TestColumnRangeFilter.java
Modified:
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java

Added: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/filter/ColumnRangeFilter.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/filter/ColumnRangeFilter.java?rev=1181511&view=auto
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/filter/ColumnRangeFilter.java (added)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/filter/ColumnRangeFilter.java Tue Oct 11 02:15:03 2011
@@ -0,0 +1,172 @@
+/*
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.filter;
+
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.util.Bytes;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.DataInput;
+
+/**
+ * This filter is used for selecting only those keys with columns that are
+ * between minColumn and maxColumn. For example, if minColumn is 'an', and
+ * maxColumn is 'be', it will pass keys with columns like 'ana', 'bad', but not
+ * keys with columns like 'bed', 'eye'.
+ *
+ * If minColumn is null, there is no lower bound. If maxColumn is null, there is
+ * no upper bound.
+ *
+ * minColumnInclusive and maxColumnInclusive specify if the ranges are inclusive
+ * or not.
+ */
+public class ColumnRangeFilter extends FilterBase {
+  protected byte[] minColumn = null; // lower bound; null means no lower bound
+  protected boolean minColumnInclusive = true;
+  protected byte[] maxColumn = null; // upper bound; null means no upper bound
+  protected boolean maxColumnInclusive = false;
+
+  public ColumnRangeFilter() {
+    super();
+  }
+  /**
+   * Create a filter to select those keys with columns that are between minColumn
+   * and maxColumn.
+   * @param minColumn minimum value for the column range. If it's null,
+   * there is no lower bound.
+   * @param minColumnInclusive if true, include minColumn in the range.
+   * @param maxColumn maximum value for the column range. If it's null,
+   * there is no upper bound.
+   * @param maxColumnInclusive if true, include maxColumn in the range.
+   */
+  public ColumnRangeFilter(final byte[] minColumn, boolean minColumnInclusive,
+      final byte[] maxColumn, boolean maxColumnInclusive) {
+    this.minColumn = minColumn;
+    this.minColumnInclusive = minColumnInclusive;
+    this.maxColumn = maxColumn;
+    this.maxColumnInclusive = maxColumnInclusive;
+  }
+
+  /**
+   * @return true if minColumn itself is part of the range.
+   */
+  public boolean isMinColumnInclusive() {
+    return minColumnInclusive;
+  }
+
+  /**
+   * @return true if maxColumn itself is part of the range.
+   */
+  public boolean isMaxColumnInclusive() {
+    return maxColumnInclusive;
+  }
+
+  /**
+   * @return the lower bound of the column range (the internal array, not a copy)
+   */
+  public byte[] getMinColumn() {
+    return this.minColumn;
+  }
+
+  /**
+   * @return the upper bound of the column range (the internal array, not a copy)
+   */
+  public byte[] getMaxColumn() {
+    return this.maxColumn;
+  }
+  /** Classifies kv by comparing its qualifier bytes with minColumn and maxColumn. */
+  @Override
+  public ReturnCode filterKeyValue(KeyValue kv) {
+    byte[] buffer = kv.getBuffer();
+    int qualifierOffset = kv.getQualifierOffset();
+    int qualifierLength = kv.getQualifierLength();
+    int cmpMin = 1; // stays positive when minColumn is null, i.e. no lower bound
+
+    if (this.minColumn != null) {
+      cmpMin = Bytes.compareTo(buffer, qualifierOffset, qualifierLength,
+          this.minColumn, 0, this.minColumn.length);
+    }
+    // Qualifier sorts before minColumn: return the seek-with-hint code (see getNextKeyHint).
+    if (cmpMin < 0) {
+      return ReturnCode.SEEK_NEXT_USING_HINT;
+    }
+    // Qualifier equals minColumn but the lower bound is exclusive: skip this KeyValue.
+    if (!this.minColumnInclusive && cmpMin == 0) {
+      return ReturnCode.SKIP;
+    }
+    // Lower bound is satisfied; with no upper bound there is nothing left to check.
+    if (this.maxColumn == null) {
+      return ReturnCode.INCLUDE;
+    }
+
+    int cmpMax = Bytes.compareTo(buffer, qualifierOffset, qualifierLength,
+        this.maxColumn, 0, this.maxColumn.length);
+    // Inside the range when below maxColumn, or equal to it with an inclusive bound.
+    if (this.maxColumnInclusive && cmpMax <= 0 ||
+        !this.maxColumnInclusive && cmpMax < 0) {
+      return ReturnCode.INCLUDE;
+    }
+    // Qualifier is past the range (assumes qualifiers arrive in sorted order - TODO confirm).
+    return ReturnCode.NEXT_ROW;
+  }
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    // Each bound is written as: null flag, byte array, inclusive flag. The
+    // flag is needed because the array alone cannot tell empty from null.
+    out.writeBoolean(this.minColumn == null);
+    Bytes.writeByteArray(out, this.minColumn);
+    out.writeBoolean(this.minColumnInclusive);
+    // The max bound repeats the same flag, array, inclusive layout.
+    out.writeBoolean(this.maxColumn == null);
+    Bytes.writeByteArray(out, this.maxColumn);
+    out.writeBoolean(this.maxColumnInclusive);
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    boolean isMinColumnNull = in.readBoolean();
+    this.minColumn = Bytes.readByteArray(in);
+    // The array is always read, mirroring write(); the flag decides whether it is kept.
+    if (isMinColumnNull) {
+      this.minColumn = null;
+    }
+
+    this.minColumnInclusive = in.readBoolean();
+
+    boolean isMaxColumnNull = in.readBoolean();
+    this.maxColumn = Bytes.readByteArray(in);
+    if (isMaxColumnNull) {
+      this.maxColumn = null;
+    }
+    this.maxColumnInclusive = in.readBoolean();
+  }
+
+  /** Builds the hint via KeyValue.createFirstOnRow from kv's row and family, with minColumn as qualifier. */
+  @Override
+  public KeyValue getNextKeyHint(KeyValue kv) {
+    return KeyValue.createFirstOnRow(kv.getBuffer(), kv.getRowOffset(), kv
+        .getRowLength(), kv.getBuffer(), kv.getFamilyOffset(), kv
+        .getFamilyLength(), this.minColumn, 0, this.minColumn == null ? 0
+        : this.minColumn.length);
+  }
+}

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java?rev=1181511&r1=1181510&r2=1181511&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java Tue Oct 11 02:15:03 2011
@@ -174,7 +174,7 @@ public class HbaseObjectWritable impleme
     addToMap(ColumnPrefixFilter.class, code++);
 
     addToMap(KeyOnlyFilter.class, code++);
-
+    addToMap(ColumnRangeFilter.class, code++);
   }
 
   private Class<?> declaredClass;
@@ -529,4 +529,4 @@ public class HbaseObjectWritable impleme
   public Configuration getConf() {
     return this.conf;
   }
-}
\ No newline at end of file
+}

Added: hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/filter/TestColumnRangeFilter.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/filter/TestColumnRangeFilter.java?rev=1181511&view=auto
==============================================================================
--- hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/filter/TestColumnRangeFilter.java (added)
+++ hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/filter/TestColumnRangeFilter.java Tue Oct 11 02:15:03 2011
@@ -0,0 +1,242 @@
+package org.apache.hadoop.hbase.filter;
+
+import static org.junit.Assert.*;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValueTestUtil;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.InternalScanner;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.Test;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+
+/** Test helper: a range of strings; a null bound leaves that side open. */
+class StringRange {
+  private final String start;
+  private final String end;
+  private final boolean startInclusive;
+  private final boolean endInclusive;
+
+  public StringRange(String start, boolean startInclusive, String end,
+      boolean endInclusive) {
+    this.start = start;
+    this.end = end;
+    this.startInclusive = startInclusive;
+    this.endInclusive = endInclusive;
+  }
+
+  public String getStart() {
+    return start;
+  }
+
+  public String getEnd() {
+    return end;
+  }
+
+  public boolean isStartInclusive() {
+    return startInclusive;
+  }
+
+  public boolean isEndInclusive() {
+    return endInclusive;
+  }
+
+  /** XORs the hash codes of the non-null bounds; the inclusive flags take no part. */
+  @Override
+  public int hashCode() {
+    int startHash = (start == null) ? 0 : start.hashCode();
+    int endHash = (end == null) ? 0 : end.hashCode();
+    return startHash ^ endHash;
+  }
+
+  /** Renders interval notation such as {@code [p, q)}; a null bound prints as "null". */
+  @Override
+  public String toString() {
+    StringBuilder text = new StringBuilder();
+    text.append(startInclusive ? "[" : "(");
+    text.append(start).append(", ").append(end);
+    text.append(endInclusive ? "]" : ")");
+    return text.toString();
+  }
+
+  /**
+   * Tells whether value falls inside this range, using String.compareTo order.
+   */
+  public boolean inRange(String value) {
+    if (start != null) {
+      int startCmp = value.compareTo(start);
+      if (startCmp < 0 || (startCmp == 0 && !startInclusive)) {
+        return false;
+      }
+    }
+    if (end != null) {
+      int endCmp = value.compareTo(end);
+      if (endCmp > 0 || (endCmp == 0 && !endInclusive)) {
+        return false;
+      }
+    }
+    return true;
+  }
+}
+
+public class TestColumnRangeFilter {
+
+  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+
+  private final Log LOG = LogFactory.getLog(this.getClass());
+
+  /**
+   * @throws java.lang.Exception
+   */
+  @BeforeClass
+  public static void setUpBeforeClass() throws Exception {
+    TEST_UTIL.startMiniCluster(3);
+  }
+
+  /**
+   * @throws java.lang.Exception
+   */
+  @AfterClass
+  public static void tearDownAfterClass() throws Exception {
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  /**
+   * @throws java.lang.Exception
+   */
+  @Before
+  public void setUp() throws Exception {
+    // Nothing to do.
+  }
+
+  /**
+   * @throws java.lang.Exception
+   */
+  @After
+  public void tearDown() throws Exception {
+    // Nothing to do.
+  }
+
+  @Test
+  public void TestColumnRangeFilterClient() throws Exception {
+    String family = "Family";
+    String table = "TestColumnRangeFilterClient";
+    HTable ht = TEST_UTIL.createTable(Bytes.toBytes(table),
+        Bytes.toBytes(family), Integer.MAX_VALUE);
+
+    List<String> rows = generateRandomWords(10, 8);
+    long maxTimestamp = 2;
+    List<String> columns = generateRandomWords(20000, 8);
+
+    List<KeyValue> kvList = new ArrayList<KeyValue>();
+
+    Map<StringRange, List<KeyValue>> rangeMap = new HashMap<StringRange, List<KeyValue>>();
+
+    rangeMap.put(new StringRange(null, true, "b", false),
+        new ArrayList<KeyValue>());
+    rangeMap.put(new StringRange("p", true, "q", false),
+        new ArrayList<KeyValue>());
+    rangeMap.put(new StringRange("r", false, "s", true),
+        new ArrayList<KeyValue>());
+    rangeMap.put(new StringRange("z", false, null, false),
+        new ArrayList<KeyValue>());
+    String valueString = "ValueString";
+
+    for (String row : rows) {
+      Put p = new Put(Bytes.toBytes(row));
+      for (String column : columns) {
+        for (long timestamp = 1; timestamp <= maxTimestamp; timestamp++) {
+          KeyValue kv = KeyValueTestUtil.create(row, family, column, timestamp,
+              valueString);
+          p.add(kv);
+          kvList.add(kv);
+          for (StringRange s : rangeMap.keySet()) {
+            if (s.inRange(column)) {
+              rangeMap.get(s).add(kv);
+            }
+          }
+        }
+      }
+      ht.put(p);
+    }
+
+    TEST_UTIL.flush();
+    try {
+      Thread.sleep(3000);
+    } catch (InterruptedException i) {
+        // ignore
+    }
+
+    ColumnRangeFilter filter;
+    Scan scan = new Scan();
+    scan.setMaxVersions();
+    for (StringRange s : rangeMap.keySet()) {
+      filter = new ColumnRangeFilter(s.getStart() == null ? null
+          : Bytes.toBytes(s.getStart()), s.isStartInclusive(),
+          s.getEnd() == null ? null : Bytes.toBytes(s.getEnd()),
+          s.isEndInclusive());
+      scan.setFilter(filter);
+      ResultScanner scanner = ht.getScanner(scan);
+      List<KeyValue> results = new ArrayList<KeyValue>();
+      LOG.info("scan column range: " + s.toString());
+      long timeBeforeScan = System.currentTimeMillis();
+
+      Result result;
+      while ((result = scanner.next()) != null) {
+        for (KeyValue kv : result.list()) {
+          results.add(kv);
+        }
+      }
+      long scanTime = System.currentTimeMillis() - timeBeforeScan;
+      LOG.info("scan time = " + scanTime + "ms");
+      LOG.info("found " + results.size() + " results");
+      LOG.info("Expecting " + rangeMap.get(s).size() + " results");
+
+      /*
+      for (KeyValue kv : results) {
+        LOG.info("found row " + Bytes.toString(kv.getRow()) + ", column "
+            + Bytes.toString(kv.getQualifier()));
+      }
+      */
+
+      assertEquals(rangeMap.get(s).size(), results.size());
+    }
+  }
+
+  List<String> generateRandomWords(int numberOfWords, int maxLengthOfWords) {
+    Set<String> wordSet = new HashSet<String>();
+    for (int i = 0; i < numberOfWords; i++) {
+      int lengthOfWords = (int) (Math.random() * maxLengthOfWords) + 1;
+      char[] wordChar = new char[lengthOfWords];
+      for (int j = 0; j < wordChar.length; j++) {
+        wordChar[j] = (char) (Math.random() * 26 + 97);
+      }
+      String word = new String(wordChar);
+      wordSet.add(word);
+    }
+    List<String> wordList = new ArrayList<String>(wordSet);
+    return wordList;
+  }
+}