You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ec...@apache.org on 2016/03/24 03:01:59 UTC
hbase git commit: HBASE-15475 Allow TimestampsFilter to provide a
seek hint
Repository: hbase
Updated Branches:
refs/heads/branch-1.3 76a71084f -> 396e4f152
HBASE-15475 Allow TimestampsFilter to provide a seek hint
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/396e4f15
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/396e4f15
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/396e4f15
Branch: refs/heads/branch-1.3
Commit: 396e4f15261d43d3fe89c75b0d4157e8dba394c2
Parents: 76a7108
Author: Elliott Clark <ec...@apache.org>
Authored: Thu Mar 17 13:49:16 2016 -0700
Committer: Elliott Clark <ec...@apache.org>
Committed: Wed Mar 23 18:57:13 2016 -0700
----------------------------------------------------------------------
.../hadoop/hbase/filter/TimestampsFilter.java | 84 ++++++++++---
.../hbase/protobuf/generated/FilterProtos.java | 117 +++++++++++++++++--
hbase-protocol/src/main/protobuf/Filter.proto | 1 +
.../TestTimestampFilterSeekHint.java | 106 +++++++++++++++++
4 files changed, 280 insertions(+), 28 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/396e4f15/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/TimestampsFilter.java
----------------------------------------------------------------------
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/TimestampsFilter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/TimestampsFilter.java
index 27896ea..be5a0f6 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/TimestampsFilter.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/TimestampsFilter.java
@@ -19,16 +19,18 @@ package org.apache.hadoop.hbase.filter;
import com.google.common.base.Preconditions;
import com.google.protobuf.InvalidProtocolBufferException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.TreeSet;
+
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
-import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.protobuf.generated.FilterProtos;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.TreeSet;
-
/**
* Filter that returns only cells whose timestamp (version) is
* in the specified list of timestamps (versions).
@@ -42,6 +44,7 @@ import java.util.TreeSet;
@InterfaceStability.Stable
public class TimestampsFilter extends FilterBase {
+ private final boolean canHint;
TreeSet<Long> timestamps;
private static final int MAX_LOG_TIMESTAMPS = 5;
@@ -50,16 +53,29 @@ public class TimestampsFilter extends FilterBase {
long minTimeStamp = Long.MAX_VALUE;
/**
+ * Constructor for filter that retains only the specified timestamps in the list.
+ * @param timestamps
+ */
+ public TimestampsFilter(List<Long> timestamps) {
+ this(timestamps, false);
+ }
+
+ /**
* Constructor for filter that retains only those
* cells whose timestamp (version) is in the specified
* list of timestamps.
*
- * @param timestamps
+ * @param timestamps list of timestamps that are wanted.
+ * @param canHint should the filter provide a seek hint? This can skip
+ * past delete tombstones, so it should only be used when that
+ * is not an issue (no deletes, or don't care if deleted data
+ * becomes visible again)
*/
- public TimestampsFilter(List<Long> timestamps) {
+ public TimestampsFilter(List<Long> timestamps, boolean canHint) {
for (Long timestamp : timestamps) {
Preconditions.checkArgument(timestamp >= 0, "must be positive %s", timestamp);
}
+ this.canHint = canHint;
this.timestamps = new TreeSet<Long>(timestamps);
init();
}
@@ -96,7 +112,41 @@ public class TimestampsFilter extends FilterBase {
// to be lesser than all of the other values.
return ReturnCode.NEXT_COL;
}
- return ReturnCode.SKIP;
+ return canHint ? ReturnCode.SEEK_NEXT_USING_HINT : ReturnCode.SKIP;
+ }
+
+
+ /**
+ * Pick the next cell that the scanner should seek to. Since this can skip any number of cells,
+ * any of which can be a delete, this can resurrect old data.
+ *
+ * The method will only be used if canHint was set to true while creating the filter.
+ *
+ * @throws IOException This will never happen.
+ */
+ public Cell getNextCellHint(Cell currentCell) throws IOException {
+ if (!canHint) {
+ return null;
+ }
+
+ Long nextTimestampObject = timestamps.lower(currentCell.getTimestamp());
+
+ if (nextTimestampObject == null) {
+ // This should only happen if the current column's
+ // timestamp is below the last one in the list.
+ //
+ // It should never happen as the filterKeyValue should return NEXT_COL
+ // but it's always better to be extra safe and protect against future
+ // behavioral changes.
+
+ return KeyValueUtil.createLastOnRowCol(currentCell);
+ }
+
+ // Since we know the nextTimestampObject isn't null here there must still be
+ // timestamps that can be included. Cast the Long to a long and return
+ // a cell with the current row/cf/col and the next found timestamp.
+ long nextTimestamp = nextTimestampObject;
+ return KeyValueUtil.createFirstOnRowColTS(currentCell, nextTimestamp);
}
// Override here explicitly as the method in super class FilterBase might do a KeyValue recreate.
@@ -118,28 +168,30 @@ public class TimestampsFilter extends FilterBase {
/**
* @return The filter serialized using pb
*/
- public byte [] toByteArray() {
+ public byte[] toByteArray() {
FilterProtos.TimestampsFilter.Builder builder =
- FilterProtos.TimestampsFilter.newBuilder();
+ FilterProtos.TimestampsFilter.newBuilder();
builder.addAllTimestamps(this.timestamps);
+ builder.setCanHint(canHint);
return builder.build().toByteArray();
}
/**
* @param pbBytes A pb serialized {@link TimestampsFilter} instance
+ *
* @return An instance of {@link TimestampsFilter} made from <code>bytes</code>
- * @throws DeserializationException
* @see #toByteArray
*/
- public static TimestampsFilter parseFrom(final byte [] pbBytes)
- throws DeserializationException {
+ public static TimestampsFilter parseFrom(final byte[] pbBytes)
+ throws DeserializationException {
FilterProtos.TimestampsFilter proto;
try {
proto = FilterProtos.TimestampsFilter.parseFrom(pbBytes);
} catch (InvalidProtocolBufferException e) {
throw new DeserializationException(e);
}
- return new TimestampsFilter(proto.getTimestampsList());
+ return new TimestampsFilter(proto.getTimestampsList(),
+ proto.hasCanHint() && proto.getCanHint());
}
/**
@@ -175,7 +227,7 @@ public class TimestampsFilter extends FilterBase {
}
}
- return String.format("%s (%d/%d): [%s]", this.getClass().getSimpleName(),
- count, this.timestamps.size(), tsList.toString());
+ return String.format("%s (%d/%d): [%s] canHint: [%b]", this.getClass().getSimpleName(),
+ count, this.timestamps.size(), tsList.toString(), canHint);
}
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/396e4f15/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/FilterProtos.java
----------------------------------------------------------------------
diff --git a/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/FilterProtos.java b/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/FilterProtos.java
index e558371..5b995a4 100644
--- a/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/FilterProtos.java
+++ b/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/FilterProtos.java
@@ -13923,6 +13923,16 @@ public final class FilterProtos {
* <code>repeated int64 timestamps = 1 [packed = true];</code>
*/
long getTimestamps(int index);
+
+ // optional bool can_hint = 2;
+ /**
+ * <code>optional bool can_hint = 2;</code>
+ */
+ boolean hasCanHint();
+ /**
+ * <code>optional bool can_hint = 2;</code>
+ */
+ boolean getCanHint();
}
/**
* Protobuf type {@code hbase.pb.TimestampsFilter}
@@ -13996,6 +14006,11 @@ public final class FilterProtos {
input.popLimit(limit);
break;
}
+ case 16: {
+ bitField0_ |= 0x00000001;
+ canHint_ = input.readBool();
+ break;
+ }
}
}
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
@@ -14038,6 +14053,7 @@ public final class FilterProtos {
return PARSER;
}
+ private int bitField0_;
// repeated int64 timestamps = 1 [packed = true];
public static final int TIMESTAMPS_FIELD_NUMBER = 1;
private java.util.List<java.lang.Long> timestamps_;
@@ -14062,8 +14078,25 @@ public final class FilterProtos {
}
private int timestampsMemoizedSerializedSize = -1;
+ // optional bool can_hint = 2;
+ public static final int CAN_HINT_FIELD_NUMBER = 2;
+ private boolean canHint_;
+ /**
+ * <code>optional bool can_hint = 2;</code>
+ */
+ public boolean hasCanHint() {
+ return ((bitField0_ & 0x00000001) == 0x00000001);
+ }
+ /**
+ * <code>optional bool can_hint = 2;</code>
+ */
+ public boolean getCanHint() {
+ return canHint_;
+ }
+
private void initFields() {
timestamps_ = java.util.Collections.emptyList();
+ canHint_ = false;
}
private byte memoizedIsInitialized = -1;
public final boolean isInitialized() {
@@ -14084,6 +14117,9 @@ public final class FilterProtos {
for (int i = 0; i < timestamps_.size(); i++) {
output.writeInt64NoTag(timestamps_.get(i));
}
+ if (((bitField0_ & 0x00000001) == 0x00000001)) {
+ output.writeBool(2, canHint_);
+ }
getUnknownFields().writeTo(output);
}
@@ -14107,6 +14143,10 @@ public final class FilterProtos {
}
timestampsMemoizedSerializedSize = dataSize;
}
+ if (((bitField0_ & 0x00000001) == 0x00000001)) {
+ size += com.google.protobuf.CodedOutputStream
+ .computeBoolSize(2, canHint_);
+ }
size += getUnknownFields().getSerializedSize();
memoizedSerializedSize = size;
return size;
@@ -14132,6 +14172,11 @@ public final class FilterProtos {
boolean result = true;
result = result && getTimestampsList()
.equals(other.getTimestampsList());
+ result = result && (hasCanHint() == other.hasCanHint());
+ if (hasCanHint()) {
+ result = result && (getCanHint()
+ == other.getCanHint());
+ }
result = result &&
getUnknownFields().equals(other.getUnknownFields());
return result;
@@ -14149,6 +14194,10 @@ public final class FilterProtos {
hash = (37 * hash) + TIMESTAMPS_FIELD_NUMBER;
hash = (53 * hash) + getTimestampsList().hashCode();
}
+ if (hasCanHint()) {
+ hash = (37 * hash) + CAN_HINT_FIELD_NUMBER;
+ hash = (53 * hash) + hashBoolean(getCanHint());
+ }
hash = (29 * hash) + getUnknownFields().hashCode();
memoizedHashCode = hash;
return hash;
@@ -14260,6 +14309,8 @@ public final class FilterProtos {
super.clear();
timestamps_ = java.util.Collections.emptyList();
bitField0_ = (bitField0_ & ~0x00000001);
+ canHint_ = false;
+ bitField0_ = (bitField0_ & ~0x00000002);
return this;
}
@@ -14287,11 +14338,17 @@ public final class FilterProtos {
public org.apache.hadoop.hbase.protobuf.generated.FilterProtos.TimestampsFilter buildPartial() {
org.apache.hadoop.hbase.protobuf.generated.FilterProtos.TimestampsFilter result = new org.apache.hadoop.hbase.protobuf.generated.FilterProtos.TimestampsFilter(this);
int from_bitField0_ = bitField0_;
+ int to_bitField0_ = 0;
if (((bitField0_ & 0x00000001) == 0x00000001)) {
timestamps_ = java.util.Collections.unmodifiableList(timestamps_);
bitField0_ = (bitField0_ & ~0x00000001);
}
result.timestamps_ = timestamps_;
+ if (((from_bitField0_ & 0x00000002) == 0x00000002)) {
+ to_bitField0_ |= 0x00000001;
+ }
+ result.canHint_ = canHint_;
+ result.bitField0_ = to_bitField0_;
onBuilt();
return result;
}
@@ -14317,6 +14374,9 @@ public final class FilterProtos {
}
onChanged();
}
+ if (other.hasCanHint()) {
+ setCanHint(other.getCanHint());
+ }
this.mergeUnknownFields(other.getUnknownFields());
return this;
}
@@ -14410,6 +14470,39 @@ public final class FilterProtos {
return this;
}
+ // optional bool can_hint = 2;
+ private boolean canHint_ ;
+ /**
+ * <code>optional bool can_hint = 2;</code>
+ */
+ public boolean hasCanHint() {
+ return ((bitField0_ & 0x00000002) == 0x00000002);
+ }
+ /**
+ * <code>optional bool can_hint = 2;</code>
+ */
+ public boolean getCanHint() {
+ return canHint_;
+ }
+ /**
+ * <code>optional bool can_hint = 2;</code>
+ */
+ public Builder setCanHint(boolean value) {
+ bitField0_ |= 0x00000002;
+ canHint_ = value;
+ onChanged();
+ return this;
+ }
+ /**
+ * <code>optional bool can_hint = 2;</code>
+ */
+ public Builder clearCanHint() {
+ bitField0_ = (bitField0_ & ~0x00000002);
+ canHint_ = false;
+ onChanged();
+ return this;
+ }
+
// @@protoc_insertion_point(builder_scope:hbase.pb.TimestampsFilter)
}
@@ -17503,18 +17596,18 @@ public final class FilterProtos {
"ompareType\022(\n\ncomparator\030\004 \002(\0132\024.hbase.p" +
"b.Comparator\022\031\n\021filter_if_missing\030\005 \001(\010\022" +
"\033\n\023latest_version_only\030\006 \001(\010\".\n\nSkipFilt" +
- "er\022 \n\006filter\030\001 \002(\0132\020.hbase.pb.Filter\"*\n\020" +
+ "er\022 \n\006filter\030\001 \002(\0132\020.hbase.pb.Filter\"<\n\020" +
"TimestampsFilter\022\026\n\ntimestamps\030\001 \003(\003B\002\020\001" +
- "\">\n\013ValueFilter\022/\n\016compare_filter\030\001 \002(\0132" +
- "\027.hbase.pb.CompareFilter\"4\n\020WhileMatchFi" +
- "lter\022 \n\006filter\030\001 \002(\0132\020.hbase.pb.Filter\"\021" +
- "\n\017FilterAllFilter\"h\n\010RowRange\022\021\n\tstart_r",
- "ow\030\001 \001(\014\022\033\n\023start_row_inclusive\030\002 \001(\010\022\020\n" +
- "\010stop_row\030\003 \001(\014\022\032\n\022stop_row_inclusive\030\004 " +
- "\001(\010\"A\n\023MultiRowRangeFilter\022*\n\016row_range_" +
- "list\030\001 \003(\0132\022.hbase.pb.RowRangeBB\n*org.ap" +
- "ache.hadoop.hbase.protobuf.generatedB\014Fi" +
- "lterProtosH\001\210\001\001\240\001\001"
+ "\022\020\n\010can_hint\030\002 \001(\010\">\n\013ValueFilter\022/\n\016com" +
+ "pare_filter\030\001 \002(\0132\027.hbase.pb.CompareFilt" +
+ "er\"4\n\020WhileMatchFilter\022 \n\006filter\030\001 \002(\0132\020" +
+ ".hbase.pb.Filter\"\021\n\017FilterAllFilter\"h\n\010R",
+ "owRange\022\021\n\tstart_row\030\001 \001(\014\022\033\n\023start_row_" +
+ "inclusive\030\002 \001(\010\022\020\n\010stop_row\030\003 \001(\014\022\032\n\022sto" +
+ "p_row_inclusive\030\004 \001(\010\"A\n\023MultiRowRangeFi" +
+ "lter\022*\n\016row_range_list\030\001 \003(\0132\022.hbase.pb." +
+ "RowRangeBB\n*org.apache.hadoop.hbase.prot" +
+ "obuf.generatedB\014FilterProtosH\001\210\001\001\240\001\001"
};
com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
@@ -17670,7 +17763,7 @@ public final class FilterProtos {
internal_static_hbase_pb_TimestampsFilter_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_hbase_pb_TimestampsFilter_descriptor,
- new java.lang.String[] { "Timestamps", });
+ new java.lang.String[] { "Timestamps", "CanHint", });
internal_static_hbase_pb_ValueFilter_descriptor =
getDescriptor().getMessageTypes().get(25);
internal_static_hbase_pb_ValueFilter_fieldAccessorTable = new
http://git-wip-us.apache.org/repos/asf/hbase/blob/396e4f15/hbase-protocol/src/main/protobuf/Filter.proto
----------------------------------------------------------------------
diff --git a/hbase-protocol/src/main/protobuf/Filter.proto b/hbase-protocol/src/main/protobuf/Filter.proto
index 67d5717..1fa6697 100644
--- a/hbase-protocol/src/main/protobuf/Filter.proto
+++ b/hbase-protocol/src/main/protobuf/Filter.proto
@@ -146,6 +146,7 @@ message SkipFilter {
message TimestampsFilter {
repeated int64 timestamps = 1 [packed=true];
+ optional bool can_hint = 2;
}
message ValueFilter {
http://git-wip-us.apache.org/repos/asf/hbase/blob/396e4f15/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestTimestampFilterSeekHint.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestTimestampFilterSeekHint.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestTimestampFilterSeekHint.java
new file mode 100644
index 0000000..58f0c56
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestTimestampFilterSeekHint.java
@@ -0,0 +1,106 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import java.io.IOException;
+
+import com.google.common.collect.ImmutableList;
+import org.apache.commons.lang.RandomStringUtils;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.filter.TimestampsFilter;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.RegionServerTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import static org.junit.Assert.assertTrue;
+
+@Category({RegionServerTests.class, LargeTests.class})
+public class TestTimestampFilterSeekHint {
+
+ private final static HBaseTestingUtility TEST_UTIL = HBaseTestingUtility.createLocalHTU();
+ private final static String RK = "myRK";
+ private final static byte[] RK_BYTES = Bytes.toBytes(RK);
+
+ private final static String FAMILY = "D";
+ private final static byte[] FAMILY_BYTES = Bytes.toBytes(FAMILY);
+
+ private final static String QUAL = "0";
+ private final static byte[] QUAL_BYTES = Bytes.toBytes(QUAL);
+
+ public static final int MAX_VERSIONS = 50000;
+ private HRegion region;
+ private int regionCount = 0;
+
+ @Test
+ public void testGetSeek() throws IOException {
+ StoreFileScanner.instrument();
+ prepareRegion();
+
+ Get g = new Get(RK_BYTES);
+ final TimestampsFilter timestampsFilter = new TimestampsFilter(ImmutableList.of(5L), true);
+ g.setFilter(timestampsFilter);
+ final long initialSeekCount = StoreFileScanner.getSeekCount();
+ region.get(g);
+ final long finalSeekCount = StoreFileScanner.getSeekCount();
+
+ /*
+ Make sure there's more than one.
+ Aka one seek to get to the row, and one to get to the time.
+ */
+ assertTrue(finalSeekCount >= initialSeekCount + 3 );
+ }
+
+ @Test
+ public void testGetDoesntSeekWithNoHint() throws IOException {
+ StoreFileScanner.instrument();
+ prepareRegion();
+
+ Get g = new Get(RK_BYTES);
+ g.setFilter(new TimestampsFilter(ImmutableList.of(5L)));
+ final long initialSeekCount = StoreFileScanner.getSeekCount();
+ region.get(g);
+ final long finalSeekCount = StoreFileScanner.getSeekCount();
+
+ assertTrue(finalSeekCount >= initialSeekCount );
+ assertTrue(finalSeekCount < initialSeekCount + 3);
+ }
+
+ @Before
+ public void prepareRegion() throws IOException {
+ region =
+ TEST_UTIL.createTestRegion("TestTimestampFilterSeekHint" + regionCount++,
+ new HColumnDescriptor(FAMILY)
+ .setBlocksize(1024)
+ .setMaxVersions(MAX_VERSIONS)
+ );
+
+ for (long i = 0; i <MAX_VERSIONS - 2; i++) {
+ Put p = new Put(RK_BYTES, i);
+ p.addColumn(FAMILY_BYTES, QUAL_BYTES, Bytes.toBytes(RandomStringUtils.randomAlphabetic(255)));
+ region.put(p);
+ }
+ region.flush(true);
+ }
+}