You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/11 04:01:18 UTC
svn commit: r1181352 - in /hbase/branches/0.89/src:
main/java/org/apache/hadoop/hbase/ main/java/org/apache/hadoop/hbase/filter/
main/java/org/apache/hadoop/hbase/io/
main/java/org/apache/hadoop/hbase/io/hfile/
main/java/org/apache/hadoop/hbase/regions...
Author: nspiegelberg
Date: Tue Oct 11 02:01:17 2011
New Revision: 1181352
URL: http://svn.apache.org/viewvc?rev=1181352&view=rev
Log:
Search optimization
Summary:
So this is the final diff for search optimization and the reseek feature. It
has been reviewed by the community and committed to public trunk. I am still
sending out an internal diff. Feel free to give further comments or suggestions.
Test Plan:
Written tests for HFile and ColumnPrefixFilter to check for correctness.
Performance measurement done.
DiffCamp Revision: 145905
Reviewed By: kranganathan
CC: davidrecordon, achao, kannan, kranganathan, hbase@lists
Tasks:
#277270: Search optimization
Revert Plan:
OK
Added:
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/filter/ColumnPrefixFilter.java
hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/filter/TestColumnPrefixFilter.java
hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/io/hfile/TestReseekTo.java
Modified:
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/KeyValue.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/filter/Filter.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/filter/FilterList.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileScanner.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/MinorCompactingStoreScanner.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/KeyValueScanFixture.java
hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeyValueHeap.java
hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestQueryMatcher.java
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/KeyValue.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/KeyValue.java?rev=1181352&r1=1181351&r2=1181352&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/KeyValue.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/KeyValue.java Tue Oct 11 02:01:17 2011
@@ -1581,6 +1581,31 @@ public class KeyValue implements Writabl
}
/**
+ * Create a KeyValue for the specified row, family and qualifier that would be
+ * smaller than all other possible KeyValues that have the same row,
+ * family, qualifier.
+ * Used for seeking.
+ * @param row row key
+ * @param roffset row offset
+ * @param rlength row length
+ * @param family family name
+ * @param foffset family offset
+ * @param flength family length
+ * @param qualifier column qualifier
+ * @param qoffset qualifier offset
+ * @param qlength qualifier length
+ * @return First possible key on passed Row, Family, Qualifier.
+ */
+ public static KeyValue createFirstOnRow(final byte [] row,
+ final int roffset, final int rlength, final byte [] family,
+ final int foffset, final int flength, final byte [] qualifier,
+ final int qoffset, final int qlength) {
+ return new KeyValue(row, roffset, rlength, family,
+ foffset, flength, qualifier, qoffset, qlength,
+ HConstants.LATEST_TIMESTAMP, Type.Maximum, null, 0, 0);
+ }
+
+ /**
* @param b
* @return A KeyValue made of a byte array that holds the key-only part.
* Needed to convert hfile index members to KeyValues.
Added: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/filter/ColumnPrefixFilter.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/filter/ColumnPrefixFilter.java?rev=1181352&view=auto
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/filter/ColumnPrefixFilter.java (added)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/filter/ColumnPrefixFilter.java Tue Oct 11 02:01:17 2011
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.filter;
+
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValue.Type;
+import org.apache.hadoop.hbase.util.Bytes;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.DataInput;
+
+/**
+ * This filter is used for selecting only those keys with columns that matches
+ * a particular prefix. For example, if prefix is 'an', it will pass keys will
+ * columns like 'and', 'anti' but not keys with columns like 'ball', 'act'.
+ */
+public class ColumnPrefixFilter extends FilterBase {
+ protected byte [] prefix = null;
+
+ public ColumnPrefixFilter() {
+ super();
+ }
+
+ public ColumnPrefixFilter(final byte [] prefix) {
+ this.prefix = prefix;
+ }
+
+ public byte[] getPrefix() {
+ return prefix;
+ }
+
+ @Override
+ public ReturnCode filterKeyValue(KeyValue kv) {
+ if (this.prefix == null || kv.getBuffer() == null) {
+ return ReturnCode.INCLUDE;
+ } else {
+ return filterColumn(kv.getBuffer(), kv.getQualifierOffset(), kv.getQualifierLength());
+ }
+ }
+
+ public ReturnCode filterColumn(byte[] buffer, int qualifierOffset, int qualifierLength) {
+ if (qualifierLength < prefix.length) {
+ int cmp = Bytes.compareTo(buffer, qualifierOffset, qualifierLength, this.prefix, 0,
+ qualifierLength);
+ if (cmp <= 0) {
+ return ReturnCode.SEEK_NEXT_USING_HINT;
+ } else {
+ return ReturnCode.NEXT_ROW;
+ }
+ } else {
+ int cmp = Bytes.compareTo(buffer, qualifierOffset, this.prefix.length, this.prefix, 0,
+ this.prefix.length);
+ if (cmp < 0) {
+ return ReturnCode.SEEK_NEXT_USING_HINT;
+ } else if (cmp > 0) {
+ return ReturnCode.NEXT_ROW;
+ } else {
+ return ReturnCode.INCLUDE;
+ }
+ }
+ }
+
+ public void write(DataOutput out) throws IOException {
+ Bytes.writeByteArray(out, this.prefix);
+ }
+
+ public void readFields(DataInput in) throws IOException {
+ this.prefix = Bytes.readByteArray(in);
+ }
+
+ public KeyValue getNextKeyHint(KeyValue kv) {
+ return KeyValue.createFirstOnRow(
+ kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(), kv.getBuffer(),
+ kv.getFamilyOffset(), kv.getFamilyLength(), prefix, 0, prefix.length);
+ }
+}
\ No newline at end of file
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/filter/Filter.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/filter/Filter.java?rev=1181352&r1=1181351&r2=1181352&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/filter/Filter.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/filter/Filter.java Tue Oct 11 02:01:17 2011
@@ -110,7 +110,11 @@ public interface Filter extends Writable
* still be called.
*/
NEXT_ROW,
- }
+ /**
+ * Seek to next key which is given as hint by the filter.
+ */
+ SEEK_NEXT_USING_HINT,
+}
/**
* Chance to alter the list of keyvalues to be submitted.
@@ -136,4 +140,13 @@ public interface Filter extends Writable
*/
public boolean filterRow();
+ /**
+ * If the filter returns the match code SEEK_NEXT_USING_HINT, then
+ * it should also tell which is the next key it must seek to.
+ * After receiving the match code SEEK_NEXT_USING_HINT, the QueryMatcher would
+ * call this function to find out which key it must next seek to.
+ * @return KeyValue which must be next seeked. return null if the filter is
+ * not sure which key to seek to next.
+ */
+ public KeyValue getNextKeyHint(KeyValue currentKV);
}
\ No newline at end of file
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java?rev=1181352&r1=1181351&r2=1181352&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java Tue Oct 11 02:01:17 2011
@@ -110,4 +110,15 @@ public abstract class FilterBase impleme
public boolean filterRow() {
return false;
}
+
+ /**
+ * Filters that are not sure which key must be next seeked to, can inherit
+ * this implementation that, by default, returns a null KeyValue.
+ *
+ * @inheritDoc
+ */
+ public KeyValue getNextKeyHint(KeyValue currentKV) {
+ return null;
+ }
+
}
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/filter/FilterList.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/filter/FilterList.java?rev=1181352&r1=1181351&r2=1181352&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/filter/FilterList.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/filter/FilterList.java Tue Oct 11 02:01:17 2011
@@ -245,4 +245,9 @@ public class FilterList implements Filte
HbaseObjectWritable.writeObject(out, filter, Writable.class, conf);
}
}
+
+ @Override
+ public KeyValue getNextKeyHint(KeyValue currentKV) {
+ return null;
+ }
}
\ No newline at end of file
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java?rev=1181352&r1=1181351&r2=1181352&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java Tue Oct 11 02:01:17 2011
@@ -201,6 +201,37 @@ public class HalfStoreFileReader extends
return delegate.seekTo(key, offset, length);
}
+ @Override
+ public int reseekTo(byte[] key) throws IOException {
+ return reseekTo(key, 0, key.length);
+ }
+
+ @Override
+ public int reseekTo(byte[] key, int offset, int length)
+ throws IOException {
+ //This function is identical to the corresponding seekTo function except
+ //that we call reseekTo (and not seekTo) on the delegate.
+ if (top) {
+ if (getComparator().compare(key, offset, length, splitkey, 0,
+ splitkey.length) < 0) {
+ return -1;
+ }
+ } else {
+ if (getComparator().compare(key, offset, length, splitkey, 0,
+ splitkey.length) >= 0) {
+ // we would place the scanner in the second half.
+ // it might be an error to return false here ever...
+ boolean res = delegate.seekBefore(splitkey, 0, splitkey.length);
+ if (!res) {
+ throw new IOException("Seeking for a key in bottom of file, but" +
+ " key exists in top of file, failed on seekBefore(midkey)");
+ }
+ return 1;
+ }
+ }
+ return delegate.reseekTo(key, offset, length);
+ }
+
public org.apache.hadoop.hbase.io.hfile.HFile.Reader getReader() {
return this.delegate.getReader();
}
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java?rev=1181352&r1=1181351&r2=1181352&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java Tue Oct 11 02:01:17 2011
@@ -159,6 +159,7 @@ public class HbaseObjectWritable impleme
addToMap(WritableByteArrayComparable.class, code++);
addToMap(FirstKeyOnlyFilter.class, code++);
addToMap(DependentColumnFilter.class, code++);
+ addToMap(ColumnPrefixFilter.class, code++);
addToMap(Delete [].class, code++);
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java?rev=1181352&r1=1181351&r2=1181352&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java Tue Oct 11 02:01:17 2011
@@ -30,7 +30,6 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
-import java.util.SortedSet;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
@@ -1256,13 +1255,37 @@ public class HFile {
return seekTo(key, 0, key.length);
}
-
public int seekTo(byte[] key, int offset, int length) throws IOException {
int b = reader.blockContainingKey(key, offset, length);
if (b < 0) return -1; // falls before the beginning of the file! :-(
// Avoid re-reading the same block (that'd be dumb).
- loadBlock(b);
+ loadBlock(b, true);
+ return blockSeek(key, offset, length, false);
+ }
+
+ public int reseekTo(byte [] key) throws IOException {
+ return reseekTo(key, 0, key.length);
+ }
+
+ public int reseekTo(byte[] key, int offset, int length)
+ throws IOException {
+
+ if (this.block != null && this.currKeyLen != 0) {
+ ByteBuffer bb = getKey();
+ int compared = this.reader.comparator.compare(key, offset, length,
+ bb.array(), bb.arrayOffset(), bb.limit());
+ if (compared < 1) {
+ //If the required key is less than or equal to current key, then
+ //don't do anything.
+ return compared;
+ }
+ }
+ int b = reader.blockContainingKey(key, offset, length);
+ if (b < 0) {
+ return -1;
+ }
+ loadBlock(b, false);
return blockSeek(key, offset, length, false);
}
@@ -1336,7 +1359,7 @@ public class HFile {
b--;
// TODO shortcut: seek forward in this block to the last key of the block.
}
- loadBlock(b);
+ loadBlock(b, true);
blockSeek(key, offset, length, true);
return true;
}
@@ -1377,7 +1400,7 @@ public class HFile {
return true;
}
- private void loadBlock(int bloc) throws IOException {
+ private void loadBlock(int bloc, boolean rewind) throws IOException {
if (block == null) {
block = reader.readBlock(bloc, this.cacheBlocks, this.pread);
currBlock = bloc;
@@ -1389,7 +1412,13 @@ public class HFile {
blockFetches++;
} else {
// we are already in the same block, just rewind to seek again.
- block.rewind();
+ if (rewind) {
+ block.rewind();
+ }
+ else {
+ //Go back by (size of rowlength + size of valuelength) = 8 bytes
+ block.position(block.position()-8);
+ }
}
}
}
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileScanner.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileScanner.java?rev=1181352&r1=1181351&r2=1181352&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileScanner.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileScanner.java Tue Oct 11 02:01:17 2011
@@ -21,7 +21,6 @@ package org.apache.hadoop.hbase.io.hfile
import java.io.IOException;
import java.nio.ByteBuffer;
-import java.util.SortedSet;
import org.apache.hadoop.hbase.KeyValue;
@@ -47,13 +46,38 @@ public interface HFileScanner {
* @return -1, if key < k[0], no position;
* 0, such that k[i] = key and scanner is left in position i; and
* 1, such that k[i] < key, and scanner is left in position i.
- * Furthermore, there may be a k[i+1], such that k[i] < key < k[i+1]
- * but there may not be a k[i+1], and next() will return false (EOF).
+ * The scanner will position itself between k[i] and k[i+1] where
+ * k[i] < key <= k[i+1].
+ * If there is no key k[i+1] greater than or equal to the input key, then the
+ * scanner will position itself at the end of the file and next() will return
+ * false when it is called.
* @throws IOException
*/
public int seekTo(byte[] key) throws IOException;
public int seekTo(byte[] key, int offset, int length) throws IOException;
/**
+ * Reseek to or just before the passed <code>key</code>. Similar to seekTo
+ * except that this can be called even if the scanner is not at the beginning
+ * of a file.
+ * This can be used to seek only to keys which come after the current position
+ * of the scanner.
+ * Consider the key stream of all the keys in the file,
+ * <code>k[0] .. k[n]</code>, where there are n keys in the file after
+ * current position of HFileScanner.
+ * The scanner will position itself between k[i] and k[i+1] where
+ * k[i] < key <= k[i+1].
+ * If there is no key k[i+1] greater than or equal to the input key, then the
+ * scanner will position itself at the end of the file and next() will return
+ * false when it is called.
+ * @param key Key to find (should be non-null)
+ * @return -1, if key < k[0], no position;
+ * 0, such that k[i] = key and scanner is left in position i; and
+ * 1, such that k[i] < key, and scanner is left in position i.
+ * @throws IOException
+ */
+ public int reseekTo(byte[] key) throws IOException;
+ public int reseekTo(byte[] key, int offset, int length) throws IOException;
+ /**
* Consider the key stream of all the keys in the file,
* <code>k[0] .. k[n]</code>, where there are n keys in the file.
* @param key Key to find
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java?rev=1181352&r1=1181351&r2=1181352&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java Tue Oct 11 02:01:17 2011
@@ -220,6 +220,33 @@ public class KeyValueHeap implements Key
return false;
}
+ public boolean reseek(KeyValue seekKey) throws IOException {
+ //This function is very identical to the seek(KeyValue) function except that
+ //scanner.seek(seekKey) is changed to scanner.reseek(seekKey)
+ if (this.current == null) {
+ return false;
+ }
+ this.heap.add(this.current);
+ this.current = null;
+
+ KeyValueScanner scanner;
+ while ((scanner = this.heap.poll()) != null) {
+ KeyValue topKey = scanner.peek();
+ if (comparator.getComparator().compare(seekKey, topKey) <= 0) {
+ // Top KeyValue is at-or-after Seek KeyValue
+ this.current = scanner;
+ return true;
+ }
+ if (!scanner.reseek(seekKey)) {
+ scanner.close();
+ } else {
+ this.heap.add(scanner);
+ }
+ }
+ // Heap is returning empty, scanner is done
+ return false;
+ }
+
/**
* @return the current Heap
*/
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java?rev=1181352&r1=1181351&r2=1181352&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java Tue Oct 11 02:01:17 2011
@@ -47,6 +47,16 @@ public interface KeyValueScanner {
public boolean seek(KeyValue key) throws IOException;
/**
+ * Reseek the scanner at or after the specified KeyValue.
+ * This method is guaranteed to seek to or before the required key only if the
+ * key comes after the current position of the scanner. Should not be used
+ * to seek to a key which may come before the current position.
+ * @param key seek value (should be non-null)
+ * @return true if scanner has values left, false if end of scanner
+ */
+ public boolean reseek(KeyValue key) throws IOException;
+
+ /**
* Close the KeyValue scanner.
*/
public void close();
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java?rev=1181352&r1=1181351&r2=1181352&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java Tue Oct 11 02:01:17 2011
@@ -20,7 +20,6 @@
package org.apache.hadoop.hbase.regionserver;
-import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.lang.management.RuntimeMXBean;
import java.rmi.UnexpectedException;
@@ -563,6 +562,20 @@ public class MemStore implements HeapSiz
return lowest != null;
}
+ @Override
+ public boolean reseek(KeyValue key) {
+ while (kvsetNextRow != null &&
+ comparator.compare(kvsetNextRow, key) < 0) {
+ kvsetNextRow = getNext(kvsetIt);
+ }
+
+ while (snapshotNextRow != null &&
+ comparator.compare(snapshotNextRow, key) < 0) {
+ snapshotNextRow = getNext(snapshotIt);
+ }
+ return (kvsetNextRow != null || snapshotNextRow != null);
+ }
+
public synchronized KeyValue peek() {
//DebugPrint.println(" MS@" + hashCode() + " peek = " + getLowest());
return getLowest();
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/MinorCompactingStoreScanner.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/MinorCompactingStoreScanner.java?rev=1181352&r1=1181351&r2=1181352&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/MinorCompactingStoreScanner.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/MinorCompactingStoreScanner.java Tue Oct 11 02:01:17 2011
@@ -22,7 +22,6 @@ package org.apache.hadoop.hbase.regionse
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.io.hfile.HFile;
import java.io.IOException;
import java.util.List;
@@ -73,6 +72,10 @@ public class MinorCompactingStoreScanner
throw new UnsupportedOperationException("Can't seek a MinorCompactingStoreScanner");
}
+ public boolean reseek(KeyValue key) {
+ return seek(key);
+ }
+
/**
* High performance merge scan.
* @param writer
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java?rev=1181352&r1=1181351&r2=1181352&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java Tue Oct 11 02:01:17 2011
@@ -125,7 +125,7 @@ public class ScanQueryMatcher {
// could optimize this, if necessary?
// Could also be called SEEK_TO_CURRENT_ROW, but this
// should be rare/never happens.
- return MatchCode.SKIP;
+ return MatchCode.SEEK_NEXT_ROW;
}
// optimize case.
@@ -150,7 +150,7 @@ public class ScanQueryMatcher {
long timestamp = kv.getTimestamp();
if (isExpired(timestamp)) {
// done, the rest of this column will also be expired as well.
- return MatchCode.SEEK_NEXT_COL;
+ return getNextRowOrNextColumn(bytes, offset, qualLength);
}
byte type = kv.getType();
@@ -194,6 +194,8 @@ public class ScanQueryMatcher {
} else if (filterResponse == ReturnCode.NEXT_ROW) {
stickyNextRow = true;
return MatchCode.SEEK_NEXT_ROW;
+ } else if (filterResponse == ReturnCode.SEEK_NEXT_USING_HINT) {
+ return MatchCode.SEEK_NEXT_USING_HINT;
}
}
@@ -272,6 +274,14 @@ public class ScanQueryMatcher {
return this.startKey;
}
+ public KeyValue getNextKeyHint(KeyValue kv) {
+ if (filter == null) {
+ return null;
+ } else {
+ return filter.getNextKeyHint(kv);
+ }
+ }
+
/**
* {@link #match} return codes. These instruct the scanner moving through
* memstores and StoreFiles what to do with the current KeyValue.
@@ -317,5 +327,10 @@ public class ScanQueryMatcher {
* Done with scan, thanks to the row filter.
*/
DONE_SCAN,
+
+ /*
+ * Seek to next key which is given as hint.
+ */
+ SEEK_NEXT_USING_HINT,
}
}
\ No newline at end of file
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java?rev=1181352&r1=1181351&r2=1181352&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java Tue Oct 11 02:01:17 2011
@@ -105,6 +105,20 @@ class StoreFileScanner implements KeyVal
}
}
+ public boolean reseek(KeyValue key) throws IOException {
+ try {
+ if (!reseekAtOrAfter(hfs, key)) {
+ close();
+ return false;
+ }
+ cur = hfs.getKeyValue();
+ hfs.next();
+ return true;
+ } catch (IOException ioe) {
+ throw new IOException("Could not seek " + this, ioe);
+ }
+ }
+
public void close() {
// Nothing to close on HFileScanner?
cur = null;
@@ -132,6 +146,19 @@ class StoreFileScanner implements KeyVal
return true;
}
+ static boolean reseekAtOrAfter(HFileScanner s, KeyValue k)
+ throws IOException {
+ //This function is similar to seekAtOrAfter function
+ int result = s.reseekTo(k.getBuffer(), k.getKeyOffset(), k.getKeyLength());
+ if (result <= 0) {
+ return true;
+ } else {
+ // passed KV is larger than current KV in file, if there is a next
+ // it is after, if not then this scanner is done.
+ return s.next();
+ }
+ }
+
// StoreFile filter hook.
public boolean shouldSeek(Scan scan, final SortedSet<byte[]> columns) {
return reader.shouldSeek(scan, columns);
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java?rev=1181352&r1=1181351&r2=1181352&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java Tue Oct 11 02:01:17 2011
@@ -274,6 +274,15 @@ class StoreScanner implements KeyValueSc
this.heap.next();
break;
+ case SEEK_NEXT_USING_HINT:
+ KeyValue nextKV = matcher.getNextKeyHint(kv);
+ if (nextKV != null) {
+ reseek(nextKV);
+ } else {
+ heap.next();
+ }
+ break;
+
default:
throw new RuntimeException("UNEXPECTED");
}
@@ -319,18 +328,20 @@ class StoreScanner implements KeyValueSc
private void checkReseek() throws IOException {
if (this.heap == null && this.lastTop != null) {
-
- reseek(this.lastTop);
+ resetScannerStack(this.lastTop);
this.lastTop = null; // gone!
}
// else dont need to reseek
}
- private void reseek(KeyValue lastTopKey) throws IOException {
+ private void resetScannerStack(KeyValue lastTopKey) throws IOException {
if (heap != null) {
throw new RuntimeException("StoreScanner.reseek run on an existing heap!");
}
+ /* When we have the scan object, should we not pass it to getScanners()
+ * to get a limited set of scanners? We did so in the constructor and we
+ * could have done it now by storing the scan object from the constructor */
List<KeyValueScanner> scanners = getScanners();
for(KeyValueScanner scanner : scanners) {
@@ -345,4 +356,11 @@ class StoreScanner implements KeyValueSc
KeyValue kv = heap.peek();
matcher.setRow((kv == null ? lastTopKey : kv).getRow());
}
+
+ @Override
+ public synchronized boolean reseek(KeyValue kv) throws IOException {
+ //Heap cannot be null, because this is only called from next() which
+ //guarantees that heap will never be null before this call.
+ return this.heap.reseek(kv);
+ }
}
Added: hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/filter/TestColumnPrefixFilter.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/filter/TestColumnPrefixFilter.java?rev=1181352&view=auto
==============================================================================
--- hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/filter/TestColumnPrefixFilter.java (added)
+++ hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/filter/TestColumnPrefixFilter.java Tue Oct 11 02:01:17 2011
@@ -0,0 +1,104 @@
+package org.apache.hadoop.hbase.filter;
+
+import static org.junit.Assert.*;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValueTestUtil;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.InternalScanner;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.Test;
+
+public class TestColumnPrefixFilter {
+
+ private final static HBaseTestingUtility TEST_UTIL = new
+ HBaseTestingUtility();
+
+ @Test
+ public void testColumnPrefixFilter() throws IOException {
+ String family = "Family";
+ HTableDescriptor htd = new HTableDescriptor("TestColumnPrefixFilter");
+ htd.addFamily(new HColumnDescriptor(family));
+ HRegionInfo info = new HRegionInfo(htd, null, null, false);
+ HRegion region = HRegion.createHRegion(info, HBaseTestingUtility.
+ getTestDir(), TEST_UTIL.getConfiguration());
+
+ List<String> rows = generateRandomWords(100, "row");
+ List<String> columns = generateRandomWords(10000, "column");
+ long maxTimestamp = 2;
+
+ List<KeyValue> kvList = new ArrayList<KeyValue>();
+
+ Map<String, List<KeyValue>> prefixMap = new HashMap<String,
+ List<KeyValue>>();
+
+ prefixMap.put("p", new ArrayList<KeyValue>());
+ prefixMap.put("s", new ArrayList<KeyValue>());
+
+ String valueString = "ValueString";
+
+ for (String row: rows) {
+ Put p = new Put(Bytes.toBytes(row));
+ for (String column: columns) {
+ for (long timestamp = 1; timestamp <= maxTimestamp; timestamp++) {
+ KeyValue kv = KeyValueTestUtil.create(row, family, column, timestamp,
+ valueString);
+ p.add(kv);
+ kvList.add(kv);
+ for (String s: prefixMap.keySet()) {
+ if (column.startsWith(s)) {
+ prefixMap.get(s).add(kv);
+ }
+ }
+ }
+ }
+ region.put(p);
+ }
+
+ ColumnPrefixFilter filter;
+ Scan scan = new Scan();
+ scan.setMaxVersions();
+ for (String s: prefixMap.keySet()) {
+ filter = new ColumnPrefixFilter(Bytes.toBytes(s));
+ scan.setFilter(filter);
+ InternalScanner scanner = region.getScanner(scan);
+ List<KeyValue> results = new ArrayList<KeyValue>();
+ while(scanner.next(results));
+ assertEquals(prefixMap.get(s).size(), results.size());
+ }
+ }
+
+ List<String> generateRandomWords(int numberOfWords, String suffix) {
+ Set<String> wordSet = new HashSet<String>();
+ for (int i = 0; i < numberOfWords; i++) {
+ int lengthOfWords = (int) (Math.random()*2) + 1;
+ char[] wordChar = new char[lengthOfWords];
+ for (int j = 0; j < wordChar.length; j++) {
+ wordChar[j] = (char) (Math.random() * 26 + 97);
+ }
+ String word;
+ if (suffix == null) {
+ word = new String(wordChar);
+ } else {
+ word = new String(wordChar) + suffix;
+ }
+ wordSet.add(word);
+ }
+ List<String> wordList = new ArrayList<String>(wordSet);
+ return wordList;
+ }
+}
Added: hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/io/hfile/TestReseekTo.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/io/hfile/TestReseekTo.java?rev=1181352&view=auto
==============================================================================
--- hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/io/hfile/TestReseekTo.java (added)
+++ hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/io/hfile/TestReseekTo.java Tue Oct 11 02:01:17 2011
@@ -0,0 +1,84 @@
+/**
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+/**
+ * Test {@link HFileScanner#reseekTo(byte[])}
+ */
+public class TestReseekTo {
+
+ private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+
+ @Test
+ public void testReseekTo() throws Exception {
+
+ Path ncTFile = new Path(HBaseTestingUtility.getTestDir(), "basic.hfile");
+ FSDataOutputStream fout = TEST_UTIL.getTestFileSystem().create(ncTFile);
+ HFile.Writer writer = new HFile.Writer(fout, 4000, "none", null);
+ int numberOfKeys = 1000;
+
+ String valueString = "Value";
+
+ List<Integer> keyList = new ArrayList<Integer>();
+ List<String> valueList = new ArrayList<String>();
+
+ for (int key = 0; key < numberOfKeys; key++) {
+ String value = valueString + key;
+ keyList.add(key);
+ valueList.add(value);
+ writer.append(Bytes.toBytes(key), Bytes.toBytes(value));
+ }
+ writer.close();
+ fout.close();
+
+ HFile.Reader reader = new HFile.Reader(TEST_UTIL.getTestFileSystem(),
+ ncTFile, null, false);
+ reader.loadFileInfo();
+ HFileScanner scanner = reader.getScanner(false, true);
+
+ scanner.seekTo();
+ for (int i = 0; i < keyList.size(); i++) {
+ Integer key = keyList.get(i);
+ String value = valueList.get(i);
+ scanner.seekTo(Bytes.toBytes(key));
+ assertEquals(value, scanner.getValueString());
+ }
+
+ scanner.seekTo();
+ for (int i = 0; i < keyList.size(); i += 10) {
+ Integer key = keyList.get(i);
+ String value = valueList.get(i);
+ scanner.reseekTo(Bytes.toBytes(key));
+ assertEquals(value, scanner.getValueString());
+ }
+ }
+
+}
\ No newline at end of file
Modified: hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/KeyValueScanFixture.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/KeyValueScanFixture.java?rev=1181352&r1=1181351&r2=1181352&view=diff
==============================================================================
--- hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/KeyValueScanFixture.java (original)
+++ hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/KeyValueScanFixture.java Tue Oct 11 02:01:17 2011
@@ -95,6 +95,11 @@ public class KeyValueScanFixture impleme
}
@Override
+ public boolean reseek(KeyValue key) {
+ return seek(key);
+ }
+
+ @Override
public void close() {
// noop.
}
Modified: hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeyValueHeap.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeyValueHeap.java?rev=1181352&r1=1181351&r2=1181352&view=diff
==============================================================================
--- hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeyValueHeap.java (original)
+++ hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeyValueHeap.java Tue Oct 11 02:01:17 2011
@@ -27,7 +27,6 @@ import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.hbase.HBaseTestCase;
-import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
@@ -255,6 +254,11 @@ public class TestKeyValueHeap extends HB
}
return false;
}
+
+ @Override
+ public boolean reseek(KeyValue key) throws IOException {
+ return seek(key);
+ }
}
}
Modified: hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestQueryMatcher.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestQueryMatcher.java?rev=1181352&r1=1181351&r2=1181352&view=diff
==============================================================================
--- hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestQueryMatcher.java (original)
+++ hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestQueryMatcher.java Tue Oct 11 02:01:17 2011
@@ -185,7 +185,7 @@ public class TestQueryMatcher extends HB
ScanQueryMatcher.MatchCode.INCLUDE,
ScanQueryMatcher.MatchCode.SEEK_NEXT_COL,
ScanQueryMatcher.MatchCode.INCLUDE,
- ScanQueryMatcher.MatchCode.SEEK_NEXT_COL,
+ ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW,
ScanQueryMatcher.MatchCode.DONE
};