You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by ec...@apache.org on 2015/10/05 22:51:18 UTC
accumulo git commit: ACCUMULO-4017 use boyer-moore sub-string search
Repository: accumulo
Updated Branches:
refs/heads/master 647cfb74f -> 29c7eeb6b
ACCUMULO-4017 use boyer-moore sub-string search
Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/29c7eeb6
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/29c7eeb6
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/29c7eeb6
Branch: refs/heads/master
Commit: 29c7eeb6b375090164e77b61806177781febcead
Parents: 647cfb7
Author: Eric C. Newton <er...@gmail.com>
Authored: Mon Oct 5 16:51:01 2015 -0400
Committer: Eric C. Newton <er...@gmail.com>
Committed: Mon Oct 5 16:51:01 2015 -0400
----------------------------------------------------------------------
.../core/iterators/user/GrepIterator.java | 51 +++++++++-----------
1 file changed, 23 insertions(+), 28 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/accumulo/blob/29c7eeb6/core/src/main/java/org/apache/accumulo/core/iterators/user/GrepIterator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/iterators/user/GrepIterator.java b/core/src/main/java/org/apache/accumulo/core/iterators/user/GrepIterator.java
index 043a729..d3efe2f 100644
--- a/core/src/main/java/org/apache/accumulo/core/iterators/user/GrepIterator.java
+++ b/core/src/main/java/org/apache/accumulo/core/iterators/user/GrepIterator.java
@@ -36,46 +36,35 @@ import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
public class GrepIterator extends Filter {
private byte term[];
+ private int right[] = new int[256];
@Override
public boolean accept(Key k, Value v) {
return match(v.get()) || match(k.getRowData()) || match(k.getColumnFamilyData()) || match(k.getColumnQualifierData());
}
- private boolean match(ByteSequence bs) {
- return indexOf(bs.getBackingArray(), bs.offset(), bs.length(), term) >= 0;
+ protected boolean match(ByteSequence bs) {
+ return indexOf(bs.getBackingArray(), bs.offset(), bs.length()) >= 0;
}
- private boolean match(byte[] ba) {
- return indexOf(ba, 0, ba.length, term) >= 0;
+ protected boolean match(byte[] ba) {
+ return indexOf(ba, 0, ba.length) >= 0;
}
- // copied code below from java string and modified
-
- private static int indexOf(byte[] source, int sourceOffset, int sourceCount, byte[] target) {
- byte first = target[0];
- int targetCount = target.length;
- int max = sourceOffset + (sourceCount - targetCount);
-
- for (int i = sourceOffset; i <= max; i++) {
- /* Look for first character. */
- if (source[i] != first) {
- while (++i <= max && source[i] != first)
- continue;
- }
-
- /* Found first character, now look at the rest of v2 */
- if (i <= max) {
- int j = i + 1;
- int end = j + targetCount - 1;
- for (int k = 1; j < end && source[j] == target[k]; j++, k++)
- continue;
-
- if (j == end) {
- /* Found whole string. */
- return i - sourceOffset;
+ protected int indexOf(byte[] value, int offset, int length) {
+ final int M = term.length;
+ final int N = offset + length;
+ int skip;
+ for (int i = offset; i <= N - M; i += skip) {
+ skip = 0;
+ for (int j = M - 1; j >= 0; j--) {
+ if (term[j] != value[i + j]) {
+ skip = Math.max(1, j - right[value[i + j]]);
}
}
+ if (skip == 0) {
+ return i;
+ }
}
return -1;
}
@@ -91,6 +80,12 @@ public class GrepIterator extends Filter {
public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {
super.init(source, options, env);
term = options.get("term").getBytes(UTF_8);
+ for (int i = 0; i < right.length; i++) {
+ right[i] = -1;
+ }
+ for (int i = 0; i < term.length; i++) {
+ right[term[i] & 0xff] = i;
+ }
}
/**