You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@usergrid.apache.org by to...@apache.org on 2014/10/28 00:04:43 UTC
[4/7] git commit: Added bucket tests
Added bucket tests
Project: http://git-wip-us.apache.org/repos/asf/incubator-usergrid/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-usergrid/commit/dd84d6bd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-usergrid/tree/dd84d6bd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-usergrid/diff/dd84d6bd
Branch: refs/heads/key-row-sharding
Commit: dd84d6bd777352e811fc77574c013c88902a29fe
Parents: 5a56dd4
Author: Todd Nine <tn...@apigee.com>
Authored: Fri Oct 24 12:04:27 2014 -0600
Committer: Todd Nine <tn...@apigee.com>
Committed: Fri Oct 24 12:04:27 2014 -0600
----------------------------------------------------------------------
.../core/astyanax/BucketLocator.java | 67 -------------
.../persistence/core/hash/BucketLocator.java | 77 +++++++++++++++
.../core/hash/ExpandingBucketLocator.java | 99 ++++++++++++++++++++
.../core/hash/BucketLocatorTest.java | 66 +++++++++++++
.../core/hash/ExpandingBucketLocatorTest.java | 58 ++++++++++++
5 files changed, 300 insertions(+), 67 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-usergrid/blob/dd84d6bd/stack/corepersistence/common/src/main/java/org/apache/usergrid/persistence/core/astyanax/BucketLocator.java
----------------------------------------------------------------------
diff --git a/stack/corepersistence/common/src/main/java/org/apache/usergrid/persistence/core/astyanax/BucketLocator.java b/stack/corepersistence/common/src/main/java/org/apache/usergrid/persistence/core/astyanax/BucketLocator.java
deleted file mode 100644
index 49041f8..0000000
--- a/stack/corepersistence/common/src/main/java/org/apache/usergrid/persistence/core/astyanax/BucketLocator.java
+++ /dev/null
@@ -1,67 +0,0 @@
-package org.apache.usergrid.persistence.core.astyanax;/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-
-import com.google.common.hash.Funnel;
-import com.google.common.hash.HashCode;
-import com.google.common.hash.HashFunction;
-import com.google.common.hash.Hashing;
-
-
-/**
- * Simple utility to locate which bucket an element should be in
- */
-public class BucketLocator<T> {
-
- /**
- * Use the murmur 3 hash
- */
- private static final HashFunction HASHER = Hashing.murmur3_128();
-
- private final int totalBuckets;
- private final Funnel<T> funnel;
-
-
- public BucketLocator( final int totalBuckets, final Funnel<T> funnel ) {
- this.totalBuckets = totalBuckets;
- this.funnel = funnel;
- }
-
-
- /**
- * Locate the bucket number given the value, the funnel and the total buckets.
- *
- * Assigns to {@code hashCode} a "bucket" in the range {@code [0, buckets)}, in a uniform manner that minimizes the
- * need for remapping as {@code buckets} grows. That is, {@code consistentHash(h, n)} equals:
- *
- * <ul> <li>{@code n - 1}, with approximate probability {@code 1/n} <li>{@code consistentHash(h, n - 1)}, otherwise
- * (probability {@code 1 - 1/n}) </ul>
- *
- * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">wikipedia article on consistent hashing</a>
- * for more information.
- */
- public int getBucket( T value ) {
-
- final HashCode hashCode = HASHER.hashObject( value, funnel );
-
- int owningIndex = Hashing.consistentHash( hashCode, totalBuckets );
-
- return owningIndex;
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-usergrid/blob/dd84d6bd/stack/corepersistence/common/src/main/java/org/apache/usergrid/persistence/core/hash/BucketLocator.java
----------------------------------------------------------------------
diff --git a/stack/corepersistence/common/src/main/java/org/apache/usergrid/persistence/core/hash/BucketLocator.java b/stack/corepersistence/common/src/main/java/org/apache/usergrid/persistence/core/hash/BucketLocator.java
new file mode 100644
index 0000000..52ffeba
--- /dev/null
+++ b/stack/corepersistence/common/src/main/java/org/apache/usergrid/persistence/core/hash/BucketLocator.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.usergrid.persistence.core.hash;
+
+
+
+import com.google.common.hash.Funnel;
+import com.google.common.hash.HashCode;
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hashing;
+
+
+/**
+ * Simple utility to locate which bucket an element should be located based on it's funnel
+ *
+ */
+public class BucketLocator<T> {
+
+ /**
+ * Use the murmur 3 hash
+ */
+ private static final HashFunction HASHER = Hashing.murmur3_128();
+
+ private final int totalBuckets;
+ private final Funnel<T> funnel;
+
+
+ public BucketLocator(final Funnel<T> funnel, final int totalBuckets ) {
+ this.funnel = funnel;
+ this.totalBuckets = totalBuckets;
+ }
+
+
+ /**
+ * Locate the bucket number given the value, the funnel and the total buckets.
+ *
+ * Assigns to {@code hashCode} a "bucket" in the range {@code [0, buckets)}, in a uniform manner that minimizes the
+ * need for remapping as {@code buckets} grows. That is, {@code consistentHash(h, n)} equals:
+ *
+ * <ul> <li>{@code n - 1}, with approximate probability {@code 1/n} <li>{@code consistentHash(h, n - 1)}, otherwise
+ * (probability {@code 1 - 1/n}) </ul>
+ *
+ * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">wikipedia article on consistent hashing</a>
+ * for more information.
+ *
+ * <p>See <a href="http://arxiv.org/pdf/1406.2294v1.pdf">this paper</a> for more details on the algorithm</p>
+ *
+ *
+ * Note that after testing, increasing buckets does NOT yield the expected results. You will need an algorithm
+ * that manually walks a tree. See
+ *
+ */
+ public int getBucket( T value ) {
+
+ final HashCode hashCode = HASHER.hashObject( value, funnel );
+
+ int owningIndex = Hashing.consistentHash( hashCode, totalBuckets );
+
+ return owningIndex;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-usergrid/blob/dd84d6bd/stack/corepersistence/common/src/main/java/org/apache/usergrid/persistence/core/hash/ExpandingBucketLocator.java
----------------------------------------------------------------------
diff --git a/stack/corepersistence/common/src/main/java/org/apache/usergrid/persistence/core/hash/ExpandingBucketLocator.java b/stack/corepersistence/common/src/main/java/org/apache/usergrid/persistence/core/hash/ExpandingBucketLocator.java
new file mode 100644
index 0000000..c9bba2f
--- /dev/null
+++ b/stack/corepersistence/common/src/main/java/org/apache/usergrid/persistence/core/hash/ExpandingBucketLocator.java
@@ -0,0 +1,99 @@
+/*
+ *
+ * * Licensed to the Apache Software Foundation (ASF) under one
+ * * or more contributor license agreements. See the NOTICE file
+ * * distributed with this work for additional information
+ * * regarding copyright ownership. The ASF licenses this file
+ * * to you under the Apache License, Version 2.0 (the
+ * * "License"); you may not use this file except in compliance
+ * * with the License. You may obtain a copy of the License at
+ * *
+ * * http://www.apache.org/licenses/LICENSE-2.0
+ * *
+ * * Unless required by applicable law or agreed to in writing,
+ * * software distributed under the License is distributed on an
+ * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * * KIND, either express or implied. See the License for the
+ * * specific language governing permissions and limitations
+ * * under the License.
+ *
+ */
+
+package org.apache.usergrid.persistence.core.hash;/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+import java.util.ArrayList;
+import java.util.List;
+
+import com.google.common.hash.Funnel;
+
+
+/**
+ * An algorithm that will generate all possible keys for different "Levels" of sharding. For instance, imagine this scheme.
+ *
+ * 1 Shard
+ * 2 Shards
+ * 4 Shards
+ * 8 Shards
+ *
+ * (note that we do not need to expand by 2x each time, this is merely an example).
+ *
+ * When seeking on a string key, for 4 levels of the key, we get 4 different keys due to different shard sizes.
+ * This is faster than seeking ALL shards, since this would result in 15 shards, vs 4 in the example.
+ *
+ * @param <T>
+ */
+public class ExpandingBucketLocator<T> {
+
+ private final BucketLocator<T>[] bucketLocatorList;
+
+
+ /**
+ * Create a new instance with the specified history. For instance, from the javadoc above, the constructor
+ * would contains {8, 4, 3, 2, 1}. Shards are returned in the size order they are given in the constructor
+ * @param funnel
+ * @param bucketSizes
+ */
+ public ExpandingBucketLocator( final Funnel<T> funnel, final int... bucketSizes ) {
+
+ bucketLocatorList = new BucketLocator[bucketSizes.length];
+
+ for(int i = 0; i < bucketSizes.length; i ++){
+ bucketLocatorList[i] = new BucketLocator<>( funnel, bucketSizes[i] );
+ }
+
+ }
+
+
+ /**
+ * Hash the results, and return them in the same order as specified in the constructor
+ * @param hash
+ * @return
+ */
+ public int[] getBuckets(T hash){
+ int[] results = new int[bucketLocatorList.length];
+
+ for(int i = 0; i < bucketLocatorList.length; i ++){
+ results[i] = bucketLocatorList[i].getBucket( hash );
+ }
+
+ return results;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-usergrid/blob/dd84d6bd/stack/corepersistence/common/src/test/java/org/apache/usergrid/persistence/core/hash/BucketLocatorTest.java
----------------------------------------------------------------------
diff --git a/stack/corepersistence/common/src/test/java/org/apache/usergrid/persistence/core/hash/BucketLocatorTest.java b/stack/corepersistence/common/src/test/java/org/apache/usergrid/persistence/core/hash/BucketLocatorTest.java
new file mode 100644
index 0000000..49c306b
--- /dev/null
+++ b/stack/corepersistence/common/src/test/java/org/apache/usergrid/persistence/core/hash/BucketLocatorTest.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.usergrid.persistence.core.hash;
+
+
+import java.nio.charset.Charset;
+
+import org.junit.Test;
+
+import com.google.common.hash.Funnel;
+import com.google.common.hash.PrimitiveSink;
+
+import junit.framework.TestCase;
+
+import static org.junit.Assert.assertEquals;
+
+
+/**
+ * Simple test that validates hashing is actually consistent as buckets grow
+ */
+public class BucketLocatorTest {
+
+ public static final Funnel<String> STRING_FUNNEL = new Funnel<String>() {
+
+ private Charset UTF8 = Charset.forName( "UTF8" );
+
+
+ @Override
+ public void funnel( final String from, final PrimitiveSink into ) {
+ into.putString( from, UTF8 );
+ }
+ };
+
+
+ @Test
+ public void stringHashing() {
+
+ final String hashValue = "keystring";
+
+ BucketLocator<String> bucketLocator1 = new BucketLocator<>(STRING_FUNNEL, 100 );
+
+ int index1 = bucketLocator1.getBucket( hashValue );
+
+ BucketLocator<String> bucketLocator2 = new BucketLocator<>( STRING_FUNNEL, 100 );
+
+ int index2 = bucketLocator2.getBucket( hashValue );
+
+ assertEquals( "Same index expected", index1, index2 );
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-usergrid/blob/dd84d6bd/stack/corepersistence/common/src/test/java/org/apache/usergrid/persistence/core/hash/ExpandingBucketLocatorTest.java
----------------------------------------------------------------------
diff --git a/stack/corepersistence/common/src/test/java/org/apache/usergrid/persistence/core/hash/ExpandingBucketLocatorTest.java b/stack/corepersistence/common/src/test/java/org/apache/usergrid/persistence/core/hash/ExpandingBucketLocatorTest.java
new file mode 100644
index 0000000..32deb07
--- /dev/null
+++ b/stack/corepersistence/common/src/test/java/org/apache/usergrid/persistence/core/hash/ExpandingBucketLocatorTest.java
@@ -0,0 +1,58 @@
+/*
+ *
+ * * Licensed to the Apache Software Foundation (ASF) under one
+ * * or more contributor license agreements. See the NOTICE file
+ * * distributed with this work for additional information
+ * * regarding copyright ownership. The ASF licenses this file
+ * * to you under the Apache License, Version 2.0 (the
+ * * "License"); you may not use this file except in compliance
+ * * with the License. You may obtain a copy of the License at
+ * *
+ * * http://www.apache.org/licenses/LICENSE-2.0
+ * *
+ * * Unless required by applicable law or agreed to in writing,
+ * * software distributed under the License is distributed on an
+ * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * * KIND, either express or implied. See the License for the
+ * * specific language governing permissions and limitations
+ * * under the License.
+ *
+ */
+
+package org.apache.usergrid.persistence.core.hash;
+
+
+import java.util.Arrays;
+
+import org.junit.Test;
+
+import junit.framework.TestCase;
+
+
+/**
+ * Tests that we get consistent results when hashing
+ */
+public class ExpandingBucketLocatorTest extends TestCase {
+
+
+ @Test
+ public void testConsistency(){
+
+ final ExpandingBucketLocator<String> expandingBucketLocator1 = new ExpandingBucketLocator<>( BucketLocatorTest.STRING_FUNNEL, 20, 10, 1 );
+ final ExpandingBucketLocator<String> expandingBucketLocator2 = new ExpandingBucketLocator<>( BucketLocatorTest.STRING_FUNNEL, 20, 10, 1 );
+
+ final String key = "mytestkey";
+
+
+ int[] results1 = expandingBucketLocator1.getBuckets(key );
+ int[] results2 = expandingBucketLocator2.getBuckets(key );
+
+ assertTrue( "Same results returned", Arrays.equals(results1, results2));
+
+ assertTrue("Within bounds", results1[0] <= 19);
+ assertTrue("Within bounds", results1[1] <= 9);
+ assertTrue("Within bounds", results1[2] <= 0);
+
+
+ }
+}