You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@usergrid.apache.org by to...@apache.org on 2014/10/28 00:04:43 UTC

[4/7] git commit: Added bucket tests

Added bucket tests


Project: http://git-wip-us.apache.org/repos/asf/incubator-usergrid/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-usergrid/commit/dd84d6bd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-usergrid/tree/dd84d6bd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-usergrid/diff/dd84d6bd

Branch: refs/heads/key-row-sharding
Commit: dd84d6bd777352e811fc77574c013c88902a29fe
Parents: 5a56dd4
Author: Todd Nine <tn...@apigee.com>
Authored: Fri Oct 24 12:04:27 2014 -0600
Committer: Todd Nine <tn...@apigee.com>
Committed: Fri Oct 24 12:04:27 2014 -0600

----------------------------------------------------------------------
 .../core/astyanax/BucketLocator.java            | 67 -------------
 .../persistence/core/hash/BucketLocator.java    | 77 +++++++++++++++
 .../core/hash/ExpandingBucketLocator.java       | 99 ++++++++++++++++++++
 .../core/hash/BucketLocatorTest.java            | 66 +++++++++++++
 .../core/hash/ExpandingBucketLocatorTest.java   | 58 ++++++++++++
 5 files changed, 300 insertions(+), 67 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-usergrid/blob/dd84d6bd/stack/corepersistence/common/src/main/java/org/apache/usergrid/persistence/core/astyanax/BucketLocator.java
----------------------------------------------------------------------
diff --git a/stack/corepersistence/common/src/main/java/org/apache/usergrid/persistence/core/astyanax/BucketLocator.java b/stack/corepersistence/common/src/main/java/org/apache/usergrid/persistence/core/astyanax/BucketLocator.java
deleted file mode 100644
index 49041f8..0000000
--- a/stack/corepersistence/common/src/main/java/org/apache/usergrid/persistence/core/astyanax/BucketLocator.java
+++ /dev/null
@@ -1,67 +0,0 @@
-package org.apache.usergrid.persistence.core.astyanax;/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-
-import com.google.common.hash.Funnel;
-import com.google.common.hash.HashCode;
-import com.google.common.hash.HashFunction;
-import com.google.common.hash.Hashing;
-
-
-/**
- * Simple utility to locate which bucket an element should be in
- */
-public class BucketLocator<T> {
-
-    /**
-     * Use the murmur 3 hash
-     */
-    private static final HashFunction HASHER = Hashing.murmur3_128();
-
-    private final int totalBuckets;
-    private final Funnel<T> funnel;
-
-
-    public BucketLocator( final int totalBuckets, final Funnel<T> funnel ) {
-        this.totalBuckets = totalBuckets;
-        this.funnel = funnel;
-    }
-
-
-    /**
-     * Locate the bucket number given the value, the funnel and the total buckets.
-     *
-     * Assigns to {@code hashCode} a "bucket" in the range {@code [0, buckets)}, in a uniform manner that minimizes the
-     * need for remapping as {@code buckets} grows. That is, {@code consistentHash(h, n)} equals:
-     *
-     * <ul> <li>{@code n - 1}, with approximate probability {@code 1/n} <li>{@code consistentHash(h, n - 1)}, otherwise
-     * (probability {@code 1 - 1/n}) </ul>
-     *
-     * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">wikipedia article on consistent hashing</a>
-     * for more information.
-     */
-    public int getBucket( T value ) {
-
-        final HashCode hashCode = HASHER.hashObject( value, funnel );
-
-        int owningIndex = Hashing.consistentHash( hashCode, totalBuckets );
-
-        return owningIndex;
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-usergrid/blob/dd84d6bd/stack/corepersistence/common/src/main/java/org/apache/usergrid/persistence/core/hash/BucketLocator.java
----------------------------------------------------------------------
diff --git a/stack/corepersistence/common/src/main/java/org/apache/usergrid/persistence/core/hash/BucketLocator.java b/stack/corepersistence/common/src/main/java/org/apache/usergrid/persistence/core/hash/BucketLocator.java
new file mode 100644
index 0000000..52ffeba
--- /dev/null
+++ b/stack/corepersistence/common/src/main/java/org/apache/usergrid/persistence/core/hash/BucketLocator.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.usergrid.persistence.core.hash;
+
+
+
+import com.google.common.hash.Funnel;
+import com.google.common.hash.HashCode;
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hashing;
+
+
+/**
+ * Simple utility to locate which bucket an element should be located based on it's funnel
+ *
+ */
+public class BucketLocator<T> {
+
+    /**
+     * Use the murmur 3 hash
+     */
+    private static final HashFunction HASHER = Hashing.murmur3_128();
+
+    private final int totalBuckets;
+    private final Funnel<T> funnel;
+
+
+    public BucketLocator(final Funnel<T> funnel, final int totalBuckets ) {
+        this.funnel = funnel;
+        this.totalBuckets = totalBuckets;
+    }
+
+
+    /**
+     * Locate the bucket number given the value, the funnel and the total buckets.
+     *
+     * Assigns to {@code hashCode} a "bucket" in the range {@code [0, buckets)}, in a uniform manner that minimizes the
+     * need for remapping as {@code buckets} grows. That is, {@code consistentHash(h, n)} equals:
+     *
+     * <ul> <li>{@code n - 1}, with approximate probability {@code 1/n} <li>{@code consistentHash(h, n - 1)}, otherwise
+     * (probability {@code 1 - 1/n}) </ul>
+     *
+     * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">wikipedia article on consistent hashing</a>
+     * for more information.
+     *
+     * <p>See <a href="http://arxiv.org/pdf/1406.2294v1.pdf">this paper</a> for more details on the algorithm</p>
+     *
+     *
+     * Note that after testing, increasing buckets does NOT yield the expected results.  You will need an algorithm
+     * that manually walks a tree.  See
+     *
+     */
+    public int getBucket( T value ) {
+
+        final HashCode hashCode = HASHER.hashObject( value, funnel );
+
+        int owningIndex = Hashing.consistentHash( hashCode, totalBuckets );
+
+        return owningIndex;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-usergrid/blob/dd84d6bd/stack/corepersistence/common/src/main/java/org/apache/usergrid/persistence/core/hash/ExpandingBucketLocator.java
----------------------------------------------------------------------
diff --git a/stack/corepersistence/common/src/main/java/org/apache/usergrid/persistence/core/hash/ExpandingBucketLocator.java b/stack/corepersistence/common/src/main/java/org/apache/usergrid/persistence/core/hash/ExpandingBucketLocator.java
new file mode 100644
index 0000000..c9bba2f
--- /dev/null
+++ b/stack/corepersistence/common/src/main/java/org/apache/usergrid/persistence/core/hash/ExpandingBucketLocator.java
@@ -0,0 +1,99 @@
+/*
+ *
+ *  * Licensed to the Apache Software Foundation (ASF) under one
+ *  * or more contributor license agreements.  See the NOTICE file
+ *  * distributed with this work for additional information
+ *  * regarding copyright ownership.  The ASF licenses this file
+ *  * to you under the Apache License, Version 2.0 (the
+ *  * "License"); you may not use this file except in compliance
+ *  * with the License.  You may obtain a copy of the License at
+ *  *
+ *  *    http://www.apache.org/licenses/LICENSE-2.0
+ *  *
+ *  * Unless required by applicable law or agreed to in writing,
+ *  * software distributed under the License is distributed on an
+ *  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *  * KIND, either express or implied.  See the License for the
+ *  * specific language governing permissions and limitations
+ *  * under the License.
+ *
+ */
+
+package org.apache.usergrid.persistence.core.hash;/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+import java.util.ArrayList;
+import java.util.List;
+
+import com.google.common.hash.Funnel;
+
+
+/**
+ * An algorithm that will generate all possible keys for different "Levels" of sharding.  For instance, imagine this scheme.
+ *
+ * 1 Shard
+ * 2 Shards
+ * 4 Shards
+ * 8 Shards
+ *
+ * (note that we do not need to expand by 2x each time, this is merely an example).
+ *
+ * When seeking on a string key, for 4 levels of the key, we get 4 different keys due to different shard sizes.
+ * This is faster than seeking ALL shards, since this would result in 15 shards, vs 4 in the example.
+ *
+ * @param <T>
+ */
+public class ExpandingBucketLocator<T> {
+
+    private final BucketLocator<T>[] bucketLocatorList;
+
+
+    /**
+     * Create a new instance with the specified history. For instance, from the javadoc above, the constructor
+     * would contains {8, 4, 3, 2, 1}.  Shards are returned in the size order they are given in the constructor
+     * @param funnel
+     * @param bucketSizes
+     */
+    public ExpandingBucketLocator( final Funnel<T> funnel, final int... bucketSizes ) {
+
+        bucketLocatorList = new BucketLocator[bucketSizes.length];
+
+        for(int i = 0; i < bucketSizes.length; i ++){
+            bucketLocatorList[i] = new BucketLocator<>( funnel, bucketSizes[i] );
+        }
+
+    }
+
+
+    /**
+     * Hash the results, and return them in the same order as specified in the constructor
+     * @param hash
+     * @return
+     */
+    public int[] getBuckets(T hash){
+        int[] results = new int[bucketLocatorList.length];
+
+        for(int i = 0; i < bucketLocatorList.length; i ++){
+          results[i] = bucketLocatorList[i].getBucket( hash );
+        }
+
+        return results;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-usergrid/blob/dd84d6bd/stack/corepersistence/common/src/test/java/org/apache/usergrid/persistence/core/hash/BucketLocatorTest.java
----------------------------------------------------------------------
diff --git a/stack/corepersistence/common/src/test/java/org/apache/usergrid/persistence/core/hash/BucketLocatorTest.java b/stack/corepersistence/common/src/test/java/org/apache/usergrid/persistence/core/hash/BucketLocatorTest.java
new file mode 100644
index 0000000..49c306b
--- /dev/null
+++ b/stack/corepersistence/common/src/test/java/org/apache/usergrid/persistence/core/hash/BucketLocatorTest.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.usergrid.persistence.core.hash;
+
+
+import java.nio.charset.Charset;
+
+import org.junit.Test;
+
+import com.google.common.hash.Funnel;
+import com.google.common.hash.PrimitiveSink;
+
+import junit.framework.TestCase;
+
+import static org.junit.Assert.assertEquals;
+
+
+/**
+ * Simple test that validates hashing is actually consistent as buckets grow
+ */
+public class BucketLocatorTest {
+
+    public static final Funnel<String> STRING_FUNNEL = new Funnel<String>() {
+
+        private Charset UTF8 = Charset.forName( "UTF8" );
+
+
+        @Override
+        public void funnel( final String from, final PrimitiveSink into ) {
+            into.putString( from, UTF8 );
+        }
+    };
+
+
+    @Test
+    public void stringHashing() {
+
+        final String hashValue = "keystring";
+
+        BucketLocator<String> bucketLocator1 = new BucketLocator<>(STRING_FUNNEL,  100 );
+
+        int index1 = bucketLocator1.getBucket( hashValue );
+
+        BucketLocator<String> bucketLocator2 = new BucketLocator<>( STRING_FUNNEL, 100 );
+
+        int index2 = bucketLocator2.getBucket( hashValue );
+
+        assertEquals( "Same index expected", index1, index2 );
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-usergrid/blob/dd84d6bd/stack/corepersistence/common/src/test/java/org/apache/usergrid/persistence/core/hash/ExpandingBucketLocatorTest.java
----------------------------------------------------------------------
diff --git a/stack/corepersistence/common/src/test/java/org/apache/usergrid/persistence/core/hash/ExpandingBucketLocatorTest.java b/stack/corepersistence/common/src/test/java/org/apache/usergrid/persistence/core/hash/ExpandingBucketLocatorTest.java
new file mode 100644
index 0000000..32deb07
--- /dev/null
+++ b/stack/corepersistence/common/src/test/java/org/apache/usergrid/persistence/core/hash/ExpandingBucketLocatorTest.java
@@ -0,0 +1,58 @@
+/*
+ *
+ *  * Licensed to the Apache Software Foundation (ASF) under one
+ *  * or more contributor license agreements.  See the NOTICE file
+ *  * distributed with this work for additional information
+ *  * regarding copyright ownership.  The ASF licenses this file
+ *  * to you under the Apache License, Version 2.0 (the
+ *  * "License"); you may not use this file except in compliance
+ *  * with the License.  You may obtain a copy of the License at
+ *  *
+ *  *    http://www.apache.org/licenses/LICENSE-2.0
+ *  *
+ *  * Unless required by applicable law or agreed to in writing,
+ *  * software distributed under the License is distributed on an
+ *  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *  * KIND, either express or implied.  See the License for the
+ *  * specific language governing permissions and limitations
+ *  * under the License.
+ *
+ */
+
+package org.apache.usergrid.persistence.core.hash;
+
+
+import java.util.Arrays;
+
+import org.junit.Test;
+
+import junit.framework.TestCase;
+
+
+/**
+ * Tests that we get consistent results when hashing
+ */
+public class ExpandingBucketLocatorTest extends TestCase {
+
+
+    @Test
+    public void testConsistency(){
+
+        final ExpandingBucketLocator<String> expandingBucketLocator1 = new ExpandingBucketLocator<>( BucketLocatorTest.STRING_FUNNEL, 20, 10, 1 );
+        final ExpandingBucketLocator<String> expandingBucketLocator2 = new ExpandingBucketLocator<>( BucketLocatorTest.STRING_FUNNEL, 20, 10, 1 );
+
+        final String key = "mytestkey";
+
+
+        int[] results1 = expandingBucketLocator1.getBuckets(key  );
+        int[] results2 = expandingBucketLocator2.getBuckets(key  );
+
+        assertTrue( "Same results returned", Arrays.equals(results1, results2));
+
+        assertTrue("Within bounds", results1[0] <= 19);
+        assertTrue("Within bounds", results1[1] <= 9);
+        assertTrue("Within bounds", results1[2] <= 0);
+
+
+    }
+}