You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@geode.apache.org by sb...@apache.org on 2016/02/18 15:51:57 UTC
[3/5] incubator-geode git commit: GEODE-970: Remove duplicate HLL
classes
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/IBuilder.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/IBuilder.java b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/IBuilder.java
new file mode 100755
index 0000000..bff40cd
--- /dev/null
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/IBuilder.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.gemstone.gemfire.internal.hll;
+
+/*
+ * Copyright (C) 2011 Clearspring Technologies, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+public interface IBuilder<T> {
+
+ T build();
+
+ int sizeof();
+}
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/ICardinality.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/ICardinality.java b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/ICardinality.java
new file mode 100755
index 0000000..1671fe5
--- /dev/null
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/ICardinality.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.gemstone.gemfire.internal.hll;
+
+/*
+ * Copyright (C) 2011 Clearspring Technologies, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import java.io.IOException;
+
+
+public interface ICardinality {
+
+ /**
+ * @param o stream element
+ * @return false if the value returned by cardinality() is unaffected by the appearance of o in the stream.
+ */
+ boolean offer(Object o);
+
+ /**
+ * Offer the value as a hashed long value
+ *
+ * @param hashedLong - the hash of the item to offer to the estimator
+ * @return false if the value returned by cardinality() is unaffected by the appearance of hashedLong in the stream
+ */
+ boolean offerHashed(long hashedLong);
+
+ /**
+ * Offer the value as a hashed long value
+ *
+ * @param hashedInt - the hash of the item to offer to the estimator
+ * @return false if the value returned by cardinality() is unaffected by the appearance of hashedInt in the stream
+ */
+ boolean offerHashed(int hashedInt);
+
+ /**
+ * @return the number of unique elements in the stream or an estimate thereof
+ */
+ long cardinality();
+
+ /**
+ * @return size in bytes needed for serialization
+ */
+ int sizeof();
+
+ /**
+ * @return byte[]
+ * @throws IOException
+ */
+ byte[] getBytes() throws IOException;
+
+ /**
+ * Merges estimators to produce a new estimator for the combined streams
+ * of this estimator and those passed as arguments.
+ * <p/>
+ * Nor this estimator nor the one passed as parameters are modified.
+ *
+ * @param estimators Zero or more compatible estimators
+ * @throws CardinalityMergeException If at least one of the estimators is not compatible with this one
+ */
+ ICardinality merge(ICardinality... estimators) throws CardinalityMergeException;
+}
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/MurmurHash.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/MurmurHash.java b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/MurmurHash.java
new file mode 100755
index 0000000..bab802e
--- /dev/null
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/MurmurHash.java
@@ -0,0 +1,261 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.gemstone.gemfire.internal.hll;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+/**
+ * This is a very fast, non-cryptographic hash suitable for general hash-based
+ * lookup. See http://murmurhash.googlepages.com/ for more details.
+ * <p/>
+ * <p>
+ * The C version of MurmurHash 2.0 found at that site was ported to Java by
+ * Andrzej Bialecki (ab at getopt org).
+ * </p>
+ */
+public class MurmurHash
+{
+ public static int hash(Object o)
+ {
+ if (o == null)
+ {
+ return 0;
+ }
+ if (o instanceof Long)
+ {
+ return hashLong((Long) o);
+ }
+ if (o instanceof Integer)
+ {
+ return hashLong((Integer) o);
+ }
+ if (o instanceof Double)
+ {
+ return hashLong(Double.doubleToRawLongBits((Double) o));
+ }
+ if (o instanceof Float)
+ {
+ return hashLong(Float.floatToRawIntBits((Float) o));
+ }
+ if (o instanceof String)
+ {
+ return hash(((String) o).getBytes());
+ }
+ if (o instanceof byte[])
+ {
+ return hash((byte[]) o);
+ }
+ return hash(o.toString());
+ }
+
+ public static int hash(byte[] data)
+ {
+ return hash(data, 0, data.length, -1);
+ }
+
+ public static int hash(byte[] data, int seed)
+ {
+ return hash(data, 0, data.length, seed);
+ }
+
+ public static int hash(byte[] data, int offset, int length, int seed)
+ {
+ int m = 0x5bd1e995;
+ int r = 24;
+
+ int h = seed ^ length;
+
+ int len_4 = length >> 2;
+
+ for (int i = 0; i < len_4; i++)
+ {
+ int i_4 = i << 2;
+ int k = data[offset + i_4 + 3];
+ k = k << 8;
+ k = k | (data[offset + i_4 + 2] & 0xff);
+ k = k << 8;
+ k = k | (data[offset + i_4 + 1] & 0xff);
+ k = k << 8;
+ k = k | (data[offset + i_4 + 0] & 0xff);
+ k *= m;
+ k ^= k >>> r;
+ k *= m;
+ h *= m;
+ h ^= k;
+ }
+
+ // avoid calculating modulo
+ int len_m = len_4 << 2;
+ int left = length - len_m;
+
+ if (left != 0)
+ {
+ if (left >= 3)
+ {
+ h ^= (int) data[offset + length - 3] << 16;
+ }
+ if (left >= 2)
+ {
+ h ^= (int) data[offset + length - 2] << 8;
+ }
+ if (left >= 1)
+ {
+ h ^= (int) data[offset + length - 1];
+ }
+
+ h *= m;
+ }
+
+ h ^= h >>> 13;
+ h *= m;
+ h ^= h >>> 15;
+
+ return h;
+ }
+
+ public static int hashLong(long data)
+ {
+ int m = 0x5bd1e995;
+ int r = 24;
+
+ int h = 0;
+
+ int k = (int) data * m;
+ k ^= k >>> r;
+ h ^= k * m;
+
+ k = (int) (data >> 32) * m;
+ k ^= k >>> r;
+ h *= m;
+ h ^= k * m;
+
+ h ^= h >>> 13;
+ h *= m;
+ h ^= h >>> 15;
+
+ return h;
+ }
+
+ public static long hash64(Object o)
+ {
+ if (o == null)
+ {
+ return 0l;
+ }
+ else if (o instanceof String)
+ {
+ final byte[] bytes = ((String) o).getBytes();
+ return hash64(bytes, bytes.length);
+ }
+ else if (o instanceof byte[])
+ {
+ final byte[] bytes = (byte[]) o;
+ return hash64(bytes, bytes.length);
+ }
+ return hash64(o.toString());
+ }
+
+ // 64 bit implementation copied from here: https://github.com/tnm/murmurhash-java
+
+ /**
+ * Generates 64 bit hash from byte array with default seed value.
+ *
+ * @param data byte array to hash
+ * @param length length of the array to hash
+ * @return 64 bit hash of the given string
+ */
+ public static long hash64(final byte[] data, int length)
+ {
+ return hash64(data, length, 0xe17a1465);
+ }
+
+
+ /**
+ * Generates 64 bit hash from byte array of the given length and seed.
+ *
+ * @param data byte array to hash
+ * @param length length of the array to hash
+ * @param seed initial seed value
+ * @return 64 bit hash of the given array
+ */
+ public static long hash64(final byte[] data, int length, int seed)
+ {
+ final long m = 0xc6a4a7935bd1e995L;
+ final int r = 47;
+
+ long h = (seed & 0xffffffffl) ^ (length * m);
+
+ int length8 = length / 8;
+
+ for (int i = 0; i < length8; i++)
+ {
+ final int i8 = i * 8;
+ long k = ((long) data[i8 + 0] & 0xff) + (((long) data[i8 + 1] & 0xff) << 8)
+ + (((long) data[i8 + 2] & 0xff) << 16) + (((long) data[i8 + 3] & 0xff) << 24)
+ + (((long) data[i8 + 4] & 0xff) << 32) + (((long) data[i8 + 5] & 0xff) << 40)
+ + (((long) data[i8 + 6] & 0xff) << 48) + (((long) data[i8 + 7] & 0xff) << 56);
+
+ k *= m;
+ k ^= k >>> r;
+ k *= m;
+
+ h ^= k;
+ h *= m;
+ }
+
+ switch (length % 8)
+ {
+ case 7:
+ h ^= (long) (data[(length & ~7) + 6] & 0xff) << 48;
+ case 6:
+ h ^= (long) (data[(length & ~7) + 5] & 0xff) << 40;
+ case 5:
+ h ^= (long) (data[(length & ~7) + 4] & 0xff) << 32;
+ case 4:
+ h ^= (long) (data[(length & ~7) + 3] & 0xff) << 24;
+ case 3:
+ h ^= (long) (data[(length & ~7) + 2] & 0xff) << 16;
+ case 2:
+ h ^= (long) (data[(length & ~7) + 1] & 0xff) << 8;
+ case 1:
+ h ^= (long) (data[length & ~7] & 0xff);
+ h *= m;
+ }
+ ;
+
+ h ^= h >>> r;
+ h *= m;
+ h ^= h >>> r;
+
+ return h;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/RegisterSet.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/RegisterSet.java b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/RegisterSet.java
new file mode 100755
index 0000000..e1a1944
--- /dev/null
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/RegisterSet.java
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.gemstone.gemfire.internal.hll;
+
+/*
+ * Copyright (C) 2012 Clearspring Technologies, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class RegisterSet {
+
+ public final static int LOG2_BITS_PER_WORD = 6;
+ public final static int REGISTER_SIZE = 5;
+
+ public final int count;
+ public final int size;
+
+ private final int[] M;
+
+ public RegisterSet(int count) {
+ this(count, null);
+ }
+
+ public RegisterSet(int count, int[] initialValues) {
+ this.count = count;
+
+ if (initialValues == null) {
+ this.M = new int[getSizeForCount(count)];
+ } else {
+ this.M = initialValues;
+ }
+ this.size = this.M.length;
+ }
+
+ public static int getBits(int count) {
+ return count / LOG2_BITS_PER_WORD;
+ }
+
+ public static int getSizeForCount(int count) {
+ int bits = getBits(count);
+ if (bits == 0) {
+ return 1;
+ } else if (bits % Integer.SIZE == 0) {
+ return bits;
+ } else {
+ return bits + 1;
+ }
+ }
+
+ public void set(int position, int value) {
+ int bucketPos = position / LOG2_BITS_PER_WORD;
+ int shift = REGISTER_SIZE * (position - (bucketPos * LOG2_BITS_PER_WORD));
+ this.M[bucketPos] = (this.M[bucketPos] & ~(0x1f << shift)) | (value << shift);
+ }
+
+ public int get(int position) {
+ int bucketPos = position / LOG2_BITS_PER_WORD;
+ int shift = REGISTER_SIZE * (position - (bucketPos * LOG2_BITS_PER_WORD));
+ return (this.M[bucketPos] & (0x1f << shift)) >>> shift;
+ }
+
+ public boolean updateIfGreater(int position, int value) {
+ int bucket = position / LOG2_BITS_PER_WORD;
+ int shift = REGISTER_SIZE * (position - (bucket * LOG2_BITS_PER_WORD));
+ int mask = 0x1f << shift;
+
+ // Use long to avoid sign issues with the left-most shift
+ long curVal = this.M[bucket] & mask;
+ long newVal = value << shift;
+ if (curVal < newVal) {
+ this.M[bucket] = (int) ((this.M[bucket] & ~mask) | newVal);
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ public void merge(RegisterSet that) {
+ for (int bucket = 0; bucket < M.length; bucket++) {
+ int word = 0;
+ for (int j = 0; j < LOG2_BITS_PER_WORD; j++) {
+ int mask = 0x1f << (REGISTER_SIZE * j);
+
+ int thisVal = (this.M[bucket] & mask);
+ int thatVal = (that.M[bucket] & mask);
+ word |= (thisVal < thatVal) ? thatVal : thisVal;
+ }
+ this.M[bucket] = word;
+ }
+ }
+
+ int[] readOnlyBits() {
+ return M;
+ }
+
+ public int[] bits() {
+ int[] copy = new int[size];
+ System.arraycopy(M, 0, copy, 0, M.length);
+ return copy;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/internal/redis/RegionProvider.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/internal/redis/RegionProvider.java b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/redis/RegionProvider.java
index f9539e5..dcd83cb 100644
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/internal/redis/RegionProvider.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/redis/RegionProvider.java
@@ -45,7 +45,7 @@ import com.gemstone.gemfire.internal.cache.GemFireCacheImpl;
import com.gemstone.gemfire.internal.redis.executor.ExpirationExecutor;
import com.gemstone.gemfire.internal.redis.executor.ListQuery;
import com.gemstone.gemfire.internal.redis.executor.SortedSetQuery;
-import com.gemstone.gemfire.internal.redis.executor.hll.HyperLogLogPlus;
+import com.gemstone.gemfire.internal.hll.HyperLogLogPlus;
import com.gemstone.gemfire.management.cli.Result;
import com.gemstone.gemfire.management.cli.Result.Status;
import com.gemstone.gemfire.management.internal.cli.commands.CreateAlterDestroyRegionCommands;
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/internal/redis/executor/hll/Bits.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/internal/redis/executor/hll/Bits.java b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/redis/executor/hll/Bits.java
deleted file mode 100755
index b204442..0000000
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/internal/redis/executor/hll/Bits.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.gemstone.gemfire.internal.redis.executor.hll;
-
-/*
- * Copyright (C) 2011 Clearspring Technologies, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.ByteArrayInputStream;
-import java.io.DataInput;
-import java.io.DataInputStream;
-import java.io.IOException;
-
-public class Bits {
-
- public static int[] getBits(byte[] mBytes) throws IOException {
- int bitSize = mBytes.length / 4;
- int[] bits = new int[bitSize];
- DataInputStream dis = new DataInputStream(new ByteArrayInputStream(mBytes));
- for (int i = 0; i < bitSize; i++) {
- bits[i] = dis.readInt();
- }
- return bits;
- }
-
- /**
- * This method might be better described as
- * "byte array to int array" or "data input to int array"
- */
- public static int[] getBits(DataInput dataIn, int byteLength) throws IOException {
- int bitSize = byteLength / 4;
- int[] bits = new int[bitSize];
- for (int i = 0; i < bitSize; i++) {
- bits[i] = dataIn.readInt();
- }
- return bits;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/internal/redis/executor/hll/CardinalityMergeException.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/internal/redis/executor/hll/CardinalityMergeException.java b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/redis/executor/hll/CardinalityMergeException.java
deleted file mode 100755
index 1391779..0000000
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/internal/redis/executor/hll/CardinalityMergeException.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.gemstone.gemfire.internal.redis.executor.hll;
-
-/*
- * Copyright (C) 2011 Clearspring Technologies, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-@SuppressWarnings("serial")
-public abstract class CardinalityMergeException extends Exception {
-
- public CardinalityMergeException(String message) {
- super(message);
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/internal/redis/executor/hll/HyperLogLog.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/internal/redis/executor/hll/HyperLogLog.java b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/redis/executor/hll/HyperLogLog.java
deleted file mode 100755
index c857db4..0000000
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/internal/redis/executor/hll/HyperLogLog.java
+++ /dev/null
@@ -1,360 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.gemstone.gemfire.internal.redis.executor.hll;
-
-/*
- * Copyright (C) 2012 Clearspring Technologies, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.DataInput;
-import java.io.DataInputStream;
-import java.io.DataOutput;
-import java.io.DataOutputStream;
-import java.io.Externalizable;
-import java.io.IOException;
-import java.io.ObjectInput;
-import java.io.ObjectOutput;
-import java.io.Serializable;
-
-/**
- * Java implementation of HyperLogLog (HLL) algorithm from this paper:
- * <p/>
- * http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf
- * <p/>
- * HLL is an improved version of LogLog that is capable of estimating
- * the cardinality of a set with accuracy = 1.04/sqrt(m) where
- * m = 2^b. So we can control accuracy vs space usage by increasing
- * or decreasing b.
- * <p/>
- * The main benefit of using HLL over LL is that it only requires 64%
- * of the space that LL does to get the same accuracy.
- * <p/>
- * This implementation implements a single counter. If a large (millions)
- * number of counters are required you may want to refer to:
- * <p/>
- * http://dsiutils.dsi.unimi.it/
- * <p/>
- * It has a more complex implementation of HLL that supports multiple counters
- * in a single object, drastically reducing the java overhead from creating
- * a large number of objects.
- * <p/>
- * This implementation leveraged a javascript implementation that Yammer has
- * been working on:
- * <p/>
- * https://github.com/yammer/probablyjs
- * <p>
- * Note that this implementation does not include the long range correction function
- * defined in the original paper. Empirical evidence shows that the correction
- * function causes more harm than good.
- * </p>
- * <p/>
- * <p>
- * Users have different motivations to use different types of hashing functions.
- * Rather than try to keep up with all available hash functions and to remove
- * the concern of causing future binary incompatibilities this class allows clients
- * to offer the value in hashed int or long form. This way clients are free
- * to change their hash function on their own time line. We recommend using Google's
- * Guava Murmur3_128 implementation as it provides good performance and speed when
- * high precision is required. In our tests the 32bit MurmurHash function included
- * in this project is faster and produces better results than the 32 bit murmur3
- * implementation google provides.
- * </p>
- */
-public class HyperLogLog implements ICardinality, Serializable {
-
- private static final long serialVersionUID = -4661220245111112301L;
- private final RegisterSet registerSet;
- private final int log2m;
- private final double alphaMM;
-
-
- /**
- * Create a new HyperLogLog instance using the specified standard deviation.
- *
- * @param rsd - the relative standard deviation for the counter.
- * smaller values create counters that require more space.
- */
- public HyperLogLog(double rsd) {
- this(log2m(rsd));
- }
-
- private static int log2m(double rsd) {
- return (int) (Math.log((1.106 / rsd) * (1.106 / rsd)) / Math.log(2));
- }
-
- /**
- * Create a new HyperLogLog instance. The log2m parameter defines the accuracy of
- * the counter. The larger the log2m the better the accuracy.
- * <p/>
- * accuracy = 1.04/sqrt(2^log2m)
- *
- * @param log2m - the number of bits to use as the basis for the HLL instance
- */
- public HyperLogLog(int log2m) {
- this(log2m, new RegisterSet(1 << log2m));
- }
-
- /**
- * Creates a new HyperLogLog instance using the given registers. Used for unmarshalling a serialized
- * instance and for merging multiple counters together.
- *
- * @param registerSet - the initial values for the register set
- */
- @Deprecated
- public HyperLogLog(int log2m, RegisterSet registerSet) {
- if (log2m < 0 || log2m > 30) {
- throw new IllegalArgumentException("log2m argument is "
- + log2m + " and is outside the range [0, 30]");
- }
- this.registerSet = registerSet;
- this.log2m = log2m;
- int m = 1 << this.log2m;
-
- alphaMM = getAlphaMM(log2m, m);
- }
-
- @Override
- public boolean offerHashed(long hashedValue) {
- // j becomes the binary address determined by the first b log2m of x
- // j will be between 0 and 2^log2m
- final int j = (int) (hashedValue >>> (Long.SIZE - log2m));
- final int r = Long.numberOfLeadingZeros((hashedValue << this.log2m) | (1 << (this.log2m - 1)) + 1) + 1;
- return registerSet.updateIfGreater(j, r);
- }
-
- @Override
- public boolean offerHashed(int hashedValue) {
- // j becomes the binary address determined by the first b log2m of x
- // j will be between 0 and 2^log2m
- final int j = hashedValue >>> (Integer.SIZE - log2m);
- final int r = Integer.numberOfLeadingZeros((hashedValue << this.log2m) | (1 << (this.log2m - 1)) + 1) + 1;
- return registerSet.updateIfGreater(j, r);
- }
-
- @Override
- public boolean offer(Object o) {
- final int x = MurmurHash.hash(o);
- return offerHashed(x);
- }
-
-
- @Override
- public long cardinality() {
- double registerSum = 0;
- int count = registerSet.count;
- double zeros = 0.0;
- for (int j = 0; j < registerSet.count; j++) {
- int val = registerSet.get(j);
- registerSum += 1.0 / (1 << val);
- if (val == 0) {
- zeros++;
- }
- }
-
- double estimate = alphaMM * (1 / registerSum);
-
- if (estimate <= (5.0 / 2.0) * count) {
- // Small Range Estimate
- return Math.round(linearCounting(count, zeros));
- } else {
- return Math.round(estimate);
- }
- }
-
- @Override
- public int sizeof() {
- return registerSet.size * 4;
- }
-
- @Override
- public byte[] getBytes() throws IOException {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- DataOutput dos = new DataOutputStream(baos);
- writeBytes(dos);
-
- return baos.toByteArray();
- }
-
- private void writeBytes(DataOutput serializedByteStream) throws IOException {
- serializedByteStream.writeInt(log2m);
- serializedByteStream.writeInt(registerSet.size * 4);
- for (int x : registerSet.readOnlyBits()) {
- serializedByteStream.writeInt(x);
- }
- }
-
- /**
- * Add all the elements of the other set to this set.
- * <p/>
- * This operation does not imply a loss of precision.
- *
- * @param other A compatible Hyperloglog instance (same log2m)
- * @throws CardinalityMergeException if other is not compatible
- */
- public void addAll(HyperLogLog other) throws CardinalityMergeException {
- if (this.sizeof() != other.sizeof()) {
- throw new HyperLogLogMergeException("Cannot merge estimators of different sizes");
- }
-
- registerSet.merge(other.registerSet);
- }
-
- @Override
- public ICardinality merge(ICardinality... estimators) throws CardinalityMergeException {
- HyperLogLog merged = new HyperLogLog(HllExecutor.DEFAULT_HLL_STD_DEV);//new HyperLogLog(log2m, new RegisterSet(this.registerSet.count));
- merged.addAll(this);
-
- if (estimators == null) {
- return merged;
- }
-
- for (ICardinality estimator : estimators) {
- if (!(estimator instanceof HyperLogLog)) {
- throw new HyperLogLogMergeException("Cannot merge estimators of different class");
- }
- HyperLogLog hll = (HyperLogLog) estimator;
- merged.addAll(hll);
- }
-
- return merged;
- }
-
- private Object writeReplace() {
- return new SerializationHolder(this);
- }
-
- /**
- * This class exists to support Externalizable semantics for
- * HyperLogLog objects without having to expose a public
- * constructor, public write/read methods, or pretend final
- * fields aren't final.
- *
- * In short, Externalizable allows you to skip some of the more
- * verbose meta-data default Serializable gets you, but still
- * includes the class name. In that sense, there is some cost
- * to this holder object because it has a longer class name. I
- * imagine people who care about optimizing for that have their
- * own work-around for long class names in general, or just use
- * a custom serialization framework. Therefore we make no attempt
- * to optimize that here (eg. by raising this from an inner class
- * and giving it an unhelpful name).
- */
- private static class SerializationHolder implements Externalizable {
-
- HyperLogLog hyperLogLogHolder;
-
- public SerializationHolder(HyperLogLog hyperLogLogHolder) {
- this.hyperLogLogHolder = hyperLogLogHolder;
- }
-
- /**
- * required for Externalizable
- */
- @SuppressWarnings("unused")
- public SerializationHolder() {
-
- }
-
- @Override
- public void writeExternal(ObjectOutput out) throws IOException {
- hyperLogLogHolder.writeBytes(out);
- }
-
- @Override
- public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
- hyperLogLogHolder = Builder.build(in);
- }
-
- private Object readResolve() {
- return hyperLogLogHolder;
- }
- }
-
- public static class Builder implements IBuilder<ICardinality>, Serializable {
-
- private static final long serialVersionUID = -979314356097156719L;
- private double rsd;
-
- public Builder(double rsd) {
- this.rsd = rsd;
- }
-
- @Override
- public HyperLogLog build() {
- return new HyperLogLog(rsd);
- }
-
- @Override
- public int sizeof() {
- int log2m = log2m(rsd);
- int k = 1 << log2m;
- return RegisterSet.getBits(k) * 4;
- }
-
- public static HyperLogLog build(byte[] bytes) throws IOException {
- ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
- return build(new DataInputStream(bais));
- }
-
- public static HyperLogLog build(DataInput serializedByteStream) throws IOException {
- int log2m = serializedByteStream.readInt();
- int byteArraySize = serializedByteStream.readInt();
- return new HyperLogLog(log2m,
- new RegisterSet(1 << log2m, Bits.getBits(serializedByteStream, byteArraySize)));
- }
- }
-
- @SuppressWarnings("serial")
- protected static class HyperLogLogMergeException extends CardinalityMergeException {
-
- public HyperLogLogMergeException(String message) {
- super(message);
- }
- }
-
- protected static double getAlphaMM(final int p, final int m) {
- // See the paper.
- switch (p) {
- case 4:
- return 0.673 * m * m;
- case 5:
- return 0.697 * m * m;
- case 6:
- return 0.709 * m * m;
- default:
- return (0.7213 / (1 + 1.079 / m)) * m * m;
- }
- }
-
- protected static double linearCounting(int m, double V) {
- return m * Math.log(m / V);
- }
-}