You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@geode.apache.org by sb...@apache.org on 2016/02/18 15:51:59 UTC
[5/5] incubator-geode git commit: GEODE-970: Remove duplicate HLL
classes
GEODE-970: Remove duplicate HLL classes
HLL classes were duplicated in com.gemstone.gemfire.internal.redis.hll and com.gemstone.gemfire.cache.hdfs.internal.cardinality packages.
They now live in com.gemstone.gemfire.internal.hll package.
Project: http://git-wip-us.apache.org/repos/asf/incubator-geode/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-geode/commit/e685fd85
Tree: http://git-wip-us.apache.org/repos/asf/incubator-geode/tree/e685fd85
Diff: http://git-wip-us.apache.org/repos/asf/incubator-geode/diff/e685fd85
Branch: refs/heads/develop
Commit: e685fd85ac7e2607f70b47bfb448b1d91a56b103
Parents: 92be457
Author: Swapnil Bawaskar <sb...@pivotal.io>
Authored: Wed Feb 17 13:28:13 2016 -0800
Committer: Swapnil Bawaskar <sb...@pivotal.io>
Committed: Thu Feb 18 06:51:36 2016 -0800
----------------------------------------------------------------------
.../cache/hdfs/internal/cardinality/Bits.java | 54 -
.../cardinality/CardinalityMergeException.java | 42 -
.../hdfs/internal/cardinality/HyperLogLog.java | 313 -----
.../hdfs/internal/cardinality/IBuilder.java | 41 -
.../hdfs/internal/cardinality/ICardinality.java | 87 --
.../hdfs/internal/cardinality/MurmurHash.java | 261 -----
.../hdfs/internal/cardinality/RegisterSet.java | 136 ---
.../hdfs/internal/hoplog/AbstractHoplog.java | 2 +-
.../hdfs/internal/hoplog/HFileSortedOplog.java | 4 +-
.../hoplog/HdfsSortedOplogOrganizer.java | 8 +-
.../cache/hdfs/internal/hoplog/Hoplog.java | 3 +-
.../internal/hoplog/SequenceFileHoplog.java | 2 +-
.../com/gemstone/gemfire/internal/hll/Bits.java | 65 ++
.../internal/hll/CardinalityMergeException.java | 42 +
.../gemfire/internal/hll/HyperLogLog.java | 362 ++++++
.../gemfire/internal/hll/HyperLogLogPlus.java | 1070 ++++++++++++++++++
.../gemstone/gemfire/internal/hll/IBuilder.java | 41 +
.../gemfire/internal/hll/ICardinality.java | 89 ++
.../gemfire/internal/hll/MurmurHash.java | 261 +++++
.../gemfire/internal/hll/RegisterSet.java | 126 +++
.../gemfire/internal/redis/RegionProvider.java | 2 +-
.../internal/redis/executor/hll/Bits.java | 65 --
.../executor/hll/CardinalityMergeException.java | 42 -
.../redis/executor/hll/HyperLogLog.java | 360 ------
.../redis/executor/hll/HyperLogLogPlus.java | 1068 -----------------
.../internal/redis/executor/hll/IBuilder.java | 41 -
.../redis/executor/hll/ICardinality.java | 89 --
.../internal/redis/executor/hll/MurmurHash.java | 234 ----
.../redis/executor/hll/PFAddExecutor.java | 1 +
.../redis/executor/hll/PFCountExecutor.java | 2 +
.../redis/executor/hll/PFMergeExecutor.java | 2 +
.../redis/executor/hll/RegisterSet.java | 126 ---
.../gemfire/redis/GemFireRedisServer.java | 2 +-
33 files changed, 2073 insertions(+), 2970 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/Bits.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/Bits.java b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/Bits.java
deleted file mode 100644
index 4fa3443..0000000
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/Bits.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * Copyright (C) 2011 Clearspring Technologies, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.gemstone.gemfire.cache.hdfs.internal.cardinality;
-
-import java.io.ByteArrayInputStream;
-import java.io.DataInputStream;
-import java.io.IOException;
-
-public class Bits
-{
-
- public static int[] getBits(byte[] mBytes) throws IOException
- {
- int bitSize = mBytes.length / 4;
- int[] bits = new int[bitSize];
- DataInputStream dis = new DataInputStream(new ByteArrayInputStream(mBytes));
- for (int i = 0; i < bitSize; i++)
- {
- bits[i] = dis.readInt();
- }
- return bits;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/CardinalityMergeException.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/CardinalityMergeException.java b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/CardinalityMergeException.java
deleted file mode 100644
index a63f8e6..0000000
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/CardinalityMergeException.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * Copyright (C) 2011 Clearspring Technologies, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.gemstone.gemfire.cache.hdfs.internal.cardinality;
-
-@SuppressWarnings("serial")
-public abstract class CardinalityMergeException extends Exception
-{
- public CardinalityMergeException(String message)
- {
- super(message);
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/HyperLogLog.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/HyperLogLog.java b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/HyperLogLog.java
deleted file mode 100644
index 6d945bc..0000000
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/HyperLogLog.java
+++ /dev/null
@@ -1,313 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * Copyright (C) 2012 Clearspring Technologies, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.gemstone.gemfire.cache.hdfs.internal.cardinality;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.io.Serializable;
-
-/**
- * Java implementation of HyperLogLog (HLL) algorithm from this paper:
- * <p/>
- * http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf
- * <p/>
- * HLL is an improved version of LogLog that is capable of estimating
- * the cardinality of a set with accuracy = 1.04/sqrt(m) where
- * m = 2^b. So we can control accuracy vs space usage by increasing
- * or decreasing b.
- * <p/>
- * The main benefit of using HLL over LL is that it only requires 64%
- * of the space that LL does to get the same accuracy.
- * <p/>
- * This implementation implements a single counter. If a large (millions)
- * number of counters are required you may want to refer to:
- * <p/>
- * http://dsiutils.dsi.unimi.it/
- * <p/>
- * It has a more complex implementation of HLL that supports multiple counters
- * in a single object, drastically reducing the java overhead from creating
- * a large number of objects.
- * <p/>
- * This implementation leveraged a javascript implementation that Yammer has
- * been working on:
- * <p/>
- * https://github.com/yammer/probablyjs
- * <p>
- * Note that this implementation does not include the long range correction function
- * defined in the original paper. Empirical evidence shows that the correction
- * function causes more harm than good.
- * </p>
- *
- * <p>
- * Users have different motivations to use different types of hashing functions.
- * Rather than try to keep up with all available hash functions and to remove
- * the concern of causing future binary incompatibilities this class allows clients
- * to offer the value in hashed int or long form. This way clients are free
- * to change their hash function on their own time line. We recommend using Google's
- * Guava Murmur3_128 implementation as it provides good performance and speed when
- * high precision is required. In our tests the 32bit MurmurHash function included
- * in this project is faster and produces better results than the 32 bit murmur3
- * implementation google provides.
- * </p>
- */
-public class HyperLogLog implements ICardinality
-{
- private final RegisterSet registerSet;
- private final int log2m;
- private final double alphaMM;
-
-
- /**
- * Create a new HyperLogLog instance using the specified standard deviation.
- *
- * @param rsd - the relative standard deviation for the counter.
- * smaller values create counters that require more space.
- */
- public HyperLogLog(double rsd)
- {
- this(log2m(rsd));
- }
-
- private static int log2m(double rsd)
- {
- return (int) (Math.log((1.106 / rsd) * (1.106 / rsd)) / Math.log(2));
- }
-
- /**
- * Create a new HyperLogLog instance. The log2m parameter defines the accuracy of
- * the counter. The larger the log2m the better the accuracy.
- * <p/>
- * accuracy = 1.04/sqrt(2^log2m)
- *
- * @param log2m - the number of bits to use as the basis for the HLL instance
- */
- public HyperLogLog(int log2m)
- {
- this(log2m, new RegisterSet((int) Math.pow(2, log2m)));
- }
-
- /**
- * Creates a new HyperLogLog instance using the given registers. Used for unmarshalling a serialized
- * instance and for merging multiple counters together.
- *
- * @param registerSet - the initial values for the register set
- */
- public HyperLogLog(int log2m, RegisterSet registerSet)
- {
- this.registerSet = registerSet;
- this.log2m = log2m;
- int m = (int) Math.pow(2, this.log2m);
-
- // See the paper.
- switch (log2m)
- {
- case 4:
- alphaMM = 0.673 * m * m;
- break;
- case 5:
- alphaMM = 0.697 * m * m;
- break;
- case 6:
- alphaMM = 0.709 * m * m;
- break;
- default:
- alphaMM = (0.7213 / (1 + 1.079 / m)) * m * m;
- }
- }
-
-
- @Override
- public boolean offerHashed(long hashedValue)
- {
- // j becomes the binary address determined by the first b log2m of x
- // j will be between 0 and 2^log2m
- final int j = (int) (hashedValue >>> (Long.SIZE - log2m));
- final int r = Long.numberOfLeadingZeros((hashedValue << this.log2m) | (1 << (this.log2m - 1)) + 1) + 1;
- return registerSet.updateIfGreater(j, r);
- }
-
- @Override
- public boolean offerHashed(int hashedValue)
- {
- // j becomes the binary address determined by the first b log2m of x
- // j will be between 0 and 2^log2m
- final int j = hashedValue >>> (Integer.SIZE - log2m);
- final int r = Integer.numberOfLeadingZeros((hashedValue << this.log2m) | (1 << (this.log2m - 1)) + 1) + 1;
- return registerSet.updateIfGreater(j, r);
- }
-
- @Override
- public boolean offer(Object o)
- {
- final int x = MurmurHash.hash(o);
- return offerHashed(x);
- }
-
-
- @Override
- public long cardinality()
- {
- double registerSum = 0;
- int count = registerSet.count;
- double zeros = 0.0;
- for (int j = 0; j < registerSet.count; j++)
- {
- int val = registerSet.get(j);
- registerSum += 1.0 / (1<<val);
- if (val == 0) {
- zeros++;
- }
- }
-
- double estimate = alphaMM * (1 / registerSum);
-
- if (estimate <= (5.0 / 2.0) * count)
- {
- // Small Range Estimate
- return Math.round(count * Math.log(count / zeros));
- }
- else
- {
- return Math.round(estimate);
- }
- }
-
- @Override
- public int sizeof()
- {
- return registerSet.size * 4;
- }
-
- @Override
- public byte[] getBytes() throws IOException
- {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- DataOutputStream dos = new DataOutputStream(baos);
-
- dos.writeInt(log2m);
- dos.writeInt(registerSet.size * 4);
- for (int x : registerSet.bits())
- {
- dos.writeInt(x);
- }
-
- return baos.toByteArray();
- }
-
- /** Add all the elements of the other set to this set.
- *
- * This operation does not imply a loss of precision.
- *
- * @param other A compatible Hyperloglog instance (same log2m)
- * @throws CardinalityMergeException if other is not compatible
- */
- public void addAll(HyperLogLog other) throws CardinalityMergeException {
- if (this.sizeof() != other.sizeof())
- {
- throw new HyperLogLogMergeException("Cannot merge estimators of different sizes");
- }
-
- registerSet.merge(other.registerSet);
- }
-
- @Override
- public ICardinality merge(ICardinality... estimators) throws CardinalityMergeException
- {
- HyperLogLog merged = new HyperLogLog(log2m);
- merged.addAll(this);
-
- if (estimators == null)
- {
- return merged;
- }
-
- for (ICardinality estimator : estimators)
- {
- if (!(estimator instanceof HyperLogLog))
- {
- throw new HyperLogLogMergeException("Cannot merge estimators of different class");
- }
- HyperLogLog hll = (HyperLogLog) estimator;
- merged.addAll(hll);
- }
-
- return merged;
- }
-
- public static class Builder implements IBuilder<ICardinality>, Serializable
- {
- private double rsd;
-
- public Builder(double rsd)
- {
- this.rsd = rsd;
- }
-
- @Override
- public HyperLogLog build()
- {
- return new HyperLogLog(rsd);
- }
-
- @Override
- public int sizeof()
- {
- int log2m = log2m(rsd);
- int k = (int) Math.pow(2, log2m);
- return RegisterSet.getBits(k) * 4;
- }
-
- public static HyperLogLog build(byte[] bytes) throws IOException
- {
- ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
- DataInputStream oi = new DataInputStream(bais);
- int log2m = oi.readInt();
- int size = oi.readInt();
- byte[] longArrayBytes = new byte[size];
- oi.readFully(longArrayBytes);
- return new HyperLogLog(log2m, new RegisterSet((int) Math.pow(2, log2m), Bits.getBits(longArrayBytes)));
- }
- }
-
- @SuppressWarnings("serial")
- protected static class HyperLogLogMergeException extends CardinalityMergeException
- {
- public HyperLogLogMergeException(String message)
- {
- super(message);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/IBuilder.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/IBuilder.java b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/IBuilder.java
deleted file mode 100644
index 4321dc6..0000000
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/IBuilder.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * Copyright (C) 2011 Clearspring Technologies, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.gemstone.gemfire.cache.hdfs.internal.cardinality;
-
-
-public interface IBuilder<T>
-{
- T build();
-
- int sizeof();
-}
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/ICardinality.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/ICardinality.java b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/ICardinality.java
deleted file mode 100644
index ae6d7d6..0000000
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/ICardinality.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * Copyright (C) 2011 Clearspring Technologies, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.gemstone.gemfire.cache.hdfs.internal.cardinality;
-
-import java.io.IOException;
-
-
-public interface ICardinality
-{
- /**
- * @param o stream element
- * @return false if the value returned by cardinality() is unaffected by the appearance of o in the stream.
- */
- boolean offer(Object o);
-
- /**
- * Offer the value as a hashed long value
- *
- * @param hashedLong - the hash of the item to offer to the estimator
- * @return false if the value returned by cardinality() is unaffected by the appearance of hashedLong in the stream
- */
- boolean offerHashed(long hashedLong);
-
- /**
- * Offer the value as a hashed long value
- *
- * @param hashedInt - the hash of the item to offer to the estimator
- * @return false if the value returned by cardinality() is unaffected by the appearance of hashedInt in the stream
- */
- boolean offerHashed(int hashedInt);
-
- /**
- * @return the number of unique elements in the stream or an estimate thereof
- */
- long cardinality();
-
- /**
- * @return size in bytes needed for serialization
- */
- int sizeof();
-
- /**
- * @throws IOException
- */
- byte[] getBytes() throws IOException;
-
- /**
- * Merges estimators to produce a new estimator for the combined streams
- * of this estimator and those passed as arguments.
- *
- * Nor this estimator nor the one passed as parameters are modified.
- *
- * @param estimators Zero or more compatible estimators
- * @throws CardinalityMergeException If at least one of the estimators is not compatible with this one
- */
- ICardinality merge(ICardinality... estimators) throws CardinalityMergeException;
-}
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/MurmurHash.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/MurmurHash.java b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/MurmurHash.java
deleted file mode 100644
index bc760a8..0000000
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/MurmurHash.java
+++ /dev/null
@@ -1,261 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.gemstone.gemfire.cache.hdfs.internal.cardinality;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with this
- * work for additional information regarding copyright ownership. The ASF
- * licenses this file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-/**
- * This is a very fast, non-cryptographic hash suitable for general hash-based
- * lookup. See http://murmurhash.googlepages.com/ for more details.
- * <p/>
- * <p>
- * The C version of MurmurHash 2.0 found at that site was ported to Java by
- * Andrzej Bialecki (ab at getopt org).
- * </p>
- */
-public class MurmurHash
-{
- public static int hash(Object o)
- {
- if (o == null)
- {
- return 0;
- }
- if (o instanceof Long)
- {
- return hashLong((Long) o);
- }
- if (o instanceof Integer)
- {
- return hashLong((Integer) o);
- }
- if (o instanceof Double)
- {
- return hashLong(Double.doubleToRawLongBits((Double) o));
- }
- if (o instanceof Float)
- {
- return hashLong(Float.floatToRawIntBits((Float) o));
- }
- if (o instanceof String)
- {
- return hash(((String) o).getBytes());
- }
- if (o instanceof byte[])
- {
- return hash((byte[]) o);
- }
- return hash(o.toString());
- }
-
- public static int hash(byte[] data)
- {
- return hash(data, 0, data.length, -1);
- }
-
- public static int hash(byte[] data, int seed)
- {
- return hash(data, 0, data.length, seed);
- }
-
- public static int hash(byte[] data, int offset, int length, int seed)
- {
- int m = 0x5bd1e995;
- int r = 24;
-
- int h = seed ^ length;
-
- int len_4 = length >> 2;
-
- for (int i = 0; i < len_4; i++)
- {
- int i_4 = i << 2;
- int k = data[offset + i_4 + 3];
- k = k << 8;
- k = k | (data[offset + i_4 + 2] & 0xff);
- k = k << 8;
- k = k | (data[offset + i_4 + 1] & 0xff);
- k = k << 8;
- k = k | (data[offset + i_4 + 0] & 0xff);
- k *= m;
- k ^= k >>> r;
- k *= m;
- h *= m;
- h ^= k;
- }
-
- // avoid calculating modulo
- int len_m = len_4 << 2;
- int left = length - len_m;
-
- if (left != 0)
- {
- if (left >= 3)
- {
- h ^= (int) data[offset + length - 3] << 16;
- }
- if (left >= 2)
- {
- h ^= (int) data[offset + length - 2] << 8;
- }
- if (left >= 1)
- {
- h ^= (int) data[offset + length - 1];
- }
-
- h *= m;
- }
-
- h ^= h >>> 13;
- h *= m;
- h ^= h >>> 15;
-
- return h;
- }
-
- public static int hashLong(long data)
- {
- int m = 0x5bd1e995;
- int r = 24;
-
- int h = 0;
-
- int k = (int) data * m;
- k ^= k >>> r;
- h ^= k * m;
-
- k = (int) (data >> 32) * m;
- k ^= k >>> r;
- h *= m;
- h ^= k * m;
-
- h ^= h >>> 13;
- h *= m;
- h ^= h >>> 15;
-
- return h;
- }
-
- public static long hash64(Object o)
- {
- if (o == null)
- {
- return 0l;
- }
- else if (o instanceof String)
- {
- final byte[] bytes = ((String) o).getBytes();
- return hash64(bytes, bytes.length);
- }
- else if (o instanceof byte[])
- {
- final byte[] bytes = (byte[]) o;
- return hash64(bytes, bytes.length);
- }
- return hash64(o.toString());
- }
-
- // 64 bit implementation copied from here: https://github.com/tnm/murmurhash-java
-
- /**
- * Generates 64 bit hash from byte array with default seed value.
- *
- * @param data byte array to hash
- * @param length length of the array to hash
- * @return 64 bit hash of the given string
- */
- public static long hash64(final byte[] data, int length)
- {
- return hash64(data, length, 0xe17a1465);
- }
-
-
- /**
- * Generates 64 bit hash from byte array of the given length and seed.
- *
- * @param data byte array to hash
- * @param length length of the array to hash
- * @param seed initial seed value
- * @return 64 bit hash of the given array
- */
- public static long hash64(final byte[] data, int length, int seed)
- {
- final long m = 0xc6a4a7935bd1e995L;
- final int r = 47;
-
- long h = (seed & 0xffffffffl) ^ (length * m);
-
- int length8 = length / 8;
-
- for (int i = 0; i < length8; i++)
- {
- final int i8 = i * 8;
- long k = ((long) data[i8 + 0] & 0xff) + (((long) data[i8 + 1] & 0xff) << 8)
- + (((long) data[i8 + 2] & 0xff) << 16) + (((long) data[i8 + 3] & 0xff) << 24)
- + (((long) data[i8 + 4] & 0xff) << 32) + (((long) data[i8 + 5] & 0xff) << 40)
- + (((long) data[i8 + 6] & 0xff) << 48) + (((long) data[i8 + 7] & 0xff) << 56);
-
- k *= m;
- k ^= k >>> r;
- k *= m;
-
- h ^= k;
- h *= m;
- }
-
- switch (length % 8)
- {
- case 7:
- h ^= (long) (data[(length & ~7) + 6] & 0xff) << 48;
- case 6:
- h ^= (long) (data[(length & ~7) + 5] & 0xff) << 40;
- case 5:
- h ^= (long) (data[(length & ~7) + 4] & 0xff) << 32;
- case 4:
- h ^= (long) (data[(length & ~7) + 3] & 0xff) << 24;
- case 3:
- h ^= (long) (data[(length & ~7) + 2] & 0xff) << 16;
- case 2:
- h ^= (long) (data[(length & ~7) + 1] & 0xff) << 8;
- case 1:
- h ^= (long) (data[length & ~7] & 0xff);
- h *= m;
- }
- ;
-
- h ^= h >>> r;
- h *= m;
- h ^= h >>> r;
-
- return h;
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/RegisterSet.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/RegisterSet.java b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/RegisterSet.java
deleted file mode 100644
index 6f1b919..0000000
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/RegisterSet.java
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * Copyright (C) 2012 Clearspring Technologies, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.gemstone.gemfire.cache.hdfs.internal.cardinality;
-
-public class RegisterSet
-{
- public final static int LOG2_BITS_PER_WORD = 6;
- public final static int REGISTER_SIZE = 5;
-
- public final int count;
- public final int size;
-
- private final int[] M;
-
- public RegisterSet(int count)
- {
- this(count, null);
- }
-
- public RegisterSet(int count, int[] initialValues)
- {
- this.count = count;
- int bits = getBits(count);
-
- if (initialValues == null)
- {
- if (bits == 0)
- {
- this.M = new int[1];
- }
- else if (bits % Integer.SIZE == 0)
- {
- this.M = new int[bits];
- }
- else
- {
- this.M = new int[bits + 1];
- }
- }
- else
- {
- this.M = initialValues;
- }
- this.size = this.M.length;
- }
-
- public static int getBits(int count)
- {
- return count / LOG2_BITS_PER_WORD;
- }
-
- public void set(int position, int value)
- {
- int bucketPos = position / LOG2_BITS_PER_WORD;
- int shift = REGISTER_SIZE * (position - (bucketPos * LOG2_BITS_PER_WORD));
- this.M[bucketPos] = (this.M[bucketPos] & ~(0x1f << shift)) | (value << shift);
- }
-
- public int get(int position)
- {
- int bucketPos = position / LOG2_BITS_PER_WORD;
- int shift = REGISTER_SIZE * (position - (bucketPos * LOG2_BITS_PER_WORD));
- return (this.M[bucketPos] & (0x1f << shift)) >>> shift;
- }
-
- public boolean updateIfGreater(int position, int value)
- {
- int bucket = position / LOG2_BITS_PER_WORD;
- int shift = REGISTER_SIZE * (position - (bucket * LOG2_BITS_PER_WORD));
- int mask = 0x1f << shift;
-
- // Use long to avoid sign issues with the left-most shift
- long curVal = this.M[bucket] & mask;
- long newVal = value << shift;
- if (curVal < newVal) {
- this.M[bucket] = (int)((this.M[bucket] & ~mask) | newVal);
- return true;
- } else {
- return false;
- }
- }
-
- public void merge(RegisterSet that)
- {
- for (int bucket = 0; bucket < M.length; bucket++)
- {
- int word = 0;
- for (int j = 0; j < LOG2_BITS_PER_WORD; j++)
- {
- int mask = 0x1f << (REGISTER_SIZE * j);
-
- int thisVal = (this.M[bucket] & mask);
- int thatVal = (that.M[bucket] & mask);
- word |= (thisVal < thatVal) ? thatVal : thisVal;
- }
- this.M[bucket] = word;
- }
- }
-
- public int[] bits()
- {
- int[] copy = new int[size];
- System.arraycopy(M, 0, copy, 0, M.length);
- return copy;
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/AbstractHoplog.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/AbstractHoplog.java b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/AbstractHoplog.java
index 636dd91..d2fdbe7 100644
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/AbstractHoplog.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/AbstractHoplog.java
@@ -19,6 +19,7 @@ package com.gemstone.gemfire.cache.hdfs.internal.hoplog;
import java.io.IOException;
import java.util.regex.Matcher;
+import com.gemstone.gemfire.internal.hll.ICardinality;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -32,7 +33,6 @@ import org.apache.hadoop.io.compress.SnappyCodec;
import com.gemstone.gemfire.cache.hdfs.HDFSIOException;
import com.gemstone.gemfire.cache.hdfs.internal.HDFSStoreImpl;
-import com.gemstone.gemfire.cache.hdfs.internal.cardinality.ICardinality;
import com.gemstone.gemfire.cache.hdfs.internal.org.apache.hadoop.io.SequenceFile;
import com.gemstone.gemfire.cache.hdfs.internal.org.apache.hadoop.io.SequenceFile.CompressionType;
import com.gemstone.gemfire.cache.hdfs.internal.org.apache.hadoop.io.SequenceFile.Writer.Option;
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/HFileSortedOplog.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/HFileSortedOplog.java b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/HFileSortedOplog.java
index 8fc9c8e..5ba20d2 100644
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/HFileSortedOplog.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/HFileSortedOplog.java
@@ -29,6 +29,8 @@ import java.util.Map.Entry;
import java.util.NoSuchElementException;
import java.util.concurrent.atomic.AtomicBoolean;
+import com.gemstone.gemfire.internal.hll.HyperLogLog;
+import com.gemstone.gemfire.internal.hll.ICardinality;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.ipc.RemoteException;
@@ -37,8 +39,6 @@ import org.apache.hadoop.util.ShutdownHookManager;
import com.gemstone.gemfire.cache.CacheClosedException;
import com.gemstone.gemfire.cache.hdfs.HDFSIOException;
import com.gemstone.gemfire.cache.hdfs.internal.HDFSStoreImpl;
-import com.gemstone.gemfire.cache.hdfs.internal.cardinality.HyperLogLog;
-import com.gemstone.gemfire.cache.hdfs.internal.cardinality.ICardinality;
import com.gemstone.gemfire.internal.cache.persistence.soplog.DelegatingSerializedComparator;
import com.gemstone.gemfire.internal.cache.persistence.soplog.HFileStoreStatistics;
import com.gemstone.gemfire.internal.cache.persistence.soplog.SortedOplogStatistics;
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/HdfsSortedOplogOrganizer.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/HdfsSortedOplogOrganizer.java b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/HdfsSortedOplogOrganizer.java
index 9890b3b..61b925b 100644
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/HdfsSortedOplogOrganizer.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/HdfsSortedOplogOrganizer.java
@@ -40,6 +40,10 @@ import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import com.gemstone.gemfire.internal.hll.CardinalityMergeException;
+import com.gemstone.gemfire.internal.hll.HyperLogLog;
+import com.gemstone.gemfire.internal.hll.ICardinality;
+import com.gemstone.gemfire.internal.hll.MurmurHash;
import org.apache.commons.lang.NotImplementedException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -54,10 +58,6 @@ import com.gemstone.gemfire.cache.hdfs.HDFSIOException;
import com.gemstone.gemfire.cache.hdfs.HDFSStore;
import com.gemstone.gemfire.cache.hdfs.internal.QueuedPersistentEvent;
import com.gemstone.gemfire.cache.hdfs.internal.SortedHoplogPersistedEvent;
-import com.gemstone.gemfire.cache.hdfs.internal.cardinality.CardinalityMergeException;
-import com.gemstone.gemfire.cache.hdfs.internal.cardinality.HyperLogLog;
-import com.gemstone.gemfire.cache.hdfs.internal.cardinality.ICardinality;
-import com.gemstone.gemfire.cache.hdfs.internal.cardinality.MurmurHash;
import com.gemstone.gemfire.cache.hdfs.internal.hoplog.HDFSCompactionManager.CompactionRequest;
import com.gemstone.gemfire.cache.hdfs.internal.hoplog.HDFSRegionDirector.HdfsRegionManager;
import com.gemstone.gemfire.cache.hdfs.internal.hoplog.Hoplog.HoplogReader;
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/Hoplog.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/Hoplog.java b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/Hoplog.java
index 113e49b..a5030ae 100644
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/Hoplog.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/Hoplog.java
@@ -16,12 +16,13 @@
*/
package com.gemstone.gemfire.cache.hdfs.internal.hoplog;
+import com.gemstone.gemfire.internal.hll.ICardinality;
+
import java.io.Closeable;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.EnumMap;
-import com.gemstone.gemfire.cache.hdfs.internal.cardinality.ICardinality;
/**
* Ordered sequence file
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/SequenceFileHoplog.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/SequenceFileHoplog.java b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/SequenceFileHoplog.java
index 04cbb05..fbb8f14 100644
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/SequenceFileHoplog.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/SequenceFileHoplog.java
@@ -23,6 +23,7 @@ import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.EnumMap;
+import com.gemstone.gemfire.internal.hll.ICardinality;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
@@ -32,7 +33,6 @@ import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.Text;
import com.gemstone.gemfire.cache.hdfs.HDFSIOException;
-import com.gemstone.gemfire.cache.hdfs.internal.cardinality.ICardinality;
import com.gemstone.gemfire.cache.hdfs.internal.hoplog.HoplogSetReader.HoplogIterator;
import com.gemstone.gemfire.cache.hdfs.internal.org.apache.hadoop.io.SequenceFile;
import com.gemstone.gemfire.cache.hdfs.internal.org.apache.hadoop.io.SequenceFile.Reader;
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/Bits.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/Bits.java b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/Bits.java
new file mode 100755
index 0000000..bd0b33c
--- /dev/null
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/Bits.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.gemstone.gemfire.internal.hll;
+
+/*
+ * Copyright (C) 2011 Clearspring Technologies, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.IOException;
+
+public class Bits {
+
+ public static int[] getBits(byte[] mBytes) throws IOException {
+ int bitSize = mBytes.length / 4;
+ int[] bits = new int[bitSize];
+ DataInputStream dis = new DataInputStream(new ByteArrayInputStream(mBytes));
+ for (int i = 0; i < bitSize; i++) {
+ bits[i] = dis.readInt();
+ }
+ return bits;
+ }
+
+ /**
+ * This method might be better described as
+ * "byte array to int array" or "data input to int array"
+ */
+ public static int[] getBits(DataInput dataIn, int byteLength) throws IOException {
+ int bitSize = byteLength / 4;
+ int[] bits = new int[bitSize];
+ for (int i = 0; i < bitSize; i++) {
+ bits[i] = dataIn.readInt();
+ }
+ return bits;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/CardinalityMergeException.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/CardinalityMergeException.java b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/CardinalityMergeException.java
new file mode 100755
index 0000000..5fd6b0b
--- /dev/null
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/CardinalityMergeException.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.gemstone.gemfire.internal.hll;
+
+/*
+ * Copyright (C) 2011 Clearspring Technologies, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+@SuppressWarnings("serial")
+public abstract class CardinalityMergeException extends Exception {
+
+ public CardinalityMergeException(String message) {
+ super(message);
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/HyperLogLog.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/HyperLogLog.java b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/HyperLogLog.java
new file mode 100755
index 0000000..fa5508d
--- /dev/null
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/HyperLogLog.java
@@ -0,0 +1,362 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.gemstone.gemfire.internal.hll;
+
+/*
+ * Copyright (C) 2012 Clearspring Technologies, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.gemstone.gemfire.internal.redis.executor.hll.HllExecutor;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.io.Externalizable;
+import java.io.IOException;
+import java.io.ObjectInput;
+import java.io.ObjectOutput;
+import java.io.Serializable;
+
+/**
+ * Java implementation of HyperLogLog (HLL) algorithm from this paper:
+ * <p/>
+ * http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf
+ * <p/>
+ * HLL is an improved version of LogLog that is capable of estimating
+ * the cardinality of a set with accuracy = 1.04/sqrt(m) where
+ * m = 2^b. So we can control accuracy vs space usage by increasing
+ * or decreasing b.
+ * <p/>
+ * The main benefit of using HLL over LL is that it only requires 64%
+ * of the space that LL does to get the same accuracy.
+ * <p/>
+ * This implementation implements a single counter. If a large (millions)
+ * number of counters are required you may want to refer to:
+ * <p/>
+ * http://dsiutils.dsi.unimi.it/
+ * <p/>
+ * It has a more complex implementation of HLL that supports multiple counters
+ * in a single object, drastically reducing the java overhead from creating
+ * a large number of objects.
+ * <p/>
+ * This implementation leveraged a javascript implementation that Yammer has
+ * been working on:
+ * <p/>
+ * https://github.com/yammer/probablyjs
+ * <p>
+ * Note that this implementation does not include the long range correction function
+ * defined in the original paper. Empirical evidence shows that the correction
+ * function causes more harm than good.
+ * </p>
+ * <p/>
+ * <p>
+ * Users have different motivations to use different types of hashing functions.
+ * Rather than try to keep up with all available hash functions and to remove
+ * the concern of causing future binary incompatibilities this class allows clients
+ * to offer the value in hashed int or long form. This way clients are free
+ * to change their hash function on their own time line. We recommend using Google's
+ * Guava Murmur3_128 implementation as it provides good performance and speed when
+ * high precision is required. In our tests the 32bit MurmurHash function included
+ * in this project is faster and produces better results than the 32 bit murmur3
+ * implementation google provides.
+ * </p>
+ */
+public class HyperLogLog implements ICardinality, Serializable {
+
+ private static final long serialVersionUID = -4661220245111112301L;
+ private final RegisterSet registerSet;
+ private final int log2m;
+ private final double alphaMM;
+
+
+ /**
+ * Create a new HyperLogLog instance using the specified standard deviation.
+ *
+ * @param rsd - the relative standard deviation for the counter.
+ * smaller values create counters that require more space.
+ */
+ public HyperLogLog(double rsd) {
+ this(log2m(rsd));
+ }
+
+ private static int log2m(double rsd) {
+ return (int) (Math.log((1.106 / rsd) * (1.106 / rsd)) / Math.log(2));
+ }
+
+ /**
+ * Create a new HyperLogLog instance. The log2m parameter defines the accuracy of
+ * the counter. The larger the log2m the better the accuracy.
+ * <p/>
+ * accuracy = 1.04/sqrt(2^log2m)
+ *
+ * @param log2m - the number of bits to use as the basis for the HLL instance
+ */
+ public HyperLogLog(int log2m) {
+ this(log2m, new RegisterSet(1 << log2m));
+ }
+
+ /**
+ * Creates a new HyperLogLog instance using the given registers. Used for unmarshalling a serialized
+ * instance and for merging multiple counters together.
+ *
+ * @param registerSet - the initial values for the register set
+ */
+ @Deprecated
+ public HyperLogLog(int log2m, RegisterSet registerSet) {
+ if (log2m < 0 || log2m > 30) {
+ throw new IllegalArgumentException("log2m argument is "
+ + log2m + " and is outside the range [0, 30]");
+ }
+ this.registerSet = registerSet;
+ this.log2m = log2m;
+ int m = 1 << this.log2m;
+
+ alphaMM = getAlphaMM(log2m, m);
+ }
+
+ @Override
+ public boolean offerHashed(long hashedValue) {
+ // j becomes the binary address determined by the first b log2m of x
+ // j will be between 0 and 2^log2m
+ final int j = (int) (hashedValue >>> (Long.SIZE - log2m));
+ final int r = Long.numberOfLeadingZeros((hashedValue << this.log2m) | (1 << (this.log2m - 1)) + 1) + 1;
+ return registerSet.updateIfGreater(j, r);
+ }
+
+ @Override
+ public boolean offerHashed(int hashedValue) {
+ // j becomes the binary address determined by the first b log2m of x
+ // j will be between 0 and 2^log2m
+ final int j = hashedValue >>> (Integer.SIZE - log2m);
+ final int r = Integer.numberOfLeadingZeros((hashedValue << this.log2m) | (1 << (this.log2m - 1)) + 1) + 1;
+ return registerSet.updateIfGreater(j, r);
+ }
+
+ @Override
+ public boolean offer(Object o) {
+ final int x = MurmurHash.hash(o);
+ return offerHashed(x);
+ }
+
+
+ @Override
+ public long cardinality() {
+ double registerSum = 0;
+ int count = registerSet.count;
+ double zeros = 0.0;
+ for (int j = 0; j < registerSet.count; j++) {
+ int val = registerSet.get(j);
+ registerSum += 1.0 / (1 << val);
+ if (val == 0) {
+ zeros++;
+ }
+ }
+
+ double estimate = alphaMM * (1 / registerSum);
+
+ if (estimate <= (5.0 / 2.0) * count) {
+ // Small Range Estimate
+ return Math.round(linearCounting(count, zeros));
+ } else {
+ return Math.round(estimate);
+ }
+ }
+
+ @Override
+ public int sizeof() {
+ return registerSet.size * 4;
+ }
+
+ @Override
+ public byte[] getBytes() throws IOException {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ DataOutput dos = new DataOutputStream(baos);
+ writeBytes(dos);
+
+ return baos.toByteArray();
+ }
+
+ private void writeBytes(DataOutput serializedByteStream) throws IOException {
+ serializedByteStream.writeInt(log2m);
+ serializedByteStream.writeInt(registerSet.size * 4);
+ for (int x : registerSet.readOnlyBits()) {
+ serializedByteStream.writeInt(x);
+ }
+ }
+
+ /**
+ * Add all the elements of the other set to this set.
+ * <p/>
+ * This operation does not imply a loss of precision.
+ *
+ * @param other A compatible Hyperloglog instance (same log2m)
+ * @throws CardinalityMergeException if other is not compatible
+ */
+ public void addAll(HyperLogLog other) throws CardinalityMergeException {
+ if (this.sizeof() != other.sizeof()) {
+ throw new HyperLogLogMergeException("Cannot merge estimators of different sizes");
+ }
+
+ registerSet.merge(other.registerSet);
+ }
+
+ @Override
+ public ICardinality merge(ICardinality... estimators) throws CardinalityMergeException {
+ HyperLogLog merged = new HyperLogLog(HllExecutor.DEFAULT_HLL_STD_DEV);//new HyperLogLog(log2m, new RegisterSet(this.registerSet.count));
+ merged.addAll(this);
+
+ if (estimators == null) {
+ return merged;
+ }
+
+ for (ICardinality estimator : estimators) {
+ if (!(estimator instanceof HyperLogLog)) {
+ throw new HyperLogLogMergeException("Cannot merge estimators of different class");
+ }
+ HyperLogLog hll = (HyperLogLog) estimator;
+ merged.addAll(hll);
+ }
+
+ return merged;
+ }
+
+ private Object writeReplace() {
+ return new SerializationHolder(this);
+ }
+
+ /**
+ * This class exists to support Externalizable semantics for
+ * HyperLogLog objects without having to expose a public
+ * constructor, public write/read methods, or pretend final
+ * fields aren't final.
+ *
+ * In short, Externalizable allows you to skip some of the more
+ * verbose meta-data default Serializable gets you, but still
+ * includes the class name. In that sense, there is some cost
+ * to this holder object because it has a longer class name. I
+ * imagine people who care about optimizing for that have their
+ * own work-around for long class names in general, or just use
+ * a custom serialization framework. Therefore we make no attempt
+ * to optimize that here (eg. by raising this from an inner class
+ * and giving it an unhelpful name).
+ */
+ private static class SerializationHolder implements Externalizable {
+
+ HyperLogLog hyperLogLogHolder;
+
+ public SerializationHolder(HyperLogLog hyperLogLogHolder) {
+ this.hyperLogLogHolder = hyperLogLogHolder;
+ }
+
+ /**
+ * required for Externalizable
+ */
+ @SuppressWarnings("unused")
+ public SerializationHolder() {
+
+ }
+
+ @Override
+ public void writeExternal(ObjectOutput out) throws IOException {
+ hyperLogLogHolder.writeBytes(out);
+ }
+
+ @Override
+ public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
+ hyperLogLogHolder = Builder.build(in);
+ }
+
+ private Object readResolve() {
+ return hyperLogLogHolder;
+ }
+ }
+
+ public static class Builder implements IBuilder<ICardinality>, Serializable {
+
+ private static final long serialVersionUID = -979314356097156719L;
+ private double rsd;
+
+ public Builder(double rsd) {
+ this.rsd = rsd;
+ }
+
+ @Override
+ public HyperLogLog build() {
+ return new HyperLogLog(rsd);
+ }
+
+ @Override
+ public int sizeof() {
+ int log2m = log2m(rsd);
+ int k = 1 << log2m;
+ return RegisterSet.getBits(k) * 4;
+ }
+
+ public static HyperLogLog build(byte[] bytes) throws IOException {
+ ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
+ return build(new DataInputStream(bais));
+ }
+
+ public static HyperLogLog build(DataInput serializedByteStream) throws IOException {
+ int log2m = serializedByteStream.readInt();
+ int byteArraySize = serializedByteStream.readInt();
+ return new HyperLogLog(log2m,
+ new RegisterSet(1 << log2m, Bits.getBits(serializedByteStream, byteArraySize)));
+ }
+ }
+
+ @SuppressWarnings("serial")
+ protected static class HyperLogLogMergeException extends CardinalityMergeException {
+
+ public HyperLogLogMergeException(String message) {
+ super(message);
+ }
+ }
+
+ protected static double getAlphaMM(final int p, final int m) {
+ // See the paper.
+ switch (p) {
+ case 4:
+ return 0.673 * m * m;
+ case 5:
+ return 0.697 * m * m;
+ case 6:
+ return 0.709 * m * m;
+ default:
+ return (0.7213 / (1 + 1.079 / m)) * m * m;
+ }
+ }
+
+ protected static double linearCounting(int m, double V) {
+ return m * Math.log(m / V);
+ }
+}