You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datafu.apache.org by mh...@apache.org on 2015/08/24 21:18:11 UTC
[2/2] incubator-datafu git commit: Address minor review feedback for HyperLogLogPlusPlus
Address minor review feedback for HyperLogLogPlusPlus
Project: http://git-wip-us.apache.org/repos/asf/incubator-datafu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-datafu/commit/64354370
Tree: http://git-wip-us.apache.org/repos/asf/incubator-datafu/tree/64354370
Diff: http://git-wip-us.apache.org/repos/asf/incubator-datafu/diff/64354370
Branch: refs/heads/master
Commit: 643543706e6ad63c9c997f94bfdc979c63506339
Parents: 7ed3902
Author: Matthew Hayes <ma...@gmail.com>
Authored: Mon Aug 24 12:07:02 2015 -0700
Committer: Matthew Hayes <ma...@gmail.com>
Committed: Mon Aug 24 12:07:02 2015 -0700
----------------------------------------------------------------------
.../java/datafu/pig/stats/HyperLogLogPlusPlus.java | 15 ++++-----------
1 file changed, 4 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/64354370/datafu-pig/src/main/java/datafu/pig/stats/HyperLogLogPlusPlus.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/stats/HyperLogLogPlusPlus.java b/datafu-pig/src/main/java/datafu/pig/stats/HyperLogLogPlusPlus.java
index 28927a5..2068801 100644
--- a/datafu-pig/src/main/java/datafu/pig/stats/HyperLogLogPlusPlus.java
+++ b/datafu-pig/src/main/java/datafu/pig/stats/HyperLogLogPlusPlus.java
@@ -38,10 +38,6 @@ import com.clearspring.analytics.hash.MurmurHash;
import com.clearspring.analytics.stream.cardinality.CardinalityMergeException;
import com.clearspring.analytics.stream.cardinality.HyperLogLogPlus;
-import datafu.pig.stats.entropy.EmpiricalCountEntropy.Final;
-import datafu.pig.stats.entropy.EmpiricalCountEntropy.Initial;
-import datafu.pig.stats.entropy.EmpiricalCountEntropy.Intermediate;
-
/**
* A UDF that applies the HyperLogLog++ cardinality estimation algorithm.
*
@@ -146,14 +142,11 @@ public class HyperLogLogPlusPlus extends AlgebraicEvalFunc<Long>
// count should always be 1 if bag is non empty
DataBag bag = (DataBag) input.get(0);
Iterator<Tuple> it = bag.iterator();
+ Tuple t = null;
if (it.hasNext()) {
- Tuple t = (Tuple) it.next();
- if (t != null && t.size() > 0 && t.get(0) != null) {
- long x = MurmurHash.hash64(t);
- return mTupleFactory.newTuple((Object) x);
- }
+ t = (Tuple) it.next();
}
- return mTupleFactory.newTuple((Object) MurmurHash.hash64(null));
+ return mTupleFactory.newTuple((Object) MurmurHash.hash64(t));
}
}
@@ -208,7 +201,7 @@ public class HyperLogLogPlusPlus extends AlgebraicEvalFunc<Long>
Tuple t = it.next();
Object data = t.get(0);
if (data instanceof Long) {
- estimator.offer(data);
+ estimator.offerHashed((Long)data);
} else if (data instanceof DataByteArray) {
DataByteArray bytes = (DataByteArray) data;
HyperLogLogPlus newEstimator;