You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datafu.apache.org by mh...@apache.org on 2015/08/24 21:18:11 UTC

[2/2] incubator-datafu git commit: Address minor review feedback for HyperLogLogPlusPlus

Address minor review feedback for HyperLogLogPlusPlus


Project: http://git-wip-us.apache.org/repos/asf/incubator-datafu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-datafu/commit/64354370
Tree: http://git-wip-us.apache.org/repos/asf/incubator-datafu/tree/64354370
Diff: http://git-wip-us.apache.org/repos/asf/incubator-datafu/diff/64354370

Branch: refs/heads/master
Commit: 643543706e6ad63c9c997f94bfdc979c63506339
Parents: 7ed3902
Author: Matthew Hayes <ma...@gmail.com>
Authored: Mon Aug 24 12:07:02 2015 -0700
Committer: Matthew Hayes <ma...@gmail.com>
Committed: Mon Aug 24 12:07:02 2015 -0700

----------------------------------------------------------------------
 .../java/datafu/pig/stats/HyperLogLogPlusPlus.java   | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/64354370/datafu-pig/src/main/java/datafu/pig/stats/HyperLogLogPlusPlus.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/stats/HyperLogLogPlusPlus.java b/datafu-pig/src/main/java/datafu/pig/stats/HyperLogLogPlusPlus.java
index 28927a5..2068801 100644
--- a/datafu-pig/src/main/java/datafu/pig/stats/HyperLogLogPlusPlus.java
+++ b/datafu-pig/src/main/java/datafu/pig/stats/HyperLogLogPlusPlus.java
@@ -38,10 +38,6 @@ import com.clearspring.analytics.hash.MurmurHash;
 import com.clearspring.analytics.stream.cardinality.CardinalityMergeException;
 import com.clearspring.analytics.stream.cardinality.HyperLogLogPlus;
 
-import datafu.pig.stats.entropy.EmpiricalCountEntropy.Final;
-import datafu.pig.stats.entropy.EmpiricalCountEntropy.Initial;
-import datafu.pig.stats.entropy.EmpiricalCountEntropy.Intermediate;
-
 /**
  * A UDF that applies the HyperLogLog++ cardinality estimation algorithm.
  * 
@@ -146,14 +142,11 @@ public class HyperLogLogPlusPlus extends AlgebraicEvalFunc<Long>
       // count should always be 1 if bag is non empty
       DataBag bag = (DataBag) input.get(0);
       Iterator<Tuple> it = bag.iterator();
+      Tuple t = null;
       if (it.hasNext()) {
-        Tuple t = (Tuple) it.next();
-        if (t != null && t.size() > 0 && t.get(0) != null) {
-          long x = MurmurHash.hash64(t);
-          return mTupleFactory.newTuple((Object) x);
-        }
+        t = (Tuple) it.next();
       }
-      return mTupleFactory.newTuple((Object) MurmurHash.hash64(null));
+      return mTupleFactory.newTuple((Object) MurmurHash.hash64(t));
     }
   }
 
@@ -208,7 +201,7 @@ public class HyperLogLogPlusPlus extends AlgebraicEvalFunc<Long>
       Tuple t = it.next();
       Object data = t.get(0);
       if (data instanceof Long) {
-        estimator.offer(data);
+        estimator.offerHashed((Long)data);
       } else if (data instanceof DataByteArray) {
         DataByteArray bytes = (DataByteArray) data;
         HyperLogLogPlus newEstimator;