You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2016/12/14 07:30:47 UTC

[1/5] kylin git commit: KYLIN-1832 HyperLogLog performance optimization

Repository: kylin
Updated Branches:
  refs/heads/master 530365131 -> e6e330a8b


http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/core-metadata/src/test/java/org/apache/kylin/measure/hll/HyperLogLogCounterTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/hll/HyperLogLogCounterTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/hll/HyperLogLogCounterTest.java
deleted file mode 100644
index 5b7c565..0000000
--- a/core-metadata/src/test/java/org/apache/kylin/measure/hll/HyperLogLogCounterTest.java
+++ /dev/null
@@ -1,265 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.measure.hll;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.HashSet;
-import java.util.Random;
-import java.util.Set;
-
-import org.apache.kylin.common.util.Bytes;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter;
-import org.junit.Assert;
-import org.junit.Test;
-
-/**
- * @author yangli9
- * 
- */
-public class HyperLogLogCounterTest {
-
-    ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
-    Random rand1 = new Random(1);
-    Random rand2 = new Random(2);
-    Random rand3 = new Random(3);
-    int errorCount1 = 0;
-    int errorCount2 = 0;
-    int errorCount3 = 0;
-
-    @Test
-    public void testOneAdd() throws IOException {
-        HyperLogLogPlusCounter hllc = new HyperLogLogPlusCounter(14);
-        HyperLogLogPlusCounter one = new HyperLogLogPlusCounter(14);
-        for (int i = 0; i < 1000000; i++) {
-            one.clear();
-            one.add(rand1.nextInt());
-            hllc.merge(one);
-        }
-        assertTrue(hllc.getCountEstimate() > 1000000 * 0.9);
-    }
-
-    @Test
-    public void testPeekLength() throws IOException {
-        HyperLogLogPlusCounter hllc = new HyperLogLogPlusCounter(10);
-        HyperLogLogPlusCounter copy = new HyperLogLogPlusCounter(10);
-        byte[] value = new byte[10];
-        for (int i = 0; i < 200000; i++) {
-            rand1.nextBytes(value);
-            hllc.add(value);
-
-            buf.clear();
-            hllc.writeRegisters(buf);
-
-            int len = buf.position();
-            buf.position(0);
-            assertEquals(len, hllc.peekLength(buf));
-
-            copy.readRegisters(buf);
-            assertEquals(len, buf.position());
-            assertEquals(hllc, copy);
-        }
-        buf.clear();
-    }
-
-    private Set<String> generateTestData(int n) {
-        Set<String> testData = new HashSet<String>();
-        for (int i = 0; i < n; i++) {
-            String[] samples = generateSampleData();
-            for (String sample : samples) {
-                testData.add(sample);
-            }
-        }
-        return testData;
-    }
-
-    // simulate the visit (=visitor+id)
-    private String[] generateSampleData() {
-
-        StringBuilder buf = new StringBuilder();
-        for (int i = 0; i < 19; i++) {
-            buf.append(Math.abs(rand1.nextInt()) % 10);
-        }
-        String header = buf.toString();
-
-        int size = Math.abs(rand3.nextInt()) % 9 + 1;
-        String[] samples = new String[size];
-        for (int k = 0; k < size; k++) {
-            buf = new StringBuilder(header);
-            buf.append("-");
-            for (int i = 0; i < 10; i++) {
-                buf.append(Math.abs(rand3.nextInt()) % 10);
-            }
-            samples[k] = buf.toString();
-        }
-
-        return samples;
-    }
-
-    @Test
-    public void countTest() throws IOException {
-        int n = 10;
-        for (int i = 0; i < 5; i++) {
-            count(n);
-            n *= 10;
-        }
-    }
-
-    private void count(int n) throws IOException {
-        Set<String> testSet = generateTestData(n);
-
-        HyperLogLogPlusCounter hllc = newHLLC();
-        for (String testData : testSet) {
-            hllc.add(Bytes.toBytes(testData));
-        }
-        long estimate = hllc.getCountEstimate();
-        double errorRate = hllc.getErrorRate();
-        double actualError = (double) Math.abs(testSet.size() - estimate) / testSet.size();
-        System.out.println(estimate);
-        System.out.println(testSet.size());
-        System.out.println(errorRate);
-        System.out.println("=" + actualError);
-        Assert.assertTrue(actualError < errorRate * 3.0);
-
-        checkSerialize(hllc);
-    }
-
-    private void checkSerialize(HyperLogLogPlusCounter hllc) throws IOException {
-        long estimate = hllc.getCountEstimate();
-        buf.clear();
-        hllc.writeRegisters(buf);
-        buf.flip();
-        hllc.readRegisters(buf);
-        Assert.assertEquals(estimate, hllc.getCountEstimate());
-    }
-
-    @Test
-    public void mergeTest() throws IOException {
-        double error = 0;
-        int n = 100;
-        for (int i = 0; i < n; i++) {
-            double e = merge(i);
-            error += e;
-        }
-        System.out.println("Total average error is " + error / n);
-
-        System.out.println("  errorRateCount1 is " + errorCount1 + "!");
-        System.out.println("  errorRateCount2 is " + errorCount2 + "!");
-        System.out.println("  errorRateCount3 is " + errorCount3 + "!");
-
-        Assert.assertTrue(errorCount1 <= n * 0.30);
-        Assert.assertTrue(errorCount2 <= n * 0.05);
-        Assert.assertTrue(errorCount3 <= n * 0.02);
-    }
-
-    private double merge(int round) throws IOException {
-        int ln = 20;
-        int dn = 100 * (round + 1);
-        Set<String> testSet = new HashSet<String>();
-        HyperLogLogPlusCounter[] hllcs = new HyperLogLogPlusCounter[ln];
-        for (int i = 0; i < ln; i++) {
-            hllcs[i] = newHLLC();
-            for (int k = 0; k < dn; k++) {
-                String[] samples = generateSampleData();
-                for (String data : samples) {
-                    testSet.add(data);
-                    hllcs[i].add(Bytes.toBytes(data));
-                }
-            }
-        }
-        HyperLogLogPlusCounter mergeHllc = newHLLC();
-        for (HyperLogLogPlusCounter hllc : hllcs) {
-            mergeHllc.merge(serDes(hllc));
-        }
-
-        double errorRate = mergeHllc.getErrorRate();
-        long estimate = mergeHllc.getCountEstimate();
-        double actualError = Math.abs((double) (testSet.size() - estimate) / testSet.size());
-
-        System.out.println(testSet.size() + "-" + estimate + " ~ " + actualError);
-        Assert.assertTrue(actualError < 0.1);
-
-        if (actualError > errorRate) {
-            errorCount1++;
-        }
-        if (actualError > 2 * errorRate) {
-            errorCount2++;
-        }
-        if (actualError > 3 * errorRate) {
-            errorCount3++;
-        }
-
-        return actualError;
-    }
-
-    private HyperLogLogPlusCounter serDes(HyperLogLogPlusCounter hllc) throws IOException {
-        buf.clear();
-        hllc.writeRegisters(buf);
-        buf.flip();
-        HyperLogLogPlusCounter copy = new HyperLogLogPlusCounter(hllc.getPrecision());
-        copy.readRegisters(buf);
-        Assert.assertEquals(copy.getCountEstimate(), hllc.getCountEstimate());
-        return copy;
-    }
-
-    @Test
-    public void testPerformance() throws IOException {
-        int N = 3; // reduce N HLLC into one
-        int M = 1000; // for M times, use 100000 for real perf test
-
-        HyperLogLogPlusCounter samples[] = new HyperLogLogPlusCounter[N];
-        for (int i = 0; i < N; i++) {
-            samples[i] = newHLLC();
-            for (String str : generateTestData(10000))
-                samples[i].add(str);
-        }
-
-        System.out.println("Perf test running ... ");
-        long start = System.currentTimeMillis();
-        HyperLogLogPlusCounter sum = newHLLC();
-        for (int i = 0; i < M; i++) {
-            sum.clear();
-            for (int j = 0; j < N; j++) {
-                sum.merge(samples[j]);
-                checkSerialize(sum);
-            }
-        }
-        long duration = System.currentTimeMillis() - start;
-        System.out.println("Perf test result: " + duration / 1000 + " seconds");
-    }
-
-    @Test
-    public void testEquivalence() {
-        byte[] a = new byte[] { 0, 3, 4, 42, 2, 2 };
-        byte[] b = new byte[] { 3, 4, 42 };
-        HyperLogLogPlusCounter ha = new HyperLogLogPlusCounter();
-        HyperLogLogPlusCounter hb = new HyperLogLogPlusCounter();
-        ha.add(a, 1, 3);
-        hb.add(b);
-
-        Assert.assertTrue(ha.getCountEstimate() == hb.getCountEstimate());
-    }
-
-    private HyperLogLogPlusCounter newHLLC() {
-        return new HyperLogLogPlusCounter(16);
-    }
-}

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/core-metadata/src/test/java/org/apache/kylin/measure/hll2/HyperLogLogCounterNewTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/hll2/HyperLogLogCounterNewTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/hll2/HyperLogLogCounterNewTest.java
new file mode 100644
index 0000000..feb8c8e
--- /dev/null
+++ b/core-metadata/src/test/java/org/apache/kylin/measure/hll2/HyperLogLogCounterNewTest.java
@@ -0,0 +1,301 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+package org.apache.kylin.measure.hll2;
+
+import org.apache.kylin.common.util.Bytes;
+import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterOld;
+import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
+import org.apache.kylin.measure.hllc.RegisterType;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Set;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Created by xiefan on 16-12-12.
+ */
+public class HyperLogLogCounterNewTest {
+    ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
+    Random rand1 = new Random(1);
+    Random rand2 = new Random(2);
+    Random rand3 = new Random(3);
+    int errorCount1 = 0;
+    int errorCount2 = 0;
+    int errorCount3 = 0;
+
+    @Test
+    public void testOneAdd() throws IOException {
+        HyperLogLogPlusCounterNew hllc = new HyperLogLogPlusCounterNew(14);
+        HyperLogLogPlusCounterNew one = new HyperLogLogPlusCounterNew(14);
+        for (int i = 0; i < 1000000; i++) {
+            one.clear();
+            one.add(rand1.nextInt());
+            hllc.merge(one);
+        }
+        System.out.println(hllc.getCountEstimate());
+        assertTrue(hllc.getCountEstimate() > 1000000 * 0.9);
+    }
+
+    @Test
+    public void tesSparseEstimate() throws IOException {
+        HyperLogLogPlusCounterNew hllc = new HyperLogLogPlusCounterNew(14);
+        for (int i = 0; i < 10; i++) {
+            hllc.add(i);
+        }
+        System.out.println(hllc.getCountEstimate());
+        assertTrue(hllc.getCountEstimate() > 10 * 0.9);
+    }
+
+    @Test
+    public void countTest() throws IOException {
+        int n = 10;
+        for (int i = 0; i < 5; i++) {
+            count(n);
+            n *= 10;
+        }
+    }
+
+    @Test
+    public void mergeTest() throws IOException {
+        double error = 0;
+        int n = 100;
+        for (int i = 0; i < n; i++) {
+            double e = merge(i);
+            error += e;
+        }
+        System.out.println("Total average error is " + error / n);
+
+        System.out.println("  errorRateCount1 is " + errorCount1 + "!");
+        System.out.println("  errorRateCount2 is " + errorCount2 + "!");
+        System.out.println("  errorRateCount3 is " + errorCount3 + "!");
+
+        Assert.assertTrue(errorCount1 <= n * 0.30);
+        Assert.assertTrue(errorCount2 <= n * 0.05);
+        Assert.assertTrue(errorCount3 <= n * 0.02);
+    }
+
+    /*
+     * Compare the count estimates of the old and new HLL counter implementations.
+     */
+    @Test
+    public void compareResult() {
+        int p = 12; //4096
+        int m = 1 << p;
+    
+        for (int t = 0; t < 5; t++) {
+            //compare sparse
+            HyperLogLogPlusCounterOld oldCounter = new HyperLogLogPlusCounterOld(p);
+            HyperLogLogPlusCounterNew newCounter = new HyperLogLogPlusCounterNew(p);
+    
+            for (int i = 0; i < 20; i++) {
+                //int r = rand1.nextInt();
+                oldCounter.add(i);
+                newCounter.add(i);
+            }
+            assertEquals(RegisterType.SPARSE, newCounter.getRegisterType());
+            assertEquals(oldCounter.getCountEstimate(), newCounter.getCountEstimate());
+            //compare dense
+            for (int i = 0; i < m; i++) {
+                oldCounter.add(i);
+                newCounter.add(i);
+            }
+            assertEquals(RegisterType.DENSE, newCounter.getRegisterType());
+            assertEquals(oldCounter.getCountEstimate(), newCounter.getCountEstimate());
+        }
+    
+    }
+
+    @Test
+    public void testPeekLength() throws IOException {
+        HyperLogLogPlusCounterNew hllc = new HyperLogLogPlusCounterNew(10);
+        HyperLogLogPlusCounterNew copy = new HyperLogLogPlusCounterNew(10);
+        byte[] value = new byte[10];
+        for (int i = 0; i < 200000; i++) {
+            rand1.nextBytes(value);
+            hllc.add(value);
+
+            buf.clear();
+            hllc.writeRegisters(buf);
+
+            int len = buf.position();
+            buf.position(0);
+            assertEquals(len, hllc.peekLength(buf));
+
+            copy.readRegisters(buf);
+            assertEquals(len, buf.position());
+            assertEquals(hllc, copy);
+        }
+        buf.clear();
+    }
+
+    @Test
+    public void testEquivalence() {
+        byte[] a = new byte[] { 0, 3, 4, 42, 2, 2 };
+        byte[] b = new byte[] { 3, 4, 42 };
+        HyperLogLogPlusCounterNew ha = new HyperLogLogPlusCounterNew();
+        HyperLogLogPlusCounterNew hb = new HyperLogLogPlusCounterNew();
+        ha.add(a, 1, 3);
+        hb.add(b);
+
+        Assert.assertTrue(ha.getCountEstimate() == hb.getCountEstimate());
+    }
+
+    @Test
+    public void testAutoChangeToSparse() {
+        int p = 15;
+        int m = 1 << p;
+        HyperLogLogPlusCounterNew counter = new HyperLogLogPlusCounterNew(p);
+        assertEquals(RegisterType.SPARSE, counter.getRegisterType()); // a freshly created counter reports SPARSE
+        double over = HyperLogLogPlusCounterNew.overflowFactor * m;
+        int overFlow = (int) over + 1000; // 1000 values more than overflowFactor * m
+        for (int i = 0; i < overFlow; i++)
+            counter.add(i);
+        assertEquals(RegisterType.DENSE, counter.getRegisterType()); // NOTE: despite the method name, the asserted switch is SPARSE -> DENSE
+    }
+
+    @Test
+    public void testSerialilze() throws Exception {
+        //test sparse serialize
+        int p = 15;
+        int m = 1 << p;
+        HyperLogLogPlusCounterNew counter = new HyperLogLogPlusCounterNew(p);
+        counter.add(123);
+        assertEquals(RegisterType.SPARSE, counter.getRegisterType());
+        checkSerialize(counter);
+        //test dense serialize
+        double over = HyperLogLogPlusCounterNew.overflowFactor * m;
+        int overFlow = (int) over + 1000;
+        for (int i = 0; i < overFlow; i++)
+            counter.add(i);
+        assertEquals(RegisterType.DENSE, counter.getRegisterType());
+        checkSerialize(counter);
+    }
+
+    private Set<String> generateTestData(int n) {
+        Set<String> testData = new HashSet<String>();
+        for (int i = 0; i < n; i++) {
+            String[] samples = generateSampleData();
+            for (String sample : samples) {
+                testData.add(sample);
+            }
+        }
+        return testData;
+    }
+
+    // simulate the visit (=visitor+id)
+    private String[] generateSampleData() {
+
+        StringBuilder buf = new StringBuilder();
+        for (int i = 0; i < 19; i++) {
+            buf.append(Math.abs(rand1.nextInt()) % 10);
+        }
+        String header = buf.toString();
+
+        int size = Math.abs(rand3.nextInt()) % 9 + 1;
+        String[] samples = new String[size];
+        for (int k = 0; k < size; k++) {
+            buf = new StringBuilder(header);
+            buf.append("-");
+            for (int i = 0; i < 10; i++) {
+                buf.append(Math.abs(rand3.nextInt()) % 10);
+            }
+            samples[k] = buf.toString();
+        }
+
+        return samples;
+    }
+
+    private double merge(int round) throws IOException {
+        int ln = 20;
+        int dn = 100 * (round + 1);
+        Set<String> testSet = new HashSet<String>();
+        HyperLogLogPlusCounterNew[] hllcs = new HyperLogLogPlusCounterNew[ln];
+        for (int i = 0; i < ln; i++) {
+            hllcs[i] = newHLLC();
+            for (int k = 0; k < dn; k++) {
+                String[] samples = generateSampleData();
+                for (String data : samples) {
+                    testSet.add(data);
+                    hllcs[i].add(Bytes.toBytes(data));
+                }
+            }
+        }
+        HyperLogLogPlusCounterNew mergeHllc = newHLLC();
+        for (HyperLogLogPlusCounterNew hllc : hllcs) {
+            mergeHllc.merge(hllc);
+        }
+
+        double errorRate = mergeHllc.getErrorRate();
+        long estimate = mergeHllc.getCountEstimate();
+        double actualError = Math.abs((double) (testSet.size() - estimate) / testSet.size());
+
+        System.out.println(testSet.size() + "-" + estimate + " ~ " + actualError);
+        Assert.assertTrue(actualError < 0.1);
+
+        if (actualError > errorRate) {
+            errorCount1++;
+        }
+        if (actualError > 2 * errorRate) {
+            errorCount2++;
+        }
+        if (actualError > 3 * errorRate) {
+            errorCount3++;
+        }
+
+        return actualError;
+    }
+
+    private HyperLogLogPlusCounterNew newHLLC() {
+        return new HyperLogLogPlusCounterNew(16);
+    }
+
+    private void count(int n) throws IOException {
+        Set<String> testSet = generateTestData(n);
+
+        HyperLogLogPlusCounterNew hllc = newHLLC();
+        for (String testData : testSet) {
+            hllc.add(Bytes.toBytes(testData));
+        }
+        long estimate = hllc.getCountEstimate();
+        double errorRate = hllc.getErrorRate();
+        double actualError = (double) Math.abs(testSet.size() - estimate) / testSet.size();
+        System.out.println(estimate);
+        System.out.println(testSet.size());
+        System.out.println(errorRate);
+        System.out.println("=" + actualError);
+        Assert.assertTrue(actualError < errorRate * 3.0);
+
+        checkSerialize(hllc);
+    }
+
+    private void checkSerialize(HyperLogLogPlusCounterNew hllc) throws IOException {
+        long estimate = hllc.getCountEstimate();
+        buf.clear();
+        hllc.writeRegisters(buf);
+        buf.flip();
+        hllc.readRegisters(buf);
+        Assert.assertEquals(estimate, hllc.getCountEstimate());
+    }
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/core-metadata/src/test/java/org/apache/kylin/measure/hll2/NewHyperLogLogBenchmarkTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/hll2/NewHyperLogLogBenchmarkTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/hll2/NewHyperLogLogBenchmarkTest.java
new file mode 100644
index 0000000..bfb87f9
--- /dev/null
+++ b/core-metadata/src/test/java/org/apache/kylin/measure/hll2/NewHyperLogLogBenchmarkTest.java
@@ -0,0 +1,288 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+package org.apache.kylin.measure.hll2;
+
+import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterOld;
+import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
+import org.apache.kylin.measure.hllc.RegisterType;
+import org.junit.Test;
+
+import java.nio.ByteBuffer;
+import java.util.Random;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Created by xiefan on 16-12-12.
+ */
+public class NewHyperLogLogBenchmarkTest {
+
+    public static final Random rand = new Random(1);
+
+    final int testTimes = 10000;
+
+    @Test
+    public void denseToDenseRegisterMergeBenchmark() throws Exception {
+        final int p = 15;
+        int m = 1 << p;
+        // Times testTimes merges of one counter into another, old vs. new (DENSE) implementation, per cardinality.
+        System.out.println("m : " + m);
+        double oldFactor = HyperLogLogPlusCounterNew.overflowFactor;
+        HyperLogLogPlusCounterNew.overflowFactor = 1.1; //saved above and restored at the end; new counters below are DENSE explicitly
+        for (int cardinality : getTestDataDivide(m)) {
+            final HyperLogLogPlusCounterOld oldCounter = new HyperLogLogPlusCounterOld(p);
+            final HyperLogLogPlusCounterOld oldCounter2 = getRandOldCounter(p, cardinality);
+            long oldTime = runTestCase(new TestCase() {
+                @Override
+                public void run() {
+
+                    for (int i = 0; i < testTimes; i++) {
+                        oldCounter.merge(oldCounter2);
+                    }
+                }
+            });
+            final HyperLogLogPlusCounterNew newCounter = new HyperLogLogPlusCounterNew(p, RegisterType.DENSE);
+            final HyperLogLogPlusCounterNew newCounter2 = new HyperLogLogPlusCounterNew(p, RegisterType.DENSE);
+            for (int i = 0; i < cardinality; i++) // same 'cardinality' values as oldCounter2 (was testTimes, ignoring the loop variable)
+                newCounter2.add(i);
+            long newTime = runTestCase(new TestCase() {
+                @Override
+                public void run() {
+                    for (int i = 0; i < testTimes; i++) {
+                        newCounter.merge(newCounter2);
+                    }
+                }
+            });
+            assertEquals(RegisterType.DENSE, newCounter.getRegisterType());
+            assertEquals(RegisterType.DENSE, newCounter2.getRegisterType());
+            System.out.println("----------------------------");
+            System.out.println("cardinality : " + cardinality);
+            System.out.println("old time : " + oldTime);
+            System.out.println("new time : " + newTime);
+        }
+        HyperLogLogPlusCounterNew.overflowFactor = oldFactor;
+    }
+
+    @Test
+    public void sparseToSparseMergeBenchmark() throws Exception {
+        final int p = 15;
+        int m = 1 << p;
+        System.out.println("m : " + m);
+        double oldFactor = HyperLogLogPlusCounterNew.overflowFactor;
+        HyperLogLogPlusCounterNew.overflowFactor = 1.1; //keep sparse
+        for (int cardinality : getTestDataDivide(m)) {
+            final HyperLogLogPlusCounterOld oldCounter = new HyperLogLogPlusCounterOld(p);
+            final HyperLogLogPlusCounterOld oldCounter2 = getRandOldCounter(p, cardinality);
+            long oldTime = runTestCase(new TestCase() {
+                @Override
+                public void run() {
+
+                    for (int i = 0; i < testTimes; i++) {
+                        oldCounter.merge(oldCounter2);
+                    }
+                }
+            });
+            final HyperLogLogPlusCounterNew newCounter = new HyperLogLogPlusCounterNew(p);
+            final HyperLogLogPlusCounterNew newCounter2 = getRandNewCounter(p, cardinality);
+            long newTime = runTestCase(new TestCase() {
+                @Override
+                public void run() {
+                    for (int i = 0; i < testTimes; i++) {
+                        newCounter.merge(newCounter2);
+                    }
+                }
+            });
+            assertEquals(RegisterType.SPARSE, newCounter.getRegisterType());
+            assertEquals(RegisterType.SPARSE, newCounter2.getRegisterType());
+            System.out.println("----------------------------");
+            System.out.println("cardinality : " + cardinality);
+            System.out.println("old time : " + oldTime);
+            System.out.println("new time : " + newTime);
+        }
+        HyperLogLogPlusCounterNew.overflowFactor = oldFactor;
+    }
+
+    @Test
+    public void sparseToDenseRegisterMergeBenchmark() throws Exception {
+        final int p = 15;
+        int m = 1 << p;
+        System.out.println("m : " + m);
+        double oldFactor = HyperLogLogPlusCounterNew.overflowFactor;
+        HyperLogLogPlusCounterNew.overflowFactor = 1.1; //keep sparse
+        for (int cardinality : getTestDataDivide(m)) {
+            System.out.println("----------------------------");
+            System.out.println("cardinality : " + cardinality);
+            final HyperLogLogPlusCounterOld oldCounter = new HyperLogLogPlusCounterOld(p);
+            final HyperLogLogPlusCounterOld oldCounter2 = getRandOldCounter(p, cardinality);
+            long oldTime = runTestCase(new TestCase() {
+                @Override
+                public void run() {
+                    for (int i = 0; i < testTimes; i++) {
+                        oldCounter.merge(oldCounter2);
+                    }
+                }
+            });
+            final HyperLogLogPlusCounterNew newCounter = new HyperLogLogPlusCounterNew(p, RegisterType.DENSE);
+            final HyperLogLogPlusCounterNew newCounter2 = getRandNewCounter(p, cardinality);
+            long newTime = runTestCase(new TestCase() {
+                @Override
+                public void run() {
+                    for (int i = 0; i < testTimes; i++) {
+                        newCounter.merge(newCounter2);
+                    }
+                }
+            });
+            assertEquals(RegisterType.DENSE, newCounter.getRegisterType());
+            assertEquals(RegisterType.SPARSE, newCounter2.getRegisterType());
+            System.out.println("old time : " + oldTime);
+            System.out.println("new time : " + newTime);
+        }
+        HyperLogLogPlusCounterNew.overflowFactor = oldFactor;
+    }
+
+    @Test
+    public void sparseSerializeBenchmark() throws Exception {
+        final int p = 15;
+        int m = 1 << p;
+        double oldFactor = HyperLogLogPlusCounterNew.overflowFactor;
+        HyperLogLogPlusCounterNew.overflowFactor = 1.1; //keep sparse
+        for (int cardinality : getTestDataDivide(m)) {
+            System.out.println("----------------------------");
+            System.out.println("cardinality : " + cardinality);
+            final HyperLogLogPlusCounterOld oldCounter = getRandOldCounter(p, cardinality);
+            long oldTime = runTestCase(new TestCase() {
+                @Override
+                public void run() throws Exception {
+                    ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
+                    long totalBytes = 0;
+                    for (int i = 0; i < testTimes; i++) {
+                        buf.clear();
+                        oldCounter.writeRegisters(buf);
+                        totalBytes += buf.position();
+                        buf.flip();
+                        oldCounter.readRegisters(buf);
+                    }
+                    System.out.println("old serialize bytes : " + totalBytes / testTimes + "B");
+                }
+            });
+            final HyperLogLogPlusCounterNew newCounter = getRandNewCounter(p, cardinality);
+            long newTime = runTestCase(new TestCase() {
+                @Override
+                public void run() throws Exception {
+                    ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
+                    long totalBytes = 0;
+                    for (int i = 0; i < testTimes; i++) {
+                        buf.clear();
+                        newCounter.writeRegisters(buf);
+                        totalBytes += buf.position();
+                        buf.flip();
+                        newCounter.readRegisters(buf);
+                    }
+                    System.out.println("new serialize bytes : " + totalBytes / testTimes + "B");
+                }
+            });
+            assertEquals(RegisterType.SPARSE, newCounter.getRegisterType());
+            System.out.println("old serialize time : " + oldTime);
+            System.out.println("new serialize time : " + newTime);
+        }
+        HyperLogLogPlusCounterNew.overflowFactor = oldFactor;
+    }
+
+    @Test
+    public void denseSerializeBenchmark() throws Exception {
+        final int p = 15;
+        int m = 1 << p;
+        double oldFactor = HyperLogLogPlusCounterNew.overflowFactor;
+        HyperLogLogPlusCounterNew.overflowFactor = 0; //NOTE(review): original comment said "keep sparse"; factor 0 presumably forces DENSE, which is asserted below
+        for (int cardinality : getTestDataDivide(m)) {
+            System.out.println("----------------------------");
+            System.out.println("cardinality : " + cardinality);
+            final HyperLogLogPlusCounterOld oldCounter = getRandOldCounter(p, cardinality);
+            long oldTime = runTestCase(new TestCase() {
+                @Override
+                public void run() throws Exception {
+                    ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
+                    long totalBytes = 0;
+                    for (int i = 0; i < testTimes; i++) {
+                        buf.clear();
+                        oldCounter.writeRegisters(buf);
+                        totalBytes += buf.position(); // bytes written by this round's writeRegisters
+                        buf.flip();
+                        oldCounter.readRegisters(buf);
+                    }
+                    System.out.println("old serialize bytes : " + totalBytes / testTimes + "B");
+                }
+            });
+            final HyperLogLogPlusCounterNew newCounter = getRandNewCounter(p, cardinality, RegisterType.DENSE);
+            long newTime = runTestCase(new TestCase() {
+                @Override
+                public void run() throws Exception {
+                    ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
+                    long totalBytes = 0;
+                    for (int i = 0; i < testTimes; i++) {
+                        buf.clear();
+                        newCounter.writeRegisters(buf);
+                        totalBytes += buf.position(); // bytes written by this round's writeRegisters
+                        buf.flip();
+                        newCounter.readRegisters(buf);
+                    }
+                    System.out.println("new serialize bytes : " + totalBytes / testTimes + "B");
+                }
+            });
+            assertEquals(RegisterType.DENSE, newCounter.getRegisterType());
+            System.out.println("old serialize time : " + oldTime);
+            System.out.println("new serialize time : " + newTime);
+        }
+        HyperLogLogPlusCounterNew.overflowFactor = oldFactor; // restore the static factor saved at the top
+    }
+
+    interface TestCase {
+        void run() throws Exception;
+    }
+
+    public long runTestCase(TestCase testCase) throws Exception {
+        long startTime = System.currentTimeMillis();
+        testCase.run();
+        return System.currentTimeMillis() - startTime;
+    }
+
+    public HyperLogLogPlusCounterOld getRandOldCounter(int p, int num) {
+        HyperLogLogPlusCounterOld c = new HyperLogLogPlusCounterOld(p);
+        for (int i = 0; i < num; i++)
+            c.add(i);
+        return c;
+    }
+
+    public HyperLogLogPlusCounterNew getRandNewCounter(int p, int num) {
+        HyperLogLogPlusCounterNew c = new HyperLogLogPlusCounterNew(p);
+        for (int i = 0; i < num; i++)
+            c.add(i);
+        return c;
+    }
+
+    public HyperLogLogPlusCounterNew getRandNewCounter(int p, int num, RegisterType type) {
+        HyperLogLogPlusCounterNew c = new HyperLogLogPlusCounterNew(p, type);
+        for (int i = 0; i < num; i++)
+            c.add(i);
+        return c;
+    }
+
+    public static int[] getTestDataDivide(int m) {
+        return new int[] { 1, 5, 10, 100, m / 200, m / 100, m / 50, m / 20, m / 10, m };
+    }
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
index 21af1e6..5445491 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
@@ -53,7 +53,7 @@ import org.apache.kylin.cube.kv.CubeDimEncMap;
 import org.apache.kylin.cube.kv.RowKeyEncoder;
 import org.apache.kylin.cube.model.CubeDesc;
 import org.apache.kylin.engine.mr.HadoopUtil;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter;
+import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
 import org.apache.kylin.metadata.datatype.DataType;
 import org.apache.kylin.metadata.model.FunctionDesc;
 import org.apache.kylin.metadata.model.MeasureDesc;
@@ -76,7 +76,7 @@ public class CubeStatsReader {
     final int samplingPercentage;
     final int mapperNumberOfFirstBuild; // becomes meaningless after merge
     final double mapperOverlapRatioOfFirstBuild; // becomes meaningless after merge
-    final Map<Long, HyperLogLogPlusCounter> cuboidRowEstimatesHLL;
+    final Map<Long, HyperLogLogPlusCounterNew> cuboidRowEstimatesHLL;
     final CuboidScheduler cuboidScheduler;
 
     public CubeStatsReader(CubeSegment cubeSegment, KylinConfig kylinConfig) throws IOException {
@@ -96,7 +96,7 @@ public class CubeStatsReader {
             int percentage = 100;
             int mapperNumber = 0;
             double mapperOverlapRatio = 0;
-            Map<Long, HyperLogLogPlusCounter> counterMap = Maps.newHashMap();
+            Map<Long, HyperLogLogPlusCounterNew> counterMap = Maps.newHashMap();
 
             LongWritable key = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), hadoopConf);
             BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), hadoopConf);
@@ -108,7 +108,7 @@ public class CubeStatsReader {
                 } else if (key.get() == -2) {
                     mapperNumber = Bytes.toInt(value.getBytes());
                 } else if (key.get() > 0) {
-                    HyperLogLogPlusCounter hll = new HyperLogLogPlusCounter(kylinConfig.getCubeStatsHLLPrecision());
+                    HyperLogLogPlusCounterNew hll = new HyperLogLogPlusCounterNew(kylinConfig.getCubeStatsHLLPrecision());
                     ByteArray byteArray = new ByteArray(value.getBytes());
                     hll.readRegisters(byteArray.asBuffer());
                     counterMap.put(key.get(), hll);
@@ -161,9 +161,9 @@ public class CubeStatsReader {
         return mapperOverlapRatioOfFirstBuild;
     }
 
-    public static Map<Long, Long> getCuboidRowCountMapFromSampling(Map<Long, HyperLogLogPlusCounter> hllcMap, int samplingPercentage) {
+    public static Map<Long, Long> getCuboidRowCountMapFromSampling(Map<Long, HyperLogLogPlusCounterNew> hllcMap, int samplingPercentage) {
         Map<Long, Long> cuboidRowCountMap = Maps.newHashMap();
-        for (Map.Entry<Long, HyperLogLogPlusCounter> entry : hllcMap.entrySet()) {
+        for (Map.Entry<Long, HyperLogLogPlusCounterNew> entry : hllcMap.entrySet()) {
             // No need to adjust according sampling percentage. Assumption is that data set is far
             // more than cardinality. Even a percentage of the data should already see all cardinalities.
             cuboidRowCountMap.put(entry.getKey(), entry.getValue().getCountEstimate());

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsWriter.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsWriter.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsWriter.java
index 74a2107..219cdf2 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsWriter.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsWriter.java
@@ -33,17 +33,17 @@ import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.kylin.common.util.Bytes;
 import org.apache.kylin.measure.BufferedMeasureCodec;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter;
+import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
 
 public class CubeStatsWriter {
 
     public static void writeCuboidStatistics(Configuration conf, Path outputPath, //
-            Map<Long, HyperLogLogPlusCounter> cuboidHLLMap, int samplingPercentage) throws IOException {
+            Map<Long, HyperLogLogPlusCounterNew> cuboidHLLMap, int samplingPercentage) throws IOException {
         writeCuboidStatistics(conf, outputPath, cuboidHLLMap, samplingPercentage, 0, 0);
     }
 
     public static void writeCuboidStatistics(Configuration conf, Path outputPath, //
-            Map<Long, HyperLogLogPlusCounter> cuboidHLLMap, int samplingPercentage, int mapperNumber, double mapperOverlapRatio) throws IOException {
+            Map<Long, HyperLogLogPlusCounterNew> cuboidHLLMap, int samplingPercentage, int mapperNumber, double mapperOverlapRatio) throws IOException {
         Path seqFilePath = new Path(outputPath, BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME);
 
         List<Long> allCuboids = new ArrayList<Long>();

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
index 776d750..0d388c7 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
@@ -47,7 +47,7 @@ import org.apache.kylin.engine.mr.KylinReducer;
 import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
 import org.apache.kylin.engine.mr.common.BatchConstants;
 import org.apache.kylin.engine.mr.common.CubeStatsWriter;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter;
+import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
 import org.apache.kylin.metadata.model.TblColRef;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -64,7 +64,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<SelfDefineSortableK
     private List<TblColRef> columnList;
     private String statisticsOutput = null;
     private List<Long> baseCuboidRowCountInMappers;
-    protected Map<Long, HyperLogLogPlusCounter> cuboidHLLMap = null;
+    protected Map<Long, HyperLogLogPlusCounterNew> cuboidHLLMap = null;
     protected long baseCuboidId;
     protected CubeDesc cubeDesc;
     private long totalRowsBeforeMerge = 0;
@@ -156,7 +156,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<SelfDefineSortableK
             // for hll
             long cuboidId = Bytes.toLong(key.getBytes(), 1, Bytes.SIZEOF_LONG);
             for (Text value : values) {
-                HyperLogLogPlusCounter hll = new HyperLogLogPlusCounter(cubeConfig.getCubeStatsHLLPrecision());
+                HyperLogLogPlusCounterNew hll = new HyperLogLogPlusCounterNew(cubeConfig.getCubeStatsHLLPrecision());
                 ByteBuffer bf = ByteBuffer.wrap(value.getBytes(), 0, value.getLength());
                 hll.readRegisters(bf);
 
@@ -270,7 +270,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<SelfDefineSortableK
         if (isStatistics) {
             // output the hll info
             long grandTotal = 0;
-            for (HyperLogLogPlusCounter hll : cuboidHLLMap.values()) {
+            for (HyperLogLogPlusCounterNew hll : cuboidHLLMap.values()) {
                 grandTotal += hll.getCountEstimate();
             }
             double mapperOverlapRatio = grandTotal == 0 ? 0 : (double) totalRowsBeforeMerge / grandTotal;

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
index a5c8fc0..c0575f1 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
@@ -29,7 +29,7 @@ import org.apache.kylin.common.util.Bytes;
 import org.apache.kylin.cube.cuboid.CuboidScheduler;
 import org.apache.kylin.engine.mr.common.BatchConstants;
 import org.apache.kylin.measure.BufferedMeasureCodec;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter;
+import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
 import org.apache.kylin.metadata.model.TblColRef;
 
 import com.google.common.collect.Lists;
@@ -45,7 +45,7 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap
     protected CuboidScheduler cuboidScheduler = null;
     protected int nRowKey;
     private Integer[][] allCuboidsBitSet = null;
-    private HyperLogLogPlusCounter[] allCuboidsHLL = null;
+    private HyperLogLogPlusCounterNew[] allCuboidsHLL = null;
     private Long[] cuboidIds;
     private HashFunction hf = null;
     private int rowCount = 0;
@@ -76,9 +76,9 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap
             allCuboidsBitSet = allCuboidsBitSetList.toArray(new Integer[cuboidIdList.size()][]);
             cuboidIds = cuboidIdList.toArray(new Long[cuboidIdList.size()]);
 
-            allCuboidsHLL = new HyperLogLogPlusCounter[cuboidIds.length];
+            allCuboidsHLL = new HyperLogLogPlusCounterNew[cuboidIds.length];
             for (int i = 0; i < cuboidIds.length; i++) {
-                allCuboidsHLL[i] = new HyperLogLogPlusCounter(cubeDesc.getConfig().getCubeStatsHLLPrecision());
+                allCuboidsHLL[i] = new HyperLogLogPlusCounterNew(cubeDesc.getConfig().getCubeStatsHLLPrecision());
             }
 
             hf = Hashing.murmur3_32();
@@ -207,7 +207,7 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap
         if (collectStatistics) {
             ByteBuffer hllBuf = ByteBuffer.allocate(BufferedMeasureCodec.DEFAULT_BUFFER_SIZE);
             // output each cuboid's hll to reducer, key is 0 - cuboidId
-            HyperLogLogPlusCounter hll;
+            HyperLogLogPlusCounterNew hll;
             for (int i = 0; i < cuboidIds.length; i++) {
                 hll = allCuboidsHLL[i];
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java
index 88f6ba2..e839989 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java
@@ -47,7 +47,7 @@ import org.apache.kylin.job.exception.ExecuteException;
 import org.apache.kylin.job.execution.AbstractExecutable;
 import org.apache.kylin.job.execution.ExecutableContext;
 import org.apache.kylin.job.execution.ExecuteResult;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter;
+import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -56,7 +56,7 @@ import com.google.common.collect.Maps;
 public class MergeStatisticsStep extends AbstractExecutable {
     private static final Logger logger = LoggerFactory.getLogger(MergeStatisticsStep.class);
 
-    protected Map<Long, HyperLogLogPlusCounter> cuboidHLLMap = Maps.newHashMap();
+    protected Map<Long, HyperLogLogPlusCounterNew> cuboidHLLMap = Maps.newHashMap();
 
     public MergeStatisticsStep() {
         super();
@@ -100,7 +100,7 @@ public class MergeStatisticsStep extends AbstractExecutable {
                             // sampling percentage;
                             averageSamplingPercentage += Bytes.toInt(value.getBytes());
                         } else if (key.get() > 0) {
-                            HyperLogLogPlusCounter hll = new HyperLogLogPlusCounter(kylinConf.getCubeStatsHLLPrecision());
+                            HyperLogLogPlusCounterNew hll = new HyperLogLogPlusCounterNew(kylinConf.getCubeStatsHLLPrecision());
                             ByteArray byteArray = new ByteArray(value.getBytes());
                             hll.readRegisters(byteArray.asBuffer());
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/CubeSamplingTest.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/CubeSamplingTest.java b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/CubeSamplingTest.java
index 89d23fa..cae3b62 100644
--- a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/CubeSamplingTest.java
+++ b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/CubeSamplingTest.java
@@ -24,7 +24,7 @@ import java.util.List;
 import org.apache.commons.lang.RandomStringUtils;
 import org.apache.kylin.common.util.ByteArray;
 import org.apache.kylin.common.util.Bytes;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter;
+import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
 import org.junit.Before;
 import org.junit.Test;
 
@@ -45,7 +45,7 @@ public class CubeSamplingTest {
     private Integer[][] allCuboidsBitSet;
     private HashFunction hf = null;
     private long baseCuboidId;
-    private HyperLogLogPlusCounter[] allCuboidsHLL = null;
+    private HyperLogLogPlusCounterNew[] allCuboidsHLL = null;
     private final byte[] seperator = Bytes.toBytes(",");
 
     @Before
@@ -61,9 +61,9 @@ public class CubeSamplingTest {
 
         allCuboidsBitSet = allCuboidsBitSetList.toArray(new Integer[allCuboidsBitSetList.size()][]);
         System.out.println("Totally have " + allCuboidsBitSet.length + " cuboids.");
-        allCuboidsHLL = new HyperLogLogPlusCounter[allCuboids.size()];
+        allCuboidsHLL = new HyperLogLogPlusCounterNew[allCuboids.size()];
         for (int i = 0; i < allCuboids.size(); i++) {
-            allCuboidsHLL[i] = new HyperLogLogPlusCounter(14);
+            allCuboidsHLL[i] = new HyperLogLogPlusCounterNew(14);
         }
 
         //  hf = Hashing.goodFastHash(32);

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducerTest.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducerTest.java b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducerTest.java
index ca8684f..a00db94 100644
--- a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducerTest.java
+++ b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducerTest.java
@@ -28,7 +28,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.kylin.engine.mr.HadoopUtil;
 import org.apache.kylin.engine.mr.common.CubeStatsWriter;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter;
+import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
 import org.junit.Test;
 
 import com.google.common.collect.Maps;
@@ -48,7 +48,7 @@ public class FactDistinctColumnsReducerTest {
         }
 
         System.out.println(outputPath);
-        Map<Long, HyperLogLogPlusCounter> cuboidHLLMap = Maps.newHashMap();
+        Map<Long, HyperLogLogPlusCounterNew> cuboidHLLMap = Maps.newHashMap();
         CubeStatsWriter.writeCuboidStatistics(conf, outputPath, cuboidHLLMap, 100);
         FileSystem.getLocal(conf).delete(outputPath, true);
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubing.java
----------------------------------------------------------------------
diff --git a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubing.java b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubing.java
index 10c74f3..76212c8 100644
--- a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubing.java
+++ b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubing.java
@@ -83,7 +83,7 @@ import org.apache.kylin.engine.spark.cube.DefaultTupleConverter;
 import org.apache.kylin.engine.spark.util.IteratorUtils;
 import org.apache.kylin.measure.BufferedMeasureCodec;
 import org.apache.kylin.measure.MeasureAggregators;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter;
+import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
 import org.apache.kylin.metadata.model.FunctionDesc;
 import org.apache.kylin.metadata.model.IJoinedFlatTableDesc;
 import org.apache.kylin.metadata.model.MeasureDesc;
@@ -241,15 +241,15 @@ public class SparkCubing extends AbstractApplication {
         }
     }
 
-    private Map<Long, HyperLogLogPlusCounter> sampling(final JavaRDD<List<String>> rowJavaRDD, final String cubeName, String segmentId) throws Exception {
+    private Map<Long, HyperLogLogPlusCounterNew> sampling(final JavaRDD<List<String>> rowJavaRDD, final String cubeName, String segmentId) throws Exception {
         CubeInstance cubeInstance = CubeManager.getInstance(KylinConfig.getInstanceFromEnv()).reloadCubeLocal(cubeName);
         CubeSegment cubeSegment = cubeInstance.getSegmentById(segmentId);
         CubeDesc cubeDesc = cubeInstance.getDescriptor();
         CuboidScheduler cuboidScheduler = new CuboidScheduler(cubeDesc);
         List<Long> allCuboidIds = cuboidScheduler.getAllCuboidIds();
-        final HashMap<Long, HyperLogLogPlusCounter> zeroValue = Maps.newHashMap();
+        final HashMap<Long, HyperLogLogPlusCounterNew> zeroValue = Maps.newHashMap();
         for (Long id : allCuboidIds) {
-            zeroValue.put(id, new HyperLogLogPlusCounter(cubeDesc.getConfig().getCubeStatsHLLPrecision()));
+            zeroValue.put(id, new HyperLogLogPlusCounterNew(cubeDesc.getConfig().getCubeStatsHLLPrecision()));
         }
 
         CubeJoinedFlatTableEnrich flatDesc = new CubeJoinedFlatTableEnrich(EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc);
@@ -278,12 +278,12 @@ public class SparkCubing extends AbstractApplication {
             row_hashcodes[i] = new ByteArray();
         }
 
-        final HashMap<Long, HyperLogLogPlusCounter> samplingResult = rowJavaRDD.aggregate(zeroValue, new Function2<HashMap<Long, HyperLogLogPlusCounter>, List<String>, HashMap<Long, HyperLogLogPlusCounter>>() {
+        final HashMap<Long, HyperLogLogPlusCounterNew> samplingResult = rowJavaRDD.aggregate(zeroValue, new Function2<HashMap<Long, HyperLogLogPlusCounterNew>, List<String>, HashMap<Long, HyperLogLogPlusCounterNew>>() {
 
             final HashFunction hashFunction = Hashing.murmur3_128();
 
             @Override
-            public HashMap<Long, HyperLogLogPlusCounter> call(HashMap<Long, HyperLogLogPlusCounter> v1, List<String> v2) throws Exception {
+            public HashMap<Long, HyperLogLogPlusCounterNew> call(HashMap<Long, HyperLogLogPlusCounterNew> v1, List<String> v2) throws Exception {
                 for (int i = 0; i < nRowKey; i++) {
                     Hasher hc = hashFunction.newHasher();
                     String colValue = v2.get(rowKeyColumnIndexes[i]);
@@ -296,7 +296,7 @@ public class SparkCubing extends AbstractApplication {
 
                 for (Map.Entry<Long, Integer[]> entry : allCuboidsBitSet.entrySet()) {
                     Hasher hc = hashFunction.newHasher();
-                    HyperLogLogPlusCounter counter = v1.get(entry.getKey());
+                    HyperLogLogPlusCounterNew counter = v1.get(entry.getKey());
                     final Integer[] cuboidBitSet = entry.getValue();
                     for (int position = 0; position < cuboidBitSet.length; position++) {
                         hc.putBytes(row_hashcodes[cuboidBitSet[position]].array());
@@ -305,14 +305,14 @@ public class SparkCubing extends AbstractApplication {
                 }
                 return v1;
             }
-        }, new Function2<HashMap<Long, HyperLogLogPlusCounter>, HashMap<Long, HyperLogLogPlusCounter>, HashMap<Long, HyperLogLogPlusCounter>>() {
+        }, new Function2<HashMap<Long, HyperLogLogPlusCounterNew>, HashMap<Long, HyperLogLogPlusCounterNew>, HashMap<Long, HyperLogLogPlusCounterNew>>() {
             @Override
-            public HashMap<Long, HyperLogLogPlusCounter> call(HashMap<Long, HyperLogLogPlusCounter> v1, HashMap<Long, HyperLogLogPlusCounter> v2) throws Exception {
+            public HashMap<Long, HyperLogLogPlusCounterNew> call(HashMap<Long, HyperLogLogPlusCounterNew> v1, HashMap<Long, HyperLogLogPlusCounterNew> v2) throws Exception {
                 Preconditions.checkArgument(v1.size() == v2.size());
                 Preconditions.checkArgument(v1.size() > 0);
-                for (Map.Entry<Long, HyperLogLogPlusCounter> entry : v1.entrySet()) {
-                    final HyperLogLogPlusCounter counter1 = entry.getValue();
-                    final HyperLogLogPlusCounter counter2 = v2.get(entry.getKey());
+                for (Map.Entry<Long, HyperLogLogPlusCounterNew> entry : v1.entrySet()) {
+                    final HyperLogLogPlusCounterNew counter1 = entry.getValue();
+                    final HyperLogLogPlusCounterNew counter2 = v2.get(entry.getKey());
                     counter1.merge(Preconditions.checkNotNull(counter2, "counter cannot be null"));
                 }
                 return v1;
@@ -470,7 +470,7 @@ public class SparkCubing extends AbstractApplication {
         ClassUtil.addClasspath(confPath);
     }
 
-    private byte[][] createHTable(String cubeName, String segmentId, Map<Long, HyperLogLogPlusCounter> samplingResult) throws Exception {
+    private byte[][] createHTable(String cubeName, String segmentId, Map<Long, HyperLogLogPlusCounterNew> samplingResult) throws Exception {
         final KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
         final CubeInstance cubeInstance = CubeManager.getInstance(kylinConfig).getCube(cubeName);
         final CubeSegment cubeSegment = cubeInstance.getSegmentById(segmentId);
@@ -614,7 +614,7 @@ public class SparkCubing extends AbstractApplication {
             }
         });
 
-        final Map<Long, HyperLogLogPlusCounter> samplingResult = sampling(rowJavaRDD, cubeName, segmentId);
+        final Map<Long, HyperLogLogPlusCounterNew> samplingResult = sampling(rowJavaRDD, cubeName, segmentId);
         final byte[][] splitKeys = createHTable(cubeName, segmentId, samplingResult);
 
         final String hfile = build(rowJavaRDD, cubeName, segmentId, splitKeys);

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/source-hive/src/main/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityMapper.java
----------------------------------------------------------------------
diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityMapper.java b/source-hive/src/main/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityMapper.java
index 06a07ca..230249f 100644
--- a/source-hive/src/main/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityMapper.java
+++ b/source-hive/src/main/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityMapper.java
@@ -35,18 +35,18 @@ import org.apache.kylin.engine.mr.MRUtil;
 import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
 import org.apache.kylin.engine.mr.common.BatchConstants;
 import org.apache.kylin.measure.BufferedMeasureCodec;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter;
+import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
 import org.apache.kylin.metadata.MetadataManager;
 import org.apache.kylin.metadata.model.ColumnDesc;
 import org.apache.kylin.metadata.model.TableDesc;
 
 /**
  * @author Jack
- * 
+ *
  */
 public class ColumnCardinalityMapper<T> extends KylinMapper<T, Object, IntWritable, BytesWritable> {
 
-    private Map<Integer, HyperLogLogPlusCounter> hllcMap = new HashMap<Integer, HyperLogLogPlusCounter>();
+    private Map<Integer, HyperLogLogPlusCounterNew> hllcMap = new HashMap<Integer, HyperLogLogPlusCounterNew>();
     public static final String DEFAULT_DELIM = ",";
 
     private int counter = 0;
@@ -87,9 +87,9 @@ public class ColumnCardinalityMapper<T> extends KylinMapper<T, Object, IntWritab
         counter++;
     }
 
-    private HyperLogLogPlusCounter getHllc(Integer key) {
+    private HyperLogLogPlusCounterNew getHllc(Integer key) {
         if (!hllcMap.containsKey(key)) {
-            hllcMap.put(key, new HyperLogLogPlusCounter());
+            hllcMap.put(key, new HyperLogLogPlusCounterNew());
         }
         return hllcMap.get(key);
     }
@@ -100,7 +100,7 @@ public class ColumnCardinalityMapper<T> extends KylinMapper<T, Object, IntWritab
         ByteBuffer buf = ByteBuffer.allocate(BufferedMeasureCodec.DEFAULT_BUFFER_SIZE);
         while (it.hasNext()) {
             int key = it.next();
-            HyperLogLogPlusCounter hllc = hllcMap.get(key);
+            HyperLogLogPlusCounterNew hllc = hllcMap.get(key);
             buf.clear();
             hllc.writeRegisters(buf);
             buf.flip();

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/source-hive/src/main/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityReducer.java
----------------------------------------------------------------------
diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityReducer.java b/source-hive/src/main/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityReducer.java
index ea66999..32cc6d9 100644
--- a/source-hive/src/main/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityReducer.java
+++ b/source-hive/src/main/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityReducer.java
@@ -32,7 +32,7 @@ import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.kylin.engine.mr.KylinReducer;
 import org.apache.kylin.measure.BufferedMeasureCodec;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter;
+import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
 
 /**
  * @author Jack
@@ -41,7 +41,7 @@ import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter;
 public class ColumnCardinalityReducer extends KylinReducer<IntWritable, BytesWritable, IntWritable, LongWritable> {
 
     public static final int ONE = 1;
-    private Map<Integer, HyperLogLogPlusCounter> hllcMap = new HashMap<Integer, HyperLogLogPlusCounter>();
+    private Map<Integer, HyperLogLogPlusCounterNew> hllcMap = new HashMap<Integer, HyperLogLogPlusCounterNew>();
 
     @Override
     protected void setup(Context context) throws IOException {
@@ -53,16 +53,16 @@ public class ColumnCardinalityReducer extends KylinReducer<IntWritable, BytesWri
         int skey = key.get();
         for (BytesWritable v : values) {
             ByteBuffer buffer = ByteBuffer.wrap(v.getBytes());
-            HyperLogLogPlusCounter hll = new HyperLogLogPlusCounter();
+            HyperLogLogPlusCounterNew hll = new HyperLogLogPlusCounterNew();
             hll.readRegisters(buffer);
             getHllc(skey).merge(hll);
             hll.clear();
         }
     }
 
-    private HyperLogLogPlusCounter getHllc(Integer key) {
+    private HyperLogLogPlusCounterNew getHllc(Integer key) {
         if (!hllcMap.containsKey(key)) {
-            hllcMap.put(key, new HyperLogLogPlusCounter());
+            hllcMap.put(key, new HyperLogLogPlusCounterNew());
         }
         return hllcMap.get(key);
     }
@@ -78,7 +78,7 @@ public class ColumnCardinalityReducer extends KylinReducer<IntWritable, BytesWri
         it = keys.iterator();
         while (it.hasNext()) {
             int key = it.next();
-            HyperLogLogPlusCounter hllc = hllcMap.get(key);
+            HyperLogLogPlusCounterNew hllc = hllcMap.get(key);
             ByteBuffer buf = ByteBuffer.allocate(BufferedMeasureCodec.DEFAULT_BUFFER_SIZE);
             buf.clear();
             hllc.writeRegisters(buf);

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/source-hive/src/test/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityReducerTest.java
----------------------------------------------------------------------
diff --git a/source-hive/src/test/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityReducerTest.java b/source-hive/src/test/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityReducerTest.java
index d27860a..410543a 100644
--- a/source-hive/src/test/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityReducerTest.java
+++ b/source-hive/src/test/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityReducerTest.java
@@ -35,7 +35,7 @@ import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
 import org.apache.hadoop.mrunit.types.Pair;
 import org.apache.kylin.common.util.Bytes;
 import org.apache.kylin.measure.BufferedMeasureCodec;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter;
+import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
 import org.junit.Before;
 import org.junit.Test;
 
@@ -57,7 +57,7 @@ public class ColumnCardinalityReducerTest {
     }
 
     private byte[] getBytes(String str) throws IOException {
-        HyperLogLogPlusCounter hllc = new HyperLogLogPlusCounter();
+        HyperLogLogPlusCounterNew hllc = new HyperLogLogPlusCounterNew();
         StringTokenizer tokenizer = new StringTokenizer(str, ColumnCardinalityMapper.DEFAULT_DELIM);
         int i = 0;
         while (tokenizer.hasMoreTokens()) {


[4/5] kylin git commit: KYLIN-1832 code review

Posted by li...@apache.org.
http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-metadata/src/test/java/org/apache/kylin/measure/hll/HyperLogLogCounterOldTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/hll/HyperLogLogCounterOldTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/hll/HyperLogLogCounterOldTest.java
deleted file mode 100644
index 5d17fea..0000000
--- a/core-metadata/src/test/java/org/apache/kylin/measure/hll/HyperLogLogCounterOldTest.java
+++ /dev/null
@@ -1,265 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.measure.hll;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.HashSet;
-import java.util.Random;
-import java.util.Set;
-
-import org.apache.kylin.common.util.Bytes;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterOld;
-import org.junit.Assert;
-import org.junit.Test;
-
-/**
- * @author yangli9
- * 
- */
-public class HyperLogLogCounterOldTest {
-
-    ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
-    Random rand1 = new Random(1);
-    Random rand2 = new Random(2);
-    Random rand3 = new Random(3);
-    int errorCount1 = 0;
-    int errorCount2 = 0;
-    int errorCount3 = 0;
-
-    @Test
-    public void testOneAdd() throws IOException {
-        HyperLogLogPlusCounterOld hllc = new HyperLogLogPlusCounterOld(14);
-        HyperLogLogPlusCounterOld one = new HyperLogLogPlusCounterOld(14);
-        for (int i = 0; i < 1000000; i++) {
-            one.clear();
-            one.add(rand1.nextInt());
-            hllc.merge(one);
-        }
-        assertTrue(hllc.getCountEstimate() > 1000000 * 0.9);
-    }
-
-    @Test
-    public void testPeekLength() throws IOException {
-        HyperLogLogPlusCounterOld hllc = new HyperLogLogPlusCounterOld(10);
-        HyperLogLogPlusCounterOld copy = new HyperLogLogPlusCounterOld(10);
-        byte[] value = new byte[10];
-        for (int i = 0; i < 200000; i++) {
-            rand1.nextBytes(value);
-            hllc.add(value);
-
-            buf.clear();
-            hllc.writeRegisters(buf);
-
-            int len = buf.position();
-            buf.position(0);
-            assertEquals(len, hllc.peekLength(buf));
-
-            copy.readRegisters(buf);
-            assertEquals(len, buf.position());
-            assertEquals(hllc, copy);
-        }
-        buf.clear();
-    }
-
-    private Set<String> generateTestData(int n) {
-        Set<String> testData = new HashSet<String>();
-        for (int i = 0; i < n; i++) {
-            String[] samples = generateSampleData();
-            for (String sample : samples) {
-                testData.add(sample);
-            }
-        }
-        return testData;
-    }
-
-    // simulate the visit (=visitor+id)
-    private String[] generateSampleData() {
-
-        StringBuilder buf = new StringBuilder();
-        for (int i = 0; i < 19; i++) {
-            buf.append(Math.abs(rand1.nextInt()) % 10);
-        }
-        String header = buf.toString();
-
-        int size = Math.abs(rand3.nextInt()) % 9 + 1;
-        String[] samples = new String[size];
-        for (int k = 0; k < size; k++) {
-            buf = new StringBuilder(header);
-            buf.append("-");
-            for (int i = 0; i < 10; i++) {
-                buf.append(Math.abs(rand3.nextInt()) % 10);
-            }
-            samples[k] = buf.toString();
-        }
-
-        return samples;
-    }
-
-    @Test
-    public void countTest() throws IOException {
-        int n = 10;
-        for (int i = 0; i < 5; i++) {
-            count(n);
-            n *= 10;
-        }
-    }
-
-    private void count(int n) throws IOException {
-        Set<String> testSet = generateTestData(n);
-
-        HyperLogLogPlusCounterOld hllc = newHLLC();
-        for (String testData : testSet) {
-            hllc.add(Bytes.toBytes(testData));
-        }
-        long estimate = hllc.getCountEstimate();
-        double errorRate = hllc.getErrorRate();
-        double actualError = (double) Math.abs(testSet.size() - estimate) / testSet.size();
-        System.out.println(estimate);
-        System.out.println(testSet.size());
-        System.out.println(errorRate);
-        System.out.println("=" + actualError);
-        Assert.assertTrue(actualError < errorRate * 3.0);
-
-        checkSerialize(hllc);
-    }
-
-    private void checkSerialize(HyperLogLogPlusCounterOld hllc) throws IOException {
-        long estimate = hllc.getCountEstimate();
-        buf.clear();
-        hllc.writeRegisters(buf);
-        buf.flip();
-        hllc.readRegisters(buf);
-        Assert.assertEquals(estimate, hllc.getCountEstimate());
-    }
-
-    @Test
-    public void mergeTest() throws IOException {
-        double error = 0;
-        int n = 100;
-        for (int i = 0; i < n; i++) {
-            double e = merge(i);
-            error += e;
-        }
-        System.out.println("Total average error is " + error / n);
-
-        System.out.println("  errorRateCount1 is " + errorCount1 + "!");
-        System.out.println("  errorRateCount2 is " + errorCount2 + "!");
-        System.out.println("  errorRateCount3 is " + errorCount3 + "!");
-
-        Assert.assertTrue(errorCount1 <= n * 0.30);
-        Assert.assertTrue(errorCount2 <= n * 0.05);
-        Assert.assertTrue(errorCount3 <= n * 0.02);
-    }
-
-    private double merge(int round) throws IOException {
-        int ln = 20;
-        int dn = 100 * (round + 1);
-        Set<String> testSet = new HashSet<String>();
-        HyperLogLogPlusCounterOld[] hllcs = new HyperLogLogPlusCounterOld[ln];
-        for (int i = 0; i < ln; i++) {
-            hllcs[i] = newHLLC();
-            for (int k = 0; k < dn; k++) {
-                String[] samples = generateSampleData();
-                for (String data : samples) {
-                    testSet.add(data);
-                    hllcs[i].add(Bytes.toBytes(data));
-                }
-            }
-        }
-        HyperLogLogPlusCounterOld mergeHllc = newHLLC();
-        for (HyperLogLogPlusCounterOld hllc : hllcs) {
-            mergeHllc.merge(serDes(hllc));
-        }
-
-        double errorRate = mergeHllc.getErrorRate();
-        long estimate = mergeHllc.getCountEstimate();
-        double actualError = Math.abs((double) (testSet.size() - estimate) / testSet.size());
-
-        System.out.println(testSet.size() + "-" + estimate + " ~ " + actualError);
-        Assert.assertTrue(actualError < 0.1);
-
-        if (actualError > errorRate) {
-            errorCount1++;
-        }
-        if (actualError > 2 * errorRate) {
-            errorCount2++;
-        }
-        if (actualError > 3 * errorRate) {
-            errorCount3++;
-        }
-
-        return actualError;
-    }
-
-    private HyperLogLogPlusCounterOld serDes(HyperLogLogPlusCounterOld hllc) throws IOException {
-        buf.clear();
-        hllc.writeRegisters(buf);
-        buf.flip();
-        HyperLogLogPlusCounterOld copy = new HyperLogLogPlusCounterOld(hllc.getPrecision());
-        copy.readRegisters(buf);
-        Assert.assertEquals(copy.getCountEstimate(), hllc.getCountEstimate());
-        return copy;
-    }
-
-    @Test
-    public void testPerformance() throws IOException {
-        int N = 3; // reduce N HLLC into one
-        int M = 1000; // for M times, use 100000 for real perf test
-
-        HyperLogLogPlusCounterOld samples[] = new HyperLogLogPlusCounterOld[N];
-        for (int i = 0; i < N; i++) {
-            samples[i] = newHLLC();
-            for (String str : generateTestData(10000))
-                samples[i].add(str);
-        }
-
-        System.out.println("Perf test running ... ");
-        long start = System.currentTimeMillis();
-        HyperLogLogPlusCounterOld sum = newHLLC();
-        for (int i = 0; i < M; i++) {
-            sum.clear();
-            for (int j = 0; j < N; j++) {
-                sum.merge(samples[j]);
-                checkSerialize(sum);
-            }
-        }
-        long duration = System.currentTimeMillis() - start;
-        System.out.println("Perf test result: " + duration / 1000 + " seconds");
-    }
-
-    @Test
-    public void testEquivalence() {
-        byte[] a = new byte[] { 0, 3, 4, 42, 2, 2 };
-        byte[] b = new byte[] { 3, 4, 42 };
-        HyperLogLogPlusCounterOld ha = new HyperLogLogPlusCounterOld();
-        HyperLogLogPlusCounterOld hb = new HyperLogLogPlusCounterOld();
-        ha.add(a, 1, 3);
-        hb.add(b);
-
-        Assert.assertTrue(ha.getCountEstimate() == hb.getCountEstimate());
-    }
-
-    private HyperLogLogPlusCounterOld newHLLC() {
-        return new HyperLogLogPlusCounterOld(16);
-    }
-}

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-metadata/src/test/java/org/apache/kylin/measure/hll2/HyperLogLogCounterNewTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/hll2/HyperLogLogCounterNewTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/hll2/HyperLogLogCounterNewTest.java
deleted file mode 100644
index feb8c8e..0000000
--- a/core-metadata/src/test/java/org/apache/kylin/measure/hll2/HyperLogLogCounterNewTest.java
+++ /dev/null
@@ -1,301 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-package org.apache.kylin.measure.hll2;
-
-import org.apache.kylin.common.util.Bytes;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterOld;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
-import org.apache.kylin.measure.hllc.RegisterType;
-import org.junit.Assert;
-import org.junit.Test;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.HashSet;
-import java.util.Random;
-import java.util.Set;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-/**
- * Created by xiefan on 16-12-12.
- */
-public class HyperLogLogCounterNewTest {
-    ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
-    Random rand1 = new Random(1);
-    Random rand2 = new Random(2);
-    Random rand3 = new Random(3);
-    int errorCount1 = 0;
-    int errorCount2 = 0;
-    int errorCount3 = 0;
-
-    @Test
-    public void testOneAdd() throws IOException {
-        HyperLogLogPlusCounterNew hllc = new HyperLogLogPlusCounterNew(14);
-        HyperLogLogPlusCounterNew one = new HyperLogLogPlusCounterNew(14);
-        for (int i = 0; i < 1000000; i++) {
-            one.clear();
-            one.add(rand1.nextInt());
-            hllc.merge(one);
-        }
-        System.out.println(hllc.getCountEstimate());
-        assertTrue(hllc.getCountEstimate() > 1000000 * 0.9);
-    }
-
-    @Test
-    public void tesSparseEstimate() throws IOException {
-        HyperLogLogPlusCounterNew hllc = new HyperLogLogPlusCounterNew(14);
-        for (int i = 0; i < 10; i++) {
-            hllc.add(i);
-        }
-        System.out.println(hllc.getCountEstimate());
-        assertTrue(hllc.getCountEstimate() > 10 * 0.9);
-    }
-
-    @Test
-    public void countTest() throws IOException {
-        int n = 10;
-        for (int i = 0; i < 5; i++) {
-            count(n);
-            n *= 10;
-        }
-    }
-
-    @Test
-    public void mergeTest() throws IOException {
-        double error = 0;
-        int n = 100;
-        for (int i = 0; i < n; i++) {
-            double e = merge(i);
-            error += e;
-        }
-        System.out.println("Total average error is " + error / n);
-
-        System.out.println("  errorRateCount1 is " + errorCount1 + "!");
-        System.out.println("  errorRateCount2 is " + errorCount2 + "!");
-        System.out.println("  errorRateCount3 is " + errorCount3 + "!");
-
-        Assert.assertTrue(errorCount1 <= n * 0.30);
-        Assert.assertTrue(errorCount2 <= n * 0.05);
-        Assert.assertTrue(errorCount3 <= n * 0.02);
-    }
-
-    /*
-    compare the result of two different hll counter
-     */
-    @Test
-    public void compareResult() {
-        int p = 12; //4096
-        int m = 1 << p;
-    
-        for (int t = 0; t < 5; t++) {
-            //compare sparse
-            HyperLogLogPlusCounterOld oldCounter = new HyperLogLogPlusCounterOld(p);
-            HyperLogLogPlusCounterNew newCounter = new HyperLogLogPlusCounterNew(p);
-    
-            for (int i = 0; i < 20; i++) {
-                //int r = rand1.nextInt();
-                oldCounter.add(i);
-                newCounter.add(i);
-            }
-            assertEquals(RegisterType.SPARSE, newCounter.getRegisterType());
-            assertEquals(oldCounter.getCountEstimate(), newCounter.getCountEstimate());
-            //compare dense
-            for (int i = 0; i < m; i++) {
-                oldCounter.add(i);
-                newCounter.add(i);
-            }
-            assertEquals(RegisterType.DENSE, newCounter.getRegisterType());
-            assertEquals(oldCounter.getCountEstimate(), newCounter.getCountEstimate());
-        }
-    
-    }
-
-    @Test
-    public void testPeekLength() throws IOException {
-        HyperLogLogPlusCounterNew hllc = new HyperLogLogPlusCounterNew(10);
-        HyperLogLogPlusCounterNew copy = new HyperLogLogPlusCounterNew(10);
-        byte[] value = new byte[10];
-        for (int i = 0; i < 200000; i++) {
-            rand1.nextBytes(value);
-            hllc.add(value);
-
-            buf.clear();
-            hllc.writeRegisters(buf);
-
-            int len = buf.position();
-            buf.position(0);
-            assertEquals(len, hllc.peekLength(buf));
-
-            copy.readRegisters(buf);
-            assertEquals(len, buf.position());
-            assertEquals(hllc, copy);
-        }
-        buf.clear();
-    }
-
-    @Test
-    public void testEquivalence() {
-        byte[] a = new byte[] { 0, 3, 4, 42, 2, 2 };
-        byte[] b = new byte[] { 3, 4, 42 };
-        HyperLogLogPlusCounterNew ha = new HyperLogLogPlusCounterNew();
-        HyperLogLogPlusCounterNew hb = new HyperLogLogPlusCounterNew();
-        ha.add(a, 1, 3);
-        hb.add(b);
-
-        Assert.assertTrue(ha.getCountEstimate() == hb.getCountEstimate());
-    }
-
-    @Test
-    public void testAutoChangeToSparse() {
-        int p = 15;
-        int m = 1 << p;
-        HyperLogLogPlusCounterNew counter = new HyperLogLogPlusCounterNew(p);
-        assertEquals(RegisterType.SPARSE, counter.getRegisterType());
-        double over = HyperLogLogPlusCounterNew.overflowFactor * m;
-        int overFlow = (int) over + 1000;
-        for (int i = 0; i < overFlow; i++)
-            counter.add(i);
-        assertEquals(RegisterType.DENSE, counter.getRegisterType());
-    }
-
-    @Test
-    public void testSerialilze() throws Exception {
-        //test sparse serialize
-        int p = 15;
-        int m = 1 << p;
-        HyperLogLogPlusCounterNew counter = new HyperLogLogPlusCounterNew(p);
-        counter.add(123);
-        assertEquals(RegisterType.SPARSE, counter.getRegisterType());
-        checkSerialize(counter);
-        //test dense serialize
-        double over = HyperLogLogPlusCounterNew.overflowFactor * m;
-        int overFlow = (int) over + 1000;
-        for (int i = 0; i < overFlow; i++)
-            counter.add(i);
-        assertEquals(RegisterType.DENSE, counter.getRegisterType());
-        checkSerialize(counter);
-    }
-
-    private Set<String> generateTestData(int n) {
-        Set<String> testData = new HashSet<String>();
-        for (int i = 0; i < n; i++) {
-            String[] samples = generateSampleData();
-            for (String sample : samples) {
-                testData.add(sample);
-            }
-        }
-        return testData;
-    }
-
-    // simulate the visit (=visitor+id)
-    private String[] generateSampleData() {
-
-        StringBuilder buf = new StringBuilder();
-        for (int i = 0; i < 19; i++) {
-            buf.append(Math.abs(rand1.nextInt()) % 10);
-        }
-        String header = buf.toString();
-
-        int size = Math.abs(rand3.nextInt()) % 9 + 1;
-        String[] samples = new String[size];
-        for (int k = 0; k < size; k++) {
-            buf = new StringBuilder(header);
-            buf.append("-");
-            for (int i = 0; i < 10; i++) {
-                buf.append(Math.abs(rand3.nextInt()) % 10);
-            }
-            samples[k] = buf.toString();
-        }
-
-        return samples;
-    }
-
-    private double merge(int round) throws IOException {
-        int ln = 20;
-        int dn = 100 * (round + 1);
-        Set<String> testSet = new HashSet<String>();
-        HyperLogLogPlusCounterNew[] hllcs = new HyperLogLogPlusCounterNew[ln];
-        for (int i = 0; i < ln; i++) {
-            hllcs[i] = newHLLC();
-            for (int k = 0; k < dn; k++) {
-                String[] samples = generateSampleData();
-                for (String data : samples) {
-                    testSet.add(data);
-                    hllcs[i].add(Bytes.toBytes(data));
-                }
-            }
-        }
-        HyperLogLogPlusCounterNew mergeHllc = newHLLC();
-        for (HyperLogLogPlusCounterNew hllc : hllcs) {
-            mergeHllc.merge(hllc);
-        }
-
-        double errorRate = mergeHllc.getErrorRate();
-        long estimate = mergeHllc.getCountEstimate();
-        double actualError = Math.abs((double) (testSet.size() - estimate) / testSet.size());
-
-        System.out.println(testSet.size() + "-" + estimate + " ~ " + actualError);
-        Assert.assertTrue(actualError < 0.1);
-
-        if (actualError > errorRate) {
-            errorCount1++;
-        }
-        if (actualError > 2 * errorRate) {
-            errorCount2++;
-        }
-        if (actualError > 3 * errorRate) {
-            errorCount3++;
-        }
-
-        return actualError;
-    }
-
-    private HyperLogLogPlusCounterNew newHLLC() {
-        return new HyperLogLogPlusCounterNew(16);
-    }
-
-    private void count(int n) throws IOException {
-        Set<String> testSet = generateTestData(n);
-
-        HyperLogLogPlusCounterNew hllc = newHLLC();
-        for (String testData : testSet) {
-            hllc.add(Bytes.toBytes(testData));
-        }
-        long estimate = hllc.getCountEstimate();
-        double errorRate = hllc.getErrorRate();
-        double actualError = (double) Math.abs(testSet.size() - estimate) / testSet.size();
-        System.out.println(estimate);
-        System.out.println(testSet.size());
-        System.out.println(errorRate);
-        System.out.println("=" + actualError);
-        Assert.assertTrue(actualError < errorRate * 3.0);
-
-        checkSerialize(hllc);
-    }
-
-    private void checkSerialize(HyperLogLogPlusCounterNew hllc) throws IOException {
-        long estimate = hllc.getCountEstimate();
-        buf.clear();
-        hllc.writeRegisters(buf);
-        buf.flip();
-        hllc.readRegisters(buf);
-        Assert.assertEquals(estimate, hllc.getCountEstimate());
-    }
-}

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-metadata/src/test/java/org/apache/kylin/measure/hll2/NewHyperLogLogBenchmarkTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/hll2/NewHyperLogLogBenchmarkTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/hll2/NewHyperLogLogBenchmarkTest.java
deleted file mode 100644
index bfb87f9..0000000
--- a/core-metadata/src/test/java/org/apache/kylin/measure/hll2/NewHyperLogLogBenchmarkTest.java
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-package org.apache.kylin.measure.hll2;
-
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterOld;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
-import org.apache.kylin.measure.hllc.RegisterType;
-import org.junit.Test;
-
-import java.nio.ByteBuffer;
-import java.util.Random;
-
-import static org.junit.Assert.assertEquals;
-
-/**
- * Created by xiefan on 16-12-12.
- */
-public class NewHyperLogLogBenchmarkTest {
-
-    public static final Random rand = new Random(1);
-
-    final int testTimes = 10000;
-
-    @Test
-    public void denseToDenseRegisterMergeBenchmark() throws Exception {
-        final int p = 15;
-        int m = 1 << p;
-
-        System.out.println("m : " + m);
-        double oldFactor = HyperLogLogPlusCounterNew.overflowFactor;
-        HyperLogLogPlusCounterNew.overflowFactor = 1.1; //keep sparse
-        for (int cardinality : getTestDataDivide(m)) {
-            final HyperLogLogPlusCounterOld oldCounter = new HyperLogLogPlusCounterOld(p);
-            final HyperLogLogPlusCounterOld oldCounter2 = getRandOldCounter(p, cardinality);
-            long oldTime = runTestCase(new TestCase() {
-                @Override
-                public void run() {
-
-                    for (int i = 0; i < testTimes; i++) {
-                        oldCounter.merge(oldCounter2);
-                    }
-                }
-            });
-            final HyperLogLogPlusCounterNew newCounter = new HyperLogLogPlusCounterNew(p, RegisterType.DENSE);
-            final HyperLogLogPlusCounterNew newCounter2 = new HyperLogLogPlusCounterNew(p, RegisterType.DENSE);
-            for (int i = 0; i < testTimes; i++)
-                newCounter2.add(i);
-            long newTime = runTestCase(new TestCase() {
-                @Override
-                public void run() {
-                    for (int i = 0; i < testTimes; i++) {
-                        newCounter.merge(newCounter2);
-                    }
-                }
-            });
-            assertEquals(RegisterType.DENSE, newCounter.getRegisterType());
-            assertEquals(RegisterType.DENSE, newCounter2.getRegisterType());
-            System.out.println("----------------------------");
-            System.out.println("cardinality : " + cardinality);
-            System.out.println("old time : " + oldTime);
-            System.out.println("new time : " + newTime);
-        }
-        HyperLogLogPlusCounterNew.overflowFactor = oldFactor;
-    }
-
-    @Test
-    public void sparseToSparseMergeBenchmark() throws Exception {
-        final int p = 15;
-        int m = 1 << p;
-        System.out.println("m : " + m);
-        double oldFactor = HyperLogLogPlusCounterNew.overflowFactor;
-        HyperLogLogPlusCounterNew.overflowFactor = 1.1; //keep sparse
-        for (int cardinality : getTestDataDivide(m)) {
-            final HyperLogLogPlusCounterOld oldCounter = new HyperLogLogPlusCounterOld(p);
-            final HyperLogLogPlusCounterOld oldCounter2 = getRandOldCounter(p, cardinality);
-            long oldTime = runTestCase(new TestCase() {
-                @Override
-                public void run() {
-
-                    for (int i = 0; i < testTimes; i++) {
-                        oldCounter.merge(oldCounter2);
-                    }
-                }
-            });
-            final HyperLogLogPlusCounterNew newCounter = new HyperLogLogPlusCounterNew(p);
-            final HyperLogLogPlusCounterNew newCounter2 = getRandNewCounter(p, cardinality);
-            long newTime = runTestCase(new TestCase() {
-                @Override
-                public void run() {
-                    for (int i = 0; i < testTimes; i++) {
-                        newCounter.merge(newCounter2);
-                    }
-                }
-            });
-            assertEquals(RegisterType.SPARSE, newCounter.getRegisterType());
-            assertEquals(RegisterType.SPARSE, newCounter2.getRegisterType());
-            System.out.println("----------------------------");
-            System.out.println("cardinality : " + cardinality);
-            System.out.println("old time : " + oldTime);
-            System.out.println("new time : " + newTime);
-        }
-        HyperLogLogPlusCounterNew.overflowFactor = oldFactor;
-    }
-
-    @Test
-    public void sparseToDenseRegisterMergeBenchmark() throws Exception {
-        final int p = 15;
-        int m = 1 << p;
-        System.out.println("m : " + m);
-        double oldFactor = HyperLogLogPlusCounterNew.overflowFactor;
-        HyperLogLogPlusCounterNew.overflowFactor = 1.1; //keep sparse
-        for (int cardinality : getTestDataDivide(m)) {
-            System.out.println("----------------------------");
-            System.out.println("cardinality : " + cardinality);
-            final HyperLogLogPlusCounterOld oldCounter = new HyperLogLogPlusCounterOld(p);
-            final HyperLogLogPlusCounterOld oldCounter2 = getRandOldCounter(p, cardinality);
-            long oldTime = runTestCase(new TestCase() {
-                @Override
-                public void run() {
-                    for (int i = 0; i < testTimes; i++) {
-                        oldCounter.merge(oldCounter2);
-                    }
-                }
-            });
-            final HyperLogLogPlusCounterNew newCounter = new HyperLogLogPlusCounterNew(p, RegisterType.DENSE);
-            final HyperLogLogPlusCounterNew newCounter2 = getRandNewCounter(p, cardinality);
-            long newTime = runTestCase(new TestCase() {
-                @Override
-                public void run() {
-                    for (int i = 0; i < testTimes; i++) {
-                        newCounter.merge(newCounter2);
-                    }
-                }
-            });
-            assertEquals(RegisterType.DENSE, newCounter.getRegisterType());
-            assertEquals(RegisterType.SPARSE, newCounter2.getRegisterType());
-            System.out.println("old time : " + oldTime);
-            System.out.println("new time : " + newTime);
-        }
-        HyperLogLogPlusCounterNew.overflowFactor = oldFactor;
-    }
-
-    @Test
-    public void sparseSerializeBenchmark() throws Exception {
-        final int p = 15;
-        int m = 1 << p;
-        double oldFactor = HyperLogLogPlusCounterNew.overflowFactor;
-        HyperLogLogPlusCounterNew.overflowFactor = 1.1; //keep sparse
-        for (int cardinality : getTestDataDivide(m)) {
-            System.out.println("----------------------------");
-            System.out.println("cardinality : " + cardinality);
-            final HyperLogLogPlusCounterOld oldCounter = getRandOldCounter(p, cardinality);
-            long oldTime = runTestCase(new TestCase() {
-                @Override
-                public void run() throws Exception {
-                    ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
-                    long totalBytes = 0;
-                    for (int i = 0; i < testTimes; i++) {
-                        buf.clear();
-                        oldCounter.writeRegisters(buf);
-                        totalBytes += buf.position();
-                        buf.flip();
-                        oldCounter.readRegisters(buf);
-                    }
-                    System.out.println("old serialize bytes : " + totalBytes / testTimes + "B");
-                }
-            });
-            final HyperLogLogPlusCounterNew newCounter = getRandNewCounter(p, cardinality);
-            long newTime = runTestCase(new TestCase() {
-                @Override
-                public void run() throws Exception {
-                    ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
-                    long totalBytes = 0;
-                    for (int i = 0; i < testTimes; i++) {
-                        buf.clear();
-                        newCounter.writeRegisters(buf);
-                        totalBytes += buf.position();
-                        buf.flip();
-                        newCounter.readRegisters(buf);
-                    }
-                    System.out.println("new serialize bytes : " + totalBytes / testTimes + "B");
-                }
-            });
-            assertEquals(RegisterType.SPARSE, newCounter.getRegisterType());
-            System.out.println("old serialize time : " + oldTime);
-            System.out.println("new serialize time : " + newTime);
-        }
-        HyperLogLogPlusCounterNew.overflowFactor = oldFactor;
-    }
-
-    @Test
-    public void denseSerializeBenchmark() throws Exception {
-        final int p = 15;
-        int m = 1 << p;
-        double oldFactor = HyperLogLogPlusCounterNew.overflowFactor;
-        HyperLogLogPlusCounterNew.overflowFactor = 0; //keep sparse
-        for (int cardinality : getTestDataDivide(m)) {
-            System.out.println("----------------------------");
-            System.out.println("cardinality : " + cardinality);
-            final HyperLogLogPlusCounterOld oldCounter = getRandOldCounter(p, cardinality);
-            long oldTime = runTestCase(new TestCase() {
-                @Override
-                public void run() throws Exception {
-                    ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
-                    long totalBytes = 0;
-                    for (int i = 0; i < testTimes; i++) {
-                        buf.clear();
-                        oldCounter.writeRegisters(buf);
-                        totalBytes += buf.position();
-                        buf.flip();
-                        oldCounter.readRegisters(buf);
-                    }
-                    System.out.println("old serialize bytes : " + totalBytes / testTimes + "B");
-                }
-            });
-            final HyperLogLogPlusCounterNew newCounter = getRandNewCounter(p, cardinality, RegisterType.DENSE);
-            long newTime = runTestCase(new TestCase() {
-                @Override
-                public void run() throws Exception {
-                    ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
-                    long totalBytes = 0;
-                    for (int i = 0; i < testTimes; i++) {
-                        buf.clear();
-                        newCounter.writeRegisters(buf);
-                        totalBytes += buf.position();
-                        buf.flip();
-                        newCounter.readRegisters(buf);
-                    }
-                    System.out.println("new serialize bytes : " + totalBytes / testTimes + "B");
-                }
-            });
-            assertEquals(RegisterType.DENSE, newCounter.getRegisterType());
-            System.out.println("old serialize time : " + oldTime);
-            System.out.println("new serialize time : " + newTime);
-        }
-        HyperLogLogPlusCounterNew.overflowFactor = oldFactor;
-    }
-
-    interface TestCase {
-        void run() throws Exception;
-    }
-
-    public long runTestCase(TestCase testCase) throws Exception {
-        long startTime = System.currentTimeMillis();
-        testCase.run();
-        return System.currentTimeMillis() - startTime;
-    }
-
-    public HyperLogLogPlusCounterOld getRandOldCounter(int p, int num) {
-        HyperLogLogPlusCounterOld c = new HyperLogLogPlusCounterOld(p);
-        for (int i = 0; i < num; i++)
-            c.add(i);
-        return c;
-    }
-
-    public HyperLogLogPlusCounterNew getRandNewCounter(int p, int num) {
-        HyperLogLogPlusCounterNew c = new HyperLogLogPlusCounterNew(p);
-        for (int i = 0; i < num; i++)
-            c.add(i);
-        return c;
-    }
-
-    public HyperLogLogPlusCounterNew getRandNewCounter(int p, int num, RegisterType type) {
-        HyperLogLogPlusCounterNew c = new HyperLogLogPlusCounterNew(p, type);
-        for (int i = 0; i < num; i++)
-            c.add(i);
-        return c;
-    }
-
-    public static int[] getTestDataDivide(int m) {
-        return new int[] { 1, 5, 10, 100, m / 200, m / 100, m / 50, m / 20, m / 10, m };
-    }
-}

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterOldTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterOldTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterOldTest.java
new file mode 100644
index 0000000..c4a97cd
--- /dev/null
+++ b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterOldTest.java
@@ -0,0 +1,266 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.measure.hllc;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.kylin.common.util.Bytes;
+import org.apache.kylin.measure.hllc.HLLCounterOld;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * @author yangli9
+ * 
+ */
+@SuppressWarnings("deprecation")
+public class HLLCounterOldTest {
+
+    ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
+    Random rand1 = new Random(1);
+    Random rand2 = new Random(2);
+    Random rand3 = new Random(3);
+    int errorCount1 = 0;
+    int errorCount2 = 0;
+    int errorCount3 = 0;
+
+    @Test
+    public void testOneAdd() throws IOException {
+        HLLCounterOld hllc = new HLLCounterOld(14);
+        HLLCounterOld one = new HLLCounterOld(14);
+        for (int i = 0; i < 1000000; i++) {
+            one.clear();
+            one.add(rand1.nextInt());
+            hllc.merge(one);
+        }
+        assertTrue(hllc.getCountEstimate() > 1000000 * 0.9);
+    }
+
+    @Test
+    public void testPeekLength() throws IOException {
+        HLLCounterOld hllc = new HLLCounterOld(10);
+        HLLCounterOld copy = new HLLCounterOld(10);
+        byte[] value = new byte[10];
+        for (int i = 0; i < 200000; i++) {
+            rand1.nextBytes(value);
+            hllc.add(value);
+
+            buf.clear();
+            hllc.writeRegisters(buf);
+
+            int len = buf.position();
+            buf.position(0);
+            assertEquals(len, hllc.peekLength(buf));
+
+            copy.readRegisters(buf);
+            assertEquals(len, buf.position());
+            assertEquals(hllc, copy);
+        }
+        buf.clear();
+    }
+
+    private Set<String> generateTestData(int n) {
+        Set<String> testData = new HashSet<String>();
+        for (int i = 0; i < n; i++) {
+            String[] samples = generateSampleData();
+            for (String sample : samples) {
+                testData.add(sample);
+            }
+        }
+        return testData;
+    }
+
+    // simulate the visit (=visitor+id)
+    private String[] generateSampleData() {
+
+        StringBuilder buf = new StringBuilder();
+        for (int i = 0; i < 19; i++) {
+            buf.append(Math.abs(rand1.nextInt()) % 10);
+        }
+        String header = buf.toString();
+
+        int size = Math.abs(rand3.nextInt()) % 9 + 1;
+        String[] samples = new String[size];
+        for (int k = 0; k < size; k++) {
+            buf = new StringBuilder(header);
+            buf.append("-");
+            for (int i = 0; i < 10; i++) {
+                buf.append(Math.abs(rand3.nextInt()) % 10);
+            }
+            samples[k] = buf.toString();
+        }
+
+        return samples;
+    }
+
+    @Test
+    public void countTest() throws IOException {
+        int n = 10;
+        for (int i = 0; i < 5; i++) {
+            count(n);
+            n *= 10;
+        }
+    }
+
+    private void count(int n) throws IOException {
+        Set<String> testSet = generateTestData(n);
+
+        HLLCounterOld hllc = newHLLC();
+        for (String testData : testSet) {
+            hllc.add(Bytes.toBytes(testData));
+        }
+        long estimate = hllc.getCountEstimate();
+        double errorRate = hllc.getErrorRate();
+        double actualError = (double) Math.abs(testSet.size() - estimate) / testSet.size();
+        System.out.println(estimate);
+        System.out.println(testSet.size());
+        System.out.println(errorRate);
+        System.out.println("=" + actualError);
+        Assert.assertTrue(actualError < errorRate * 3.0);
+
+        checkSerialize(hllc);
+    }
+
+    private void checkSerialize(HLLCounterOld hllc) throws IOException {
+        long estimate = hllc.getCountEstimate();
+        buf.clear();
+        hllc.writeRegisters(buf);
+        buf.flip();
+        hllc.readRegisters(buf);
+        Assert.assertEquals(estimate, hllc.getCountEstimate());
+    }
+
+    @Test
+    public void mergeTest() throws IOException {
+        double error = 0;
+        int n = 100;
+        for (int i = 0; i < n; i++) {
+            double e = merge(i);
+            error += e;
+        }
+        System.out.println("Total average error is " + error / n);
+
+        System.out.println("  errorRateCount1 is " + errorCount1 + "!");
+        System.out.println("  errorRateCount2 is " + errorCount2 + "!");
+        System.out.println("  errorRateCount3 is " + errorCount3 + "!");
+
+        Assert.assertTrue(errorCount1 <= n * 0.30);
+        Assert.assertTrue(errorCount2 <= n * 0.05);
+        Assert.assertTrue(errorCount3 <= n * 0.02);
+    }
+
+    private double merge(int round) throws IOException {
+        int ln = 20;
+        int dn = 100 * (round + 1);
+        Set<String> testSet = new HashSet<String>();
+        HLLCounterOld[] hllcs = new HLLCounterOld[ln];
+        for (int i = 0; i < ln; i++) {
+            hllcs[i] = newHLLC();
+            for (int k = 0; k < dn; k++) {
+                String[] samples = generateSampleData();
+                for (String data : samples) {
+                    testSet.add(data);
+                    hllcs[i].add(Bytes.toBytes(data));
+                }
+            }
+        }
+        HLLCounterOld mergeHllc = newHLLC();
+        for (HLLCounterOld hllc : hllcs) {
+            mergeHllc.merge(serDes(hllc));
+        }
+
+        double errorRate = mergeHllc.getErrorRate();
+        long estimate = mergeHllc.getCountEstimate();
+        double actualError = Math.abs((double) (testSet.size() - estimate) / testSet.size());
+
+        System.out.println(testSet.size() + "-" + estimate + " ~ " + actualError);
+        Assert.assertTrue(actualError < 0.1);
+
+        if (actualError > errorRate) {
+            errorCount1++;
+        }
+        if (actualError > 2 * errorRate) {
+            errorCount2++;
+        }
+        if (actualError > 3 * errorRate) {
+            errorCount3++;
+        }
+
+        return actualError;
+    }
+
+    private HLLCounterOld serDes(HLLCounterOld hllc) throws IOException {
+        buf.clear();
+        hllc.writeRegisters(buf);
+        buf.flip();
+        HLLCounterOld copy = new HLLCounterOld(hllc.getPrecision());
+        copy.readRegisters(buf);
+        Assert.assertEquals(copy.getCountEstimate(), hllc.getCountEstimate());
+        return copy;
+    }
+
+    @Test
+    public void testPerformance() throws IOException {
+        int N = 3; // reduce N HLLC into one
+        int M = 1000; // for M times, use 100000 for real perf test
+
+        HLLCounterOld samples[] = new HLLCounterOld[N];
+        for (int i = 0; i < N; i++) {
+            samples[i] = newHLLC();
+            for (String str : generateTestData(10000))
+                samples[i].add(str);
+        }
+
+        System.out.println("Perf test running ... ");
+        long start = System.currentTimeMillis();
+        HLLCounterOld sum = newHLLC();
+        for (int i = 0; i < M; i++) {
+            sum.clear();
+            for (int j = 0; j < N; j++) {
+                sum.merge(samples[j]);
+                checkSerialize(sum);
+            }
+        }
+        long duration = System.currentTimeMillis() - start;
+        System.out.println("Perf test result: " + duration / 1000 + " seconds");
+    }
+
+    @Test
+    public void testEquivalence() {
+        byte[] a = new byte[] { 0, 3, 4, 42, 2, 2 };
+        byte[] b = new byte[] { 3, 4, 42 };
+        HLLCounterOld ha = new HLLCounterOld();
+        HLLCounterOld hb = new HLLCounterOld();
+        ha.add(a, 1, 3);
+        hb.add(b);
+
+        Assert.assertTrue(ha.getCountEstimate() == hb.getCountEstimate());
+    }
+
+    private HLLCounterOld newHLLC() {
+        return new HLLCounterOld(16);
+    }
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterTest.java
new file mode 100644
index 0000000..26ad4a7
--- /dev/null
+++ b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterTest.java
@@ -0,0 +1,316 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+package org.apache.kylin.measure.hllc;
+
+import org.apache.kylin.common.util.Bytes;
+import org.apache.kylin.measure.hllc.HLLCounterOld;
+import org.apache.kylin.measure.hllc.HLLCounter;
+import org.apache.kylin.measure.hllc.RegisterType;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Set;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Created by xiefan on 16-12-12.
+ */
+@SuppressWarnings("deprecation")
+public class HLLCounterTest {
+    ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
+    Random rand1 = new Random(1);
+    Random rand2 = new Random(2);
+    Random rand3 = new Random(3);
+    int errorCount1 = 0;
+    int errorCount2 = 0;
+    int errorCount3 = 0;
+
+    @Test
+    public void testOneAdd() throws IOException {
+        HLLCounter hllc = new HLLCounter(14);
+        HLLCounter one = new HLLCounter(14);
+        for (int i = 0; i < 1000000; i++) {
+            one.clear();
+            one.add(rand1.nextInt());
+            hllc.merge(one);
+        }
+        System.out.println(hllc.getCountEstimate());
+        assertTrue(hllc.getCountEstimate() > 1000000 * 0.9);
+    }
+
+    @Test
+    public void tesSparseEstimate() throws IOException {
+        HLLCounter hllc = new HLLCounter(14);
+        for (int i = 0; i < 10; i++) {
+            hllc.add(i);
+        }
+        System.out.println(hllc.getCountEstimate());
+        assertTrue(hllc.getCountEstimate() > 10 * 0.9);
+    }
+
+    @Test
+    public void countTest() throws IOException {
+        int n = 10;
+        for (int i = 0; i < 5; i++) {
+            count(n);
+            n *= 10;
+        }
+    }
+
+    @Test
+    public void mergeTest() throws IOException {
+        double error = 0;
+        int n = 100;
+        for (int i = 0; i < n; i++) {
+            double e = merge(i);
+            error += e;
+        }
+        System.out.println("Total average error is " + error / n);
+
+        System.out.println("  errorRateCount1 is " + errorCount1 + "!");
+        System.out.println("  errorRateCount2 is " + errorCount2 + "!");
+        System.out.println("  errorRateCount3 is " + errorCount3 + "!");
+
+        Assert.assertTrue(errorCount1 <= n * 0.30);
+        Assert.assertTrue(errorCount2 <= n * 0.05);
+        Assert.assertTrue(errorCount3 <= n * 0.02);
+    }
+
+    /* compare the results of the old and new HLL counter implementations */
+    @Test
+    public void compareResult() throws IOException {
+        int p = 12; //4096
+        int m = 1 << p;
+        
+        ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
+    
+        for (int t = 0; t < 5; t++) {
+            //compare sparse
+            HLLCounterOld oldCounter = new HLLCounterOld(p);
+            HLLCounter newCounter = new HLLCounter(p);
+            HLLCounter newCounter2 = new HLLCounter(p);
+
+            for (int i = 0; i < 20; i++) {
+                int r = rand1.nextInt();
+                oldCounter.add(r);
+                newCounter.add(r);
+            }
+            assertEquals(RegisterType.SPARSE, newCounter.getRegisterType());
+            assertEquals(oldCounter.getCountEstimate(), newCounter.getCountEstimate());
+            
+            buf.clear();
+            oldCounter.writeRegisters(buf);
+            buf.flip();
+            newCounter2.readRegisters(buf);
+            assertEquals(oldCounter.getCountEstimate(), newCounter2.getCountEstimate());
+            
+            //compare dense
+            for (int i = 0; i < m / 2; i++) {
+                int r = rand1.nextInt();
+                oldCounter.add(r);
+                newCounter.add(r);
+            }
+            assertEquals(RegisterType.DENSE, newCounter.getRegisterType());
+            assertEquals(oldCounter.getCountEstimate(), newCounter.getCountEstimate());
+            
+            buf.clear();
+            oldCounter.writeRegisters(buf);
+            buf.flip();
+            newCounter2.readRegisters(buf);
+            assertEquals(oldCounter.getCountEstimate(), newCounter2.getCountEstimate());
+        }
+    }
+
+    @Test
+    public void testPeekLength() throws IOException {
+        HLLCounter hllc = new HLLCounter(10);
+        HLLCounter copy = new HLLCounter(10);
+        byte[] value = new byte[10];
+        for (int i = 0; i < 200000; i++) {
+            rand1.nextBytes(value);
+            hllc.add(value);
+
+            buf.clear();
+            hllc.writeRegisters(buf);
+
+            int len = buf.position();
+            buf.position(0);
+            assertEquals(len, hllc.peekLength(buf));
+
+            copy.readRegisters(buf);
+            assertEquals(len, buf.position());
+            assertEquals(hllc, copy);
+        }
+        buf.clear();
+    }
+
+    @Test
+    public void testEquivalence() {
+        byte[] a = new byte[] { 0, 3, 4, 42, 2, 2 };
+        byte[] b = new byte[] { 3, 4, 42 };
+        HLLCounter ha = new HLLCounter();
+        HLLCounter hb = new HLLCounter();
+        ha.add(a, 1, 3);
+        hb.add(b);
+
+        Assert.assertTrue(ha.getCountEstimate() == hb.getCountEstimate());
+    }
+
+    @Test
+    public void testAutoChangeToSparse() {
+        int p = 15;
+        int m = 1 << p;
+        HLLCounter counter = new HLLCounter(p);
+        assertEquals(RegisterType.SPARSE, counter.getRegisterType());
+        double over = HLLCounter.OVERFLOW_FACTOR * m;
+        int overFlow = (int) over + 1000;
+        for (int i = 0; i < overFlow; i++)
+            counter.add(i);
+        assertEquals(RegisterType.DENSE, counter.getRegisterType());
+    }
+
+    @Test
+    public void testSerialilze() throws Exception {
+        //test sparse serialize
+        int p = 15;
+        int m = 1 << p;
+        HLLCounter counter = new HLLCounter(p);
+        counter.add(123);
+        assertEquals(RegisterType.SPARSE, counter.getRegisterType());
+        checkSerialize(counter);
+        //test dense serialize
+        double over = HLLCounter.OVERFLOW_FACTOR * m;
+        int overFlow = (int) over + 1000;
+        for (int i = 0; i < overFlow; i++)
+            counter.add(i);
+        assertEquals(RegisterType.DENSE, counter.getRegisterType());
+        checkSerialize(counter);
+    }
+
+    private Set<String> generateTestData(int n) {
+        Set<String> testData = new HashSet<String>();
+        for (int i = 0; i < n; i++) {
+            String[] samples = generateSampleData();
+            for (String sample : samples) {
+                testData.add(sample);
+            }
+        }
+        return testData;
+    }
+
+    // simulate the visit (=visitor+id)
+    private String[] generateSampleData() {
+
+        StringBuilder buf = new StringBuilder();
+        for (int i = 0; i < 19; i++) {
+            buf.append(Math.abs(rand1.nextInt()) % 10);
+        }
+        String header = buf.toString();
+
+        int size = Math.abs(rand3.nextInt()) % 9 + 1;
+        String[] samples = new String[size];
+        for (int k = 0; k < size; k++) {
+            buf = new StringBuilder(header);
+            buf.append("-");
+            for (int i = 0; i < 10; i++) {
+                buf.append(Math.abs(rand3.nextInt()) % 10);
+            }
+            samples[k] = buf.toString();
+        }
+
+        return samples;
+    }
+
+    private double merge(int round) throws IOException {
+        int ln = 20;
+        int dn = 100 * (round + 1);
+        Set<String> testSet = new HashSet<String>();
+        HLLCounter[] hllcs = new HLLCounter[ln];
+        for (int i = 0; i < ln; i++) {
+            hllcs[i] = newHLLC();
+            for (int k = 0; k < dn; k++) {
+                String[] samples = generateSampleData();
+                for (String data : samples) {
+                    testSet.add(data);
+                    hllcs[i].add(Bytes.toBytes(data));
+                }
+            }
+        }
+        HLLCounter mergeHllc = newHLLC();
+        for (HLLCounter hllc : hllcs) {
+            mergeHllc.merge(hllc);
+        }
+
+        double errorRate = mergeHllc.getErrorRate();
+        long estimate = mergeHllc.getCountEstimate();
+        double actualError = Math.abs((double) (testSet.size() - estimate) / testSet.size());
+
+        System.out.println(testSet.size() + "-" + estimate + " ~ " + actualError);
+        Assert.assertTrue(actualError < 0.1);
+
+        if (actualError > errorRate) {
+            errorCount1++;
+        }
+        if (actualError > 2 * errorRate) {
+            errorCount2++;
+        }
+        if (actualError > 3 * errorRate) {
+            errorCount3++;
+        }
+
+        return actualError;
+    }
+
+    private HLLCounter newHLLC() {
+        return new HLLCounter(16);
+    }
+
+    private void count(int n) throws IOException {
+        Set<String> testSet = generateTestData(n);
+
+        HLLCounter hllc = newHLLC();
+        for (String testData : testSet) {
+            hllc.add(Bytes.toBytes(testData));
+        }
+        long estimate = hllc.getCountEstimate();
+        double errorRate = hllc.getErrorRate();
+        double actualError = (double) Math.abs(testSet.size() - estimate) / testSet.size();
+        System.out.println(estimate);
+        System.out.println(testSet.size());
+        System.out.println(errorRate);
+        System.out.println("=" + actualError);
+        Assert.assertTrue(actualError < errorRate * 3.0);
+
+        checkSerialize(hllc);
+    }
+
+    private void checkSerialize(HLLCounter hllc) throws IOException {
+        long estimate = hllc.getCountEstimate();
+        buf.clear();
+        hllc.writeRegisters(buf);
+        buf.flip();
+        hllc.readRegisters(buf);
+        Assert.assertEquals(estimate, hllc.getCountEstimate());
+    }
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java
new file mode 100644
index 0000000..586c007
--- /dev/null
+++ b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java
@@ -0,0 +1,291 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+package org.apache.kylin.measure.hllc;
+
+import org.apache.kylin.measure.hllc.HLLCounterOld;
+import org.apache.kylin.measure.hllc.HLLCounter;
+import org.apache.kylin.measure.hllc.RegisterType;
+import org.junit.Test;
+
+import java.nio.ByteBuffer;
+import java.util.Random;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Created by xiefan on 16-12-12.
+ */
+@SuppressWarnings("deprecation")
+public class NewHyperLogLogBenchmarkTest {
+
+    public static final Random rand = new Random(1);
+
+    final int testTimes = 10000;
+
+    /* time testTimes merges of one DENSE counter into another, old vs. new implementation, per cardinality */
+    @Test
+    public void denseToDenseRegisterMergeBenchmark() throws Exception {
+        final int p = 15;
+        int m = 1 << p;
+
+        System.out.println("denseToDenseRegisterMergeBenchmark(), m : " + m);
+        double oldFactor = HLLCounter.OVERFLOW_FACTOR;
+        HLLCounter.OVERFLOW_FACTOR = 1.1; //keep sparse
+        for (int cardinality : getTestDataDivide(m)) {
+            final HLLCounterOld oldCounter = new HLLCounterOld(p);
+            final HLLCounterOld oldCounter2 = getRandOldCounter(p, cardinality);
+            long oldTime = runTestCase(new TestCase() {
+                @Override
+                public void run() {
+                    for (int i = 0; i < testTimes; i++) {
+                        oldCounter.merge(oldCounter2);
+                    }
+                }
+            });
+            final HLLCounter newCounter = new HLLCounter(p, RegisterType.DENSE);
+            final HLLCounter newCounter2 = new HLLCounter(p, RegisterType.DENSE);
+            for (int i = 0; i < cardinality; i++) // same fill as oldCounter2, so both sides merge equal data
+                newCounter2.add(i);
+            long newTime = runTestCase(new TestCase() {
+                @Override
+                public void run() {
+                    for (int i = 0; i < testTimes; i++) {
+                        newCounter.merge(newCounter2);
+                    }
+                }
+            });
+            assertEquals(RegisterType.DENSE, newCounter.getRegisterType());
+            assertEquals(RegisterType.DENSE, newCounter2.getRegisterType());
+            System.out.println("----------------------------");
+            System.out.println("cardinality : " + cardinality);
+            System.out.println("old time : " + oldTime);
+            System.out.println("new time : " + newTime);
+        }
+        HLLCounter.OVERFLOW_FACTOR = oldFactor;
+    }
+
+    @Test
+    public void sparseToSparseMergeBenchmark() throws Exception {
+        final int p = 15;
+        int m = 1 << p;
+        System.out.println("sparseToSparseMergeBenchmark(), m : " + m);
+        double oldFactor = HLLCounter.OVERFLOW_FACTOR;
+        HLLCounter.OVERFLOW_FACTOR = 1.1; //keep sparse
+        for (int cardinality : getTestDataDivide(m)) {
+            final HLLCounterOld oldCounter = new HLLCounterOld(p);
+            final HLLCounterOld oldCounter2 = getRandOldCounter(p, cardinality);
+            long oldTime = runTestCase(new TestCase() {
+                @Override
+                public void run() {
+
+                    for (int i = 0; i < testTimes; i++) {
+                        oldCounter.merge(oldCounter2);
+                    }
+                }
+            });
+            final HLLCounter newCounter = new HLLCounter(p);
+            final HLLCounter newCounter2 = getRandNewCounter(p, cardinality);
+            long newTime = runTestCase(new TestCase() {
+                @Override
+                public void run() {
+                    for (int i = 0; i < testTimes; i++) {
+                        newCounter.merge(newCounter2);
+                    }
+                }
+            });
+            assertEquals(RegisterType.SPARSE, newCounter.getRegisterType());
+            assertEquals(RegisterType.SPARSE, newCounter2.getRegisterType());
+            System.out.println("----------------------------");
+            System.out.println("cardinality : " + cardinality);
+            System.out.println("old time : " + oldTime);
+            System.out.println("new time : " + newTime);
+        }
+        HLLCounter.OVERFLOW_FACTOR = oldFactor;
+    }
+
+    @Test
+    public void sparseToDenseRegisterMergeBenchmark() throws Exception {
+        final int p = 15;
+        int m = 1 << p;
+        System.out.println("sparseToDenseRegisterMergeBenchmark(), m : " + m);
+        double oldFactor = HLLCounter.OVERFLOW_FACTOR;
+        HLLCounter.OVERFLOW_FACTOR = 1.1; //keep sparse
+        for (int cardinality : getTestDataDivide(m)) {
+            System.out.println("----------------------------");
+            System.out.println("cardinality : " + cardinality);
+            final HLLCounterOld oldCounter = new HLLCounterOld(p);
+            final HLLCounterOld oldCounter2 = getRandOldCounter(p, cardinality);
+            long oldTime = runTestCase(new TestCase() {
+                @Override
+                public void run() {
+                    for (int i = 0; i < testTimes; i++) {
+                        oldCounter.merge(oldCounter2);
+                    }
+                }
+            });
+            final HLLCounter newCounter = new HLLCounter(p, RegisterType.DENSE);
+            final HLLCounter newCounter2 = getRandNewCounter(p, cardinality);
+            long newTime = runTestCase(new TestCase() {
+                @Override
+                public void run() {
+                    for (int i = 0; i < testTimes; i++) {
+                        newCounter.merge(newCounter2);
+                    }
+                }
+            });
+            assertEquals(RegisterType.DENSE, newCounter.getRegisterType());
+            assertEquals(RegisterType.SPARSE, newCounter2.getRegisterType());
+            System.out.println("old time : " + oldTime);
+            System.out.println("new time : " + newTime);
+        }
+        HLLCounter.OVERFLOW_FACTOR = oldFactor;
+    }
+
+    @Test
+    public void sparseSerializeBenchmark() throws Exception {
+        final int p = 15;
+        int m = 1 << p;
+        double oldFactor = HLLCounter.OVERFLOW_FACTOR;
+        HLLCounter.OVERFLOW_FACTOR = 1.1; //keep sparse
+        System.out.println("sparseSerializeBenchmark()");
+        for (int cardinality : getTestDataDivide(m)) {
+            System.out.println("----------------------------");
+            System.out.println("cardinality : " + cardinality);
+            final HLLCounterOld oldCounter = getRandOldCounter(p, cardinality);
+            long oldTime = runTestCase(new TestCase() {
+                @Override
+                public void run() throws Exception {
+                    ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
+                    long totalBytes = 0;
+                    for (int i = 0; i < testTimes; i++) {
+                        buf.clear();
+                        oldCounter.writeRegisters(buf);
+                        totalBytes += buf.position();
+                        buf.flip();
+                        oldCounter.readRegisters(buf);
+                    }
+                    System.out.println("old serialize bytes : " + totalBytes / testTimes + "B");
+                }
+            });
+            final HLLCounter newCounter = getRandNewCounter(p, cardinality);
+            long newTime = runTestCase(new TestCase() {
+                @Override
+                public void run() throws Exception {
+                    ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
+                    long totalBytes = 0;
+                    for (int i = 0; i < testTimes; i++) {
+                        buf.clear();
+                        newCounter.writeRegisters(buf);
+                        totalBytes += buf.position();
+                        buf.flip();
+                        newCounter.readRegisters(buf);
+                    }
+                    System.out.println("new serialize bytes : " + totalBytes / testTimes + "B");
+                }
+            });
+            assertEquals(RegisterType.SPARSE, newCounter.getRegisterType());
+            System.out.println("old serialize time : " + oldTime);
+            System.out.println("new serialize time : " + newTime);
+        }
+        HLLCounter.OVERFLOW_FACTOR = oldFactor;
+    }
+
+    @Test
+    public void denseSerializeBenchmark() throws Exception {
+        final int p = 15;
+        final int m = 1 << p;
+        double oldFactor = HLLCounter.OVERFLOW_FACTOR;
+        HLLCounter.OVERFLOW_FACTOR = 0; //keep dense
+        System.out.println("denseSerializeBenchmark()");
+        for (int cardinality : getTestDataDivide(m)) {
+            System.out.println("----------------------------");
+            System.out.println("cardinality : " + cardinality);
+            final HLLCounterOld oldCounter = getRandOldCounter(p, cardinality);
+            long oldTime = runTestCase(new TestCase() {
+                @Override
+                public void run() throws Exception {
+                    ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
+                    long totalBytes = 0;
+                    for (int i = 0; i < testTimes; i++) {
+                        buf.clear();
+                        oldCounter.writeRegisters(buf);
+                        totalBytes += buf.position();
+                        buf.flip();
+                        oldCounter.readRegisters(buf);
+                    }
+                    System.out.println("old serialize bytes : " + totalBytes / testTimes + "B");
+                }
+            });
+            final HLLCounter newCounter = getRandNewCounter(p, cardinality, RegisterType.DENSE);
+            long newTime = runTestCase(new TestCase() {
+                @Override
+                public void run() throws Exception {
+                    ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
+                    long totalBytes = 0;
+                    for (int i = 0; i < testTimes; i++) {
+                        buf.clear();
+                        newCounter.writeRegisters(buf);
+                        totalBytes += buf.position();
+                        buf.flip();
+                        newCounter.readRegisters(buf);
+                    }
+                    System.out.println("new serialize bytes : " + totalBytes / testTimes + "B");
+                }
+            });
+            assertEquals(RegisterType.DENSE, newCounter.getRegisterType());
+            System.out.println("old serialize time : " + oldTime);
+            System.out.println("new serialize time : " + newTime);
+        }
+        HLLCounter.OVERFLOW_FACTOR = oldFactor;
+    }
+
+    interface TestCase {
+        void run() throws Exception;
+    }
+
+    public long runTestCase(TestCase testCase) throws Exception {
+        long startTime = System.currentTimeMillis();
+        testCase.run();
+        return System.currentTimeMillis() - startTime;
+    }
+
+    public HLLCounterOld getRandOldCounter(int p, int num) {
+        HLLCounterOld c = new HLLCounterOld(p);
+        for (int i = 0; i < num; i++)
+            c.add(i);
+        return c;
+    }
+
+    public HLLCounter getRandNewCounter(int p, int num) {
+        HLLCounter c = new HLLCounter(p);
+        for (int i = 0; i < num; i++)
+            c.add(i);
+        return c;
+    }
+
+    public HLLCounter getRandNewCounter(int p, int num, RegisterType type) {
+        HLLCounter c = new HLLCounter(p, type);
+        for (int i = 0; i < num; i++)
+            c.add(i);
+        return c;
+    }
+
+    public static int[] getTestDataDivide(int m) {
+        return new int[] { 1, 5, 10, 100, m / 200, m / 100, m / 50, m / 20, m / 10, m };
+    }
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
index 5445491..ffba181 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
@@ -53,7 +53,7 @@ import org.apache.kylin.cube.kv.CubeDimEncMap;
 import org.apache.kylin.cube.kv.RowKeyEncoder;
 import org.apache.kylin.cube.model.CubeDesc;
 import org.apache.kylin.engine.mr.HadoopUtil;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
+import org.apache.kylin.measure.hllc.HLLCounter;
 import org.apache.kylin.metadata.datatype.DataType;
 import org.apache.kylin.metadata.model.FunctionDesc;
 import org.apache.kylin.metadata.model.MeasureDesc;
@@ -76,7 +76,7 @@ public class CubeStatsReader {
     final int samplingPercentage;
     final int mapperNumberOfFirstBuild; // becomes meaningless after merge
     final double mapperOverlapRatioOfFirstBuild; // becomes meaningless after merge
-    final Map<Long, HyperLogLogPlusCounterNew> cuboidRowEstimatesHLL;
+    final Map<Long, HLLCounter> cuboidRowEstimatesHLL;
     final CuboidScheduler cuboidScheduler;
 
     public CubeStatsReader(CubeSegment cubeSegment, KylinConfig kylinConfig) throws IOException {
@@ -96,7 +96,7 @@ public class CubeStatsReader {
             int percentage = 100;
             int mapperNumber = 0;
             double mapperOverlapRatio = 0;
-            Map<Long, HyperLogLogPlusCounterNew> counterMap = Maps.newHashMap();
+            Map<Long, HLLCounter> counterMap = Maps.newHashMap();
 
             LongWritable key = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), hadoopConf);
             BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), hadoopConf);
@@ -108,7 +108,7 @@ public class CubeStatsReader {
                 } else if (key.get() == -2) {
                     mapperNumber = Bytes.toInt(value.getBytes());
                 } else if (key.get() > 0) {
-                    HyperLogLogPlusCounterNew hll = new HyperLogLogPlusCounterNew(kylinConfig.getCubeStatsHLLPrecision());
+                    HLLCounter hll = new HLLCounter(kylinConfig.getCubeStatsHLLPrecision());
                     ByteArray byteArray = new ByteArray(value.getBytes());
                     hll.readRegisters(byteArray.asBuffer());
                     counterMap.put(key.get(), hll);
@@ -161,9 +161,9 @@ public class CubeStatsReader {
         return mapperOverlapRatioOfFirstBuild;
     }
 
-    public static Map<Long, Long> getCuboidRowCountMapFromSampling(Map<Long, HyperLogLogPlusCounterNew> hllcMap, int samplingPercentage) {
+    public static Map<Long, Long> getCuboidRowCountMapFromSampling(Map<Long, HLLCounter> hllcMap, int samplingPercentage) {
         Map<Long, Long> cuboidRowCountMap = Maps.newHashMap();
-        for (Map.Entry<Long, HyperLogLogPlusCounterNew> entry : hllcMap.entrySet()) {
+        for (Map.Entry<Long, HLLCounter> entry : hllcMap.entrySet()) {
             // No need to adjust according sampling percentage. Assumption is that data set is far
             // more than cardinality. Even a percentage of the data should already see all cardinalities.
             cuboidRowCountMap.put(entry.getKey(), entry.getValue().getCountEstimate());

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsWriter.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsWriter.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsWriter.java
index 219cdf2..8f400c3 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsWriter.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsWriter.java
@@ -33,17 +33,17 @@ import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.kylin.common.util.Bytes;
 import org.apache.kylin.measure.BufferedMeasureCodec;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
+import org.apache.kylin.measure.hllc.HLLCounter;
 
 public class CubeStatsWriter {
 
     public static void writeCuboidStatistics(Configuration conf, Path outputPath, //
-            Map<Long, HyperLogLogPlusCounterNew> cuboidHLLMap, int samplingPercentage) throws IOException {
+            Map<Long, HLLCounter> cuboidHLLMap, int samplingPercentage) throws IOException {
         writeCuboidStatistics(conf, outputPath, cuboidHLLMap, samplingPercentage, 0, 0);
     }
 
     public static void writeCuboidStatistics(Configuration conf, Path outputPath, //
-            Map<Long, HyperLogLogPlusCounterNew> cuboidHLLMap, int samplingPercentage, int mapperNumber, double mapperOverlapRatio) throws IOException {
+            Map<Long, HLLCounter> cuboidHLLMap, int samplingPercentage, int mapperNumber, double mapperOverlapRatio) throws IOException {
         Path seqFilePath = new Path(outputPath, BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME);
 
         List<Long> allCuboids = new ArrayList<Long>();

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
index 0d388c7..3115fe4 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
@@ -47,7 +47,7 @@ import org.apache.kylin.engine.mr.KylinReducer;
 import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
 import org.apache.kylin.engine.mr.common.BatchConstants;
 import org.apache.kylin.engine.mr.common.CubeStatsWriter;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
+import org.apache.kylin.measure.hllc.HLLCounter;
 import org.apache.kylin.metadata.model.TblColRef;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -64,7 +64,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<SelfDefineSortableK
     private List<TblColRef> columnList;
     private String statisticsOutput = null;
     private List<Long> baseCuboidRowCountInMappers;
-    protected Map<Long, HyperLogLogPlusCounterNew> cuboidHLLMap = null;
+    protected Map<Long, HLLCounter> cuboidHLLMap = null;
     protected long baseCuboidId;
     protected CubeDesc cubeDesc;
     private long totalRowsBeforeMerge = 0;
@@ -156,7 +156,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<SelfDefineSortableK
             // for hll
             long cuboidId = Bytes.toLong(key.getBytes(), 1, Bytes.SIZEOF_LONG);
             for (Text value : values) {
-                HyperLogLogPlusCounterNew hll = new HyperLogLogPlusCounterNew(cubeConfig.getCubeStatsHLLPrecision());
+                HLLCounter hll = new HLLCounter(cubeConfig.getCubeStatsHLLPrecision());
                 ByteBuffer bf = ByteBuffer.wrap(value.getBytes(), 0, value.getLength());
                 hll.readRegisters(bf);
 
@@ -270,7 +270,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<SelfDefineSortableK
         if (isStatistics) {
             // output the hll info
             long grandTotal = 0;
-            for (HyperLogLogPlusCounterNew hll : cuboidHLLMap.values()) {
+            for (HLLCounter hll : cuboidHLLMap.values()) {
                 grandTotal += hll.getCountEstimate();
             }
             double mapperOverlapRatio = grandTotal == 0 ? 0 : (double) totalRowsBeforeMerge / grandTotal;

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
index c0575f1..5692c76 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
@@ -29,7 +29,7 @@ import org.apache.kylin.common.util.Bytes;
 import org.apache.kylin.cube.cuboid.CuboidScheduler;
 import org.apache.kylin.engine.mr.common.BatchConstants;
 import org.apache.kylin.measure.BufferedMeasureCodec;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
+import org.apache.kylin.measure.hllc.HLLCounter;
 import org.apache.kylin.metadata.model.TblColRef;
 
 import com.google.common.collect.Lists;
@@ -45,7 +45,7 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap
     protected CuboidScheduler cuboidScheduler = null;
     protected int nRowKey;
     private Integer[][] allCuboidsBitSet = null;
-    private HyperLogLogPlusCounterNew[] allCuboidsHLL = null;
+    private HLLCounter[] allCuboidsHLL = null;
     private Long[] cuboidIds;
     private HashFunction hf = null;
     private int rowCount = 0;
@@ -76,9 +76,9 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap
             allCuboidsBitSet = allCuboidsBitSetList.toArray(new Integer[cuboidIdList.size()][]);
             cuboidIds = cuboidIdList.toArray(new Long[cuboidIdList.size()]);
 
-            allCuboidsHLL = new HyperLogLogPlusCounterNew[cuboidIds.length];
+            allCuboidsHLL = new HLLCounter[cuboidIds.length];
             for (int i = 0; i < cuboidIds.length; i++) {
-                allCuboidsHLL[i] = new HyperLogLogPlusCounterNew(cubeDesc.getConfig().getCubeStatsHLLPrecision());
+                allCuboidsHLL[i] = new HLLCounter(cubeDesc.getConfig().getCubeStatsHLLPrecision());
             }
 
             hf = Hashing.murmur3_32();
@@ -207,7 +207,7 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap
         if (collectStatistics) {
             ByteBuffer hllBuf = ByteBuffer.allocate(BufferedMeasureCodec.DEFAULT_BUFFER_SIZE);
             // output each cuboid's hll to reducer, key is 0 - cuboidId
-            HyperLogLogPlusCounterNew hll;
+            HLLCounter hll;
             for (int i = 0; i < cuboidIds.length; i++) {
                 hll = allCuboidsHLL[i];
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java
index e839989..811fc24 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java
@@ -47,7 +47,7 @@ import org.apache.kylin.job.exception.ExecuteException;
 import org.apache.kylin.job.execution.AbstractExecutable;
 import org.apache.kylin.job.execution.ExecutableContext;
 import org.apache.kylin.job.execution.ExecuteResult;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
+import org.apache.kylin.measure.hllc.HLLCounter;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -56,7 +56,7 @@ import com.google.common.collect.Maps;
 public class MergeStatisticsStep extends AbstractExecutable {
     private static final Logger logger = LoggerFactory.getLogger(MergeStatisticsStep.class);
 
-    protected Map<Long, HyperLogLogPlusCounterNew> cuboidHLLMap = Maps.newHashMap();
+    protected Map<Long, HLLCounter> cuboidHLLMap = Maps.newHashMap();
 
     public MergeStatisticsStep() {
         super();
@@ -100,7 +100,7 @@ public class MergeStatisticsStep extends AbstractExecutable {
                             // sampling percentage;
                             averageSamplingPercentage += Bytes.toInt(value.getBytes());
                         } else if (key.get() > 0) {
-                            HyperLogLogPlusCounterNew hll = new HyperLogLogPlusCounterNew(kylinConf.getCubeStatsHLLPrecision());
+                            HLLCounter hll = new HLLCounter(kylinConf.getCubeStatsHLLPrecision());
                             ByteArray byteArray = new ByteArray(value.getBytes());
                             hll.readRegisters(byteArray.asBuffer());
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/CubeSamplingTest.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/CubeSamplingTest.java b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/CubeSamplingTest.java
index cae3b62..beec00f 100644
--- a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/CubeSamplingTest.java
+++ b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/CubeSamplingTest.java
@@ -24,7 +24,7 @@ import java.util.List;
 import org.apache.commons.lang.RandomStringUtils;
 import org.apache.kylin.common.util.ByteArray;
 import org.apache.kylin.common.util.Bytes;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
+import org.apache.kylin.measure.hllc.HLLCounter;
 import org.junit.Before;
 import org.junit.Test;
 
@@ -45,7 +45,7 @@ public class CubeSamplingTest {
     private Integer[][] allCuboidsBitSet;
     private HashFunction hf = null;
     private long baseCuboidId;
-    private HyperLogLogPlusCounterNew[] allCuboidsHLL = null;
+    private HLLCounter[] allCuboidsHLL = null;
     private final byte[] seperator = Bytes.toBytes(",");
 
     @Before
@@ -61,9 +61,9 @@ public class CubeSamplingTest {
 
         allCuboidsBitSet = allCuboidsBitSetList.toArray(new Integer[allCuboidsBitSetList.size()][]);
         System.out.println("Totally have " + allCuboidsBitSet.length + " cuboids.");
-        allCuboidsHLL = new HyperLogLogPlusCounterNew[allCuboids.size()];
+        allCuboidsHLL = new HLLCounter[allCuboids.size()];
         for (int i = 0; i < allCuboids.size(); i++) {
-            allCuboidsHLL[i] = new HyperLogLogPlusCounterNew(14);
+            allCuboidsHLL[i] = new HLLCounter(14);
         }
 
         //  hf = Hashing.goodFastHash(32);

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducerTest.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducerTest.java b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducerTest.java
index a00db94..f6f790e 100644
--- a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducerTest.java
+++ b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducerTest.java
@@ -28,7 +28,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.kylin.engine.mr.HadoopUtil;
 import org.apache.kylin.engine.mr.common.CubeStatsWriter;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
+import org.apache.kylin.measure.hllc.HLLCounter;
 import org.junit.Test;
 
 import com.google.common.collect.Maps;
@@ -48,7 +48,7 @@ public class FactDistinctColumnsReducerTest {
         }
 
         System.out.println(outputPath);
-        Map<Long, HyperLogLogPlusCounterNew> cuboidHLLMap = Maps.newHashMap();
+        Map<Long, HLLCounter> cuboidHLLMap = Maps.newHashMap();
         CubeStatsWriter.writeCuboidStatistics(conf, outputPath, cuboidHLLMap, 100);
         FileSystem.getLocal(conf).delete(outputPath, true);
 


[2/5] kylin git commit: KYLIN-1832 HyperLogLog performance optimization

Posted by li...@apache.org.
KYLIN-1832 HyperLogLog performance optimization

Signed-off-by: Li Yang <li...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/f05404d5
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/f05404d5
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/f05404d5

Branch: refs/heads/master
Commit: f05404d5576b52c70cf26eb1bccde1c27cd3852f
Parents: 5303651
Author: xiefan46 <95...@qq.com>
Authored: Fri Dec 9 16:53:04 2016 +0800
Committer: Li Yang <li...@apache.org>
Committed: Wed Dec 14 11:07:42 2016 +0800

----------------------------------------------------------------------
 .../org/apache/kylin/cube/util/CubingUtils.java |  14 +-
 .../apache/kylin/gridtable/UnitTestSupport.java |  22 +-
 .../benchmark/GTScannerBenchmark2.java          |   4 +-
 .../gridtable/AggregationCacheMemSizeTest.java  |   4 +-
 .../metadata/measure/MeasureCodecTest.java      |   4 +-
 .../org/apache/kylin/measure/MeasureType.java   |   2 +-
 .../kylin/measure/MeasureTypeFactory.java       |   2 +-
 .../kylin/measure/hllc/DenseRegister.java       |  91 +++++
 .../kylin/measure/hllc/HLLCAggregator.java      |  10 +-
 .../kylin/measure/hllc/HLLCMeasureType.java     |  20 +-
 .../kylin/measure/hllc/HLLCSerializer.java      |  16 +-
 .../measure/hllc/HLLDistinctCountAggFunc.java   |  22 +-
 .../measure/hllc/HyperLogLogPlusCounter.java    | 392 -------------------
 .../measure/hllc/HyperLogLogPlusCounterNew.java | 388 ++++++++++++++++++
 .../measure/hllc/HyperLogLogPlusCounterOld.java | 392 +++++++++++++++++++
 .../org/apache/kylin/measure/hllc/Register.java |  37 ++
 .../apache/kylin/measure/hllc/RegisterType.java |  25 ++
 .../kylin/measure/hllc/SparseRegister.java      |  98 +++++
 .../measure/AggregatorMemEstimateTest.java      |   4 +-
 .../measure/hll/HyperLogLogCounterOldTest.java  | 265 +++++++++++++
 .../measure/hll/HyperLogLogCounterTest.java     | 265 -------------
 .../measure/hll2/HyperLogLogCounterNewTest.java | 301 ++++++++++++++
 .../hll2/NewHyperLogLogBenchmarkTest.java       | 288 ++++++++++++++
 .../kylin/engine/mr/common/CubeStatsReader.java |  12 +-
 .../kylin/engine/mr/common/CubeStatsWriter.java |   6 +-
 .../mr/steps/FactDistinctColumnsReducer.java    |   8 +-
 .../mr/steps/FactDistinctHiveColumnsMapper.java |  10 +-
 .../engine/mr/steps/MergeStatisticsStep.java    |   6 +-
 .../kylin/engine/mr/steps/CubeSamplingTest.java |   8 +-
 .../steps/FactDistinctColumnsReducerTest.java   |   4 +-
 .../apache/kylin/engine/spark/SparkCubing.java  |  28 +-
 .../cardinality/ColumnCardinalityMapper.java    |  12 +-
 .../cardinality/ColumnCardinalityReducer.java   |  12 +-
 .../ColumnCardinalityReducerTest.java           |   4 +-
 34 files changed, 2002 insertions(+), 774 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java b/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
index 413b907..35139a4 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
@@ -38,7 +38,7 @@ import org.apache.kylin.dict.DictionaryGenerator;
 import org.apache.kylin.dict.DictionaryInfo;
 import org.apache.kylin.dict.DictionaryManager;
 import org.apache.kylin.dict.IterableDictionaryValueEnumerator;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter;
+import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
 import org.apache.kylin.metadata.model.IJoinedFlatTableDesc;
 import org.apache.kylin.metadata.model.TblColRef;
 import org.apache.kylin.source.ReadableTable;
@@ -59,7 +59,7 @@ public class CubingUtils {
 
     private static Logger logger = LoggerFactory.getLogger(CubingUtils.class);
 
-    public static Map<Long, HyperLogLogPlusCounter> sampling(CubeDesc cubeDesc, IJoinedFlatTableDesc flatDescIn, Iterable<List<String>> streams) {
+    public static Map<Long, HyperLogLogPlusCounterNew> sampling(CubeDesc cubeDesc, IJoinedFlatTableDesc flatDescIn, Iterable<List<String>> streams) {
         final CubeJoinedFlatTableEnrich flatDesc = new CubeJoinedFlatTableEnrich(flatDescIn, cubeDesc);
         final int rowkeyLength = cubeDesc.getRowkey().getRowKeyColumns().length;
         final List<Long> allCuboidIds = new CuboidScheduler(cubeDesc).getAllCuboidIds();
@@ -84,9 +84,9 @@ public class CubingUtils {
                 return result;
             }
         });
-        final Map<Long, HyperLogLogPlusCounter> result = Maps.newHashMapWithExpectedSize(allCuboidIds.size());
+        final Map<Long, HyperLogLogPlusCounterNew> result = Maps.newHashMapWithExpectedSize(allCuboidIds.size());
         for (Long cuboidId : allCuboidIds) {
-            result.put(cuboidId, new HyperLogLogPlusCounter(cubeDesc.getConfig().getCubeStatsHLLPrecision()));
+            result.put(cuboidId, new HyperLogLogPlusCounterNew(cubeDesc.getConfig().getCubeStatsHLLPrecision()));
             Integer[] cuboidBitSet = new Integer[Long.bitCount(cuboidId)];
 
             long mask = Long.highestOneBit(baseCuboidId);
@@ -118,9 +118,9 @@ public class CubingUtils {
                 }
             }
 
-            for (Map.Entry<Long, HyperLogLogPlusCounter> longHyperLogLogPlusCounterEntry : result.entrySet()) {
-                Long cuboidId = longHyperLogLogPlusCounterEntry.getKey();
-                HyperLogLogPlusCounter counter = longHyperLogLogPlusCounterEntry.getValue();
+            for (Map.Entry<Long, HyperLogLogPlusCounterNew> longHyperLogLogPlusCounterNewEntry : result.entrySet()) {
+                Long cuboidId = longHyperLogLogPlusCounterNewEntry.getKey();
+                HyperLogLogPlusCounterNew counter = longHyperLogLogPlusCounterNewEntry.getValue();
                 Hasher hc = hf.newHasher();
                 final Integer[] cuboidBitSet = allCuboidsBitSet.get(cuboidId);
                 for (int position = 0; position < cuboidBitSet.length; position++) {

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/core-cube/src/main/java/org/apache/kylin/gridtable/UnitTestSupport.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/gridtable/UnitTestSupport.java b/core-cube/src/main/java/org/apache/kylin/gridtable/UnitTestSupport.java
index 3396fd2..6cbf237 100644
--- a/core-cube/src/main/java/org/apache/kylin/gridtable/UnitTestSupport.java
+++ b/core-cube/src/main/java/org/apache/kylin/gridtable/UnitTestSupport.java
@@ -26,7 +26,7 @@ import java.util.List;
 import org.apache.kylin.common.util.DateFormat;
 import org.apache.kylin.common.util.ImmutableBitSet;
 import org.apache.kylin.gridtable.GTInfo.Builder;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter;
+import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
 import org.apache.kylin.metadata.datatype.DataType;
 import org.apache.kylin.metadata.datatype.LongMutable;
 
@@ -106,16 +106,16 @@ public class UnitTestSupport {
             String d_01_15 = datePlus("2015-01-15", i * 4);
             String d_01_16 = datePlus("2015-01-16", i * 4);
             String d_01_17 = datePlus("2015-01-17", i * 4);
-            result.add(newRec(info, d_01_14, "Yang", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounter(14)));
-            result.add(newRec(info, d_01_14, "Luke", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounter(14)));
-            result.add(newRec(info, d_01_15, "Xu", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounter(14)));
-            result.add(newRec(info, d_01_15, "Dong", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounter(14)));
-            result.add(newRec(info, d_01_15, "Jason", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounter(14)));
-            result.add(newRec(info, d_01_16, "Mahone", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounter(14)));
-            result.add(newRec(info, d_01_16, "Shaofeng", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounter(14)));
-            result.add(newRec(info, d_01_16, "Qianhao", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounter(14)));
-            result.add(newRec(info, d_01_16, "George", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounter(14)));
-            result.add(newRec(info, d_01_17, "Kejia", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounter(14)));
+            result.add(newRec(info, d_01_14, "Yang", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounterNew(14)));
+            result.add(newRec(info, d_01_14, "Luke", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounterNew(14)));
+            result.add(newRec(info, d_01_15, "Xu", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounterNew(14)));
+            result.add(newRec(info, d_01_15, "Dong", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounterNew(14)));
+            result.add(newRec(info, d_01_15, "Jason", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounterNew(14)));
+            result.add(newRec(info, d_01_16, "Mahone", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounterNew(14)));
+            result.add(newRec(info, d_01_16, "Shaofeng", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounterNew(14)));
+            result.add(newRec(info, d_01_16, "Qianhao", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounterNew(14)));
+            result.add(newRec(info, d_01_16, "George", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounterNew(14)));
+            result.add(newRec(info, d_01_17, "Kejia", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounterNew(14)));
         }
         return result;
     }

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/core-cube/src/main/java/org/apache/kylin/gridtable/benchmark/GTScannerBenchmark2.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/gridtable/benchmark/GTScannerBenchmark2.java b/core-cube/src/main/java/org/apache/kylin/gridtable/benchmark/GTScannerBenchmark2.java
index 40a5e01..f80bd24 100644
--- a/core-cube/src/main/java/org/apache/kylin/gridtable/benchmark/GTScannerBenchmark2.java
+++ b/core-cube/src/main/java/org/apache/kylin/gridtable/benchmark/GTScannerBenchmark2.java
@@ -34,7 +34,7 @@ import org.apache.kylin.gridtable.GTScanRequest;
 import org.apache.kylin.gridtable.GTScanRequestBuilder;
 import org.apache.kylin.gridtable.IGTScanner;
 import org.apache.kylin.gridtable.benchmark.SortedGTRecordGenerator.Randomizer;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter;
+import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
 import org.apache.kylin.metadata.datatype.DataType;
 import org.apache.kylin.metadata.filter.ColumnTupleFilter;
 import org.apache.kylin.metadata.filter.CompareTupleFilter;
@@ -80,7 +80,7 @@ public class GTScannerBenchmark2 {
         gen.addDimension(100, 4, null);
         gen.addMeasure(8);
         gen.addMeasure(8, new Randomizer() {
-            HyperLogLogPlusCounter hllc = new HyperLogLogPlusCounter(12);
+            HyperLogLogPlusCounterNew hllc = new HyperLogLogPlusCounterNew(12);
 
             @Override
             public int fillRandom(Random rand, byte[] array, int offset) {

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/core-cube/src/test/java/org/apache/kylin/gridtable/AggregationCacheMemSizeTest.java
----------------------------------------------------------------------
diff --git a/core-cube/src/test/java/org/apache/kylin/gridtable/AggregationCacheMemSizeTest.java b/core-cube/src/test/java/org/apache/kylin/gridtable/AggregationCacheMemSizeTest.java
index 00c0bd0..66a6b51 100644
--- a/core-cube/src/test/java/org/apache/kylin/gridtable/AggregationCacheMemSizeTest.java
+++ b/core-cube/src/test/java/org/apache/kylin/gridtable/AggregationCacheMemSizeTest.java
@@ -26,7 +26,7 @@ import org.apache.kylin.measure.basic.LongSumAggregator;
 import org.apache.kylin.measure.bitmap.BitmapAggregator;
 import org.apache.kylin.measure.bitmap.BitmapCounter;
 import org.apache.kylin.measure.hllc.HLLCAggregator;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter;
+import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
 import org.apache.kylin.metadata.datatype.DoubleMutable;
 import org.apache.kylin.metadata.datatype.LongMutable;
 import org.github.jamm.MemoryMeter;
@@ -105,7 +105,7 @@ public class AggregationCacheMemSizeTest {
 
     private HLLCAggregator createHLLCAggr() {
         HLLCAggregator hllcAggregator = new HLLCAggregator(14);
-        hllcAggregator.aggregate(new HyperLogLogPlusCounter(14));
+        hllcAggregator.aggregate(new HyperLogLogPlusCounterNew(14));
         return hllcAggregator;
     }
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/core-cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java
----------------------------------------------------------------------
diff --git a/core-cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java b/core-cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java
index 18680ec..cd1aa96 100644
--- a/core-cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java
+++ b/core-cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java
@@ -26,7 +26,7 @@ import java.nio.ByteBuffer;
 import org.apache.kylin.common.util.LocalFileMetadataTestCase;
 import org.apache.kylin.measure.BufferedMeasureCodec;
 import org.apache.kylin.measure.bitmap.BitmapCounter;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter;
+import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
 import org.apache.kylin.metadata.datatype.DoubleMutable;
 import org.apache.kylin.metadata.datatype.LongMutable;
 import org.apache.kylin.metadata.model.FunctionDesc;
@@ -57,7 +57,7 @@ public class MeasureCodecTest extends LocalFileMetadataTestCase {
         DoubleMutable d = new DoubleMutable(1.0);
         LongMutable l = new LongMutable(2);
         BigDecimal b = new BigDecimal("333.1234");
-        HyperLogLogPlusCounter hllc = new HyperLogLogPlusCounter(16);
+        HyperLogLogPlusCounterNew hllc = new HyperLogLogPlusCounterNew(16);
         hllc.add("1234567");
         hllc.add("abcdefg");
         BitmapCounter bitmap = new BitmapCounter();

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java b/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java
index de1b442..031636e 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java
@@ -36,7 +36,7 @@ import org.apache.kylin.metadata.tuple.TupleInfo;
  * MeasureType captures how a kind of aggregation is defined, how it is calculated 
  * during cube build, and how it is involved in query and storage scan.
  * 
- * @param <T> the Java type of aggregation data object, e.g. HyperLogLogPlusCounter
+ * @param <T> the Java type of aggregation data object, e.g. HyperLogLogPlusCounterOld
  */
 abstract public class MeasureType<T> {
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java b/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java
index c5bd482..d94dec9 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java
@@ -62,7 +62,7 @@ import com.google.common.collect.Maps;
   }
 </pre>
  * 
- * @param <T> the Java type of aggregation data object, e.g. HyperLogLogPlusCounter
+ * @param <T> the Java type of aggregation data object, e.g. HyperLogLogPlusCounterNew
  */
 abstract public class MeasureTypeFactory<T> {
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java
new file mode 100644
index 0000000..26ee6ab
--- /dev/null
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+package org.apache.kylin.measure.hllc;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Map;
+
+/**
+ * Array-backed {@link Register} that stores one byte per position, for
+ * {@code 1 << p} positions in total.
+ *
+ * Created by xiefan on 16-12-9.
+ */
+public class DenseRegister implements Register {
+    /** Precision this register was created with. */
+    private int p;
+
+    /** Number of positions, {@code 1 << p}. */
+    private int m;
+
+    /** One value per position; the array has {@code m} elements. */
+    private byte[] register;
+
+    /**
+     * Creates a register with {@code 1 << p} positions, all initially 0.
+     *
+     * @param p precision, i.e. log2 of the number of positions
+     */
+    public DenseRegister(int p) {
+        this.p = p;
+        this.m = 1 << p;
+        this.register = new byte[m];
+    }
+
+    /**
+     * Overwrites the value stored at {@code pos}.
+     *
+     * @param pos   index of the position to write
+     * @param value value to store
+     */
+    public void set(int pos, byte value) {
+        register[pos] = value;
+    }
+
+    /**
+     * @param pos index of the position to read
+     * @return the value stored at {@code pos}, boxed
+     */
+    @Override
+    public Byte get(int pos) {
+        return register[pos];
+    }
+
+    /**
+     * Merges {@code another} into this register, keeping the larger value at
+     * every position.
+     *
+     * @param another a {@link DenseRegister}, or otherwise a
+     *                {@link SparseRegister}; any other implementation fails
+     *                the cast below
+     */
+    @Override
+    public void merge(Register another) {
+        if (another instanceof DenseRegister) {
+            // NOTE(review): assumes both registers have the same length.
+            byte[] other = ((DenseRegister) another).register;
+            for (int i = 0; i < register.length; i++) {
+                if (other[i] > register[i])
+                    register[i] = other[i];
+            }
+        } else {
+            SparseRegister sr = (SparseRegister) another;
+            Collection<Map.Entry<Integer, Byte>> allValue = sr.getAllValue();
+            for (Map.Entry<Integer, Byte> entry : allValue) {
+                // Unbox once per entry rather than on each comparison and store.
+                int pos = entry.getKey();
+                byte value = entry.getValue();
+                if (value > register[pos])
+                    register[pos] = value;
+            }
+        }
+    }
+
+    /** Resets every position to 0. */
+    @Override
+    public void clear() {
+        Arrays.fill(register, (byte) 0);
+    }
+
+    /**
+     * @return the number of positions holding a value greater than 0
+     */
+    @Override
+    public int getSize() {
+        int size = 0;
+        for (int i = 0; i < m; i++) {
+            if (register[i] > 0)
+                size++;
+        }
+        return size;
+    }
+
+    /**
+     * @return a hash of the register contents, as computed by
+     *         {@link Arrays#hashCode(byte[])}
+     */
+    @Override
+    public int getHashCode() {
+        return Arrays.hashCode(register);
+    }
+
+    /**
+     * @return the backing array itself, not a copy; writes to it change this
+     *         register
+     */
+    public byte[] getRawRegister() {
+        return this.register;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCAggregator.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCAggregator.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCAggregator.java
index aea2df1..ca73285 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCAggregator.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCAggregator.java
@@ -23,10 +23,10 @@ import org.apache.kylin.measure.MeasureAggregator;
 /**
  */
 @SuppressWarnings("serial")
-public class HLLCAggregator extends MeasureAggregator<HyperLogLogPlusCounter> {
+public class HLLCAggregator extends MeasureAggregator<HyperLogLogPlusCounterNew> {
 
     final int precision;
-    HyperLogLogPlusCounter sum = null;
+    HyperLogLogPlusCounterNew sum = null;
 
     public HLLCAggregator(int precision) {
         this.precision = precision;
@@ -38,15 +38,15 @@ public class HLLCAggregator extends MeasureAggregator<HyperLogLogPlusCounter> {
     }
 
     @Override
-    public void aggregate(HyperLogLogPlusCounter value) {
+    public void aggregate(HyperLogLogPlusCounterNew value) {
         if (sum == null)
-            sum = new HyperLogLogPlusCounter(value);
+            sum = new HyperLogLogPlusCounterNew(value);
         else
             sum.merge(value);
     }
 
     @Override
-    public HyperLogLogPlusCounter getState() {
+    public HyperLogLogPlusCounterNew getState() {
         return sum;
     }
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCMeasureType.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCMeasureType.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCMeasureType.java
index 0e58dca..481fa4e 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCMeasureType.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCMeasureType.java
@@ -33,15 +33,15 @@ import org.apache.kylin.metadata.model.TblColRef;
 
 import com.google.common.collect.ImmutableMap;
 
-public class HLLCMeasureType extends MeasureType<HyperLogLogPlusCounter> {
+public class HLLCMeasureType extends MeasureType<HyperLogLogPlusCounterNew> {
 
     public static final String FUNC_COUNT_DISTINCT = FunctionDesc.FUNC_COUNT_DISTINCT;
     public static final String DATATYPE_HLLC = "hllc";
 
-    public static class Factory extends MeasureTypeFactory<HyperLogLogPlusCounter> {
+    public static class Factory extends MeasureTypeFactory<HyperLogLogPlusCounterNew> {
 
         @Override
-        public MeasureType<HyperLogLogPlusCounter> createMeasureType(String funcName, DataType dataType) {
+        public MeasureType<HyperLogLogPlusCounterNew> createMeasureType(String funcName, DataType dataType) {
             return new HLLCMeasureType(funcName, dataType);
         }
 
@@ -56,7 +56,7 @@ public class HLLCMeasureType extends MeasureType<HyperLogLogPlusCounter> {
         }
 
         @Override
-        public Class<? extends DataTypeSerializer<HyperLogLogPlusCounter>> getAggrDataTypeSerializer() {
+        public Class<? extends DataTypeSerializer<HyperLogLogPlusCounterNew>> getAggrDataTypeSerializer() {
             return HLLCSerializer.class;
         }
     }
@@ -91,13 +91,13 @@ public class HLLCMeasureType extends MeasureType<HyperLogLogPlusCounter> {
     }
 
     @Override
-    public MeasureIngester<HyperLogLogPlusCounter> newIngester() {
-        return new MeasureIngester<HyperLogLogPlusCounter>() {
-            HyperLogLogPlusCounter current = new HyperLogLogPlusCounter(dataType.getPrecision());
+    public MeasureIngester<HyperLogLogPlusCounterNew> newIngester() {
+        return new MeasureIngester<HyperLogLogPlusCounterNew>() {
+            HyperLogLogPlusCounterNew current = new HyperLogLogPlusCounterNew(dataType.getPrecision());
 
             @Override
-            public HyperLogLogPlusCounter valueOf(String[] values, MeasureDesc measureDesc, Map<TblColRef, Dictionary<String>> dictionaryMap) {
-                HyperLogLogPlusCounter hllc = current;
+            public HyperLogLogPlusCounterNew valueOf(String[] values, MeasureDesc measureDesc, Map<TblColRef, Dictionary<String>> dictionaryMap) {
+                HyperLogLogPlusCounterNew hllc = current;
                 hllc.clear();
                 for (String v : values) {
                     if (v != null)
@@ -109,7 +109,7 @@ public class HLLCMeasureType extends MeasureType<HyperLogLogPlusCounter> {
     }
 
     @Override
-    public MeasureAggregator<HyperLogLogPlusCounter> newAggregator() {
+    public MeasureAggregator<HyperLogLogPlusCounterNew> newAggregator() {
         return new HLLCAggregator(dataType.getPrecision());
     }
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCSerializer.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCSerializer.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCSerializer.java
index 4d08b6f..1d01abc 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCSerializer.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCSerializer.java
@@ -28,10 +28,10 @@ import org.apache.kylin.metadata.datatype.DataTypeSerializer;
  * @author yangli9
  * 
  */
-public class HLLCSerializer extends DataTypeSerializer<HyperLogLogPlusCounter> {
+public class HLLCSerializer extends DataTypeSerializer<HyperLogLogPlusCounterNew> {
 
     // be thread-safe and avoid repeated obj creation
-    private ThreadLocal<HyperLogLogPlusCounter> current = new ThreadLocal<HyperLogLogPlusCounter>();
+    private ThreadLocal<HyperLogLogPlusCounterNew> current = new ThreadLocal<HyperLogLogPlusCounterNew>();
 
     private int precision;
 
@@ -40,7 +40,7 @@ public class HLLCSerializer extends DataTypeSerializer<HyperLogLogPlusCounter> {
     }
 
     @Override
-    public void serialize(HyperLogLogPlusCounter value, ByteBuffer out) {
+    public void serialize(HyperLogLogPlusCounterNew value, ByteBuffer out) {
         try {
             value.writeRegisters(out);
         } catch (IOException e) {
@@ -48,18 +48,18 @@ public class HLLCSerializer extends DataTypeSerializer<HyperLogLogPlusCounter> {
         }
     }
 
-    private HyperLogLogPlusCounter current() {
-        HyperLogLogPlusCounter hllc = current.get();
+    private HyperLogLogPlusCounterNew current() {
+        HyperLogLogPlusCounterNew hllc = current.get();
         if (hllc == null) {
-            hllc = new HyperLogLogPlusCounter(precision);
+            hllc = new HyperLogLogPlusCounterNew(precision);
             current.set(hllc);
         }
         return hllc;
     }
 
     @Override
-    public HyperLogLogPlusCounter deserialize(ByteBuffer in) {
-        HyperLogLogPlusCounter hllc = current();
+    public HyperLogLogPlusCounterNew deserialize(ByteBuffer in) {
+        HyperLogLogPlusCounterNew hllc = current();
         try {
             hllc.readRegisters(in);
         } catch (IOException e) {

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLDistinctCountAggFunc.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLDistinctCountAggFunc.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLDistinctCountAggFunc.java
index 8f2a0fa..a72ad09 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLDistinctCountAggFunc.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLDistinctCountAggFunc.java
@@ -31,21 +31,21 @@ public class HLLDistinctCountAggFunc {
 
     private static final Logger logger = LoggerFactory.getLogger(HLLDistinctCountAggFunc.class);
 
-    public static HyperLogLogPlusCounter init() {
+    public static HyperLogLogPlusCounterNew init() {
         return null;
     }
 
-    public static HyperLogLogPlusCounter initAdd(Object v) {
+    public static HyperLogLogPlusCounterNew initAdd(Object v) {
         if (v instanceof Long) { // holistic case
             long l = (Long) v;
             return new FixedValueHLLCMockup(l);
         } else {
-            HyperLogLogPlusCounter c = (HyperLogLogPlusCounter) v;
-            return new HyperLogLogPlusCounter(c);
+            HyperLogLogPlusCounterNew c = (HyperLogLogPlusCounterNew) v;
+            return new HyperLogLogPlusCounterNew(c);
         }
     }
 
-    public static HyperLogLogPlusCounter add(HyperLogLogPlusCounter counter, Object v) {
+    public static HyperLogLogPlusCounterNew add(HyperLogLogPlusCounterNew counter, Object v) {
         if (v instanceof Long) { // holistic case
             long l = (Long) v;
             if (counter == null) {
@@ -58,9 +58,9 @@ public class HLLDistinctCountAggFunc {
                 return counter;
             }
         } else {
-            HyperLogLogPlusCounter c = (HyperLogLogPlusCounter) v;
+            HyperLogLogPlusCounterNew c = (HyperLogLogPlusCounterNew) v;
             if (counter == null) {
-                return new HyperLogLogPlusCounter(c);
+                return new HyperLogLogPlusCounterNew(c);
             } else {
                 counter.merge(c);
                 return counter;
@@ -68,16 +68,16 @@ public class HLLDistinctCountAggFunc {
         }
     }
 
-    public static HyperLogLogPlusCounter merge(HyperLogLogPlusCounter counter0, Object counter1) {
+    public static HyperLogLogPlusCounterNew merge(HyperLogLogPlusCounterNew counter0, Object counter1) {
         return add(counter0, counter1);
     }
 
-    public static long result(HyperLogLogPlusCounter counter) {
+    public static long result(HyperLogLogPlusCounterNew counter) {
         return counter == null ? 0L : counter.getCountEstimate();
     }
 
     @SuppressWarnings("serial")
-    private static class FixedValueHLLCMockup extends HyperLogLogPlusCounter {
+    private static class FixedValueHLLCMockup extends HyperLogLogPlusCounterNew {
 
         private Long value = null;
 
@@ -107,7 +107,7 @@ public class HLLDistinctCountAggFunc {
         }
 
         @Override
-        public void merge(HyperLogLogPlusCounter another) {
+        public void merge(HyperLogLogPlusCounterNew another) {
             throw new UnsupportedOperationException();
         }
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HyperLogLogPlusCounter.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HyperLogLogPlusCounter.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HyperLogLogPlusCounter.java
deleted file mode 100644
index 00407f9..0000000
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HyperLogLogPlusCounter.java
+++ /dev/null
@@ -1,392 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.measure.hllc;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.nio.ByteBuffer;
-import java.nio.charset.Charset;
-import java.util.Arrays;
-
-import org.apache.kylin.common.util.BytesUtil;
-
-import com.google.common.hash.HashFunction;
-import com.google.common.hash.Hashing;
-
-/**
- * About compression, test on HLLC data shows
- * 
- * - LZF compression ratio is around 65%-80%, fast
- * - GZIP compression ratio is around 41%-46%, very slow
- * 
- * @author yangli9
- */
-@SuppressWarnings("serial")
-public class HyperLogLogPlusCounter implements Serializable, Comparable<HyperLogLogPlusCounter> {
-
-    private final int p;
-    private final int m;
-    private final HashFunction hashFunc;
-    byte[] registers;
-    int singleBucket;
-
-    public HyperLogLogPlusCounter() {
-        this(10);
-    }
-
-    public HyperLogLogPlusCounter(int p) {
-        this(p, Hashing.murmur3_128());
-    }
-
-    public HyperLogLogPlusCounter(HyperLogLogPlusCounter another) {
-        this(another.p, another.hashFunc);
-        merge(another);
-    }
-
-    /** The larger p is, the more storage (2^p bytes), the better accuracy */
-    private HyperLogLogPlusCounter(int p, HashFunction hashFunc) {
-        this.p = p;
-        this.m = 1 << p;//(int) Math.pow(2, p);
-        this.hashFunc = hashFunc;
-        this.registers = new byte[m];
-        this.singleBucket = -1;
-    }
-
-    public void clear() {
-        byte zero = (byte) 0;
-        if (singleBucket == -1) {
-            //nothing
-        } else if (singleBucket >= 0) {
-            registers[singleBucket] = 0;
-        } else {
-            Arrays.fill(registers, zero);
-        }
-        singleBucket = -1;
-    }
-
-    public void add(int value) {
-        add(hashFunc.hashInt(value).asLong());
-    }
-
-    public void add(String value) {
-        add(hashFunc.hashString(value, Charset.defaultCharset()).asLong());
-    }
-
-    public void add(byte[] value) {
-        add(hashFunc.hashBytes(value).asLong());
-    }
-
-    public void add(byte[] value, int offset, int length) {
-        add(hashFunc.hashBytes(value, offset, length).asLong());
-    }
-
-    protected void add(long hash) {
-        int bucketMask = m - 1;
-        int bucket = (int) (hash & bucketMask);
-        int firstOnePos = Long.numberOfLeadingZeros(hash | bucketMask) + 1;
-
-        if (firstOnePos > registers[bucket])
-            registers[bucket] = (byte) firstOnePos;
-
-        if (singleBucket == -1)
-            singleBucket = bucket;
-        else
-            singleBucket = Integer.MIN_VALUE;
-    }
-
-    public void merge(HyperLogLogPlusCounter another) {
-        assert this.p == another.p;
-        assert this.hashFunc == another.hashFunc;
-
-        // quick path for single value HLLC
-        if (another.singleBucket == -1) {
-            return;
-        } else if (another.singleBucket >= 0) {
-            int b = another.singleBucket;
-            if (registers[b] < another.registers[b])
-                registers[b] = another.registers[b];
-        } else {
-            // normal path
-            for (int i = 0; i < m; i++) {
-                if (registers[i] < another.registers[i])
-                    registers[i] = another.registers[i];
-            }
-        }
-        singleBucket = Integer.MIN_VALUE;
-    }
-
-    public long getCountEstimate() {
-        return new HLLCSnapshot(this).getCountEstimate();
-    }
-
-    public int getPrecision() {
-        return this.p;
-    }
-
-    public double getErrorRate() {
-        return 1.04 / Math.sqrt(m);
-    }
-
-    private int size() {
-        if (singleBucket == -1) {
-            return 0;
-        } else if (singleBucket >= 0) {
-            return 1;
-        } else {
-            int size = 0;
-            for (int i = 0; i < m; i++) {
-                if (registers[i] > 0)
-                    size++;
-            }
-            return size;
-        }
-    }
-
-    @Override
-    public String toString() {
-        return "" + getCountEstimate();
-    }
-
-    // ============================================================================
-
-    // a memory efficient snapshot of HLL registers which can yield count
-    // estimate later
-    public static class HLLCSnapshot {
-        byte p;
-        double registerSum;
-        int zeroBuckets;
-
-        public HLLCSnapshot(HyperLogLogPlusCounter hllc) {
-            p = (byte) hllc.p;
-            registerSum = 0;
-            zeroBuckets = 0;
-
-            byte[] registers = hllc.registers;
-            for (int i = 0; i < hllc.m; i++) {
-                if (registers[i] == 0) {
-                    registerSum++;
-                    zeroBuckets++;
-                } else {
-                    registerSum += 1.0 / (1L << registers[i]);
-                }
-            }
-        }
-
-        public long getCountEstimate() {
-            int m = 1 << p;
-            double alpha = 0.7213 / (1 + 1.079 / m);
-            double estimate = alpha * m * m / registerSum;
-
-            // small cardinality adjustment
-            if (zeroBuckets >= m * 0.07) { // (reference presto's HLL impl)
-                estimate = m * Math.log(m * 1.0 / zeroBuckets);
-            } else if (HyperLogLogPlusTable.isBiasCorrection(m, estimate)) {
-                estimate = HyperLogLogPlusTable.biasCorrection(p, estimate);
-            }
-
-            return Math.round(estimate);
-        }
-    }
-
-    // ============================================================================
-
-    public void writeRegisters(final ByteBuffer out) throws IOException {
-
-        final int indexLen = getRegisterIndexSize();
-        int size = size();
-
-        // decide output scheme -- map (3*size bytes) or array (2^p bytes)
-        byte scheme;
-        if (5 + (indexLen + 1) * size < m) // 5 is max len of vint
-            scheme = 0; // map
-        else
-            scheme = 1; // array
-        out.put(scheme);
-
-        if (scheme == 0) { // map scheme
-            BytesUtil.writeVInt(size, out);
-            if (singleBucket == -1) {
-                // no non-zero register
-            } else if (singleBucket >= 0) {
-                writeUnsigned(singleBucket, indexLen, out);
-                out.put(registers[singleBucket]);
-            } else {
-                for (int i = 0; i < m; i++) {
-                    if (registers[i] > 0) {
-                        writeUnsigned(i, indexLen, out);
-                        out.put(registers[i]);
-                    }
-                }
-            }
-        } else if (scheme == 1) { // array scheme
-            out.put(registers);
-        } else
-            throw new IllegalStateException();
-    }
-
-    public void readRegisters(ByteBuffer in) throws IOException {
-        byte scheme = in.get();
-
-        if (scheme == 0) { // map scheme
-            clear();
-            int size = BytesUtil.readVInt(in);
-            if (size > m)
-                throw new IllegalArgumentException("register size (" + size + ") cannot be larger than m (" + m + ")");
-            int indexLen = getRegisterIndexSize();
-            int key = 0;
-            for (int i = 0; i < size; i++) {
-                key = readUnsigned(in, indexLen);
-                registers[key] = in.get();
-            }
-
-            if (size == 0)
-                singleBucket = -1;
-            else if (size == 1)
-                singleBucket = key;
-            else
-                singleBucket = Integer.MIN_VALUE;
-
-        } else if (scheme == 1) { // array scheme
-            in.get(registers);
-            singleBucket = Integer.MIN_VALUE;
-        } else
-            throw new IllegalStateException();
-    }
-
-    public int peekLength(ByteBuffer in) {
-        int mark = in.position();
-        int len;
-
-        byte scheme = in.get();
-        if (scheme == 0) { // map scheme
-            int size = BytesUtil.readVInt(in);
-            int indexLen = getRegisterIndexSize();
-            len = in.position() - mark + (indexLen + 1) * size;
-        } else {
-            len = in.position() - mark + m;
-        }
-
-        in.position(mark);
-        return len;
-    }
-
-    public int maxLength() {
-        return 1 + m;
-    }
-
-    public void writeRegistersArray(final ByteBuffer out) {
-        out.put(this.registers);
-    }
-
-    public void readRegistersArray(ByteBuffer in) {
-        in.get(registers, 0, m);
-        singleBucket = Integer.MIN_VALUE;
-    }
-
-    private int getRegisterIndexSize() {
-        return (p - 1) / 8 + 1; // 2 when p=16, 3 when p=17
-    }
-
-    @Override
-    public int hashCode() {
-        final int prime = 31;
-        int result = 1;
-        result = prime * result + ((hashFunc == null) ? 0 : hashFunc.hashCode());
-        result = prime * result + p;
-        result = prime * result + Arrays.hashCode(registers);
-        return result;
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-        if (this == obj)
-            return true;
-        if (obj == null)
-            return false;
-        if (getClass() != obj.getClass())
-            return false;
-        HyperLogLogPlusCounter other = (HyperLogLogPlusCounter) obj;
-        if (hashFunc == null) {
-            if (other.hashFunc != null)
-                return false;
-        } else if (!hashFunc.equals(other.hashFunc))
-            return false;
-        if (p != other.p)
-            return false;
-        if (!Arrays.equals(registers, other.registers))
-            return false;
-        return true;
-    }
-
-    @Override
-    public int compareTo(HyperLogLogPlusCounter o) {
-        if (o == null)
-            return 1;
-
-        long e1 = this.getCountEstimate();
-        long e2 = o.getCountEstimate();
-
-        if (e1 == e2)
-            return 0;
-        else if (e1 > e2)
-            return 1;
-        else
-            return -1;
-    }
-
-    public static void main(String[] args) throws IOException {
-        dumpErrorRates();
-    }
-
-    static void dumpErrorRates() {
-        for (int p = 10; p <= 18; p++) {
-            double rate = new HyperLogLogPlusCounter(p).getErrorRate();
-            double er = Math.round(rate * 10000) / 100D;
-            double er2 = Math.round(rate * 2 * 10000) / 100D;
-            double er3 = Math.round(rate * 3 * 10000) / 100D;
-            long size = Math.round(Math.pow(2, p));
-            System.out.println("HLLC" + p + ",\t" + size + " bytes,\t68% err<" + er + "%" + ",\t95% err<" + er2 + "%" + ",\t99.7% err<" + er3 + "%");
-        }
-    }
-
-    /**
-     *
-     * @param num
-     * @param size
-     * @param out
-     */
-    public static void writeUnsigned(int num, int size, ByteBuffer out) {
-        for (int i = 0; i < size; i++) {
-            out.put((byte) num);
-            num >>>= 8;
-        }
-    }
-
-    public static int readUnsigned(ByteBuffer in, int size) {
-        int integer = 0;
-        int mask = 0xff;
-        int shift = 0;
-        for (int i = 0; i < size; i++) {
-            integer |= (in.get() << shift) & mask;
-            mask = mask << 8;
-            shift += 8;
-        }
-        return integer;
-    }
-}

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HyperLogLogPlusCounterNew.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HyperLogLogPlusCounterNew.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HyperLogLogPlusCounterNew.java
new file mode 100644
index 0000000..d7329f6
--- /dev/null
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HyperLogLogPlusCounterNew.java
@@ -0,0 +1,388 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.measure.hllc;
+
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hashing;
+import org.apache.kylin.common.util.BytesUtil;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.util.Collection;
+import java.util.Map;
+
+@SuppressWarnings("serial")
+public class HyperLogLogPlusCounterNew implements Serializable, Comparable<HyperLogLogPlusCounterNew> {
+
+    private int p; // precision; the counter has 2^p registers
+
+    private int m; // number of registers, 1 << p
+
+    private HashFunction hashFunc = Hashing.murmur3_128();
+
+    private Register register;
+
+    public static double overflowFactor = 0.01; // a sparse register gets threshold (int) (overflowFactor * m)
+    /** Creates an empty counter with 2^p registers; {@code type} selects a sparse or a dense register. */
+    public HyperLogLogPlusCounterNew(int p, RegisterType type, HashFunction hashFunc) {
+        this.p = p;
+        this.m = 1 << p;//(int) Math.pow(2, p);
+        this.hashFunc = hashFunc;
+        if (type == RegisterType.SPARSE) {
+            double over = overflowFactor * m;
+            this.register = new SparseRegister((int) over);
+        } else {
+            this.register = new DenseRegister(p);
+        }
+    }
+
+    public HyperLogLogPlusCounterNew() {
+        this(10, RegisterType.SPARSE, Hashing.murmur3_128());
+    }
+
+    public HyperLogLogPlusCounterNew(int p) {
+        this(p, RegisterType.SPARSE, Hashing.murmur3_128());
+    }
+
+    public HyperLogLogPlusCounterNew(int p, RegisterType type) {
+        this(p, type, Hashing.murmur3_128());
+    }
+
+    public HyperLogLogPlusCounterNew(int p, HashFunction hashFunc) {
+        this(p, RegisterType.SPARSE, hashFunc);
+    }
+    /** Copy constructor: takes p and the hash function from {@code another}, then merges its registers in. */
+    public HyperLogLogPlusCounterNew(HyperLogLogPlusCounterNew another) {
+        this(another.p, another.hashFunc);
+        merge(another);
+    }
+
+    public void add(int value) {
+        add(hashFunc.hashInt(value).asLong());
+    }
+    // NOTE(review): hashes the string in Charset.defaultCharset(), which can differ between JVMs -- confirm intended
+    public void add(String value) {
+        add(hashFunc.hashString(value, Charset.defaultCharset()).asLong());
+    }
+
+    public void add(byte[] value) {
+        add(hashFunc.hashBytes(value).asLong());
+    }
+
+    public void add(byte[] value, int offset, int length) {
+        add(hashFunc.hashBytes(value, offset, length).asLong());
+    }
+    /** Records a 64-bit hash: the low p bits pick the bucket, which keeps the largest firstOnePos seen so far. */
+    protected void add(long hash) {
+        int bucketMask = m - 1;
+        int bucket = (int) (hash & bucketMask);
+        int firstOnePos = Long.numberOfLeadingZeros(hash | bucketMask) + 1;
+        Byte b = register.get(bucket);
+        if (b == null || (byte) firstOnePos > b) {
+            register.set(bucket, (byte) firstOnePos);
+        }
+        if (register instanceof SparseRegister) {
+            if (((SparseRegister) register).isOverThreshold()) {
+                register = ((SparseRegister) register).toDense(p);
+            }
+        }
+    }
+    /** Merges {@code another} into this counter, switching this counter's register from sparse to dense when needed. */
+    public void merge(HyperLogLogPlusCounterNew another) {
+        assert this.p == another.p;
+        assert this.hashFunc == another.hashFunc;
+        if (register instanceof SparseRegister && another.register instanceof SparseRegister) {
+            register.merge(another.register);
+            if (((SparseRegister) register).isOverThreshold()) {
+                register = ((SparseRegister) register).toDense(p);
+            }
+        } else if (register instanceof SparseRegister && another.register instanceof DenseRegister) {
+            register = ((SparseRegister) register).toDense(p);
+            register.merge(another.register);
+        } else {
+            register.merge(another.register);
+        }
+    }
+
+    public long getCountEstimate() {
+        return new HLLCSnapshot(this).getCountEstimate();
+    }
+
+    public int getPrecision() {
+        return this.p;
+    }
+
+    public double getErrorRate() {
+        return 1.04 / Math.sqrt(m);
+    }
+
+    @Override
+    public String toString() {
+        return "" + getCountEstimate();
+    }
+
+    // ============================================================================
+
+    // a memory efficient snapshot of HLL registers which can yield count
+    // estimate later
+    public static class HLLCSnapshot {
+        byte p;
+        double registerSum;
+        int zeroBuckets;
+        // registerSum adds 1 for every zero register and 2^-value for every other one; zeroBuckets counts the zeros
+        public HLLCSnapshot(HyperLogLogPlusCounterNew hllc) {
+            p = (byte) hllc.p;
+            registerSum = 0;
+            zeroBuckets = 0;
+            Register register = hllc.getRegister();
+            DenseRegister dr;
+            if (register instanceof SparseRegister) {
+                dr = ((SparseRegister) register).toDense(p);
+            } else {
+                dr = (DenseRegister) register;
+            }
+            byte[] registers = dr.getRawRegister();
+            for (int i = 0; i < hllc.m; i++) {
+                if (registers[i] == 0) {
+                    registerSum++;
+                    zeroBuckets++;
+                } else {
+                    registerSum += 1.0 / (1L << registers[i]);
+                }
+            }
+        }
+        // raw estimate is alpha * m^2 / registerSum; the branches below replace it for small cardinalities
+        public long getCountEstimate() {
+            int m = 1 << p;
+            double alpha = 0.7213 / (1 + 1.079 / m);
+            double estimate = alpha * m * m / registerSum;
+
+            // small cardinality adjustment
+            if (zeroBuckets >= m * 0.07) { // (reference presto's HLL impl)
+                estimate = m * Math.log(m * 1.0 / zeroBuckets);
+            } else if (HyperLogLogPlusTable.isBiasCorrection(m, estimate)) {
+                estimate = HyperLogLogPlusTable.biasCorrection(p, estimate);
+            }
+
+            return Math.round(estimate);
+        }
+    }
+
+    public static void main(String[] args) throws IOException {
+        dumpErrorRates();
+    }
+    /** Prints one line per p in 10..18: 2^p as the size in bytes and the error rate times 1, 2 and 3, as percentages. */
+    static void dumpErrorRates() {
+        for (int p = 10; p <= 18; p++) {
+            double rate = new HyperLogLogPlusCounterNew(p, RegisterType.SPARSE).getErrorRate();
+            double er = Math.round(rate * 10000) / 100D;
+            double er2 = Math.round(rate * 2 * 10000) / 100D;
+            double er3 = Math.round(rate * 3 * 10000) / 100D;
+            long size = Math.round(Math.pow(2, p));
+            System.out.println("HLLC" + p + ",\t" + size + " bytes,\t68% err<" + er + "%" + ",\t95% err<" + er2 + "%" + ",\t99.7% err<" + er3 + "%");
+        }
+    }
+
+    public Register getRegister() {
+        return register;
+    }
+
+    public void clear() {
+        register.clear();
+    }
+
+    public RegisterType getRegisterType() {
+        if (register instanceof SparseRegister)
+            return RegisterType.SPARSE;
+        else
+            return RegisterType.DENSE;
+    }
+
+    // ============================================================================
+    /** Writes a scheme byte (0 = map, 1 = array), then either a vint count plus (index, value) pairs or the raw array. */
+    public void writeRegisters(final ByteBuffer out) throws IOException {
+
+        final int indexLen = getRegisterIndexSize();
+        int size = size();
+
+        // decide output scheme -- map ((indexLen + 1) bytes per entry) or array (2^p bytes)
+        byte scheme;
+        //byte type;
+        if (register instanceof SparseRegister || 5 + (indexLen + 1) * size < m) { // 5 is max len of vint
+            scheme = 0; //map
+        } else {
+            scheme = 1; // array
+        }
+        out.put(scheme);
+        if (scheme == 0) { // map scheme
+            BytesUtil.writeVInt(size, out);
+            if (register instanceof SparseRegister) { // sparse register
+                Collection<Map.Entry<Integer, Byte>> allValue = ((SparseRegister) register).getAllValue();
+                for (Map.Entry<Integer, Byte> entry : allValue) {
+                    writeUnsigned(entry.getKey(), indexLen, out);
+                    out.put(entry.getValue());
+                }
+            } else { //dense register
+                byte[] registers = ((DenseRegister) register).getRawRegister();
+                for (int i = 0; i < m; i++) {
+                    if (registers[i] > 0) {
+                        writeUnsigned(i, indexLen, out);
+                        out.put(registers[i]);
+                    }
+                }
+            }
+        } else if (scheme == 1) { // array scheme
+            out.put(((DenseRegister) register).getRawRegister());
+        } else
+            throw new IllegalStateException();
+    }
+    /** Reads what writeRegisters wrote and replaces the current register with the decoded one. */
+    public void readRegisters(ByteBuffer in) throws IOException {
+        byte scheme = in.get();
+        if (scheme == 0) { // map scheme
+            clear();
+            int size = BytesUtil.readVInt(in);
+            if (size > m)
+                throw new IllegalArgumentException("register size (" + size + ") cannot be larger than m (" + m + ")");
+            double over = overflowFactor * m;
+            if (size > (int) over) {
+                this.register = new DenseRegister(p);
+            } else {
+                this.register = new SparseRegister((int) over);//default is sparse
+            }
+            int indexLen = getRegisterIndexSize();
+            int key = 0;
+            for (int i = 0; i < size; i++) {
+                key = readUnsigned(in, indexLen);
+                register.set(key, in.get());
+            }
+        } else if (scheme == 1) { // array scheme
+            this.register = new DenseRegister(p);
+            for (int i = 0; i < m; i++) {
+                register.set(i, in.get());
+            }
+        } else
+            throw new IllegalStateException();
+    }
+    /** Returns the byte length of the serialized registers starting at the position of {@code in}; the position is restored. */
+    public int peekLength(ByteBuffer in) {
+        int mark = in.position();
+        int len;
+        byte scheme = in.get();
+        if (scheme == 0) { // map scheme
+            int size = BytesUtil.readVInt(in);
+            int indexLen = getRegisterIndexSize();
+            len = in.position() - mark + (indexLen + 1) * size;
+        } else {
+            len = in.position() - mark + m;
+        }
+
+        in.position(mark);
+        return len;
+    }
+    /** Returns 1 + m, the length of the array scheme (one scheme byte plus one byte per register). */
+    public int maxLength() {
+        return 1 + m;
+    }
+
+    private int getRegisterIndexSize() {
+        return (p - 1) / 8 + 1; // 2 when p=16, 3 when p=17
+    }
+
+    @Override
+    public int hashCode() {
+        final int prime = 31;
+        int result = 1;
+        result = prime * result + ((hashFunc == null) ? 0 : hashFunc.hashCode());
+        result = prime * result + p;
+        result = prime * result + register.getHashCode();
+        return result;
+    }
+    // NOTE(review): registers are compared through getRegisterType() and getHashCode() only, not value by value
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj)
+            return true;
+        if (obj == null)
+            return false;
+        if (getClass() != obj.getClass())
+            return false;
+        HyperLogLogPlusCounterNew other = (HyperLogLogPlusCounterNew) obj;
+        if (hashFunc == null) {
+            if (other.hashFunc != null)
+                return false;
+        } else if (!hashFunc.equals(other.hashFunc))
+            return false;
+        if (p != other.p)
+            return false;
+        if (this.getRegisterType() != other.getRegisterType())
+            return false;
+        if (register.getHashCode() != other.register.getHashCode())
+            return false;
+        return true;
+    }
+    /** Orders counters by getCountEstimate(); this counter sorts after a null argument. */
+    @Override
+    public int compareTo(HyperLogLogPlusCounterNew o) {
+        if (o == null)
+            return 1;
+
+        long e1 = this.getCountEstimate();
+        long e2 = o.getCountEstimate();
+
+        if (e1 == e2)
+            return 0;
+        else if (e1 > e2)
+            return 1;
+        else
+            return -1;
+    }
+
+    /**
+     * Writes the low {@code size} bytes of {@code num} to {@code out}, least significant byte first.
+     * @param num value to write
+     * @param size number of bytes to write
+     * @param out buffer to write into, at its current position
+     */
+    public static void writeUnsigned(int num, int size, ByteBuffer out) {
+        for (int i = 0; i < size; i++) {
+            out.put((byte) num);
+            num >>>= 8;
+        }
+    }
+    /** Reads {@code size} bytes from {@code in}, least significant byte first, and combines them into an int. */
+    public static int readUnsigned(ByteBuffer in, int size) {
+        int integer = 0;
+        int mask = 0xff;
+        int shift = 0;
+        for (int i = 0; i < size; i++) {
+            integer |= (in.get() << shift) & mask;
+            mask = mask << 8;
+            shift += 8;
+        }
+        return integer;
+    }
+
+    private int size() {
+        return register.getSize();
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HyperLogLogPlusCounterOld.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HyperLogLogPlusCounterOld.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HyperLogLogPlusCounterOld.java
new file mode 100644
index 0000000..cb5533e
--- /dev/null
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HyperLogLogPlusCounterOld.java
@@ -0,0 +1,392 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.measure.hllc;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.util.Arrays;
+
+import org.apache.kylin.common.util.BytesUtil;
+
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hashing;
+
+/**
+ * About compression, test on HLLC data shows
+ * 
+ * - LZF compression ratio is around 65%-80%, fast
+ * - GZIP compression ratio is around 41%-46%, very slow
+ * 
+ * @author yangli9
+ */
+@SuppressWarnings("serial")
+public class HyperLogLogPlusCounterOld implements Serializable, Comparable<HyperLogLogPlusCounterOld> {
+
+    private final int p; // precision; the counter has 2^p registers
+    private final int m; // number of registers, 1 << p
+    private final HashFunction hashFunc;
+    byte[] registers; // one byte per bucket, 0 until a value is recorded
+    int singleBucket; // -1: empty; >= 0: the only bucket written so far; Integer.MIN_VALUE: any bucket may be set
+
+    public HyperLogLogPlusCounterOld() {
+        this(10);
+    }
+
+    public HyperLogLogPlusCounterOld(int p) {
+        this(p, Hashing.murmur3_128());
+    }
+
+    public HyperLogLogPlusCounterOld(HyperLogLogPlusCounterOld another) {
+        this(another.p, another.hashFunc);
+        merge(another);
+    }
+
+    /** The larger p is, the more storage (2^p bytes), the better accuracy */
+    private HyperLogLogPlusCounterOld(int p, HashFunction hashFunc) {
+        this.p = p;
+        this.m = 1 << p;//(int) Math.pow(2, p);
+        this.hashFunc = hashFunc;
+        this.registers = new byte[m];
+        this.singleBucket = -1;
+    }
+    /** Zeroes the registers, writing only the part that singleBucket says can be non-zero. */
+    public void clear() {
+        byte zero = (byte) 0;
+        if (singleBucket == -1) {
+            //nothing
+        } else if (singleBucket >= 0) {
+            registers[singleBucket] = 0;
+        } else {
+            Arrays.fill(registers, zero);
+        }
+        singleBucket = -1;
+    }
+
+    public void add(int value) {
+        add(hashFunc.hashInt(value).asLong());
+    }
+    // NOTE(review): hashes the string in Charset.defaultCharset(), which can differ between JVMs -- confirm intended
+    public void add(String value) {
+        add(hashFunc.hashString(value, Charset.defaultCharset()).asLong());
+    }
+
+    public void add(byte[] value) {
+        add(hashFunc.hashBytes(value).asLong());
+    }
+
+    public void add(byte[] value, int offset, int length) {
+        add(hashFunc.hashBytes(value, offset, length).asLong());
+    }
+    /** Records a 64-bit hash: the low p bits pick the bucket, which keeps the largest firstOnePos seen so far. */
+    protected void add(long hash) {
+        int bucketMask = m - 1;
+        int bucket = (int) (hash & bucketMask);
+        int firstOnePos = Long.numberOfLeadingZeros(hash | bucketMask) + 1;
+
+        if (firstOnePos > registers[bucket])
+            registers[bucket] = (byte) firstOnePos;
+
+        if (singleBucket == -1)
+            singleBucket = bucket;
+        else
+            singleBucket = Integer.MIN_VALUE;
+    }
+    /** Merges {@code another} into this counter by keeping the larger value of each bucket. */
+    public void merge(HyperLogLogPlusCounterOld another) {
+        assert this.p == another.p;
+        assert this.hashFunc == another.hashFunc;
+
+        // quick path for single value HLLC
+        if (another.singleBucket == -1) {
+            return;
+        } else if (another.singleBucket >= 0) {
+            int b = another.singleBucket;
+            if (registers[b] < another.registers[b])
+                registers[b] = another.registers[b];
+        } else {
+            // normal path
+            for (int i = 0; i < m; i++) {
+                if (registers[i] < another.registers[i])
+                    registers[i] = another.registers[i];
+            }
+        }
+        singleBucket = Integer.MIN_VALUE;
+    }
+
+    public long getCountEstimate() {
+        return new HLLCSnapshot(this).getCountEstimate();
+    }
+
+    public int getPrecision() {
+        return this.p;
+    }
+
+    public double getErrorRate() {
+        return 1.04 / Math.sqrt(m);
+    }
+    /** Returns how many entries the map scheme would write: 0 or 1 on the singleBucket shortcuts, else the non-zero count. */
+    private int size() {
+        if (singleBucket == -1) {
+            return 0;
+        } else if (singleBucket >= 0) {
+            return 1;
+        } else {
+            int size = 0;
+            for (int i = 0; i < m; i++) {
+                if (registers[i] > 0)
+                    size++;
+            }
+            return size;
+        }
+    }
+
+    @Override
+    public String toString() {
+        return "" + getCountEstimate();
+    }
+
+    // ============================================================================
+
+    // a memory efficient snapshot of HLL registers which can yield count
+    // estimate later
+    public static class HLLCSnapshot {
+        byte p;
+        double registerSum;
+        int zeroBuckets;
+        // registerSum adds 1 for every zero register and 2^-value for every other one; zeroBuckets counts the zeros
+        public HLLCSnapshot(HyperLogLogPlusCounterOld hllc) {
+            p = (byte) hllc.p;
+            registerSum = 0;
+            zeroBuckets = 0;
+
+            byte[] registers = hllc.registers;
+            for (int i = 0; i < hllc.m; i++) {
+                if (registers[i] == 0) {
+                    registerSum++;
+                    zeroBuckets++;
+                } else {
+                    registerSum += 1.0 / (1L << registers[i]);
+                }
+            }
+        }
+        // raw estimate is alpha * m^2 / registerSum; the branches below replace it for small cardinalities
+        public long getCountEstimate() {
+            int m = 1 << p;
+            double alpha = 0.7213 / (1 + 1.079 / m);
+            double estimate = alpha * m * m / registerSum;
+
+            // small cardinality adjustment
+            if (zeroBuckets >= m * 0.07) { // (reference presto's HLL impl)
+                estimate = m * Math.log(m * 1.0 / zeroBuckets);
+            } else if (HyperLogLogPlusTable.isBiasCorrection(m, estimate)) {
+                estimate = HyperLogLogPlusTable.biasCorrection(p, estimate);
+            }
+
+            return Math.round(estimate);
+        }
+    }
+
+    // ============================================================================
+    /** Writes a scheme byte (0 = map, 1 = array), then either a vint count plus (index, value) pairs or the raw array. */
+    public void writeRegisters(final ByteBuffer out) throws IOException {
+
+        final int indexLen = getRegisterIndexSize();
+        int size = size();
+
+        // decide output scheme -- map ((indexLen + 1) bytes per entry) or array (2^p bytes)
+        byte scheme;
+        if (5 + (indexLen + 1) * size < m) // 5 is max len of vint
+            scheme = 0; // map
+        else
+            scheme = 1; // array
+        out.put(scheme);
+
+        if (scheme == 0) { // map scheme
+            BytesUtil.writeVInt(size, out);
+            if (singleBucket == -1) {
+                // no non-zero register
+            } else if (singleBucket >= 0) {
+                writeUnsigned(singleBucket, indexLen, out);
+                out.put(registers[singleBucket]);
+            } else {
+                for (int i = 0; i < m; i++) {
+                    if (registers[i] > 0) {
+                        writeUnsigned(i, indexLen, out);
+                        out.put(registers[i]);
+                    }
+                }
+            }
+        } else if (scheme == 1) { // array scheme
+            out.put(registers);
+        } else
+            throw new IllegalStateException();
+    }
+    /** Reads what writeRegisters wrote into this counter's registers and updates singleBucket to match. */
+    public void readRegisters(ByteBuffer in) throws IOException {
+        byte scheme = in.get();
+
+        if (scheme == 0) { // map scheme
+            clear();
+            int size = BytesUtil.readVInt(in);
+            if (size > m)
+                throw new IllegalArgumentException("register size (" + size + ") cannot be larger than m (" + m + ")");
+            int indexLen = getRegisterIndexSize();
+            int key = 0;
+            for (int i = 0; i < size; i++) {
+                key = readUnsigned(in, indexLen);
+                registers[key] = in.get();
+            }
+
+            if (size == 0)
+                singleBucket = -1;
+            else if (size == 1)
+                singleBucket = key;
+            else
+                singleBucket = Integer.MIN_VALUE;
+
+        } else if (scheme == 1) { // array scheme
+            in.get(registers);
+            singleBucket = Integer.MIN_VALUE;
+        } else
+            throw new IllegalStateException();
+    }
+    /** Returns the byte length of the serialized registers starting at the position of {@code in}; the position is restored. */
+    public int peekLength(ByteBuffer in) {
+        int mark = in.position();
+        int len;
+
+        byte scheme = in.get();
+        if (scheme == 0) { // map scheme
+            int size = BytesUtil.readVInt(in);
+            int indexLen = getRegisterIndexSize();
+            len = in.position() - mark + (indexLen + 1) * size;
+        } else {
+            len = in.position() - mark + m;
+        }
+
+        in.position(mark);
+        return len;
+    }
+    /** Returns 1 + m, the length of the array scheme (one scheme byte plus one byte per register). */
+    public int maxLength() {
+        return 1 + m;
+    }
+
+    /*public void writeRegistersArray(final ByteBuffer out) {
+        out.put(this.registers);
+    }
+
+    public void readRegistersArray(ByteBuffer in) {
+        in.get(registers, 0, m);
+        singleBucket = Integer.MIN_VALUE;
+    }*/
+
+    private int getRegisterIndexSize() {
+        return (p - 1) / 8 + 1; // 2 when p=16, 3 when p=17
+    }
+
+    @Override
+    public int hashCode() {
+        final int prime = 31;
+        int result = 1;
+        result = prime * result + ((hashFunc == null) ? 0 : hashFunc.hashCode());
+        result = prime * result + p;
+        result = prime * result + Arrays.hashCode(registers);
+        return result;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj)
+            return true;
+        if (obj == null)
+            return false;
+        if (getClass() != obj.getClass())
+            return false;
+        HyperLogLogPlusCounterOld other = (HyperLogLogPlusCounterOld) obj;
+        if (hashFunc == null) {
+            if (other.hashFunc != null)
+                return false;
+        } else if (!hashFunc.equals(other.hashFunc))
+            return false;
+        if (p != other.p)
+            return false;
+        if (!Arrays.equals(registers, other.registers))
+            return false;
+        return true;
+    }
+    /** Orders counters by getCountEstimate(); this counter sorts after a null argument. */
+    @Override
+    public int compareTo(HyperLogLogPlusCounterOld o) {
+        if (o == null)
+            return 1;
+
+        long e1 = this.getCountEstimate();
+        long e2 = o.getCountEstimate();
+
+        if (e1 == e2)
+            return 0;
+        else if (e1 > e2)
+            return 1;
+        else
+            return -1;
+    }
+
+    public static void main(String[] args) throws IOException {
+        dumpErrorRates();
+    }
+    /** Prints one line per p in 10..18: 2^p as the size in bytes and the error rate times 1, 2 and 3, as percentages. */
+    static void dumpErrorRates() {
+        for (int p = 10; p <= 18; p++) {
+            double rate = new HyperLogLogPlusCounterOld(p).getErrorRate();
+            double er = Math.round(rate * 10000) / 100D;
+            double er2 = Math.round(rate * 2 * 10000) / 100D;
+            double er3 = Math.round(rate * 3 * 10000) / 100D;
+            long size = Math.round(Math.pow(2, p));
+            System.out.println("HLLC" + p + ",\t" + size + " bytes,\t68% err<" + er + "%" + ",\t95% err<" + er2 + "%" + ",\t99.7% err<" + er3 + "%");
+        }
+    }
+
+    /**
+     * Writes the low {@code size} bytes of {@code num} to {@code out}, least significant byte first.
+     * @param num value to write
+     * @param size number of bytes to write
+     * @param out buffer to write into, at its current position
+     */
+    public static void writeUnsigned(int num, int size, ByteBuffer out) {
+        for (int i = 0; i < size; i++) {
+            out.put((byte) num);
+            num >>>= 8;
+        }
+    }
+    /** Reads {@code size} bytes from {@code in}, least significant byte first, and combines them into an int. */
+    public static int readUnsigned(ByteBuffer in, int size) {
+        int integer = 0;
+        int mask = 0xff;
+        int shift = 0;
+        for (int i = 0; i < size; i++) {
+            integer |= (in.get() << shift) & mask;
+            mask = mask << 8;
+            shift += 8;
+        }
+        return integer;
+    }
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/core-metadata/src/main/java/org/apache/kylin/measure/hllc/Register.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/Register.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/Register.java
new file mode 100644
index 0000000..79c4bba
--- /dev/null
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/Register.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+package org.apache.kylin.measure.hllc;
+
+/**
+ * Storage for the register values of a HyperLogLogPlusCounterNew. Serializable, because that counter is
+ * Serializable and keeps its Register in a non-transient field. Created by xiefan on 16-12-9.
+ */
+public interface Register extends java.io.Serializable {
+    /** Stores {@code value} at position {@code pos}. */
+    void set(int pos, byte value);
+    /** Returns the value at {@code pos}; may be null when nothing is stored there (see SparseRegister). */
+    Byte get(int pos);
+    /** Merges the contents of {@code another} into this register. */
+    void merge(Register another);
+    /** Removes all stored values. */
+    void clear();
+    /** Number of stored entries; the counter writes this as the entry count of its map scheme. */
+    int getSize();
+    /** Content hash; the counter uses it in its own hashCode() and equals(). */
+    int getHashCode();
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/core-metadata/src/main/java/org/apache/kylin/measure/hllc/RegisterType.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/RegisterType.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/RegisterType.java
new file mode 100644
index 0000000..fec9939
--- /dev/null
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/RegisterType.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+package org.apache.kylin.measure.hllc;
+
+/**
+ * Register storage of a counter: SPARSE = SparseRegister, DENSE = DenseRegister. Created by xiefan on 16-12-9.
+ */
+public enum RegisterType {
+    SPARSE, DENSE
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java
new file mode 100644
index 0000000..d241e81
--- /dev/null
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+package org.apache.kylin.measure.hllc;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Map;
+import java.util.TreeMap;
+
+/**
+ * Created by xiefan on 16-12-9.
+ */
+public class SparseRegister implements Register {
+
+    private int overThreshold; // entry count above which isOverThreshold() is true
+
+    private Map<Integer, Byte> sparseRegister = new TreeMap<>(); // position -> value
+
+    public SparseRegister(int overThreshold) {
+        this.overThreshold = overThreshold;
+    }
+
+    /** Builds a DenseRegister for precision p and copies every stored entry into it. */
+    public DenseRegister toDense(int p) {
+        DenseRegister dense = new DenseRegister(p);
+        for (Map.Entry<Integer, Byte> e : sparseRegister.entrySet()) {
+            dense.set(e.getKey(), e.getValue());
+        }
+        return dense;
+    }
+
+    @Override
+    public void set(int pos, byte value) {
+        sparseRegister.put(pos, value);
+    }
+
+    @Override
+    public Byte get(int pos) {
+        return sparseRegister.get(pos);
+    }
+
+    /** Folds another SparseRegister into this one, keeping the larger value for each position. */
+    @Override
+    public void merge(Register another) {
+        assert another instanceof SparseRegister;
+        Map<Integer, Byte> incoming = ((SparseRegister) another).sparseRegister;
+        for (Map.Entry<Integer, Byte> e : incoming.entrySet()) {
+            Byte current = sparseRegister.get(e.getKey());
+            if (current == null || e.getValue() > current)
+                sparseRegister.put(e.getKey(), e.getValue());
+        }
+    }
+
+    @Override
+    public void clear() {
+        sparseRegister.clear();
+    }
+
+    @Override
+    public int getSize() {
+        return sparseRegister.size();
+    }
+
+    /** Hash over every (position, value) pair in position order, multiplying by 31 at each step. */
+    @Override
+    public int getHashCode() {
+        int hash = 1;
+        for (Map.Entry<Integer, Byte> e : sparseRegister.entrySet()) {
+            hash = 31 * hash + e.getKey();
+            hash = 31 * hash + e.getValue();
+        }
+        return hash;
+    }
+
+    public boolean isOverThreshold() {
+        return sparseRegister.size() > overThreshold;
+    }
+
+    /** Unmodifiable collection view of the stored (position, value) entries. */
+    public Collection<Map.Entry<Integer, Byte>> getAllValue() {
+        return Collections.unmodifiableCollection(sparseRegister.entrySet());
+    }
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/core-metadata/src/test/java/org/apache/kylin/measure/AggregatorMemEstimateTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/AggregatorMemEstimateTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/AggregatorMemEstimateTest.java
index 3adec73..103e721 100644
--- a/core-metadata/src/test/java/org/apache/kylin/measure/AggregatorMemEstimateTest.java
+++ b/core-metadata/src/test/java/org/apache/kylin/measure/AggregatorMemEstimateTest.java
@@ -26,7 +26,7 @@ import org.apache.kylin.measure.bitmap.BitmapAggregator;
 import org.apache.kylin.measure.bitmap.BitmapCounter;
 import org.apache.kylin.measure.extendedcolumn.ExtendedColumnMeasureType;
 import org.apache.kylin.measure.hllc.HLLCAggregator;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter;
+import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
 import org.apache.kylin.metadata.datatype.DataType;
 import org.apache.kylin.metadata.datatype.DoubleMutable;
 import org.apache.kylin.metadata.datatype.LongMutable;
@@ -94,7 +94,7 @@ public class AggregatorMemEstimateTest extends LocalFileMetadataTestCase {
     @Test
     public void testAggregatorEstimate() {
         HLLCAggregator hllcAggregator = new HLLCAggregator(14);
-        hllcAggregator.aggregate(new HyperLogLogPlusCounter(14));
+        hllcAggregator.aggregate(new HyperLogLogPlusCounterNew(14));
 
         BitmapAggregator bitmapAggregator = new BitmapAggregator();
         BitmapCounter bitmapCounter = new BitmapCounter();

http://git-wip-us.apache.org/repos/asf/kylin/blob/f05404d5/core-metadata/src/test/java/org/apache/kylin/measure/hll/HyperLogLogCounterOldTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/hll/HyperLogLogCounterOldTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/hll/HyperLogLogCounterOldTest.java
new file mode 100644
index 0000000..5d17fea
--- /dev/null
+++ b/core-metadata/src/test/java/org/apache/kylin/measure/hll/HyperLogLogCounterOldTest.java
@@ -0,0 +1,265 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.measure.hll;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.kylin.common.util.Bytes;
+import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterOld;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * @author yangli9
+ * 
+ */
+public class HyperLogLogCounterOldTest { // exercises HyperLogLogPlusCounterOld: add, merge, register serialization round-trips and estimate accuracy
+
+    ByteBuffer buf = ByteBuffer.allocate(1024 * 1024); // 1 MiB scratch buffer reused by every write/read round-trip below
+    Random rand1 = new Random(1); // fixed seed
+    Random rand2 = new Random(2); // not referenced anywhere else in this class
+    Random rand3 = new Random(3); // fixed seed
+    int errorCount1 = 0; // merge() rounds whose actual error exceeded 1x the counter's error rate
+    int errorCount2 = 0; // ... exceeded 2x the error rate
+    int errorCount3 = 0; // ... exceeded 3x the error rate
+
+    @Test
+    public void testOneAdd() throws IOException { // merges one million single-value counters into one and checks the estimate stays above 90%
+        HyperLogLogPlusCounterOld hllc = new HyperLogLogPlusCounterOld(14);
+        HyperLogLogPlusCounterOld one = new HyperLogLogPlusCounterOld(14);
+        for (int i = 0; i < 1000000; i++) {
+            one.clear(); // reuse the same counter so it holds exactly one value per iteration
+            one.add(rand1.nextInt());
+            hllc.merge(one);
+        }
+        assertTrue(hllc.getCountEstimate() > 1000000 * 0.9);
+    }
+
+    @Test
+    public void testPeekLength() throws IOException { // after every add: peekLength must equal the bytes written, and reading back must reproduce the counter
+        HyperLogLogPlusCounterOld hllc = new HyperLogLogPlusCounterOld(10);
+        HyperLogLogPlusCounterOld copy = new HyperLogLogPlusCounterOld(10);
+        byte[] value = new byte[10];
+        for (int i = 0; i < 200000; i++) {
+            rand1.nextBytes(value);
+            hllc.add(value);
+
+            buf.clear();
+            hllc.writeRegisters(buf);
+
+            int len = buf.position(); // number of bytes writeRegisters produced
+            buf.position(0); // rewind without flip(): the limit stays at capacity
+            assertEquals(len, hllc.peekLength(buf)); // NOTE(review): presumably peekLength leaves the position untouched, since the read below starts at 0 — confirm
+
+            copy.readRegisters(buf);
+            assertEquals(len, buf.position()); // reading must consume exactly what was written
+            assertEquals(hllc, copy); // NOTE(review): presumably relies on the counter's equals() — confirm
+        }
+        buf.clear();
+    }
+
+    private Set<String> generateTestData(int n) { // unions n calls of generateSampleData(); the resulting set size varies with the random group sizes
+        Set<String> testData = new HashSet<String>();
+        for (int i = 0; i < n; i++) {
+            String[] samples = generateSampleData();
+            for (String sample : samples) {
+                testData.add(sample);
+            }
+        }
+        return testData;
+    }
+
+    // simulate the visit (=visitor+id)
+    private String[] generateSampleData() { // builds 1..9 strings of the form <19 digits>-<10 digits> that share one 19-digit header
+
+        StringBuilder buf = new StringBuilder(); // local builder; shadows the ByteBuffer field of the same name
+        for (int i = 0; i < 19; i++) {
+            buf.append(Math.abs(rand1.nextInt()) % 10); // NOTE(review): Math.abs(Integer.MIN_VALUE) is negative, so this could append a negative number; seeds are fixed so it may never occur — confirm
+        }
+        String header = buf.toString();
+
+        int size = Math.abs(rand3.nextInt()) % 9 + 1; // intended range 1..9; NOTE(review): same Integer.MIN_VALUE caveat, which would make the array size negative
+        String[] samples = new String[size];
+        for (int k = 0; k < size; k++) {
+            buf = new StringBuilder(header);
+            buf.append("-");
+            for (int i = 0; i < 10; i++) {
+                buf.append(Math.abs(rand3.nextInt()) % 10); // NOTE(review): same Integer.MIN_VALUE caveat as above
+            }
+            samples[k] = buf.toString();
+        }
+
+        return samples;
+    }
+
+    @Test
+    public void countTest() throws IOException { // runs count() for n = 10, 100, 1000, 10000, 100000
+        int n = 10;
+        for (int i = 0; i < 5; i++) {
+            count(n);
+            n *= 10;
+        }
+    }
+
+    private void count(int n) throws IOException { // feeds generateTestData(n) into a fresh counter and checks the estimate against the exact set size
+        Set<String> testSet = generateTestData(n);
+
+        HyperLogLogPlusCounterOld hllc = newHLLC();
+        for (String testData : testSet) {
+            hllc.add(Bytes.toBytes(testData));
+        }
+        long estimate = hllc.getCountEstimate();
+        double errorRate = hllc.getErrorRate();
+        double actualError = (double) Math.abs(testSet.size() - estimate) / testSet.size(); // relative error versus the true distinct count
+        System.out.println(estimate);
+        System.out.println(testSet.size());
+        System.out.println(errorRate);
+        System.out.println("=" + actualError);
+        Assert.assertTrue(actualError < errorRate * 3.0); // tolerate up to three times the counter's reported error rate
+
+        checkSerialize(hllc);
+    }
+
+    private void checkSerialize(HyperLogLogPlusCounterOld hllc) throws IOException { // writes the counter to buf, reads it back into the SAME counter, and checks the estimate is unchanged
+        long estimate = hllc.getCountEstimate();
+        buf.clear();
+        hllc.writeRegisters(buf);
+        buf.flip();
+        hllc.readRegisters(buf);
+        Assert.assertEquals(estimate, hllc.getCountEstimate());
+    }
+
+    @Test
+    public void mergeTest() throws IOException { // runs 100 merge rounds and bounds how often the error exceeds 1x/2x/3x the error rate
+        double error = 0;
+        int n = 100;
+        for (int i = 0; i < n; i++) {
+            double e = merge(i);
+            error += e;
+        }
+        System.out.println("Total average error is " + error / n);
+
+        System.out.println("  errorRateCount1 is " + errorCount1 + "!");
+        System.out.println("  errorRateCount2 is " + errorCount2 + "!");
+        System.out.println("  errorRateCount3 is " + errorCount3 + "!");
+
+        Assert.assertTrue(errorCount1 <= n * 0.30); // at most 30% of rounds may exceed 1x the error rate
+        Assert.assertTrue(errorCount2 <= n * 0.05); // at most 5% may exceed 2x
+        Assert.assertTrue(errorCount3 <= n * 0.02); // at most 2% may exceed 3x
+    }
+
+    private double merge(int round) throws IOException { // fills 20 counters, merges serialized copies of them, and returns the merged estimate's relative error
+        int ln = 20; // number of counters merged per round
+        int dn = 100 * (round + 1); // generateSampleData() calls per counter; grows with the round number
+        Set<String> testSet = new HashSet<String>(); // exact distinct set across all counters, used as ground truth
+        HyperLogLogPlusCounterOld[] hllcs = new HyperLogLogPlusCounterOld[ln];
+        for (int i = 0; i < ln; i++) {
+            hllcs[i] = newHLLC();
+            for (int k = 0; k < dn; k++) {
+                String[] samples = generateSampleData();
+                for (String data : samples) {
+                    testSet.add(data);
+                    hllcs[i].add(Bytes.toBytes(data));
+                }
+            }
+        }
+        HyperLogLogPlusCounterOld mergeHllc = newHLLC();
+        for (HyperLogLogPlusCounterOld hllc : hllcs) {
+            mergeHllc.merge(serDes(hllc)); // merge a write/read round-tripped copy rather than the original
+        }
+
+        double errorRate = mergeHllc.getErrorRate();
+        long estimate = mergeHllc.getCountEstimate();
+        double actualError = Math.abs((double) (testSet.size() - estimate) / testSet.size());
+
+        System.out.println(testSet.size() + "-" + estimate + " ~ " + actualError);
+        Assert.assertTrue(actualError < 0.1); // hard cap: every round must stay under 10% relative error
+
+        if (actualError > errorRate) { // the three checks are cumulative: a 3x miss increments all three counters
+            errorCount1++;
+        }
+        if (actualError > 2 * errorRate) {
+            errorCount2++;
+        }
+        if (actualError > 3 * errorRate) {
+            errorCount3++;
+        }
+
+        return actualError;
+    }
+
+    private HyperLogLogPlusCounterOld serDes(HyperLogLogPlusCounterOld hllc) throws IOException { // round-trips hllc through buf into a new counter of the same precision
+        buf.clear();
+        hllc.writeRegisters(buf);
+        buf.flip();
+        HyperLogLogPlusCounterOld copy = new HyperLogLogPlusCounterOld(hllc.getPrecision());
+        copy.readRegisters(buf);
+        Assert.assertEquals(copy.getCountEstimate(), hllc.getCountEstimate()); // the copy must estimate exactly what the source does
+        return copy;
+    }
+
+    @Test
+    public void testPerformance() throws IOException { // times repeated merge + serialize cycles; prints the duration and asserts nothing about it
+        int N = 3; // reduce N HLLC into one
+        int M = 1000; // for M times, use 100000 for real perf test
+
+        HyperLogLogPlusCounterOld samples[] = new HyperLogLogPlusCounterOld[N];
+        for (int i = 0; i < N; i++) {
+            samples[i] = newHLLC();
+            for (String str : generateTestData(10000))
+                samples[i].add(str);
+        }
+
+        System.out.println("Perf test running ... ");
+        long start = System.currentTimeMillis();
+        HyperLogLogPlusCounterOld sum = newHLLC();
+        for (int i = 0; i < M; i++) {
+            sum.clear();
+            for (int j = 0; j < N; j++) {
+                sum.merge(samples[j]);
+                checkSerialize(sum); // serialization cost is part of the measured time
+            }
+        }
+        long duration = System.currentTimeMillis() - start;
+        System.out.println("Perf test result: " + duration / 1000 + " seconds"); // long division: sub-second runs print 0
+    }
+
+    @Test
+    public void testEquivalence() { // adding a slice of a larger array must give the same estimate as adding an equal standalone array
+        byte[] a = new byte[] { 0, 3, 4, 42, 2, 2 };
+        byte[] b = new byte[] { 3, 4, 42 };
+        HyperLogLogPlusCounterOld ha = new HyperLogLogPlusCounterOld();
+        HyperLogLogPlusCounterOld hb = new HyperLogLogPlusCounterOld();
+        ha.add(a, 1, 3); // NOTE(review): presumably (array, offset, length), i.e. bytes {3, 4, 42} — confirm against the counter's add signature
+        hb.add(b);
+
+        Assert.assertTrue(ha.getCountEstimate() == hb.getCountEstimate());
+    }
+
+    private HyperLogLogPlusCounterOld newHLLC() { // counter factory shared by the count, merge and perf tests; fixed precision 16
+        return new HyperLogLogPlusCounterOld(16);
+    }
+}


[3/5] kylin git commit: KYLIN-1832 code review

Posted by li...@apache.org.
http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubing.java
----------------------------------------------------------------------
diff --git a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubing.java b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubing.java
index 76212c8..6e894dd 100644
--- a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubing.java
+++ b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubing.java
@@ -83,7 +83,7 @@ import org.apache.kylin.engine.spark.cube.DefaultTupleConverter;
 import org.apache.kylin.engine.spark.util.IteratorUtils;
 import org.apache.kylin.measure.BufferedMeasureCodec;
 import org.apache.kylin.measure.MeasureAggregators;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
+import org.apache.kylin.measure.hllc.HLLCounter;
 import org.apache.kylin.metadata.model.FunctionDesc;
 import org.apache.kylin.metadata.model.IJoinedFlatTableDesc;
 import org.apache.kylin.metadata.model.MeasureDesc;
@@ -241,15 +241,15 @@ public class SparkCubing extends AbstractApplication {
         }
     }
 
-    private Map<Long, HyperLogLogPlusCounterNew> sampling(final JavaRDD<List<String>> rowJavaRDD, final String cubeName, String segmentId) throws Exception {
+    private Map<Long, HLLCounter> sampling(final JavaRDD<List<String>> rowJavaRDD, final String cubeName, String segmentId) throws Exception {
         CubeInstance cubeInstance = CubeManager.getInstance(KylinConfig.getInstanceFromEnv()).reloadCubeLocal(cubeName);
         CubeSegment cubeSegment = cubeInstance.getSegmentById(segmentId);
         CubeDesc cubeDesc = cubeInstance.getDescriptor();
         CuboidScheduler cuboidScheduler = new CuboidScheduler(cubeDesc);
         List<Long> allCuboidIds = cuboidScheduler.getAllCuboidIds();
-        final HashMap<Long, HyperLogLogPlusCounterNew> zeroValue = Maps.newHashMap();
+        final HashMap<Long, HLLCounter> zeroValue = Maps.newHashMap();
         for (Long id : allCuboidIds) {
-            zeroValue.put(id, new HyperLogLogPlusCounterNew(cubeDesc.getConfig().getCubeStatsHLLPrecision()));
+            zeroValue.put(id, new HLLCounter(cubeDesc.getConfig().getCubeStatsHLLPrecision()));
         }
 
         CubeJoinedFlatTableEnrich flatDesc = new CubeJoinedFlatTableEnrich(EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc);
@@ -278,12 +278,12 @@ public class SparkCubing extends AbstractApplication {
             row_hashcodes[i] = new ByteArray();
         }
 
-        final HashMap<Long, HyperLogLogPlusCounterNew> samplingResult = rowJavaRDD.aggregate(zeroValue, new Function2<HashMap<Long, HyperLogLogPlusCounterNew>, List<String>, HashMap<Long, HyperLogLogPlusCounterNew>>() {
+        final HashMap<Long, HLLCounter> samplingResult = rowJavaRDD.aggregate(zeroValue, new Function2<HashMap<Long, HLLCounter>, List<String>, HashMap<Long, HLLCounter>>() {
 
             final HashFunction hashFunction = Hashing.murmur3_128();
 
             @Override
-            public HashMap<Long, HyperLogLogPlusCounterNew> call(HashMap<Long, HyperLogLogPlusCounterNew> v1, List<String> v2) throws Exception {
+            public HashMap<Long, HLLCounter> call(HashMap<Long, HLLCounter> v1, List<String> v2) throws Exception {
                 for (int i = 0; i < nRowKey; i++) {
                     Hasher hc = hashFunction.newHasher();
                     String colValue = v2.get(rowKeyColumnIndexes[i]);
@@ -296,7 +296,7 @@ public class SparkCubing extends AbstractApplication {
 
                 for (Map.Entry<Long, Integer[]> entry : allCuboidsBitSet.entrySet()) {
                     Hasher hc = hashFunction.newHasher();
-                    HyperLogLogPlusCounterNew counter = v1.get(entry.getKey());
+                    HLLCounter counter = v1.get(entry.getKey());
                     final Integer[] cuboidBitSet = entry.getValue();
                     for (int position = 0; position < cuboidBitSet.length; position++) {
                         hc.putBytes(row_hashcodes[cuboidBitSet[position]].array());
@@ -305,14 +305,14 @@ public class SparkCubing extends AbstractApplication {
                 }
                 return v1;
             }
-        }, new Function2<HashMap<Long, HyperLogLogPlusCounterNew>, HashMap<Long, HyperLogLogPlusCounterNew>, HashMap<Long, HyperLogLogPlusCounterNew>>() {
+        }, new Function2<HashMap<Long, HLLCounter>, HashMap<Long, HLLCounter>, HashMap<Long, HLLCounter>>() {
             @Override
-            public HashMap<Long, HyperLogLogPlusCounterNew> call(HashMap<Long, HyperLogLogPlusCounterNew> v1, HashMap<Long, HyperLogLogPlusCounterNew> v2) throws Exception {
+            public HashMap<Long, HLLCounter> call(HashMap<Long, HLLCounter> v1, HashMap<Long, HLLCounter> v2) throws Exception {
                 Preconditions.checkArgument(v1.size() == v2.size());
                 Preconditions.checkArgument(v1.size() > 0);
-                for (Map.Entry<Long, HyperLogLogPlusCounterNew> entry : v1.entrySet()) {
-                    final HyperLogLogPlusCounterNew counter1 = entry.getValue();
-                    final HyperLogLogPlusCounterNew counter2 = v2.get(entry.getKey());
+                for (Map.Entry<Long, HLLCounter> entry : v1.entrySet()) {
+                    final HLLCounter counter1 = entry.getValue();
+                    final HLLCounter counter2 = v2.get(entry.getKey());
                     counter1.merge(Preconditions.checkNotNull(counter2, "counter cannot be null"));
                 }
                 return v1;
@@ -470,7 +470,7 @@ public class SparkCubing extends AbstractApplication {
         ClassUtil.addClasspath(confPath);
     }
 
-    private byte[][] createHTable(String cubeName, String segmentId, Map<Long, HyperLogLogPlusCounterNew> samplingResult) throws Exception {
+    private byte[][] createHTable(String cubeName, String segmentId, Map<Long, HLLCounter> samplingResult) throws Exception {
         final KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
         final CubeInstance cubeInstance = CubeManager.getInstance(kylinConfig).getCube(cubeName);
         final CubeSegment cubeSegment = cubeInstance.getSegmentById(segmentId);
@@ -614,7 +614,7 @@ public class SparkCubing extends AbstractApplication {
             }
         });
 
-        final Map<Long, HyperLogLogPlusCounterNew> samplingResult = sampling(rowJavaRDD, cubeName, segmentId);
+        final Map<Long, HLLCounter> samplingResult = sampling(rowJavaRDD, cubeName, segmentId);
         final byte[][] splitKeys = createHTable(cubeName, segmentId, samplingResult);
 
         final String hfile = build(rowJavaRDD, cubeName, segmentId, splitKeys);

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/source-hive/src/main/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityMapper.java
----------------------------------------------------------------------
diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityMapper.java b/source-hive/src/main/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityMapper.java
index 230249f..f046f78 100644
--- a/source-hive/src/main/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityMapper.java
+++ b/source-hive/src/main/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityMapper.java
@@ -35,7 +35,7 @@ import org.apache.kylin.engine.mr.MRUtil;
 import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
 import org.apache.kylin.engine.mr.common.BatchConstants;
 import org.apache.kylin.measure.BufferedMeasureCodec;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
+import org.apache.kylin.measure.hllc.HLLCounter;
 import org.apache.kylin.metadata.MetadataManager;
 import org.apache.kylin.metadata.model.ColumnDesc;
 import org.apache.kylin.metadata.model.TableDesc;
@@ -46,7 +46,7 @@ import org.apache.kylin.metadata.model.TableDesc;
  */
 public class ColumnCardinalityMapper<T> extends KylinMapper<T, Object, IntWritable, BytesWritable> {
 
-    private Map<Integer, HyperLogLogPlusCounterNew> hllcMap = new HashMap<Integer, HyperLogLogPlusCounterNew>();
+    private Map<Integer, HLLCounter> hllcMap = new HashMap<Integer, HLLCounter>();
     public static final String DEFAULT_DELIM = ",";
 
     private int counter = 0;
@@ -87,9 +87,9 @@ public class ColumnCardinalityMapper<T> extends KylinMapper<T, Object, IntWritab
         counter++;
     }
 
-    private HyperLogLogPlusCounterNew getHllc(Integer key) {
+    private HLLCounter getHllc(Integer key) {
         if (!hllcMap.containsKey(key)) {
-            hllcMap.put(key, new HyperLogLogPlusCounterNew());
+            hllcMap.put(key, new HLLCounter());
         }
         return hllcMap.get(key);
     }
@@ -100,7 +100,7 @@ public class ColumnCardinalityMapper<T> extends KylinMapper<T, Object, IntWritab
         ByteBuffer buf = ByteBuffer.allocate(BufferedMeasureCodec.DEFAULT_BUFFER_SIZE);
         while (it.hasNext()) {
             int key = it.next();
-            HyperLogLogPlusCounterNew hllc = hllcMap.get(key);
+            HLLCounter hllc = hllcMap.get(key);
             buf.clear();
             hllc.writeRegisters(buf);
             buf.flip();

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/source-hive/src/main/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityReducer.java
----------------------------------------------------------------------
diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityReducer.java b/source-hive/src/main/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityReducer.java
index 32cc6d9..0648960 100644
--- a/source-hive/src/main/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityReducer.java
+++ b/source-hive/src/main/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityReducer.java
@@ -32,7 +32,7 @@ import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.kylin.engine.mr.KylinReducer;
 import org.apache.kylin.measure.BufferedMeasureCodec;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
+import org.apache.kylin.measure.hllc.HLLCounter;
 
 /**
  * @author Jack
@@ -41,7 +41,7 @@ import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
 public class ColumnCardinalityReducer extends KylinReducer<IntWritable, BytesWritable, IntWritable, LongWritable> {
 
     public static final int ONE = 1;
-    private Map<Integer, HyperLogLogPlusCounterNew> hllcMap = new HashMap<Integer, HyperLogLogPlusCounterNew>();
+    private Map<Integer, HLLCounter> hllcMap = new HashMap<Integer, HLLCounter>();
 
     @Override
     protected void setup(Context context) throws IOException {
@@ -53,16 +53,16 @@ public class ColumnCardinalityReducer extends KylinReducer<IntWritable, BytesWri
         int skey = key.get();
         for (BytesWritable v : values) {
             ByteBuffer buffer = ByteBuffer.wrap(v.getBytes());
-            HyperLogLogPlusCounterNew hll = new HyperLogLogPlusCounterNew();
+            HLLCounter hll = new HLLCounter();
             hll.readRegisters(buffer);
             getHllc(skey).merge(hll);
             hll.clear();
         }
     }
 
-    private HyperLogLogPlusCounterNew getHllc(Integer key) {
+    private HLLCounter getHllc(Integer key) {
         if (!hllcMap.containsKey(key)) {
-            hllcMap.put(key, new HyperLogLogPlusCounterNew());
+            hllcMap.put(key, new HLLCounter());
         }
         return hllcMap.get(key);
     }
@@ -78,7 +78,7 @@ public class ColumnCardinalityReducer extends KylinReducer<IntWritable, BytesWri
         it = keys.iterator();
         while (it.hasNext()) {
             int key = it.next();
-            HyperLogLogPlusCounterNew hllc = hllcMap.get(key);
+            HLLCounter hllc = hllcMap.get(key);
             ByteBuffer buf = ByteBuffer.allocate(BufferedMeasureCodec.DEFAULT_BUFFER_SIZE);
             buf.clear();
             hllc.writeRegisters(buf);

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/source-hive/src/test/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityReducerTest.java
----------------------------------------------------------------------
diff --git a/source-hive/src/test/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityReducerTest.java b/source-hive/src/test/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityReducerTest.java
index 410543a..c32e76d 100644
--- a/source-hive/src/test/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityReducerTest.java
+++ b/source-hive/src/test/java/org/apache/kylin/source/hive/cardinality/ColumnCardinalityReducerTest.java
@@ -35,7 +35,7 @@ import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
 import org.apache.hadoop.mrunit.types.Pair;
 import org.apache.kylin.common.util.Bytes;
 import org.apache.kylin.measure.BufferedMeasureCodec;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
+import org.apache.kylin.measure.hllc.HLLCounter;
 import org.junit.Before;
 import org.junit.Test;
 
@@ -57,7 +57,7 @@ public class ColumnCardinalityReducerTest {
     }
 
     private byte[] getBytes(String str) throws IOException {
-        HyperLogLogPlusCounterNew hllc = new HyperLogLogPlusCounterNew();
+        HLLCounter hllc = new HLLCounter();
         StringTokenizer tokenizer = new StringTokenizer(str, ColumnCardinalityMapper.DEFAULT_DELIM);
         int i = 0;
         while (tokenizer.hasMoreTokens()) {


[5/5] kylin git commit: KYLIN-1832 code review

Posted by li...@apache.org.
KYLIN-1832 code review


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/e6e330a8
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/e6e330a8
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/e6e330a8

Branch: refs/heads/master
Commit: e6e330a8bd47f1d2dd5fd6f68b510c3cf0be0287
Parents: f05404d
Author: Li Yang <li...@apache.org>
Authored: Wed Dec 14 15:29:56 2016 +0800
Committer: Li Yang <li...@apache.org>
Committed: Wed Dec 14 15:29:56 2016 +0800

----------------------------------------------------------------------
 .../org/apache/kylin/cube/util/CubingUtils.java |  12 +-
 .../apache/kylin/gridtable/UnitTestSupport.java |  22 +-
 .../benchmark/GTScannerBenchmark2.java          |   4 +-
 .../gridtable/AggregationCacheMemSizeTest.java  |   4 +-
 .../metadata/measure/MeasureCodecTest.java      |   4 +-
 .../org/apache/kylin/measure/MeasureType.java   |   2 +-
 .../kylin/measure/MeasureTypeFactory.java       |   2 +-
 .../kylin/measure/hllc/DenseRegister.java       |  26 +-
 .../kylin/measure/hllc/HLLCAggregator.java      |  10 +-
 .../kylin/measure/hllc/HLLCMeasureType.java     |  20 +-
 .../kylin/measure/hllc/HLLCSerializer.java      |  16 +-
 .../apache/kylin/measure/hllc/HLLCounter.java   | 377 ++++++++++++++++++
 .../kylin/measure/hllc/HLLCounterOld.java       | 393 +++++++++++++++++++
 .../measure/hllc/HLLDistinctCountAggFunc.java   |  22 +-
 .../measure/hllc/HyperLogLogPlusCounterNew.java | 388 ------------------
 .../measure/hllc/HyperLogLogPlusCounterOld.java | 392 ------------------
 .../org/apache/kylin/measure/hllc/Register.java |   4 +-
 .../kylin/measure/hllc/SparseRegister.java      |  38 +-
 .../measure/AggregatorMemEstimateTest.java      |   4 +-
 .../measure/hll/HyperLogLogCounterOldTest.java  | 265 -------------
 .../measure/hll2/HyperLogLogCounterNewTest.java | 301 --------------
 .../hll2/NewHyperLogLogBenchmarkTest.java       | 288 --------------
 .../kylin/measure/hllc/HLLCounterOldTest.java   | 266 +++++++++++++
 .../kylin/measure/hllc/HLLCounterTest.java      | 316 +++++++++++++++
 .../hllc/NewHyperLogLogBenchmarkTest.java       | 291 ++++++++++++++
 .../kylin/engine/mr/common/CubeStatsReader.java |  12 +-
 .../kylin/engine/mr/common/CubeStatsWriter.java |   6 +-
 .../mr/steps/FactDistinctColumnsReducer.java    |   8 +-
 .../mr/steps/FactDistinctHiveColumnsMapper.java |  10 +-
 .../engine/mr/steps/MergeStatisticsStep.java    |   6 +-
 .../kylin/engine/mr/steps/CubeSamplingTest.java |   8 +-
 .../steps/FactDistinctColumnsReducerTest.java   |   4 +-
 .../apache/kylin/engine/spark/SparkCubing.java  |  28 +-
 .../cardinality/ColumnCardinalityMapper.java    |  10 +-
 .../cardinality/ColumnCardinalityReducer.java   |  12 +-
 .../ColumnCardinalityReducerTest.java           |   4 +-
 36 files changed, 1802 insertions(+), 1773 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java b/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
index 35139a4..5e63f94 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
@@ -38,7 +38,7 @@ import org.apache.kylin.dict.DictionaryGenerator;
 import org.apache.kylin.dict.DictionaryInfo;
 import org.apache.kylin.dict.DictionaryManager;
 import org.apache.kylin.dict.IterableDictionaryValueEnumerator;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
+import org.apache.kylin.measure.hllc.HLLCounter;
 import org.apache.kylin.metadata.model.IJoinedFlatTableDesc;
 import org.apache.kylin.metadata.model.TblColRef;
 import org.apache.kylin.source.ReadableTable;
@@ -59,7 +59,7 @@ public class CubingUtils {
 
     private static Logger logger = LoggerFactory.getLogger(CubingUtils.class);
 
-    public static Map<Long, HyperLogLogPlusCounterNew> sampling(CubeDesc cubeDesc, IJoinedFlatTableDesc flatDescIn, Iterable<List<String>> streams) {
+    public static Map<Long, HLLCounter> sampling(CubeDesc cubeDesc, IJoinedFlatTableDesc flatDescIn, Iterable<List<String>> streams) {
         final CubeJoinedFlatTableEnrich flatDesc = new CubeJoinedFlatTableEnrich(flatDescIn, cubeDesc);
         final int rowkeyLength = cubeDesc.getRowkey().getRowKeyColumns().length;
         final List<Long> allCuboidIds = new CuboidScheduler(cubeDesc).getAllCuboidIds();
@@ -84,9 +84,9 @@ public class CubingUtils {
                 return result;
             }
         });
-        final Map<Long, HyperLogLogPlusCounterNew> result = Maps.newHashMapWithExpectedSize(allCuboidIds.size());
+        final Map<Long, HLLCounter> result = Maps.newHashMapWithExpectedSize(allCuboidIds.size());
         for (Long cuboidId : allCuboidIds) {
-            result.put(cuboidId, new HyperLogLogPlusCounterNew(cubeDesc.getConfig().getCubeStatsHLLPrecision()));
+            result.put(cuboidId, new HLLCounter(cubeDesc.getConfig().getCubeStatsHLLPrecision()));
             Integer[] cuboidBitSet = new Integer[Long.bitCount(cuboidId)];
 
             long mask = Long.highestOneBit(baseCuboidId);
@@ -118,9 +118,9 @@ public class CubingUtils {
                 }
             }
 
-            for (Map.Entry<Long, HyperLogLogPlusCounterNew> longHyperLogLogPlusCounterNewEntry : result.entrySet()) {
+            for (Map.Entry<Long, HLLCounter> longHyperLogLogPlusCounterNewEntry : result.entrySet()) {
                 Long cuboidId = longHyperLogLogPlusCounterNewEntry.getKey();
-                HyperLogLogPlusCounterNew counter = longHyperLogLogPlusCounterNewEntry.getValue();
+                HLLCounter counter = longHyperLogLogPlusCounterNewEntry.getValue();
                 Hasher hc = hf.newHasher();
                 final Integer[] cuboidBitSet = allCuboidsBitSet.get(cuboidId);
                 for (int position = 0; position < cuboidBitSet.length; position++) {

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-cube/src/main/java/org/apache/kylin/gridtable/UnitTestSupport.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/gridtable/UnitTestSupport.java b/core-cube/src/main/java/org/apache/kylin/gridtable/UnitTestSupport.java
index 6cbf237..b8d116c 100644
--- a/core-cube/src/main/java/org/apache/kylin/gridtable/UnitTestSupport.java
+++ b/core-cube/src/main/java/org/apache/kylin/gridtable/UnitTestSupport.java
@@ -26,7 +26,7 @@ import java.util.List;
 import org.apache.kylin.common.util.DateFormat;
 import org.apache.kylin.common.util.ImmutableBitSet;
 import org.apache.kylin.gridtable.GTInfo.Builder;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
+import org.apache.kylin.measure.hllc.HLLCounter;
 import org.apache.kylin.metadata.datatype.DataType;
 import org.apache.kylin.metadata.datatype.LongMutable;
 
@@ -106,16 +106,16 @@ public class UnitTestSupport {
             String d_01_15 = datePlus("2015-01-15", i * 4);
             String d_01_16 = datePlus("2015-01-16", i * 4);
             String d_01_17 = datePlus("2015-01-17", i * 4);
-            result.add(newRec(info, d_01_14, "Yang", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounterNew(14)));
-            result.add(newRec(info, d_01_14, "Luke", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounterNew(14)));
-            result.add(newRec(info, d_01_15, "Xu", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounterNew(14)));
-            result.add(newRec(info, d_01_15, "Dong", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounterNew(14)));
-            result.add(newRec(info, d_01_15, "Jason", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounterNew(14)));
-            result.add(newRec(info, d_01_16, "Mahone", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounterNew(14)));
-            result.add(newRec(info, d_01_16, "Shaofeng", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounterNew(14)));
-            result.add(newRec(info, d_01_16, "Qianhao", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounterNew(14)));
-            result.add(newRec(info, d_01_16, "George", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounterNew(14)));
-            result.add(newRec(info, d_01_17, "Kejia", "Food", new LongMutable(10), new BigDecimal("10.5"), new HyperLogLogPlusCounterNew(14)));
+            result.add(newRec(info, d_01_14, "Yang", "Food", new LongMutable(10), new BigDecimal("10.5"), new HLLCounter(14)));
+            result.add(newRec(info, d_01_14, "Luke", "Food", new LongMutable(10), new BigDecimal("10.5"), new HLLCounter(14)));
+            result.add(newRec(info, d_01_15, "Xu", "Food", new LongMutable(10), new BigDecimal("10.5"), new HLLCounter(14)));
+            result.add(newRec(info, d_01_15, "Dong", "Food", new LongMutable(10), new BigDecimal("10.5"), new HLLCounter(14)));
+            result.add(newRec(info, d_01_15, "Jason", "Food", new LongMutable(10), new BigDecimal("10.5"), new HLLCounter(14)));
+            result.add(newRec(info, d_01_16, "Mahone", "Food", new LongMutable(10), new BigDecimal("10.5"), new HLLCounter(14)));
+            result.add(newRec(info, d_01_16, "Shaofeng", "Food", new LongMutable(10), new BigDecimal("10.5"), new HLLCounter(14)));
+            result.add(newRec(info, d_01_16, "Qianhao", "Food", new LongMutable(10), new BigDecimal("10.5"), new HLLCounter(14)));
+            result.add(newRec(info, d_01_16, "George", "Food", new LongMutable(10), new BigDecimal("10.5"), new HLLCounter(14)));
+            result.add(newRec(info, d_01_17, "Kejia", "Food", new LongMutable(10), new BigDecimal("10.5"), new HLLCounter(14)));
         }
         return result;
     }

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-cube/src/main/java/org/apache/kylin/gridtable/benchmark/GTScannerBenchmark2.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/gridtable/benchmark/GTScannerBenchmark2.java b/core-cube/src/main/java/org/apache/kylin/gridtable/benchmark/GTScannerBenchmark2.java
index f80bd24..85d8c37 100644
--- a/core-cube/src/main/java/org/apache/kylin/gridtable/benchmark/GTScannerBenchmark2.java
+++ b/core-cube/src/main/java/org/apache/kylin/gridtable/benchmark/GTScannerBenchmark2.java
@@ -34,7 +34,7 @@ import org.apache.kylin.gridtable.GTScanRequest;
 import org.apache.kylin.gridtable.GTScanRequestBuilder;
 import org.apache.kylin.gridtable.IGTScanner;
 import org.apache.kylin.gridtable.benchmark.SortedGTRecordGenerator.Randomizer;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
+import org.apache.kylin.measure.hllc.HLLCounter;
 import org.apache.kylin.metadata.datatype.DataType;
 import org.apache.kylin.metadata.filter.ColumnTupleFilter;
 import org.apache.kylin.metadata.filter.CompareTupleFilter;
@@ -80,7 +80,7 @@ public class GTScannerBenchmark2 {
         gen.addDimension(100, 4, null);
         gen.addMeasure(8);
         gen.addMeasure(8, new Randomizer() {
-            HyperLogLogPlusCounterNew hllc = new HyperLogLogPlusCounterNew(12);
+            HLLCounter hllc = new HLLCounter(12);
 
             @Override
             public int fillRandom(Random rand, byte[] array, int offset) {

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-cube/src/test/java/org/apache/kylin/gridtable/AggregationCacheMemSizeTest.java
----------------------------------------------------------------------
diff --git a/core-cube/src/test/java/org/apache/kylin/gridtable/AggregationCacheMemSizeTest.java b/core-cube/src/test/java/org/apache/kylin/gridtable/AggregationCacheMemSizeTest.java
index 66a6b51..8ffe055 100644
--- a/core-cube/src/test/java/org/apache/kylin/gridtable/AggregationCacheMemSizeTest.java
+++ b/core-cube/src/test/java/org/apache/kylin/gridtable/AggregationCacheMemSizeTest.java
@@ -26,7 +26,7 @@ import org.apache.kylin.measure.basic.LongSumAggregator;
 import org.apache.kylin.measure.bitmap.BitmapAggregator;
 import org.apache.kylin.measure.bitmap.BitmapCounter;
 import org.apache.kylin.measure.hllc.HLLCAggregator;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
+import org.apache.kylin.measure.hllc.HLLCounter;
 import org.apache.kylin.metadata.datatype.DoubleMutable;
 import org.apache.kylin.metadata.datatype.LongMutable;
 import org.github.jamm.MemoryMeter;
@@ -105,7 +105,7 @@ public class AggregationCacheMemSizeTest {
 
     private HLLCAggregator createHLLCAggr() {
         HLLCAggregator hllcAggregator = new HLLCAggregator(14);
-        hllcAggregator.aggregate(new HyperLogLogPlusCounterNew(14));
+        hllcAggregator.aggregate(new HLLCounter(14));
         return hllcAggregator;
     }
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java
----------------------------------------------------------------------
diff --git a/core-cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java b/core-cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java
index cd1aa96..0f3f3a9 100644
--- a/core-cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java
+++ b/core-cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java
@@ -26,7 +26,7 @@ import java.nio.ByteBuffer;
 import org.apache.kylin.common.util.LocalFileMetadataTestCase;
 import org.apache.kylin.measure.BufferedMeasureCodec;
 import org.apache.kylin.measure.bitmap.BitmapCounter;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
+import org.apache.kylin.measure.hllc.HLLCounter;
 import org.apache.kylin.metadata.datatype.DoubleMutable;
 import org.apache.kylin.metadata.datatype.LongMutable;
 import org.apache.kylin.metadata.model.FunctionDesc;
@@ -57,7 +57,7 @@ public class MeasureCodecTest extends LocalFileMetadataTestCase {
         DoubleMutable d = new DoubleMutable(1.0);
         LongMutable l = new LongMutable(2);
         BigDecimal b = new BigDecimal("333.1234");
-        HyperLogLogPlusCounterNew hllc = new HyperLogLogPlusCounterNew(16);
+        HLLCounter hllc = new HLLCounter(16);
         hllc.add("1234567");
         hllc.add("abcdefg");
         BitmapCounter bitmap = new BitmapCounter();

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java b/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java
index 031636e..89ff382 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java
@@ -36,7 +36,7 @@ import org.apache.kylin.metadata.tuple.TupleInfo;
  * MeasureType captures how a kind of aggregation is defined, how it is calculated 
  * during cube build, and how it is involved in query and storage scan.
  * 
- * @param <T> the Java type of aggregation data object, e.g. HyperLogLogPlusCounterOld
+ * @param <T> the Java type of aggregation data object, e.g. HLLCounter
  */
 abstract public class MeasureType<T> {
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java b/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java
index d94dec9..694459b 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java
@@ -62,7 +62,7 @@ import com.google.common.collect.Maps;
   }
 </pre>
  * 
- * @param <T> the Java type of aggregation data object, e.g. HyperLogLogPlusCounterOld
+ * @param <T> the Java type of aggregation data object, e.g. HLLCounter
  */
 abstract public class MeasureTypeFactory<T> {
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java
index 26ee6ab..c5814aa 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java
@@ -25,7 +25,6 @@ import java.util.Map;
  * Created by xiefan on 16-12-9.
  */
 public class DenseRegister implements Register {
-    private int p;
 
     private int m;
 
@@ -41,7 +40,7 @@ public class DenseRegister implements Register {
     }
 
     @Override
-    public Byte get(int pos) {
+    public byte get(int pos) {
         return register[pos];
     }
 
@@ -80,11 +79,28 @@ public class DenseRegister implements Register {
     }
 
     @Override
-    public int getHashCode() {
-        return Arrays.hashCode(register);
+    public int hashCode() {
+        final int prime = 31;
+        int result = 1;
+        result = prime * result + Arrays.hashCode(register);
+        return result;
     }
 
-    public byte[] getRawRegister() {
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj)
+            return true;
+        if (obj == null)
+            return false;
+        if (getClass() != obj.getClass())
+            return false;
+        DenseRegister other = (DenseRegister) obj;
+        if (!Arrays.equals(register, other.register))
+            return false;
+        return true;
+    }
+
+    byte[] getRawRegister() {
         return this.register;
     }
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCAggregator.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCAggregator.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCAggregator.java
index ca73285..5966c04 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCAggregator.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCAggregator.java
@@ -23,10 +23,10 @@ import org.apache.kylin.measure.MeasureAggregator;
 /**
  */
 @SuppressWarnings("serial")
-public class HLLCAggregator extends MeasureAggregator<HyperLogLogPlusCounterNew> {
+public class HLLCAggregator extends MeasureAggregator<HLLCounter> {
 
     final int precision;
-    HyperLogLogPlusCounterNew sum = null;
+    HLLCounter sum = null;
 
     public HLLCAggregator(int precision) {
         this.precision = precision;
@@ -38,15 +38,15 @@ public class HLLCAggregator extends MeasureAggregator<HyperLogLogPlusCounterNew>
     }
 
     @Override
-    public void aggregate(HyperLogLogPlusCounterNew value) {
+    public void aggregate(HLLCounter value) {
         if (sum == null)
-            sum = new HyperLogLogPlusCounterNew(value);
+            sum = new HLLCounter(value);
         else
             sum.merge(value);
     }
 
     @Override
-    public HyperLogLogPlusCounterNew getState() {
+    public HLLCounter getState() {
         return sum;
     }
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCMeasureType.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCMeasureType.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCMeasureType.java
index 481fa4e..9601653 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCMeasureType.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCMeasureType.java
@@ -33,15 +33,15 @@ import org.apache.kylin.metadata.model.TblColRef;
 
 import com.google.common.collect.ImmutableMap;
 
-public class HLLCMeasureType extends MeasureType<HyperLogLogPlusCounterNew> {
+public class HLLCMeasureType extends MeasureType<HLLCounter> {
 
     public static final String FUNC_COUNT_DISTINCT = FunctionDesc.FUNC_COUNT_DISTINCT;
     public static final String DATATYPE_HLLC = "hllc";
 
-    public static class Factory extends MeasureTypeFactory<HyperLogLogPlusCounterNew> {
+    public static class Factory extends MeasureTypeFactory<HLLCounter> {
 
         @Override
-        public MeasureType<HyperLogLogPlusCounterNew> createMeasureType(String funcName, DataType dataType) {
+        public MeasureType<HLLCounter> createMeasureType(String funcName, DataType dataType) {
             return new HLLCMeasureType(funcName, dataType);
         }
 
@@ -56,7 +56,7 @@ public class HLLCMeasureType extends MeasureType<HyperLogLogPlusCounterNew> {
         }
 
         @Override
-        public Class<? extends DataTypeSerializer<HyperLogLogPlusCounterNew>> getAggrDataTypeSerializer() {
+        public Class<? extends DataTypeSerializer<HLLCounter>> getAggrDataTypeSerializer() {
             return HLLCSerializer.class;
         }
     }
@@ -91,13 +91,13 @@ public class HLLCMeasureType extends MeasureType<HyperLogLogPlusCounterNew> {
     }
 
     @Override
-    public MeasureIngester<HyperLogLogPlusCounterNew> newIngester() {
-        return new MeasureIngester<HyperLogLogPlusCounterNew>() {
-            HyperLogLogPlusCounterNew current = new HyperLogLogPlusCounterNew(dataType.getPrecision());
+    public MeasureIngester<HLLCounter> newIngester() {
+        return new MeasureIngester<HLLCounter>() {
+            HLLCounter current = new HLLCounter(dataType.getPrecision());
 
             @Override
-            public HyperLogLogPlusCounterNew valueOf(String[] values, MeasureDesc measureDesc, Map<TblColRef, Dictionary<String>> dictionaryMap) {
-                HyperLogLogPlusCounterNew hllc = current;
+            public HLLCounter valueOf(String[] values, MeasureDesc measureDesc, Map<TblColRef, Dictionary<String>> dictionaryMap) {
+                HLLCounter hllc = current;
                 hllc.clear();
                 for (String v : values) {
                     if (v != null)
@@ -109,7 +109,7 @@ public class HLLCMeasureType extends MeasureType<HyperLogLogPlusCounterNew> {
     }
 
     @Override
-    public MeasureAggregator<HyperLogLogPlusCounterNew> newAggregator() {
+    public MeasureAggregator<HLLCounter> newAggregator() {
         return new HLLCAggregator(dataType.getPrecision());
     }
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCSerializer.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCSerializer.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCSerializer.java
index 1d01abc..e0992c7 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCSerializer.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCSerializer.java
@@ -28,10 +28,10 @@ import org.apache.kylin.metadata.datatype.DataTypeSerializer;
  * @author yangli9
  * 
  */
-public class HLLCSerializer extends DataTypeSerializer<HyperLogLogPlusCounterNew> {
+public class HLLCSerializer extends DataTypeSerializer<HLLCounter> {
 
     // be thread-safe and avoid repeated obj creation
-    private ThreadLocal<HyperLogLogPlusCounterNew> current = new ThreadLocal<HyperLogLogPlusCounterNew>();
+    private ThreadLocal<HLLCounter> current = new ThreadLocal<HLLCounter>();
 
     private int precision;
 
@@ -40,7 +40,7 @@ public class HLLCSerializer extends DataTypeSerializer<HyperLogLogPlusCounterNew
     }
 
     @Override
-    public void serialize(HyperLogLogPlusCounterNew value, ByteBuffer out) {
+    public void serialize(HLLCounter value, ByteBuffer out) {
         try {
             value.writeRegisters(out);
         } catch (IOException e) {
@@ -48,18 +48,18 @@ public class HLLCSerializer extends DataTypeSerializer<HyperLogLogPlusCounterNew
         }
     }
 
-    private HyperLogLogPlusCounterNew current() {
-        HyperLogLogPlusCounterNew hllc = current.get();
+    private HLLCounter current() {
+        HLLCounter hllc = current.get();
         if (hllc == null) {
-            hllc = new HyperLogLogPlusCounterNew(precision);
+            hllc = new HLLCounter(precision);
             current.set(hllc);
         }
         return hllc;
     }
 
     @Override
-    public HyperLogLogPlusCounterNew deserialize(ByteBuffer in) {
-        HyperLogLogPlusCounterNew hllc = current();
+    public HLLCounter deserialize(ByteBuffer in) {
+        HLLCounter hllc = current();
         try {
             hllc.readRegisters(in);
         } catch (IOException e) {

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCounter.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCounter.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCounter.java
new file mode 100644
index 0000000..22b5e55
--- /dev/null
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCounter.java
@@ -0,0 +1,377 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.measure.hllc;
+
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hashing;
+import org.apache.kylin.common.util.BytesUtil;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.util.Collection;
+import java.util.Map;
+
+@SuppressWarnings("serial")
+public class HLLCounter implements Serializable, Comparable<HLLCounter> {
+
+    // not final for test purpose
+    static double OVERFLOW_FACTOR = 0.01;
+
+    private int p;
+
+    private int m;
+
+    private HashFunction hashFunc = Hashing.murmur3_128();
+
+    private Register register;
+
+    /** Creates an empty counter with precision 10, a sparse register and the murmur3_128 hash. */
+    public HLLCounter() {
+        this(10, RegisterType.SPARSE, Hashing.murmur3_128());
+    }
+
+    /**
+     * Creates an empty counter that starts with a sparse register and the murmur3_128 hash.
+     *
+     * @param p precision; the counter uses 2^p buckets
+     */
+    public HLLCounter(int p) {
+        this(p, RegisterType.SPARSE, Hashing.murmur3_128());
+    }
+
+    /**
+     * Creates an empty counter that starts with a sparse register.
+     *
+     * @param p        precision; the counter uses 2^p buckets
+     * @param hashFunc hash function applied to values passed to the public add(...) overloads
+     */
+    public HLLCounter(int p, HashFunction hashFunc) {
+        this(p, RegisterType.SPARSE, hashFunc);
+    }
+
+    /**
+     * Copy constructor: builds a fresh sparse counter with the same precision and hash
+     * function as {@code another}, then merges the content of {@code another} into it.
+     *
+     * @param another counter to copy; must not be null (it is dereferenced immediately)
+     */
+    public HLLCounter(HLLCounter another) {
+        this(another.p, another.hashFunc);
+        merge(another);
+    }
+
+    /**
+     * Package-private constructor that lets the caller pick the initial register layout;
+     * uses the murmur3_128 hash.
+     *
+     * @param p    precision; the counter uses 2^p buckets
+     * @param type initial register layout
+     */
+    HLLCounter(int p, RegisterType type) {
+        this(p, type, Hashing.murmur3_128());
+    }
+
+    /**
+     * Primary constructor that every other constructor delegates to.
+     *
+     * @param p        precision; the bucket count m is derived as 2^p
+     * @param type     initial register layout; any value other than SPARSE yields a dense register
+     * @param hashFunc hash function applied by the public add(...) overloads
+     */
+    HLLCounter(int p, RegisterType type, HashFunction hashFunc) {
+        this.hashFunc = hashFunc;
+        this.p = p;
+        // m = 2^p, computed with a shift instead of Math.pow
+        this.m = 1 << p;
+        this.register = (type == RegisterType.SPARSE) ? new SparseRegister() : new DenseRegister(p);
+    }
+
+    /**
+     * Tells whether a register holding {@code size} entries should use the dense layout.
+     *
+     * @param size number of entries held (or about to be held) by the register
+     * @return true when {@code size} exceeds OVERFLOW_FACTOR * m, truncated to an int
+     */
+    private boolean isDense(int size) {
+        return size > (int) (OVERFLOW_FACTOR * m);
+    }
+    
+    /** Hashes an int with the configured hash function and records the hash. */
+    public void add(int value) {
+        final long hash = hashFunc.hashInt(value).asLong();
+        add(hash);
+    }
+
+    /**
+     * Hashes a string with the configured hash function and records the hash.
+     *
+     * NOTE(review): the string is encoded with the JVM default charset, so the hash of a
+     * non-ASCII value depends on the platform setting -- confirm all writers agree on it.
+     */
+    public void add(String value) {
+        final long hash = hashFunc.hashString(value, Charset.defaultCharset()).asLong();
+        add(hash);
+    }
+
+    /** Hashes the whole byte array with the configured hash function and records the hash. */
+    public void add(byte[] value) {
+        final long hash = hashFunc.hashBytes(value).asLong();
+        add(hash);
+    }
+
+    /** Hashes {@code length} bytes of {@code value} starting at {@code offset} and records the hash. */
+    public void add(byte[] value, int offset, int length) {
+        final long hash = hashFunc.hashBytes(value, offset, length).asLong();
+        add(hash);
+    }
+
+    /**
+     * Folds one 64-bit hash into the sketch.
+     *
+     * The low p bits of the hash select the bucket. The rank is the number of leading
+     * zeros of the hash with those low p bits forced to one, plus one. A bucket only ever
+     * keeps the largest rank it has seen.
+     *
+     * @param hash hash of the value being counted
+     */
+    protected void add(long hash) {
+        int bucketMask = m - 1;
+        int bucket = (int) (hash & bucketMask);
+        int firstOnePos = Long.numberOfLeadingZeros(hash | bucketMask) + 1;
+        // Register.get returns a primitive byte, so read it as one: this avoids boxing on
+        // the per-value path, and a null check on the result could never be true.
+        byte current = register.get(bucket);
+        if ((byte) firstOnePos > current) {
+            register.set(bucket, (byte) firstOnePos);
+        }
+        toDenseIfNeeded();
+    }
+
+    /**
+     * Replaces a sparse register by its dense equivalent once its size passes the
+     * threshold checked by isDense(); does nothing when the register is not sparse.
+     */
+    private void toDenseIfNeeded() {
+        if (!(register instanceof SparseRegister)) {
+            return;
+        }
+        if (isDense(register.getSize())) {
+            register = ((SparseRegister) register).toDense(p);
+        }
+    }
+
+    /**
+     * Merges the registers of {@code another} into this counter.
+     *
+     * Precision and hash function are only checked through {@code assert}, so a mismatch
+     * goes undetected when the JVM runs with assertions disabled.
+     *
+     * @param another counter to merge in; expected to share this counter's precision and hash function
+     */
+    public void merge(HLLCounter another) {
+        assert this.p == another.p;
+        assert this.hashFunc == another.hashFunc;
+        final boolean wasSparse = register instanceof SparseRegister;
+        // a sparse register is converted to dense before a dense one is merged into it
+        if (wasSparse && another.register instanceof DenseRegister) {
+            register = ((SparseRegister) register).toDense(p);
+        }
+        register.merge(another.register);
+        // sparse + sparse may have grown past the threshold
+        if (wasSparse && another.register instanceof SparseRegister) {
+            toDenseIfNeeded();
+        }
+    }
+
+    /**
+     * Estimates the number of distinct values added so far.
+     *
+     * @return the estimate computed by a fresh HLLCSnapshot of this counter
+     */
+    public long getCountEstimate() {
+        HLLCSnapshot snapshot = new HLLCSnapshot(this);
+        return snapshot.getCountEstimate();
+    }
+
+    /** @return the precision p this counter was created with */
+    public int getPrecision() {
+        return p;
+    }
+
+    /** @return 1.04 / sqrt(m), where m is the number of buckets */
+    public double getErrorRate() {
+        final double sqrtOfBuckets = Math.sqrt(m);
+        return 1.04 / sqrtOfBuckets;
+    }
+
+    /** @return the current count estimate rendered as a decimal string */
+    @Override
+    public String toString() {
+        return Long.toString(getCountEstimate());
+    }
+
+    // ============================================================================
+
+    /**
+     * A memory efficient snapshot of HLL registers which can yield a count estimate later.
+     * It keeps only the precision, the sum over all buckets of 2^-rank and the number of
+     * empty buckets.
+     */
+    public static class HLLCSnapshot {
+        byte p;
+        double registerSum;
+        int zeroBuckets;
+
+        /**
+         * Summarizes the registers of {@code hllc}; a sparse register is expanded to a
+         * dense one first so that every bucket is visited.
+         */
+        public HLLCSnapshot(HLLCounter hllc) {
+            p = (byte) hllc.p;
+            registerSum = 0;
+            zeroBuckets = 0;
+            Register source = hllc.getRegister();
+            DenseRegister dense = (source instanceof SparseRegister) ? ((SparseRegister) source).toDense(p) : (DenseRegister) source;
+            byte[] buckets = dense.getRawRegister();
+            for (int i = 0; i < hllc.m; i++) {
+                byte rank = buckets[i];
+                if (rank != 0) {
+                    registerSum += 1.0 / (1L << rank);
+                } else {
+                    // an empty bucket contributes 2^0 = 1 and is counted separately
+                    registerSum++;
+                    zeroBuckets++;
+                }
+            }
+        }
+
+        /**
+         * @return the rounded cardinality estimate derived from the snapshot
+         */
+        public long getCountEstimate() {
+            int m = 1 << p;
+            double alpha = 0.7213 / (1 + 1.079 / m);
+            double rawEstimate = alpha * m * m / registerSum;
+
+            double estimate = rawEstimate;
+            if (zeroBuckets >= m * 0.07) {
+                // small cardinality adjustment (reference presto's HLL impl)
+                estimate = m * Math.log(m * 1.0 / zeroBuckets);
+            } else if (HyperLogLogPlusTable.isBiasCorrection(m, rawEstimate)) {
+                estimate = HyperLogLogPlusTable.biasCorrection(p, rawEstimate);
+            }
+
+            return Math.round(estimate);
+        }
+    }
+
+    /** Command line entry point: prints the error-rate table; {@code args} is not used. */
+    public static void main(String[] args) throws IOException {
+        HLLCounter.dumpErrorRates();
+    }
+
+    /**
+     * Prints one line per precision from 10 to 18 to standard output: 2^p as the size in
+     * bytes, followed by one, two and three times the error rate as percentages rounded
+     * to two decimals.
+     */
+    static void dumpErrorRates() {
+        int p = 10;
+        while (p <= 18) {
+            HLLCounter probe = new HLLCounter(p, RegisterType.SPARSE);
+            double rate = probe.getErrorRate();
+            double er = Math.round(rate * 10000) / 100D;
+            double er2 = Math.round(rate * 2 * 10000) / 100D;
+            double er3 = Math.round(rate * 3 * 10000) / 100D;
+            long size = Math.round(Math.pow(2, p));
+            StringBuilder line = new StringBuilder();
+            line.append("HLLC").append(p).append(",\t").append(size).append(" bytes,\t68% err<").append(er).append("%");
+            line.append(",\t95% err<").append(er2).append("%");
+            line.append(",\t99.7% err<").append(er3).append("%");
+            System.out.println(line.toString());
+            p++;
+        }
+    }
+
+    /** @return the live register backing this counter (not a copy) */
+    public Register getRegister() {
+        return this.register;
+    }
+
+    /** Clears the current register; the register object itself is kept. */
+    public void clear() {
+        this.register.clear();
+    }
+
+    // ============================================================================
+
+    /**
+     * Serializes the registers into {@code out} at its current position.
+     *
+     * Layout: one scheme byte, then either
+     * - scheme 0 (map): a VInt entry count followed by (bucket index, rank) pairs, the
+     *   index taking getRegisterIndexSize() bytes and the rank one byte; or
+     * - scheme 1 (array): the raw dense register array.
+     *
+     * A sparse register is always written as a map; a dense register is written as a map
+     * only when that is estimated to be smaller than the array.
+     *
+     * @param out destination buffer; the caller must provide enough remaining space
+     */
+    public void writeRegisters(final ByteBuffer out) throws IOException {
+
+        final int indexLen = getRegisterIndexSize();
+        int size = register.getSize();
+
+        // decide output scheme -- map ((indexLen + 1) * size bytes) or array (2^p bytes);
+        // the constant 5 presumably budgets for the VInt size header -- TODO confirm
+        byte scheme;
+        if (register instanceof SparseRegister || 5 + (indexLen + 1) * size < m) {
+            scheme = 0; // map
+        } else {
+            scheme = 1; // array
+        }
+        out.put(scheme);
+        if (scheme == 0) { // map scheme
+            BytesUtil.writeVInt(size, out);
+            if (register instanceof SparseRegister) { //sparse register
+                Collection<Map.Entry<Integer, Byte>> allValue = ((SparseRegister) register).getAllValue();
+                for (Map.Entry<Integer, Byte> entry : allValue) {
+                    writeUnsigned(entry.getKey(), indexLen, out);
+                    out.put(entry.getValue());
+                }
+            } else { //dense register
+                // only non-zero buckets are written; assumes register.getSize() above equals
+                // the number of non-zero buckets -- TODO confirm against DenseRegister
+                byte[] registers = ((DenseRegister) register).getRawRegister();
+                for (int i = 0; i < m; i++) {
+                    if (registers[i] > 0) {
+                        writeUnsigned(i, indexLen, out);
+                        out.put(registers[i]);
+                    }
+                }
+            }
+        } else if (scheme == 1) { // array scheme
+            out.put(((DenseRegister) register).getRawRegister());
+        } else
+            // defensive: scheme is only ever assigned 0 or 1 above
+            throw new IllegalStateException();
+    }
+
+    /**
+     * Replaces the content of this counter with registers read from {@code in}, in the
+     * format produced by writeRegisters().
+     *
+     * Scheme 0 (map) always installs a new register, dense or sparse depending on the
+     * entry count. Scheme 1 (array) reads the raw bytes into the dense register,
+     * allocating one first if the current register is sparse.
+     *
+     * @param in source buffer positioned at the scheme byte
+     * @throws IllegalArgumentException if the entry count is negative or larger than m, or
+     *         if a bucket index lies outside [0, m)
+     * @throws IllegalStateException if the scheme byte is neither 0 nor 1
+     */
+    public void readRegisters(ByteBuffer in) throws IOException {
+        byte scheme = in.get();
+        if (scheme == 0) { // map scheme
+            clear();
+            int size = BytesUtil.readVInt(in);
+            // a negative count would otherwise be accepted silently as an empty counter
+            if (size < 0)
+                throw new IllegalArgumentException("register size (" + size + ") cannot be negative");
+            if (size > m)
+                throw new IllegalArgumentException("register size (" + size + ") cannot be larger than m (" + m + ")");
+            if (isDense(size)) {
+                register = new DenseRegister(p);
+            } else {
+                register = new SparseRegister();//default is sparse
+            }
+            int indexLen = getRegisterIndexSize();
+            for (int i = 0; i < size; i++) {
+                int key = readUnsigned(in, indexLen);
+                // indexLen bytes can encode values >= m; reject them so corrupt input never
+                // ends up as a bogus bucket in a sparse register
+                if (key < 0 || key >= m)
+                    throw new IllegalArgumentException("register index (" + key + ") must be within [0, " + m + ")");
+                register.set(key, in.get());
+            }
+        } else if (scheme == 1) { // array scheme
+            if (register instanceof SparseRegister) {
+                register = new DenseRegister(p);
+            }
+            in.get(((DenseRegister) register).getRawRegister());
+        } else
+            throw new IllegalStateException("unknown register scheme: " + scheme);
+    }
+
+    /**
+     * Computes how many bytes the serialized counter starting at the current position of
+     * {@code in} occupies, without consuming it: the position is restored before returning.
+     *
+     * @param in buffer positioned at the scheme byte
+     * @return header bytes read plus (index size + 1) per entry for the map scheme, or
+     *         header bytes read plus m for any other scheme value
+     */
+    public int peekLength(ByteBuffer in) {
+        final int start = in.position();
+        final boolean mapScheme = in.get() == 0;
+        int payload;
+        if (mapScheme) {
+            int entries = BytesUtil.readVInt(in);
+            payload = (getRegisterIndexSize() + 1) * entries;
+        } else {
+            payload = m;
+        }
+        // in.position() - start is the header consumed so far (scheme byte and optional VInt)
+        int len = in.position() - start + payload;
+        in.position(start);
+        return len;
+    }
+
+    /** @return m + 1: one scheme byte followed by one byte per bucket */
+    public int maxLength() {
+        return m + 1;
+    }
+
+    /** @return number of bytes used to encode a bucket index, e.g. 2 when p=16, 3 when p=17 */
+    private int getRegisterIndexSize() {
+        final int wholeBytesBelow = (p - 1) / 8;
+        return wholeBytesBelow + 1;
+    }
+
+    /** Combines the hash function, the precision and the register content with the usual 31-multiplier scheme. */
+    @Override
+    public int hashCode() {
+        int hash = 31 + ((hashFunc == null) ? 0 : hashFunc.hashCode());
+        hash = 31 * hash + p;
+        hash = 31 * hash + register.hashCode();
+        return hash;
+    }
+
+    /**
+     * Two counters are equal when they are of the same class and have equal hash functions,
+     * the same precision and equal registers.
+     *
+     * The hash function comparison is null-safe, matching hashCode(), which also
+     * tolerates a null hash function.
+     */
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj)
+            return true;
+        if (obj == null)
+            return false;
+        if (getClass() != obj.getClass())
+            return false;
+        HLLCounter other = (HLLCounter) obj;
+        if (hashFunc == null) {
+            if (other.hashFunc != null)
+                return false;
+        } else if (!hashFunc.equals(other.hashFunc))
+            return false;
+        if (p != other.p)
+            return false;
+        if (!register.equals(other.register))
+            return false;
+        return true;
+    }
+
+    /**
+     * Orders counters by their count estimate; a null argument sorts before this counter.
+     * The ordering looks only at the estimate, so it can return 0 for counters that
+     * equals() reports as different.
+     */
+    @Override
+    public int compareTo(HLLCounter o) {
+        if (o == null) {
+            return 1;
+        }
+
+        final long mine = this.getCountEstimate();
+        final long theirs = o.getCountEstimate();
+        return (mine < theirs) ? -1 : ((mine == theirs) ? 0 : 1);
+    }
+
+    /**
+     * Writes the {@code size} lowest bytes of {@code num} to {@code out}, least significant
+     * byte first.
+     *
+     * @param num  value to write
+     * @param size number of bytes to emit; nothing is written when it is not positive
+     * @param out  destination buffer
+     */
+    public static void writeUnsigned(int num, int size, ByteBuffer out) {
+        int remaining = num;
+        for (int written = 0; written < size; written++, remaining >>>= 8) {
+            out.put((byte) remaining);
+        }
+    }
+
+    /**
+     * Reads {@code size} bytes from {@code in} and assembles them into an int, least
+     * significant byte first (the inverse of writeUnsigned).
+     *
+     * @param in   source buffer
+     * @param size number of bytes to consume; 0 is returned when it is not positive
+     * @return the assembled value
+     */
+    public static int readUnsigned(ByteBuffer in, int size) {
+        int value = 0;
+        int shift = 0;
+        int mask = 0xff;
+        int remaining = size;
+        while (remaining-- > 0) {
+            // the mask strips the sign extension of the byte once it has been shifted in place
+            int shifted = in.get() << shift;
+            value |= shifted & mask;
+            shift += 8;
+            mask <<= 8;
+        }
+        return value;
+    }
+
+    /** @return SPARSE while the backing register is a SparseRegister, DENSE otherwise */
+    public RegisterType getRegisterType() {
+        return (register instanceof SparseRegister) ? RegisterType.SPARSE : RegisterType.DENSE;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCounterOld.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCounterOld.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCounterOld.java
new file mode 100644
index 0000000..5cbdd43
--- /dev/null
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCounterOld.java
@@ -0,0 +1,393 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.measure.hllc;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.util.Arrays;
+
+import org.apache.kylin.common.util.BytesUtil;
+
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hashing;
+
+/**
+ * Deprecated, use HLLCounter instead.
+ * 
+ * About compression, test on HLLC data shows
+ * 
+ * - LZF compression ratio is around 65%-80%, fast
+ * - GZIP compression ratio is around 41%-46%, very slow
+ */
+@Deprecated
+@SuppressWarnings("serial")
+public class HLLCounterOld implements Serializable, Comparable<HLLCounterOld> {
+
+    private final int p;
+    private final int m;
+    private final HashFunction hashFunc;
+    byte[] registers;
+    int singleBucket;
+
+    public HLLCounterOld() {
+        this(10);
+    }
+
+    public HLLCounterOld(int p) {
+        this(p, Hashing.murmur3_128());
+    }
+
+    public HLLCounterOld(HLLCounterOld another) {
+        this(another.p, another.hashFunc);
+        merge(another);
+    }
+
+    /** The larger p is, the more storage (2^p bytes), the better accuracy */
+    private HLLCounterOld(int p, HashFunction hashFunc) {
+        this.p = p;
+        this.m = 1 << p;//(int) Math.pow(2, p);
+        this.hashFunc = hashFunc;
+        this.registers = new byte[m];
+        this.singleBucket = -1;
+    }
+
+    public void clear() {
+        byte zero = (byte) 0;
+        if (singleBucket == -1) {
+            //nothing
+        } else if (singleBucket >= 0) {
+            registers[singleBucket] = 0;
+        } else {
+            Arrays.fill(registers, zero);
+        }
+        singleBucket = -1;
+    }
+
+    public void add(int value) {
+        add(hashFunc.hashInt(value).asLong());
+    }
+
+    public void add(String value) {
+        add(hashFunc.hashString(value, Charset.defaultCharset()).asLong());
+    }
+
+    public void add(byte[] value) {
+        add(hashFunc.hashBytes(value).asLong());
+    }
+
+    public void add(byte[] value, int offset, int length) {
+        add(hashFunc.hashBytes(value, offset, length).asLong());
+    }
+
+    protected void add(long hash) {
+        int bucketMask = m - 1;
+        int bucket = (int) (hash & bucketMask);
+        int firstOnePos = Long.numberOfLeadingZeros(hash | bucketMask) + 1;
+
+        if (firstOnePos > registers[bucket])
+            registers[bucket] = (byte) firstOnePos;
+
+        if (singleBucket == -1)
+            singleBucket = bucket;
+        else
+            singleBucket = Integer.MIN_VALUE;
+    }
+
+    public void merge(HLLCounterOld another) {
+        assert this.p == another.p;
+        assert this.hashFunc == another.hashFunc;
+
+        // quick path for single value HLLC
+        if (another.singleBucket == -1) {
+            return;
+        } else if (another.singleBucket >= 0) {
+            int b = another.singleBucket;
+            if (registers[b] < another.registers[b])
+                registers[b] = another.registers[b];
+        } else {
+            // normal path
+            for (int i = 0; i < m; i++) {
+                if (registers[i] < another.registers[i])
+                    registers[i] = another.registers[i];
+            }
+        }
+        singleBucket = Integer.MIN_VALUE;
+    }
+
+    public long getCountEstimate() {
+        return new HLLCSnapshot(this).getCountEstimate();
+    }
+
+    public int getPrecision() {
+        return this.p;
+    }
+
+    public double getErrorRate() {
+        return 1.04 / Math.sqrt(m);
+    }
+
+    private int size() {
+        if (singleBucket == -1) {
+            return 0;
+        } else if (singleBucket >= 0) {
+            return 1;
+        } else {
+            int size = 0;
+            for (int i = 0; i < m; i++) {
+                if (registers[i] > 0)
+                    size++;
+            }
+            return size;
+        }
+    }
+
+    @Override
+    public String toString() {
+        return "" + getCountEstimate();
+    }
+
+    // ============================================================================
+
+    // a memory efficient snapshot of HLL registers which can yield count
+    // estimate later
+    public static class HLLCSnapshot {
+        byte p;
+        double registerSum;
+        int zeroBuckets;
+
+        public HLLCSnapshot(HLLCounterOld hllc) {
+            p = (byte) hllc.p;
+            registerSum = 0;
+            zeroBuckets = 0;
+
+            byte[] registers = hllc.registers;
+            for (int i = 0; i < hllc.m; i++) {
+                if (registers[i] == 0) {
+                    registerSum++;
+                    zeroBuckets++;
+                } else {
+                    registerSum += 1.0 / (1L << registers[i]);
+                }
+            }
+        }
+
+        public long getCountEstimate() {
+            int m = 1 << p;
+            double alpha = 0.7213 / (1 + 1.079 / m);
+            double estimate = alpha * m * m / registerSum;
+
+            // small cardinality adjustment
+            if (zeroBuckets >= m * 0.07) { // (reference presto's HLL impl)
+                estimate = m * Math.log(m * 1.0 / zeroBuckets);
+            } else if (HyperLogLogPlusTable.isBiasCorrection(m, estimate)) {
+                estimate = HyperLogLogPlusTable.biasCorrection(p, estimate);
+            }
+
+            return Math.round(estimate);
+        }
+    }
+
+    // ============================================================================
+
+    public void writeRegisters(final ByteBuffer out) throws IOException {
+
+        final int indexLen = getRegisterIndexSize();
+        int size = size();
+
+        // decide output scheme -- map (3*size bytes) or array (2^p bytes)
+        byte scheme;
+        if (5 + (indexLen + 1) * size < m) // 5 is max len of vint
+            scheme = 0; // map
+        else
+            scheme = 1; // array
+        out.put(scheme);
+
+        if (scheme == 0) { // map scheme
+            BytesUtil.writeVInt(size, out);
+            if (singleBucket == -1) {
+                // no non-zero register
+            } else if (singleBucket >= 0) {
+                writeUnsigned(singleBucket, indexLen, out);
+                out.put(registers[singleBucket]);
+            } else {
+                for (int i = 0; i < m; i++) {
+                    if (registers[i] > 0) {
+                        writeUnsigned(i, indexLen, out);
+                        out.put(registers[i]);
+                    }
+                }
+            }
+        } else if (scheme == 1) { // array scheme
+            out.put(registers);
+        } else
+            throw new IllegalStateException();
+    }
+
+    public void readRegisters(ByteBuffer in) throws IOException {
+        byte scheme = in.get();
+
+        if (scheme == 0) { // map scheme
+            clear();
+            int size = BytesUtil.readVInt(in);
+            if (size > m)
+                throw new IllegalArgumentException("register size (" + size + ") cannot be larger than m (" + m + ")");
+            int indexLen = getRegisterIndexSize();
+            int key = 0;
+            for (int i = 0; i < size; i++) {
+                key = readUnsigned(in, indexLen);
+                registers[key] = in.get();
+            }
+
+            if (size == 0)
+                singleBucket = -1;
+            else if (size == 1)
+                singleBucket = key;
+            else
+                singleBucket = Integer.MIN_VALUE;
+
+        } else if (scheme == 1) { // array scheme
+            in.get(registers);
+            singleBucket = Integer.MIN_VALUE;
+        } else
+            throw new IllegalStateException();
+    }
+
+    public int peekLength(ByteBuffer in) {
+        int mark = in.position();
+        int len;
+
+        byte scheme = in.get();
+        if (scheme == 0) { // map scheme
+            int size = BytesUtil.readVInt(in);
+            int indexLen = getRegisterIndexSize();
+            len = in.position() - mark + (indexLen + 1) * size;
+        } else {
+            len = in.position() - mark + m;
+        }
+
+        in.position(mark);
+        return len;
+    }
+
+    public int maxLength() {
+        return 1 + m;
+    }
+
+    /*public void writeRegistersArray(final ByteBuffer out) {
+        out.put(this.registers);
+    }
+
+    public void readRegistersArray(ByteBuffer in) {
+        in.get(registers, 0, m);
+        singleBucket = Integer.MIN_VALUE;
+    }*/
+
+    private int getRegisterIndexSize() {
+        return (p - 1) / 8 + 1; // 2 when p=16, 3 when p=17
+    }
+
+    @Override
+    public int hashCode() {
+        final int prime = 31;
+        int result = 1;
+        result = prime * result + ((hashFunc == null) ? 0 : hashFunc.hashCode());
+        result = prime * result + p;
+        result = prime * result + Arrays.hashCode(registers);
+        return result;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj)
+            return true;
+        if (obj == null)
+            return false;
+        if (getClass() != obj.getClass())
+            return false;
+        HLLCounterOld other = (HLLCounterOld) obj;
+        if (hashFunc == null) {
+            if (other.hashFunc != null)
+                return false;
+        } else if (!hashFunc.equals(other.hashFunc))
+            return false;
+        if (p != other.p)
+            return false;
+        if (!Arrays.equals(registers, other.registers))
+            return false;
+        return true;
+    }
+
+    @Override
+    public int compareTo(HLLCounterOld o) {
+        if (o == null)
+            return 1;
+
+        long e1 = this.getCountEstimate();
+        long e2 = o.getCountEstimate();
+
+        if (e1 == e2)
+            return 0;
+        else if (e1 > e2)
+            return 1;
+        else
+            return -1;
+    }
+
+    public static void main(String[] args) throws IOException {
+        dumpErrorRates();
+    }
+
+    static void dumpErrorRates() {
+        for (int p = 10; p <= 18; p++) {
+            double rate = new HLLCounterOld(p).getErrorRate();
+            double er = Math.round(rate * 10000) / 100D;
+            double er2 = Math.round(rate * 2 * 10000) / 100D;
+            double er3 = Math.round(rate * 3 * 10000) / 100D;
+            long size = Math.round(Math.pow(2, p));
+            System.out.println("HLLC" + p + ",\t" + size + " bytes,\t68% err<" + er + "%" + ",\t95% err<" + er2 + "%" + ",\t99.7% err<" + er3 + "%");
+        }
+    }
+
+    /**
+     *
+     * @param num
+     * @param size
+     * @param out
+     */
+    public static void writeUnsigned(int num, int size, ByteBuffer out) {
+        for (int i = 0; i < size; i++) {
+            out.put((byte) num);
+            num >>>= 8;
+        }
+    }
+
+    public static int readUnsigned(ByteBuffer in, int size) {
+        int integer = 0;
+        int mask = 0xff;
+        int shift = 0;
+        for (int i = 0; i < size; i++) {
+            integer |= (in.get() << shift) & mask;
+            mask = mask << 8;
+            shift += 8;
+        }
+        return integer;
+    }
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLDistinctCountAggFunc.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLDistinctCountAggFunc.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLDistinctCountAggFunc.java
index a72ad09..438a33f 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLDistinctCountAggFunc.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLDistinctCountAggFunc.java
@@ -31,21 +31,21 @@ public class HLLDistinctCountAggFunc {
 
     private static final Logger logger = LoggerFactory.getLogger(HLLDistinctCountAggFunc.class);
 
-    public static HyperLogLogPlusCounterNew init() {
+    public static HLLCounter init() {
         return null;
     }
 
-    public static HyperLogLogPlusCounterNew initAdd(Object v) {
+    public static HLLCounter initAdd(Object v) {
         if (v instanceof Long) { // holistic case
             long l = (Long) v;
             return new FixedValueHLLCMockup(l);
         } else {
-            HyperLogLogPlusCounterNew c = (HyperLogLogPlusCounterNew) v;
-            return new HyperLogLogPlusCounterNew(c);
+            HLLCounter c = (HLLCounter) v;
+            return new HLLCounter(c);
         }
     }
 
-    public static HyperLogLogPlusCounterNew add(HyperLogLogPlusCounterNew counter, Object v) {
+    public static HLLCounter add(HLLCounter counter, Object v) {
         if (v instanceof Long) { // holistic case
             long l = (Long) v;
             if (counter == null) {
@@ -58,9 +58,9 @@ public class HLLDistinctCountAggFunc {
                 return counter;
             }
         } else {
-            HyperLogLogPlusCounterNew c = (HyperLogLogPlusCounterNew) v;
+            HLLCounter c = (HLLCounter) v;
             if (counter == null) {
-                return new HyperLogLogPlusCounterNew(c);
+                return new HLLCounter(c);
             } else {
                 counter.merge(c);
                 return counter;
@@ -68,16 +68,16 @@ public class HLLDistinctCountAggFunc {
         }
     }
 
-    public static HyperLogLogPlusCounterNew merge(HyperLogLogPlusCounterNew counter0, Object counter1) {
+    public static HLLCounter merge(HLLCounter counter0, Object counter1) {
         return add(counter0, counter1);
     }
 
-    public static long result(HyperLogLogPlusCounterNew counter) {
+    public static long result(HLLCounter counter) {
         return counter == null ? 0L : counter.getCountEstimate();
     }
 
     @SuppressWarnings("serial")
-    private static class FixedValueHLLCMockup extends HyperLogLogPlusCounterNew {
+    private static class FixedValueHLLCMockup extends HLLCounter {
 
         private Long value = null;
 
@@ -107,7 +107,7 @@ public class HLLDistinctCountAggFunc {
         }
 
         @Override
-        public void merge(HyperLogLogPlusCounterNew another) {
+        public void merge(HLLCounter another) {
             throw new UnsupportedOperationException();
         }
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HyperLogLogPlusCounterNew.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HyperLogLogPlusCounterNew.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HyperLogLogPlusCounterNew.java
deleted file mode 100644
index d7329f6..0000000
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HyperLogLogPlusCounterNew.java
+++ /dev/null
@@ -1,388 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.measure.hllc;
-
-import com.google.common.hash.HashFunction;
-import com.google.common.hash.Hashing;
-import org.apache.kylin.common.util.BytesUtil;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.nio.ByteBuffer;
-import java.nio.charset.Charset;
-import java.util.Collection;
-import java.util.Map;
-
-@SuppressWarnings("serial")
-public class HyperLogLogPlusCounterNew implements Serializable, Comparable<HyperLogLogPlusCounterNew> {
-
-    private int p;
-
-    private int m;
-
-    private HashFunction hashFunc = Hashing.murmur3_128();
-
-    private Register register;
-
-    public static double overflowFactor = 0.01;
-
-    public HyperLogLogPlusCounterNew(int p, RegisterType type, HashFunction hashFunc) {
-        this.p = p;
-        this.m = 1 << p;//(int) Math.pow(2, p);
-        this.hashFunc = hashFunc;
-        if (type == RegisterType.SPARSE) {
-            double over = overflowFactor * m;
-            this.register = new SparseRegister((int) over);
-        } else {
-            this.register = new DenseRegister(p);
-        }
-    }
-
-    public HyperLogLogPlusCounterNew() {
-        this(10, RegisterType.SPARSE, Hashing.murmur3_128());
-    }
-
-    public HyperLogLogPlusCounterNew(int p) {
-        this(p, RegisterType.SPARSE, Hashing.murmur3_128());
-    }
-
-    public HyperLogLogPlusCounterNew(int p, RegisterType type) {
-        this(p, type, Hashing.murmur3_128());
-    }
-
-    public HyperLogLogPlusCounterNew(int p, HashFunction hashFunc) {
-        this(p, RegisterType.SPARSE, hashFunc);
-    }
-
-    public HyperLogLogPlusCounterNew(HyperLogLogPlusCounterNew another) {
-        this(another.p, another.hashFunc);
-        merge(another);
-    }
-
-    public void add(int value) {
-        add(hashFunc.hashInt(value).asLong());
-    }
-
-    public void add(String value) {
-        add(hashFunc.hashString(value, Charset.defaultCharset()).asLong());
-    }
-
-    public void add(byte[] value) {
-        add(hashFunc.hashBytes(value).asLong());
-    }
-
-    public void add(byte[] value, int offset, int length) {
-        add(hashFunc.hashBytes(value, offset, length).asLong());
-    }
-
-    protected void add(long hash) {
-        int bucketMask = m - 1;
-        int bucket = (int) (hash & bucketMask);
-        int firstOnePos = Long.numberOfLeadingZeros(hash | bucketMask) + 1;
-        Byte b = register.get(bucket);
-        if (b == null || (byte) firstOnePos > b) {
-            register.set(bucket, (byte) firstOnePos);
-        }
-        if (register instanceof SparseRegister) {
-            if (((SparseRegister) register).isOverThreshold()) {
-                register = ((SparseRegister) register).toDense(p);
-            }
-        }
-    }
-
-    public void merge(HyperLogLogPlusCounterNew another) {
-        assert this.p == another.p;
-        assert this.hashFunc == another.hashFunc;
-        if (register instanceof SparseRegister && another.register instanceof SparseRegister) {
-            register.merge(another.register);
-            if (((SparseRegister) register).isOverThreshold()) {
-                register = ((SparseRegister) register).toDense(p);
-            }
-        } else if (register instanceof SparseRegister && another.register instanceof DenseRegister) {
-            register = ((SparseRegister) register).toDense(p);
-            register.merge(another.register);
-        } else {
-            register.merge(another.register);
-        }
-    }
-
-    public long getCountEstimate() {
-        return new HLLCSnapshot(this).getCountEstimate();
-    }
-
-    public int getPrecision() {
-        return this.p;
-    }
-
-    public double getErrorRate() {
-        return 1.04 / Math.sqrt(m);
-    }
-
-    @Override
-    public String toString() {
-        return "" + getCountEstimate();
-    }
-
-    // ============================================================================
-
-    // a memory efficient snapshot of HLL registers which can yield count
-    // estimate later
-    public static class HLLCSnapshot {
-        byte p;
-        double registerSum;
-        int zeroBuckets;
-
-        public HLLCSnapshot(HyperLogLogPlusCounterNew hllc) {
-            p = (byte) hllc.p;
-            registerSum = 0;
-            zeroBuckets = 0;
-            Register register = hllc.getRegister();
-            DenseRegister dr;
-            if (register instanceof SparseRegister) {
-                dr = ((SparseRegister) register).toDense(p);
-            } else {
-                dr = (DenseRegister) register;
-            }
-            byte[] registers = dr.getRawRegister();
-            for (int i = 0; i < hllc.m; i++) {
-                if (registers[i] == 0) {
-                    registerSum++;
-                    zeroBuckets++;
-                } else {
-                    registerSum += 1.0 / (1L << registers[i]);
-                }
-            }
-        }
-
-        public long getCountEstimate() {
-            int m = 1 << p;
-            double alpha = 0.7213 / (1 + 1.079 / m);
-            double estimate = alpha * m * m / registerSum;
-
-            // small cardinality adjustment
-            if (zeroBuckets >= m * 0.07) { // (reference presto's HLL impl)
-                estimate = m * Math.log(m * 1.0 / zeroBuckets);
-            } else if (HyperLogLogPlusTable.isBiasCorrection(m, estimate)) {
-                estimate = HyperLogLogPlusTable.biasCorrection(p, estimate);
-            }
-
-            return Math.round(estimate);
-        }
-    }
-
-    public static void main(String[] args) throws IOException {
-        dumpErrorRates();
-    }
-
-    static void dumpErrorRates() {
-        for (int p = 10; p <= 18; p++) {
-            double rate = new HyperLogLogPlusCounterNew(p, RegisterType.SPARSE).getErrorRate();
-            double er = Math.round(rate * 10000) / 100D;
-            double er2 = Math.round(rate * 2 * 10000) / 100D;
-            double er3 = Math.round(rate * 3 * 10000) / 100D;
-            long size = Math.round(Math.pow(2, p));
-            System.out.println("HLLC" + p + ",\t" + size + " bytes,\t68% err<" + er + "%" + ",\t95% err<" + er2 + "%" + ",\t99.7% err<" + er3 + "%");
-        }
-    }
-
-    public Register getRegister() {
-        return register;
-    }
-
-    public void clear() {
-        register.clear();
-    }
-
-    public RegisterType getRegisterType() {
-        if (register instanceof SparseRegister)
-            return RegisterType.SPARSE;
-        else
-            return RegisterType.DENSE;
-    }
-
-    // ============================================================================
-
-    public void writeRegisters(final ByteBuffer out) throws IOException {
-
-        final int indexLen = getRegisterIndexSize();
-        int size = size();
-
-        // decide output scheme -- map (3*size bytes) or array (2^p bytes)
-        byte scheme;
-        //byte type;
-        if (register instanceof SparseRegister || 5 + (indexLen + 1) * size < m) {
-            scheme = 0; //map
-        } else {
-            scheme = 1; // array
-        }
-        out.put(scheme);
-        if (scheme == 0) { // map scheme
-            BytesUtil.writeVInt(size, out);
-            if (register instanceof SparseRegister) { //sparse\u3000register
-                Collection<Map.Entry<Integer, Byte>> allValue = ((SparseRegister) register).getAllValue();
-                for (Map.Entry<Integer, Byte> entry : allValue) {
-                    writeUnsigned(entry.getKey(), indexLen, out);
-                    out.put(entry.getValue());
-                }
-            } else { //dense register
-                byte[] registers = ((DenseRegister) register).getRawRegister();
-                for (int i = 0; i < m; i++) {
-                    if (registers[i] > 0) {
-                        writeUnsigned(i, indexLen, out);
-                        out.put(registers[i]);
-                    }
-                }
-            }
-        } else if (scheme == 1) { // array scheme
-            out.put(((DenseRegister) register).getRawRegister());
-        } else
-            throw new IllegalStateException();
-    }
-
-    public void readRegisters(ByteBuffer in) throws IOException {
-        byte scheme = in.get();
-        if (scheme == 0) { // map scheme
-            clear();
-            int size = BytesUtil.readVInt(in);
-            if (size > m)
-                throw new IllegalArgumentException("register size (" + size + ") cannot be larger than m (" + m + ")");
-            double over = overflowFactor * m;
-            if (size > (int) over) {
-                this.register = new DenseRegister(p);
-            } else {
-                this.register = new SparseRegister((int) over);//default is sparse
-            }
-            int indexLen = getRegisterIndexSize();
-            int key = 0;
-            for (int i = 0; i < size; i++) {
-                key = readUnsigned(in, indexLen);
-                register.set(key, in.get());
-            }
-        } else if (scheme == 1) { // array scheme
-            this.register = new DenseRegister(p);
-            for (int i = 0; i < m; i++) {
-                register.set(i, in.get());
-            }
-        } else
-            throw new IllegalStateException();
-    }
-
-    public int peekLength(ByteBuffer in) {
-        int mark = in.position();
-        int len;
-        byte scheme = in.get();
-        if (scheme == 0) { // map scheme
-            int size = BytesUtil.readVInt(in);
-            int indexLen = getRegisterIndexSize();
-            len = in.position() - mark + (indexLen + 1) * size;
-        } else {
-            len = in.position() - mark + m;
-        }
-
-        in.position(mark);
-        return len;
-    }
-
-    public int maxLength() {
-        return 1 + m;
-    }
-
-    private int getRegisterIndexSize() {
-        return (p - 1) / 8 + 1; // 2 when p=16, 3 when p=17
-    }
-
-    @Override
-    public int hashCode() {
-        final int prime = 31;
-        int result = 1;
-        result = prime * result + ((hashFunc == null) ? 0 : hashFunc.hashCode());
-        result = prime * result + p;
-        result = prime * result + register.getHashCode();
-        return result;
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-        if (this == obj)
-            return true;
-        if (obj == null)
-            return false;
-        if (getClass() != obj.getClass())
-            return false;
-        HyperLogLogPlusCounterNew other = (HyperLogLogPlusCounterNew) obj;
-        if (hashFunc == null) {
-            if (other.hashFunc != null)
-                return false;
-        } else if (!hashFunc.equals(other.hashFunc))
-            return false;
-        if (p != other.p)
-            return false;
-        if (this.getRegisterType() != other.getRegisterType())
-            return false;
-        if (register.getHashCode() != other.register.getHashCode())
-            return false;
-        return true;
-    }
-
-    @Override
-    public int compareTo(HyperLogLogPlusCounterNew o) {
-        if (o == null)
-            return 1;
-
-        long e1 = this.getCountEstimate();
-        long e2 = o.getCountEstimate();
-
-        if (e1 == e2)
-            return 0;
-        else if (e1 > e2)
-            return 1;
-        else
-            return -1;
-    }
-
-    /**
-     *
-     * @param num
-     * @param size
-     * @param out
-     */
-    public static void writeUnsigned(int num, int size, ByteBuffer out) {
-        for (int i = 0; i < size; i++) {
-            out.put((byte) num);
-            num >>>= 8;
-        }
-    }
-
-    public static int readUnsigned(ByteBuffer in, int size) {
-        int integer = 0;
-        int mask = 0xff;
-        int shift = 0;
-        for (int i = 0; i < size; i++) {
-            integer |= (in.get() << shift) & mask;
-            mask = mask << 8;
-            shift += 8;
-        }
-        return integer;
-    }
-
-    private int size() {
-        return register.getSize();
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HyperLogLogPlusCounterOld.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HyperLogLogPlusCounterOld.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HyperLogLogPlusCounterOld.java
deleted file mode 100644
index cb5533e..0000000
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HyperLogLogPlusCounterOld.java
+++ /dev/null
@@ -1,392 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.measure.hllc;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.nio.ByteBuffer;
-import java.nio.charset.Charset;
-import java.util.Arrays;
-
-import org.apache.kylin.common.util.BytesUtil;
-
-import com.google.common.hash.HashFunction;
-import com.google.common.hash.Hashing;
-
-/**
- * About compression, test on HLLC data shows
- * 
- * - LZF compression ratio is around 65%-80%, fast
- * - GZIP compression ratio is around 41%-46%, very slow
- * 
- * @author yangli9
- */
-@SuppressWarnings("serial")
-public class HyperLogLogPlusCounterOld implements Serializable, Comparable<HyperLogLogPlusCounterOld> {
-
-    private final int p;
-    private final int m;
-    private final HashFunction hashFunc;
-    byte[] registers;
-    int singleBucket;
-
-    public HyperLogLogPlusCounterOld() {
-        this(10);
-    }
-
-    public HyperLogLogPlusCounterOld(int p) {
-        this(p, Hashing.murmur3_128());
-    }
-
-    public HyperLogLogPlusCounterOld(HyperLogLogPlusCounterOld another) {
-        this(another.p, another.hashFunc);
-        merge(another);
-    }
-
-    /** The larger p is, the more storage (2^p bytes), the better accuracy */
-    private HyperLogLogPlusCounterOld(int p, HashFunction hashFunc) {
-        this.p = p;
-        this.m = 1 << p;//(int) Math.pow(2, p);
-        this.hashFunc = hashFunc;
-        this.registers = new byte[m];
-        this.singleBucket = -1;
-    }
-
-    public void clear() {
-        byte zero = (byte) 0;
-        if (singleBucket == -1) {
-            //nothing
-        } else if (singleBucket >= 0) {
-            registers[singleBucket] = 0;
-        } else {
-            Arrays.fill(registers, zero);
-        }
-        singleBucket = -1;
-    }
-
-    public void add(int value) {
-        add(hashFunc.hashInt(value).asLong());
-    }
-
-    public void add(String value) {
-        add(hashFunc.hashString(value, Charset.defaultCharset()).asLong());
-    }
-
-    public void add(byte[] value) {
-        add(hashFunc.hashBytes(value).asLong());
-    }
-
-    public void add(byte[] value, int offset, int length) {
-        add(hashFunc.hashBytes(value, offset, length).asLong());
-    }
-
-    protected void add(long hash) {
-        int bucketMask = m - 1;
-        int bucket = (int) (hash & bucketMask);
-        int firstOnePos = Long.numberOfLeadingZeros(hash | bucketMask) + 1;
-
-        if (firstOnePos > registers[bucket])
-            registers[bucket] = (byte) firstOnePos;
-
-        if (singleBucket == -1)
-            singleBucket = bucket;
-        else
-            singleBucket = Integer.MIN_VALUE;
-    }
-
-    public void merge(HyperLogLogPlusCounterOld another) {
-        assert this.p == another.p;
-        assert this.hashFunc == another.hashFunc;
-
-        // quick path for single value HLLC
-        if (another.singleBucket == -1) {
-            return;
-        } else if (another.singleBucket >= 0) {
-            int b = another.singleBucket;
-            if (registers[b] < another.registers[b])
-                registers[b] = another.registers[b];
-        } else {
-            // normal path
-            for (int i = 0; i < m; i++) {
-                if (registers[i] < another.registers[i])
-                    registers[i] = another.registers[i];
-            }
-        }
-        singleBucket = Integer.MIN_VALUE;
-    }
-
-    public long getCountEstimate() {
-        return new HLLCSnapshot(this).getCountEstimate();
-    }
-
-    public int getPrecision() {
-        return this.p;
-    }
-
-    public double getErrorRate() {
-        return 1.04 / Math.sqrt(m);
-    }
-
-    private int size() {
-        if (singleBucket == -1) {
-            return 0;
-        } else if (singleBucket >= 0) {
-            return 1;
-        } else {
-            int size = 0;
-            for (int i = 0; i < m; i++) {
-                if (registers[i] > 0)
-                    size++;
-            }
-            return size;
-        }
-    }
-
-    @Override
-    public String toString() {
-        return "" + getCountEstimate();
-    }
-
-    // ============================================================================
-
-    // a memory efficient snapshot of HLL registers which can yield count
-    // estimate later
-    public static class HLLCSnapshot {
-        byte p;
-        double registerSum;
-        int zeroBuckets;
-
-        public HLLCSnapshot(HyperLogLogPlusCounterOld hllc) {
-            p = (byte) hllc.p;
-            registerSum = 0;
-            zeroBuckets = 0;
-
-            byte[] registers = hllc.registers;
-            for (int i = 0; i < hllc.m; i++) {
-                if (registers[i] == 0) {
-                    registerSum++;
-                    zeroBuckets++;
-                } else {
-                    registerSum += 1.0 / (1L << registers[i]);
-                }
-            }
-        }
-
-        public long getCountEstimate() {
-            int m = 1 << p;
-            double alpha = 0.7213 / (1 + 1.079 / m);
-            double estimate = alpha * m * m / registerSum;
-
-            // small cardinality adjustment
-            if (zeroBuckets >= m * 0.07) { // (reference presto's HLL impl)
-                estimate = m * Math.log(m * 1.0 / zeroBuckets);
-            } else if (HyperLogLogPlusTable.isBiasCorrection(m, estimate)) {
-                estimate = HyperLogLogPlusTable.biasCorrection(p, estimate);
-            }
-
-            return Math.round(estimate);
-        }
-    }
-
-    // ============================================================================
-
-    public void writeRegisters(final ByteBuffer out) throws IOException {
-
-        final int indexLen = getRegisterIndexSize();
-        int size = size();
-
-        // decide output scheme -- map (3*size bytes) or array (2^p bytes)
-        byte scheme;
-        if (5 + (indexLen + 1) * size < m) // 5 is max len of vint
-            scheme = 0; // map
-        else
-            scheme = 1; // array
-        out.put(scheme);
-
-        if (scheme == 0) { // map scheme
-            BytesUtil.writeVInt(size, out);
-            if (singleBucket == -1) {
-                // no non-zero register
-            } else if (singleBucket >= 0) {
-                writeUnsigned(singleBucket, indexLen, out);
-                out.put(registers[singleBucket]);
-            } else {
-                for (int i = 0; i < m; i++) {
-                    if (registers[i] > 0) {
-                        writeUnsigned(i, indexLen, out);
-                        out.put(registers[i]);
-                    }
-                }
-            }
-        } else if (scheme == 1) { // array scheme
-            out.put(registers);
-        } else
-            throw new IllegalStateException();
-    }
-
-    public void readRegisters(ByteBuffer in) throws IOException {
-        byte scheme = in.get();
-
-        if (scheme == 0) { // map scheme
-            clear();
-            int size = BytesUtil.readVInt(in);
-            if (size > m)
-                throw new IllegalArgumentException("register size (" + size + ") cannot be larger than m (" + m + ")");
-            int indexLen = getRegisterIndexSize();
-            int key = 0;
-            for (int i = 0; i < size; i++) {
-                key = readUnsigned(in, indexLen);
-                registers[key] = in.get();
-            }
-
-            if (size == 0)
-                singleBucket = -1;
-            else if (size == 1)
-                singleBucket = key;
-            else
-                singleBucket = Integer.MIN_VALUE;
-
-        } else if (scheme == 1) { // array scheme
-            in.get(registers);
-            singleBucket = Integer.MIN_VALUE;
-        } else
-            throw new IllegalStateException();
-    }
-
-    public int peekLength(ByteBuffer in) {
-        int mark = in.position();
-        int len;
-
-        byte scheme = in.get();
-        if (scheme == 0) { // map scheme
-            int size = BytesUtil.readVInt(in);
-            int indexLen = getRegisterIndexSize();
-            len = in.position() - mark + (indexLen + 1) * size;
-        } else {
-            len = in.position() - mark + m;
-        }
-
-        in.position(mark);
-        return len;
-    }
-
-    public int maxLength() {
-        return 1 + m;
-    }
-
-    /*public void writeRegistersArray(final ByteBuffer out) {
-        out.put(this.registers);
-    }
-
-    public void readRegistersArray(ByteBuffer in) {
-        in.get(registers, 0, m);
-        singleBucket = Integer.MIN_VALUE;
-    }*/
-
-    private int getRegisterIndexSize() {
-        return (p - 1) / 8 + 1; // 2 when p=16, 3 when p=17
-    }
-
-    @Override
-    public int hashCode() {
-        final int prime = 31;
-        int result = 1;
-        result = prime * result + ((hashFunc == null) ? 0 : hashFunc.hashCode());
-        result = prime * result + p;
-        result = prime * result + Arrays.hashCode(registers);
-        return result;
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-        if (this == obj)
-            return true;
-        if (obj == null)
-            return false;
-        if (getClass() != obj.getClass())
-            return false;
-        HyperLogLogPlusCounterOld other = (HyperLogLogPlusCounterOld) obj;
-        if (hashFunc == null) {
-            if (other.hashFunc != null)
-                return false;
-        } else if (!hashFunc.equals(other.hashFunc))
-            return false;
-        if (p != other.p)
-            return false;
-        if (!Arrays.equals(registers, other.registers))
-            return false;
-        return true;
-    }
-
-    @Override
-    public int compareTo(HyperLogLogPlusCounterOld o) {
-        if (o == null)
-            return 1;
-
-        long e1 = this.getCountEstimate();
-        long e2 = o.getCountEstimate();
-
-        if (e1 == e2)
-            return 0;
-        else if (e1 > e2)
-            return 1;
-        else
-            return -1;
-    }
-
-    public static void main(String[] args) throws IOException {
-        dumpErrorRates();
-    }
-
-    static void dumpErrorRates() {
-        for (int p = 10; p <= 18; p++) {
-            double rate = new HyperLogLogPlusCounterOld(p).getErrorRate();
-            double er = Math.round(rate * 10000) / 100D;
-            double er2 = Math.round(rate * 2 * 10000) / 100D;
-            double er3 = Math.round(rate * 3 * 10000) / 100D;
-            long size = Math.round(Math.pow(2, p));
-            System.out.println("HLLC" + p + ",\t" + size + " bytes,\t68% err<" + er + "%" + ",\t95% err<" + er2 + "%" + ",\t99.7% err<" + er3 + "%");
-        }
-    }
-
-    /**
-     *
-     * @param num
-     * @param size
-     * @param out
-     */
-    public static void writeUnsigned(int num, int size, ByteBuffer out) {
-        for (int i = 0; i < size; i++) {
-            out.put((byte) num);
-            num >>>= 8;
-        }
-    }
-
-    public static int readUnsigned(ByteBuffer in, int size) {
-        int integer = 0;
-        int mask = 0xff;
-        int shift = 0;
-        for (int i = 0; i < size; i++) {
-            integer |= (in.get() << shift) & mask;
-            mask = mask << 8;
-            shift += 8;
-        }
-        return integer;
-    }
-}

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-metadata/src/main/java/org/apache/kylin/measure/hllc/Register.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/Register.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/Register.java
index 79c4bba..a6ef94f 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/Register.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/Register.java
@@ -24,7 +24,7 @@ public interface Register {
 
     void set(int pos, byte value);
 
-    Byte get(int pos);
+    byte get(int pos);
 
     void merge(Register another);
 
@@ -32,6 +32,4 @@ public interface Register {
 
     int getSize();
 
-    int getHashCode();
-
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java
index d241e81..d6bb024 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java
@@ -27,12 +27,9 @@ import java.util.TreeMap;
  */
 public class SparseRegister implements Register {
 
-    private int overThreshold;
-
     private Map<Integer, Byte> sparseRegister = new TreeMap<>();
 
-    public SparseRegister(int overThreshold) {
-        this.overThreshold = overThreshold;
+    public SparseRegister() {
     }
 
     public DenseRegister toDense(int p) {
@@ -49,8 +46,9 @@ public class SparseRegister implements Register {
     }
 
     @Override
-    public Byte get(int pos) {
-        return sparseRegister.get(pos);
+    public byte get(int pos) {
+        Byte b = sparseRegister.get(pos);
+        return b == null ? 0 : b;
     }
 
     @Override
@@ -58,8 +56,8 @@ public class SparseRegister implements Register {
         assert another instanceof SparseRegister;
         SparseRegister sr = (SparseRegister) another;
         for (Map.Entry<Integer, Byte> entry : sr.sparseRegister.entrySet()) {
-            Byte v = sparseRegister.get(entry.getKey());
-            if (v == null || entry.getValue() > v)
+            byte v = get(entry.getKey());
+            if (entry.getValue() > v)
                 sparseRegister.put(entry.getKey(), entry.getValue());
         }
     }
@@ -75,20 +73,28 @@ public class SparseRegister implements Register {
     }
 
     @Override
-    public int getHashCode() {
+    public int hashCode() {
         final int prime = 31;
         int result = 1;
-        for (Map.Entry<Integer, Byte> entry : sparseRegister.entrySet()) {
-            result = prime * result + entry.getKey();
-            result = prime * result + entry.getValue();
-        }
+        result = prime * result + ((sparseRegister == null) ? 0 : sparseRegister.hashCode());
         return result;
     }
 
-    public boolean isOverThreshold() {
-        if (this.sparseRegister.size() > overThreshold)
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj)
             return true;
-        return false;
+        if (obj == null)
+            return false;
+        if (getClass() != obj.getClass())
+            return false;
+        SparseRegister other = (SparseRegister) obj;
+        if (sparseRegister == null) {
+            if (other.sparseRegister != null)
+                return false;
+        } else if (!sparseRegister.equals(other.sparseRegister))
+            return false;
+        return true;
     }
 
     public Collection<Map.Entry<Integer, Byte>> getAllValue() {

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-metadata/src/test/java/org/apache/kylin/measure/AggregatorMemEstimateTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/AggregatorMemEstimateTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/AggregatorMemEstimateTest.java
index 103e721..0f22610 100644
--- a/core-metadata/src/test/java/org/apache/kylin/measure/AggregatorMemEstimateTest.java
+++ b/core-metadata/src/test/java/org/apache/kylin/measure/AggregatorMemEstimateTest.java
@@ -26,7 +26,7 @@ import org.apache.kylin.measure.bitmap.BitmapAggregator;
 import org.apache.kylin.measure.bitmap.BitmapCounter;
 import org.apache.kylin.measure.extendedcolumn.ExtendedColumnMeasureType;
 import org.apache.kylin.measure.hllc.HLLCAggregator;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
+import org.apache.kylin.measure.hllc.HLLCounter;
 import org.apache.kylin.metadata.datatype.DataType;
 import org.apache.kylin.metadata.datatype.DoubleMutable;
 import org.apache.kylin.metadata.datatype.LongMutable;
@@ -94,7 +94,7 @@ public class AggregatorMemEstimateTest extends LocalFileMetadataTestCase {
     @Test
     public void testAggregatorEstimate() {
         HLLCAggregator hllcAggregator = new HLLCAggregator(14);
-        hllcAggregator.aggregate(new HyperLogLogPlusCounterNew(14));
+        hllcAggregator.aggregate(new HLLCounter(14));
 
         BitmapAggregator bitmapAggregator = new BitmapAggregator();
         BitmapCounter bitmapCounter = new BitmapCounter();