You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2016/12/14 07:30:50 UTC
[4/5] kylin git commit: KYLIN-1832 code review
http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-metadata/src/test/java/org/apache/kylin/measure/hll/HyperLogLogCounterOldTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/hll/HyperLogLogCounterOldTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/hll/HyperLogLogCounterOldTest.java
deleted file mode 100644
index 5d17fea..0000000
--- a/core-metadata/src/test/java/org/apache/kylin/measure/hll/HyperLogLogCounterOldTest.java
+++ /dev/null
@@ -1,265 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.measure.hll;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.HashSet;
-import java.util.Random;
-import java.util.Set;
-
-import org.apache.kylin.common.util.Bytes;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterOld;
-import org.junit.Assert;
-import org.junit.Test;
-
-/**
- * @author yangli9
- *
- */
-public class HyperLogLogCounterOldTest {
-
- ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
- Random rand1 = new Random(1);
- Random rand2 = new Random(2);
- Random rand3 = new Random(3);
- int errorCount1 = 0;
- int errorCount2 = 0;
- int errorCount3 = 0;
-
- @Test
- public void testOneAdd() throws IOException {
- HyperLogLogPlusCounterOld hllc = new HyperLogLogPlusCounterOld(14);
- HyperLogLogPlusCounterOld one = new HyperLogLogPlusCounterOld(14);
- for (int i = 0; i < 1000000; i++) {
- one.clear();
- one.add(rand1.nextInt());
- hllc.merge(one);
- }
- assertTrue(hllc.getCountEstimate() > 1000000 * 0.9);
- }
-
- @Test
- public void testPeekLength() throws IOException {
- HyperLogLogPlusCounterOld hllc = new HyperLogLogPlusCounterOld(10);
- HyperLogLogPlusCounterOld copy = new HyperLogLogPlusCounterOld(10);
- byte[] value = new byte[10];
- for (int i = 0; i < 200000; i++) {
- rand1.nextBytes(value);
- hllc.add(value);
-
- buf.clear();
- hllc.writeRegisters(buf);
-
- int len = buf.position();
- buf.position(0);
- assertEquals(len, hllc.peekLength(buf));
-
- copy.readRegisters(buf);
- assertEquals(len, buf.position());
- assertEquals(hllc, copy);
- }
- buf.clear();
- }
-
- private Set<String> generateTestData(int n) {
- Set<String> testData = new HashSet<String>();
- for (int i = 0; i < n; i++) {
- String[] samples = generateSampleData();
- for (String sample : samples) {
- testData.add(sample);
- }
- }
- return testData;
- }
-
- // simulate the visit (=visitor+id)
- private String[] generateSampleData() {
-
- StringBuilder buf = new StringBuilder();
- for (int i = 0; i < 19; i++) {
- buf.append(Math.abs(rand1.nextInt()) % 10);
- }
- String header = buf.toString();
-
- int size = Math.abs(rand3.nextInt()) % 9 + 1;
- String[] samples = new String[size];
- for (int k = 0; k < size; k++) {
- buf = new StringBuilder(header);
- buf.append("-");
- for (int i = 0; i < 10; i++) {
- buf.append(Math.abs(rand3.nextInt()) % 10);
- }
- samples[k] = buf.toString();
- }
-
- return samples;
- }
-
- @Test
- public void countTest() throws IOException {
- int n = 10;
- for (int i = 0; i < 5; i++) {
- count(n);
- n *= 10;
- }
- }
-
- private void count(int n) throws IOException {
- Set<String> testSet = generateTestData(n);
-
- HyperLogLogPlusCounterOld hllc = newHLLC();
- for (String testData : testSet) {
- hllc.add(Bytes.toBytes(testData));
- }
- long estimate = hllc.getCountEstimate();
- double errorRate = hllc.getErrorRate();
- double actualError = (double) Math.abs(testSet.size() - estimate) / testSet.size();
- System.out.println(estimate);
- System.out.println(testSet.size());
- System.out.println(errorRate);
- System.out.println("=" + actualError);
- Assert.assertTrue(actualError < errorRate * 3.0);
-
- checkSerialize(hllc);
- }
-
- private void checkSerialize(HyperLogLogPlusCounterOld hllc) throws IOException {
- long estimate = hllc.getCountEstimate();
- buf.clear();
- hllc.writeRegisters(buf);
- buf.flip();
- hllc.readRegisters(buf);
- Assert.assertEquals(estimate, hllc.getCountEstimate());
- }
-
- @Test
- public void mergeTest() throws IOException {
- double error = 0;
- int n = 100;
- for (int i = 0; i < n; i++) {
- double e = merge(i);
- error += e;
- }
- System.out.println("Total average error is " + error / n);
-
- System.out.println(" errorRateCount1 is " + errorCount1 + "!");
- System.out.println(" errorRateCount2 is " + errorCount2 + "!");
- System.out.println(" errorRateCount3 is " + errorCount3 + "!");
-
- Assert.assertTrue(errorCount1 <= n * 0.30);
- Assert.assertTrue(errorCount2 <= n * 0.05);
- Assert.assertTrue(errorCount3 <= n * 0.02);
- }
-
- private double merge(int round) throws IOException {
- int ln = 20;
- int dn = 100 * (round + 1);
- Set<String> testSet = new HashSet<String>();
- HyperLogLogPlusCounterOld[] hllcs = new HyperLogLogPlusCounterOld[ln];
- for (int i = 0; i < ln; i++) {
- hllcs[i] = newHLLC();
- for (int k = 0; k < dn; k++) {
- String[] samples = generateSampleData();
- for (String data : samples) {
- testSet.add(data);
- hllcs[i].add(Bytes.toBytes(data));
- }
- }
- }
- HyperLogLogPlusCounterOld mergeHllc = newHLLC();
- for (HyperLogLogPlusCounterOld hllc : hllcs) {
- mergeHllc.merge(serDes(hllc));
- }
-
- double errorRate = mergeHllc.getErrorRate();
- long estimate = mergeHllc.getCountEstimate();
- double actualError = Math.abs((double) (testSet.size() - estimate) / testSet.size());
-
- System.out.println(testSet.size() + "-" + estimate + " ~ " + actualError);
- Assert.assertTrue(actualError < 0.1);
-
- if (actualError > errorRate) {
- errorCount1++;
- }
- if (actualError > 2 * errorRate) {
- errorCount2++;
- }
- if (actualError > 3 * errorRate) {
- errorCount3++;
- }
-
- return actualError;
- }
-
- private HyperLogLogPlusCounterOld serDes(HyperLogLogPlusCounterOld hllc) throws IOException {
- buf.clear();
- hllc.writeRegisters(buf);
- buf.flip();
- HyperLogLogPlusCounterOld copy = new HyperLogLogPlusCounterOld(hllc.getPrecision());
- copy.readRegisters(buf);
- Assert.assertEquals(copy.getCountEstimate(), hllc.getCountEstimate());
- return copy;
- }
-
- @Test
- public void testPerformance() throws IOException {
- int N = 3; // reduce N HLLC into one
- int M = 1000; // for M times, use 100000 for real perf test
-
- HyperLogLogPlusCounterOld samples[] = new HyperLogLogPlusCounterOld[N];
- for (int i = 0; i < N; i++) {
- samples[i] = newHLLC();
- for (String str : generateTestData(10000))
- samples[i].add(str);
- }
-
- System.out.println("Perf test running ... ");
- long start = System.currentTimeMillis();
- HyperLogLogPlusCounterOld sum = newHLLC();
- for (int i = 0; i < M; i++) {
- sum.clear();
- for (int j = 0; j < N; j++) {
- sum.merge(samples[j]);
- checkSerialize(sum);
- }
- }
- long duration = System.currentTimeMillis() - start;
- System.out.println("Perf test result: " + duration / 1000 + " seconds");
- }
-
- @Test
- public void testEquivalence() {
- byte[] a = new byte[] { 0, 3, 4, 42, 2, 2 };
- byte[] b = new byte[] { 3, 4, 42 };
- HyperLogLogPlusCounterOld ha = new HyperLogLogPlusCounterOld();
- HyperLogLogPlusCounterOld hb = new HyperLogLogPlusCounterOld();
- ha.add(a, 1, 3);
- hb.add(b);
-
- Assert.assertTrue(ha.getCountEstimate() == hb.getCountEstimate());
- }
-
- private HyperLogLogPlusCounterOld newHLLC() {
- return new HyperLogLogPlusCounterOld(16);
- }
-}
http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-metadata/src/test/java/org/apache/kylin/measure/hll2/HyperLogLogCounterNewTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/hll2/HyperLogLogCounterNewTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/hll2/HyperLogLogCounterNewTest.java
deleted file mode 100644
index feb8c8e..0000000
--- a/core-metadata/src/test/java/org/apache/kylin/measure/hll2/HyperLogLogCounterNewTest.java
+++ /dev/null
@@ -1,301 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-package org.apache.kylin.measure.hll2;
-
-import org.apache.kylin.common.util.Bytes;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterOld;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
-import org.apache.kylin.measure.hllc.RegisterType;
-import org.junit.Assert;
-import org.junit.Test;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.HashSet;
-import java.util.Random;
-import java.util.Set;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-/**
- * Created by xiefan on 16-12-12.
- */
-public class HyperLogLogCounterNewTest {
- ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
- Random rand1 = new Random(1);
- Random rand2 = new Random(2);
- Random rand3 = new Random(3);
- int errorCount1 = 0;
- int errorCount2 = 0;
- int errorCount3 = 0;
-
- @Test
- public void testOneAdd() throws IOException {
- HyperLogLogPlusCounterNew hllc = new HyperLogLogPlusCounterNew(14);
- HyperLogLogPlusCounterNew one = new HyperLogLogPlusCounterNew(14);
- for (int i = 0; i < 1000000; i++) {
- one.clear();
- one.add(rand1.nextInt());
- hllc.merge(one);
- }
- System.out.println(hllc.getCountEstimate());
- assertTrue(hllc.getCountEstimate() > 1000000 * 0.9);
- }
-
- @Test
- public void tesSparseEstimate() throws IOException {
- HyperLogLogPlusCounterNew hllc = new HyperLogLogPlusCounterNew(14);
- for (int i = 0; i < 10; i++) {
- hllc.add(i);
- }
- System.out.println(hllc.getCountEstimate());
- assertTrue(hllc.getCountEstimate() > 10 * 0.9);
- }
-
- @Test
- public void countTest() throws IOException {
- int n = 10;
- for (int i = 0; i < 5; i++) {
- count(n);
- n *= 10;
- }
- }
-
- @Test
- public void mergeTest() throws IOException {
- double error = 0;
- int n = 100;
- for (int i = 0; i < n; i++) {
- double e = merge(i);
- error += e;
- }
- System.out.println("Total average error is " + error / n);
-
- System.out.println(" errorRateCount1 is " + errorCount1 + "!");
- System.out.println(" errorRateCount2 is " + errorCount2 + "!");
- System.out.println(" errorRateCount3 is " + errorCount3 + "!");
-
- Assert.assertTrue(errorCount1 <= n * 0.30);
- Assert.assertTrue(errorCount2 <= n * 0.05);
- Assert.assertTrue(errorCount3 <= n * 0.02);
- }
-
- /*
- compare the result of two different hll counter
- */
- @Test
- public void compareResult() {
- int p = 12; //4096
- int m = 1 << p;
-
- for (int t = 0; t < 5; t++) {
- //compare sparse
- HyperLogLogPlusCounterOld oldCounter = new HyperLogLogPlusCounterOld(p);
- HyperLogLogPlusCounterNew newCounter = new HyperLogLogPlusCounterNew(p);
-
- for (int i = 0; i < 20; i++) {
- //int r = rand1.nextInt();
- oldCounter.add(i);
- newCounter.add(i);
- }
- assertEquals(RegisterType.SPARSE, newCounter.getRegisterType());
- assertEquals(oldCounter.getCountEstimate(), newCounter.getCountEstimate());
- //compare dense
- for (int i = 0; i < m; i++) {
- oldCounter.add(i);
- newCounter.add(i);
- }
- assertEquals(RegisterType.DENSE, newCounter.getRegisterType());
- assertEquals(oldCounter.getCountEstimate(), newCounter.getCountEstimate());
- }
-
- }
-
- @Test
- public void testPeekLength() throws IOException {
- HyperLogLogPlusCounterNew hllc = new HyperLogLogPlusCounterNew(10);
- HyperLogLogPlusCounterNew copy = new HyperLogLogPlusCounterNew(10);
- byte[] value = new byte[10];
- for (int i = 0; i < 200000; i++) {
- rand1.nextBytes(value);
- hllc.add(value);
-
- buf.clear();
- hllc.writeRegisters(buf);
-
- int len = buf.position();
- buf.position(0);
- assertEquals(len, hllc.peekLength(buf));
-
- copy.readRegisters(buf);
- assertEquals(len, buf.position());
- assertEquals(hllc, copy);
- }
- buf.clear();
- }
-
- @Test
- public void testEquivalence() {
- byte[] a = new byte[] { 0, 3, 4, 42, 2, 2 };
- byte[] b = new byte[] { 3, 4, 42 };
- HyperLogLogPlusCounterNew ha = new HyperLogLogPlusCounterNew();
- HyperLogLogPlusCounterNew hb = new HyperLogLogPlusCounterNew();
- ha.add(a, 1, 3);
- hb.add(b);
-
- Assert.assertTrue(ha.getCountEstimate() == hb.getCountEstimate());
- }
-
- @Test
- public void testAutoChangeToSparse() {
- int p = 15;
- int m = 1 << p;
- HyperLogLogPlusCounterNew counter = new HyperLogLogPlusCounterNew(p);
- assertEquals(RegisterType.SPARSE, counter.getRegisterType());
- double over = HyperLogLogPlusCounterNew.overflowFactor * m;
- int overFlow = (int) over + 1000;
- for (int i = 0; i < overFlow; i++)
- counter.add(i);
- assertEquals(RegisterType.DENSE, counter.getRegisterType());
- }
-
- @Test
- public void testSerialilze() throws Exception {
- //test sparse serialize
- int p = 15;
- int m = 1 << p;
- HyperLogLogPlusCounterNew counter = new HyperLogLogPlusCounterNew(p);
- counter.add(123);
- assertEquals(RegisterType.SPARSE, counter.getRegisterType());
- checkSerialize(counter);
- //test dense serialize
- double over = HyperLogLogPlusCounterNew.overflowFactor * m;
- int overFlow = (int) over + 1000;
- for (int i = 0; i < overFlow; i++)
- counter.add(i);
- assertEquals(RegisterType.DENSE, counter.getRegisterType());
- checkSerialize(counter);
- }
-
- private Set<String> generateTestData(int n) {
- Set<String> testData = new HashSet<String>();
- for (int i = 0; i < n; i++) {
- String[] samples = generateSampleData();
- for (String sample : samples) {
- testData.add(sample);
- }
- }
- return testData;
- }
-
- // simulate the visit (=visitor+id)
- private String[] generateSampleData() {
-
- StringBuilder buf = new StringBuilder();
- for (int i = 0; i < 19; i++) {
- buf.append(Math.abs(rand1.nextInt()) % 10);
- }
- String header = buf.toString();
-
- int size = Math.abs(rand3.nextInt()) % 9 + 1;
- String[] samples = new String[size];
- for (int k = 0; k < size; k++) {
- buf = new StringBuilder(header);
- buf.append("-");
- for (int i = 0; i < 10; i++) {
- buf.append(Math.abs(rand3.nextInt()) % 10);
- }
- samples[k] = buf.toString();
- }
-
- return samples;
- }
-
- private double merge(int round) throws IOException {
- int ln = 20;
- int dn = 100 * (round + 1);
- Set<String> testSet = new HashSet<String>();
- HyperLogLogPlusCounterNew[] hllcs = new HyperLogLogPlusCounterNew[ln];
- for (int i = 0; i < ln; i++) {
- hllcs[i] = newHLLC();
- for (int k = 0; k < dn; k++) {
- String[] samples = generateSampleData();
- for (String data : samples) {
- testSet.add(data);
- hllcs[i].add(Bytes.toBytes(data));
- }
- }
- }
- HyperLogLogPlusCounterNew mergeHllc = newHLLC();
- for (HyperLogLogPlusCounterNew hllc : hllcs) {
- mergeHllc.merge(hllc);
- }
-
- double errorRate = mergeHllc.getErrorRate();
- long estimate = mergeHllc.getCountEstimate();
- double actualError = Math.abs((double) (testSet.size() - estimate) / testSet.size());
-
- System.out.println(testSet.size() + "-" + estimate + " ~ " + actualError);
- Assert.assertTrue(actualError < 0.1);
-
- if (actualError > errorRate) {
- errorCount1++;
- }
- if (actualError > 2 * errorRate) {
- errorCount2++;
- }
- if (actualError > 3 * errorRate) {
- errorCount3++;
- }
-
- return actualError;
- }
-
- private HyperLogLogPlusCounterNew newHLLC() {
- return new HyperLogLogPlusCounterNew(16);
- }
-
- private void count(int n) throws IOException {
- Set<String> testSet = generateTestData(n);
-
- HyperLogLogPlusCounterNew hllc = newHLLC();
- for (String testData : testSet) {
- hllc.add(Bytes.toBytes(testData));
- }
- long estimate = hllc.getCountEstimate();
- double errorRate = hllc.getErrorRate();
- double actualError = (double) Math.abs(testSet.size() - estimate) / testSet.size();
- System.out.println(estimate);
- System.out.println(testSet.size());
- System.out.println(errorRate);
- System.out.println("=" + actualError);
- Assert.assertTrue(actualError < errorRate * 3.0);
-
- checkSerialize(hllc);
- }
-
- private void checkSerialize(HyperLogLogPlusCounterNew hllc) throws IOException {
- long estimate = hllc.getCountEstimate();
- buf.clear();
- hllc.writeRegisters(buf);
- buf.flip();
- hllc.readRegisters(buf);
- Assert.assertEquals(estimate, hllc.getCountEstimate());
- }
-}
http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-metadata/src/test/java/org/apache/kylin/measure/hll2/NewHyperLogLogBenchmarkTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/hll2/NewHyperLogLogBenchmarkTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/hll2/NewHyperLogLogBenchmarkTest.java
deleted file mode 100644
index bfb87f9..0000000
--- a/core-metadata/src/test/java/org/apache/kylin/measure/hll2/NewHyperLogLogBenchmarkTest.java
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-package org.apache.kylin.measure.hll2;
-
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterOld;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
-import org.apache.kylin.measure.hllc.RegisterType;
-import org.junit.Test;
-
-import java.nio.ByteBuffer;
-import java.util.Random;
-
-import static org.junit.Assert.assertEquals;
-
-/**
- * Created by xiefan on 16-12-12.
- */
-public class NewHyperLogLogBenchmarkTest {
-
- public static final Random rand = new Random(1);
-
- final int testTimes = 10000;
-
- @Test
- public void denseToDenseRegisterMergeBenchmark() throws Exception {
- final int p = 15;
- int m = 1 << p;
-
- System.out.println("m : " + m);
- double oldFactor = HyperLogLogPlusCounterNew.overflowFactor;
- HyperLogLogPlusCounterNew.overflowFactor = 1.1; //keep sparse
- for (int cardinality : getTestDataDivide(m)) {
- final HyperLogLogPlusCounterOld oldCounter = new HyperLogLogPlusCounterOld(p);
- final HyperLogLogPlusCounterOld oldCounter2 = getRandOldCounter(p, cardinality);
- long oldTime = runTestCase(new TestCase() {
- @Override
- public void run() {
-
- for (int i = 0; i < testTimes; i++) {
- oldCounter.merge(oldCounter2);
- }
- }
- });
- final HyperLogLogPlusCounterNew newCounter = new HyperLogLogPlusCounterNew(p, RegisterType.DENSE);
- final HyperLogLogPlusCounterNew newCounter2 = new HyperLogLogPlusCounterNew(p, RegisterType.DENSE);
- for (int i = 0; i < testTimes; i++)
- newCounter2.add(i);
- long newTime = runTestCase(new TestCase() {
- @Override
- public void run() {
- for (int i = 0; i < testTimes; i++) {
- newCounter.merge(newCounter2);
- }
- }
- });
- assertEquals(RegisterType.DENSE, newCounter.getRegisterType());
- assertEquals(RegisterType.DENSE, newCounter2.getRegisterType());
- System.out.println("----------------------------");
- System.out.println("cardinality : " + cardinality);
- System.out.println("old time : " + oldTime);
- System.out.println("new time : " + newTime);
- }
- HyperLogLogPlusCounterNew.overflowFactor = oldFactor;
- }
-
- @Test
- public void sparseToSparseMergeBenchmark() throws Exception {
- final int p = 15;
- int m = 1 << p;
- System.out.println("m : " + m);
- double oldFactor = HyperLogLogPlusCounterNew.overflowFactor;
- HyperLogLogPlusCounterNew.overflowFactor = 1.1; //keep sparse
- for (int cardinality : getTestDataDivide(m)) {
- final HyperLogLogPlusCounterOld oldCounter = new HyperLogLogPlusCounterOld(p);
- final HyperLogLogPlusCounterOld oldCounter2 = getRandOldCounter(p, cardinality);
- long oldTime = runTestCase(new TestCase() {
- @Override
- public void run() {
-
- for (int i = 0; i < testTimes; i++) {
- oldCounter.merge(oldCounter2);
- }
- }
- });
- final HyperLogLogPlusCounterNew newCounter = new HyperLogLogPlusCounterNew(p);
- final HyperLogLogPlusCounterNew newCounter2 = getRandNewCounter(p, cardinality);
- long newTime = runTestCase(new TestCase() {
- @Override
- public void run() {
- for (int i = 0; i < testTimes; i++) {
- newCounter.merge(newCounter2);
- }
- }
- });
- assertEquals(RegisterType.SPARSE, newCounter.getRegisterType());
- assertEquals(RegisterType.SPARSE, newCounter2.getRegisterType());
- System.out.println("----------------------------");
- System.out.println("cardinality : " + cardinality);
- System.out.println("old time : " + oldTime);
- System.out.println("new time : " + newTime);
- }
- HyperLogLogPlusCounterNew.overflowFactor = oldFactor;
- }
-
- @Test
- public void sparseToDenseRegisterMergeBenchmark() throws Exception {
- final int p = 15;
- int m = 1 << p;
- System.out.println("m : " + m);
- double oldFactor = HyperLogLogPlusCounterNew.overflowFactor;
- HyperLogLogPlusCounterNew.overflowFactor = 1.1; //keep sparse
- for (int cardinality : getTestDataDivide(m)) {
- System.out.println("----------------------------");
- System.out.println("cardinality : " + cardinality);
- final HyperLogLogPlusCounterOld oldCounter = new HyperLogLogPlusCounterOld(p);
- final HyperLogLogPlusCounterOld oldCounter2 = getRandOldCounter(p, cardinality);
- long oldTime = runTestCase(new TestCase() {
- @Override
- public void run() {
- for (int i = 0; i < testTimes; i++) {
- oldCounter.merge(oldCounter2);
- }
- }
- });
- final HyperLogLogPlusCounterNew newCounter = new HyperLogLogPlusCounterNew(p, RegisterType.DENSE);
- final HyperLogLogPlusCounterNew newCounter2 = getRandNewCounter(p, cardinality);
- long newTime = runTestCase(new TestCase() {
- @Override
- public void run() {
- for (int i = 0; i < testTimes; i++) {
- newCounter.merge(newCounter2);
- }
- }
- });
- assertEquals(RegisterType.DENSE, newCounter.getRegisterType());
- assertEquals(RegisterType.SPARSE, newCounter2.getRegisterType());
- System.out.println("old time : " + oldTime);
- System.out.println("new time : " + newTime);
- }
- HyperLogLogPlusCounterNew.overflowFactor = oldFactor;
- }
-
- @Test
- public void sparseSerializeBenchmark() throws Exception {
- final int p = 15;
- int m = 1 << p;
- double oldFactor = HyperLogLogPlusCounterNew.overflowFactor;
- HyperLogLogPlusCounterNew.overflowFactor = 1.1; //keep sparse
- for (int cardinality : getTestDataDivide(m)) {
- System.out.println("----------------------------");
- System.out.println("cardinality : " + cardinality);
- final HyperLogLogPlusCounterOld oldCounter = getRandOldCounter(p, cardinality);
- long oldTime = runTestCase(new TestCase() {
- @Override
- public void run() throws Exception {
- ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
- long totalBytes = 0;
- for (int i = 0; i < testTimes; i++) {
- buf.clear();
- oldCounter.writeRegisters(buf);
- totalBytes += buf.position();
- buf.flip();
- oldCounter.readRegisters(buf);
- }
- System.out.println("old serialize bytes : " + totalBytes / testTimes + "B");
- }
- });
- final HyperLogLogPlusCounterNew newCounter = getRandNewCounter(p, cardinality);
- long newTime = runTestCase(new TestCase() {
- @Override
- public void run() throws Exception {
- ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
- long totalBytes = 0;
- for (int i = 0; i < testTimes; i++) {
- buf.clear();
- newCounter.writeRegisters(buf);
- totalBytes += buf.position();
- buf.flip();
- newCounter.readRegisters(buf);
- }
- System.out.println("new serialize bytes : " + totalBytes / testTimes + "B");
- }
- });
- assertEquals(RegisterType.SPARSE, newCounter.getRegisterType());
- System.out.println("old serialize time : " + oldTime);
- System.out.println("new serialize time : " + newTime);
- }
- HyperLogLogPlusCounterNew.overflowFactor = oldFactor;
- }
-
- @Test
- public void denseSerializeBenchmark() throws Exception {
- final int p = 15;
- int m = 1 << p;
- double oldFactor = HyperLogLogPlusCounterNew.overflowFactor;
- HyperLogLogPlusCounterNew.overflowFactor = 0; //keep sparse
- for (int cardinality : getTestDataDivide(m)) {
- System.out.println("----------------------------");
- System.out.println("cardinality : " + cardinality);
- final HyperLogLogPlusCounterOld oldCounter = getRandOldCounter(p, cardinality);
- long oldTime = runTestCase(new TestCase() {
- @Override
- public void run() throws Exception {
- ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
- long totalBytes = 0;
- for (int i = 0; i < testTimes; i++) {
- buf.clear();
- oldCounter.writeRegisters(buf);
- totalBytes += buf.position();
- buf.flip();
- oldCounter.readRegisters(buf);
- }
- System.out.println("old serialize bytes : " + totalBytes / testTimes + "B");
- }
- });
- final HyperLogLogPlusCounterNew newCounter = getRandNewCounter(p, cardinality, RegisterType.DENSE);
- long newTime = runTestCase(new TestCase() {
- @Override
- public void run() throws Exception {
- ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
- long totalBytes = 0;
- for (int i = 0; i < testTimes; i++) {
- buf.clear();
- newCounter.writeRegisters(buf);
- totalBytes += buf.position();
- buf.flip();
- newCounter.readRegisters(buf);
- }
- System.out.println("new serialize bytes : " + totalBytes / testTimes + "B");
- }
- });
- assertEquals(RegisterType.DENSE, newCounter.getRegisterType());
- System.out.println("old serialize time : " + oldTime);
- System.out.println("new serialize time : " + newTime);
- }
- HyperLogLogPlusCounterNew.overflowFactor = oldFactor;
- }
-
- interface TestCase {
- void run() throws Exception;
- }
-
- public long runTestCase(TestCase testCase) throws Exception {
- long startTime = System.currentTimeMillis();
- testCase.run();
- return System.currentTimeMillis() - startTime;
- }
-
- public HyperLogLogPlusCounterOld getRandOldCounter(int p, int num) {
- HyperLogLogPlusCounterOld c = new HyperLogLogPlusCounterOld(p);
- for (int i = 0; i < num; i++)
- c.add(i);
- return c;
- }
-
- public HyperLogLogPlusCounterNew getRandNewCounter(int p, int num) {
- HyperLogLogPlusCounterNew c = new HyperLogLogPlusCounterNew(p);
- for (int i = 0; i < num; i++)
- c.add(i);
- return c;
- }
-
- public HyperLogLogPlusCounterNew getRandNewCounter(int p, int num, RegisterType type) {
- HyperLogLogPlusCounterNew c = new HyperLogLogPlusCounterNew(p, type);
- for (int i = 0; i < num; i++)
- c.add(i);
- return c;
- }
-
- public static int[] getTestDataDivide(int m) {
- return new int[] { 1, 5, 10, 100, m / 200, m / 100, m / 50, m / 20, m / 10, m };
- }
-}
http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterOldTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterOldTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterOldTest.java
new file mode 100644
index 0000000..c4a97cd
--- /dev/null
+++ b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterOldTest.java
@@ -0,0 +1,266 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.measure.hllc;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.kylin.common.util.Bytes;
+import org.apache.kylin.measure.hllc.HLLCounterOld;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * @author yangli9
+ *
+ */
+@SuppressWarnings("deprecation")
+public class HLLCounterOldTest {
+
+ ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
+ Random rand1 = new Random(1);
+ Random rand2 = new Random(2);
+ Random rand3 = new Random(3);
+ int errorCount1 = 0;
+ int errorCount2 = 0;
+ int errorCount3 = 0;
+
+ @Test
+ public void testOneAdd() throws IOException {
+ HLLCounterOld hllc = new HLLCounterOld(14);
+ HLLCounterOld one = new HLLCounterOld(14);
+ for (int i = 0; i < 1000000; i++) {
+ one.clear();
+ one.add(rand1.nextInt());
+ hllc.merge(one);
+ }
+ assertTrue(hllc.getCountEstimate() > 1000000 * 0.9);
+ }
+
+ @Test
+ public void testPeekLength() throws IOException {
+ HLLCounterOld hllc = new HLLCounterOld(10);
+ HLLCounterOld copy = new HLLCounterOld(10);
+ byte[] value = new byte[10];
+ for (int i = 0; i < 200000; i++) {
+ rand1.nextBytes(value);
+ hllc.add(value);
+
+ buf.clear();
+ hllc.writeRegisters(buf);
+
+ int len = buf.position();
+ buf.position(0);
+ assertEquals(len, hllc.peekLength(buf));
+
+ copy.readRegisters(buf);
+ assertEquals(len, buf.position());
+ assertEquals(hllc, copy);
+ }
+ buf.clear();
+ }
+
+ private Set<String> generateTestData(int n) {
+ Set<String> testData = new HashSet<String>();
+ for (int i = 0; i < n; i++) {
+ String[] samples = generateSampleData();
+ for (String sample : samples) {
+ testData.add(sample);
+ }
+ }
+ return testData;
+ }
+
+ // simulate the visit (=visitor+id)
+ private String[] generateSampleData() {
+
+ StringBuilder buf = new StringBuilder();
+ for (int i = 0; i < 19; i++) {
+ buf.append(Math.abs(rand1.nextInt()) % 10);
+ }
+ String header = buf.toString();
+
+ int size = Math.abs(rand3.nextInt()) % 9 + 1;
+ String[] samples = new String[size];
+ for (int k = 0; k < size; k++) {
+ buf = new StringBuilder(header);
+ buf.append("-");
+ for (int i = 0; i < 10; i++) {
+ buf.append(Math.abs(rand3.nextInt()) % 10);
+ }
+ samples[k] = buf.toString();
+ }
+
+ return samples;
+ }
+
+ @Test
+ public void countTest() throws IOException {
+ int n = 10;
+ for (int i = 0; i < 5; i++) {
+ count(n);
+ n *= 10;
+ }
+ }
+
+ private void count(int n) throws IOException {
+ Set<String> testSet = generateTestData(n);
+
+ HLLCounterOld hllc = newHLLC();
+ for (String testData : testSet) {
+ hllc.add(Bytes.toBytes(testData));
+ }
+ long estimate = hllc.getCountEstimate();
+ double errorRate = hllc.getErrorRate();
+ double actualError = (double) Math.abs(testSet.size() - estimate) / testSet.size();
+ System.out.println(estimate);
+ System.out.println(testSet.size());
+ System.out.println(errorRate);
+ System.out.println("=" + actualError);
+ Assert.assertTrue(actualError < errorRate * 3.0);
+
+ checkSerialize(hllc);
+ }
+
+ private void checkSerialize(HLLCounterOld hllc) throws IOException {
+ long estimate = hllc.getCountEstimate();
+ buf.clear();
+ hllc.writeRegisters(buf);
+ buf.flip();
+ hllc.readRegisters(buf);
+ Assert.assertEquals(estimate, hllc.getCountEstimate());
+ }
+
+ @Test
+ public void mergeTest() throws IOException {
+ double error = 0;
+ int n = 100;
+ for (int i = 0; i < n; i++) {
+ double e = merge(i);
+ error += e;
+ }
+ System.out.println("Total average error is " + error / n);
+
+ System.out.println(" errorRateCount1 is " + errorCount1 + "!");
+ System.out.println(" errorRateCount2 is " + errorCount2 + "!");
+ System.out.println(" errorRateCount3 is " + errorCount3 + "!");
+
+ Assert.assertTrue(errorCount1 <= n * 0.30);
+ Assert.assertTrue(errorCount2 <= n * 0.05);
+ Assert.assertTrue(errorCount3 <= n * 0.02);
+ }
+
+ private double merge(int round) throws IOException {
+ int ln = 20;
+ int dn = 100 * (round + 1);
+ Set<String> testSet = new HashSet<String>();
+ HLLCounterOld[] hllcs = new HLLCounterOld[ln];
+ for (int i = 0; i < ln; i++) {
+ hllcs[i] = newHLLC();
+ for (int k = 0; k < dn; k++) {
+ String[] samples = generateSampleData();
+ for (String data : samples) {
+ testSet.add(data);
+ hllcs[i].add(Bytes.toBytes(data));
+ }
+ }
+ }
+ HLLCounterOld mergeHllc = newHLLC();
+ for (HLLCounterOld hllc : hllcs) {
+ mergeHllc.merge(serDes(hllc));
+ }
+
+ double errorRate = mergeHllc.getErrorRate();
+ long estimate = mergeHllc.getCountEstimate();
+ double actualError = Math.abs((double) (testSet.size() - estimate) / testSet.size());
+
+ System.out.println(testSet.size() + "-" + estimate + " ~ " + actualError);
+ Assert.assertTrue(actualError < 0.1);
+
+ if (actualError > errorRate) {
+ errorCount1++;
+ }
+ if (actualError > 2 * errorRate) {
+ errorCount2++;
+ }
+ if (actualError > 3 * errorRate) {
+ errorCount3++;
+ }
+
+ return actualError;
+ }
+
+ private HLLCounterOld serDes(HLLCounterOld hllc) throws IOException {
+ buf.clear();
+ hllc.writeRegisters(buf);
+ buf.flip();
+ HLLCounterOld copy = new HLLCounterOld(hllc.getPrecision());
+ copy.readRegisters(buf);
+ Assert.assertEquals(copy.getCountEstimate(), hllc.getCountEstimate());
+ return copy;
+ }
+
+ @Test
+ public void testPerformance() throws IOException {
+ int N = 3; // reduce N HLLC into one
+ int M = 1000; // for M times, use 100000 for real perf test
+
+ HLLCounterOld samples[] = new HLLCounterOld[N];
+ for (int i = 0; i < N; i++) {
+ samples[i] = newHLLC();
+ for (String str : generateTestData(10000))
+ samples[i].add(str);
+ }
+
+ System.out.println("Perf test running ... ");
+ long start = System.currentTimeMillis();
+ HLLCounterOld sum = newHLLC();
+ for (int i = 0; i < M; i++) {
+ sum.clear();
+ for (int j = 0; j < N; j++) {
+ sum.merge(samples[j]);
+ checkSerialize(sum);
+ }
+ }
+ long duration = System.currentTimeMillis() - start;
+ System.out.println("Perf test result: " + duration / 1000 + " seconds");
+ }
+
+ @Test
+ public void testEquivalence() {
+ byte[] a = new byte[] { 0, 3, 4, 42, 2, 2 };
+ byte[] b = new byte[] { 3, 4, 42 };
+ HLLCounterOld ha = new HLLCounterOld();
+ HLLCounterOld hb = new HLLCounterOld();
+ ha.add(a, 1, 3);
+ hb.add(b);
+
+ Assert.assertTrue(ha.getCountEstimate() == hb.getCountEstimate());
+ }
+
+ private HLLCounterOld newHLLC() {
+ return new HLLCounterOld(16);
+ }
+}
http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterTest.java
new file mode 100644
index 0000000..26ad4a7
--- /dev/null
+++ b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterTest.java
@@ -0,0 +1,316 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+package org.apache.kylin.measure.hllc;
+
+import org.apache.kylin.common.util.Bytes;
+import org.apache.kylin.measure.hllc.HLLCounterOld;
+import org.apache.kylin.measure.hllc.HLLCounter;
+import org.apache.kylin.measure.hllc.RegisterType;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Set;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Created by xiefan on 16-12-12.
+ */
+@SuppressWarnings("deprecation")
+public class HLLCounterTest {
+ ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
+ Random rand1 = new Random(1);
+ Random rand2 = new Random(2);
+ Random rand3 = new Random(3);
+ int errorCount1 = 0;
+ int errorCount2 = 0;
+ int errorCount3 = 0;
+
+ @Test
+ public void testOneAdd() throws IOException {
+ HLLCounter hllc = new HLLCounter(14);
+ HLLCounter one = new HLLCounter(14);
+ for (int i = 0; i < 1000000; i++) {
+ one.clear();
+ one.add(rand1.nextInt());
+ hllc.merge(one);
+ }
+ System.out.println(hllc.getCountEstimate());
+ assertTrue(hllc.getCountEstimate() > 1000000 * 0.9);
+ }
+
+ @Test
+ public void tesSparseEstimate() throws IOException {
+ HLLCounter hllc = new HLLCounter(14);
+ for (int i = 0; i < 10; i++) {
+ hllc.add(i);
+ }
+ System.out.println(hllc.getCountEstimate());
+ assertTrue(hllc.getCountEstimate() > 10 * 0.9);
+ }
+
+ @Test
+ public void countTest() throws IOException {
+ int n = 10;
+ for (int i = 0; i < 5; i++) {
+ count(n);
+ n *= 10;
+ }
+ }
+
+ @Test
+ public void mergeTest() throws IOException {
+ double error = 0;
+ int n = 100;
+ for (int i = 0; i < n; i++) {
+ double e = merge(i);
+ error += e;
+ }
+ System.out.println("Total average error is " + error / n);
+
+ System.out.println(" errorRateCount1 is " + errorCount1 + "!");
+ System.out.println(" errorRateCount2 is " + errorCount2 + "!");
+ System.out.println(" errorRateCount3 is " + errorCount3 + "!");
+
+ Assert.assertTrue(errorCount1 <= n * 0.30);
+ Assert.assertTrue(errorCount2 <= n * 0.05);
+ Assert.assertTrue(errorCount3 <= n * 0.02);
+ }
+
+ /* compare the result of two different hll counter */
+ @Test
+ public void compareResult() throws IOException {
+ int p = 12; //4096
+ int m = 1 << p;
+
+ ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
+
+ for (int t = 0; t < 5; t++) {
+ //compare sparse
+ HLLCounterOld oldCounter = new HLLCounterOld(p);
+ HLLCounter newCounter = new HLLCounter(p);
+ HLLCounter newCounter2 = new HLLCounter(p);
+
+ for (int i = 0; i < 20; i++) {
+ int r = rand1.nextInt();
+ oldCounter.add(r);
+ newCounter.add(r);
+ }
+ assertEquals(RegisterType.SPARSE, newCounter.getRegisterType());
+ assertEquals(oldCounter.getCountEstimate(), newCounter.getCountEstimate());
+
+ buf.clear();
+ oldCounter.writeRegisters(buf);
+ buf.flip();
+ newCounter2.readRegisters(buf);
+ assertEquals(oldCounter.getCountEstimate(), newCounter2.getCountEstimate());
+
+ //compare dense
+ for (int i = 0; i < m / 2; i++) {
+ int r = rand1.nextInt();
+ oldCounter.add(r);
+ newCounter.add(r);
+ }
+ assertEquals(RegisterType.DENSE, newCounter.getRegisterType());
+ assertEquals(oldCounter.getCountEstimate(), newCounter.getCountEstimate());
+
+ buf.clear();
+ oldCounter.writeRegisters(buf);
+ buf.flip();
+ newCounter2.readRegisters(buf);
+ assertEquals(oldCounter.getCountEstimate(), newCounter2.getCountEstimate());
+ }
+ }
+
+ @Test
+ public void testPeekLength() throws IOException {
+ HLLCounter hllc = new HLLCounter(10);
+ HLLCounter copy = new HLLCounter(10);
+ byte[] value = new byte[10];
+ for (int i = 0; i < 200000; i++) {
+ rand1.nextBytes(value);
+ hllc.add(value);
+
+ buf.clear();
+ hllc.writeRegisters(buf);
+
+ int len = buf.position();
+ buf.position(0);
+ assertEquals(len, hllc.peekLength(buf));
+
+ copy.readRegisters(buf);
+ assertEquals(len, buf.position());
+ assertEquals(hllc, copy);
+ }
+ buf.clear();
+ }
+
+ @Test
+ public void testEquivalence() {
+ byte[] a = new byte[] { 0, 3, 4, 42, 2, 2 };
+ byte[] b = new byte[] { 3, 4, 42 };
+ HLLCounter ha = new HLLCounter();
+ HLLCounter hb = new HLLCounter();
+ ha.add(a, 1, 3);
+ hb.add(b);
+
+ Assert.assertTrue(ha.getCountEstimate() == hb.getCountEstimate());
+ }
+
+ @Test
+ public void testAutoChangeToSparse() {
+ int p = 15;
+ int m = 1 << p;
+ HLLCounter counter = new HLLCounter(p);
+ assertEquals(RegisterType.SPARSE, counter.getRegisterType());
+ double over = HLLCounter.OVERFLOW_FACTOR * m;
+ int overFlow = (int) over + 1000;
+ for (int i = 0; i < overFlow; i++)
+ counter.add(i);
+ assertEquals(RegisterType.DENSE, counter.getRegisterType());
+ }
+
+ @Test
+ public void testSerialilze() throws Exception {
+ //test sparse serialize
+ int p = 15;
+ int m = 1 << p;
+ HLLCounter counter = new HLLCounter(p);
+ counter.add(123);
+ assertEquals(RegisterType.SPARSE, counter.getRegisterType());
+ checkSerialize(counter);
+ //test dense serialize
+ double over = HLLCounter.OVERFLOW_FACTOR * m;
+ int overFlow = (int) over + 1000;
+ for (int i = 0; i < overFlow; i++)
+ counter.add(i);
+ assertEquals(RegisterType.DENSE, counter.getRegisterType());
+ checkSerialize(counter);
+ }
+
+ private Set<String> generateTestData(int n) {
+ Set<String> testData = new HashSet<String>();
+ for (int i = 0; i < n; i++) {
+ String[] samples = generateSampleData();
+ for (String sample : samples) {
+ testData.add(sample);
+ }
+ }
+ return testData;
+ }
+
+ // simulate the visit (=visitor+id)
+ private String[] generateSampleData() {
+
+ StringBuilder buf = new StringBuilder();
+ for (int i = 0; i < 19; i++) {
+ buf.append(Math.abs(rand1.nextInt()) % 10);
+ }
+ String header = buf.toString();
+
+ int size = Math.abs(rand3.nextInt()) % 9 + 1;
+ String[] samples = new String[size];
+ for (int k = 0; k < size; k++) {
+ buf = new StringBuilder(header);
+ buf.append("-");
+ for (int i = 0; i < 10; i++) {
+ buf.append(Math.abs(rand3.nextInt()) % 10);
+ }
+ samples[k] = buf.toString();
+ }
+
+ return samples;
+ }
+
+ private double merge(int round) throws IOException {
+ int ln = 20;
+ int dn = 100 * (round + 1);
+ Set<String> testSet = new HashSet<String>();
+ HLLCounter[] hllcs = new HLLCounter[ln];
+ for (int i = 0; i < ln; i++) {
+ hllcs[i] = newHLLC();
+ for (int k = 0; k < dn; k++) {
+ String[] samples = generateSampleData();
+ for (String data : samples) {
+ testSet.add(data);
+ hllcs[i].add(Bytes.toBytes(data));
+ }
+ }
+ }
+ HLLCounter mergeHllc = newHLLC();
+ for (HLLCounter hllc : hllcs) {
+ mergeHllc.merge(hllc);
+ }
+
+ double errorRate = mergeHllc.getErrorRate();
+ long estimate = mergeHllc.getCountEstimate();
+ double actualError = Math.abs((double) (testSet.size() - estimate) / testSet.size());
+
+ System.out.println(testSet.size() + "-" + estimate + " ~ " + actualError);
+ Assert.assertTrue(actualError < 0.1);
+
+ if (actualError > errorRate) {
+ errorCount1++;
+ }
+ if (actualError > 2 * errorRate) {
+ errorCount2++;
+ }
+ if (actualError > 3 * errorRate) {
+ errorCount3++;
+ }
+
+ return actualError;
+ }
+
+ private HLLCounter newHLLC() {
+ return new HLLCounter(16);
+ }
+
+ private void count(int n) throws IOException {
+ Set<String> testSet = generateTestData(n);
+
+ HLLCounter hllc = newHLLC();
+ for (String testData : testSet) {
+ hllc.add(Bytes.toBytes(testData));
+ }
+ long estimate = hllc.getCountEstimate();
+ double errorRate = hllc.getErrorRate();
+ double actualError = (double) Math.abs(testSet.size() - estimate) / testSet.size();
+ System.out.println(estimate);
+ System.out.println(testSet.size());
+ System.out.println(errorRate);
+ System.out.println("=" + actualError);
+ Assert.assertTrue(actualError < errorRate * 3.0);
+
+ checkSerialize(hllc);
+ }
+
+ private void checkSerialize(HLLCounter hllc) throws IOException {
+ long estimate = hllc.getCountEstimate();
+ buf.clear();
+ hllc.writeRegisters(buf);
+ buf.flip();
+ hllc.readRegisters(buf);
+ Assert.assertEquals(estimate, hllc.getCountEstimate());
+ }
+}
http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java
new file mode 100644
index 0000000..586c007
--- /dev/null
+++ b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java
@@ -0,0 +1,291 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+package org.apache.kylin.measure.hllc;
+
+import org.apache.kylin.measure.hllc.HLLCounterOld;
+import org.apache.kylin.measure.hllc.HLLCounter;
+import org.apache.kylin.measure.hllc.RegisterType;
+import org.junit.Test;
+
+import java.nio.ByteBuffer;
+import java.util.Random;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Created by xiefan on 16-12-12.
+ */
+@SuppressWarnings("deprecation")
+public class NewHyperLogLogBenchmarkTest {
+
+ public static final Random rand = new Random(1);
+
+ final int testTimes = 10000;
+
+ @Test
+ public void denseToDenseRegisterMergeBenchmark() throws Exception {
+ final int p = 15;
+ int m = 1 << p;
+
+ System.out.println("denseToDenseRegisterMergeBenchmark(), m : " + m);
+ double oldFactor = HLLCounter.OVERFLOW_FACTOR;
+ HLLCounter.OVERFLOW_FACTOR = 1.1; //keep sparse
+ for (int cardinality : getTestDataDivide(m)) {
+ final HLLCounterOld oldCounter = new HLLCounterOld(p);
+ final HLLCounterOld oldCounter2 = getRandOldCounter(p, cardinality);
+ long oldTime = runTestCase(new TestCase() {
+ @Override
+ public void run() {
+
+ for (int i = 0; i < testTimes; i++) {
+ oldCounter.merge(oldCounter2);
+ }
+ }
+ });
+ final HLLCounter newCounter = new HLLCounter(p, RegisterType.DENSE);
+ final HLLCounter newCounter2 = new HLLCounter(p, RegisterType.DENSE);
+ for (int i = 0; i < testTimes; i++)
+ newCounter2.add(i);
+ long newTime = runTestCase(new TestCase() {
+ @Override
+ public void run() {
+ for (int i = 0; i < testTimes; i++) {
+ newCounter.merge(newCounter2);
+ }
+ }
+ });
+ assertEquals(RegisterType.DENSE, newCounter.getRegisterType());
+ assertEquals(RegisterType.DENSE, newCounter2.getRegisterType());
+ System.out.println("----------------------------");
+ System.out.println("cardinality : " + cardinality);
+ System.out.println("old time : " + oldTime);
+ System.out.println("new time : " + newTime);
+ }
+ HLLCounter.OVERFLOW_FACTOR = oldFactor;
+ }
+
+ @Test
+ public void sparseToSparseMergeBenchmark() throws Exception {
+ final int p = 15;
+ int m = 1 << p;
+ System.out.println("sparseToSparseMergeBenchmark(), m : " + m);
+ double oldFactor = HLLCounter.OVERFLOW_FACTOR;
+ HLLCounter.OVERFLOW_FACTOR = 1.1; //keep sparse
+ for (int cardinality : getTestDataDivide(m)) {
+ final HLLCounterOld oldCounter = new HLLCounterOld(p);
+ final HLLCounterOld oldCounter2 = getRandOldCounter(p, cardinality);
+ long oldTime = runTestCase(new TestCase() {
+ @Override
+ public void run() {
+
+ for (int i = 0; i < testTimes; i++) {
+ oldCounter.merge(oldCounter2);
+ }
+ }
+ });
+ final HLLCounter newCounter = new HLLCounter(p);
+ final HLLCounter newCounter2 = getRandNewCounter(p, cardinality);
+ long newTime = runTestCase(new TestCase() {
+ @Override
+ public void run() {
+ for (int i = 0; i < testTimes; i++) {
+ newCounter.merge(newCounter2);
+ }
+ }
+ });
+ assertEquals(RegisterType.SPARSE, newCounter.getRegisterType());
+ assertEquals(RegisterType.SPARSE, newCounter2.getRegisterType());
+ System.out.println("----------------------------");
+ System.out.println("cardinality : " + cardinality);
+ System.out.println("old time : " + oldTime);
+ System.out.println("new time : " + newTime);
+ }
+ HLLCounter.OVERFLOW_FACTOR = oldFactor;
+ }
+
+ @Test
+ public void sparseToDenseRegisterMergeBenchmark() throws Exception {
+ final int p = 15;
+ int m = 1 << p;
+ System.out.println("sparseToDenseRegisterMergeBenchmark(), m : " + m);
+ double oldFactor = HLLCounter.OVERFLOW_FACTOR;
+ HLLCounter.OVERFLOW_FACTOR = 1.1; //keep sparse
+ for (int cardinality : getTestDataDivide(m)) {
+ System.out.println("----------------------------");
+ System.out.println("cardinality : " + cardinality);
+ final HLLCounterOld oldCounter = new HLLCounterOld(p);
+ final HLLCounterOld oldCounter2 = getRandOldCounter(p, cardinality);
+ long oldTime = runTestCase(new TestCase() {
+ @Override
+ public void run() {
+ for (int i = 0; i < testTimes; i++) {
+ oldCounter.merge(oldCounter2);
+ }
+ }
+ });
+ final HLLCounter newCounter = new HLLCounter(p, RegisterType.DENSE);
+ final HLLCounter newCounter2 = getRandNewCounter(p, cardinality);
+ long newTime = runTestCase(new TestCase() {
+ @Override
+ public void run() {
+ for (int i = 0; i < testTimes; i++) {
+ newCounter.merge(newCounter2);
+ }
+ }
+ });
+ assertEquals(RegisterType.DENSE, newCounter.getRegisterType());
+ assertEquals(RegisterType.SPARSE, newCounter2.getRegisterType());
+ System.out.println("old time : " + oldTime);
+ System.out.println("new time : " + newTime);
+ }
+ HLLCounter.OVERFLOW_FACTOR = oldFactor;
+ }
+
+ @Test
+ public void sparseSerializeBenchmark() throws Exception {
+ final int p = 15;
+ int m = 1 << p;
+ double oldFactor = HLLCounter.OVERFLOW_FACTOR;
+ HLLCounter.OVERFLOW_FACTOR = 1.1; //keep sparse
+ System.out.println("sparseSerializeBenchmark()");
+ for (int cardinality : getTestDataDivide(m)) {
+ System.out.println("----------------------------");
+ System.out.println("cardinality : " + cardinality);
+ final HLLCounterOld oldCounter = getRandOldCounter(p, cardinality);
+ long oldTime = runTestCase(new TestCase() {
+ @Override
+ public void run() throws Exception {
+ ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
+ long totalBytes = 0;
+ for (int i = 0; i < testTimes; i++) {
+ buf.clear();
+ oldCounter.writeRegisters(buf);
+ totalBytes += buf.position();
+ buf.flip();
+ oldCounter.readRegisters(buf);
+ }
+ System.out.println("old serialize bytes : " + totalBytes / testTimes + "B");
+ }
+ });
+ final HLLCounter newCounter = getRandNewCounter(p, cardinality);
+ long newTime = runTestCase(new TestCase() {
+ @Override
+ public void run() throws Exception {
+ ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
+ long totalBytes = 0;
+ for (int i = 0; i < testTimes; i++) {
+ buf.clear();
+ newCounter.writeRegisters(buf);
+ totalBytes += buf.position();
+ buf.flip();
+ newCounter.readRegisters(buf);
+ }
+ System.out.println("new serialize bytes : " + totalBytes / testTimes + "B");
+ }
+ });
+ assertEquals(RegisterType.SPARSE, newCounter.getRegisterType());
+ System.out.println("old serialize time : " + oldTime);
+ System.out.println("new serialize time : " + newTime);
+ }
+ HLLCounter.OVERFLOW_FACTOR = oldFactor;
+ }
+
+ @Test
+ public void denseSerializeBenchmark() throws Exception {
+ final int p = 15;
+ final int m = 1 << p;
+ double oldFactor = HLLCounter.OVERFLOW_FACTOR;
+ HLLCounter.OVERFLOW_FACTOR = 0; //keep sparse
+ System.out.println("denseSerializeBenchmark()");
+ for (int cardinality : getTestDataDivide(m)) {
+ System.out.println("----------------------------");
+ System.out.println("cardinality : " + cardinality);
+ final HLLCounterOld oldCounter = getRandOldCounter(p, cardinality);
+ long oldTime = runTestCase(new TestCase() {
+ @Override
+ public void run() throws Exception {
+ ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
+ long totalBytes = 0;
+ for (int i = 0; i < testTimes; i++) {
+ buf.clear();
+ oldCounter.writeRegisters(buf);
+ totalBytes += buf.position();
+ buf.flip();
+ oldCounter.readRegisters(buf);
+ }
+ System.out.println("old serialize bytes : " + totalBytes / testTimes + "B");
+ }
+ });
+ final HLLCounter newCounter = getRandNewCounter(p, cardinality, RegisterType.DENSE);
+ long newTime = runTestCase(new TestCase() {
+ @Override
+ public void run() throws Exception {
+ ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
+ long totalBytes = 0;
+ for (int i = 0; i < testTimes; i++) {
+ buf.clear();
+ newCounter.writeRegisters(buf);
+ totalBytes += buf.position();
+ buf.flip();
+ newCounter.readRegisters(buf);
+ }
+ System.out.println("new serialize bytes : " + totalBytes / testTimes + "B");
+ }
+ });
+ assertEquals(RegisterType.DENSE, newCounter.getRegisterType());
+ System.out.println("old serialize time : " + oldTime);
+ System.out.println("new serialize time : " + newTime);
+ }
+ HLLCounter.OVERFLOW_FACTOR = oldFactor;
+ }
+
+ interface TestCase {
+ void run() throws Exception;
+ }
+
+ public long runTestCase(TestCase testCase) throws Exception {
+ long startTime = System.currentTimeMillis();
+ testCase.run();
+ return System.currentTimeMillis() - startTime;
+ }
+
+ public HLLCounterOld getRandOldCounter(int p, int num) {
+ HLLCounterOld c = new HLLCounterOld(p);
+ for (int i = 0; i < num; i++)
+ c.add(i);
+ return c;
+ }
+
+ public HLLCounter getRandNewCounter(int p, int num) {
+ HLLCounter c = new HLLCounter(p);
+ for (int i = 0; i < num; i++)
+ c.add(i);
+ return c;
+ }
+
+ public HLLCounter getRandNewCounter(int p, int num, RegisterType type) {
+ HLLCounter c = new HLLCounter(p, type);
+ for (int i = 0; i < num; i++)
+ c.add(i);
+ return c;
+ }
+
+ public static int[] getTestDataDivide(int m) {
+ return new int[] { 1, 5, 10, 100, m / 200, m / 100, m / 50, m / 20, m / 10, m };
+ }
+}
http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
index 5445491..ffba181 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
@@ -53,7 +53,7 @@ import org.apache.kylin.cube.kv.CubeDimEncMap;
import org.apache.kylin.cube.kv.RowKeyEncoder;
import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.engine.mr.HadoopUtil;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
+import org.apache.kylin.measure.hllc.HLLCounter;
import org.apache.kylin.metadata.datatype.DataType;
import org.apache.kylin.metadata.model.FunctionDesc;
import org.apache.kylin.metadata.model.MeasureDesc;
@@ -76,7 +76,7 @@ public class CubeStatsReader {
final int samplingPercentage;
final int mapperNumberOfFirstBuild; // becomes meaningless after merge
final double mapperOverlapRatioOfFirstBuild; // becomes meaningless after merge
- final Map<Long, HyperLogLogPlusCounterNew> cuboidRowEstimatesHLL;
+ final Map<Long, HLLCounter> cuboidRowEstimatesHLL;
final CuboidScheduler cuboidScheduler;
public CubeStatsReader(CubeSegment cubeSegment, KylinConfig kylinConfig) throws IOException {
@@ -96,7 +96,7 @@ public class CubeStatsReader {
int percentage = 100;
int mapperNumber = 0;
double mapperOverlapRatio = 0;
- Map<Long, HyperLogLogPlusCounterNew> counterMap = Maps.newHashMap();
+ Map<Long, HLLCounter> counterMap = Maps.newHashMap();
LongWritable key = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), hadoopConf);
BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), hadoopConf);
@@ -108,7 +108,7 @@ public class CubeStatsReader {
} else if (key.get() == -2) {
mapperNumber = Bytes.toInt(value.getBytes());
} else if (key.get() > 0) {
- HyperLogLogPlusCounterNew hll = new HyperLogLogPlusCounterNew(kylinConfig.getCubeStatsHLLPrecision());
+ HLLCounter hll = new HLLCounter(kylinConfig.getCubeStatsHLLPrecision());
ByteArray byteArray = new ByteArray(value.getBytes());
hll.readRegisters(byteArray.asBuffer());
counterMap.put(key.get(), hll);
@@ -161,9 +161,9 @@ public class CubeStatsReader {
return mapperOverlapRatioOfFirstBuild;
}
- public static Map<Long, Long> getCuboidRowCountMapFromSampling(Map<Long, HyperLogLogPlusCounterNew> hllcMap, int samplingPercentage) {
+ public static Map<Long, Long> getCuboidRowCountMapFromSampling(Map<Long, HLLCounter> hllcMap, int samplingPercentage) {
Map<Long, Long> cuboidRowCountMap = Maps.newHashMap();
- for (Map.Entry<Long, HyperLogLogPlusCounterNew> entry : hllcMap.entrySet()) {
+ for (Map.Entry<Long, HLLCounter> entry : hllcMap.entrySet()) {
// No need to adjust according sampling percentage. Assumption is that data set is far
// more than cardinality. Even a percentage of the data should already see all cardinalities.
cuboidRowCountMap.put(entry.getKey(), entry.getValue().getCountEstimate());
http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsWriter.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsWriter.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsWriter.java
index 219cdf2..8f400c3 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsWriter.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsWriter.java
@@ -33,17 +33,17 @@ import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.kylin.common.util.Bytes;
import org.apache.kylin.measure.BufferedMeasureCodec;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
+import org.apache.kylin.measure.hllc.HLLCounter;
public class CubeStatsWriter {
public static void writeCuboidStatistics(Configuration conf, Path outputPath, //
- Map<Long, HyperLogLogPlusCounterNew> cuboidHLLMap, int samplingPercentage) throws IOException {
+ Map<Long, HLLCounter> cuboidHLLMap, int samplingPercentage) throws IOException {
writeCuboidStatistics(conf, outputPath, cuboidHLLMap, samplingPercentage, 0, 0);
}
public static void writeCuboidStatistics(Configuration conf, Path outputPath, //
- Map<Long, HyperLogLogPlusCounterNew> cuboidHLLMap, int samplingPercentage, int mapperNumber, double mapperOverlapRatio) throws IOException {
+ Map<Long, HLLCounter> cuboidHLLMap, int samplingPercentage, int mapperNumber, double mapperOverlapRatio) throws IOException {
Path seqFilePath = new Path(outputPath, BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME);
List<Long> allCuboids = new ArrayList<Long>();
http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
index 0d388c7..3115fe4 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
@@ -47,7 +47,7 @@ import org.apache.kylin.engine.mr.KylinReducer;
import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
import org.apache.kylin.engine.mr.common.BatchConstants;
import org.apache.kylin.engine.mr.common.CubeStatsWriter;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
+import org.apache.kylin.measure.hllc.HLLCounter;
import org.apache.kylin.metadata.model.TblColRef;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -64,7 +64,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<SelfDefineSortableK
private List<TblColRef> columnList;
private String statisticsOutput = null;
private List<Long> baseCuboidRowCountInMappers;
- protected Map<Long, HyperLogLogPlusCounterNew> cuboidHLLMap = null;
+ protected Map<Long, HLLCounter> cuboidHLLMap = null;
protected long baseCuboidId;
protected CubeDesc cubeDesc;
private long totalRowsBeforeMerge = 0;
@@ -156,7 +156,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<SelfDefineSortableK
// for hll
long cuboidId = Bytes.toLong(key.getBytes(), 1, Bytes.SIZEOF_LONG);
for (Text value : values) {
- HyperLogLogPlusCounterNew hll = new HyperLogLogPlusCounterNew(cubeConfig.getCubeStatsHLLPrecision());
+ HLLCounter hll = new HLLCounter(cubeConfig.getCubeStatsHLLPrecision());
ByteBuffer bf = ByteBuffer.wrap(value.getBytes(), 0, value.getLength());
hll.readRegisters(bf);
@@ -270,7 +270,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<SelfDefineSortableK
if (isStatistics) {
// output the hll info
long grandTotal = 0;
- for (HyperLogLogPlusCounterNew hll : cuboidHLLMap.values()) {
+ for (HLLCounter hll : cuboidHLLMap.values()) {
grandTotal += hll.getCountEstimate();
}
double mapperOverlapRatio = grandTotal == 0 ? 0 : (double) totalRowsBeforeMerge / grandTotal;
http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
index c0575f1..5692c76 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
@@ -29,7 +29,7 @@ import org.apache.kylin.common.util.Bytes;
import org.apache.kylin.cube.cuboid.CuboidScheduler;
import org.apache.kylin.engine.mr.common.BatchConstants;
import org.apache.kylin.measure.BufferedMeasureCodec;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
+import org.apache.kylin.measure.hllc.HLLCounter;
import org.apache.kylin.metadata.model.TblColRef;
import com.google.common.collect.Lists;
@@ -45,7 +45,7 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap
protected CuboidScheduler cuboidScheduler = null;
protected int nRowKey;
private Integer[][] allCuboidsBitSet = null;
- private HyperLogLogPlusCounterNew[] allCuboidsHLL = null;
+ private HLLCounter[] allCuboidsHLL = null;
private Long[] cuboidIds;
private HashFunction hf = null;
private int rowCount = 0;
@@ -76,9 +76,9 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap
allCuboidsBitSet = allCuboidsBitSetList.toArray(new Integer[cuboidIdList.size()][]);
cuboidIds = cuboidIdList.toArray(new Long[cuboidIdList.size()]);
- allCuboidsHLL = new HyperLogLogPlusCounterNew[cuboidIds.length];
+ allCuboidsHLL = new HLLCounter[cuboidIds.length];
for (int i = 0; i < cuboidIds.length; i++) {
- allCuboidsHLL[i] = new HyperLogLogPlusCounterNew(cubeDesc.getConfig().getCubeStatsHLLPrecision());
+ allCuboidsHLL[i] = new HLLCounter(cubeDesc.getConfig().getCubeStatsHLLPrecision());
}
hf = Hashing.murmur3_32();
@@ -207,7 +207,7 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap
if (collectStatistics) {
ByteBuffer hllBuf = ByteBuffer.allocate(BufferedMeasureCodec.DEFAULT_BUFFER_SIZE);
// output each cuboid's hll to reducer, key is 0 - cuboidId
- HyperLogLogPlusCounterNew hll;
+ HLLCounter hll;
for (int i = 0; i < cuboidIds.length; i++) {
hll = allCuboidsHLL[i];
http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java
index e839989..811fc24 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java
@@ -47,7 +47,7 @@ import org.apache.kylin.job.exception.ExecuteException;
import org.apache.kylin.job.execution.AbstractExecutable;
import org.apache.kylin.job.execution.ExecutableContext;
import org.apache.kylin.job.execution.ExecuteResult;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
+import org.apache.kylin.measure.hllc.HLLCounter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -56,7 +56,7 @@ import com.google.common.collect.Maps;
public class MergeStatisticsStep extends AbstractExecutable {
private static final Logger logger = LoggerFactory.getLogger(MergeStatisticsStep.class);
- protected Map<Long, HyperLogLogPlusCounterNew> cuboidHLLMap = Maps.newHashMap();
+ protected Map<Long, HLLCounter> cuboidHLLMap = Maps.newHashMap();
public MergeStatisticsStep() {
super();
@@ -100,7 +100,7 @@ public class MergeStatisticsStep extends AbstractExecutable {
// sampling percentage;
averageSamplingPercentage += Bytes.toInt(value.getBytes());
} else if (key.get() > 0) {
- HyperLogLogPlusCounterNew hll = new HyperLogLogPlusCounterNew(kylinConf.getCubeStatsHLLPrecision());
+ HLLCounter hll = new HLLCounter(kylinConf.getCubeStatsHLLPrecision());
ByteArray byteArray = new ByteArray(value.getBytes());
hll.readRegisters(byteArray.asBuffer());
http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/CubeSamplingTest.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/CubeSamplingTest.java b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/CubeSamplingTest.java
index cae3b62..beec00f 100644
--- a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/CubeSamplingTest.java
+++ b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/CubeSamplingTest.java
@@ -24,7 +24,7 @@ import java.util.List;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.kylin.common.util.ByteArray;
import org.apache.kylin.common.util.Bytes;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
+import org.apache.kylin.measure.hllc.HLLCounter;
import org.junit.Before;
import org.junit.Test;
@@ -45,7 +45,7 @@ public class CubeSamplingTest {
private Integer[][] allCuboidsBitSet;
private HashFunction hf = null;
private long baseCuboidId;
- private HyperLogLogPlusCounterNew[] allCuboidsHLL = null;
+ private HLLCounter[] allCuboidsHLL = null;
private final byte[] seperator = Bytes.toBytes(",");
@Before
@@ -61,9 +61,9 @@ public class CubeSamplingTest {
allCuboidsBitSet = allCuboidsBitSetList.toArray(new Integer[allCuboidsBitSetList.size()][]);
System.out.println("Totally have " + allCuboidsBitSet.length + " cuboids.");
- allCuboidsHLL = new HyperLogLogPlusCounterNew[allCuboids.size()];
+ allCuboidsHLL = new HLLCounter[allCuboids.size()];
for (int i = 0; i < allCuboids.size(); i++) {
- allCuboidsHLL[i] = new HyperLogLogPlusCounterNew(14);
+ allCuboidsHLL[i] = new HLLCounter(14);
}
// hf = Hashing.goodFastHash(32);
http://git-wip-us.apache.org/repos/asf/kylin/blob/e6e330a8/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducerTest.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducerTest.java b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducerTest.java
index a00db94..f6f790e 100644
--- a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducerTest.java
+++ b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducerTest.java
@@ -28,7 +28,7 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.kylin.engine.mr.HadoopUtil;
import org.apache.kylin.engine.mr.common.CubeStatsWriter;
-import org.apache.kylin.measure.hllc.HyperLogLogPlusCounterNew;
+import org.apache.kylin.measure.hllc.HLLCounter;
import org.junit.Test;
import com.google.common.collect.Maps;
@@ -48,7 +48,7 @@ public class FactDistinctColumnsReducerTest {
}
System.out.println(outputPath);
- Map<Long, HyperLogLogPlusCounterNew> cuboidHLLMap = Maps.newHashMap();
+ Map<Long, HLLCounter> cuboidHLLMap = Maps.newHashMap();
CubeStatsWriter.writeCuboidStatistics(conf, outputPath, cuboidHLLMap, 100);
FileSystem.getLocal(conf).delete(outputPath, true);