You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2016/12/22 06:31:17 UTC
[1/2] kylin git commit: KYLIN-1832 single value register
Repository: kylin
Updated Branches:
refs/heads/master c3bb878fa -> 78a591798
KYLIN-1832 single value register
Signed-off-by: Li Yang <li...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/a39d0786
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/a39d0786
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/a39d0786
Branch: refs/heads/master
Commit: a39d0786008d2a65fdb19440949477673516b69c
Parents: c3bb878
Author: xiefan46 <95...@qq.com>
Authored: Tue Dec 20 12:59:24 2016 +0800
Committer: Li Yang <li...@apache.org>
Committed: Thu Dec 22 13:45:30 2016 +0800
----------------------------------------------------------------------
.../kylin/measure/hllc/DenseRegister.java | 14 ++-
.../apache/kylin/measure/hllc/HLLCounter.java | 102 +++++++++++-----
.../org/apache/kylin/measure/hllc/Register.java | 2 +
.../apache/kylin/measure/hllc/RegisterType.java | 2 +-
.../kylin/measure/hllc/SingleValueRegister.java | 118 +++++++++++++++++++
.../kylin/measure/hllc/SparseRegister.java | 26 +++-
.../kylin/measure/hllc/HLLCounterTest.java | 64 ++++++++--
.../hllc/NewHyperLogLogBenchmarkTest.java | 27 +++--
8 files changed, 301 insertions(+), 54 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/a39d0786/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java
index c5814aa..0dea535 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java
@@ -46,19 +46,24 @@ public class DenseRegister implements Register {
@Override
public void merge(Register another) {
- if (another instanceof DenseRegister) {
+ if (another.getRegisterType() == RegisterType.DENSE) {
DenseRegister dr = (DenseRegister) another;
for (int i = 0; i < register.length; i++) {
if (dr.register[i] > register[i])
register[i] = dr.register[i];
}
- } else {
+ } else if(another.getRegisterType() == RegisterType.SPARSE){
SparseRegister sr = (SparseRegister) another;
Collection<Map.Entry<Integer, Byte>> allValue = sr.getAllValue();
for (Map.Entry<Integer, Byte> entry : allValue) {
if (entry.getValue() > register[entry.getKey()])
register[entry.getKey()] = entry.getValue();
}
+ }else{
+ SingleValueRegister sr = (SingleValueRegister)another;
+ if(sr.getSize() > 0 && sr.getValue() > register[sr.getSingleValuePos()]){
+ register[sr.getSingleValuePos()] = sr.getValue();
+ }
}
}
@@ -79,6 +84,11 @@ public class DenseRegister implements Register {
}
@Override
+ public RegisterType getRegisterType() {
+ return RegisterType.DENSE;
+ }
+
+ @Override
public int hashCode() {
final int prime = 31;
int result = 1;
http://git-wip-us.apache.org/repos/asf/kylin/blob/a39d0786/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCounter.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCounter.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCounter.java
index 22b5e55..6325651 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCounter.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCounter.java
@@ -44,19 +44,19 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
private Register register;
public HLLCounter() {
- this(10, RegisterType.SPARSE, Hashing.murmur3_128());
+ this(10, RegisterType.SINGLE_VALUE, Hashing.murmur3_128());
}
public HLLCounter(int p) {
- this(p, RegisterType.SPARSE, Hashing.murmur3_128());
+ this(p, RegisterType.SINGLE_VALUE, Hashing.murmur3_128());
}
public HLLCounter(int p, HashFunction hashFunc) {
- this(p, RegisterType.SPARSE, hashFunc);
+ this(p, RegisterType.SINGLE_VALUE, hashFunc);
}
public HLLCounter(HLLCounter another) {
- this(another.p, another.hashFunc);
+ this(another.p, another.getRegisterType(), another.hashFunc);
merge(another);
}
@@ -68,7 +68,10 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
this.p = p;
this.m = 1 << p;//(int) Math.pow(2, p);
this.hashFunc = hashFunc;
- if (type == RegisterType.SPARSE) {
+
+ if (type == RegisterType.SINGLE_VALUE) {
+ this.register = new SingleValueRegister();
+ } else if (type == RegisterType.SPARSE) {
this.register = new SparseRegister();
} else {
this.register = new DenseRegister(p);
@@ -79,7 +82,7 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
double over = OVERFLOW_FACTOR * m;
return size > (int) over;
}
-
+
public void add(int value) {
add(hashFunc.hashInt(value).asLong());
}
@@ -100,11 +103,26 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
int bucketMask = m - 1;
int bucket = (int) (hash & bucketMask);
int firstOnePos = Long.numberOfLeadingZeros(hash | bucketMask) + 1;
- Byte b = register.get(bucket);
- if (b == null || (byte) firstOnePos > b) {
- register.set(bucket, (byte) firstOnePos);
+ if (register.getRegisterType() == RegisterType.SINGLE_VALUE) {
+ SingleValueRegister sr = (SingleValueRegister) register;
+ int pos = sr.getSingleValuePos();
+ if (pos < 0 || pos == bucket) { //one or zero value
+ setIfBigger(register, bucket, (byte) firstOnePos);
+ } else { //two value
+ this.register = sr.toSparse();
+ setIfBigger(register, bucket, (byte) firstOnePos);
+ }
+ } else {
+ setIfBigger(register, bucket, (byte) firstOnePos);
+ toDenseIfNeeded();
+ }
+ }
+
+ private void setIfBigger(Register register, int pos, byte value) {
+ Byte b = register.get(pos);
+ if (b == null || value > b) {
+ register.set(pos, value);
}
- toDenseIfNeeded();
}
private void toDenseIfNeeded() {
@@ -118,15 +136,38 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
public void merge(HLLCounter another) {
assert this.p == another.p;
assert this.hashFunc == another.hashFunc;
- if (register instanceof SparseRegister && another.register instanceof SparseRegister) {
- register.merge(another.register);
- toDenseIfNeeded();
- } else if (register instanceof SparseRegister && another.register instanceof DenseRegister) {
- register = ((SparseRegister) register).toDense(p);
- register.merge(another.register);
- } else {
- register.merge(another.register);
+ switch (register.getRegisterType()) {
+ case SINGLE_VALUE:
+ switch (another.getRegisterType()) {
+ case SINGLE_VALUE:
+ if (register.getSize() > 0 && another.register.getSize() > 0) {
+ register = ((SingleValueRegister) register).toSparse();
+ } else if (register.getSize() == 0 && another.register.getSize() > 0) {
+ SingleValueRegister sr = (SingleValueRegister) another.register;
+ register.set(sr.getSingleValuePos(), sr.getValue());
+ }
+ break;
+ case SPARSE:
+ register = ((SingleValueRegister) register).toSparse();
+ break;
+ case DENSE:
+ register = ((SingleValueRegister) register).toDense(this.p);
+ break;
+ default:
+ break;
+ }
+
+ break;
+ case SPARSE:
+ if (another.getRegisterType() == RegisterType.DENSE) {
+ register = ((SparseRegister) register).toDense(p);
+ }
+ break;
+ default:
+ break;
}
+ register.merge(another.register);
+ toDenseIfNeeded();
}
public long getCountEstimate() {
@@ -160,7 +201,9 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
zeroBuckets = 0;
Register register = hllc.getRegister();
DenseRegister dr;
- if (register instanceof SparseRegister) {
+ if (register.getRegisterType() == RegisterType.SINGLE_VALUE) {
+ dr = ((SingleValueRegister) register).toDense(p);
+ } else if (register.getRegisterType() == RegisterType.SPARSE) {
dr = ((SparseRegister) register).toDense(p);
} else {
dr = (DenseRegister) register;
@@ -224,7 +267,7 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
// decide output scheme -- map (3*size bytes) or array (2^p bytes)
byte scheme;
- if (register instanceof SparseRegister || 5 + (indexLen + 1) * size < m) {
+ if (register instanceof SingleValueRegister || register instanceof SparseRegister || 5 + (indexLen + 1) * size < m) {
scheme = 0; // map
} else {
scheme = 1; // array
@@ -232,12 +275,18 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
out.put(scheme);
if (scheme == 0) { // map scheme
BytesUtil.writeVInt(size, out);
- if (register instanceof SparseRegister) { //sparse register
+ if (register.getRegisterType() == RegisterType.SPARSE) { //sparse register
Collection<Map.Entry<Integer, Byte>> allValue = ((SparseRegister) register).getAllValue();
for (Map.Entry<Integer, Byte> entry : allValue) {
writeUnsigned(entry.getKey(), indexLen, out);
out.put(entry.getValue());
}
+ } else if (register.getRegisterType() == RegisterType.SINGLE_VALUE) {
+ if (size > 0) {
+ SingleValueRegister sr = (SingleValueRegister) register;
+ writeUnsigned(sr.getSingleValuePos(), indexLen, out);
+ out.put(sr.getValue());
+ }
} else { //dense register
byte[] registers = ((DenseRegister) register).getRawRegister();
for (int i = 0; i < m; i++) {
@@ -262,8 +311,10 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
throw new IllegalArgumentException("register size (" + size + ") cannot be larger than m (" + m + ")");
if (isDense(size)) {
register = new DenseRegister(p);
+ } else if (size <= 1) {
+ register = new SingleValueRegister();
} else {
- register = new SparseRegister();//default is sparse
+ register = new SparseRegister();
}
int indexLen = getRegisterIndexSize();
int key = 0;
@@ -272,7 +323,7 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
register.set(key, in.get());
}
} else if (scheme == 1) { // array scheme
- if (register instanceof SparseRegister) {
+ if (register.getRegisterType() != RegisterType.DENSE) {
register = new DenseRegister(p);
}
in.get(((DenseRegister) register).getRawRegister());
@@ -368,10 +419,7 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
}
public RegisterType getRegisterType() {
- if (register instanceof SparseRegister)
- return RegisterType.SPARSE;
- else
- return RegisterType.DENSE;
+ return register.getRegisterType();
}
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/a39d0786/core-metadata/src/main/java/org/apache/kylin/measure/hllc/Register.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/Register.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/Register.java
index a6ef94f..6d8086d 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/Register.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/Register.java
@@ -32,4 +32,6 @@ public interface Register {
int getSize();
+ RegisterType getRegisterType();
+
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/a39d0786/core-metadata/src/main/java/org/apache/kylin/measure/hllc/RegisterType.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/RegisterType.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/RegisterType.java
index fec9939..7288bca 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/RegisterType.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/RegisterType.java
@@ -21,5 +21,5 @@ package org.apache.kylin.measure.hllc;
* Created by xiefan on 16-12-9.
*/
public enum RegisterType {
- SPARSE, DENSE
+ SINGLE_VALUE, SPARSE, DENSE
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/a39d0786/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SingleValueRegister.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SingleValueRegister.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SingleValueRegister.java
new file mode 100644
index 0000000..5b2f7c8
--- /dev/null
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SingleValueRegister.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+package org.apache.kylin.measure.hllc;
+
+/**
+ * Created by xiefan on 16-12-20.
+ */
+public class SingleValueRegister implements Register {
+
+ private int singleValuePos;
+
+ private byte value;
+
+ public SingleValueRegister() {
+ this.singleValuePos = -1;
+ }
+
+ @Override
+ public void set(int pos, byte value) {
+ this.singleValuePos = pos;
+ this.value = value;
+ }
+
+ @Override
+ public byte get(int pos) {
+ if (pos != this.singleValuePos)
+ return 0;
+ return value;
+ }
+
+ /*
+ this method should not be used in single value register
+ */
+ @Deprecated
+ @Override
+ public void merge(Register another) {
+ return;
+ }
+
+ @Override
+ public void clear() {
+ this.singleValuePos = -1;
+ }
+
+ @Override
+ public int getSize() {
+ if (this.singleValuePos >= 0)
+ return 1;
+ return 0;
+ }
+
+ @Override
+ public RegisterType getRegisterType() {
+ return RegisterType.SINGLE_VALUE;
+ }
+
+ public int getSingleValuePos() {
+ return singleValuePos;
+ }
+
+ public byte getValue() {
+ return value;
+ }
+
+ public SparseRegister toSparse() {
+ SparseRegister sr = new SparseRegister();
+ if (singleValuePos >= 0)
+ sr.set(singleValuePos, value);
+ return sr;
+ }
+
+ public DenseRegister toDense(int p) {
+ DenseRegister dr = new DenseRegister(p);
+ if (singleValuePos >= 0) {
+ dr.set(singleValuePos, value);
+ }
+ return dr;
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + singleValuePos;
+ result = prime * result + value;
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj)
+ return true;
+ if (obj == null)
+ return false;
+ if (getClass() != obj.getClass())
+ return false;
+ SingleValueRegister other = (SingleValueRegister) obj;
+ if (this.singleValuePos != other.singleValuePos || this.value != other.value) {
+ return false;
+ }
+ return true;
+ }
+}
http://git-wip-us.apache.org/repos/asf/kylin/blob/a39d0786/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java
index d6bb024..bb550e1 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java
@@ -53,12 +53,21 @@ public class SparseRegister implements Register {
@Override
public void merge(Register another) {
- assert another instanceof SparseRegister;
- SparseRegister sr = (SparseRegister) another;
- for (Map.Entry<Integer, Byte> entry : sr.sparseRegister.entrySet()) {
- byte v = get(entry.getKey());
- if (entry.getValue() > v)
- sparseRegister.put(entry.getKey(), entry.getValue());
+ assert another.getRegisterType() != RegisterType.DENSE;
+ if(another.getRegisterType() == RegisterType.SPARSE) {
+ SparseRegister sr = (SparseRegister) another;
+ for (Map.Entry<Integer, Byte> entry : sr.sparseRegister.entrySet()) {
+ byte v = get(entry.getKey());
+ if (entry.getValue() > v)
+ sparseRegister.put(entry.getKey(), entry.getValue());
+ }
+ }else if(another.getRegisterType() == RegisterType.SINGLE_VALUE){
+ SingleValueRegister sr = (SingleValueRegister)another;
+ if(sr.getSize() > 0){
+ byte v = get(sr.getSingleValuePos());
+ if (sr.getValue() > v)
+ sparseRegister.put(sr.getSingleValuePos(), sr.getValue());
+ }
}
}
@@ -73,6 +82,11 @@ public class SparseRegister implements Register {
}
@Override
+ public RegisterType getRegisterType() {
+ return RegisterType.SPARSE;
+ }
+
+ @Override
public int hashCode() {
final int prime = 31;
int result = 1;
http://git-wip-us.apache.org/repos/asf/kylin/blob/a39d0786/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterTest.java
index cd75eac..1b603a7 100644
--- a/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterTest.java
+++ b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterTest.java
@@ -52,10 +52,12 @@ public class HLLCounterTest {
HLLCounter one = new HLLCounter(14);
for (int i = 0; i < 1000000; i++) {
one.clear();
- one.add(rand1.nextInt());
+ one.add(i);
+ //System.out.println(hllc.getCountEstimate());
hllc.merge(one);
}
System.out.println(hllc.getCountEstimate());
+ System.out.println(hllc.getRegister().getRegisterType());
assertTrue(hllc.getCountEstimate() > 1000000 * 0.9);
}
@@ -102,15 +104,28 @@ public class HLLCounterTest {
public void compareResult() throws IOException {
int p = 12; //4096
int m = 1 << p;
-
+
ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
-
+
for (int t = 0; t < 5; t++) {
- //compare sparse
+ //compare single
HLLCounterOld oldCounter = new HLLCounterOld(p);
HLLCounter newCounter = new HLLCounter(p);
HLLCounter newCounter2 = new HLLCounter(p);
+ newCounter.add(1);
+ oldCounter.add(1);
+ assertEquals(RegisterType.SINGLE_VALUE,newCounter.getRegisterType());
+ assertEquals(oldCounter.getCountEstimate(), newCounter.getCountEstimate());
+ buf.clear();
+ oldCounter.writeRegisters(buf);
+ buf.flip();
+ newCounter2.readRegisters(buf);
+ assertEquals(oldCounter.getCountEstimate(), newCounter2.getCountEstimate());
+ //compare sparse
+ oldCounter.clear();
+ newCounter.clear();
+ newCounter2.clear();
for (int i = 0; i < 20; i++) {
int r = rand1.nextInt();
oldCounter.add(r);
@@ -118,13 +133,13 @@ public class HLLCounterTest {
}
assertEquals(RegisterType.SPARSE, newCounter.getRegisterType());
assertEquals(oldCounter.getCountEstimate(), newCounter.getCountEstimate());
-
+
buf.clear();
oldCounter.writeRegisters(buf);
buf.flip();
newCounter2.readRegisters(buf);
assertEquals(oldCounter.getCountEstimate(), newCounter2.getCountEstimate());
-
+
//compare dense
for (int i = 0; i < m / 2; i++) {
int r = rand1.nextInt();
@@ -133,7 +148,7 @@ public class HLLCounterTest {
}
assertEquals(RegisterType.DENSE, newCounter.getRegisterType());
assertEquals(oldCounter.getCountEstimate(), newCounter.getCountEstimate());
-
+
buf.clear();
oldCounter.writeRegisters(buf);
buf.flip();
@@ -167,13 +182,32 @@ public class HLLCounterTest {
@Test
public void testEquivalence() {
- byte[] a = new byte[] { 0, 3, 4, 42, 2, 2 };
- byte[] b = new byte[] { 3, 4, 42 };
+ //test single
HLLCounter ha = new HLLCounter();
HLLCounter hb = new HLLCounter();
+ ha.add(1);
+ hb.add(1);
+ Assert.assertTrue(ha.getCountEstimate() == hb.getCountEstimate());
+ //test sparse
+ ha = new HLLCounter();
+ hb = new HLLCounter();
+ byte[] a = new byte[] { 0, 3, 4, 42, 2, 2 };
+ byte[] b = new byte[] { 3, 4, 42 };
ha.add(a, 1, 3);
hb.add(b);
-
+ Assert.assertTrue(ha.getCountEstimate() == hb.getCountEstimate());
+ //test dense
+ int p = 10;
+ ha = new HLLCounter(p);
+ hb = new HLLCounter(p);
+ int m = 1 << p;
+ double over = HLLCounter.OVERFLOW_FACTOR * m;
+ int overFlow = (int) over + 1000;
+ for (int i = 0; i < overFlow; i++){
+ int k = rand1.nextInt();
+ ha.add(k);
+ hb.add(k);
+ }
Assert.assertTrue(ha.getCountEstimate() == hb.getCountEstimate());
}
@@ -182,6 +216,10 @@ public class HLLCounterTest {
int p = 15;
int m = 1 << p;
HLLCounter counter = new HLLCounter(p);
+ assertEquals(RegisterType.SINGLE_VALUE, counter.getRegisterType());
+ counter.add(1);
+ assertEquals(RegisterType.SINGLE_VALUE, counter.getRegisterType());
+ counter.add(2);
assertEquals(RegisterType.SPARSE, counter.getRegisterType());
double over = HLLCounter.OVERFLOW_FACTOR * m;
int overFlow = (int) over + 1000;
@@ -192,11 +230,15 @@ public class HLLCounterTest {
@Test
public void testSerialilze() throws Exception {
- //test sparse serialize
+ //test single serialize
int p = 15;
int m = 1 << p;
HLLCounter counter = new HLLCounter(p);
counter.add(123);
+ assertEquals(RegisterType.SINGLE_VALUE, counter.getRegisterType());
+ checkSerialize(counter);
+ //test sparse serialize
+ counter.add(124);
assertEquals(RegisterType.SPARSE, counter.getRegisterType());
checkSerialize(counter);
//test dense serialize
http://git-wip-us.apache.org/repos/asf/kylin/blob/a39d0786/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java
index 5de2a3a..26f45d6 100644
--- a/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java
+++ b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java
@@ -27,6 +27,7 @@ import java.nio.ByteBuffer;
import java.util.Random;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
/**
* Created by xiefan on 16-12-12.
@@ -37,7 +38,7 @@ public class NewHyperLogLogBenchmarkTest {
public static final Random rand = new Random(1);
- final int testTimes = 10000;
+ final int testTimes = 100000;
@Test
public void denseToDenseRegisterMergeBenchmark() throws Exception {
@@ -47,7 +48,7 @@ public class NewHyperLogLogBenchmarkTest {
System.out.println("denseToDenseRegisterMergeBenchmark(), m : " + m);
double oldFactor = HLLCounter.OVERFLOW_FACTOR;
HLLCounter.OVERFLOW_FACTOR = 1.1; //keep sparse
- for (int cardinality : getTestDataDivide(m)) {
+ for (int cardinality : new int[]{m/10,m/5,m/2,m}) {
final HLLCounterOld oldCounter = new HLLCounterOld(p);
final HLLCounterOld oldCounter2 = getRandOldCounter(p, cardinality);
long oldTime = runTestCase(new TestCase() {
@@ -100,7 +101,7 @@ public class NewHyperLogLogBenchmarkTest {
}
}
});
- final HLLCounter newCounter = new HLLCounter(p);
+ final HLLCounter newCounter = new HLLCounter(p,RegisterType.SPARSE);
final HLLCounter newCounter2 = getRandNewCounter(p, cardinality);
long newTime = runTestCase(new TestCase() {
@Override
@@ -111,7 +112,11 @@ public class NewHyperLogLogBenchmarkTest {
}
});
assertEquals(RegisterType.SPARSE, newCounter.getRegisterType());
- assertEquals(RegisterType.SPARSE, newCounter2.getRegisterType());
+ if(cardinality == 1){
+ assertEquals(RegisterType.SINGLE_VALUE,newCounter2.getRegisterType());
+ }else{
+ assertEquals(RegisterType.SPARSE,newCounter2.getRegisterType());
+ }
System.out.println("----------------------------");
System.out.println("cardinality : " + cardinality);
System.out.println("old time : " + oldTime);
@@ -151,7 +156,11 @@ public class NewHyperLogLogBenchmarkTest {
}
});
assertEquals(RegisterType.DENSE, newCounter.getRegisterType());
- assertEquals(RegisterType.SPARSE, newCounter2.getRegisterType());
+ if(cardinality == 1){
+ assertEquals(RegisterType.SINGLE_VALUE,newCounter2.getRegisterType());
+ }else{
+ assertEquals(RegisterType.SPARSE,newCounter2.getRegisterType());
+ }
System.out.println("old time : " + oldTime);
System.out.println("new time : " + newTime);
}
@@ -200,7 +209,11 @@ public class NewHyperLogLogBenchmarkTest {
System.out.println("new serialize bytes : " + totalBytes / testTimes + "B");
}
});
- assertEquals(RegisterType.SPARSE, newCounter.getRegisterType());
+ if(cardinality == 1){
+ assertEquals(RegisterType.SINGLE_VALUE,newCounter.getRegisterType());
+ }else{
+ assertEquals(RegisterType.SPARSE,newCounter.getRegisterType());
+ }
System.out.println("old serialize time : " + oldTime);
System.out.println("new serialize time : " + newTime);
}
@@ -288,6 +301,6 @@ public class NewHyperLogLogBenchmarkTest {
}
public static int[] getTestDataDivide(int m) {
- return new int[] { 1, 5, 10, 100, m / 200, m / 100, m / 50, m / 20, m / 10, m };
+ return new int[] { 1, 5, 10, 100, m / 200, m / 100, m / 50, m / 20, m / 10};
}
}
[2/2] kylin git commit: KYLIN-1832 code review
Posted by li...@apache.org.
KYLIN-1832 code review
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/78a59179
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/78a59179
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/78a59179
Branch: refs/heads/master
Commit: 78a59179825012daa94a117babbf0c3aea15ec89
Parents: a39d078
Author: Li Yang <li...@apache.org>
Authored: Thu Dec 22 14:31:07 2016 +0800
Committer: Li Yang <li...@apache.org>
Committed: Thu Dec 22 14:31:07 2016 +0800
----------------------------------------------------------------------
.../kylin/measure/hllc/DenseRegister.java | 8 ++--
.../apache/kylin/measure/hllc/HLLCounter.java | 22 ++++++-----
.../kylin/measure/hllc/SingleValueRegister.java | 7 +---
.../kylin/measure/hllc/SparseRegister.java | 8 ++--
.../kylin/measure/hllc/HLLCounterTest.java | 15 ++++----
.../hllc/NewHyperLogLogBenchmarkTest.java | 40 +++++++++-----------
6 files changed, 48 insertions(+), 52 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/78a59179/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java
index 0dea535..5b929b2 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java
@@ -52,16 +52,16 @@ public class DenseRegister implements Register {
if (dr.register[i] > register[i])
register[i] = dr.register[i];
}
- } else if(another.getRegisterType() == RegisterType.SPARSE){
+ } else if (another.getRegisterType() == RegisterType.SPARSE) {
SparseRegister sr = (SparseRegister) another;
Collection<Map.Entry<Integer, Byte>> allValue = sr.getAllValue();
for (Map.Entry<Integer, Byte> entry : allValue) {
if (entry.getValue() > register[entry.getKey()])
register[entry.getKey()] = entry.getValue();
}
- }else{
- SingleValueRegister sr = (SingleValueRegister)another;
- if(sr.getSize() > 0 && sr.getValue() > register[sr.getSingleValuePos()]){
+ } else {
+ SingleValueRegister sr = (SingleValueRegister) another;
+ if (sr.getSize() > 0 && sr.getValue() > register[sr.getSingleValuePos()]) {
register[sr.getSingleValuePos()] = sr.getValue();
}
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/78a59179/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCounter.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCounter.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCounter.java
index 6325651..21f3a0e 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCounter.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCounter.java
@@ -119,8 +119,8 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
}
private void setIfBigger(Register register, int pos, byte value) {
- Byte b = register.get(pos);
- if (b == null || value > b) {
+ byte b = register.get(pos);
+ if (value > b) {
register.set(pos, value);
}
}
@@ -145,6 +145,7 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
} else if (register.getSize() == 0 && another.register.getSize() > 0) {
SingleValueRegister sr = (SingleValueRegister) another.register;
register.set(sr.getSingleValuePos(), sr.getValue());
+ return;
}
break;
case SPARSE:
@@ -267,7 +268,8 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
// decide output scheme -- map (3*size bytes) or array (2^p bytes)
byte scheme;
- if (register instanceof SingleValueRegister || register instanceof SparseRegister || 5 + (indexLen + 1) * size < m) {
+ if (register instanceof SingleValueRegister || register instanceof SparseRegister //
+ || 5 + (indexLen + 1) * size < m) {
scheme = 0; // map
} else {
scheme = 1; // array
@@ -275,18 +277,18 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
out.put(scheme);
if (scheme == 0) { // map scheme
BytesUtil.writeVInt(size, out);
- if (register.getRegisterType() == RegisterType.SPARSE) { //sparse register
- Collection<Map.Entry<Integer, Byte>> allValue = ((SparseRegister) register).getAllValue();
- for (Map.Entry<Integer, Byte> entry : allValue) {
- writeUnsigned(entry.getKey(), indexLen, out);
- out.put(entry.getValue());
- }
- } else if (register.getRegisterType() == RegisterType.SINGLE_VALUE) {
+ if (register.getRegisterType() == RegisterType.SINGLE_VALUE) { //single value register
if (size > 0) {
SingleValueRegister sr = (SingleValueRegister) register;
writeUnsigned(sr.getSingleValuePos(), indexLen, out);
out.put(sr.getValue());
}
+ } else if (register.getRegisterType() == RegisterType.SPARSE) { //sparse register
+ Collection<Map.Entry<Integer, Byte>> allValue = ((SparseRegister) register).getAllValue();
+ for (Map.Entry<Integer, Byte> entry : allValue) {
+ writeUnsigned(entry.getKey(), indexLen, out);
+ out.put(entry.getValue());
+ }
} else { //dense register
byte[] registers = ((DenseRegister) register).getRawRegister();
for (int i = 0; i < m; i++) {
http://git-wip-us.apache.org/repos/asf/kylin/blob/78a59179/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SingleValueRegister.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SingleValueRegister.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SingleValueRegister.java
index 5b2f7c8..7f612e2 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SingleValueRegister.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SingleValueRegister.java
@@ -32,6 +32,7 @@ public class SingleValueRegister implements Register {
@Override
public void set(int pos, byte value) {
+ assert this.singleValuePos < 0 || this.singleValuePos == pos;
this.singleValuePos = pos;
this.value = value;
}
@@ -43,13 +44,9 @@ public class SingleValueRegister implements Register {
return value;
}
- /*
- this method should not be used in single value register
- */
- @Deprecated
@Override
public void merge(Register another) {
- return;
+ throw new IllegalStateException();
}
@Override
http://git-wip-us.apache.org/repos/asf/kylin/blob/78a59179/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java
index bb550e1..dd7d7c8 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java
@@ -54,16 +54,16 @@ public class SparseRegister implements Register {
@Override
public void merge(Register another) {
assert another.getRegisterType() != RegisterType.DENSE;
- if(another.getRegisterType() == RegisterType.SPARSE) {
+ if (another.getRegisterType() == RegisterType.SPARSE) {
SparseRegister sr = (SparseRegister) another;
for (Map.Entry<Integer, Byte> entry : sr.sparseRegister.entrySet()) {
byte v = get(entry.getKey());
if (entry.getValue() > v)
sparseRegister.put(entry.getKey(), entry.getValue());
}
- }else if(another.getRegisterType() == RegisterType.SINGLE_VALUE){
- SingleValueRegister sr = (SingleValueRegister)another;
- if(sr.getSize() > 0){
+ } else if (another.getRegisterType() == RegisterType.SINGLE_VALUE) {
+ SingleValueRegister sr = (SingleValueRegister) another;
+ if (sr.getSize() > 0) {
byte v = get(sr.getSingleValuePos());
if (sr.getValue() > v)
sparseRegister.put(sr.getSingleValuePos(), sr.getValue());
http://git-wip-us.apache.org/repos/asf/kylin/blob/78a59179/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterTest.java
index 1b603a7..4a95bd4 100644
--- a/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterTest.java
+++ b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterTest.java
@@ -53,8 +53,8 @@ public class HLLCounterTest {
for (int i = 0; i < 1000000; i++) {
one.clear();
one.add(i);
- //System.out.println(hllc.getCountEstimate());
hllc.merge(one);
+ assertTrue(one.getRegisterType() == RegisterType.SINGLE_VALUE);
}
System.out.println(hllc.getCountEstimate());
System.out.println(hllc.getRegister().getRegisterType());
@@ -112,9 +112,10 @@ public class HLLCounterTest {
HLLCounterOld oldCounter = new HLLCounterOld(p);
HLLCounter newCounter = new HLLCounter(p);
HLLCounter newCounter2 = new HLLCounter(p);
- newCounter.add(1);
- oldCounter.add(1);
- assertEquals(RegisterType.SINGLE_VALUE,newCounter.getRegisterType());
+ int rr = rand1.nextInt();
+ newCounter.add(rr);
+ oldCounter.add(rr);
+ assertEquals(RegisterType.SINGLE_VALUE, newCounter.getRegisterType());
assertEquals(oldCounter.getCountEstimate(), newCounter.getCountEstimate());
buf.clear();
oldCounter.writeRegisters(buf);
@@ -133,7 +134,6 @@ public class HLLCounterTest {
}
assertEquals(RegisterType.SPARSE, newCounter.getRegisterType());
assertEquals(oldCounter.getCountEstimate(), newCounter.getCountEstimate());
-
buf.clear();
oldCounter.writeRegisters(buf);
buf.flip();
@@ -141,6 +141,8 @@ public class HLLCounterTest {
assertEquals(oldCounter.getCountEstimate(), newCounter2.getCountEstimate());
//compare dense
+ oldCounter.clear();
+ newCounter.clear();
for (int i = 0; i < m / 2; i++) {
int r = rand1.nextInt();
oldCounter.add(r);
@@ -148,7 +150,6 @@ public class HLLCounterTest {
}
assertEquals(RegisterType.DENSE, newCounter.getRegisterType());
assertEquals(oldCounter.getCountEstimate(), newCounter.getCountEstimate());
-
buf.clear();
oldCounter.writeRegisters(buf);
buf.flip();
@@ -203,7 +204,7 @@ public class HLLCounterTest {
int m = 1 << p;
double over = HLLCounter.OVERFLOW_FACTOR * m;
int overFlow = (int) over + 1000;
- for (int i = 0; i < overFlow; i++){
+ for (int i = 0; i < overFlow; i++) {
int k = rand1.nextInt();
ha.add(k);
hb.add(k);
http://git-wip-us.apache.org/repos/asf/kylin/blob/78a59179/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java
index 26f45d6..ee82f9b 100644
--- a/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java
+++ b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java
@@ -17,17 +17,13 @@
*/
package org.apache.kylin.measure.hllc;
-import org.apache.kylin.measure.hllc.HLLCounterOld;
-import org.apache.kylin.measure.hllc.HLLCounter;
-import org.apache.kylin.measure.hllc.RegisterType;
-import org.junit.Ignore;
-import org.junit.Test;
+import static org.junit.Assert.assertEquals;
import java.nio.ByteBuffer;
import java.util.Random;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import org.junit.Ignore;
+import org.junit.Test;
/**
* Created by xiefan on 16-12-12.
@@ -48,7 +44,7 @@ public class NewHyperLogLogBenchmarkTest {
System.out.println("denseToDenseRegisterMergeBenchmark(), m : " + m);
double oldFactor = HLLCounter.OVERFLOW_FACTOR;
HLLCounter.OVERFLOW_FACTOR = 1.1; //keep sparse
- for (int cardinality : new int[]{m/10,m/5,m/2,m}) {
+ for (int cardinality : new int[] { m / 10, m / 5, m / 2, m }) {
final HLLCounterOld oldCounter = new HLLCounterOld(p);
final HLLCounterOld oldCounter2 = getRandOldCounter(p, cardinality);
long oldTime = runTestCase(new TestCase() {
@@ -101,7 +97,7 @@ public class NewHyperLogLogBenchmarkTest {
}
}
});
- final HLLCounter newCounter = new HLLCounter(p,RegisterType.SPARSE);
+ final HLLCounter newCounter = new HLLCounter(p, RegisterType.SPARSE);
final HLLCounter newCounter2 = getRandNewCounter(p, cardinality);
long newTime = runTestCase(new TestCase() {
@Override
@@ -112,10 +108,10 @@ public class NewHyperLogLogBenchmarkTest {
}
});
assertEquals(RegisterType.SPARSE, newCounter.getRegisterType());
- if(cardinality == 1){
- assertEquals(RegisterType.SINGLE_VALUE,newCounter2.getRegisterType());
- }else{
- assertEquals(RegisterType.SPARSE,newCounter2.getRegisterType());
+ if (cardinality == 1) {
+ assertEquals(RegisterType.SINGLE_VALUE, newCounter2.getRegisterType());
+ } else {
+ assertEquals(RegisterType.SPARSE, newCounter2.getRegisterType());
}
System.out.println("----------------------------");
System.out.println("cardinality : " + cardinality);
@@ -156,10 +152,10 @@ public class NewHyperLogLogBenchmarkTest {
}
});
assertEquals(RegisterType.DENSE, newCounter.getRegisterType());
- if(cardinality == 1){
- assertEquals(RegisterType.SINGLE_VALUE,newCounter2.getRegisterType());
- }else{
- assertEquals(RegisterType.SPARSE,newCounter2.getRegisterType());
+ if (cardinality == 1) {
+ assertEquals(RegisterType.SINGLE_VALUE, newCounter2.getRegisterType());
+ } else {
+ assertEquals(RegisterType.SPARSE, newCounter2.getRegisterType());
}
System.out.println("old time : " + oldTime);
System.out.println("new time : " + newTime);
@@ -209,10 +205,10 @@ public class NewHyperLogLogBenchmarkTest {
System.out.println("new serialize bytes : " + totalBytes / testTimes + "B");
}
});
- if(cardinality == 1){
- assertEquals(RegisterType.SINGLE_VALUE,newCounter.getRegisterType());
- }else{
- assertEquals(RegisterType.SPARSE,newCounter.getRegisterType());
+ if (cardinality == 1) {
+ assertEquals(RegisterType.SINGLE_VALUE, newCounter.getRegisterType());
+ } else {
+ assertEquals(RegisterType.SPARSE, newCounter.getRegisterType());
}
System.out.println("old serialize time : " + oldTime);
System.out.println("new serialize time : " + newTime);
@@ -301,6 +297,6 @@ public class NewHyperLogLogBenchmarkTest {
}
public static int[] getTestDataDivide(int m) {
- return new int[] { 1, 5, 10, 100, m / 200, m / 100, m / 50, m / 20, m / 10};
+ return new int[] { 1, 5, 10, 100, m / 200, m / 100, m / 50, m / 20, m / 10 };
}
}