You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2016/12/22 06:31:17 UTC

[1/2] kylin git commit: KYLIN-1832 single value register

Repository: kylin
Updated Branches:
  refs/heads/master c3bb878fa -> 78a591798


KYLIN-1832 single value register

Signed-off-by: Li Yang <li...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/a39d0786
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/a39d0786
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/a39d0786

Branch: refs/heads/master
Commit: a39d0786008d2a65fdb19440949477673516b69c
Parents: c3bb878
Author: xiefan46 <95...@qq.com>
Authored: Tue Dec 20 12:59:24 2016 +0800
Committer: Li Yang <li...@apache.org>
Committed: Thu Dec 22 13:45:30 2016 +0800

----------------------------------------------------------------------
 .../kylin/measure/hllc/DenseRegister.java       |  14 ++-
 .../apache/kylin/measure/hllc/HLLCounter.java   | 102 +++++++++++-----
 .../org/apache/kylin/measure/hllc/Register.java |   2 +
 .../apache/kylin/measure/hllc/RegisterType.java |   2 +-
 .../kylin/measure/hllc/SingleValueRegister.java | 118 +++++++++++++++++++
 .../kylin/measure/hllc/SparseRegister.java      |  26 +++-
 .../kylin/measure/hllc/HLLCounterTest.java      |  64 ++++++++--
 .../hllc/NewHyperLogLogBenchmarkTest.java       |  27 +++--
 8 files changed, 301 insertions(+), 54 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/a39d0786/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java
index c5814aa..0dea535 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java
@@ -46,19 +46,24 @@ public class DenseRegister implements Register {
 
     @Override
     public void merge(Register another) {
-        if (another instanceof DenseRegister) {
+        if (another.getRegisterType() == RegisterType.DENSE) {
             DenseRegister dr = (DenseRegister) another;
             for (int i = 0; i < register.length; i++) {
                 if (dr.register[i] > register[i])
                     register[i] = dr.register[i];
             }
-        } else {
+        } else if(another.getRegisterType() == RegisterType.SPARSE){
             SparseRegister sr = (SparseRegister) another;
             Collection<Map.Entry<Integer, Byte>> allValue = sr.getAllValue();
             for (Map.Entry<Integer, Byte> entry : allValue) {
                 if (entry.getValue() > register[entry.getKey()])
                     register[entry.getKey()] = entry.getValue();
             }
+        }else{
+            SingleValueRegister sr = (SingleValueRegister)another;
+            if(sr.getSize() > 0 && sr.getValue() > register[sr.getSingleValuePos()]){
+                register[sr.getSingleValuePos()] = sr.getValue();
+            }
         }
     }
 
@@ -79,6 +84,11 @@ public class DenseRegister implements Register {
     }
 
     @Override
+    public RegisterType getRegisterType() {
+        return RegisterType.DENSE;
+    }
+
+    @Override
     public int hashCode() {
         final int prime = 31;
         int result = 1;

http://git-wip-us.apache.org/repos/asf/kylin/blob/a39d0786/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCounter.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCounter.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCounter.java
index 22b5e55..6325651 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCounter.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCounter.java
@@ -44,19 +44,19 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
     private Register register;
 
     public HLLCounter() {
-        this(10, RegisterType.SPARSE, Hashing.murmur3_128());
+        this(10, RegisterType.SINGLE_VALUE, Hashing.murmur3_128());
     }
 
     public HLLCounter(int p) {
-        this(p, RegisterType.SPARSE, Hashing.murmur3_128());
+        this(p, RegisterType.SINGLE_VALUE, Hashing.murmur3_128());
     }
 
     public HLLCounter(int p, HashFunction hashFunc) {
-        this(p, RegisterType.SPARSE, hashFunc);
+        this(p, RegisterType.SINGLE_VALUE, hashFunc);
     }
 
     public HLLCounter(HLLCounter another) {
-        this(another.p, another.hashFunc);
+        this(another.p, another.getRegisterType(), another.hashFunc);
         merge(another);
     }
 
@@ -68,7 +68,10 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
         this.p = p;
         this.m = 1 << p;//(int) Math.pow(2, p);
         this.hashFunc = hashFunc;
-        if (type == RegisterType.SPARSE) {
+
+        if (type == RegisterType.SINGLE_VALUE) {
+            this.register = new SingleValueRegister();
+        } else if (type == RegisterType.SPARSE) {
             this.register = new SparseRegister();
         } else {
             this.register = new DenseRegister(p);
@@ -79,7 +82,7 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
         double over = OVERFLOW_FACTOR * m;
         return size > (int) over;
     }
-    
+
     public void add(int value) {
         add(hashFunc.hashInt(value).asLong());
     }
@@ -100,11 +103,26 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
         int bucketMask = m - 1;
         int bucket = (int) (hash & bucketMask);
         int firstOnePos = Long.numberOfLeadingZeros(hash | bucketMask) + 1;
-        Byte b = register.get(bucket);
-        if (b == null || (byte) firstOnePos > b) {
-            register.set(bucket, (byte) firstOnePos);
+        if (register.getRegisterType() == RegisterType.SINGLE_VALUE) {
+            SingleValueRegister sr = (SingleValueRegister) register;
+            int pos = sr.getSingleValuePos();
+            if (pos < 0 || pos == bucket) { //one or zero value
+                setIfBigger(register, bucket, (byte) firstOnePos);
+            } else { //two value
+                this.register = sr.toSparse();
+                setIfBigger(register, bucket, (byte) firstOnePos);
+            }
+        } else {
+            setIfBigger(register, bucket, (byte) firstOnePos);
+            toDenseIfNeeded();
+        }
+    }
+
+    private void setIfBigger(Register register, int pos, byte value) {
+        Byte b = register.get(pos);
+        if (b == null || value > b) {
+            register.set(pos, value);
         }
-        toDenseIfNeeded();
     }
 
     private void toDenseIfNeeded() {
@@ -118,15 +136,38 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
     public void merge(HLLCounter another) {
         assert this.p == another.p;
         assert this.hashFunc == another.hashFunc;
-        if (register instanceof SparseRegister && another.register instanceof SparseRegister) {
-            register.merge(another.register);
-            toDenseIfNeeded();
-        } else if (register instanceof SparseRegister && another.register instanceof DenseRegister) {
-            register = ((SparseRegister) register).toDense(p);
-            register.merge(another.register);
-        } else {
-            register.merge(another.register);
+        switch (register.getRegisterType()) {
+        case SINGLE_VALUE:
+            switch (another.getRegisterType()) {
+            case SINGLE_VALUE:
+                if (register.getSize() > 0 && another.register.getSize() > 0) {
+                    register = ((SingleValueRegister) register).toSparse();
+                } else if (register.getSize() == 0 && another.register.getSize() > 0) {
+                    SingleValueRegister sr = (SingleValueRegister) another.register;
+                    register.set(sr.getSingleValuePos(), sr.getValue());
+                }
+                break;
+            case SPARSE:
+                register = ((SingleValueRegister) register).toSparse();
+                break;
+            case DENSE:
+                register = ((SingleValueRegister) register).toDense(this.p);
+                break;
+            default:
+                break;
+            }
+
+            break;
+        case SPARSE:
+            if (another.getRegisterType() == RegisterType.DENSE) {
+                register = ((SparseRegister) register).toDense(p);
+            }
+            break;
+        default:
+            break;
         }
+        register.merge(another.register);
+        toDenseIfNeeded();
     }
 
     public long getCountEstimate() {
@@ -160,7 +201,9 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
             zeroBuckets = 0;
             Register register = hllc.getRegister();
             DenseRegister dr;
-            if (register instanceof SparseRegister) {
+            if (register.getRegisterType() == RegisterType.SINGLE_VALUE) {
+                dr = ((SingleValueRegister) register).toDense(p);
+            } else if (register.getRegisterType() == RegisterType.SPARSE) {
                 dr = ((SparseRegister) register).toDense(p);
             } else {
                 dr = (DenseRegister) register;
@@ -224,7 +267,7 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
 
         // decide output scheme -- map (3*size bytes) or array (2^p bytes)
         byte scheme;
-        if (register instanceof SparseRegister || 5 + (indexLen + 1) * size < m) {
+        if (register instanceof SingleValueRegister || register instanceof SparseRegister || 5 + (indexLen + 1) * size < m) {
             scheme = 0; // map
         } else {
             scheme = 1; // array
@@ -232,12 +275,18 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
         out.put(scheme);
         if (scheme == 0) { // map scheme
             BytesUtil.writeVInt(size, out);
-            if (register instanceof SparseRegister) { //sparse register
+            if (register.getRegisterType() == RegisterType.SPARSE) { //sparse register
                 Collection<Map.Entry<Integer, Byte>> allValue = ((SparseRegister) register).getAllValue();
                 for (Map.Entry<Integer, Byte> entry : allValue) {
                     writeUnsigned(entry.getKey(), indexLen, out);
                     out.put(entry.getValue());
                 }
+            } else if (register.getRegisterType() == RegisterType.SINGLE_VALUE) {
+                if (size > 0) {
+                    SingleValueRegister sr = (SingleValueRegister) register;
+                    writeUnsigned(sr.getSingleValuePos(), indexLen, out);
+                    out.put(sr.getValue());
+                }
             } else { //dense register
                 byte[] registers = ((DenseRegister) register).getRawRegister();
                 for (int i = 0; i < m; i++) {
@@ -262,8 +311,10 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
                 throw new IllegalArgumentException("register size (" + size + ") cannot be larger than m (" + m + ")");
             if (isDense(size)) {
                 register = new DenseRegister(p);
+            } else if (size <= 1) {
+                register = new SingleValueRegister();
             } else {
-                register = new SparseRegister();//default is sparse
+                register = new SparseRegister();
             }
             int indexLen = getRegisterIndexSize();
             int key = 0;
@@ -272,7 +323,7 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
                 register.set(key, in.get());
             }
         } else if (scheme == 1) { // array scheme
-            if (register instanceof SparseRegister) {
+            if (register.getRegisterType() != RegisterType.DENSE) {
                 register = new DenseRegister(p);
             }
             in.get(((DenseRegister) register).getRawRegister());
@@ -368,10 +419,7 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
     }
 
     public RegisterType getRegisterType() {
-        if (register instanceof SparseRegister)
-            return RegisterType.SPARSE;
-        else
-            return RegisterType.DENSE;
+        return register.getRegisterType();
     }
 
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/a39d0786/core-metadata/src/main/java/org/apache/kylin/measure/hllc/Register.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/Register.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/Register.java
index a6ef94f..6d8086d 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/Register.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/Register.java
@@ -32,4 +32,6 @@ public interface Register {
 
     int getSize();
 
+    RegisterType getRegisterType();
+
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/a39d0786/core-metadata/src/main/java/org/apache/kylin/measure/hllc/RegisterType.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/RegisterType.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/RegisterType.java
index fec9939..7288bca 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/RegisterType.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/RegisterType.java
@@ -21,5 +21,5 @@ package org.apache.kylin.measure.hllc;
  * Created by xiefan on 16-12-9.
  */
 public enum RegisterType {
-    SPARSE, DENSE
+    SINGLE_VALUE, SPARSE, DENSE
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/a39d0786/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SingleValueRegister.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SingleValueRegister.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SingleValueRegister.java
new file mode 100644
index 0000000..5b2f7c8
--- /dev/null
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SingleValueRegister.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+package org.apache.kylin.measure.hllc;
+
+/**
+ * Created by xiefan on 16-12-20.
+ */
+public class SingleValueRegister implements Register {
+
+    private int singleValuePos;
+
+    private byte value;
+
+    public SingleValueRegister() {
+        this.singleValuePos = -1;
+    }
+
+    @Override
+    public void set(int pos, byte value) {
+        this.singleValuePos = pos;
+        this.value = value;
+    }
+
+    @Override
+    public byte get(int pos) {
+        if (pos != this.singleValuePos)
+            return 0;
+        return value;
+    }
+
+    /*
+    this method should not be used in single value register
+     */
+    @Deprecated
+    @Override
+    public void merge(Register another) {
+        return;
+    }
+
+    @Override
+    public void clear() {
+        this.singleValuePos = -1;
+    }
+
+    @Override
+    public int getSize() {
+        if (this.singleValuePos >= 0)
+            return 1;
+        return 0;
+    }
+
+    @Override
+    public RegisterType getRegisterType() {
+        return RegisterType.SINGLE_VALUE;
+    }
+
+    public int getSingleValuePos() {
+        return singleValuePos;
+    }
+
+    public byte getValue() {
+        return value;
+    }
+
+    public SparseRegister toSparse() {
+        SparseRegister sr = new SparseRegister();
+        if (singleValuePos >= 0)
+            sr.set(singleValuePos, value);
+        return sr;
+    }
+
+    public DenseRegister toDense(int p) {
+        DenseRegister dr = new DenseRegister(p);
+        if (singleValuePos >= 0) {
+            dr.set(singleValuePos, value);
+        }
+        return dr;
+    }
+
+    @Override
+    public int hashCode() {
+        final int prime = 31;
+        int result = 1;
+        result = prime * result + singleValuePos;
+        result = prime * result + value;
+        return result;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj)
+            return true;
+        if (obj == null)
+            return false;
+        if (getClass() != obj.getClass())
+            return false;
+        SingleValueRegister other = (SingleValueRegister) obj;
+        if (this.singleValuePos != other.singleValuePos || this.value != other.value) {
+            return false;
+        }
+        return true;
+    }
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/a39d0786/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java
index d6bb024..bb550e1 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java
@@ -53,12 +53,21 @@ public class SparseRegister implements Register {
 
     @Override
     public void merge(Register another) {
-        assert another instanceof SparseRegister;
-        SparseRegister sr = (SparseRegister) another;
-        for (Map.Entry<Integer, Byte> entry : sr.sparseRegister.entrySet()) {
-            byte v = get(entry.getKey());
-            if (entry.getValue() > v)
-                sparseRegister.put(entry.getKey(), entry.getValue());
+        assert another.getRegisterType() != RegisterType.DENSE;
+        if(another.getRegisterType() == RegisterType.SPARSE) {
+            SparseRegister sr = (SparseRegister) another;
+            for (Map.Entry<Integer, Byte> entry : sr.sparseRegister.entrySet()) {
+                byte v = get(entry.getKey());
+                if (entry.getValue() > v)
+                    sparseRegister.put(entry.getKey(), entry.getValue());
+            }
+        }else if(another.getRegisterType() == RegisterType.SINGLE_VALUE){
+            SingleValueRegister sr = (SingleValueRegister)another;
+            if(sr.getSize() > 0){
+                byte v = get(sr.getSingleValuePos());
+                if (sr.getValue() > v)
+                    sparseRegister.put(sr.getSingleValuePos(), sr.getValue());
+            }
         }
     }
 
@@ -73,6 +82,11 @@ public class SparseRegister implements Register {
     }
 
     @Override
+    public RegisterType getRegisterType() {
+        return RegisterType.SPARSE;
+    }
+
+    @Override
     public int hashCode() {
         final int prime = 31;
         int result = 1;

http://git-wip-us.apache.org/repos/asf/kylin/blob/a39d0786/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterTest.java
index cd75eac..1b603a7 100644
--- a/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterTest.java
+++ b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterTest.java
@@ -52,10 +52,12 @@ public class HLLCounterTest {
         HLLCounter one = new HLLCounter(14);
         for (int i = 0; i < 1000000; i++) {
             one.clear();
-            one.add(rand1.nextInt());
+            one.add(i);
+            //System.out.println(hllc.getCountEstimate());
             hllc.merge(one);
         }
         System.out.println(hllc.getCountEstimate());
+        System.out.println(hllc.getRegister().getRegisterType());
         assertTrue(hllc.getCountEstimate() > 1000000 * 0.9);
     }
 
@@ -102,15 +104,28 @@ public class HLLCounterTest {
     public void compareResult() throws IOException {
         int p = 12; //4096
         int m = 1 << p;
-        
+
         ByteBuffer buf = ByteBuffer.allocate(1024 * 1024);
-    
+
         for (int t = 0; t < 5; t++) {
-            //compare sparse
+            //compare single
             HLLCounterOld oldCounter = new HLLCounterOld(p);
             HLLCounter newCounter = new HLLCounter(p);
             HLLCounter newCounter2 = new HLLCounter(p);
+            newCounter.add(1);
+            oldCounter.add(1);
+            assertEquals(RegisterType.SINGLE_VALUE,newCounter.getRegisterType());
+            assertEquals(oldCounter.getCountEstimate(), newCounter.getCountEstimate());
+            buf.clear();
+            oldCounter.writeRegisters(buf);
+            buf.flip();
+            newCounter2.readRegisters(buf);
+            assertEquals(oldCounter.getCountEstimate(), newCounter2.getCountEstimate());
 
+            //compare sparse
+            oldCounter.clear();
+            newCounter.clear();
+            newCounter2.clear();
             for (int i = 0; i < 20; i++) {
                 int r = rand1.nextInt();
                 oldCounter.add(r);
@@ -118,13 +133,13 @@ public class HLLCounterTest {
             }
             assertEquals(RegisterType.SPARSE, newCounter.getRegisterType());
             assertEquals(oldCounter.getCountEstimate(), newCounter.getCountEstimate());
-            
+
             buf.clear();
             oldCounter.writeRegisters(buf);
             buf.flip();
             newCounter2.readRegisters(buf);
             assertEquals(oldCounter.getCountEstimate(), newCounter2.getCountEstimate());
-            
+
             //compare dense
             for (int i = 0; i < m / 2; i++) {
                 int r = rand1.nextInt();
@@ -133,7 +148,7 @@ public class HLLCounterTest {
             }
             assertEquals(RegisterType.DENSE, newCounter.getRegisterType());
             assertEquals(oldCounter.getCountEstimate(), newCounter.getCountEstimate());
-            
+
             buf.clear();
             oldCounter.writeRegisters(buf);
             buf.flip();
@@ -167,13 +182,32 @@ public class HLLCounterTest {
 
     @Test
     public void testEquivalence() {
-        byte[] a = new byte[] { 0, 3, 4, 42, 2, 2 };
-        byte[] b = new byte[] { 3, 4, 42 };
+        //test single
         HLLCounter ha = new HLLCounter();
         HLLCounter hb = new HLLCounter();
+        ha.add(1);
+        hb.add(1);
+        Assert.assertTrue(ha.getCountEstimate() == hb.getCountEstimate());
+        //test sparse
+        ha = new HLLCounter();
+        hb = new HLLCounter();
+        byte[] a = new byte[] { 0, 3, 4, 42, 2, 2 };
+        byte[] b = new byte[] { 3, 4, 42 };
         ha.add(a, 1, 3);
         hb.add(b);
-
+        Assert.assertTrue(ha.getCountEstimate() == hb.getCountEstimate());
+        //test dense
+        int p = 10;
+        ha = new HLLCounter(p);
+        hb = new HLLCounter(p);
+        int m = 1 << p;
+        double over = HLLCounter.OVERFLOW_FACTOR * m;
+        int overFlow = (int) over + 1000;
+        for (int i = 0; i < overFlow; i++){
+            int k = rand1.nextInt();
+            ha.add(k);
+            hb.add(k);
+        }
         Assert.assertTrue(ha.getCountEstimate() == hb.getCountEstimate());
     }
 
@@ -182,6 +216,10 @@ public class HLLCounterTest {
         int p = 15;
         int m = 1 << p;
         HLLCounter counter = new HLLCounter(p);
+        assertEquals(RegisterType.SINGLE_VALUE, counter.getRegisterType());
+        counter.add(1);
+        assertEquals(RegisterType.SINGLE_VALUE, counter.getRegisterType());
+        counter.add(2);
         assertEquals(RegisterType.SPARSE, counter.getRegisterType());
         double over = HLLCounter.OVERFLOW_FACTOR * m;
         int overFlow = (int) over + 1000;
@@ -192,11 +230,15 @@ public class HLLCounterTest {
 
     @Test
     public void testSerialilze() throws Exception {
-        //test sparse serialize
+        //test single serialize
         int p = 15;
         int m = 1 << p;
         HLLCounter counter = new HLLCounter(p);
         counter.add(123);
+        assertEquals(RegisterType.SINGLE_VALUE, counter.getRegisterType());
+        checkSerialize(counter);
+        //test sparse serialize
+        counter.add(124);
         assertEquals(RegisterType.SPARSE, counter.getRegisterType());
         checkSerialize(counter);
         //test dense serialize

http://git-wip-us.apache.org/repos/asf/kylin/blob/a39d0786/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java
index 5de2a3a..26f45d6 100644
--- a/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java
+++ b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java
@@ -27,6 +27,7 @@ import java.nio.ByteBuffer;
 import java.util.Random;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
 /**
  * Created by xiefan on 16-12-12.
@@ -37,7 +38,7 @@ public class NewHyperLogLogBenchmarkTest {
 
     public static final Random rand = new Random(1);
 
-    final int testTimes = 10000;
+    final int testTimes = 100000;
 
     @Test
     public void denseToDenseRegisterMergeBenchmark() throws Exception {
@@ -47,7 +48,7 @@ public class NewHyperLogLogBenchmarkTest {
         System.out.println("denseToDenseRegisterMergeBenchmark(), m : " + m);
         double oldFactor = HLLCounter.OVERFLOW_FACTOR;
         HLLCounter.OVERFLOW_FACTOR = 1.1; //keep sparse
-        for (int cardinality : getTestDataDivide(m)) {
+        for (int cardinality : new int[]{m/10,m/5,m/2,m}) {
             final HLLCounterOld oldCounter = new HLLCounterOld(p);
             final HLLCounterOld oldCounter2 = getRandOldCounter(p, cardinality);
             long oldTime = runTestCase(new TestCase() {
@@ -100,7 +101,7 @@ public class NewHyperLogLogBenchmarkTest {
                     }
                 }
             });
-            final HLLCounter newCounter = new HLLCounter(p);
+            final HLLCounter newCounter = new HLLCounter(p,RegisterType.SPARSE);
             final HLLCounter newCounter2 = getRandNewCounter(p, cardinality);
             long newTime = runTestCase(new TestCase() {
                 @Override
@@ -111,7 +112,11 @@ public class NewHyperLogLogBenchmarkTest {
                 }
             });
             assertEquals(RegisterType.SPARSE, newCounter.getRegisterType());
-            assertEquals(RegisterType.SPARSE, newCounter2.getRegisterType());
+            if(cardinality == 1){
+                assertEquals(RegisterType.SINGLE_VALUE,newCounter2.getRegisterType());
+            }else{
+                assertEquals(RegisterType.SPARSE,newCounter2.getRegisterType());
+            }
             System.out.println("----------------------------");
             System.out.println("cardinality : " + cardinality);
             System.out.println("old time : " + oldTime);
@@ -151,7 +156,11 @@ public class NewHyperLogLogBenchmarkTest {
                 }
             });
             assertEquals(RegisterType.DENSE, newCounter.getRegisterType());
-            assertEquals(RegisterType.SPARSE, newCounter2.getRegisterType());
+            if(cardinality == 1){
+                assertEquals(RegisterType.SINGLE_VALUE,newCounter2.getRegisterType());
+            }else{
+                assertEquals(RegisterType.SPARSE,newCounter2.getRegisterType());
+            }
             System.out.println("old time : " + oldTime);
             System.out.println("new time : " + newTime);
         }
@@ -200,7 +209,11 @@ public class NewHyperLogLogBenchmarkTest {
                     System.out.println("new serialize bytes : " + totalBytes / testTimes + "B");
                 }
             });
-            assertEquals(RegisterType.SPARSE, newCounter.getRegisterType());
+            if(cardinality == 1){
+                assertEquals(RegisterType.SINGLE_VALUE,newCounter.getRegisterType());
+            }else{
+                assertEquals(RegisterType.SPARSE,newCounter.getRegisterType());
+            }
             System.out.println("old serialize time : " + oldTime);
             System.out.println("new serialize time : " + newTime);
         }
@@ -288,6 +301,6 @@ public class NewHyperLogLogBenchmarkTest {
     }
 
     public static int[] getTestDataDivide(int m) {
-        return new int[] { 1, 5, 10, 100, m / 200, m / 100, m / 50, m / 20, m / 10, m };
+        return new int[] { 1, 5, 10, 100, m / 200, m / 100, m / 50, m / 20, m / 10};
     }
 }


[2/2] kylin git commit: KYLIN-1832 code review

Posted by li...@apache.org.
KYLIN-1832 code review


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/78a59179
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/78a59179
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/78a59179

Branch: refs/heads/master
Commit: 78a59179825012daa94a117babbf0c3aea15ec89
Parents: a39d078
Author: Li Yang <li...@apache.org>
Authored: Thu Dec 22 14:31:07 2016 +0800
Committer: Li Yang <li...@apache.org>
Committed: Thu Dec 22 14:31:07 2016 +0800

----------------------------------------------------------------------
 .../kylin/measure/hllc/DenseRegister.java       |  8 ++--
 .../apache/kylin/measure/hllc/HLLCounter.java   | 22 ++++++-----
 .../kylin/measure/hllc/SingleValueRegister.java |  7 +---
 .../kylin/measure/hllc/SparseRegister.java      |  8 ++--
 .../kylin/measure/hllc/HLLCounterTest.java      | 15 ++++----
 .../hllc/NewHyperLogLogBenchmarkTest.java       | 40 +++++++++-----------
 6 files changed, 48 insertions(+), 52 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/78a59179/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java
index 0dea535..5b929b2 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/DenseRegister.java
@@ -52,16 +52,16 @@ public class DenseRegister implements Register {
                 if (dr.register[i] > register[i])
                     register[i] = dr.register[i];
             }
-        } else if(another.getRegisterType() == RegisterType.SPARSE){
+        } else if (another.getRegisterType() == RegisterType.SPARSE) {
             SparseRegister sr = (SparseRegister) another;
             Collection<Map.Entry<Integer, Byte>> allValue = sr.getAllValue();
             for (Map.Entry<Integer, Byte> entry : allValue) {
                 if (entry.getValue() > register[entry.getKey()])
                     register[entry.getKey()] = entry.getValue();
             }
-        }else{
-            SingleValueRegister sr = (SingleValueRegister)another;
-            if(sr.getSize() > 0 && sr.getValue() > register[sr.getSingleValuePos()]){
+        } else {
+            SingleValueRegister sr = (SingleValueRegister) another;
+            if (sr.getSize() > 0 && sr.getValue() > register[sr.getSingleValuePos()]) {
                 register[sr.getSingleValuePos()] = sr.getValue();
             }
         }

http://git-wip-us.apache.org/repos/asf/kylin/blob/78a59179/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCounter.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCounter.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCounter.java
index 6325651..21f3a0e 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCounter.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HLLCounter.java
@@ -119,8 +119,8 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
     }
 
     private void setIfBigger(Register register, int pos, byte value) {
-        Byte b = register.get(pos);
-        if (b == null || value > b) {
+        byte b = register.get(pos);
+        if (value > b) {
             register.set(pos, value);
         }
     }
@@ -145,6 +145,7 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
                 } else if (register.getSize() == 0 && another.register.getSize() > 0) {
                     SingleValueRegister sr = (SingleValueRegister) another.register;
                     register.set(sr.getSingleValuePos(), sr.getValue());
+                    return;
                 }
                 break;
             case SPARSE:
@@ -267,7 +268,8 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
 
         // decide output scheme -- map (3*size bytes) or array (2^p bytes)
         byte scheme;
-        if (register instanceof SingleValueRegister || register instanceof SparseRegister || 5 + (indexLen + 1) * size < m) {
+        if (register instanceof SingleValueRegister || register instanceof SparseRegister //
+                || 5 + (indexLen + 1) * size < m) {
             scheme = 0; // map
         } else {
             scheme = 1; // array
@@ -275,18 +277,18 @@ public class HLLCounter implements Serializable, Comparable<HLLCounter> {
         out.put(scheme);
         if (scheme == 0) { // map scheme
             BytesUtil.writeVInt(size, out);
-            if (register.getRegisterType() == RegisterType.SPARSE) { //sparse register
-                Collection<Map.Entry<Integer, Byte>> allValue = ((SparseRegister) register).getAllValue();
-                for (Map.Entry<Integer, Byte> entry : allValue) {
-                    writeUnsigned(entry.getKey(), indexLen, out);
-                    out.put(entry.getValue());
-                }
-            } else if (register.getRegisterType() == RegisterType.SINGLE_VALUE) {
+            if (register.getRegisterType() == RegisterType.SINGLE_VALUE) { //single value register
                 if (size > 0) {
                     SingleValueRegister sr = (SingleValueRegister) register;
                     writeUnsigned(sr.getSingleValuePos(), indexLen, out);
                     out.put(sr.getValue());
                 }
+            } else if (register.getRegisterType() == RegisterType.SPARSE) { //sparse register
+                Collection<Map.Entry<Integer, Byte>> allValue = ((SparseRegister) register).getAllValue();
+                for (Map.Entry<Integer, Byte> entry : allValue) {
+                    writeUnsigned(entry.getKey(), indexLen, out);
+                    out.put(entry.getValue());
+                }
             } else { //dense register
                 byte[] registers = ((DenseRegister) register).getRawRegister();
                 for (int i = 0; i < m; i++) {

http://git-wip-us.apache.org/repos/asf/kylin/blob/78a59179/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SingleValueRegister.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SingleValueRegister.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SingleValueRegister.java
index 5b2f7c8..7f612e2 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SingleValueRegister.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SingleValueRegister.java
@@ -32,6 +32,7 @@ public class SingleValueRegister implements Register {
 
     @Override
     public void set(int pos, byte value) {
+        assert this.singleValuePos < 0 || this.singleValuePos == pos;
         this.singleValuePos = pos;
         this.value = value;
     }
@@ -43,13 +44,9 @@ public class SingleValueRegister implements Register {
         return value;
     }
 
-    /*
-    this method should not be used in single value register
-     */
-    @Deprecated
     @Override
     public void merge(Register another) {
-        return;
+        throw new IllegalStateException();
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/kylin/blob/78a59179/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java
index bb550e1..dd7d7c8 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/SparseRegister.java
@@ -54,16 +54,16 @@ public class SparseRegister implements Register {
     @Override
     public void merge(Register another) {
         assert another.getRegisterType() != RegisterType.DENSE;
-        if(another.getRegisterType() == RegisterType.SPARSE) {
+        if (another.getRegisterType() == RegisterType.SPARSE) {
             SparseRegister sr = (SparseRegister) another;
             for (Map.Entry<Integer, Byte> entry : sr.sparseRegister.entrySet()) {
                 byte v = get(entry.getKey());
                 if (entry.getValue() > v)
                     sparseRegister.put(entry.getKey(), entry.getValue());
             }
-        }else if(another.getRegisterType() == RegisterType.SINGLE_VALUE){
-            SingleValueRegister sr = (SingleValueRegister)another;
-            if(sr.getSize() > 0){
+        } else if (another.getRegisterType() == RegisterType.SINGLE_VALUE) {
+            SingleValueRegister sr = (SingleValueRegister) another;
+            if (sr.getSize() > 0) {
                 byte v = get(sr.getSingleValuePos());
                 if (sr.getValue() > v)
                     sparseRegister.put(sr.getSingleValuePos(), sr.getValue());

http://git-wip-us.apache.org/repos/asf/kylin/blob/78a59179/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterTest.java
index 1b603a7..4a95bd4 100644
--- a/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterTest.java
+++ b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/HLLCounterTest.java
@@ -53,8 +53,8 @@ public class HLLCounterTest {
         for (int i = 0; i < 1000000; i++) {
             one.clear();
             one.add(i);
-            //System.out.println(hllc.getCountEstimate());
             hllc.merge(one);
+            assertTrue(one.getRegisterType() == RegisterType.SINGLE_VALUE);
         }
         System.out.println(hllc.getCountEstimate());
         System.out.println(hllc.getRegister().getRegisterType());
@@ -112,9 +112,10 @@ public class HLLCounterTest {
             HLLCounterOld oldCounter = new HLLCounterOld(p);
             HLLCounter newCounter = new HLLCounter(p);
             HLLCounter newCounter2 = new HLLCounter(p);
-            newCounter.add(1);
-            oldCounter.add(1);
-            assertEquals(RegisterType.SINGLE_VALUE,newCounter.getRegisterType());
+            int rr = rand1.nextInt();
+            newCounter.add(rr);
+            oldCounter.add(rr);
+            assertEquals(RegisterType.SINGLE_VALUE, newCounter.getRegisterType());
             assertEquals(oldCounter.getCountEstimate(), newCounter.getCountEstimate());
             buf.clear();
             oldCounter.writeRegisters(buf);
@@ -133,7 +134,6 @@ public class HLLCounterTest {
             }
             assertEquals(RegisterType.SPARSE, newCounter.getRegisterType());
             assertEquals(oldCounter.getCountEstimate(), newCounter.getCountEstimate());
-
             buf.clear();
             oldCounter.writeRegisters(buf);
             buf.flip();
@@ -141,6 +141,8 @@ public class HLLCounterTest {
             assertEquals(oldCounter.getCountEstimate(), newCounter2.getCountEstimate());
 
             //compare dense
+            oldCounter.clear();
+            newCounter.clear();
             for (int i = 0; i < m / 2; i++) {
                 int r = rand1.nextInt();
                 oldCounter.add(r);
@@ -148,7 +150,6 @@ public class HLLCounterTest {
             }
             assertEquals(RegisterType.DENSE, newCounter.getRegisterType());
             assertEquals(oldCounter.getCountEstimate(), newCounter.getCountEstimate());
-
             buf.clear();
             oldCounter.writeRegisters(buf);
             buf.flip();
@@ -203,7 +204,7 @@ public class HLLCounterTest {
         int m = 1 << p;
         double over = HLLCounter.OVERFLOW_FACTOR * m;
         int overFlow = (int) over + 1000;
-        for (int i = 0; i < overFlow; i++){
+        for (int i = 0; i < overFlow; i++) {
             int k = rand1.nextInt();
             ha.add(k);
             hb.add(k);

http://git-wip-us.apache.org/repos/asf/kylin/blob/78a59179/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java
index 26f45d6..ee82f9b 100644
--- a/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java
+++ b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java
@@ -17,17 +17,13 @@
 */
 package org.apache.kylin.measure.hllc;
 
-import org.apache.kylin.measure.hllc.HLLCounterOld;
-import org.apache.kylin.measure.hllc.HLLCounter;
-import org.apache.kylin.measure.hllc.RegisterType;
-import org.junit.Ignore;
-import org.junit.Test;
+import static org.junit.Assert.assertEquals;
 
 import java.nio.ByteBuffer;
 import java.util.Random;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import org.junit.Ignore;
+import org.junit.Test;
 
 /**
  * Created by xiefan on 16-12-12.
@@ -48,7 +44,7 @@ public class NewHyperLogLogBenchmarkTest {
         System.out.println("denseToDenseRegisterMergeBenchmark(), m : " + m);
         double oldFactor = HLLCounter.OVERFLOW_FACTOR;
         HLLCounter.OVERFLOW_FACTOR = 1.1; //keep sparse
-        for (int cardinality : new int[]{m/10,m/5,m/2,m}) {
+        for (int cardinality : new int[] { m / 10, m / 5, m / 2, m }) {
             final HLLCounterOld oldCounter = new HLLCounterOld(p);
             final HLLCounterOld oldCounter2 = getRandOldCounter(p, cardinality);
             long oldTime = runTestCase(new TestCase() {
@@ -101,7 +97,7 @@ public class NewHyperLogLogBenchmarkTest {
                     }
                 }
             });
-            final HLLCounter newCounter = new HLLCounter(p,RegisterType.SPARSE);
+            final HLLCounter newCounter = new HLLCounter(p, RegisterType.SPARSE);
             final HLLCounter newCounter2 = getRandNewCounter(p, cardinality);
             long newTime = runTestCase(new TestCase() {
                 @Override
@@ -112,10 +108,10 @@ public class NewHyperLogLogBenchmarkTest {
                 }
             });
             assertEquals(RegisterType.SPARSE, newCounter.getRegisterType());
-            if(cardinality == 1){
-                assertEquals(RegisterType.SINGLE_VALUE,newCounter2.getRegisterType());
-            }else{
-                assertEquals(RegisterType.SPARSE,newCounter2.getRegisterType());
+            if (cardinality == 1) {
+                assertEquals(RegisterType.SINGLE_VALUE, newCounter2.getRegisterType());
+            } else {
+                assertEquals(RegisterType.SPARSE, newCounter2.getRegisterType());
             }
             System.out.println("----------------------------");
             System.out.println("cardinality : " + cardinality);
@@ -156,10 +152,10 @@ public class NewHyperLogLogBenchmarkTest {
                 }
             });
             assertEquals(RegisterType.DENSE, newCounter.getRegisterType());
-            if(cardinality == 1){
-                assertEquals(RegisterType.SINGLE_VALUE,newCounter2.getRegisterType());
-            }else{
-                assertEquals(RegisterType.SPARSE,newCounter2.getRegisterType());
+            if (cardinality == 1) {
+                assertEquals(RegisterType.SINGLE_VALUE, newCounter2.getRegisterType());
+            } else {
+                assertEquals(RegisterType.SPARSE, newCounter2.getRegisterType());
             }
             System.out.println("old time : " + oldTime);
             System.out.println("new time : " + newTime);
@@ -209,10 +205,10 @@ public class NewHyperLogLogBenchmarkTest {
                     System.out.println("new serialize bytes : " + totalBytes / testTimes + "B");
                 }
             });
-            if(cardinality == 1){
-                assertEquals(RegisterType.SINGLE_VALUE,newCounter.getRegisterType());
-            }else{
-                assertEquals(RegisterType.SPARSE,newCounter.getRegisterType());
+            if (cardinality == 1) {
+                assertEquals(RegisterType.SINGLE_VALUE, newCounter.getRegisterType());
+            } else {
+                assertEquals(RegisterType.SPARSE, newCounter.getRegisterType());
             }
             System.out.println("old serialize time : " + oldTime);
             System.out.println("new serialize time : " + newTime);
@@ -301,6 +297,6 @@ public class NewHyperLogLogBenchmarkTest {
     }
 
     public static int[] getTestDataDivide(int m) {
-        return new int[] { 1, 5, 10, 100, m / 200, m / 100, m / 50, m / 20, m / 10};
+        return new int[] { 1, 5, 10, 100, m / 200, m / 100, m / 50, m / 20, m / 10 };
     }
 }