You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2016/06/05 01:13:03 UTC

[6/7] kylin git commit: KYLIN-1379 More stable and functional precise count distinct implementation after KYLIN-1186

KYLIN-1379 More stable and functional precise count distinct implementation after KYLIN-1186


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/5198b877
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/5198b877
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/5198b877

Branch: refs/heads/KYLIN-1705-CI
Commit: 5198b877e7cbedd361756dd9cd5addbef46d1ed8
Parents: 59aaeb3
Author: sunyerui <su...@gmail.com>
Authored: Sat May 28 17:50:54 2016 +0800
Committer: Yang Li <li...@apache.org>
Committed: Sun Jun 5 08:25:49 2016 +0800

----------------------------------------------------------------------
 .../validation/rule/DictionaryRuleTest.java     |   8 +-
 .../kylin/dict/AppendTrieDictionaryTest.java    |  18 +
 .../kylin/measure/bitmap/BitmapCounter.java     |   5 +
 .../kylin/measure/bitmap/BitmapMeasureType.java |  82 +-
 ...t_kylin_cube_without_slr_left_join_desc.json |  22 +-
 .../flatten_data_for_without_slr_left_join.csv  | 804 +++++++++----------
 .../apache/kylin/query/ITKylinQueryTest.java    |   2 +-
 .../query/sql_distinct_precisely/query03.sql    |   1 +
 .../query/sql_distinct_precisely/query04.sql    |   1 +
 .../coprocessor/endpoint/CubeVisitService.java  |   6 +-
 10 files changed, 508 insertions(+), 441 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/5198b877/core-cube/src/test/java/org/apache/kylin/cube/model/validation/rule/DictionaryRuleTest.java
----------------------------------------------------------------------
diff --git a/core-cube/src/test/java/org/apache/kylin/cube/model/validation/rule/DictionaryRuleTest.java b/core-cube/src/test/java/org/apache/kylin/cube/model/validation/rule/DictionaryRuleTest.java
index ba58d40..99ca7cf 100644
--- a/core-cube/src/test/java/org/apache/kylin/cube/model/validation/rule/DictionaryRuleTest.java
+++ b/core-cube/src/test/java/org/apache/kylin/cube/model/validation/rule/DictionaryRuleTest.java
@@ -71,16 +71,16 @@ public class DictionaryRuleTest extends LocalFileMetadataTestCase {
 
     @Test
     public void testBadDesc() throws IOException {
-        testBadDictionaryDesc("Column DEFAULT.TEST_KYLIN_FACT.SELLER_ID has inconsistent builders " +
+        testBadDictionaryDesc("Column EDW.TEST_SITES.SITE_NAME has inconsistent builders " +
                 "FakeBuilderClass and org.apache.kylin.dict.GlobalDictionaryBuilder",
-            DictionaryDesc.create("SELLER_ID", null, "FakeBuilderClass"));
+            DictionaryDesc.create("SITE_NAME", null, "FakeBuilderClass"));
     }
 
     @Test
     public void testBadDesc2() throws IOException {
-        testBadDictionaryDesc("Column DEFAULT.TEST_KYLIN_FACT.SELLER_ID has inconsistent builders " +
+        testBadDictionaryDesc("Column EDW.TEST_SITES.SITE_NAME has inconsistent builders " +
                         "FakeBuilderClass and org.apache.kylin.dict.GlobalDictionaryBuilder",
-                DictionaryDesc.create("lstg_site_id", "SELLER_ID", "FakeBuilderClass"));
+                DictionaryDesc.create("lstg_site_id", "SITE_NAME", "FakeBuilderClass"));
     }
 
     @Test

http://git-wip-us.apache.org/repos/asf/kylin/blob/5198b877/core-dictionary/src/test/java/org/apache/kylin/dict/AppendTrieDictionaryTest.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/AppendTrieDictionaryTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/AppendTrieDictionaryTest.java
index 10bbb77..e7603ec 100644
--- a/core-dictionary/src/test/java/org/apache/kylin/dict/AppendTrieDictionaryTest.java
+++ b/core-dictionary/src/test/java/org/apache/kylin/dict/AppendTrieDictionaryTest.java
@@ -1,3 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
 package org.apache.kylin.dict;
 
 import org.apache.hadoop.conf.Configuration;

http://git-wip-us.apache.org/repos/asf/kylin/blob/5198b877/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapCounter.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapCounter.java b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapCounter.java
index 6c90275..bbf812a 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapCounter.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapCounter.java
@@ -22,6 +22,7 @@ import org.roaringbitmap.buffer.MutableRoaringBitmap;
 
 import java.io.*;
 import java.nio.ByteBuffer;
+import java.util.Iterator;
 
 /**
  * Created by sunyerui on 15/12/1.
@@ -75,6 +76,10 @@ public class BitmapCounter implements Comparable<BitmapCounter> {
         return this.bitmap.getSizeInBytes();
     }
 
+    public Iterator<Integer> iterator() {
+        return bitmap.iterator();
+    }
+
     public void writeRegisters(ByteBuffer out) throws IOException {
         ByteArrayOutputStream bos = new ByteArrayOutputStream();
         DataOutputStream dos = new DataOutputStream(bos);

http://git-wip-us.apache.org/repos/asf/kylin/blob/5198b877/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java
index 1b0edbf..3b0fe29 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java
@@ -18,6 +18,8 @@
 
 package org.apache.kylin.measure.bitmap;
 
+import org.apache.kylin.common.util.ByteArray;
+import org.apache.kylin.common.util.BytesUtil;
 import org.apache.kylin.common.util.Dictionary;
 import org.apache.kylin.measure.MeasureAggregator;
 import org.apache.kylin.measure.MeasureIngester;
@@ -30,6 +32,7 @@ import org.apache.kylin.metadata.model.MeasureDesc;
 import org.apache.kylin.metadata.model.TblColRef;
 
 import java.util.Collections;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 
@@ -76,11 +79,6 @@ public class BitmapMeasureType extends MeasureType<BitmapCounter> {
 
         if (DATATYPE_BITMAP.equals(functionDesc.getReturnDataType().getName()) == false)
             throw new IllegalArgumentException("BitmapMeasureType datatype is not " + DATATYPE_BITMAP + " but " + functionDesc.getReturnDataType().getName());
-
-        List<TblColRef> colRefs = functionDesc.getParameter().getColRefs();
-        if (colRefs.size() != 1 && colRefs.size() != 2) {
-            throw new IllegalArgumentException("Bitmap measure need 1 or 2 parameters, but has " + colRefs.size());
-        }
     }
 
     @Override
@@ -95,25 +93,48 @@ public class BitmapMeasureType extends MeasureType<BitmapCounter> {
 
             @Override
             public BitmapCounter valueOf(String[] values, MeasureDesc measureDesc, Map<TblColRef, Dictionary<String>> dictionaryMap) {
-                List<TblColRef> literalCols = measureDesc.getFunction().getParameter().getColRefs();
-                TblColRef literalCol = null;
-                if (literalCols.size() == 1) {
-                    literalCol = literalCols.get(0);
-                } else if (literalCols.size() == 2) {
-                    literalCol = literalCols.get(1);
-                } else {
-                    throw new IllegalArgumentException("Bitmap measure need 1 or 2 parameters");
-                }
-                Dictionary<String> dictionary = dictionaryMap.get(literalCol);
                 BitmapCounter bitmap = current;
                 bitmap.clear();
-                // bitmap measure may have two values due to two parameters, only the first value should be ingested
-                if (values != null && values.length > 0 && values[0] != null) {
-                    int id = dictionary.getIdFromValue(values[0]);
-                    bitmap.add(id);
+                if (needDictionaryColumn(measureDesc.getFunction())) {
+                    TblColRef literalCol = measureDesc.getFunction().getParameter().getColRefs().get(0);
+                    Dictionary<String> dictionary = dictionaryMap.get(literalCol);
+                    if (values != null && values.length > 0 && values[0] != null) {
+                        int id = dictionary.getIdFromValue(values[0]);
+                        bitmap.add(id);
+                    }
+                } else {
+                    for (String value : values) {
+                        bitmap.add(value);
+                    }
                 }
                 return bitmap;
             }
+
+            @Override
+            public BitmapCounter reEncodeDictionary(BitmapCounter value, MeasureDesc measureDesc, Map<TblColRef, Dictionary<String>> oldDicts, Map<TblColRef, Dictionary<String>> newDicts) {
+                if (!needDictionaryColumn(measureDesc.getFunction())) {
+                    return value;
+                }
+                TblColRef colRef = measureDesc.getFunction().getParameter().getColRefs().get(0);
+                Dictionary<String> sourceDict = oldDicts.get(colRef);
+                Dictionary<String> mergedDict = newDicts.get(colRef);
+
+                BitmapCounter retValue = new BitmapCounter();
+                byte[] literal = new byte[sourceDict.getSizeOfValue()];
+                Iterator<Integer> iterator = value.iterator();
+                while (iterator.hasNext()) {
+                    int id = iterator.next();
+                    int newId;
+                    int size = sourceDict.getValueBytesFromId(id, literal, 0);
+                    if (size < 0) {
+                        newId = mergedDict.nullId();
+                    } else {
+                        newId = mergedDict.getIdFromValueBytes(literal, 0, size);
+                    }
+                    retValue.add(newId);
+                }
+                return retValue;
+            }
         };
     }
 
@@ -122,21 +143,12 @@ public class BitmapMeasureType extends MeasureType<BitmapCounter> {
         return new BitmapAggregator();
     }
 
-    /**
-     * generate dict with first col by default, and with second col if specified
-     *
-     * Typical case: we have col uuid, and another col flag_uuid (if flag==1, uuid, null),
-     * the metrics count(distinct uuid) and count(distinct flag_uuid) should both generate dict with uuid, instead of uuid and flag_uuid
-     */
     @Override
     public List<TblColRef> getColumnsNeedDictionary(FunctionDesc functionDesc) {
-        List<TblColRef> literalCols = functionDesc.getParameter().getColRefs();
-        if (literalCols.size() == 1) {
-            return Collections.singletonList(literalCols.get(0));
-        } else if (literalCols.size() == 2) {
-            return Collections.singletonList(literalCols.get(1));
+        if (needDictionaryColumn(functionDesc)) {
+            return Collections.singletonList(functionDesc.getParameter().getColRefs().get(0));
         } else {
-            throw new IllegalArgumentException("Bitmap measure need 1 or 2 parameters");
+            return Collections.emptyList();
         }
     }
 
@@ -150,4 +162,12 @@ public class BitmapMeasureType extends MeasureType<BitmapCounter> {
         return BitmapDistinctCountAggFunc.class;
     }
 
+    // To stay compatible with older versions, tinyint/smallint/int columns use their values directly, without a dictionary
+    private boolean needDictionaryColumn(FunctionDesc functionDesc) {
+        DataType dataType = functionDesc.getParameter().getColRefs().get(0).getType();
+        if (dataType.isIntegerFamily() && !dataType.isBigInt()) {
+            return false;
+        }
+        return true;
+    }
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/5198b877/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json
----------------------------------------------------------------------
diff --git a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json
index 3361a3d..a501bd1 100644
--- a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json
+++ b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json
@@ -112,7 +112,19 @@
         "value" : "SELLER_ID",
         "next_parameter" : null
       },
-      "returntype" : "hllc(10)"
+      "returntype" : "bitmap"
+    },
+    "dependent_measure_ref" : null
+  }, {
+    "name" : "SITE_NAME_BITMAP",
+    "function" : {
+      "expression" : "COUNT_DISTINCT",
+      "parameter" : {
+        "type" : "column",
+        "value" : "SITE_NAME",
+        "next_parameter" : null
+      },
+      "returntype" : "bitmap"
     },
     "dependent_measure_ref" : null
   }, {
@@ -196,6 +208,12 @@
     },
     "dependent_measure_ref" : null
   } ],
+  "dictionaries" : [
+    {
+      "column" : "SITE_NAME",
+      "builder": "org.apache.kylin.dict.GlobalDictionaryBuilder"
+    }
+  ],
   "rowkey" : {
     "rowkey_columns" : [ {
       "column" : "cal_dt",
@@ -238,7 +256,7 @@
       "name" : "f2",
       "columns" : [ {
         "qualifier" : "m",
-        "measure_refs" : [ "seller_cnt_bitmap", "seller_format_cnt"]
+        "measure_refs" : [ "seller_cnt_bitmap", "site_name_bitmap", "seller_format_cnt"]
       } ]
     }, {
       "name" : "f3",