You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by sh...@apache.org on 2020/02/24 09:00:25 UTC

[kylin] branch 2.6.x updated: KYLIN-4314 extend intersect_count and add intersect_value UDAF

This is an automated email from the ASF dual-hosted git repository.

shaofengshi pushed a commit to branch 2.6.x
in repository https://gitbox.apache.org/repos/asf/kylin.git


The following commit(s) were added to refs/heads/2.6.x by this push:
     new ae0f31e  KYLIN-4314 extend intersect_count and add intersect_value UDAF
ae0f31e is described below

commit ae0f31e16eda8d1ebc188e8935f603080b77a54d
Author: shaofengshi <sh...@apache.org>
AuthorDate: Mon Jul 29 08:58:25 2019 +0800

    KYLIN-4314 extend intersect_count and add intersect_value UDAF
---
 .../org/apache/kylin/common/KylinConfigBase.java   |   4 +
 .../BitmapIntersectDistinctCountAggFunc.java       |  48 +--------
 ...gFunc.java => BitmapIntersectValueAggFunc.java} |  56 +---------
 .../kylin/measure/bitmap/BitmapMeasureType.java    |   7 +-
 .../measure/bitmap/RetentionPartialResult.java     | 120 +++++++++++++++++++++
 .../storage/gtrecord/GTCubeStorageQueryBase.java   |   5 +-
 .../query/sql_intersect_count/query04.sql          |  33 ++++++
 .../query/sql_intersect_count/query05.sql          |  23 ++++
 8 files changed, 194 insertions(+), 102 deletions(-)

diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 92f3cb5..3e6bd5f 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -1974,4 +1974,8 @@ abstract public class KylinConfigBase implements Serializable {
     public boolean isLimitPushDownEnabled() {
         return Boolean.parseBoolean(getOptional("kylin.storage.limit-push-down-enabled", TRUE));
     }
+
+    public String getIntersectFilterOrSeparator() {
+        return getOptional("kylin.query.intersect.separator", "|");
+    }
 }
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectDistinctCountAggFunc.java b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectDistinctCountAggFunc.java
index 9771352..f1968e4 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectDistinctCountAggFunc.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectDistinctCountAggFunc.java
@@ -19,9 +19,7 @@ package org.apache.kylin.measure.bitmap;
 
 import org.apache.kylin.measure.ParamAsMeasureCount;
 
-import java.util.LinkedHashMap;
 import java.util.List;
-import java.util.Map;
 
 /**
  * BitmapIntersectDistinctCountAggFunc is an UDAF used for calculating the intersection of two or more bitmaps
@@ -30,56 +28,12 @@ import java.util.Map;
  *          requires an bitmap count distinct measure of uuid, and an dimension of event
  */
 public class BitmapIntersectDistinctCountAggFunc implements ParamAsMeasureCount {
-    private static final BitmapCounterFactory factory = RoaringBitmapCounterFactory.INSTANCE;
 
     @Override
     public int getParamAsMeasureCount() {
         return -2;
     }
 
-    public static class RetentionPartialResult {
-        Map<Object, BitmapCounter> map;
-        List keyList;
-
-        public RetentionPartialResult() {
-            map = new LinkedHashMap<>();
-        }
-
-        public void add(Object key, List keyList, Object value) {
-            if (this.keyList == null) {
-                this.keyList = keyList;
-            }
-            if (this.keyList != null && this.keyList.contains(key)) {
-                BitmapCounter counter = map.computeIfAbsent(key, o -> factory.newBitmap());
-
-                counter.orWith((BitmapCounter) value);
-            }
-        }
-
-        public long result() {
-            if (keyList == null || keyList.isEmpty()) {
-                return 0;
-            }
-            // if any specified key not in map, the intersection must be 0
-            for (Object key : keyList) {
-                if (!map.containsKey(key)) {
-                    return 0;
-                }
-            }
-            BitmapCounter counter = null;
-            for (Object key : keyList) {
-                BitmapCounter c = map.get(key);
-                if (counter == null) {
-                    counter = factory.newBitmap();
-                    counter.orWith(c);
-                } else {
-                    counter.andWith(c);
-                }
-            }
-            return counter != null ? counter.getCount() : 0;
-        }
-    }
-
     public static RetentionPartialResult init() {
         return new RetentionPartialResult();
     }
@@ -94,7 +48,7 @@ public class BitmapIntersectDistinctCountAggFunc implements ParamAsMeasureCount
     }
 
     public static long result(RetentionPartialResult result) {
-        return result.result();
+        return result.countResult();
     }
 }
 
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectDistinctCountAggFunc.java b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectValueAggFunc.java
similarity index 52%
copy from core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectDistinctCountAggFunc.java
copy to core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectValueAggFunc.java
index 9771352..7ec21b5 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectDistinctCountAggFunc.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectValueAggFunc.java
@@ -17,11 +17,9 @@
 */
 package org.apache.kylin.measure.bitmap;
 
-import org.apache.kylin.measure.ParamAsMeasureCount;
-
-import java.util.LinkedHashMap;
 import java.util.List;
-import java.util.Map;
+
+import org.apache.kylin.measure.ParamAsMeasureCount;
 
 /**
  * BitmapIntersectDistinctCountAggFunc is an UDAF used for calculating the intersection of two or more bitmaps
@@ -29,57 +27,13 @@ import java.util.Map;
  * Example: intersect_count(uuid, event, array['A', 'B', 'C']), meaning find the count of uuid in all A/B/C 3 bitmaps
  *          requires an bitmap count distinct measure of uuid, and an dimension of event
  */
-public class BitmapIntersectDistinctCountAggFunc implements ParamAsMeasureCount {
-    private static final BitmapCounterFactory factory = RoaringBitmapCounterFactory.INSTANCE;
+public class BitmapIntersectValueAggFunc implements ParamAsMeasureCount {
 
     @Override
     public int getParamAsMeasureCount() {
         return -2;
     }
 
-    public static class RetentionPartialResult {
-        Map<Object, BitmapCounter> map;
-        List keyList;
-
-        public RetentionPartialResult() {
-            map = new LinkedHashMap<>();
-        }
-
-        public void add(Object key, List keyList, Object value) {
-            if (this.keyList == null) {
-                this.keyList = keyList;
-            }
-            if (this.keyList != null && this.keyList.contains(key)) {
-                BitmapCounter counter = map.computeIfAbsent(key, o -> factory.newBitmap());
-
-                counter.orWith((BitmapCounter) value);
-            }
-        }
-
-        public long result() {
-            if (keyList == null || keyList.isEmpty()) {
-                return 0;
-            }
-            // if any specified key not in map, the intersection must be 0
-            for (Object key : keyList) {
-                if (!map.containsKey(key)) {
-                    return 0;
-                }
-            }
-            BitmapCounter counter = null;
-            for (Object key : keyList) {
-                BitmapCounter c = map.get(key);
-                if (counter == null) {
-                    counter = factory.newBitmap();
-                    counter.orWith(c);
-                } else {
-                    counter.andWith(c);
-                }
-            }
-            return counter != null ? counter.getCount() : 0;
-        }
-    }
-
     public static RetentionPartialResult init() {
         return new RetentionPartialResult();
     }
@@ -93,8 +47,8 @@ public class BitmapIntersectDistinctCountAggFunc implements ParamAsMeasureCount
         return add(result, value, key, keyList);
     }
 
-    public static long result(RetentionPartialResult result) {
-        return result.result();
+    public static String result(RetentionPartialResult result) {
+        return result.valueResult();
     }
 }
 
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java
index f724257..9b36bef 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java
@@ -46,6 +46,7 @@ import com.google.common.collect.ImmutableMap;
 public class BitmapMeasureType extends MeasureType<BitmapCounter> {
     public static final String FUNC_COUNT_DISTINCT = FunctionDesc.FUNC_COUNT_DISTINCT;
     public static final String FUNC_INTERSECT_COUNT_DISTINCT = "INTERSECT_COUNT";
+    public static final String FUNC_INTERSECT_VALUE = "INTERSECT_VALUE";
     public static final String DATATYPE_BITMAP = "bitmap";
 
     public static class Factory extends MeasureTypeFactory<BitmapCounter> {
@@ -164,7 +165,8 @@ public class BitmapMeasureType extends MeasureType<BitmapCounter> {
 
     static final Map<String, Class<?>> UDAF_MAP = ImmutableMap.of(
             FUNC_COUNT_DISTINCT, BitmapDistinctCountAggFunc.class,
-            FUNC_INTERSECT_COUNT_DISTINCT, BitmapIntersectDistinctCountAggFunc.class);
+            FUNC_INTERSECT_COUNT_DISTINCT, BitmapIntersectDistinctCountAggFunc.class,
+            FUNC_INTERSECT_VALUE, BitmapIntersectValueAggFunc.class);
 
     @Override
     public Map<String, Class<?>> getRewriteCalciteAggrFunctions() {
@@ -174,7 +176,8 @@ public class BitmapMeasureType extends MeasureType<BitmapCounter> {
     @Override
     public void adjustSqlDigest(List<MeasureDesc> measureDescs, SQLDigest sqlDigest) {
         for (SQLCall call : sqlDigest.aggrSqlCalls) {
-            if (FUNC_INTERSECT_COUNT_DISTINCT.equals(call.function)) {
+            if (FUNC_INTERSECT_COUNT_DISTINCT.equals(call.function)
+                    || FUNC_INTERSECT_VALUE.equals(call.function)) {
                 TblColRef col = (TblColRef) call.args.get(1);
                 if (!sqlDigest.groupbyColumns.contains(col))
                     sqlDigest.groupbyColumns.add(col);
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/RetentionPartialResult.java b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/RetentionPartialResult.java
new file mode 100644
index 0000000..6366b3c
--- /dev/null
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/RetentionPartialResult.java
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kylin.measure.bitmap;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.util.StringUtil;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+public class RetentionPartialResult {
+
+    private static final BitmapCounterFactory factory = RoaringBitmapCounterFactory.INSTANCE;
+    public static final String FILTER_DELIMETER = KylinConfig.getInstanceFromEnv().getIntersectFilterOrSeparator();
+    Map<String, BitmapCounter> map;
+    List<String> keyList;
+    Map<String, List<String>> childKeyToParentKey;
+
+    public RetentionPartialResult() {
+        map = new LinkedHashMap<>();
+    }
+
+    public void add(Object key, List keyList, Object value) {
+        Preconditions.checkArgument(key != null);
+        Preconditions.checkArgument(keyList != null && keyList.size() >= 0);
+        if (this.keyList == null) {
+            this.keyList = Lists.transform(keyList, i -> i.toString());
+            childKeyToParentKey = new HashMap<>(5);
+
+            for (String sKey : this.keyList) {
+                String[] elements = StringUtil.splitAndTrim(sKey, FILTER_DELIMETER);
+                for (String s : elements) {
+                    if (s != null && s.trim().length() > 0) {
+                        List<String> parent = childKeyToParentKey.computeIfAbsent(s.trim(), o -> new ArrayList());
+                        parent.add(sKey);
+                    }
+                }
+            }
+
+        }
+
+        if (this.keyList != null) {
+            if (this.keyList.contains(key.toString())) {
+                BitmapCounter counter = map.computeIfAbsent(key.toString(), o -> factory.newBitmap());
+                counter.orWith((BitmapCounter) value);
+            }
+
+            if (childKeyToParentKey.size() > 0) {
+                String sKey = key.toString();
+                if (childKeyToParentKey.containsKey(sKey)) {
+                    List<String> parents = childKeyToParentKey.get(sKey);
+                    for (String parent : parents) {
+                        BitmapCounter counter = map.computeIfAbsent(parent, o -> factory.newBitmap());
+                        counter.orWith((BitmapCounter) value);
+                    }
+                }
+            }
+        }
+    }
+
+    private BitmapCounter result() {
+        if (keyList == null || keyList.isEmpty()) {
+            return null;
+        }
+        // if any specified key not in map, the intersection must be 0
+        for (String key : keyList) {
+            if (!map.containsKey(key)) {
+                return null;
+            }
+        }
+        BitmapCounter counter = null;
+        for (String key : keyList) {
+            BitmapCounter c = map.get(key);
+            if (counter == null) {
+                counter = factory.newBitmap();
+                counter.orWith(c);
+            } else {
+                counter.andWith(c);
+            }
+        }
+
+        return counter;
+    }
+
+    public String valueResult() {
+        BitmapCounter counter = result();
+        String result = "";
+        if (counter != null && counter.getCount() > 0) {
+            result = "[" + StringUtils.join(counter.iterator(), ",") + "]";
+        }
+        return result;
+    }
+
+    public long countResult() {
+        BitmapCounter counter = result();
+        return counter != null ? counter.getCount() : 0;
+    }
+
+}
\ No newline at end of file
diff --git a/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java b/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java
index 61d5b7f..59eda73 100644
--- a/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java
+++ b/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java
@@ -598,8 +598,9 @@ public abstract class GTCubeStorageQueryBase implements IStorageQuery {
             }
         }
         for (SQLDigest.SQLCall aggrSQLCall : aggrSQLCalls) {
-            if (aggrSQLCall.function.equals(BitmapMeasureType.FUNC_INTERSECT_COUNT_DISTINCT)) {
-                logger.info("exactAggregation is false because has INTERSECT_COUNT");
+            if (aggrSQLCall.function.equals(BitmapMeasureType.FUNC_INTERSECT_COUNT_DISTINCT)
+            || aggrSQLCall.function.equals(BitmapMeasureType.FUNC_INTERSECT_VALUE)) {
+                logger.info("exactAggregation is false because has INTERSECT_COUNT OR INTERSECT_VALUE");
                 return false;
             }
         }
diff --git a/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql b/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql
new file mode 100644
index 0000000..e5de89b
--- /dev/null
+++ b/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql
@@ -0,0 +1,33 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+select
+week_beg_dt as week,
+intersect_count( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC']) as a,
+intersect_count( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['Auction']) as b,
+intersect_count( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['Others']) as c,
+intersect_count( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC', 'Auction']) as ab,
+intersect_count( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC', 'Others']) as ac,
+intersect_count( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC', 'Auction', 'Others']) as abc,
+intersect_count( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC|Auction', 'Others']) as a_or_b_and_c,
+count(distinct TEST_COUNT_DISTINCT_BITMAP) as sellers,
+count(*) as cnt
+from test_kylin_fact left join edw.test_cal_dt on test_kylin_fact.cal_dt = edw.test_cal_dt.CAL_DT
+where week_beg_dt in (DATE '2013-12-22', DATE '2012-06-23')
+group by week_beg_dt
+
diff --git a/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql b/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql
new file mode 100644
index 0000000..6de4eac
--- /dev/null
+++ b/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql
@@ -0,0 +1,23 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+select
+intersect_count(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array['2012-01-01']) as first_day,
+intersect_count(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array['2012-01-01', '2012-01-02']) as first_and_second_day
+from test_kylin_fact
+where CAL_DT in ('2012-01-01','2012-01-02','2012-01-03')
+