You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by sh...@apache.org on 2020/02/24 09:00:25 UTC
[kylin] branch 2.6.x updated: KYLIN-4314 extend intersect_count and add intersect_value UDAF
This is an automated email from the ASF dual-hosted git repository.
shaofengshi pushed a commit to branch 2.6.x
in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/2.6.x by this push:
new ae0f31e KYLIN-4314 extend intersect_count and add intersect_value UDAF
ae0f31e is described below
commit ae0f31e16eda8d1ebc188e8935f603080b77a54d
Author: shaofengshi <sh...@apache.org>
AuthorDate: Mon Jul 29 08:58:25 2019 +0800
KYLIN-4314 extend intersect_count and add intersect_value UDAF
---
.../org/apache/kylin/common/KylinConfigBase.java | 4 +
.../BitmapIntersectDistinctCountAggFunc.java | 48 +--------
...gFunc.java => BitmapIntersectValueAggFunc.java} | 56 +---------
.../kylin/measure/bitmap/BitmapMeasureType.java | 7 +-
.../measure/bitmap/RetentionPartialResult.java | 120 +++++++++++++++++++++
.../storage/gtrecord/GTCubeStorageQueryBase.java | 5 +-
.../query/sql_intersect_count/query04.sql | 33 ++++++
.../query/sql_intersect_count/query05.sql | 23 ++++
8 files changed, 194 insertions(+), 102 deletions(-)
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 92f3cb5..3e6bd5f 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -1974,4 +1974,8 @@ abstract public class KylinConfigBase implements Serializable {
public boolean isLimitPushDownEnabled() {
return Boolean.parseBoolean(getOptional("kylin.storage.limit-push-down-enabled", TRUE));
}
+
+ public String getIntersectFilterOrSeparator() {
+ return getOptional("kylin.query.intersect.separator", "|");
+ }
}
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectDistinctCountAggFunc.java b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectDistinctCountAggFunc.java
index 9771352..f1968e4 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectDistinctCountAggFunc.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectDistinctCountAggFunc.java
@@ -19,9 +19,7 @@ package org.apache.kylin.measure.bitmap;
import org.apache.kylin.measure.ParamAsMeasureCount;
-import java.util.LinkedHashMap;
import java.util.List;
-import java.util.Map;
/**
* BitmapIntersectDistinctCountAggFunc is an UDAF used for calculating the intersection of two or more bitmaps
@@ -30,56 +28,12 @@ import java.util.Map;
* requires an bitmap count distinct measure of uuid, and an dimension of event
*/
public class BitmapIntersectDistinctCountAggFunc implements ParamAsMeasureCount {
- private static final BitmapCounterFactory factory = RoaringBitmapCounterFactory.INSTANCE;
@Override
public int getParamAsMeasureCount() {
return -2;
}
- public static class RetentionPartialResult {
- Map<Object, BitmapCounter> map;
- List keyList;
-
- public RetentionPartialResult() {
- map = new LinkedHashMap<>();
- }
-
- public void add(Object key, List keyList, Object value) {
- if (this.keyList == null) {
- this.keyList = keyList;
- }
- if (this.keyList != null && this.keyList.contains(key)) {
- BitmapCounter counter = map.computeIfAbsent(key, o -> factory.newBitmap());
-
- counter.orWith((BitmapCounter) value);
- }
- }
-
- public long result() {
- if (keyList == null || keyList.isEmpty()) {
- return 0;
- }
- // if any specified key not in map, the intersection must be 0
- for (Object key : keyList) {
- if (!map.containsKey(key)) {
- return 0;
- }
- }
- BitmapCounter counter = null;
- for (Object key : keyList) {
- BitmapCounter c = map.get(key);
- if (counter == null) {
- counter = factory.newBitmap();
- counter.orWith(c);
- } else {
- counter.andWith(c);
- }
- }
- return counter != null ? counter.getCount() : 0;
- }
- }
-
public static RetentionPartialResult init() {
return new RetentionPartialResult();
}
@@ -94,7 +48,7 @@ public class BitmapIntersectDistinctCountAggFunc implements ParamAsMeasureCount
}
public static long result(RetentionPartialResult result) {
- return result.result();
+ return result.countResult();
}
}
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectDistinctCountAggFunc.java b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectValueAggFunc.java
similarity index 52%
copy from core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectDistinctCountAggFunc.java
copy to core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectValueAggFunc.java
index 9771352..7ec21b5 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectDistinctCountAggFunc.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectValueAggFunc.java
@@ -17,11 +17,9 @@
*/
package org.apache.kylin.measure.bitmap;
-import org.apache.kylin.measure.ParamAsMeasureCount;
-
-import java.util.LinkedHashMap;
import java.util.List;
-import java.util.Map;
+
+import org.apache.kylin.measure.ParamAsMeasureCount;
/**
* BitmapIntersectDistinctCountAggFunc is an UDAF used for calculating the intersection of two or more bitmaps
@@ -29,57 +27,13 @@ import java.util.Map;
* Example: intersect_count(uuid, event, array['A', 'B', 'C']), meaning find the count of uuid in all A/B/C 3 bitmaps
* requires an bitmap count distinct measure of uuid, and an dimension of event
*/
-public class BitmapIntersectDistinctCountAggFunc implements ParamAsMeasureCount {
- private static final BitmapCounterFactory factory = RoaringBitmapCounterFactory.INSTANCE;
+public class BitmapIntersectValueAggFunc implements ParamAsMeasureCount {
@Override
public int getParamAsMeasureCount() {
return -2;
}
- public static class RetentionPartialResult {
- Map<Object, BitmapCounter> map;
- List keyList;
-
- public RetentionPartialResult() {
- map = new LinkedHashMap<>();
- }
-
- public void add(Object key, List keyList, Object value) {
- if (this.keyList == null) {
- this.keyList = keyList;
- }
- if (this.keyList != null && this.keyList.contains(key)) {
- BitmapCounter counter = map.computeIfAbsent(key, o -> factory.newBitmap());
-
- counter.orWith((BitmapCounter) value);
- }
- }
-
- public long result() {
- if (keyList == null || keyList.isEmpty()) {
- return 0;
- }
- // if any specified key not in map, the intersection must be 0
- for (Object key : keyList) {
- if (!map.containsKey(key)) {
- return 0;
- }
- }
- BitmapCounter counter = null;
- for (Object key : keyList) {
- BitmapCounter c = map.get(key);
- if (counter == null) {
- counter = factory.newBitmap();
- counter.orWith(c);
- } else {
- counter.andWith(c);
- }
- }
- return counter != null ? counter.getCount() : 0;
- }
- }
-
public static RetentionPartialResult init() {
return new RetentionPartialResult();
}
@@ -93,8 +47,8 @@ public class BitmapIntersectDistinctCountAggFunc implements ParamAsMeasureCount
return add(result, value, key, keyList);
}
- public static long result(RetentionPartialResult result) {
- return result.result();
+ public static String result(RetentionPartialResult result) {
+ return result.valueResult();
}
}
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java
index f724257..9b36bef 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java
@@ -46,6 +46,7 @@ import com.google.common.collect.ImmutableMap;
public class BitmapMeasureType extends MeasureType<BitmapCounter> {
public static final String FUNC_COUNT_DISTINCT = FunctionDesc.FUNC_COUNT_DISTINCT;
public static final String FUNC_INTERSECT_COUNT_DISTINCT = "INTERSECT_COUNT";
+ public static final String FUNC_INTERSECT_VALUE = "INTERSECT_VALUE";
public static final String DATATYPE_BITMAP = "bitmap";
public static class Factory extends MeasureTypeFactory<BitmapCounter> {
@@ -164,7 +165,8 @@ public class BitmapMeasureType extends MeasureType<BitmapCounter> {
static final Map<String, Class<?>> UDAF_MAP = ImmutableMap.of(
FUNC_COUNT_DISTINCT, BitmapDistinctCountAggFunc.class,
- FUNC_INTERSECT_COUNT_DISTINCT, BitmapIntersectDistinctCountAggFunc.class);
+ FUNC_INTERSECT_COUNT_DISTINCT, BitmapIntersectDistinctCountAggFunc.class,
+ FUNC_INTERSECT_VALUE, BitmapIntersectValueAggFunc.class);
@Override
public Map<String, Class<?>> getRewriteCalciteAggrFunctions() {
@@ -174,7 +176,8 @@ public class BitmapMeasureType extends MeasureType<BitmapCounter> {
@Override
public void adjustSqlDigest(List<MeasureDesc> measureDescs, SQLDigest sqlDigest) {
for (SQLCall call : sqlDigest.aggrSqlCalls) {
- if (FUNC_INTERSECT_COUNT_DISTINCT.equals(call.function)) {
+ if (FUNC_INTERSECT_COUNT_DISTINCT.equals(call.function)
+ || FUNC_INTERSECT_VALUE.equals(call.function)) {
TblColRef col = (TblColRef) call.args.get(1);
if (!sqlDigest.groupbyColumns.contains(col))
sqlDigest.groupbyColumns.add(col);
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/RetentionPartialResult.java b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/RetentionPartialResult.java
new file mode 100644
index 0000000..6366b3c
--- /dev/null
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/RetentionPartialResult.java
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kylin.measure.bitmap;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.util.StringUtil;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+public class RetentionPartialResult {
+
+ private static final BitmapCounterFactory factory = RoaringBitmapCounterFactory.INSTANCE;
+ public static final String FILTER_DELIMETER = KylinConfig.getInstanceFromEnv().getIntersectFilterOrSeparator();
+ Map<String, BitmapCounter> map;
+ List<String> keyList;
+ Map<String, List<String>> childKeyToParentKey;
+
+ public RetentionPartialResult() {
+ map = new LinkedHashMap<>();
+ }
+
+ public void add(Object key, List keyList, Object value) {
+ Preconditions.checkArgument(key != null);
+ Preconditions.checkArgument(keyList != null && keyList.size() >= 0);
+ if (this.keyList == null) {
+ this.keyList = Lists.transform(keyList, i -> i.toString());
+ childKeyToParentKey = new HashMap<>(5);
+
+ for (String sKey : this.keyList) {
+ String[] elements = StringUtil.splitAndTrim(sKey, FILTER_DELIMETER);
+ for (String s : elements) {
+ if (s != null && s.trim().length() > 0) {
+ List<String> parent = childKeyToParentKey.computeIfAbsent(s.trim(), o -> new ArrayList());
+ parent.add(sKey);
+ }
+ }
+ }
+
+ }
+
+ if (this.keyList != null) {
+ if (this.keyList.contains(key.toString())) {
+ BitmapCounter counter = map.computeIfAbsent(key.toString(), o -> factory.newBitmap());
+ counter.orWith((BitmapCounter) value);
+ }
+
+ if (childKeyToParentKey.size() > 0) {
+ String sKey = key.toString();
+ if (childKeyToParentKey.containsKey(sKey)) {
+ List<String> parents = childKeyToParentKey.get(sKey);
+ for (String parent : parents) {
+ BitmapCounter counter = map.computeIfAbsent(parent, o -> factory.newBitmap());
+ counter.orWith((BitmapCounter) value);
+ }
+ }
+ }
+ }
+ }
+
+ private BitmapCounter result() {
+ if (keyList == null || keyList.isEmpty()) {
+ return null;
+ }
+ // if any specified key not in map, the intersection must be 0
+ for (String key : keyList) {
+ if (!map.containsKey(key)) {
+ return null;
+ }
+ }
+ BitmapCounter counter = null;
+ for (String key : keyList) {
+ BitmapCounter c = map.get(key);
+ if (counter == null) {
+ counter = factory.newBitmap();
+ counter.orWith(c);
+ } else {
+ counter.andWith(c);
+ }
+ }
+
+ return counter;
+ }
+
+ public String valueResult() {
+ BitmapCounter counter = result();
+ String result = "";
+ if (counter != null && counter.getCount() > 0) {
+ result = "[" + StringUtils.join(counter.iterator(), ",") + "]";
+ }
+ return result;
+ }
+
+ public long countResult() {
+ BitmapCounter counter = result();
+ return counter != null ? counter.getCount() : 0;
+ }
+
+}
\ No newline at end of file
diff --git a/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java b/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java
index 61d5b7f..59eda73 100644
--- a/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java
+++ b/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java
@@ -598,8 +598,9 @@ public abstract class GTCubeStorageQueryBase implements IStorageQuery {
}
}
for (SQLDigest.SQLCall aggrSQLCall : aggrSQLCalls) {
- if (aggrSQLCall.function.equals(BitmapMeasureType.FUNC_INTERSECT_COUNT_DISTINCT)) {
- logger.info("exactAggregation is false because has INTERSECT_COUNT");
+ if (aggrSQLCall.function.equals(BitmapMeasureType.FUNC_INTERSECT_COUNT_DISTINCT)
+ || aggrSQLCall.function.equals(BitmapMeasureType.FUNC_INTERSECT_VALUE)) {
+ logger.info("exactAggregation is false because has INTERSECT_COUNT OR INTERSECT_VALUE");
return false;
}
}
diff --git a/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql b/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql
new file mode 100644
index 0000000..e5de89b
--- /dev/null
+++ b/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql
@@ -0,0 +1,33 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+select
+week_beg_dt as week,
+intersect_count( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC']) as a,
+intersect_count( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['Auction']) as b,
+intersect_count( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['Others']) as c,
+intersect_count( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC', 'Auction']) as ab,
+intersect_count( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC', 'Others']) as ac,
+intersect_count( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC', 'Auction', 'Others']) as abc,
+intersect_count( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC|Auction', 'Others']) as a_or_b_and_c,
+count(distinct TEST_COUNT_DISTINCT_BITMAP) as sellers,
+count(*) as cnt
+from test_kylin_fact left join edw.test_cal_dt on test_kylin_fact.cal_dt = edw.test_cal_dt.CAL_DT
+where week_beg_dt in (DATE '2013-12-22', DATE '2012-06-23')
+group by week_beg_dt
+
diff --git a/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql b/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql
new file mode 100644
index 0000000..6de4eac
--- /dev/null
+++ b/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql
@@ -0,0 +1,23 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+select
+intersect_count(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array['2012-01-01']) as first_day,
+intersect_count(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array['2012-01-01', '2012-01-02']) as first_and_second_day
+from test_kylin_fact
+where CAL_DT in ('2012-01-01','2012-01-02','2012-01-03')
+