You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by sh...@apache.org on 2018/08/20 10:48:09 UTC
[kylin] 03/03: KYLIN-3490 introduce DictionaryEnumerator to answer
single encoded column related queries which will not hit cuboid
This is an automated email from the ASF dual-hosted git repository.
shaofengshi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kylin.git
commit c9d7f5ec2ff8834f0f1b24610485bb819741206f
Author: Zhong <nj...@apache.org>
AuthorDate: Wed Aug 15 16:33:19 2018 +0800
KYLIN-3490 introduce DictionaryEnumerator to answer single encoded column related queries which will not hit cuboid
---
.../org/apache/kylin/common/KylinConfigBase.java | 4 +
.../java/org/apache/kylin/cube/model/CubeDesc.java | 81 +++++++-----
.../org/apache/kylin/query/ITKylinQueryTest.java | 12 ++
.../query/sql_dict_enumerator/query01.sql | 21 +++
.../query/sql_dict_enumerator/query02.sql | 20 +++
.../query/sql_dict_enumerator/query03.sql | 20 +++
.../query/enumerator/DictionaryEnumerator.java | 142 +++++++++++++++++++++
.../apache/kylin/query/enumerator/OLAPQuery.java | 5 +-
.../apache/kylin/query/relnode/OLAPTableScan.java | 3 +
.../org/apache/kylin/query/schema/OLAPTable.java | 4 +
10 files changed, 276 insertions(+), 36 deletions(-)
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index dbf22b5..f154eee 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -1227,6 +1227,10 @@ abstract public class KylinConfigBase implements Serializable {
// QUERY
// ============================================================================
+ /**
+  * Tells whether queries that relate to a single dictionary-encoded column may be answered
+  * from that column's dictionaries instead of hitting a cuboid.
+  *
+  * @return the value of "kylin.query.enable-dict-enumerator", false when it is not set
+  */
+ public boolean isDictionaryEnumeratorEnabled() {
+     // parseBoolean returns the primitive directly; valueOf would box and then auto-unbox
+     return Boolean.parseBoolean(getOptional("kylin.query.enable-dict-enumerator", "false"));
+ }
+
public Boolean isEnumerableRulesEnabled() {
return Boolean.parseBoolean(getOptional("kylin.query.calcite.enumerable-rules-enabled", "false"));
}
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
index 5b4a134..95c8b40 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
@@ -18,16 +18,28 @@
package org.apache.kylin.cube.model;
-import com.fasterxml.jackson.annotation.JsonAutoDetect;
-import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
-import com.fasterxml.jackson.annotation.JsonInclude;
-import com.fasterxml.jackson.annotation.JsonProperty;
-import com.fasterxml.jackson.core.JsonProcessingException;
-import com.google.common.base.Joiner;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Preconditions.checkNotNull;
+import static com.google.common.base.Preconditions.checkState;
+
+import java.lang.reflect.Method;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.TreeSet;
+
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.ArrayUtils;
@@ -65,27 +77,16 @@ import org.apache.kylin.metadata.realization.RealizationType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.lang.reflect.Method;
-import java.security.MessageDigest;
-import java.security.NoSuchAlgorithmException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.BitSet;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
-import java.util.TreeSet;
-
-import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Preconditions.checkNotNull;
-import static com.google.common.base.Preconditions.checkState;
+import com.fasterxml.jackson.annotation.JsonAutoDetect;
+import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.google.common.base.Joiner;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
/**
*/
@@ -1298,18 +1299,28 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware {
}
/**
- * Get columns that have dictionary
+ * Get dimensions that have dictionary
*/
- public Set<TblColRef> getAllColumnsHaveDictionary() {
- Set<TblColRef> result = Sets.newLinkedHashSet();
+ public Set<TblColRef> getAllDimsHaveDictionary() {
+ // Insertion-ordered on purpose: getAllColumnsHaveDictionary() copies this set, and before
+ // this method was extracted it listed the dictionary dimensions in row key order.
+ Set<TblColRef> result = Sets.newLinkedHashSet();
- // dictionaries in dimensions
for (RowKeyColDesc rowKeyColDesc : rowkey.getRowKeyColumns()) {
TblColRef colRef = rowKeyColDesc.getColRef();
if (rowkey.isUseDictionary(colRef)) {
result.add(colRef);
}
}
+ return result;
+ }
+
+ /**
+ * Get columns that have dictionary
+ */
+ public Set<TblColRef> getAllColumnsHaveDictionary() {
+ Set<TblColRef> result = Sets.newLinkedHashSet();
+
+ // dictionaries in dimensions
+ result.addAll(getAllDimsHaveDictionary());
// dictionaries in measures
for (MeasureDesc measure : measures) {
diff --git a/kylin-it/src/test/java/org/apache/kylin/query/ITKylinQueryTest.java b/kylin-it/src/test/java/org/apache/kylin/query/ITKylinQueryTest.java
index e6afbe0..e01334f 100644
--- a/kylin-it/src/test/java/org/apache/kylin/query/ITKylinQueryTest.java
+++ b/kylin-it/src/test/java/org/apache/kylin/query/ITKylinQueryTest.java
@@ -428,6 +428,18 @@ public class ITKylinQueryTest extends KylinTestBase {
}
@Test
+ public void testDictionaryEnumerator() throws Exception {
+     // Runs every query under sql_dict_enumerator with the dictionary enumerator switched on.
+     boolean ifDictEnumeratorEnabled = config.isDictionaryEnumeratorEnabled();
+     if (!ifDictEnumeratorEnabled) {
+         config.setProperty("kylin.query.enable-dict-enumerator", "true");
+     }
+     try {
+         batchExecuteQuery(getQueryFolderPrefix() + "src/test/resources/query/sql_dict_enumerator");
+     } finally {
+         // Put the flag back even when a query throws, so the override cannot outlive this test.
+         if (!ifDictEnumeratorEnabled) {
+             config.setProperty("kylin.query.enable-dict-enumerator", "false");
+         }
+     }
+ }
+
+ @Test
public void testValues() throws Exception {
execAndCompQuery(getQueryFolderPrefix() + "src/test/resources/query/sql_values", null, true);
}
diff --git a/kylin-it/src/test/resources/query/sql_dict_enumerator/query01.sql b/kylin-it/src/test/resources/query/sql_dict_enumerator/query01.sql
new file mode 100644
index 0000000..963e3b3
--- /dev/null
+++ b/kylin-it/src/test/resources/query/sql_dict_enumerator/query01.sql
@@ -0,0 +1,21 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+select SLR_SEGMENT_CD
+from TEST_KYLIN_FACT
+group by SLR_SEGMENT_CD
\ No newline at end of file
diff --git a/kylin-it/src/test/resources/query/sql_dict_enumerator/query02.sql b/kylin-it/src/test/resources/query/sql_dict_enumerator/query02.sql
new file mode 100644
index 0000000..2617e91
--- /dev/null
+++ b/kylin-it/src/test/resources/query/sql_dict_enumerator/query02.sql
@@ -0,0 +1,20 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+select DISTINCT SLR_SEGMENT_CD
+from TEST_KYLIN_FACT
\ No newline at end of file
diff --git a/kylin-it/src/test/resources/query/sql_dict_enumerator/query03.sql b/kylin-it/src/test/resources/query/sql_dict_enumerator/query03.sql
new file mode 100644
index 0000000..cb07666
--- /dev/null
+++ b/kylin-it/src/test/resources/query/sql_dict_enumerator/query03.sql
@@ -0,0 +1,20 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+select MAX(SLR_SEGMENT_CD)
+from TEST_KYLIN_FACT
\ No newline at end of file
diff --git a/query/src/main/java/org/apache/kylin/query/enumerator/DictionaryEnumerator.java b/query/src/main/java/org/apache/kylin/query/enumerator/DictionaryEnumerator.java
new file mode 100644
index 0000000..6af65ee
--- /dev/null
+++ b/query/src/main/java/org/apache/kylin/query/enumerator/DictionaryEnumerator.java
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.query.enumerator;
+
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.calcite.linq4j.Enumerator;
+import org.apache.kylin.common.util.Dictionary;
+import org.apache.kylin.cube.CubeInstance;
+import org.apache.kylin.cube.CubeSegment;
+import org.apache.kylin.metadata.model.SegmentStatusEnum;
+import org.apache.kylin.metadata.model.TblColRef;
+import org.apache.kylin.metadata.realization.IRealization;
+import org.apache.kylin.metadata.tuple.Tuple;
+import org.apache.kylin.query.relnode.OLAPContext;
+import org.apache.kylin.storage.hybrid.HybridInstance;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
+/**
+ * Enumerator for queries that relate to exactly one dictionary-encoded dimension: instead of
+ * scanning a cuboid it walks the values held in that column's dictionaries, taken from every
+ * READY segment of the cube (recursing through the members of a hybrid realization).
+ *
+ * Every row handed out by {@link #current()} is the same array instance; only the slot of the
+ * dictionary column is ever written, all other slots stay null.
+ *
+ * NOTE(review): values are not de-duplicated across dictionaries here; presumably the
+ * enclosing plan aggregates them - confirm.
+ */
+public class DictionaryEnumerator implements Enumerator<Object[]> {
+
+    private static final Logger logger = LoggerFactory.getLogger(DictionaryEnumerator.class);
+
+    /** Dictionaries to walk, in the order they were collected. */
+    private final List<Dictionary<String>> dictList;
+    /** Reused output row. */
+    private final Object[] current;
+    private final TblColRef dictCol;
+    /** Position of {@link #dictCol} inside the output row. */
+    private final int dictColIdx;
+    /** Values of the dictionary being consumed; null until the first dictionary is opened. */
+    private Iterator<String> currentDict;
+    /** Cursor over {@link #dictList}. */
+    private Iterator<Dictionary<String>> iterator;
+
+    /**
+     * @param olapContext context of the query; it must reference exactly one column and that
+     *                    column must be dictionary-encoded in the context's realization
+     * @throws IllegalArgumentException when either of the conditions above does not hold
+     */
+    public DictionaryEnumerator(OLAPContext olapContext) {
+        Preconditions.checkArgument(olapContext.allColumns.size() == 1, "The query should only relate to one column");
+
+        dictCol = olapContext.allColumns.iterator().next();
+        // The config switch is not re-checked here (enableCheck = false); only the encoding is.
+        Preconditions.checkArgument(ifColumnHaveDictionary(dictCol, olapContext.realization, false),
+                "The column %s should be encoded as dictionary for %s", dictCol, olapContext.realization);
+
+        dictList = getAllDictionaries(dictCol, olapContext.realization);
+        current = new Object[olapContext.returnTupleInfo.size()];
+        dictColIdx = olapContext.returnTupleInfo.getColumnIndex(dictCol);
+
+        reset();
+        logger.info("Will use DictionaryEnumerator to answer query which is only related to column {}", dictCol);
+    }
+
+    /**
+     * Decides whether a query can be served by this enumerator: it must relate to a single
+     * column, that column must be a dictionary-encoded dimension of every cube involved, and
+     * the feature must be enabled in the configuration of each of those cubes.
+     */
+    public static boolean ifDictionaryEnumeratorEligible(OLAPContext olapContext) {
+        if (olapContext.allColumns.size() != 1) {
+            return false;
+        }
+        TblColRef dictCol = olapContext.allColumns.iterator().next();
+        return ifColumnHaveDictionary(dictCol, olapContext.realization, true);
+    }
+
+    /**
+     * Checks that {@code col} is a dictionary-encoded dimension of the realization. A hybrid
+     * qualifies only when all of its member realizations do; any realization that is neither
+     * a cube nor a hybrid never qualifies.
+     *
+     * @param enableCheck when true, the cube's config must also have the dictionary
+     *                    enumerator enabled
+     */
+    private static boolean ifColumnHaveDictionary(TblColRef col, IRealization realization, boolean enableCheck) {
+        if (realization instanceof CubeInstance) {
+            final CubeInstance cube = (CubeInstance) realization;
+            if (enableCheck && !cube.getConfig().isDictionaryEnumeratorEnabled()) {
+                return false;
+            }
+            return cube.getDescriptor().getAllDimsHaveDictionary().contains(col);
+        }
+        if (realization instanceof HybridInstance) {
+            for (IRealization entry : ((HybridInstance) realization).getRealizations()) {
+                if (!ifColumnHaveDictionary(col, entry, enableCheck)) {
+                    return false;
+                }
+            }
+            return true;
+        }
+        return false;
+    }
+
+    /**
+     * Collects the dictionaries of {@code col} from every READY segment of the realization,
+     * dropping duplicates (as decided by the dictionaries' own equals/hashCode).
+     *
+     * @throws IllegalStateException when a leaf realization is not a CubeInstance
+     */
+    public static List<Dictionary<String>> getAllDictionaries(TblColRef col, IRealization realization) {
+        // Insertion-ordered so that the enumeration order is stable from run to run.
+        Set<Dictionary<String>> result = Sets.newLinkedHashSet();
+        if (realization instanceof CubeInstance) {
+            final CubeInstance cube = (CubeInstance) realization;
+            for (CubeSegment segment : cube.getSegments(SegmentStatusEnum.READY)) {
+                // NOTE(review): if getDictionary can return null for a segment, moveNext()
+                // will fail on it - confirm whether that can happen for READY segments.
+                result.add(segment.getDictionary(col));
+            }
+        } else if (realization instanceof HybridInstance) {
+            final HybridInstance hybridInstance = (HybridInstance) realization;
+            for (IRealization entry : hybridInstance.getRealizations()) {
+                result.addAll(getAllDictionaries(col, entry));
+            }
+        } else {
+            throw new IllegalStateException("All leaf realizations should be CubeInstance");
+        }
+        return Lists.newArrayList(result);
+    }
+
+    @Override
+    public boolean moveNext() {
+        // Advance to the next dictionary that still has values; empty ones are skipped.
+        while (currentDict == null || !currentDict.hasNext()) {
+            if (!iterator.hasNext()) {
+                return false;
+            }
+            final Dictionary<String> dict = iterator.next();
+            currentDict = dict.enumeratorValues().iterator();
+        }
+
+        current[dictColIdx] = Tuple.convertOptiqCellValue(currentDict.next(), dictCol.getDatatype());
+        return true;
+    }
+
+    @Override
+    public Object[] current() {
+        return current;
+    }
+
+    @Override
+    public void reset() {
+        iterator = dictList.iterator();
+        // Also forget the dictionary that was being read; otherwise the next moveNext() would
+        // first drain its remaining values before starting over from the first dictionary.
+        currentDict = null;
+    }
+
+    @Override
+    public void close() {
+        // nothing to release
+    }
+}
\ No newline at end of file
diff --git a/query/src/main/java/org/apache/kylin/query/enumerator/OLAPQuery.java b/query/src/main/java/org/apache/kylin/query/enumerator/OLAPQuery.java
index 84ac5cf..c094ff5 100644
--- a/query/src/main/java/org/apache/kylin/query/enumerator/OLAPQuery.java
+++ b/query/src/main/java/org/apache/kylin/query/enumerator/OLAPQuery.java
@@ -37,7 +37,8 @@ public class OLAPQuery extends AbstractEnumerable<Object[]> implements Enumerabl
public enum EnumeratorTypeEnum {
OLAP, //finish query with Cube or II, or a combination of both
LOOKUP_TABLE, //using a snapshot of lookup table
- HIVE //using hive
+ HIVE, //using hive
+ COL_DICT // using a column's dictionary
}
private final DataContext optiqContext;
@@ -65,6 +66,8 @@ public class OLAPQuery extends AbstractEnumerable<Object[]> implements Enumerabl
: new OLAPEnumerator(olapContext, optiqContext);
case LOOKUP_TABLE:
return BackdoorToggles.getPrepareOnly() ? new EmptyEnumerator() : new LookupTableEnumerator(olapContext);
+ case COL_DICT:
+ return BackdoorToggles.getPrepareOnly() ? new EmptyEnumerator() : new DictionaryEnumerator(olapContext);
case HIVE:
return BackdoorToggles.getPrepareOnly() ? new EmptyEnumerator() : new HiveEnumerator(olapContext);
default:
diff --git a/query/src/main/java/org/apache/kylin/query/relnode/OLAPTableScan.java b/query/src/main/java/org/apache/kylin/query/relnode/OLAPTableScan.java
index c23f1c5..ac6241f 100644
--- a/query/src/main/java/org/apache/kylin/query/relnode/OLAPTableScan.java
+++ b/query/src/main/java/org/apache/kylin/query/relnode/OLAPTableScan.java
@@ -72,6 +72,7 @@ import org.apache.kylin.metadata.model.ColumnDesc;
import org.apache.kylin.metadata.model.DataModelDesc;
import org.apache.kylin.metadata.model.TableRef;
import org.apache.kylin.metadata.model.TblColRef;
+import org.apache.kylin.query.enumerator.DictionaryEnumerator;
import org.apache.kylin.query.optrule.AggregateMultipleExpandRule;
import org.apache.kylin.query.optrule.AggregateProjectReduceRule;
import org.apache.kylin.query.optrule.OLAPAggregateRule;
@@ -419,6 +420,8 @@ public class OLAPTableScan extends TableScan implements OLAPRel, EnumerableRel {
// if the table to scan is not the fact table of cube, then it's a lookup table
if (context.realization.getModel().isLookupTable(tableName)) {
return "executeLookupTableQuery";
+ } else if (DictionaryEnumerator.ifDictionaryEnumeratorEligible(context)) {
+ return "executeColumnDictionaryQuery";
} else {
return "executeOLAPQuery";
}
diff --git a/query/src/main/java/org/apache/kylin/query/schema/OLAPTable.java b/query/src/main/java/org/apache/kylin/query/schema/OLAPTable.java
index 216c6d4..60a856d 100644
--- a/query/src/main/java/org/apache/kylin/query/schema/OLAPTable.java
+++ b/query/src/main/java/org/apache/kylin/query/schema/OLAPTable.java
@@ -280,6 +280,10 @@ public class OLAPTable extends AbstractQueryableTable implements TranslatableTab
return new OLAPQuery(optiqContext, EnumeratorTypeEnum.LOOKUP_TABLE, ctxSeq);
}
+ /**
+  * Creates the query whose rows come from a column's dictionary (enumerator type COL_DICT).
+  * OLAPTableScan returns the string "executeColumnDictionaryQuery" for eligible contexts -
+  * presumably to look this method up by name, so keep the two in sync (confirm).
+  */
+ public Enumerable<Object[]> executeColumnDictionaryQuery(DataContext optiqContext, int ctxSeq) {
+     final EnumeratorTypeEnum enumeratorType = EnumeratorTypeEnum.COL_DICT;
+     return new OLAPQuery(optiqContext, enumeratorType, ctxSeq);
+ }
+
public Enumerable<Object[]> executeHiveQuery(DataContext optiqContext, int ctxSeq) {
return new OLAPQuery(optiqContext, EnumeratorTypeEnum.HIVE, ctxSeq);
}