You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by sh...@apache.org on 2018/08/20 10:48:09 UTC

[kylin] 03/03: KYLIN-3490 introduce DictionaryEnumerator to answer single encoded column related queries which will not hit cuboid

This is an automated email from the ASF dual-hosted git repository.

shaofengshi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kylin.git

commit c9d7f5ec2ff8834f0f1b24610485bb819741206f
Author: Zhong <nj...@apache.org>
AuthorDate: Wed Aug 15 16:33:19 2018 +0800

    KYLIN-3490 introduce DictionaryEnumerator to answer single encoded column related queries which will not hit cuboid
---
 .../org/apache/kylin/common/KylinConfigBase.java   |   4 +
 .../java/org/apache/kylin/cube/model/CubeDesc.java |  81 +++++++-----
 .../org/apache/kylin/query/ITKylinQueryTest.java   |  12 ++
 .../query/sql_dict_enumerator/query01.sql          |  21 +++
 .../query/sql_dict_enumerator/query02.sql          |  20 +++
 .../query/sql_dict_enumerator/query03.sql          |  20 +++
 .../query/enumerator/DictionaryEnumerator.java     | 142 +++++++++++++++++++++
 .../apache/kylin/query/enumerator/OLAPQuery.java   |   5 +-
 .../apache/kylin/query/relnode/OLAPTableScan.java  |   3 +
 .../org/apache/kylin/query/schema/OLAPTable.java   |   4 +
 10 files changed, 276 insertions(+), 36 deletions(-)

diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index dbf22b5..f154eee 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -1227,6 +1227,15 @@ abstract public class KylinConfigBase implements Serializable {
     // QUERY
     // ============================================================================
 
+    /**
+     * Reads "kylin.query.enable-dict-enumerator" (default "false").
+     *
+     * @return true if the dictionary enumerator is enabled for queries
+     */
+    public boolean isDictionaryEnumeratorEnabled() {
+        return Boolean.parseBoolean(getOptional("kylin.query.enable-dict-enumerator", "false"));
+    }
+
     public Boolean isEnumerableRulesEnabled() {
         return Boolean.parseBoolean(getOptional("kylin.query.calcite.enumerable-rules-enabled", "false"));
     }
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
index 5b4a134..95c8b40 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
@@ -18,16 +18,28 @@
 
 package org.apache.kylin.cube.model;
 
-import com.fasterxml.jackson.annotation.JsonAutoDetect;
-import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
-import com.fasterxml.jackson.annotation.JsonInclude;
-import com.fasterxml.jackson.annotation.JsonProperty;
-import com.fasterxml.jackson.core.JsonProcessingException;
-import com.google.common.base.Joiner;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Preconditions.checkNotNull;
+import static com.google.common.base.Preconditions.checkState;
+
+import java.lang.reflect.Method;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.TreeSet;
+
 import org.apache.commons.codec.binary.Base64;
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.lang.ArrayUtils;
@@ -65,27 +77,16 @@ import org.apache.kylin.metadata.realization.RealizationType;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.lang.reflect.Method;
-import java.security.MessageDigest;
-import java.security.NoSuchAlgorithmException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.BitSet;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
-import java.util.TreeSet;
-
-import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Preconditions.checkNotNull;
-import static com.google.common.base.Preconditions.checkState;
+import com.fasterxml.jackson.annotation.JsonAutoDetect;
+import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.google.common.base.Joiner;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
 
 /**
  */
@@ -1298,18 +1299,28 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware {
     }
 
     /**
-     * Get columns that have dictionary
+     * Get dimensions (rowkey columns) that use a dictionary, kept in rowkey column order
      */
-    public Set<TblColRef> getAllColumnsHaveDictionary() {
-        Set<TblColRef> result = Sets.newLinkedHashSet();
+    public Set<TblColRef> getAllDimsHaveDictionary() {
+        Set<TblColRef> result = Sets.newLinkedHashSet();
 
-        // dictionaries in dimensions
         for (RowKeyColDesc rowKeyColDesc : rowkey.getRowKeyColumns()) {
             TblColRef colRef = rowKeyColDesc.getColRef();
             if (rowkey.isUseDictionary(colRef)) {
                 result.add(colRef);
             }
         }
+        return result;
+    }
+
+    /**
+     * Get columns that have dictionary
+     */
+    public Set<TblColRef> getAllColumnsHaveDictionary() {
+        Set<TblColRef> result = Sets.newLinkedHashSet();
+
+        // dictionaries in dimensions
+        result.addAll(getAllDimsHaveDictionary());
 
         // dictionaries in measures
         for (MeasureDesc measure : measures) {
diff --git a/kylin-it/src/test/java/org/apache/kylin/query/ITKylinQueryTest.java b/kylin-it/src/test/java/org/apache/kylin/query/ITKylinQueryTest.java
index e6afbe0..e01334f 100644
--- a/kylin-it/src/test/java/org/apache/kylin/query/ITKylinQueryTest.java
+++ b/kylin-it/src/test/java/org/apache/kylin/query/ITKylinQueryTest.java
@@ -428,6 +428,18 @@ public class ITKylinQueryTest extends KylinTestBase {
     }
 
     @Test
+    public void testDictionaryEnumerator() throws Exception {
+        final boolean wasEnabled = config.isDictionaryEnumeratorEnabled();
+        config.setProperty("kylin.query.enable-dict-enumerator", "true");
+        try {
+            batchExecuteQuery(getQueryFolderPrefix() + "src/test/resources/query/sql_dict_enumerator");
+        } finally {
+            // restore the previous setting even when a query fails, so later tests are unaffected
+            config.setProperty("kylin.query.enable-dict-enumerator", String.valueOf(wasEnabled));
+        }
+    }
+
+    @Test
     public void testValues() throws Exception {
         execAndCompQuery(getQueryFolderPrefix() + "src/test/resources/query/sql_values", null, true);
     }
diff --git a/kylin-it/src/test/resources/query/sql_dict_enumerator/query01.sql b/kylin-it/src/test/resources/query/sql_dict_enumerator/query01.sql
new file mode 100644
index 0000000..963e3b3
--- /dev/null
+++ b/kylin-it/src/test/resources/query/sql_dict_enumerator/query01.sql
@@ -0,0 +1,21 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+-- KYLIN-3490 dictionary enumerator case: GROUP BY on a single column
+select SLR_SEGMENT_CD
+from TEST_KYLIN_FACT
+group by SLR_SEGMENT_CD
\ No newline at end of file
diff --git a/kylin-it/src/test/resources/query/sql_dict_enumerator/query02.sql b/kylin-it/src/test/resources/query/sql_dict_enumerator/query02.sql
new file mode 100644
index 0000000..2617e91
--- /dev/null
+++ b/kylin-it/src/test/resources/query/sql_dict_enumerator/query02.sql
@@ -0,0 +1,20 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+-- KYLIN-3490 dictionary enumerator case: SELECT DISTINCT on a single column
+select DISTINCT SLR_SEGMENT_CD
+from TEST_KYLIN_FACT
\ No newline at end of file
diff --git a/kylin-it/src/test/resources/query/sql_dict_enumerator/query03.sql b/kylin-it/src/test/resources/query/sql_dict_enumerator/query03.sql
new file mode 100644
index 0000000..cb07666
--- /dev/null
+++ b/kylin-it/src/test/resources/query/sql_dict_enumerator/query03.sql
@@ -0,0 +1,20 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+-- KYLIN-3490 dictionary enumerator case: aggregate (MAX) over a single column
+select MAX(SLR_SEGMENT_CD)
+from TEST_KYLIN_FACT
\ No newline at end of file
diff --git a/query/src/main/java/org/apache/kylin/query/enumerator/DictionaryEnumerator.java b/query/src/main/java/org/apache/kylin/query/enumerator/DictionaryEnumerator.java
new file mode 100644
index 0000000..6af65ee
--- /dev/null
+++ b/query/src/main/java/org/apache/kylin/query/enumerator/DictionaryEnumerator.java
@@ -0,0 +1,190 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.query.enumerator;
+
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.calcite.linq4j.Enumerator;
+import org.apache.kylin.common.util.Dictionary;
+import org.apache.kylin.cube.CubeInstance;
+import org.apache.kylin.cube.CubeSegment;
+import org.apache.kylin.metadata.model.SegmentStatusEnum;
+import org.apache.kylin.metadata.model.TblColRef;
+import org.apache.kylin.metadata.realization.IRealization;
+import org.apache.kylin.metadata.tuple.Tuple;
+import org.apache.kylin.query.relnode.OLAPContext;
+import org.apache.kylin.storage.hybrid.HybridInstance;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
+/**
+ * Enumerator that answers a query touching exactly one dictionary-encoded dimension by
+ * walking the values of that column's dictionaries in the READY segments, instead of
+ * scanning a cuboid.
+ *
+ * Values are not de-duplicated across dictionaries.
+ * NOTE(review): presumably the enclosing plan (GROUP BY / DISTINCT / aggregate) absorbs
+ * repeated values -- confirm.
+ */
+public class DictionaryEnumerator implements Enumerator<Object[]> {
+
+    private static final Logger logger = LoggerFactory.getLogger(DictionaryEnumerator.class);
+
+    private final List<Dictionary<String>> dictList;
+    // single row buffer handed out by current(); only the slot at dictColIdx is written
+    private final Object[] current;
+    private final TblColRef dictCol;
+    private final int dictColIdx;
+    // values of the dictionary being walked; null until moveNext() opens a dictionary
+    private Iterator<String> currentDict;
+    // dictionaries that have not been opened yet
+    private Iterator<Dictionary<String>> iterator;
+
+    /**
+     * @param olapContext query context; it must reference exactly one column, and that column
+     *                    must be a dictionary-encoded dimension of the realization
+     * @throws IllegalArgumentException if either precondition does not hold
+     */
+    public DictionaryEnumerator(OLAPContext olapContext) {
+        Preconditions.checkArgument(olapContext.allColumns.size() == 1, "The query should only relate to one column");
+
+        dictCol = olapContext.allColumns.iterator().next();
+        // the enable switch was already evaluated when this enumerator was selected
+        Preconditions.checkArgument(ifColumnHaveDictionary(dictCol, olapContext.realization, false),
+                "The column %s should be encoded as dictionary for %s", dictCol, olapContext.realization);
+
+        dictList = getAllDictionaries(dictCol, olapContext.realization);
+        current = new Object[olapContext.returnTupleInfo.size()];
+        dictColIdx = olapContext.returnTupleInfo.getColumnIndex(dictCol);
+        iterator = dictList.iterator();
+
+        logger.info("Will use DictionaryEnumerator to answer query which is only related to column {}", dictCol);
+    }
+
+    /**
+     * Tells whether the query of the given context can be served by this enumerator: it must
+     * reference exactly one column, and that column must pass the dictionary check with the
+     * kylin.query.enable-dict-enumerator switch taken into account.
+     */
+    public static boolean ifDictionaryEnumeratorEligible(OLAPContext olapContext) {
+        if (olapContext.allColumns.size() != 1) {
+            return false;
+        }
+
+        final TblColRef dictCol = olapContext.allColumns.iterator().next();
+        return ifColumnHaveDictionary(dictCol, olapContext.realization, true);
+    }
+
+    /**
+     * @param enableCheck when true, each cube must also have the dictionary enumerator enabled
+     * @return true if col is a dictionary-encoded dimension of the cube, or of every realization
+     *         of the hybrid; false for any other kind of realization
+     */
+    private static boolean ifColumnHaveDictionary(TblColRef col, IRealization realization, boolean enableCheck) {
+        if (realization instanceof CubeInstance) {
+            final CubeInstance cube = (CubeInstance) realization;
+            boolean ifEnabled = !enableCheck || cube.getConfig().isDictionaryEnumeratorEnabled();
+            return ifEnabled && cube.getDescriptor().getAllDimsHaveDictionary().contains(col);
+        } else if (realization instanceof HybridInstance) {
+            final HybridInstance hybridInstance = (HybridInstance) realization;
+            for (IRealization entry : hybridInstance.getRealizations()) {
+                if (!ifColumnHaveDictionary(col, entry, enableCheck)) {
+                    return false;
+                }
+            }
+            return true;
+        }
+        return false;
+    }
+
+    /**
+     * Collects the distinct dictionaries of col from the READY segments of the cube, or of
+     * every realization of the hybrid.
+     *
+     * @throws IllegalStateException if a realization is neither a cube nor a hybrid
+     */
+    public static List<Dictionary<String>> getAllDictionaries(TblColRef col, IRealization realization) {
+        Set<Dictionary<String>> result = Sets.newHashSet();
+        if (realization instanceof CubeInstance) {
+            final CubeInstance cube = (CubeInstance) realization;
+            for (CubeSegment segment : cube.getSegments(SegmentStatusEnum.READY)) {
+                final Dictionary<String> dict = segment.getDictionary(col);
+                // NOTE(review): getDictionary presumably returns null for a segment without a
+                // dictionary for col -- confirm; a null entry would make moveNext() throw
+                if (dict != null) {
+                    result.add(dict);
+                }
+            }
+        } else if (realization instanceof HybridInstance) {
+            final HybridInstance hybridInstance = (HybridInstance) realization;
+            for (IRealization entry : hybridInstance.getRealizations()) {
+                result.addAll(getAllDictionaries(col, entry));
+            }
+        } else {
+            throw new IllegalStateException("All leaf realizations should be CubeInstance");
+        }
+        return Lists.newArrayList(result);
+    }
+
+    /**
+     * Moves to the next dictionary value, opening the next dictionary whenever the current
+     * one is exhausted.
+     *
+     * @return false once every dictionary has been consumed
+     */
+    @Override
+    public boolean moveNext() {
+        while (currentDict == null || !currentDict.hasNext()) {
+            if (!iterator.hasNext()) {
+                return false;
+            }
+            final Dictionary<String> dict = iterator.next();
+            currentDict = dict.enumeratorValues().iterator();
+        }
+
+        current[dictColIdx] = Tuple.convertOptiqCellValue(currentDict.next(), dictCol.getDatatype());
+        return true;
+    }
+
+    /** Returns the shared row buffer, which the next moveNext() overwrites. */
+    @Override
+    public Object[] current() {
+        return current;
+    }
+
+    /** Rewinds to before the first value of the first dictionary. */
+    @Override
+    public void reset() {
+        iterator = dictList.iterator();
+        // forget the partially consumed dictionary; its remaining values would otherwise be
+        // replayed before the enumeration restarts
+        currentDict = null;
+    }
+
+    /** Nothing to release. */
+    @Override
+    public void close() {
+    }
+}
\ No newline at end of file
diff --git a/query/src/main/java/org/apache/kylin/query/enumerator/OLAPQuery.java b/query/src/main/java/org/apache/kylin/query/enumerator/OLAPQuery.java
index 84ac5cf..c094ff5 100644
--- a/query/src/main/java/org/apache/kylin/query/enumerator/OLAPQuery.java
+++ b/query/src/main/java/org/apache/kylin/query/enumerator/OLAPQuery.java
@@ -37,7 +37,8 @@ public class OLAPQuery extends AbstractEnumerable<Object[]> implements Enumerabl
     public enum EnumeratorTypeEnum {
         OLAP, //finish query with Cube or II, or a combination of both
         LOOKUP_TABLE, //using a snapshot of lookup table
-        HIVE //using hive
+        HIVE, //using hive
+        COL_DICT // using the dictionaries of a single dictionary-encoded column
     }
 
     private final DataContext optiqContext;
@@ -65,6 +66,8 @@ public class OLAPQuery extends AbstractEnumerable<Object[]> implements Enumerabl
                     : new OLAPEnumerator(olapContext, optiqContext);
         case LOOKUP_TABLE:
             return BackdoorToggles.getPrepareOnly() ? new EmptyEnumerator() : new LookupTableEnumerator(olapContext);
+        case COL_DICT:
+            return BackdoorToggles.getPrepareOnly() ? new EmptyEnumerator() : new DictionaryEnumerator(olapContext);
         case HIVE:
             return BackdoorToggles.getPrepareOnly() ? new EmptyEnumerator() : new HiveEnumerator(olapContext);
         default:
diff --git a/query/src/main/java/org/apache/kylin/query/relnode/OLAPTableScan.java b/query/src/main/java/org/apache/kylin/query/relnode/OLAPTableScan.java
index c23f1c5..ac6241f 100644
--- a/query/src/main/java/org/apache/kylin/query/relnode/OLAPTableScan.java
+++ b/query/src/main/java/org/apache/kylin/query/relnode/OLAPTableScan.java
@@ -72,6 +72,7 @@ import org.apache.kylin.metadata.model.ColumnDesc;
 import org.apache.kylin.metadata.model.DataModelDesc;
 import org.apache.kylin.metadata.model.TableRef;
 import org.apache.kylin.metadata.model.TblColRef;
+import org.apache.kylin.query.enumerator.DictionaryEnumerator;
 import org.apache.kylin.query.optrule.AggregateMultipleExpandRule;
 import org.apache.kylin.query.optrule.AggregateProjectReduceRule;
 import org.apache.kylin.query.optrule.OLAPAggregateRule;
@@ -419,6 +420,8 @@ public class OLAPTableScan extends TableScan implements OLAPRel, EnumerableRel {
         // if the table to scan is not the fact table of cube, then it's a lookup table
         if (context.realization.getModel().isLookupTable(tableName)) {
             return "executeLookupTableQuery";
+        } else if (DictionaryEnumerator.ifDictionaryEnumeratorEligible(context)) {
+            return "executeColumnDictionaryQuery";
         } else {
             return "executeOLAPQuery";
         }
diff --git a/query/src/main/java/org/apache/kylin/query/schema/OLAPTable.java b/query/src/main/java/org/apache/kylin/query/schema/OLAPTable.java
index 216c6d4..60a856d 100644
--- a/query/src/main/java/org/apache/kylin/query/schema/OLAPTable.java
+++ b/query/src/main/java/org/apache/kylin/query/schema/OLAPTable.java
@@ -280,6 +280,15 @@ public class OLAPTable extends AbstractQueryableTable implements TranslatableTab
         return new OLAPQuery(optiqContext, EnumeratorTypeEnum.LOOKUP_TABLE, ctxSeq);
     }
 
+    /**
+     * Wraps the given context in an OLAPQuery of type COL_DICT.
+     * NOTE(review): OLAPTableScan refers to this method by the string
+     * "executeColumnDictionaryQuery" -- presumably resolved by name, so keep the two in sync.
+     */
+    public Enumerable<Object[]> executeColumnDictionaryQuery(DataContext optiqContext, int ctxSeq) {
+        return new OLAPQuery(optiqContext, EnumeratorTypeEnum.COL_DICT, ctxSeq);
+    }
+
     public Enumerable<Object[]> executeHiveQuery(DataContext optiqContext, int ctxSeq) {
         return new OLAPQuery(optiqContext, EnumeratorTypeEnum.HIVE, ctxSeq);
     }