You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by GitBox <gi...@apache.org> on 2020/06/22 01:19:35 UTC

[GitHub] [incubator-doris] wangbo commented on a change in pull request #3727: (#3726) [Spark Load] Rollup Tree Builder

wangbo commented on a change in pull request #3727:
URL: https://github.com/apache/incubator-doris/pull/3727#discussion_r443278487



##########
File path: fe/src/main/java/org/apache/doris/load/loadv2/dpp/MinimumCoverageRollupTreeBuilder.java
##########
@@ -0,0 +1,126 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.load.loadv2.dpp;
+
+import org.apache.doris.load.loadv2.etl.EtlJobConfig;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+// Build RollupTree by using minimum coverage strategy,
+// which is to find the index with the minimum columns that
+// has all columns of rollup index as parent index node.
+// Eg:
+// There are three indexes:
+//   index1(c1, c2, c3, c4, c5)
+//   index2(c1, c2, c4)
+//   index3(c1, c2)
+//   index4(c3, c4)
+//   index5(c1, c2, c5)
+// then the result tree is:
+//          index1
+//      |     \      \
+//  index2  index4   index5
+//    |
+//  index3
+// Now, if there are more than one indexes meet the column coverage requirement,
+// have the same column size(eg: index2 vs index5), child rollup is preferred
+// builded from the front index(eg: index3 is the child of index2). This can be
+// further optimized based on the row number of the index.
+public class MinimumCoverageRollupTreeBuilder implements RollupTreeBuilder {
+    public RollupTreeNode build(EtlJobConfig.EtlTable tableMeta) {
+        List<EtlJobConfig.EtlIndex> indexes = tableMeta.indexes;
+        List<EtlJobConfig.EtlIndex> indexMetas = new ArrayList<>();
+        EtlJobConfig.EtlIndex baseIndex = null;
+        for (EtlJobConfig.EtlIndex indexMeta : indexes) {
+            if (indexMeta.isBaseIndex) {
+                baseIndex = indexMeta;
+                continue;
+            }
+            indexMetas.add(indexMeta);
+        }
+        List<EtlJobConfig.EtlColumn> baseIndexColumns = baseIndex.columns;
+        List<String> baseKeyColumns = new ArrayList<>();
+        List<String> baseValueColumns = new ArrayList<>();
+        for (EtlJobConfig.EtlColumn columnMeta : baseIndexColumns) {
+            if (columnMeta.isKey) {
+                baseKeyColumns.add(columnMeta.columnName);
+            } else {
+                baseValueColumns.add(columnMeta.columnName);
+            }
+        }
+        RollupTreeNode root = new RollupTreeNode();
+        root.parent = null;
+        root.keyColumnNames = baseKeyColumns;
+        root.valueColumnNames = baseValueColumns;
+        root.indexId = baseIndex.indexId;
+        root.indexMeta = baseIndex;
+
+        // sort the index metas to make sure the column number decrease
+        Collections.sort(indexMetas, new EtlJobConfig.EtlIndexComparator().reversed());
+        boolean[] flags = new boolean[indexMetas.size()];
+        for (int i = 0; i < indexMetas.size(); ++i) {
+            flags[i] = false;
+        }
+        for (int i = 0; i < indexMetas.size(); ++i) {
+            List<String> keyColumns = new ArrayList<>();
+            List<String> valueColumns = new ArrayList<>();
+            for (EtlJobConfig.EtlColumn column : indexMetas.get(i).columns) {
+                if (column.isKey) {
+                    keyColumns.add(column.columnName);
+                } else {
+                    valueColumns.add(column.columnName);
+                }
+            }
+            insertIndex(root, indexMetas.get(i), keyColumns, valueColumns, i, flags);
+        }
+        return root;
+    }
+
+    // DFS traverse to build the rollup tree
+    private void insertIndex(RollupTreeNode root, EtlJobConfig.EtlIndex indexMeta,
+                             List<String> keyColumns,
+                             List<String> valueColumns, int id, boolean[] flags) {
+        if (root.children != null) {
+            for (RollupTreeNode child : root.children) {
+                insertIndex(child, indexMeta, keyColumns, valueColumns, id, flags);
+                if (flags[id]) {
+                    return;
+                }
+            }
+        }
+        if (flags[id]) {

Review comment:
       Recursive happends when root node has children, so it is needed




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org