You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by sh...@apache.org on 2016/12/30 02:31:21 UTC

[29/42] kylin git commit: KYLIN-2330 fix CubeDesc returning redundant DeriveInfo

KYLIN-2330 fix CubeDesc returning redundant DeriveInfo


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/1a190ecf
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/1a190ecf
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/1a190ecf

Branch: refs/heads/sparkcubing-rebase
Commit: 1a190ecfcaad88e2551c5aff3bd4938719ba46a6
Parents: fdecf09
Author: Yang Li <li...@apache.org>
Authored: Thu Dec 29 07:13:12 2016 +0800
Committer: Yang Li <li...@apache.org>
Committed: Thu Dec 29 07:31:17 2016 +0800

----------------------------------------------------------------------
 .../org/apache/kylin/common/QueryContext.java   |  2 +-
 .../org/apache/kylin/cube/model/CubeDesc.java   | 46 +++++++++++++++-----
 .../org/apache/kylin/cube/CubeDescTest.java     | 45 ++++++++++++++++++-
 .../test_case_data/localmeta/cube_desc/ssb.json | 12 ++++-
 4 files changed, 91 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/1a190ecf/core-common/src/main/java/org/apache/kylin/common/QueryContext.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/QueryContext.java b/core-common/src/main/java/org/apache/kylin/common/QueryContext.java
index ef0cb14..93b8556 100644
--- a/core-common/src/main/java/org/apache/kylin/common/QueryContext.java
+++ b/core-common/src/main/java/org/apache/kylin/common/QueryContext.java
@@ -23,7 +23,7 @@ import java.util.Map;
 import com.google.common.collect.Maps;
 
 /**
- * checkout {@link org.apache.kylin.common.debug.BackdoorToggles} for comparision
+ * checkout {@link org.apache.kylin.common.debug.BackdoorToggles} for comparison
  */
 public class QueryContext {
     private static final ThreadLocal<Map<String, String>> _queryContext = new ThreadLocal<Map<String, String>>();

http://git-wip-us.apache.org/repos/asf/kylin/blob/1a190ecf/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
index 3b8d034..4686a15 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
@@ -807,25 +807,49 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware {
         for (int i = 0; i < derivedCols.length; i++) {
             if (ArrayUtils.contains(hostCols, derivedCols[i])) {
                 derivedCols = (TblColRef[]) ArrayUtils.remove(derivedCols, i);
-                extra = (String[]) ArrayUtils.remove(extra, i);
+                if (extra != null)
+                    extra = (String[]) ArrayUtils.remove(extra, i);
                 i--;
             }
         }
+        
+        if (derivedCols.length == 0)
+            return;
 
-        Map<TblColRef, DeriveInfo> toHostMap = derivedToHostMap;
-        Map<Array<TblColRef>, List<DeriveInfo>> hostToMap = hostToDerivedMap;
+        for (int i = 0; i < derivedCols.length; i++) {
+            TblColRef derivedCol = derivedCols[i];
+            boolean isOneToOne = type == DeriveType.PK_FK || ArrayUtils.contains(hostCols, derivedCol) || (extra != null && extra[i].contains("1-1"));
+            derivedToHostMap.put(derivedCol, new DeriveInfo(type, join, hostCols, isOneToOne));
+        }
 
         Array<TblColRef> hostColArray = new Array<TblColRef>(hostCols);
-        List<DeriveInfo> infoList = hostToMap.get(hostColArray);
+        List<DeriveInfo> infoList = hostToDerivedMap.get(hostColArray);
         if (infoList == null) {
-            hostToMap.put(hostColArray, infoList = new ArrayList<DeriveInfo>());
+            hostToDerivedMap.put(hostColArray, infoList = new ArrayList<DeriveInfo>());
         }
-        infoList.add(new DeriveInfo(type, join, derivedCols, false));
-
-        for (int i = 0; i < derivedCols.length; i++) {
-            TblColRef derivedCol = derivedCols[i];
-            boolean isOneToOne = type == DeriveType.PK_FK || ArrayUtils.contains(hostCols, derivedCol) || (extra != null && extra[i].contains("1-1"));
-            toHostMap.put(derivedCol, new DeriveInfo(type, join, hostCols, isOneToOne));
+        
+        // Merge duplicated derived columns
+        List<TblColRef> whatsLeft = new ArrayList<>();
+        for (TblColRef derCol : derivedCols) {
+            boolean merged = false;
+            for (DeriveInfo existing : infoList) {
+                if (existing.type == type && existing.join.getPKSide().equals(join.getPKSide())) {
+                    if (ArrayUtils.contains(existing.columns, derCol)) {
+                        merged = true;
+                        break;
+                    }
+                    if (type == DeriveType.LOOKUP) {
+                        existing.columns = (TblColRef[]) ArrayUtils.add(existing.columns, derCol);
+                        merged = true;
+                        break;
+                    }
+                }
+            }
+            if (!merged)
+                whatsLeft.add(derCol);
+        }
+        if (whatsLeft.size() > 0) {
+            infoList.add(new DeriveInfo(type, join, (TblColRef[]) whatsLeft.toArray(new TblColRef[whatsLeft.size()]), false));
         }
     }
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/1a190ecf/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java
----------------------------------------------------------------------
diff --git a/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java b/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java
index 0a7adf1..86ea1df 100644
--- a/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java
+++ b/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java
@@ -23,20 +23,28 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 
 import java.io.File;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;
 import java.util.Set;
 import java.util.TreeSet;
 
+import org.apache.kylin.common.util.Array;
 import org.apache.kylin.common.util.JsonUtil;
 import org.apache.kylin.common.util.LocalFileMetadataTestCase;
+import org.apache.kylin.common.util.Pair;
 import org.apache.kylin.cube.model.AggregationGroup;
 import org.apache.kylin.cube.model.CubeDesc;
+import org.apache.kylin.cube.model.CubeDesc.DeriveInfo;
+import org.apache.kylin.cube.model.CubeDesc.DeriveType;
 import org.apache.kylin.cube.model.DimensionDesc;
 import org.apache.kylin.cube.model.SelectRule;
 import org.apache.kylin.metadata.model.MeasureDesc;
+import org.apache.kylin.metadata.model.TblColRef;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
@@ -331,8 +339,43 @@ public class CubeDescTest extends LocalFileMetadataTestCase {
 
         Map<?, ?> map2 = JsonUtil.readValue(mapStr, HashMap.class);
 
-        assertEquals(map, map2);
+        Assert.assertEquals(map, map2);
+    }
 
+    @Test
+    public void testDerivedInfo() {
+        {
+            CubeDesc cube = CubeDescManager.getInstance(getTestConfig()).getCubeDesc(CUBE_WITH_SLR_DESC);
+            List<TblColRef> givenCols = new ArrayList<>();
+            givenCols.add(cube.findColumnRef("TEST_KYLIN_FACT", "LSTG_SITE_ID"));
+            givenCols.add(cube.findColumnRef("TEST_KYLIN_FACT", "LEAF_CATEG_ID"));
+            Map<Array<TblColRef>, List<DeriveInfo>> hostToDerivedInfo = cube.getHostToDerivedInfo(givenCols, null);
+            assertEquals(3, hostToDerivedInfo.size());
+            assertEquals(Pair.newPair(3, 2), countDerivedInfo(hostToDerivedInfo));
+        }
+
+        {
+            CubeDesc cube = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("ssb");
+            List<TblColRef> givenCols = new ArrayList<>();
+            givenCols.add(cube.findColumnRef("V_LINEORDER", "LO_PARTKEY"));
+            Map<Array<TblColRef>, List<DeriveInfo>> hostToDerivedInfo = cube.getHostToDerivedInfo(givenCols, null);
+            assertEquals(1, hostToDerivedInfo.size());
+            assertEquals(Pair.newPair(1, 1), countDerivedInfo(hostToDerivedInfo));
+        }
+    }
+
+    private Pair<Integer, Integer> countDerivedInfo(Map<Array<TblColRef>, List<DeriveInfo>> hostToDerivedInfo) {
+        int pkfkCount = 0;
+        int lookupCount = 0;
+        for (Entry<Array<TblColRef>, List<DeriveInfo>> entry : hostToDerivedInfo.entrySet()) {
+            for (DeriveInfo deriveInfo : entry.getValue()) {
+                if (deriveInfo.type == DeriveType.PK_FK)
+                    pkfkCount++;
+                if (deriveInfo.type == DeriveType.LOOKUP)
+                    lookupCount++;
+            }
+        }
+        return Pair.newPair(pkfkCount, lookupCount);
     }
 
     private Collection<String> sortStrs(String[] strs) {

http://git-wip-us.apache.org/repos/asf/kylin/blob/1a190ecf/examples/test_case_data/localmeta/cube_desc/ssb.json
----------------------------------------------------------------------
diff --git a/examples/test_case_data/localmeta/cube_desc/ssb.json b/examples/test_case_data/localmeta/cube_desc/ssb.json
index d3ea10b..a13ac53 100644
--- a/examples/test_case_data/localmeta/cube_desc/ssb.json
+++ b/examples/test_case_data/localmeta/cube_desc/ssb.json
@@ -6,7 +6,17 @@
     "name" : "SSB.PART_DERIVED",
     "table" : "SSB.PART",
     "column" : null,
-    "derived" : [ "P_MFGR", "P_CATEGORY", "P_BRAND" ]
+    "derived" : [ "P_MFGR" ]
+  }, {
+    "name" : "SSB.PART_DERIVED",
+    "table" : "SSB.PART",
+    "column" : null,
+    "derived" : [ "P_CATEGORY" ]
+  }, {
+    "name" : "SSB.PART_DERIVED",
+    "table" : "SSB.PART",
+    "column" : null,
+    "derived" : [ "P_BRAND" ]
   }, {
     "name" : "C_CITY",
     "table" : "SSB.CUSTOMER",