You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2016/12/29 01:47:34 UTC

[5/8] kylin git commit: KYLIN-2330 fix CubeDesc returning redundant DeriveInfo

KYLIN-2330 fix CubeDesc returning redundant DeriveInfo


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/4d35edd7
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/4d35edd7
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/4d35edd7

Branch: refs/heads/yang22-cdh5.7
Commit: 4d35edd7b47af65e706465a7fe7c9f7ddd83fd9d
Parents: 17e323e
Author: Yang Li <li...@apache.org>
Authored: Thu Dec 29 07:13:12 2016 +0800
Committer: Yang Li <li...@apache.org>
Committed: Thu Dec 29 07:13:24 2016 +0800

----------------------------------------------------------------------
 .../org/apache/kylin/cube/model/CubeDesc.java   | 46 +++++++++++++----
 .../org/apache/kylin/cube/CubeDescTest.java     | 53 ++++++++++++++++++--
 .../test_case_data/localmeta/cube_desc/ssb.json | 12 ++++-
 3 files changed, 95 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/4d35edd7/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
index 853571c..219435e 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
@@ -789,25 +789,49 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware {
         for (int i = 0; i < derivedCols.length; i++) {
             if (ArrayUtils.contains(hostCols, derivedCols[i])) {
                 derivedCols = (TblColRef[]) ArrayUtils.remove(derivedCols, i);
-                extra = (String[]) ArrayUtils.remove(extra, i);
+                if (extra != null)
+                    extra = (String[]) ArrayUtils.remove(extra, i);
                 i--;
             }
         }
+        
+        if (derivedCols.length == 0)
+            return;
 
-        Map<TblColRef, DeriveInfo> toHostMap = derivedToHostMap;
-        Map<Array<TblColRef>, List<DeriveInfo>> hostToMap = hostToDerivedMap;
+        for (int i = 0; i < derivedCols.length; i++) {
+            TblColRef derivedCol = derivedCols[i];
+            boolean isOneToOne = type == DeriveType.PK_FK || ArrayUtils.contains(hostCols, derivedCol) || (extra != null && extra[i].contains("1-1"));
+            derivedToHostMap.put(derivedCol, new DeriveInfo(type, dimension, hostCols, isOneToOne));
+        }
 
         Array<TblColRef> hostColArray = new Array<TblColRef>(hostCols);
-        List<DeriveInfo> infoList = hostToMap.get(hostColArray);
+        List<DeriveInfo> infoList = hostToDerivedMap.get(hostColArray);
         if (infoList == null) {
-            hostToMap.put(hostColArray, infoList = new ArrayList<DeriveInfo>());
+            hostToDerivedMap.put(hostColArray, infoList = new ArrayList<DeriveInfo>());
         }
-        infoList.add(new DeriveInfo(type, dimension, derivedCols, false));
-
-        for (int i = 0; i < derivedCols.length; i++) {
-            TblColRef derivedCol = derivedCols[i];
-            boolean isOneToOne = type == DeriveType.PK_FK || ArrayUtils.contains(hostCols, derivedCol) || (extra != null && extra[i].contains("1-1"));
-            toHostMap.put(derivedCol, new DeriveInfo(type, dimension, hostCols, isOneToOne));
+        
+        // Merged duplicated derived column
+        List<TblColRef> whatsLeft = new ArrayList<>();
+        for (TblColRef derCol : derivedCols) {
+            boolean merged = false;
+            for (DeriveInfo existing : infoList) {
+                if (existing.type == type && existing.dimension.getTableRef().equals(dimension.getTableRef())) {
+                    if (ArrayUtils.contains(existing.columns, derCol)) {
+                        merged = true;
+                        break;
+                    }
+                    if (type == DeriveType.LOOKUP) {
+                        existing.columns = (TblColRef[]) ArrayUtils.add(existing.columns, derCol);
+                        merged = true;
+                        break;
+                    }
+                }
+            }
+            if (!merged)
+                whatsLeft.add(derCol);
+        }
+        if (whatsLeft.size() > 0) {
+            infoList.add(new DeriveInfo(type, dimension, (TblColRef[]) whatsLeft.toArray(new TblColRef[whatsLeft.size()]), false));
         }
     }
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/4d35edd7/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java
----------------------------------------------------------------------
diff --git a/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java b/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java
index 27c154b..c192da0 100644
--- a/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java
+++ b/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java
@@ -18,19 +18,29 @@
 
 package org.apache.kylin.cube;
 
+import static org.junit.Assert.assertEquals;
+
 import java.io.File;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;
 import java.util.Set;
 import java.util.TreeSet;
 
+import org.apache.kylin.common.util.Array;
 import org.apache.kylin.common.util.JsonUtil;
 import org.apache.kylin.common.util.LocalFileMetadataTestCase;
+import org.apache.kylin.common.util.Pair;
 import org.apache.kylin.cube.model.AggregationGroup;
 import org.apache.kylin.cube.model.CubeDesc;
+import org.apache.kylin.cube.model.CubeDesc.DeriveInfo;
+import org.apache.kylin.cube.model.CubeDesc.DeriveType;
 import org.apache.kylin.cube.model.SelectRule;
+import org.apache.kylin.metadata.model.TblColRef;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
@@ -46,7 +56,7 @@ import com.google.common.collect.Maps;
 public class CubeDescTest extends LocalFileMetadataTestCase {
 
     private static final String CUBE_WITH_SLR_DESC = "test_kylin_cube_with_slr_desc";
-    
+
     private String SELLER_ID;
     private String SLR_SEGMENT_CD;
     private String LSTG_FORMAT_NAME;
@@ -55,14 +65,14 @@ public class CubeDescTest extends LocalFileMetadataTestCase {
     private String CATEG_LVL2_NAME;
     private String CATEG_LVL3_NAME;
     private String LEAF_CATEG_ID;
-    
+
     @Rule
     public ExpectedException thrown = ExpectedException.none();
 
     @Before
     public void setUp() throws Exception {
         this.createTestMetadata();
-        
+
         CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc(CUBE_WITH_SLR_DESC);
         AggregationGroup g = cubeDesc.getAggregationGroups().get(0);
         SELLER_ID = getColInAggrGroup(g, "SELLER_ID");
@@ -230,7 +240,7 @@ public class CubeDescTest extends LocalFileMetadataTestCase {
     }
 
     @Test
-    public void testCombinationIntOverflow() throws  Exception {
+    public void testCombinationIntOverflow() throws Exception {
         for (File f : new File(LocalFileMetadataTestCase.LOCALMETA_TEMP_DATA, "cube_desc").listFiles()) {
             if (f.getName().endsWith(".bad")) {
                 String path = f.getPath();
@@ -273,7 +283,42 @@ public class CubeDescTest extends LocalFileMetadataTestCase {
         Map<?, ?> map2 = JsonUtil.readValue(mapStr, HashMap.class);
 
         Assert.assertEquals(map, map2);
+    }
 
+    @Test
+    public void testDerivedInfo() {
+        {
+            CubeDesc cube = CubeDescManager.getInstance(getTestConfig()).getCubeDesc(CUBE_WITH_SLR_DESC);
+            List<TblColRef> givenCols = new ArrayList<>();
+            givenCols.add(cube.findColumnRef("TEST_KYLIN_FACT", "LSTG_SITE_ID"));
+            givenCols.add(cube.findColumnRef("TEST_KYLIN_FACT", "LEAF_CATEG_ID"));
+            Map<Array<TblColRef>, List<DeriveInfo>> hostToDerivedInfo = cube.getHostToDerivedInfo(givenCols, null);
+            assertEquals(3, hostToDerivedInfo.size());
+            assertEquals(Pair.newPair(3, 2), countDerivedInfo(hostToDerivedInfo));
+        }
+
+        {
+            CubeDesc cube = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("ssb");
+            List<TblColRef> givenCols = new ArrayList<>();
+            givenCols.add(cube.findColumnRef("V_LINEORDER", "LO_PARTKEY"));
+            Map<Array<TblColRef>, List<DeriveInfo>> hostToDerivedInfo = cube.getHostToDerivedInfo(givenCols, null);
+            assertEquals(1, hostToDerivedInfo.size());
+            assertEquals(Pair.newPair(1, 1), countDerivedInfo(hostToDerivedInfo));
+        }
+    }
+
+    private Pair<Integer, Integer> countDerivedInfo(Map<Array<TblColRef>, List<DeriveInfo>> hostToDerivedInfo) {
+        int pkfkCount = 0;
+        int lookupCount = 0;
+        for (Entry<Array<TblColRef>, List<DeriveInfo>> entry : hostToDerivedInfo.entrySet()) {
+            for (DeriveInfo deriveInfo : entry.getValue()) {
+                if (deriveInfo.type == DeriveType.PK_FK)
+                    pkfkCount++;
+                if (deriveInfo.type == DeriveType.LOOKUP)
+                    lookupCount++;
+            }
+        }
+        return Pair.newPair(pkfkCount, lookupCount);
     }
 
     private Collection<String> sortStrs(String[] strs) {

http://git-wip-us.apache.org/repos/asf/kylin/blob/4d35edd7/examples/test_case_data/localmeta/cube_desc/ssb.json
----------------------------------------------------------------------
diff --git a/examples/test_case_data/localmeta/cube_desc/ssb.json b/examples/test_case_data/localmeta/cube_desc/ssb.json
index d3ea10b..a13ac53 100644
--- a/examples/test_case_data/localmeta/cube_desc/ssb.json
+++ b/examples/test_case_data/localmeta/cube_desc/ssb.json
@@ -6,7 +6,17 @@
     "name" : "SSB.PART_DERIVED",
     "table" : "SSB.PART",
     "column" : null,
-    "derived" : [ "P_MFGR", "P_CATEGORY", "P_BRAND" ]
+    "derived" : [ "P_MFGR" ]
+  }, {
+    "name" : "SSB.PART_DERIVED",
+    "table" : "SSB.PART",
+    "column" : null,
+    "derived" : [ "P_CATEGORY" ]
+  }, {
+    "name" : "SSB.PART_DERIVED",
+    "table" : "SSB.PART",
+    "column" : null,
+    "derived" : [ "P_BRAND" ]
   }, {
     "name" : "C_CITY",
     "table" : "SSB.CUSTOMER",