You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2016/11/24 09:25:19 UTC

kylin git commit: KYLIN-2217 add saveDictionary() on CubeManager & DictionaryManager

Repository: kylin
Updated Branches:
  refs/heads/KYLIN-2217-2 [created] fde30a157


KYLIN-2217 add saveDictionary() on CubeManager & DictionaryManager


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/fde30a15
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/fde30a15
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/fde30a15

Branch: refs/heads/KYLIN-2217-2
Commit: fde30a157dff088b796cc778be34201a4e6e6dd4
Parents: 827205f
Author: Li Yang <li...@apache.org>
Authored: Thu Nov 24 17:24:57 2016 +0800
Committer: Li Yang <li...@apache.org>
Committed: Thu Nov 24 17:24:57 2016 +0800

----------------------------------------------------------------------
 .../java/org/apache/kylin/cube/CubeManager.java | 47 +++++++-------
 .../kylin/cube/cli/DictionaryGeneratorCLI.java  | 34 +++++++++-
 .../org/apache/kylin/cube/util/CubingUtils.java |  2 +-
 .../org/apache/kylin/dict/DictionaryInfo.java   |  5 ++
 .../apache/kylin/dict/DictionaryManager.java    | 65 +++++++++-----------
 .../engine/mr/steps/MergeCuboidMapperTest.java  |  2 +-
 .../kylin/cube/ITDictionaryManagerTest.java     |  6 +-
 7 files changed, 93 insertions(+), 68 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/fde30a15/core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java b/core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java
index 9893040..3a4c754 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java
@@ -48,7 +48,6 @@ import org.apache.kylin.cube.model.DictionaryDesc;
 import org.apache.kylin.cube.model.DimensionDesc;
 import org.apache.kylin.dict.DictionaryInfo;
 import org.apache.kylin.dict.DictionaryManager;
-import org.apache.kylin.dict.DistinctColumnValuesProvider;
 import org.apache.kylin.dict.lookup.LookupStringTable;
 import org.apache.kylin.dict.lookup.SnapshotManager;
 import org.apache.kylin.dict.lookup.SnapshotTable;
@@ -214,25 +213,39 @@ public class CubeManager implements IRealizationProvider {
         return result;
     }
 
-    public DictionaryInfo buildDictionary(CubeSegment cubeSeg, TblColRef col, DistinctColumnValuesProvider factTableValueProvider) throws IOException {
+    public DictionaryInfo buildDictionary(CubeSegment cubeSeg, TblColRef col, ReadableTable inpTable) throws IOException {
         CubeDesc cubeDesc = cubeSeg.getCubeDesc();
         if (!cubeDesc.getAllColumnsNeedDictionaryBuilt().contains(col))
             return null;
 
-        DictionaryManager dictMgr = getDictionaryManager();
         String builderClass = cubeDesc.getDictionaryBuilderClass(col);
-        DictionaryInfo dictInfo = dictMgr.buildDictionary(cubeDesc.getModel(), col, factTableValueProvider, builderClass);
+        DictionaryInfo dictInfo = getDictionaryManager().buildDictionary(cubeDesc.getModel(), col, inpTable, builderClass);
 
+        saveDictionaryInfo(cubeSeg, col, dictInfo);
+        return dictInfo;
+    }
+    
+    public DictionaryInfo saveDictionary(CubeSegment cubeSeg, TblColRef col, ReadableTable inpTable, Dictionary<String> dict) throws IOException {
+        CubeDesc cubeDesc = cubeSeg.getCubeDesc();
+        if (!cubeDesc.getAllColumnsNeedDictionaryBuilt().contains(col))
+            return null;
+
+        DictionaryInfo dictInfo = getDictionaryManager().saveDictionary(cubeDesc.getModel(), col, inpTable, dict);
+        
+        saveDictionaryInfo(cubeSeg, col, dictInfo);
+        return dictInfo;
+    }
+
+    private void saveDictionaryInfo(CubeSegment cubeSeg, TblColRef col, DictionaryInfo dictInfo) throws IOException {
         if (dictInfo != null) {
             Dictionary<?> dict = dictInfo.getDictionaryObject();
             cubeSeg.putDictResPath(col, dictInfo.getResourcePath());
             cubeSeg.getRowkeyStats().add(new Object[] { col.getName(), dict.getSize(), dict.getSizeOfId() });
 
-            CubeUpdate cubeBuilder = new CubeUpdate(cubeSeg.getCubeInstance());
-            cubeBuilder.setToUpdateSegs(cubeSeg);
-            updateCube(cubeBuilder);
+            CubeUpdate update = new CubeUpdate(cubeSeg.getCubeInstance());
+            update.setToUpdateSegs(cubeSeg);
+            updateCube(update);
         }
-        return dictInfo;
     }
 
     /**
@@ -617,24 +630,6 @@ public class CubeManager implements IRealizationProvider {
         }
     }
 
-    private long calculateStartOffsetForAppendSegment(CubeInstance cube) {
-        List<CubeSegment> existing = cube.getSegments();
-        if (existing.isEmpty()) {
-            return 0;
-        } else {
-            return existing.get(existing.size() - 1).getSourceOffsetEnd();
-        }
-    }
-
-    private long calculateStartDateForAppendSegment(CubeInstance cube) {
-        List<CubeSegment> existing = cube.getSegments();
-        if (existing.isEmpty()) {
-            return cube.getDescriptor().getPartitionDateStart();
-        } else {
-            return existing.get(existing.size() - 1).getDateRangeEnd();
-        }
-    }
-
     private void checkBuildingSegment(CubeInstance cube) {
         int maxBuldingSeg = cube.getConfig().getMaxBuildingSegments();
         if (cube.getBuildingSegments().size() >= maxBuldingSeg) {

http://git-wip-us.apache.org/repos/asf/kylin/blob/fde30a15/core-cube/src/main/java/org/apache/kylin/cube/cli/DictionaryGeneratorCLI.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/cli/DictionaryGeneratorCLI.java b/core-cube/src/main/java/org/apache/kylin/cube/cli/DictionaryGeneratorCLI.java
index 89e2e9b..a6aeb96 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/cli/DictionaryGeneratorCLI.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/cli/DictionaryGeneratorCLI.java
@@ -26,9 +26,15 @@ import org.apache.kylin.cube.CubeInstance;
 import org.apache.kylin.cube.CubeManager;
 import org.apache.kylin.cube.CubeSegment;
 import org.apache.kylin.cube.model.DimensionDesc;
+import org.apache.kylin.dict.DictionaryManager;
 import org.apache.kylin.dict.DistinctColumnValuesProvider;
+import org.apache.kylin.metadata.MetadataManager;
+import org.apache.kylin.metadata.model.DataModelDesc;
+import org.apache.kylin.metadata.model.TableDesc;
 import org.apache.kylin.metadata.model.TableRef;
 import org.apache.kylin.metadata.model.TblColRef;
+import org.apache.kylin.source.ReadableTable;
+import org.apache.kylin.source.SourceFactory;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -51,7 +57,8 @@ public class DictionaryGeneratorCLI {
         // dictionary
         for (TblColRef col : cubeSeg.getCubeDesc().getAllColumnsNeedDictionaryBuilt()) {
             logger.info("Building dictionary for " + col);
-            cubeMgr.buildDictionary(cubeSeg, col, factTableValueProvider);
+            ReadableTable inpTable = decideInputTable(cubeSeg.getModel(), col, factTableValueProvider);
+            cubeMgr.buildDictionary(cubeSeg, col, inpTable);
         }
 
         // snapshot
@@ -67,4 +74,29 @@ public class DictionaryGeneratorCLI {
             cubeMgr.buildSnapshotTable(cubeSeg, tableIdentity);
         }
     }
+    
+    private static ReadableTable decideInputTable(DataModelDesc model, TblColRef col, DistinctColumnValuesProvider factTableValueProvider) {
+        KylinConfig config = model.getConfig();
+        DictionaryManager dictMgr = DictionaryManager.getInstance(config);
+        TblColRef srcCol = dictMgr.decideSourceData(model, col);
+        String srcTable = srcCol.getTable();
+        
+        ReadableTable inpTable;
+        if (model.isFactTable(srcTable)) {
+            inpTable = factTableValueProvider.getDistinctValuesFor(srcCol);
+        } else {
+            MetadataManager metadataManager = MetadataManager.getInstance(config);
+            TableDesc tableDesc = new TableDesc(metadataManager.getTableDesc(srcTable));
+            if (TableDesc.TABLE_TYPE_VIRTUAL_VIEW.equalsIgnoreCase(tableDesc.getTableType())) {
+                TableDesc materializedTbl = new TableDesc();
+                materializedTbl.setDatabase(config.getHiveDatabaseForIntermediateTable());
+                materializedTbl.setName(tableDesc.getMaterializedName());
+                inpTable = SourceFactory.createReadableTable(materializedTbl);
+            } else {
+                inpTable = SourceFactory.createReadableTable(tableDesc);
+            }
+        }
+
+        return inpTable;
+    }
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/fde30a15/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java b/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
index 10dad2c..413b907 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
@@ -170,7 +170,7 @@ public class CubingUtils {
             signature.setLastModifiedTime(System.currentTimeMillis());
             signature.setPath(String.format("streaming_%s_%s", startOffset, endOffset));
             signature.setSize(endOffset - startOffset);
-            DictionaryInfo dictInfo = new DictionaryInfo(tblColRef.getTable(), tblColRef.getName(), tblColRef.getColumnDesc().getZeroBasedIndex(), tblColRef.getDatatype(), signature);
+            DictionaryInfo dictInfo = new DictionaryInfo(tblColRef.getColumnDesc(), tblColRef.getDatatype(), signature);
             logger.info("writing dictionary for TblColRef:" + tblColRef.toString());
             DictionaryManager dictionaryManager = DictionaryManager.getInstance(cubeSegment.getCubeDesc().getConfig());
             try {

http://git-wip-us.apache.org/repos/asf/kylin/blob/fde30a15/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java
index 4fba59a..8526467 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java
@@ -21,6 +21,7 @@ package org.apache.kylin.dict;
 import org.apache.kylin.common.persistence.ResourceStore;
 import org.apache.kylin.common.persistence.RootPersistentEntity;
 import org.apache.kylin.common.util.Dictionary;
+import org.apache.kylin.metadata.model.ColumnDesc;
 import org.apache.kylin.source.ReadableTable.TableSignature;
 
 import com.fasterxml.jackson.annotation.JsonAutoDetect;
@@ -51,6 +52,10 @@ public class DictionaryInfo extends RootPersistentEntity {
     public DictionaryInfo() {
     }
 
+    public DictionaryInfo(ColumnDesc col, String dataType, TableSignature input) {
+        this(col.getTable().getIdentity(), col.getName(), col.getZeroBasedIndex(), dataType, input);
+    }
+    
     public DictionaryInfo(String sourceTable, String sourceColumn, int sourceColumnIndex, String dataType, TableSignature input) {
 
         this.updateRandomUuid();

http://git-wip-us.apache.org/repos/asf/kylin/blob/fde30a15/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
index c33cd28..37e4757 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
@@ -38,12 +38,10 @@ import org.apache.kylin.metadata.MetadataManager;
 import org.apache.kylin.metadata.datatype.DataType;
 import org.apache.kylin.metadata.model.DataModelDesc;
 import org.apache.kylin.metadata.model.JoinDesc;
-import org.apache.kylin.metadata.model.TableDesc;
 import org.apache.kylin.metadata.model.TableRef;
 import org.apache.kylin.metadata.model.TblColRef;
 import org.apache.kylin.source.ReadableTable;
 import org.apache.kylin.source.ReadableTable.TableSignature;
-import org.apache.kylin.source.SourceFactory;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -273,45 +271,19 @@ public class DictionaryManager {
         }
     }
 
-    public DictionaryInfo buildDictionary(DataModelDesc model, TblColRef col, DistinctColumnValuesProvider factTableValueProvider) throws IOException {
-        return buildDictionary(model, col, factTableValueProvider, null);
+    public DictionaryInfo buildDictionary(DataModelDesc model, TblColRef col, ReadableTable inpTable) throws IOException {
+        return buildDictionary(model, col, inpTable, null);
     }
 
-    public DictionaryInfo buildDictionary(DataModelDesc model, TblColRef col, DistinctColumnValuesProvider factTableValueProvider, String builderClass) throws IOException {
+    public DictionaryInfo buildDictionary(DataModelDesc model, TblColRef col, ReadableTable inpTable, String builderClass) throws IOException {
 
         logger.info("building dictionary for " + col);
 
-        TblColRef srcCol = decideSourceData(model, col);
-        String srcTable = srcCol.getTable();
-        String srcColName = srcCol.getName();
-        int srcColIdx = srcCol.getColumnDesc().getZeroBasedIndex();
-
-        ReadableTable inpTable;
-        if (model.isFactTable(srcTable)) {
-            inpTable = factTableValueProvider.getDistinctValuesFor(srcCol);
-        } else {
-            MetadataManager metadataManager = MetadataManager.getInstance(config);
-            TableDesc tableDesc = new TableDesc(metadataManager.getTableDesc(srcTable));
-            if (TableDesc.TABLE_TYPE_VIRTUAL_VIEW.equalsIgnoreCase(tableDesc.getTableType())) {
-                TableDesc materializedTbl = new TableDesc();
-                materializedTbl.setDatabase(config.getHiveDatabaseForIntermediateTable());
-                materializedTbl.setName(tableDesc.getMaterializedName());
-                inpTable = SourceFactory.createReadableTable(materializedTbl);
-            } else {
-                inpTable = SourceFactory.createReadableTable(tableDesc);
-            }
-        }
-
-        TableSignature inputSig = inpTable.getSignature();
-        if (inputSig == null) // table does not exists
-            return null;
-
-        DictionaryInfo dictInfo = new DictionaryInfo(srcTable, srcColName, srcColIdx, col.getDatatype(), inputSig);
-
-        String dupDict = checkDupByInfo(dictInfo);
-        if (dupDict != null) {
-            logger.info("Identical dictionary input " + dictInfo.getInput() + ", reuse existing dictionary at " + dupDict);
-            return getDictionaryInfo(dupDict);
+        DictionaryInfo dictInfo = createDictionaryInfo(model, col, inpTable);
+        String dupInfo = checkDupByInfo(dictInfo);
+        if (dupInfo != null) {
+            logger.info("Identical dictionary input " + dictInfo.getInput() + ", reuse existing dictionary at " + dupInfo);
+            return getDictionaryInfo(dupInfo);
         }
 
         logger.info("Building dictionary object " + JsonUtil.writeValueAsString(dictInfo));
@@ -333,6 +305,27 @@ public class DictionaryManager {
         return trySaveNewDict(dictionary, dictInfo);
     }
 
+    public DictionaryInfo saveDictionary(DataModelDesc model, TblColRef col, ReadableTable inpTable, Dictionary<String> dictionary) throws IOException {
+        DictionaryInfo dictInfo = createDictionaryInfo(model, col, inpTable);
+        String dupInfo = checkDupByInfo(dictInfo);
+        if (dupInfo != null) {
+            logger.info("Identical dictionary input " + dictInfo.getInput() + ", reuse existing dictionary at " + dupInfo);
+            return getDictionaryInfo(dupInfo);
+        }
+
+        return trySaveNewDict(dictionary, dictInfo);
+    }
+
+    private DictionaryInfo createDictionaryInfo(DataModelDesc model, TblColRef col, ReadableTable inpTable) throws IOException {
+        TblColRef srcCol = decideSourceData(model, col);
+        TableSignature inputSig = inpTable.getSignature();
+        if (inputSig == null) // table does not exists
+            throw new IllegalStateException("Input table does not exist: " + inpTable);
+
+        DictionaryInfo dictInfo = new DictionaryInfo(srcCol.getColumnDesc(), col.getDatatype(), inputSig);
+        return dictInfo;
+    }
+
     /**
      * Decide a dictionary's source data, leverage PK-FK relationship.
      */

http://git-wip-us.apache.org/repos/asf/kylin/blob/fde30a15/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/MergeCuboidMapperTest.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/MergeCuboidMapperTest.java b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/MergeCuboidMapperTest.java
index 6f3b7c9..075ec80 100644
--- a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/MergeCuboidMapperTest.java
+++ b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/MergeCuboidMapperTest.java
@@ -119,7 +119,7 @@ public class MergeCuboidMapperTest extends LocalFileMetadataTestCase {
             signature.setLastModifiedTime(System.currentTimeMillis());
             signature.setPath("fake_dict_for" + lfn.getName() + segment.getName());
 
-            DictionaryInfo newDictInfo = new DictionaryInfo(lfn.getTable(), lfn.getColumnDesc().getName(), lfn.getColumnDesc().getZeroBasedIndex(), "string", signature);
+            DictionaryInfo newDictInfo = new DictionaryInfo(lfn.getColumnDesc(), "string", signature);
 
             List<String> values = new ArrayList<>();
             values.add("aaa");

http://git-wip-us.apache.org/repos/asf/kylin/blob/fde30a15/kylin-it/src/test/java/org/apache/kylin/cube/ITDictionaryManagerTest.java
----------------------------------------------------------------------
diff --git a/kylin-it/src/test/java/org/apache/kylin/cube/ITDictionaryManagerTest.java b/kylin-it/src/test/java/org/apache/kylin/cube/ITDictionaryManagerTest.java
index 22ffd26..188a97a 100644
--- a/kylin-it/src/test/java/org/apache/kylin/cube/ITDictionaryManagerTest.java
+++ b/kylin-it/src/test/java/org/apache/kylin/cube/ITDictionaryManagerTest.java
@@ -65,10 +65,10 @@ public class ITDictionaryManagerTest extends LocalFileMetadataTestCase {
 
         MockDistinctColumnValuesProvider mockupData = new MockDistinctColumnValuesProvider("A", "B", "C");
 
-        DictionaryInfo info1 = dictMgr.buildDictionary(cubeDesc.getModel(), col, mockupData);
+        DictionaryInfo info1 = dictMgr.buildDictionary(cubeDesc.getModel(), col, mockupData.getDistinctValuesFor(col));
         System.out.println(JsonUtil.writeValueAsIndentString(info1));
 
-        DictionaryInfo info2 = dictMgr.buildDictionary(cubeDesc.getModel(), col, mockupData);
+        DictionaryInfo info2 = dictMgr.buildDictionary(cubeDesc.getModel(), col, mockupData.getDistinctValuesFor(col));
         System.out.println(JsonUtil.writeValueAsIndentString(info2));
 
         // test check duplicate
@@ -89,7 +89,7 @@ public class ITDictionaryManagerTest extends LocalFileMetadataTestCase {
 
         // test empty dictionary
         MockDistinctColumnValuesProvider mockupEmpty = new MockDistinctColumnValuesProvider();
-        DictionaryInfo info3 = dictMgr.buildDictionary(cubeDesc.getModel(), col, mockupEmpty);
+        DictionaryInfo info3 = dictMgr.buildDictionary(cubeDesc.getModel(), col, mockupEmpty.getDistinctValuesFor(col));
         System.out.println(JsonUtil.writeValueAsIndentString(info3));
         assertEquals(0, info3.getCardinality());
         assertEquals(0, info3.getDictionaryObject().getSize());