You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2016/11/25 07:09:50 UTC
[6/8] kylin git commit: KYLIN-2217 add saveDictionary() on CubeManager & DictionaryManager
KYLIN-2217 add saveDictionary() on CubeManager & DictionaryManager
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/7f7417bd
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/7f7417bd
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/7f7417bd
Branch: refs/heads/KYLIN-2217-2
Commit: 7f7417bd2ed0eaa1766597d9dd4131cbe1d29757
Parents: b078dd9
Author: Li Yang <li...@apache.org>
Authored: Thu Nov 24 17:24:57 2016 +0800
Committer: Li Yang <li...@apache.org>
Committed: Fri Nov 25 15:05:36 2016 +0800
----------------------------------------------------------------------
.../java/org/apache/kylin/cube/CubeManager.java | 29 ++++++---
.../kylin/cube/cli/DictionaryGeneratorCLI.java | 34 +++++++++-
.../org/apache/kylin/cube/util/CubingUtils.java | 2 +-
.../org/apache/kylin/dict/DictionaryInfo.java | 5 ++
.../apache/kylin/dict/DictionaryManager.java | 65 +++++++++-----------
.../engine/mr/steps/MergeCuboidMapperTest.java | 2 +-
.../kylin/cube/ITDictionaryManagerTest.java | 6 +-
7 files changed, 93 insertions(+), 50 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/7f7417bd/core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java b/core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java
index 307bb46..ea8ff81 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java
@@ -48,7 +48,6 @@ import org.apache.kylin.cube.model.DictionaryDesc;
import org.apache.kylin.cube.model.DimensionDesc;
import org.apache.kylin.dict.DictionaryInfo;
import org.apache.kylin.dict.DictionaryManager;
-import org.apache.kylin.dict.DistinctColumnValuesProvider;
import org.apache.kylin.dict.lookup.LookupStringTable;
import org.apache.kylin.dict.lookup.SnapshotManager;
import org.apache.kylin.dict.lookup.SnapshotTable;
@@ -214,25 +213,39 @@ public class CubeManager implements IRealizationProvider {
return result;
}
- public DictionaryInfo buildDictionary(CubeSegment cubeSeg, TblColRef col, DistinctColumnValuesProvider factTableValueProvider) throws IOException {
+ public DictionaryInfo buildDictionary(CubeSegment cubeSeg, TblColRef col, ReadableTable inpTable) throws IOException {
CubeDesc cubeDesc = cubeSeg.getCubeDesc();
if (!cubeDesc.getAllColumnsNeedDictionaryBuilt().contains(col))
return null;
- DictionaryManager dictMgr = getDictionaryManager();
String builderClass = cubeDesc.getDictionaryBuilderClass(col);
- DictionaryInfo dictInfo = dictMgr.buildDictionary(cubeDesc.getModel(), col, factTableValueProvider, builderClass);
+ DictionaryInfo dictInfo = getDictionaryManager().buildDictionary(cubeDesc.getModel(), col, inpTable, builderClass);
+ saveDictionaryInfo(cubeSeg, col, dictInfo);
+ return dictInfo;
+ }
+
+ public DictionaryInfo saveDictionary(CubeSegment cubeSeg, TblColRef col, ReadableTable inpTable, Dictionary<String> dict) throws IOException {
+ CubeDesc cubeDesc = cubeSeg.getCubeDesc();
+ if (!cubeDesc.getAllColumnsNeedDictionaryBuilt().contains(col))
+ return null;
+
+ DictionaryInfo dictInfo = getDictionaryManager().saveDictionary(cubeDesc.getModel(), col, inpTable, dict);
+
+ saveDictionaryInfo(cubeSeg, col, dictInfo);
+ return dictInfo;
+ }
+
+ private void saveDictionaryInfo(CubeSegment cubeSeg, TblColRef col, DictionaryInfo dictInfo) throws IOException {
if (dictInfo != null) {
Dictionary<?> dict = dictInfo.getDictionaryObject();
cubeSeg.putDictResPath(col, dictInfo.getResourcePath());
cubeSeg.getRowkeyStats().add(new Object[] { col.getName(), dict.getSize(), dict.getSizeOfId() });
- CubeUpdate cubeBuilder = new CubeUpdate(cubeSeg.getCubeInstance());
- cubeBuilder.setToUpdateSegs(cubeSeg);
- updateCube(cubeBuilder);
+ CubeUpdate update = new CubeUpdate(cubeSeg.getCubeInstance());
+ update.setToUpdateSegs(cubeSeg);
+ updateCube(update);
}
- return dictInfo;
}
/**
http://git-wip-us.apache.org/repos/asf/kylin/blob/7f7417bd/core-cube/src/main/java/org/apache/kylin/cube/cli/DictionaryGeneratorCLI.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/cli/DictionaryGeneratorCLI.java b/core-cube/src/main/java/org/apache/kylin/cube/cli/DictionaryGeneratorCLI.java
index 89e2e9b..a6aeb96 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/cli/DictionaryGeneratorCLI.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/cli/DictionaryGeneratorCLI.java
@@ -26,9 +26,15 @@ import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.cube.model.DimensionDesc;
+import org.apache.kylin.dict.DictionaryManager;
import org.apache.kylin.dict.DistinctColumnValuesProvider;
+import org.apache.kylin.metadata.MetadataManager;
+import org.apache.kylin.metadata.model.DataModelDesc;
+import org.apache.kylin.metadata.model.TableDesc;
import org.apache.kylin.metadata.model.TableRef;
import org.apache.kylin.metadata.model.TblColRef;
+import org.apache.kylin.source.ReadableTable;
+import org.apache.kylin.source.SourceFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -51,7 +57,8 @@ public class DictionaryGeneratorCLI {
// dictionary
for (TblColRef col : cubeSeg.getCubeDesc().getAllColumnsNeedDictionaryBuilt()) {
logger.info("Building dictionary for " + col);
- cubeMgr.buildDictionary(cubeSeg, col, factTableValueProvider);
+ ReadableTable inpTable = decideInputTable(cubeSeg.getModel(), col, factTableValueProvider);
+ cubeMgr.buildDictionary(cubeSeg, col, inpTable);
}
// snapshot
@@ -67,4 +74,29 @@ public class DictionaryGeneratorCLI {
cubeMgr.buildSnapshotTable(cubeSeg, tableIdentity);
}
}
+
+ private static ReadableTable decideInputTable(DataModelDesc model, TblColRef col, DistinctColumnValuesProvider factTableValueProvider) {
+ KylinConfig config = model.getConfig();
+ DictionaryManager dictMgr = DictionaryManager.getInstance(config);
+ TblColRef srcCol = dictMgr.decideSourceData(model, col);
+ String srcTable = srcCol.getTable();
+
+ ReadableTable inpTable;
+ if (model.isFactTable(srcTable)) {
+ inpTable = factTableValueProvider.getDistinctValuesFor(srcCol);
+ } else {
+ MetadataManager metadataManager = MetadataManager.getInstance(config);
+ TableDesc tableDesc = new TableDesc(metadataManager.getTableDesc(srcTable));
+ if (TableDesc.TABLE_TYPE_VIRTUAL_VIEW.equalsIgnoreCase(tableDesc.getTableType())) {
+ TableDesc materializedTbl = new TableDesc();
+ materializedTbl.setDatabase(config.getHiveDatabaseForIntermediateTable());
+ materializedTbl.setName(tableDesc.getMaterializedName());
+ inpTable = SourceFactory.createReadableTable(materializedTbl);
+ } else {
+ inpTable = SourceFactory.createReadableTable(tableDesc);
+ }
+ }
+
+ return inpTable;
+ }
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/7f7417bd/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java b/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
index 10dad2c..413b907 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
@@ -170,7 +170,7 @@ public class CubingUtils {
signature.setLastModifiedTime(System.currentTimeMillis());
signature.setPath(String.format("streaming_%s_%s", startOffset, endOffset));
signature.setSize(endOffset - startOffset);
- DictionaryInfo dictInfo = new DictionaryInfo(tblColRef.getTable(), tblColRef.getName(), tblColRef.getColumnDesc().getZeroBasedIndex(), tblColRef.getDatatype(), signature);
+ DictionaryInfo dictInfo = new DictionaryInfo(tblColRef.getColumnDesc(), tblColRef.getDatatype(), signature);
logger.info("writing dictionary for TblColRef:" + tblColRef.toString());
DictionaryManager dictionaryManager = DictionaryManager.getInstance(cubeSegment.getCubeDesc().getConfig());
try {
http://git-wip-us.apache.org/repos/asf/kylin/blob/7f7417bd/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java
index 4fba59a..8526467 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java
@@ -21,6 +21,7 @@ package org.apache.kylin.dict;
import org.apache.kylin.common.persistence.ResourceStore;
import org.apache.kylin.common.persistence.RootPersistentEntity;
import org.apache.kylin.common.util.Dictionary;
+import org.apache.kylin.metadata.model.ColumnDesc;
import org.apache.kylin.source.ReadableTable.TableSignature;
import com.fasterxml.jackson.annotation.JsonAutoDetect;
@@ -51,6 +52,10 @@ public class DictionaryInfo extends RootPersistentEntity {
public DictionaryInfo() {
}
+ public DictionaryInfo(ColumnDesc col, String dataType, TableSignature input) {
+ this(col.getTable().getIdentity(), col.getName(), col.getZeroBasedIndex(), dataType, input);
+ }
+
public DictionaryInfo(String sourceTable, String sourceColumn, int sourceColumnIndex, String dataType, TableSignature input) {
this.updateRandomUuid();
http://git-wip-us.apache.org/repos/asf/kylin/blob/7f7417bd/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
index c33cd28..37e4757 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
@@ -38,12 +38,10 @@ import org.apache.kylin.metadata.MetadataManager;
import org.apache.kylin.metadata.datatype.DataType;
import org.apache.kylin.metadata.model.DataModelDesc;
import org.apache.kylin.metadata.model.JoinDesc;
-import org.apache.kylin.metadata.model.TableDesc;
import org.apache.kylin.metadata.model.TableRef;
import org.apache.kylin.metadata.model.TblColRef;
import org.apache.kylin.source.ReadableTable;
import org.apache.kylin.source.ReadableTable.TableSignature;
-import org.apache.kylin.source.SourceFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -273,45 +271,19 @@ public class DictionaryManager {
}
}
- public DictionaryInfo buildDictionary(DataModelDesc model, TblColRef col, DistinctColumnValuesProvider factTableValueProvider) throws IOException {
- return buildDictionary(model, col, factTableValueProvider, null);
+ public DictionaryInfo buildDictionary(DataModelDesc model, TblColRef col, ReadableTable inpTable) throws IOException {
+ return buildDictionary(model, col, inpTable, null);
}
- public DictionaryInfo buildDictionary(DataModelDesc model, TblColRef col, DistinctColumnValuesProvider factTableValueProvider, String builderClass) throws IOException {
+ public DictionaryInfo buildDictionary(DataModelDesc model, TblColRef col, ReadableTable inpTable, String builderClass) throws IOException {
logger.info("building dictionary for " + col);
- TblColRef srcCol = decideSourceData(model, col);
- String srcTable = srcCol.getTable();
- String srcColName = srcCol.getName();
- int srcColIdx = srcCol.getColumnDesc().getZeroBasedIndex();
-
- ReadableTable inpTable;
- if (model.isFactTable(srcTable)) {
- inpTable = factTableValueProvider.getDistinctValuesFor(srcCol);
- } else {
- MetadataManager metadataManager = MetadataManager.getInstance(config);
- TableDesc tableDesc = new TableDesc(metadataManager.getTableDesc(srcTable));
- if (TableDesc.TABLE_TYPE_VIRTUAL_VIEW.equalsIgnoreCase(tableDesc.getTableType())) {
- TableDesc materializedTbl = new TableDesc();
- materializedTbl.setDatabase(config.getHiveDatabaseForIntermediateTable());
- materializedTbl.setName(tableDesc.getMaterializedName());
- inpTable = SourceFactory.createReadableTable(materializedTbl);
- } else {
- inpTable = SourceFactory.createReadableTable(tableDesc);
- }
- }
-
- TableSignature inputSig = inpTable.getSignature();
- if (inputSig == null) // table does not exists
- return null;
-
- DictionaryInfo dictInfo = new DictionaryInfo(srcTable, srcColName, srcColIdx, col.getDatatype(), inputSig);
-
- String dupDict = checkDupByInfo(dictInfo);
- if (dupDict != null) {
- logger.info("Identical dictionary input " + dictInfo.getInput() + ", reuse existing dictionary at " + dupDict);
- return getDictionaryInfo(dupDict);
+ DictionaryInfo dictInfo = createDictionaryInfo(model, col, inpTable);
+ String dupInfo = checkDupByInfo(dictInfo);
+ if (dupInfo != null) {
+ logger.info("Identical dictionary input " + dictInfo.getInput() + ", reuse existing dictionary at " + dupInfo);
+ return getDictionaryInfo(dupInfo);
}
logger.info("Building dictionary object " + JsonUtil.writeValueAsString(dictInfo));
@@ -333,6 +305,27 @@ public class DictionaryManager {
return trySaveNewDict(dictionary, dictInfo);
}
+ public DictionaryInfo saveDictionary(DataModelDesc model, TblColRef col, ReadableTable inpTable, Dictionary<String> dictionary) throws IOException {
+ DictionaryInfo dictInfo = createDictionaryInfo(model, col, inpTable);
+ String dupInfo = checkDupByInfo(dictInfo);
+ if (dupInfo != null) {
+ logger.info("Identical dictionary input " + dictInfo.getInput() + ", reuse existing dictionary at " + dupInfo);
+ return getDictionaryInfo(dupInfo);
+ }
+
+ return trySaveNewDict(dictionary, dictInfo);
+ }
+
+ private DictionaryInfo createDictionaryInfo(DataModelDesc model, TblColRef col, ReadableTable inpTable) throws IOException {
+ TblColRef srcCol = decideSourceData(model, col);
+ TableSignature inputSig = inpTable.getSignature();
+ if (inputSig == null) // table does not exists
+ throw new IllegalStateException("Input table does not exist: " + inpTable);
+
+ DictionaryInfo dictInfo = new DictionaryInfo(srcCol.getColumnDesc(), col.getDatatype(), inputSig);
+ return dictInfo;
+ }
+
/**
* Decide a dictionary's source data, leverage PK-FK relationship.
*/
http://git-wip-us.apache.org/repos/asf/kylin/blob/7f7417bd/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/MergeCuboidMapperTest.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/MergeCuboidMapperTest.java b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/MergeCuboidMapperTest.java
index 6f3b7c9..075ec80 100644
--- a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/MergeCuboidMapperTest.java
+++ b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/MergeCuboidMapperTest.java
@@ -119,7 +119,7 @@ public class MergeCuboidMapperTest extends LocalFileMetadataTestCase {
signature.setLastModifiedTime(System.currentTimeMillis());
signature.setPath("fake_dict_for" + lfn.getName() + segment.getName());
- DictionaryInfo newDictInfo = new DictionaryInfo(lfn.getTable(), lfn.getColumnDesc().getName(), lfn.getColumnDesc().getZeroBasedIndex(), "string", signature);
+ DictionaryInfo newDictInfo = new DictionaryInfo(lfn.getColumnDesc(), "string", signature);
List<String> values = new ArrayList<>();
values.add("aaa");
http://git-wip-us.apache.org/repos/asf/kylin/blob/7f7417bd/kylin-it/src/test/java/org/apache/kylin/cube/ITDictionaryManagerTest.java
----------------------------------------------------------------------
diff --git a/kylin-it/src/test/java/org/apache/kylin/cube/ITDictionaryManagerTest.java b/kylin-it/src/test/java/org/apache/kylin/cube/ITDictionaryManagerTest.java
index 22ffd26..188a97a 100644
--- a/kylin-it/src/test/java/org/apache/kylin/cube/ITDictionaryManagerTest.java
+++ b/kylin-it/src/test/java/org/apache/kylin/cube/ITDictionaryManagerTest.java
@@ -65,10 +65,10 @@ public class ITDictionaryManagerTest extends LocalFileMetadataTestCase {
MockDistinctColumnValuesProvider mockupData = new MockDistinctColumnValuesProvider("A", "B", "C");
- DictionaryInfo info1 = dictMgr.buildDictionary(cubeDesc.getModel(), col, mockupData);
+ DictionaryInfo info1 = dictMgr.buildDictionary(cubeDesc.getModel(), col, mockupData.getDistinctValuesFor(col));
System.out.println(JsonUtil.writeValueAsIndentString(info1));
- DictionaryInfo info2 = dictMgr.buildDictionary(cubeDesc.getModel(), col, mockupData);
+ DictionaryInfo info2 = dictMgr.buildDictionary(cubeDesc.getModel(), col, mockupData.getDistinctValuesFor(col));
System.out.println(JsonUtil.writeValueAsIndentString(info2));
// test check duplicate
@@ -89,7 +89,7 @@ public class ITDictionaryManagerTest extends LocalFileMetadataTestCase {
// test empty dictionary
MockDistinctColumnValuesProvider mockupEmpty = new MockDistinctColumnValuesProvider();
- DictionaryInfo info3 = dictMgr.buildDictionary(cubeDesc.getModel(), col, mockupEmpty);
+ DictionaryInfo info3 = dictMgr.buildDictionary(cubeDesc.getModel(), col, mockupEmpty.getDistinctValuesFor(col));
System.out.println(JsonUtil.writeValueAsIndentString(info3));
assertEquals(0, info3.getCardinality());
assertEquals(0, info3.getDictionaryObject().getSize());