You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by ma...@apache.org on 2016/08/29 10:28:44 UTC
kylin git commit: KYLIN-1954 BuildInFunctionTransformer should be
executed per CubeSegmentScanner
Repository: kylin
Updated Branches:
refs/heads/master f48f7faa0 -> cc9acbc2e
KYLIN-1954 BuildInFunctionTransformer should be executed per CubeSegmentScanner
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/cc9acbc2
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/cc9acbc2
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/cc9acbc2
Branch: refs/heads/master
Commit: cc9acbc2e30b5a2c4a2a0d4b6bb056bc716f1fbb
Parents: f48f7fa
Author: Hongbin Ma <ma...@apache.org>
Authored: Mon Aug 29 18:28:30 2016 +0800
Committer: Hongbin Ma <ma...@apache.org>
Committed: Mon Aug 29 18:28:37 2016 +0800
----------------------------------------------------------------------
.../apache/kylin/job/dataGen/ColumnConfig.java | 9 ++++
.../kylin/job/dataGen/FactTableGenerator.java | 44 ++++++++++++++++++--
.../org/apache/kylin/job/dataGen/GenConfig.java | 11 +++++
.../storage/gtrecord/CubeSegmentScanner.java | 9 +++-
.../localmeta/data/data_gen_config.json | 4 +-
5 files changed, 72 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/cc9acbc2/assembly/src/test/java/org/apache/kylin/job/dataGen/ColumnConfig.java
----------------------------------------------------------------------
diff --git a/assembly/src/test/java/org/apache/kylin/job/dataGen/ColumnConfig.java b/assembly/src/test/java/org/apache/kylin/job/dataGen/ColumnConfig.java
index 44ba8f4..5e1c09f 100644
--- a/assembly/src/test/java/org/apache/kylin/job/dataGen/ColumnConfig.java
+++ b/assembly/src/test/java/org/apache/kylin/job/dataGen/ColumnConfig.java
@@ -35,6 +35,8 @@ public class ColumnConfig {
private boolean exclusive;
@JsonProperty("asRange")
private boolean asRange;
+ @JsonProperty("differentiateByDateBoundary")
+ private boolean differentiateByDateBoundary;
public boolean isAsRange() {
return asRange;
@@ -68,4 +70,11 @@ public class ColumnConfig {
this.valueSet = valueSet;
}
+ public boolean isDifferentiateByDateBoundary() {
+ return differentiateByDateBoundary;
+ }
+
+ public void setDifferentiateByDateBoundary(boolean differentiateByDateBoundary) {
+ this.differentiateByDateBoundary = differentiateByDateBoundary;
+ }
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/cc9acbc2/assembly/src/test/java/org/apache/kylin/job/dataGen/FactTableGenerator.java
----------------------------------------------------------------------
diff --git a/assembly/src/test/java/org/apache/kylin/job/dataGen/FactTableGenerator.java b/assembly/src/test/java/org/apache/kylin/job/dataGen/FactTableGenerator.java
index 368f509..9373a02 100644
--- a/assembly/src/test/java/org/apache/kylin/job/dataGen/FactTableGenerator.java
+++ b/assembly/src/test/java/org/apache/kylin/job/dataGen/FactTableGenerator.java
@@ -50,6 +50,8 @@ import org.apache.kylin.metadata.model.JoinDesc;
import org.apache.kylin.metadata.model.MeasureDesc;
import org.apache.kylin.metadata.model.TblColRef;
+import com.google.common.collect.Lists;
+
/**
*/
public class FactTableGenerator {
@@ -70,6 +72,11 @@ public class FactTableGenerator {
double conflictRatio;
double linkableRatio;
+ long differentiateBoundary = -1;
+ List<Integer> differentiateColumns = Lists.newArrayList();
+
+ SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
+
// the names of lookup table columns which is in relation with fact
// table(appear as fk in fact table)
TreeMap<String, LinkedList<String>> lookupTableKeys = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
@@ -207,6 +214,25 @@ public class FactTableGenerator {
// load config
loadConfig();
+ int index = 0;
+ for (ColumnDesc cDesc : MetadataManager.getInstance(KylinConfig.getInstanceFromEnv()).getTableDesc(factTableName).getColumns()) {
+ ColumnConfig cConfig = genConf.getColumnConfigByName(cDesc.getName());
+
+ if (cConfig != null && cConfig.isDifferentiateByDateBoundary()) {
+ if (!cDesc.getType().isStringFamily()) {
+ throw new IllegalStateException("differentiateByDateBoundary only applies to text types, actual:" + cDesc.getType());
+ }
+ if (genConf.getDifferentiateBoundary() == null) {
+ throw new IllegalStateException("differentiateBoundary not provided");
+ }
+ if (differentiateBoundary == -1) {
+ differentiateBoundary = format.parse(genConf.getDifferentiateBoundary()).getTime();
+ }
+ differentiateColumns.add(index);
+ }
+ index++;
+ }
+
TreeSet<String> factTableColumns = new TreeSet<>(String.CASE_INSENSITIVE_ORDER);
for (DimensionDesc dim : desc.getDimensions()) {
@@ -359,7 +385,6 @@ public class FactTableGenerator {
throw new RuntimeException("Does not support " + type);
}
- SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
Date start = format.parse(range.get(0));
Date end = format.parse(range.get(1));
long diff = end.getTime() - start.getTime();
@@ -533,6 +558,8 @@ public class FactTableGenerator {
KylinConfig config = KylinConfig.getInstanceFromEnv();
LinkedList<String> columnValues = new LinkedList<String>();
+ long currentRowTime = -1;
+
for (ColumnDesc cDesc : MetadataManager.getInstance(config).getTableDesc(factTableName).getColumns()) {
String colName = cDesc.getName();
@@ -544,8 +571,7 @@ public class FactTableGenerator {
columnValues.add(candidates.get(r.nextInt(candidates.size())));
} else if (usedCols.contains(colName)) {
-
- // if the current column is a metric column in fact table
+ // if the current column is a metric or dimension column in fact table
columnValues.add(createCell(cDesc));
} else {
@@ -553,6 +579,18 @@ public class FactTableGenerator {
columnValues.add(createDefaultsCell(cDesc.getTypeName()));
defaultColumns.add(colName);
}
+
+ if (cDesc.getRef().equals(this.cube.getDescriptor().getModel().getPartitionDesc().getPartitionDateColumnRef())) {
+ currentRowTime = format.parse(columnValues.get(columnValues.size() - 1)).getTime();
+ }
+ }
+
+ for (Integer index : differentiateColumns) {
+ if (currentRowTime >= differentiateBoundary) {
+ columnValues.set(index, columnValues.get(index) + "_B");
+ } else {
+ columnValues.set(index, columnValues.get(index) + "_A");
+ }
}
return columnValues;
http://git-wip-us.apache.org/repos/asf/kylin/blob/cc9acbc2/assembly/src/test/java/org/apache/kylin/job/dataGen/GenConfig.java
----------------------------------------------------------------------
diff --git a/assembly/src/test/java/org/apache/kylin/job/dataGen/GenConfig.java b/assembly/src/test/java/org/apache/kylin/job/dataGen/GenConfig.java
index c58cfb6..5204d2a 100644
--- a/assembly/src/test/java/org/apache/kylin/job/dataGen/GenConfig.java
+++ b/assembly/src/test/java/org/apache/kylin/job/dataGen/GenConfig.java
@@ -38,8 +38,19 @@ public class GenConfig {
@JsonProperty("columnConfigs")
private ArrayList<ColumnConfig> columnConfigs;
+ @JsonProperty("differentiateBoundary")
+ private String differentiateBoundary; //data before and after the provided date will be different, so that different segments will have different values
+
private HashMap<String, ColumnConfig> cache = new HashMap<String, ColumnConfig>();
+ public String getDifferentiateBoundary() {
+ return differentiateBoundary;
+ }
+
+ public void setDifferentiateBoundary(String differentiateBoundary) {
+ this.differentiateBoundary = differentiateBoundary;
+ }
+
public ArrayList<ColumnConfig> getColumnConfigs() {
return columnConfigs;
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/cc9acbc2/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/CubeSegmentScanner.java
----------------------------------------------------------------------
diff --git a/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/CubeSegmentScanner.java b/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/CubeSegmentScanner.java
index 4365ee2..6ed7d3b 100644
--- a/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/CubeSegmentScanner.java
+++ b/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/CubeSegmentScanner.java
@@ -34,7 +34,9 @@ import org.apache.kylin.gridtable.GTScanRequest;
import org.apache.kylin.gridtable.IGTScanner;
import org.apache.kylin.gridtable.ScannerWorker;
import org.apache.kylin.metadata.filter.ITupleFilterTransformer;
+import org.apache.kylin.metadata.filter.StringCodeSystem;
import org.apache.kylin.metadata.filter.TupleFilter;
+import org.apache.kylin.metadata.filter.TupleFilterSerializer;
import org.apache.kylin.metadata.model.FunctionDesc;
import org.apache.kylin.metadata.model.TblColRef;
import org.apache.kylin.storage.StorageContext;
@@ -52,10 +54,15 @@ public class CubeSegmentScanner implements IGTScanner {
final GTScanRequest scanRequest;
public CubeSegmentScanner(CubeSegment cubeSeg, Cuboid cuboid, Set<TblColRef> dimensions, Set<TblColRef> groups, //
- Collection<FunctionDesc> metrics, TupleFilter filter, StorageContext context, String gtStorage) {
+ Collection<FunctionDesc> metrics, TupleFilter originalfilter, StorageContext context, String gtStorage) {
this.cuboid = cuboid;
this.cubeSeg = cubeSeg;
+ //the filter might be changed later in this CubeSegmentScanner (In ITupleFilterTransformer)
+ //to avoid issues like in https://issues.apache.org/jira/browse/KYLIN-1954, make sure each CubeSegmentScanner
+ //is working on its own copy
+ byte[] serialize = TupleFilterSerializer.serialize(originalfilter, StringCodeSystem.INSTANCE);
+ TupleFilter filter = TupleFilterSerializer.deserialize(serialize, StringCodeSystem.INSTANCE);
// translate FunctionTupleFilter to IN clause
ITupleFilterTransformer translator = new BuildInFunctionTransformer(cubeSeg.getDimensionEncodingMap());
filter = translator.transform(filter);
http://git-wip-us.apache.org/repos/asf/kylin/blob/cc9acbc2/examples/test_case_data/localmeta/data/data_gen_config.json
----------------------------------------------------------------------
diff --git a/examples/test_case_data/localmeta/data/data_gen_config.json b/examples/test_case_data/localmeta/data/data_gen_config.json
index ff3f676..f730058 100644
--- a/examples/test_case_data/localmeta/data/data_gen_config.json
+++ b/examples/test_case_data/localmeta/data/data_gen_config.json
@@ -1,4 +1,5 @@
{
+ "differentiateBoundary": "2013-01-01",
"columnConfigs": [
{
"columnName": "lstg_format_name",
@@ -9,7 +10,8 @@
"Auction",
"Others"
],
- "exclusive": true
+ "exclusive": true,
+ "differentiateByDateBoundary": true
},
{
"columnName": "SELLER_ID",