You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by ma...@apache.org on 2016/08/29 10:28:44 UTC

kylin git commit: KYLIN-1954 BuildInFunctionTransformer should be executed per CubeSegmentScanner

Repository: kylin
Updated Branches:
  refs/heads/master f48f7faa0 -> cc9acbc2e


KYLIN-1954 BuildInFunctionTransformer should be executed per CubeSegmentScanner


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/cc9acbc2
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/cc9acbc2
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/cc9acbc2

Branch: refs/heads/master
Commit: cc9acbc2e30b5a2c4a2a0d4b6bb056bc716f1fbb
Parents: f48f7fa
Author: Hongbin Ma <ma...@apache.org>
Authored: Mon Aug 29 18:28:30 2016 +0800
Committer: Hongbin Ma <ma...@apache.org>
Committed: Mon Aug 29 18:28:37 2016 +0800

----------------------------------------------------------------------
 .../apache/kylin/job/dataGen/ColumnConfig.java  |  9 ++++
 .../kylin/job/dataGen/FactTableGenerator.java   | 44 ++++++++++++++++++--
 .../org/apache/kylin/job/dataGen/GenConfig.java | 11 +++++
 .../storage/gtrecord/CubeSegmentScanner.java    |  9 +++-
 .../localmeta/data/data_gen_config.json         |  4 +-
 5 files changed, 72 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/cc9acbc2/assembly/src/test/java/org/apache/kylin/job/dataGen/ColumnConfig.java
----------------------------------------------------------------------
diff --git a/assembly/src/test/java/org/apache/kylin/job/dataGen/ColumnConfig.java b/assembly/src/test/java/org/apache/kylin/job/dataGen/ColumnConfig.java
index 44ba8f4..5e1c09f 100644
--- a/assembly/src/test/java/org/apache/kylin/job/dataGen/ColumnConfig.java
+++ b/assembly/src/test/java/org/apache/kylin/job/dataGen/ColumnConfig.java
@@ -35,6 +35,8 @@ public class ColumnConfig {
     private boolean exclusive;
     @JsonProperty("asRange")
     private boolean asRange;
+    @JsonProperty("differentiateByDateBoundary")
+    private boolean differentiateByDateBoundary;
 
     public boolean isAsRange() {
         return asRange;
@@ -68,4 +70,11 @@ public class ColumnConfig {
         this.valueSet = valueSet;
     }
 
+    public boolean isDifferentiateByDateBoundary() {
+        return differentiateByDateBoundary;
+    }
+
+    public void setDifferentiateByDateBoundary(boolean differentiateByDateBoundary) {
+        this.differentiateByDateBoundary = differentiateByDateBoundary;
+    }
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/cc9acbc2/assembly/src/test/java/org/apache/kylin/job/dataGen/FactTableGenerator.java
----------------------------------------------------------------------
diff --git a/assembly/src/test/java/org/apache/kylin/job/dataGen/FactTableGenerator.java b/assembly/src/test/java/org/apache/kylin/job/dataGen/FactTableGenerator.java
index 368f509..9373a02 100644
--- a/assembly/src/test/java/org/apache/kylin/job/dataGen/FactTableGenerator.java
+++ b/assembly/src/test/java/org/apache/kylin/job/dataGen/FactTableGenerator.java
@@ -50,6 +50,8 @@ import org.apache.kylin.metadata.model.JoinDesc;
 import org.apache.kylin.metadata.model.MeasureDesc;
 import org.apache.kylin.metadata.model.TblColRef;
 
+import com.google.common.collect.Lists;
+
 /**
  */
 public class FactTableGenerator {
@@ -70,6 +72,11 @@ public class FactTableGenerator {
     double conflictRatio;
     double linkableRatio;
 
+    long differentiateBoundary = -1;
+    List<Integer> differentiateColumns = Lists.newArrayList();
+
+    SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
+
     // the names of lookup table columns which is in relation with fact
     // table(appear as fk in fact table)
     TreeMap<String, LinkedList<String>> lookupTableKeys = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
@@ -207,6 +214,25 @@ public class FactTableGenerator {
         // load config
         loadConfig();
 
+        int index = 0;
+        for (ColumnDesc cDesc : MetadataManager.getInstance(KylinConfig.getInstanceFromEnv()).getTableDesc(factTableName).getColumns()) {
+            ColumnConfig cConfig = genConf.getColumnConfigByName(cDesc.getName());
+
+            if (cConfig != null && cConfig.isDifferentiateByDateBoundary()) {
+                if (!cDesc.getType().isStringFamily()) {
+                    throw new IllegalStateException("differentiateByDateBoundary only applies to text types, actual:" + cDesc.getType());
+                }
+                if (genConf.getDifferentiateBoundary() == null) {
+                    throw new IllegalStateException("differentiateBoundary not provided");
+                }
+                if (differentiateBoundary == -1) {
+                    differentiateBoundary = format.parse(genConf.getDifferentiateBoundary()).getTime();
+                }
+                differentiateColumns.add(index);
+            }
+            index++;
+        }
+
         TreeSet<String> factTableColumns = new TreeSet<>(String.CASE_INSENSITIVE_ORDER);
 
         for (DimensionDesc dim : desc.getDimensions()) {
@@ -359,7 +385,6 @@ public class FactTableGenerator {
                 throw new RuntimeException("Does not support " + type);
             }
 
-            SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
             Date start = format.parse(range.get(0));
             Date end = format.parse(range.get(1));
             long diff = end.getTime() - start.getTime();
@@ -533,6 +558,8 @@ public class FactTableGenerator {
         KylinConfig config = KylinConfig.getInstanceFromEnv();
         LinkedList<String> columnValues = new LinkedList<String>();
 
+        long currentRowTime = -1;
+
         for (ColumnDesc cDesc : MetadataManager.getInstance(config).getTableDesc(factTableName).getColumns()) {
 
             String colName = cDesc.getName();
@@ -544,8 +571,7 @@ public class FactTableGenerator {
 
                 columnValues.add(candidates.get(r.nextInt(candidates.size())));
             } else if (usedCols.contains(colName)) {
-
-                // if the current column is a metric column in fact table
+                // if the current column is a metric or dimension column in fact table
                 columnValues.add(createCell(cDesc));
             } else {
 
@@ -553,6 +579,18 @@ public class FactTableGenerator {
                 columnValues.add(createDefaultsCell(cDesc.getTypeName()));
                 defaultColumns.add(colName);
             }
+
+            if (cDesc.getRef().equals(this.cube.getDescriptor().getModel().getPartitionDesc().getPartitionDateColumnRef())) {
+                currentRowTime = format.parse(columnValues.get(columnValues.size() - 1)).getTime();
+            }
+        }
+
+        for (Integer index : differentiateColumns) {
+            if (currentRowTime >= differentiateBoundary) {
+                columnValues.set(index, columnValues.get(index) + "_B");
+            } else {
+                columnValues.set(index, columnValues.get(index) + "_A");
+            }
         }
 
         return columnValues;

http://git-wip-us.apache.org/repos/asf/kylin/blob/cc9acbc2/assembly/src/test/java/org/apache/kylin/job/dataGen/GenConfig.java
----------------------------------------------------------------------
diff --git a/assembly/src/test/java/org/apache/kylin/job/dataGen/GenConfig.java b/assembly/src/test/java/org/apache/kylin/job/dataGen/GenConfig.java
index c58cfb6..5204d2a 100644
--- a/assembly/src/test/java/org/apache/kylin/job/dataGen/GenConfig.java
+++ b/assembly/src/test/java/org/apache/kylin/job/dataGen/GenConfig.java
@@ -38,8 +38,19 @@ public class GenConfig {
     @JsonProperty("columnConfigs")
     private ArrayList<ColumnConfig> columnConfigs;
 
+    @JsonProperty("differentiateBoundary")
+    private String differentiateBoundary; //date boundary in yyyy-MM-dd format: rows dated before and after it get different values in the flagged columns, so that different segments will have different data
+
     private HashMap<String, ColumnConfig> cache = new HashMap<String, ColumnConfig>();
 
+    public String getDifferentiateBoundary() {
+        return differentiateBoundary;
+    }
+
+    public void setDifferentiateBoundary(String differentiateBoundary) {
+        this.differentiateBoundary = differentiateBoundary;
+    }
+
     public ArrayList<ColumnConfig> getColumnConfigs() {
         return columnConfigs;
     }

http://git-wip-us.apache.org/repos/asf/kylin/blob/cc9acbc2/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/CubeSegmentScanner.java
----------------------------------------------------------------------
diff --git a/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/CubeSegmentScanner.java b/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/CubeSegmentScanner.java
index 4365ee2..6ed7d3b 100644
--- a/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/CubeSegmentScanner.java
+++ b/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/CubeSegmentScanner.java
@@ -34,7 +34,9 @@ import org.apache.kylin.gridtable.GTScanRequest;
 import org.apache.kylin.gridtable.IGTScanner;
 import org.apache.kylin.gridtable.ScannerWorker;
 import org.apache.kylin.metadata.filter.ITupleFilterTransformer;
+import org.apache.kylin.metadata.filter.StringCodeSystem;
 import org.apache.kylin.metadata.filter.TupleFilter;
+import org.apache.kylin.metadata.filter.TupleFilterSerializer;
 import org.apache.kylin.metadata.model.FunctionDesc;
 import org.apache.kylin.metadata.model.TblColRef;
 import org.apache.kylin.storage.StorageContext;
@@ -52,10 +54,15 @@ public class CubeSegmentScanner implements IGTScanner {
     final GTScanRequest scanRequest;
 
     public CubeSegmentScanner(CubeSegment cubeSeg, Cuboid cuboid, Set<TblColRef> dimensions, Set<TblColRef> groups, //
-            Collection<FunctionDesc> metrics, TupleFilter filter, StorageContext context, String gtStorage) {
+            Collection<FunctionDesc> metrics, TupleFilter originalfilter, StorageContext context, String gtStorage) {
         this.cuboid = cuboid;
         this.cubeSeg = cubeSeg;
 
+        //the filter might be changed later in this CubeSegmentScanner (in ITupleFilterTransformer);
+        //to avoid issues like https://issues.apache.org/jira/browse/KYLIN-1954, make sure each CubeSegmentScanner
+        //works on its own copy
+        byte[] serialize = TupleFilterSerializer.serialize(originalfilter, StringCodeSystem.INSTANCE);
+        TupleFilter filter = TupleFilterSerializer.deserialize(serialize, StringCodeSystem.INSTANCE);
         // translate FunctionTupleFilter to IN clause
         ITupleFilterTransformer translator = new BuildInFunctionTransformer(cubeSeg.getDimensionEncodingMap());
         filter = translator.transform(filter);

http://git-wip-us.apache.org/repos/asf/kylin/blob/cc9acbc2/examples/test_case_data/localmeta/data/data_gen_config.json
----------------------------------------------------------------------
diff --git a/examples/test_case_data/localmeta/data/data_gen_config.json b/examples/test_case_data/localmeta/data/data_gen_config.json
index ff3f676..f730058 100644
--- a/examples/test_case_data/localmeta/data/data_gen_config.json
+++ b/examples/test_case_data/localmeta/data/data_gen_config.json
@@ -1,4 +1,5 @@
 {
+  "differentiateBoundary": "2013-01-01",
   "columnConfigs": [
     {
       "columnName": "lstg_format_name",
@@ -9,7 +10,8 @@
         "Auction",
         "Others"
       ],
-      "exclusive": true
+      "exclusive": true,
+      "differentiateByDateBoundary": true
     },
     {
       "columnName": "SELLER_ID",