You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2016/12/17 02:43:11 UTC

kylin git commit: KYLIN-2283 bug fix

Repository: kylin
Updated Branches:
  refs/heads/KYLIN-2283 8c1add806 -> 5957ee118


KYLIN-2283 bug fix


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/5957ee11
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/5957ee11
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/5957ee11

Branch: refs/heads/KYLIN-2283
Commit: 5957ee1181d5a021bab019fad96f24d32fd1227d
Parents: 8c1add8
Author: Yang Li <li...@apache.org>
Authored: Sat Dec 17 10:41:35 2016 +0800
Committer: Yang Li <li...@apache.org>
Committed: Sat Dec 17 10:41:35 2016 +0800

----------------------------------------------------------------------
 .../kylin/source/datagen/ColumnGenConfig.java   |   2 +
 .../kylin/source/datagen/ColumnGenerator.java   |  58 ++++---
 .../source/datagen/ModelDataGenerator.java      | 163 ++++++++++++++-----
 .../kylin/source/datagen/TableGenConfig.java    |  16 +-
 .../org/apache/kylin/source/datagen/Util.java   |   4 +-
 .../kylin/source/datagen/DataGenTest.java       |  21 ++-
 .../table/DEFAULT.TEST_KYLIN_FACT.json          |   2 +-
 7 files changed, 195 insertions(+), 71 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/5957ee11/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java
index 3d04cf2..62da805 100644
--- a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java
+++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java
@@ -51,6 +51,7 @@ public class ColumnGenConfig {
     boolean genNull;
     double genNullPct;
     boolean order;
+    boolean unique;
     
     public ColumnGenConfig(ColumnDesc col, ModelDataGenerator modelGen) throws IOException {
         init(col, modelGen);
@@ -83,6 +84,7 @@ public class ColumnGenConfig {
         genNull = Util.parseBoolean(config, "null", guessGenNull(col.getName()));
         genNullPct = Util.parseDouble(config, "nullpct", 0.01);
         order = Util.parseBoolean(config, "order", false);
+        unique = Util.parseBoolean(config, "uniq", modelGen.isPK(col));
     }
 
     private int guessCardinality(String col) {

http://git-wip-us.apache.org/repos/asf/kylin/blob/5957ee11/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java
index fb7ec36..f171237 100644
--- a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java
+++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java
@@ -21,6 +21,7 @@ package org.apache.kylin.source.datagen;
 import java.io.IOException;
 import java.text.DecimalFormat;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.Iterator;
@@ -68,8 +69,8 @@ public class ColumnGenerator {
             result = new AddNullFilter(result, conf.genNullPct);
         }
 
-        if (conf.order) {
-            result = new OrderFilter(result, targetRows);
+        if (conf.order || conf.unique) {
+            result = new OrderFilter(result, conf.unique, targetRows);
         }
 
         return result;
@@ -82,7 +83,7 @@ public class ColumnGenerator {
         }
     }
 
-    private static class RandomGen extends Base {
+    private class RandomGen extends Base {
 
         private DataType type;
         private String format;
@@ -206,7 +207,7 @@ public class ColumnGenerator {
 
     }
 
-    private static class IDGen extends Base {
+    private class IDGen extends Base {
 
         int next;
 
@@ -225,7 +226,7 @@ public class ColumnGenerator {
         }
     }
 
-    private static class DiscreteGen extends Base {
+    private class DiscreteGen extends Base {
 
         private List<String> values;
         private Random rand;
@@ -254,7 +255,7 @@ public class ColumnGenerator {
         }
     }
 
-    private static class CardinalityFilter extends Base {
+    private class CardinalityFilter extends Base {
 
         private Iterator<String> input;
         private int card;
@@ -286,7 +287,7 @@ public class ColumnGenerator {
         }
     }
 
-    private static class AddNullFilter extends Base {
+    private class AddNullFilter extends Base {
 
         private Iterator<String> input;
         private double nullPct;
@@ -309,27 +310,38 @@ public class ColumnGenerator {
         }
     }
 
-    private static class OrderFilter extends Base {
+    final private Comparator<String> comp = new Comparator<String>() {
+        @Override
+        public int compare(String s1, String s2) {
+            if (s1 == null) {
+                return s2 == null ? 0 : -1;
+            } else if (s2 == null) {
+                return 1;
+            } else {
+                if (targetCol.getType().isNumberFamily())
+                    return Double.compare(Double.parseDouble(s1), Double.parseDouble(s2));
+                else
+                    return s1.compareTo(s2);
+            }
+        }
+    };
+
+    private class OrderFilter extends Base {
 
         private Iterator<String> iter;
 
-        public OrderFilter(Iterator<String> input, int targetRows) {
-            ArrayList<String> cache = new ArrayList<>(targetRows);
-            for (int i = 0; i < targetRows; i++) {
+        public OrderFilter(Iterator<String> input, boolean unique, int targetRows) {
+            Collection<String> cache = unique ? new TreeSet<String>(comp) : new ArrayList<String>(targetRows);
+            int cap = targetRows * 100;
+            for (int i = 0; cache.size() < targetRows; i++) {
                 cache.add(input.next());
+                if (i >= cap)
+                    throw new IllegalStateException();
+            }
+
+            if (cache instanceof List) {
+                Collections.sort((List<String>) cache, comp);
             }
-            Collections.sort(cache, new Comparator<String>() {
-                @Override
-                public int compare(String s1, String s2) {
-                    if (s1 == null) {
-                        return s2 == null ? 0 : -1;
-                    } else if (s2 == null) {
-                        return 1;
-                    } else {
-                        return s1.compareTo(s2);
-                    }
-                }
-            });
 
             iter = cache.iterator();
         }

http://git-wip-us.apache.org/repos/asf/kylin/blob/5957ee11/core-metadata/src/main/java/org/apache/kylin/source/datagen/ModelDataGenerator.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ModelDataGenerator.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ModelDataGenerator.java
index b85703c..1319528 100644
--- a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ModelDataGenerator.java
+++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ModelDataGenerator.java
@@ -28,14 +28,18 @@ import java.io.PrintWriter;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.Iterator;
+import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Set;
 
 import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang.StringUtils;
 import org.apache.kylin.common.persistence.ResourceStore;
 import org.apache.kylin.common.util.Bytes;
+import org.apache.kylin.metadata.datatype.DataType;
 import org.apache.kylin.metadata.model.ColumnDesc;
 import org.apache.kylin.metadata.model.DataModelDesc;
+import org.apache.kylin.metadata.model.JoinDesc;
 import org.apache.kylin.metadata.model.JoinTableDesc;
 import org.apache.kylin.metadata.model.TableDesc;
 import org.apache.kylin.metadata.model.TblColRef;
@@ -48,58 +52,55 @@ public class ModelDataGenerator {
     final private int targetRows;
     final private ResourceStore outputStore;
     final private String outputPath;
-    
+
     boolean outprint = false; // for debug
-    
+
     public ModelDataGenerator(DataModelDesc model, int nRows) {
         this(model, nRows, ResourceStore.getStore(model.getConfig()), "/data");
     }
-    
+
     public ModelDataGenerator(DataModelDesc model, int nRows, ResourceStore outputStore, String outputPath) {
         this.model = model;
         this.targetRows = nRows;
         this.outputStore = outputStore;
         this.outputPath = outputPath;
     }
-    
+
     public void generate() throws IOException {
         Set<TableDesc> generated = new HashSet<>();
-        
+        Set<TableDesc> allTableDesc = new LinkedHashSet<>();
+
         JoinTableDesc[] allTables = model.getJoinTables();
         for (int i = allTables.length - 1; i >= -1; i--) {
             TableDesc table = (i == -1) ? model.getRootFactTable().getTableDesc() : allTables[i].getTableRef().getTableDesc();
+            allTableDesc.add(table);
+            
             if (generated.contains(table))
                 continue;
-            
+
             boolean gen = generateTable(table);
-            
+
             if (gen)
                 generated.add(table);
         }
-        
-        generateDDL(generated);
+
+        generateDDL(allTableDesc);
     }
 
     private boolean generateTable(TableDesc table) throws IOException {
-        TableGenConfig config = new TableGenConfig(table);
+        TableGenConfig config = new TableGenConfig(table, this);
         if (!config.needGen)
             return false;
-        
+
         ByteArrayOutputStream bout = new ByteArrayOutputStream();
         PrintWriter pout = new PrintWriter(new OutputStreamWriter(bout, "UTF-8"));
-        
+
         generateTableInternal(table, config, pout);
-        
+
         pout.close();
         bout.close();
-        
-        byte[] content = bout.toByteArray();
-        if (outprint) {
-            System.out.println("Generated " + path(table));
-            System.out.println(Bytes.toString(content));
-        }
-        
-        outputStore.putResource(path(table), new ByteArrayInputStream(content), System.currentTimeMillis());
+
+        saveResource(bout.toByteArray(), path(table));
         return true;
     }
 
@@ -107,33 +108,115 @@ public class ModelDataGenerator {
         ColumnDesc[] columns = table.getColumns();
         ColumnGenerator[] colGens = new ColumnGenerator[columns.length];
         Iterator<String>[] colIters = new Iterator[columns.length];
-        
+
         // config.rows is either a multiplier (0,1] or an absolute row number
         int tableRows = (int) ((config.rows > 1) ? config.rows : targetRows * config.rows);
         tableRows = Math.max(1, tableRows);
-        
+
         // same seed for all columns, to ensure composite FK columns generate correct pairs
         long seed = System.currentTimeMillis();
-        
+
         for (int i = 0; i < columns.length; i++) {
             colGens[i] = new ColumnGenerator(columns[i], tableRows, this);
             colIters[i] = colGens[i].generate(seed);
         }
-        
+
         for (int i = 0; i < tableRows; i++) {
             for (int c = 0; c < columns.length; c++) {
                 if (c > 0)
                     out.print(",");
-                
+
                 String v = colIters[c].next();
                 Preconditions.checkState(v == null || !v.contains(","));
-                
+
                 out.print(v == null ? "\\N" : v); // \N is null for hive
             }
             out.print("\n");
         }
     }
 
+    private void generateDDL(Set<TableDesc> tables) throws IOException {
+
+        ByteArrayOutputStream bout = new ByteArrayOutputStream();
+        PrintWriter pout = new PrintWriter(new OutputStreamWriter(bout, "UTF-8"));
+
+        generateDatabaseDDL(tables, pout);
+        generateCreateTableDDL(tables, pout);
+        generateLoadDataDDL(tables, pout);
+
+        pout.close();
+        bout.close();
+
+        saveResource(bout.toByteArray(), path(model));
+    }
+
+    private void generateDatabaseDDL(Set<TableDesc> tables, PrintWriter out) {
+        Set<String> dbs = new HashSet<>();
+        for (TableDesc t : tables) {
+            String db = t.getDatabase();
+            if (StringUtils.isBlank(db) == false && "DEFAULT".equals(db) == false)
+                dbs.add(db);
+        }
+
+        for (String db : dbs) {
+            out.print("CREATE DATABASE IF NOT EXISTS " + db + ";\n");
+        }
+        out.print("\n");
+    }
+
+    private void generateCreateTableDDL(Set<TableDesc> tables, PrintWriter out) {
+        for (TableDesc t : tables) {
+            out.print("DROP TABLE IF EXISTS " + t.getIdentity() + ";\n");
+
+            out.print("CREATE TABLE " + t.getIdentity() + "(" + "\n");
+
+            for (int i = 0; i < t.getColumns().length; i++) {
+                ColumnDesc col = t.getColumns()[i];
+                out.print("    ");
+                if (i > 0) {
+                    out.print(",");
+                }
+                out.print(col.getName() + " " + hiveType(col.getType()) + "\n");
+            }
+
+            out.print(")" + "\n");
+            out.print("ROW FORMAT DELIMITED FIELDS TERMINATED BY ','" + "\n");
+            out.print("STORED AS TEXTFILE" + ";\n");
+            out.print("\n");
+        }
+    }
+
+    private String hiveType(DataType type) {
+        String t = type.toString();
+        if (t.startsWith("varchar"))
+            return "string";
+        else if (t.startsWith("integer"))
+            return "int";
+        else
+            return t;
+    }
+
+    private void generateLoadDataDDL(Set<TableDesc> tables, PrintWriter out) {
+        for (TableDesc t : tables) {
+            out.print("LOAD DATA LOCAL INPATH '" + t.getIdentity() + ".csv' OVERWRITE INTO TABLE " + t.getIdentity() + ";\n");
+        }
+    }
+
+    public boolean existsInStore(TableDesc table) throws IOException {
+        return outputStore.exists(path(table));
+    }
+    
+    public boolean isPK(ColumnDesc col) {
+        for (JoinTableDesc joinTable : model.getJoinTables()) {
+            JoinDesc join = joinTable.getJoin();
+            for (TblColRef pk : join.getPrimaryKeyColumns()) {
+                if (pk.getColumnDesc().equals(col))
+                    return true;
+            }
+        }
+        return false;
+    }
+    
     public List<String> getPkValuesIfIsFk(ColumnDesc fk) throws IOException {
         JoinTableDesc[] joinTables = model.getJoinTables();
         for (int i = 0; i < joinTables.length; i++) {
@@ -141,7 +224,7 @@ public class ModelDataGenerator {
             ColumnDesc pk = findPk(joinTable, fk);
             if (pk == null)
                 continue;
-            
+
             List<String> pkValues = getPkValues(pk);
             if (pkValues != null)
                 return pkValues;
@@ -157,15 +240,14 @@ public class ModelDataGenerator {
         }
         return null;
     }
-    
+
     private List<String> getPkValues(ColumnDesc pk) throws IOException {
-        String path = path(pk.getTable());
-        if (outputStore.exists(path) == false)
+        if (existsInStore(pk.getTable()) == false)
             return null;
 
         List<String> r = new ArrayList<>();
-        
-        BufferedReader in = new BufferedReader(new InputStreamReader(outputStore.getResource(path).inputStream, "UTF-8"));
+
+        BufferedReader in = new BufferedReader(new InputStreamReader(outputStore.getResource(path(pk.getTable())).inputStream, "UTF-8"));
         try {
             String line;
             while ((line = in.readLine()) != null) {
@@ -177,15 +259,22 @@ public class ModelDataGenerator {
         return r;
     }
 
-    private void generateDDL(Set<TableDesc> generated) {
-        // TODO Auto-generated method stub
-        
+    private void saveResource(byte[] content, String path) throws IOException {
+        if (outprint) {
+            System.out.println("Generated " + path);
+            System.out.println(Bytes.toString(content));
+        }
+        outputStore.putResource(path, new ByteArrayInputStream(content), System.currentTimeMillis());
     }
 
     private String path(TableDesc table) {
         return outputPath + "/" + table.getIdentity() + ".csv";
     }
-    
+
+    private String path(DataModelDesc model) {
+        return outputPath + "/" + "ddl_" + model.getName() + ".sql";
+    }
+
     public DataModelDesc getModle() {
         return model;
     }

http://git-wip-us.apache.org/repos/asf/kylin/blob/5957ee11/core-metadata/src/main/java/org/apache/kylin/source/datagen/TableGenConfig.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/TableGenConfig.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/TableGenConfig.java
index 1c00d3d..be948c1 100644
--- a/core-metadata/src/main/java/org/apache/kylin/source/datagen/TableGenConfig.java
+++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/TableGenConfig.java
@@ -18,6 +18,7 @@
 
 package org.apache.kylin.source.datagen;
 
+import java.io.IOException;
 import java.util.Map;
 
 import org.apache.kylin.metadata.model.TableDesc;
@@ -27,18 +28,21 @@ public class TableGenConfig {
     boolean needGen;
     double rows;
     
-    public TableGenConfig(TableDesc table) {
-        init(table.getDataGen());
-    }
-
-    private void init(String dataGen) {
+    public TableGenConfig(TableDesc table, ModelDataGenerator modelGen) throws IOException {
+        String dataGen = table.getDataGen();
+        if (dataGen == null && modelGen.existsInStore(table) == false) {
+            dataGen = "";
+        }
+        
         if (dataGen == null)
             return;
         
         needGen = true;
         
         Map<String, String> config = Util.parseEqualCommaPairs(dataGen, "rows");
-        rows = Util.parseDouble(config, "rows", 1.0);
+        
+        // config.rows is either a multiplier (0,1] or an absolute row number
+        rows = Util.parseDouble(config, "rows", modelGen.getModle().isFactTable(table.getIdentity()) ? 1.0 : 20);
     }
     
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/5957ee11/core-metadata/src/main/java/org/apache/kylin/source/datagen/Util.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/Util.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/Util.java
index f2e8dbf..ca27bbf 100644
--- a/core-metadata/src/main/java/org/apache/kylin/source/datagen/Util.java
+++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/Util.java
@@ -21,12 +21,14 @@ package org.apache.kylin.source.datagen;
 import java.util.LinkedHashMap;
 import java.util.Map;
 
+import org.apache.commons.lang3.StringUtils;
+
 public class Util {
 
     static Map<String, String> parseEqualCommaPairs(String equalCommaPairs, String defaultKey) {
         Map<String, String> r = new LinkedHashMap<>();
         
-        if (equalCommaPairs == null)
+        if (StringUtils.isBlank(equalCommaPairs))
             return r;
 
         for (String s : equalCommaPairs.split(",")) {

http://git-wip-us.apache.org/repos/asf/kylin/blob/5957ee11/core-metadata/src/test/java/org/apache/kylin/source/datagen/DataGenTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/source/datagen/DataGenTest.java b/core-metadata/src/test/java/org/apache/kylin/source/datagen/DataGenTest.java
index 70aba04..82455ab 100644
--- a/core-metadata/src/test/java/org/apache/kylin/source/datagen/DataGenTest.java
+++ b/core-metadata/src/test/java/org/apache/kylin/source/datagen/DataGenTest.java
@@ -41,12 +41,27 @@ public class DataGenTest extends LocalFileMetadataTestCase {
     }
 
     @Test
-    public void testBasics() throws IOException {
-        MetadataManager mgr = MetadataManager.getInstance(KylinConfig.getInstanceFromEnv());
-        DataModelDesc model = mgr.getDataModelDesc("test_kylin_inner_join_model_desc");
+    public void testCIConfigured() throws IOException {
+        DataModelDesc model = getModel("test_kylin_inner_join_model_desc");
+        ModelDataGenerator gen = new ModelDataGenerator(model, 100);
+        gen.outprint = true;
+        
+        gen.generate();
+    }
+
+    @Test
+    public void testSSBNoConfig() throws IOException {
+        DataModelDesc model = getModel("ssb");
         ModelDataGenerator gen = new ModelDataGenerator(model, 100);
         gen.outprint = true;
         
         gen.generate();
     }
+
+    private DataModelDesc getModel(String name) {
+        MetadataManager mgr = MetadataManager.getInstance(KylinConfig.getInstanceFromEnv());
+        DataModelDesc model = mgr.getDataModelDesc(name);
+        return model;
+    }
+
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/5957ee11/examples/test_case_data/localmeta/table/DEFAULT.TEST_KYLIN_FACT.json
----------------------------------------------------------------------
diff --git a/examples/test_case_data/localmeta/table/DEFAULT.TEST_KYLIN_FACT.json b/examples/test_case_data/localmeta/table/DEFAULT.TEST_KYLIN_FACT.json
index 37d8e56..74eb045 100644
--- a/examples/test_case_data/localmeta/table/DEFAULT.TEST_KYLIN_FACT.json
+++ b/examples/test_case_data/localmeta/table/DEFAULT.TEST_KYLIN_FACT.json
@@ -11,7 +11,7 @@
     "id" : "2",
     "name" : "CAL_DT",
     "datatype" : "date",
-    "data_gen" : "FK,order,null"
+    "data_gen" : "FK,order"
   }, {
     "id" : "3",
     "name" : "LSTG_FORMAT_NAME",