You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2016/12/17 02:43:11 UTC
kylin git commit: KYLIN-2283 bug fix
Repository: kylin
Updated Branches:
refs/heads/KYLIN-2283 8c1add806 -> 5957ee118
KYLIN-2283 bug fix
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/5957ee11
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/5957ee11
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/5957ee11
Branch: refs/heads/KYLIN-2283
Commit: 5957ee1181d5a021bab019fad96f24d32fd1227d
Parents: 8c1add8
Author: Yang Li <li...@apache.org>
Authored: Sat Dec 17 10:41:35 2016 +0800
Committer: Yang Li <li...@apache.org>
Committed: Sat Dec 17 10:41:35 2016 +0800
----------------------------------------------------------------------
.../kylin/source/datagen/ColumnGenConfig.java | 2 +
.../kylin/source/datagen/ColumnGenerator.java | 58 ++++---
.../source/datagen/ModelDataGenerator.java | 163 ++++++++++++++-----
.../kylin/source/datagen/TableGenConfig.java | 16 +-
.../org/apache/kylin/source/datagen/Util.java | 4 +-
.../kylin/source/datagen/DataGenTest.java | 21 ++-
.../table/DEFAULT.TEST_KYLIN_FACT.json | 2 +-
7 files changed, 195 insertions(+), 71 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/5957ee11/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java
index 3d04cf2..62da805 100644
--- a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java
+++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java
@@ -51,6 +51,7 @@ public class ColumnGenConfig {
boolean genNull;
double genNullPct;
boolean order;
+ boolean unique;
public ColumnGenConfig(ColumnDesc col, ModelDataGenerator modelGen) throws IOException {
init(col, modelGen);
@@ -83,6 +84,7 @@ public class ColumnGenConfig {
genNull = Util.parseBoolean(config, "null", guessGenNull(col.getName()));
genNullPct = Util.parseDouble(config, "nullpct", 0.01);
order = Util.parseBoolean(config, "order", false);
+ unique = Util.parseBoolean(config, "uniq", modelGen.isPK(col));
}
private int guessCardinality(String col) {
http://git-wip-us.apache.org/repos/asf/kylin/blob/5957ee11/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java
index fb7ec36..f171237 100644
--- a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java
+++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java
@@ -21,6 +21,7 @@ package org.apache.kylin.source.datagen;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
@@ -68,8 +69,8 @@ public class ColumnGenerator {
result = new AddNullFilter(result, conf.genNullPct);
}
- if (conf.order) {
- result = new OrderFilter(result, targetRows);
+ if (conf.order || conf.unique) {
+ result = new OrderFilter(result, conf.unique, targetRows);
}
return result;
@@ -82,7 +83,7 @@ public class ColumnGenerator {
}
}
- private static class RandomGen extends Base {
+ private class RandomGen extends Base {
private DataType type;
private String format;
@@ -206,7 +207,7 @@ public class ColumnGenerator {
}
- private static class IDGen extends Base {
+ private class IDGen extends Base {
int next;
@@ -225,7 +226,7 @@ public class ColumnGenerator {
}
}
- private static class DiscreteGen extends Base {
+ private class DiscreteGen extends Base {
private List<String> values;
private Random rand;
@@ -254,7 +255,7 @@ public class ColumnGenerator {
}
}
- private static class CardinalityFilter extends Base {
+ private class CardinalityFilter extends Base {
private Iterator<String> input;
private int card;
@@ -286,7 +287,7 @@ public class ColumnGenerator {
}
}
- private static class AddNullFilter extends Base {
+ private class AddNullFilter extends Base {
private Iterator<String> input;
private double nullPct;
@@ -309,27 +310,38 @@ public class ColumnGenerator {
}
}
- private static class OrderFilter extends Base {
+ final private Comparator<String> comp = new Comparator<String>() {
+ @Override
+ public int compare(String s1, String s2) {
+ if (s1 == null) {
+ return s2 == null ? 0 : -1;
+ } else if (s2 == null) {
+ return 1;
+ } else {
+ if (targetCol.getType().isNumberFamily())
+ return Double.compare(Double.parseDouble(s1), Double.parseDouble(s2));
+ else
+ return s1.compareTo(s2);
+ }
+ }
+ };
+
+ private class OrderFilter extends Base {
private Iterator<String> iter;
- public OrderFilter(Iterator<String> input, int targetRows) {
- ArrayList<String> cache = new ArrayList<>(targetRows);
- for (int i = 0; i < targetRows; i++) {
+ public OrderFilter(Iterator<String> input, boolean unique, int targetRows) {
+ Collection<String> cache = unique ? new TreeSet<String>(comp) : new ArrayList<String>(targetRows);
+ int cap = targetRows * 100;
+ for (int i = 0; cache.size() < targetRows; i++) {
cache.add(input.next());
+ if (i >= cap)
+ throw new IllegalStateException();
+ }
+
+ if (cache instanceof List) {
+ Collections.sort((List<String>) cache, comp);
}
- Collections.sort(cache, new Comparator<String>() {
- @Override
- public int compare(String s1, String s2) {
- if (s1 == null) {
- return s2 == null ? 0 : -1;
- } else if (s2 == null) {
- return 1;
- } else {
- return s1.compareTo(s2);
- }
- }
- });
iter = cache.iterator();
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/5957ee11/core-metadata/src/main/java/org/apache/kylin/source/datagen/ModelDataGenerator.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ModelDataGenerator.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ModelDataGenerator.java
index b85703c..1319528 100644
--- a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ModelDataGenerator.java
+++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ModelDataGenerator.java
@@ -28,14 +28,18 @@ import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
+import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang.StringUtils;
import org.apache.kylin.common.persistence.ResourceStore;
import org.apache.kylin.common.util.Bytes;
+import org.apache.kylin.metadata.datatype.DataType;
import org.apache.kylin.metadata.model.ColumnDesc;
import org.apache.kylin.metadata.model.DataModelDesc;
+import org.apache.kylin.metadata.model.JoinDesc;
import org.apache.kylin.metadata.model.JoinTableDesc;
import org.apache.kylin.metadata.model.TableDesc;
import org.apache.kylin.metadata.model.TblColRef;
@@ -48,58 +52,55 @@ public class ModelDataGenerator {
final private int targetRows;
final private ResourceStore outputStore;
final private String outputPath;
-
+
boolean outprint = false; // for debug
-
+
public ModelDataGenerator(DataModelDesc model, int nRows) {
this(model, nRows, ResourceStore.getStore(model.getConfig()), "/data");
}
-
+
public ModelDataGenerator(DataModelDesc model, int nRows, ResourceStore outputStore, String outputPath) {
this.model = model;
this.targetRows = nRows;
this.outputStore = outputStore;
this.outputPath = outputPath;
}
-
+
public void generate() throws IOException {
Set<TableDesc> generated = new HashSet<>();
-
+ Set<TableDesc> allTableDesc = new LinkedHashSet<>();
+
JoinTableDesc[] allTables = model.getJoinTables();
for (int i = allTables.length - 1; i >= -1; i--) {
TableDesc table = (i == -1) ? model.getRootFactTable().getTableDesc() : allTables[i].getTableRef().getTableDesc();
+ allTableDesc.add(table);
+
if (generated.contains(table))
continue;
-
+
boolean gen = generateTable(table);
-
+
if (gen)
generated.add(table);
}
-
- generateDDL(generated);
+
+ generateDDL(allTableDesc);
}
private boolean generateTable(TableDesc table) throws IOException {
- TableGenConfig config = new TableGenConfig(table);
+ TableGenConfig config = new TableGenConfig(table, this);
if (!config.needGen)
return false;
-
+
ByteArrayOutputStream bout = new ByteArrayOutputStream();
PrintWriter pout = new PrintWriter(new OutputStreamWriter(bout, "UTF-8"));
-
+
generateTableInternal(table, config, pout);
-
+
pout.close();
bout.close();
-
- byte[] content = bout.toByteArray();
- if (outprint) {
- System.out.println("Generated " + path(table));
- System.out.println(Bytes.toString(content));
- }
-
- outputStore.putResource(path(table), new ByteArrayInputStream(content), System.currentTimeMillis());
+
+ saveResource(bout.toByteArray(), path(table));
return true;
}
@@ -107,33 +108,115 @@ public class ModelDataGenerator {
ColumnDesc[] columns = table.getColumns();
ColumnGenerator[] colGens = new ColumnGenerator[columns.length];
Iterator<String>[] colIters = new Iterator[columns.length];
-
+
// config.rows is either a multiplier (0,1] or an absolute row number
int tableRows = (int) ((config.rows > 1) ? config.rows : targetRows * config.rows);
tableRows = Math.max(1, tableRows);
-
+
// same seed for all columns, to ensure composite FK columns generate correct pairs
long seed = System.currentTimeMillis();
-
+
for (int i = 0; i < columns.length; i++) {
colGens[i] = new ColumnGenerator(columns[i], tableRows, this);
colIters[i] = colGens[i].generate(seed);
}
-
+
for (int i = 0; i < tableRows; i++) {
for (int c = 0; c < columns.length; c++) {
if (c > 0)
out.print(",");
-
+
String v = colIters[c].next();
Preconditions.checkState(v == null || !v.contains(","));
-
+
out.print(v == null ? "\\N" : v); // \N is null for hive
}
out.print("\n");
}
}
+ private void generateDDL(Set<TableDesc> tables) throws IOException {
+
+ ByteArrayOutputStream bout = new ByteArrayOutputStream();
+ PrintWriter pout = new PrintWriter(new OutputStreamWriter(bout, "UTF-8"));
+
+ generateDatabaseDDL(tables, pout);
+ generateCreateTableDDL(tables, pout);
+ generateLoadDataDDL(tables, pout);
+
+ pout.close();
+ bout.close();
+
+ saveResource(bout.toByteArray(), path(model));
+ }
+
+ private void generateDatabaseDDL(Set<TableDesc> tables, PrintWriter out) {
+ Set<String> dbs = new HashSet<>();
+ for (TableDesc t : tables) {
+ String db = t.getDatabase();
+ if (StringUtils.isBlank(db) == false && "DEFAULT".equals(db) == false)
+ dbs.add(db);
+ }
+
+ for (String db : dbs) {
+ out.print("CREATE DATABASE IF NOT EXISTS " + db + ";\n");
+ }
+ out.print("\n");
+ }
+
+ private void generateCreateTableDDL(Set<TableDesc> tables, PrintWriter out) {
+ for (TableDesc t : tables) {
+ out.print("DROP TABLE IF EXISTS " + t.getIdentity() + ";\n");
+
+ out.print("CREATE TABLE " + t.getIdentity() + "(" + "\n");
+
+ for (int i = 0; i < t.getColumns().length; i++) {
+ ColumnDesc col = t.getColumns()[i];
+ out.print(" ");
+ if (i > 0) {
+ out.print(",");
+ }
+ out.print(col.getName() + " " + hiveType(col.getType()) + "\n");
+ }
+
+ out.print(")" + "\n");
+ out.print("ROW FORMAT DELIMITED FIELDS TERMINATED BY ','" + "\n");
+ out.print("STORED AS TEXTFILE" + ";\n");
+ out.print("\n");
+ }
+ }
+
+ private String hiveType(DataType type) {
+ String t = type.toString();
+ if (t.startsWith("varchar"))
+ return "string";
+ else if (t.startsWith("integer"))
+ return "int";
+ else
+ return t;
+ }
+
+ private void generateLoadDataDDL(Set<TableDesc> tables, PrintWriter out) {
+ for (TableDesc t : tables) {
+ out.print("LOAD DATA LOCAL INPATH '" + t.getIdentity() + ".csv' OVERWRITE INTO TABLE " + t.getIdentity() + ";\n");
+ }
+ }
+
+ public boolean existsInStore(TableDesc table) throws IOException {
+ return outputStore.exists(path(table));
+ }
+
+ public boolean isPK(ColumnDesc col) {
+ for (JoinTableDesc joinTable : model.getJoinTables()) {
+ JoinDesc join = joinTable.getJoin();
+ for (TblColRef pk : join.getPrimaryKeyColumns()) {
+ if (pk.getColumnDesc().equals(col))
+ return true;
+ }
+ }
+ return false;
+ }
+
public List<String> getPkValuesIfIsFk(ColumnDesc fk) throws IOException {
JoinTableDesc[] joinTables = model.getJoinTables();
for (int i = 0; i < joinTables.length; i++) {
@@ -141,7 +224,7 @@ public class ModelDataGenerator {
ColumnDesc pk = findPk(joinTable, fk);
if (pk == null)
continue;
-
+
List<String> pkValues = getPkValues(pk);
if (pkValues != null)
return pkValues;
@@ -157,15 +240,14 @@ public class ModelDataGenerator {
}
return null;
}
-
+
private List<String> getPkValues(ColumnDesc pk) throws IOException {
- String path = path(pk.getTable());
- if (outputStore.exists(path) == false)
+ if (existsInStore(pk.getTable()) == false)
return null;
List<String> r = new ArrayList<>();
-
- BufferedReader in = new BufferedReader(new InputStreamReader(outputStore.getResource(path).inputStream, "UTF-8"));
+
+ BufferedReader in = new BufferedReader(new InputStreamReader(outputStore.getResource(path(pk.getTable())).inputStream, "UTF-8"));
try {
String line;
while ((line = in.readLine()) != null) {
@@ -177,15 +259,22 @@ public class ModelDataGenerator {
return r;
}
- private void generateDDL(Set<TableDesc> generated) {
- // TODO Auto-generated method stub
-
+ private void saveResource(byte[] content, String path) throws IOException {
+ if (outprint) {
+ System.out.println("Generated " + path);
+ System.out.println(Bytes.toString(content));
+ }
+ outputStore.putResource(path, new ByteArrayInputStream(content), System.currentTimeMillis());
}
private String path(TableDesc table) {
return outputPath + "/" + table.getIdentity() + ".csv";
}
-
+
+ private String path(DataModelDesc model) {
+ return outputPath + "/" + "ddl_" + model.getName() + ".sql";
+ }
+
public DataModelDesc getModle() {
return model;
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/5957ee11/core-metadata/src/main/java/org/apache/kylin/source/datagen/TableGenConfig.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/TableGenConfig.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/TableGenConfig.java
index 1c00d3d..be948c1 100644
--- a/core-metadata/src/main/java/org/apache/kylin/source/datagen/TableGenConfig.java
+++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/TableGenConfig.java
@@ -18,6 +18,7 @@
package org.apache.kylin.source.datagen;
+import java.io.IOException;
import java.util.Map;
import org.apache.kylin.metadata.model.TableDesc;
@@ -27,18 +28,21 @@ public class TableGenConfig {
boolean needGen;
double rows;
- public TableGenConfig(TableDesc table) {
- init(table.getDataGen());
- }
-
- private void init(String dataGen) {
+ public TableGenConfig(TableDesc table, ModelDataGenerator modelGen) throws IOException {
+ String dataGen = table.getDataGen();
+ if (dataGen == null && modelGen.existsInStore(table) == false) {
+ dataGen = "";
+ }
+
if (dataGen == null)
return;
needGen = true;
Map<String, String> config = Util.parseEqualCommaPairs(dataGen, "rows");
- rows = Util.parseDouble(config, "rows", 1.0);
+
+ // config.rows is either a multiplier (0,1] or an absolute row number
+ rows = Util.parseDouble(config, "rows", modelGen.getModle().isFactTable(table.getIdentity()) ? 1.0 : 20);
}
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/5957ee11/core-metadata/src/main/java/org/apache/kylin/source/datagen/Util.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/Util.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/Util.java
index f2e8dbf..ca27bbf 100644
--- a/core-metadata/src/main/java/org/apache/kylin/source/datagen/Util.java
+++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/Util.java
@@ -21,12 +21,14 @@ package org.apache.kylin.source.datagen;
import java.util.LinkedHashMap;
import java.util.Map;
+import org.apache.commons.lang3.StringUtils;
+
public class Util {
static Map<String, String> parseEqualCommaPairs(String equalCommaPairs, String defaultKey) {
Map<String, String> r = new LinkedHashMap<>();
- if (equalCommaPairs == null)
+ if (StringUtils.isBlank(equalCommaPairs))
return r;
for (String s : equalCommaPairs.split(",")) {
http://git-wip-us.apache.org/repos/asf/kylin/blob/5957ee11/core-metadata/src/test/java/org/apache/kylin/source/datagen/DataGenTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/source/datagen/DataGenTest.java b/core-metadata/src/test/java/org/apache/kylin/source/datagen/DataGenTest.java
index 70aba04..82455ab 100644
--- a/core-metadata/src/test/java/org/apache/kylin/source/datagen/DataGenTest.java
+++ b/core-metadata/src/test/java/org/apache/kylin/source/datagen/DataGenTest.java
@@ -41,12 +41,27 @@ public class DataGenTest extends LocalFileMetadataTestCase {
}
@Test
- public void testBasics() throws IOException {
- MetadataManager mgr = MetadataManager.getInstance(KylinConfig.getInstanceFromEnv());
- DataModelDesc model = mgr.getDataModelDesc("test_kylin_inner_join_model_desc");
+ public void testCIConfigured() throws IOException {
+ DataModelDesc model = getModel("test_kylin_inner_join_model_desc");
+ ModelDataGenerator gen = new ModelDataGenerator(model, 100);
+ gen.outprint = true;
+
+ gen.generate();
+ }
+
+ @Test
+ public void testSSBNoConfig() throws IOException {
+ DataModelDesc model = getModel("ssb");
ModelDataGenerator gen = new ModelDataGenerator(model, 100);
gen.outprint = true;
gen.generate();
}
+
+ private DataModelDesc getModel(String name) {
+ MetadataManager mgr = MetadataManager.getInstance(KylinConfig.getInstanceFromEnv());
+ DataModelDesc model = mgr.getDataModelDesc(name);
+ return model;
+ }
+
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/5957ee11/examples/test_case_data/localmeta/table/DEFAULT.TEST_KYLIN_FACT.json
----------------------------------------------------------------------
diff --git a/examples/test_case_data/localmeta/table/DEFAULT.TEST_KYLIN_FACT.json b/examples/test_case_data/localmeta/table/DEFAULT.TEST_KYLIN_FACT.json
index 37d8e56..74eb045 100644
--- a/examples/test_case_data/localmeta/table/DEFAULT.TEST_KYLIN_FACT.json
+++ b/examples/test_case_data/localmeta/table/DEFAULT.TEST_KYLIN_FACT.json
@@ -11,7 +11,7 @@
"id" : "2",
"name" : "CAL_DT",
"datatype" : "date",
- "data_gen" : "FK,order,null"
+ "data_gen" : "FK,order"
}, {
"id" : "3",
"name" : "LSTG_FORMAT_NAME",