You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2016/12/16 08:07:14 UTC

[13/14] kylin git commit: halfway

halfway


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/f6208f8a
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/f6208f8a
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/f6208f8a

Branch: refs/heads/KYLIN-2283
Commit: f6208f8ae330cf8260de63909c43a4ea5760c184
Parents: f2377db
Author: Li Yang <li...@apache.org>
Authored: Thu Dec 15 18:51:00 2016 +0800
Committer: Li Yang <li...@apache.org>
Committed: Fri Dec 16 16:06:48 2016 +0800

----------------------------------------------------------------------
 .../apache/kylin/metadata/model/ColumnDesc.java |   8 ++
 .../apache/kylin/metadata/model/TableDesc.java  |   9 ++
 .../kylin/source/datagen/ColumnGenConfig.java   |  86 ++++++++++++
 .../kylin/source/datagen/ColumnGenerator.java   |  41 ++++++
 .../source/datagen/ModelDataGenerator.java      | 130 +++++++++++++++++++
 .../kylin/source/datagen/TableGenConfig.java    |  41 ++++++
 .../org/apache/kylin/source/datagen/Util.java   |  68 ++++++++++
 7 files changed, 383 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/f6208f8a/core-metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java b/core-metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java
index 2da1f5e..7105ede 100644
--- a/core-metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java
+++ b/core-metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java
@@ -45,6 +45,10 @@ public class ColumnDesc implements Serializable {
     @JsonInclude(JsonInclude.Include.NON_NULL)
     private String comment;
 
+    @JsonProperty("data_gen")
+    @JsonInclude(JsonInclude.Include.NON_NULL)
+    private String dataGen;
+
     // parsed from data type
     private DataType type;
     private DataType upgradedType;
@@ -148,6 +152,10 @@ public class ColumnDesc implements Serializable {
     public void setNullable(boolean nullable) {
         this.isNullable = nullable;
     }
+    
+    /**
+     * Returns the raw data generation setting of this column.
+     *
+     * @return the string bound to the {@code data_gen} JSON property; may be {@code null}, the field has no initializer
+     */
+    public String getDataGen() {
+        return this.dataGen;
+    }
 
     public void init(TableDesc table) {
         this.table = table;

http://git-wip-us.apache.org/repos/asf/kylin/blob/f6208f8a/core-metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java b/core-metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java
index ab8c465..e845da1 100644
--- a/core-metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java
+++ b/core-metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java
@@ -26,6 +26,7 @@ import org.apache.kylin.common.persistence.RootPersistentEntity;
 import org.apache.kylin.common.util.StringSplitter;
 
 import com.fasterxml.jackson.annotation.JsonAutoDetect;
+import com.fasterxml.jackson.annotation.JsonInclude;
 import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
 import com.fasterxml.jackson.annotation.JsonProperty;
 
@@ -47,6 +48,10 @@ public class TableDesc extends RootPersistentEntity implements ISourceAware {
     private int sourceType = ISourceAware.ID_HIVE;
     @JsonProperty("table_type")
     private String tableType;
+    
+    @JsonProperty("data_gen")
+    @JsonInclude(JsonInclude.Include.NON_NULL)
+    private String dataGen;
 
     private DatabaseDesc database = new DatabaseDesc();
 
@@ -160,6 +165,10 @@ public class TableDesc extends RootPersistentEntity implements ISourceAware {
         return getMaxColumnIndex() + 1;
     }
 
+    /**
+     * Returns the raw data generation setting of this table.
+     *
+     * @return the string bound to the {@code data_gen} JSON property; may be {@code null}, the field has no initializer
+     */
+    public String getDataGen() {
+        return this.dataGen;
+    }
+
     public void init() {
         if (name != null)
             name = name.toUpperCase();

http://git-wip-us.apache.org/repos/asf/kylin/blob/f6208f8a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java
new file mode 100644
index 0000000..91f5366
--- /dev/null
+++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.source.datagen;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.kylin.metadata.model.ColumnDesc;
+
+/**
+ * Data generation settings of a single column, parsed from the column's {@code data_gen} string.
+ * <p>
+ * The string is a comma separated list of {@code key=value} pairs; a token without {@code =}
+ * is taken as the {@code values} entry. The keys read here are {@code values},
+ * {@code cardinality}, {@code genNull} and {@code genNullPct}.
+ */
+public class ColumnGenConfig {
+
+    // discrete values, separated by '|' in the config string
+    List<String> values;
+
+    // random
+    boolean isRandom;
+    int randCardinality;
+
+    // ID
+    boolean isID;
+    int idStart;
+
+    // null handling
+    boolean genNull;
+    double genNullPct;
+
+    /**
+     * Builds the settings of the given column from its name and its {@code data_gen} string.
+     *
+     * @param col the column to configure
+     * @throws NumberFormatException if a numeric setting cannot be parsed
+     */
+    public ColumnGenConfig(ColumnDesc col) {
+        init(col.getName(), col.getDataGen());
+    }
+
+    /**
+     * Parses {@code dataGen} and fills all fields; settings that are absent fall back to
+     * defaults, some of which are guessed from the column name.
+     */
+    private void init(String col, String dataGen) {
+        // data_gen is optional on a column; an absent setting behaves like an empty one
+        String spec = (dataGen == null) ? "" : dataGen;
+        Map<String, String> config = Util.parseEqualCommaPairs(spec, "values");
+
+        // String.split() takes a regex: a bare "|" is an empty alternation that splits between
+        // every character, so the pipe must be escaped to work as the value separator
+        values = Arrays.asList(Util.parseString(config, "values", "").split("\\|"));
+
+        // split() drops trailing empty strings, so the list can be empty (e.g. for "|")
+        if (!values.isEmpty() && "ID".equals(values.get(0))) {
+            isID = true;
+            // an optional second value gives the first ID, e.g. "ID|100"
+            idStart = (values.size() > 1) ? Integer.parseInt(values.get(1).trim()) : 0;
+        } else {
+            isRandom = true;
+            randCardinality = Util.parseInt(config, "cardinality", guessCardinality(col));
+        }
+
+        genNull = Util.parseBoolean(config, "genNull", guessGenNull(col));
+        genNullPct = Util.parseDouble(config, "genNullPct", 0.01);
+    }
+
+    /**
+     * Guesses the cardinality from the column name: the first '_' separated part of the form
+     * {@code C<number>} wins, e.g. a part {@code C100} yields 100.
+     *
+     * @return the number found, or 0 if no part of the name matches
+     */
+    private int guessCardinality(String col) {
+        for (String s : col.split("_")) {
+            if (s.startsWith("C")) {
+                try {
+                    return Integer.parseInt(s.substring(1));
+                } catch (NumberFormatException ignored) {
+                    // not a C<number> part, keep looking at the remaining parts
+                }
+            }
+        }
+        return 0;
+    }
+
+    /**
+     * Guesses whether nulls should be generated: true when the column name contains "_NULL".
+     */
+    private boolean guessGenNull(String col) {
+        return col.contains("_NULL");
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/f6208f8a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java
new file mode 100644
index 0000000..c4c4fe7
--- /dev/null
+++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.source.datagen;
+
+import java.util.Iterator;
+
+import org.apache.kylin.metadata.model.ColumnDesc;
+
+/**
+ * Generator for the values of one column of a generated table.
+ * <p>
+ * Work in progress: {@link #generate()} is a placeholder that returns {@code null} and none of
+ * the fields is read yet.
+ */
+public class ColumnGenerator {
+
+    private final ColumnDesc targetCol;
+    private final int targetRows;
+    private final ModelDataGenerator modelGen;
+
+    /**
+     * @param col      the column this generator is created for
+     * @param nRows    the row count passed in by the caller
+     * @param modelGen the model level generator that creates this column generator
+     */
+    public ColumnGenerator(ColumnDesc col, int nRows, ModelDataGenerator modelGen) {
+        targetCol = col;
+        targetRows = nRows;
+        this.modelGen = modelGen;
+    }
+
+    /**
+     * @return currently always {@code null}; value generation is not implemented yet
+     */
+    public Iterator<String> generate() {
+        return null;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/f6208f8a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ModelDataGenerator.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ModelDataGenerator.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ModelDataGenerator.java
new file mode 100644
index 0000000..0f67377
--- /dev/null
+++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ModelDataGenerator.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.source.datagen;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+
+import org.apache.kylin.common.persistence.ResourceStore;
+import org.apache.kylin.metadata.model.ColumnDesc;
+import org.apache.kylin.metadata.model.DataModelDesc;
+import org.apache.kylin.metadata.model.JoinTableDesc;
+import org.apache.kylin.metadata.model.TableDesc;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Generates comma separated data for the join tables of a data model and stores one
+ * {@code .csv} resource per generated table in a {@link ResourceStore}.
+ */
+public class ModelDataGenerator {
+
+    private final DataModelDesc model;
+    private final int targetRows;
+    private final ResourceStore outputStore;
+    private final String outputPath;
+
+    /**
+     * Creates a generator that writes below "/data" of the resource store obtained from the
+     * model's config.
+     *
+     * @param model the model whose tables are generated
+     * @param nRows the base row count, scaled per table by its scale factor
+     */
+    public ModelDataGenerator(DataModelDesc model, int nRows) {
+        this(model, nRows, ResourceStore.getStore(model.getConfig()), "/data");
+    }
+
+    /**
+     * @param model       the model whose tables are generated
+     * @param nRows       the base row count, scaled per table by its scale factor
+     * @param outputStore the store receiving the generated resources
+     * @param outputPath  the path prefix of the generated resources
+     */
+    public ModelDataGenerator(DataModelDesc model, int nRows, ResourceStore outputStore, String outputPath) {
+        this.model = model;
+        this.targetRows = nRows;
+        this.outputStore = outputStore;
+        this.outputPath = outputPath;
+    }
+
+    /**
+     * Walks the model's join tables from last to first, generates every distinct table that
+     * asks for generation at most once, then passes the generated set to the DDL step.
+     *
+     * @throws IOException if the output cannot be encoded or stored
+     */
+    public void generate() throws IOException {
+        Set<TableDesc> generated = new HashSet<>();
+
+        JoinTableDesc[] allTables = model.getJoinTables();
+        for (int i = allTables.length - 1; i >= 0; i--) {
+            TableDesc table = allTables[i].getTableRef().getTableDesc();
+            if (generated.contains(table))
+                continue;
+
+            if (generateTable(table))
+                generated.add(table);
+        }
+
+        generateDDL(generated);
+    }
+
+    /**
+     * Generates one table into memory and stores it as a single resource.
+     *
+     * @return false if the table's config does not ask for generation, true once it is stored
+     */
+    private boolean generateTable(TableDesc table) throws IOException {
+        TableGenConfig config = new TableGenConfig(table);
+        if (!config.needGen)
+            return false;
+
+        ByteArrayOutputStream bout = new ByteArrayOutputStream();
+        // closing the writer flushes it into bout; try-with-resources also closes it on failure
+        try (PrintWriter pout = new PrintWriter(new OutputStreamWriter(bout, "UTF-8"))) {
+            generateTableInternal(table, config, pout);
+        }
+
+        outputStore.putResource(path(table), new ByteArrayInputStream(bout.toByteArray()), System.currentTimeMillis());
+        return true;
+    }
+
+    /**
+     * Writes the rows of the table, one line per row with the column values separated by commas.
+     */
+    private void generateTableInternal(TableDesc table, TableGenConfig config, PrintWriter out) {
+        ColumnDesc[] columns = table.getColumns();
+
+        // generic arrays cannot be created directly; only Iterator<String> instances are stored
+        @SuppressWarnings("unchecked")
+        Iterator<String>[] colIters = new Iterator[columns.length];
+
+        // scale the requested row count for this table, but always produce at least one row
+        int tableRows = Math.max(1, (int) (targetRows * config.scaleFactor));
+
+        for (int i = 0; i < columns.length; i++) {
+            colIters[i] = new ColumnGenerator(columns[i], tableRows, this).generate();
+        }
+
+        for (int i = 0; i < tableRows; i++) {
+            for (int c = 0; c < columns.length; c++) {
+                if (c > 0)
+                    out.print(",");
+
+                String v = colIters[c].next();
+                // values are written unquoted, so a comma inside a value would corrupt the row
+                Preconditions.checkState(!v.contains(","), "Value '%s' of column %s contains a comma", v, columns[c].getName());
+
+                out.print(v);
+            }
+            out.print("\n");
+        }
+    }
+
+    private void generateDDL(Set<TableDesc> generated) {
+        // TODO not implemented yet
+    }
+
+    /** Resource path of a table's data: output path, table identity, ".csv" suffix. */
+    private String path(TableDesc table) {
+        return outputPath + "/" + table.getIdentity() + ".csv";
+    }
+
+    /**
+     * @return the model this generator works on
+     */
+    public DataModelDesc getModel() {
+        return model;
+    }
+
+    /**
+     * @return the model this generator works on
+     * @deprecated misspelled name kept for existing callers, use {@link #getModel()}
+     */
+    @Deprecated
+    public DataModelDesc getModle() {
+        return getModel();
+    }
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/f6208f8a/core-metadata/src/main/java/org/apache/kylin/source/datagen/TableGenConfig.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/TableGenConfig.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/TableGenConfig.java
new file mode 100644
index 0000000..9e6fa73
--- /dev/null
+++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/TableGenConfig.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.source.datagen;
+
+import java.util.Map;
+
+import org.apache.kylin.metadata.model.TableDesc;
+
+/**
+ * Data generation settings of a table, parsed from the table's {@code data_gen} string.
+ * <p>
+ * The string is a comma separated list of {@code key=value} pairs; a token without {@code =}
+ * is taken as the {@code scaleFactor} entry.
+ */
+public class TableGenConfig {
+
+    // whether data should be generated for the table at all
+    boolean needGen;
+    // multiplier applied to the requested row count, 1.0 unless configured
+    double scaleFactor;
+
+    /**
+     * @param table the table to configure
+     * @throws NumberFormatException if the configured scale factor is not a number
+     */
+    public TableGenConfig(TableDesc table) {
+        init(table.getDataGen());
+    }
+
+    private void init(String dataGen) {
+        // data_gen is optional: a missing or blank setting means "do not generate". It must not
+        // reach the parser, where the blank default entry would fail in Double.parseDouble("")
+        if (dataGen == null || dataGen.trim().isEmpty()) {
+            needGen = false;
+            scaleFactor = 1.0;
+            return;
+        }
+
+        needGen = true;
+        Map<String, String> config = Util.parseEqualCommaPairs(dataGen, "scaleFactor");
+        scaleFactor = Util.parseDouble(config, "scaleFactor", 1.0);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/f6208f8a/core-metadata/src/main/java/org/apache/kylin/source/datagen/Util.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/Util.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/Util.java
new file mode 100644
index 0000000..4b87ee2
--- /dev/null
+++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/Util.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.source.datagen;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+/**
+ * Helpers for parsing {@code data_gen} configuration strings.
+ */
+public class Util {
+
+    /**
+     * Parses a comma separated list of {@code key=value} pairs into an insertion ordered map.
+     * Keys and values are trimmed. A token without {@code =} is stored under {@code defaultKey}.
+     * Blank tokens (empty input, doubled or trailing commas) are skipped, so they never
+     * register an empty value under the default key.
+     *
+     * @param equalCommaPairs the string to parse; {@code null} yields an empty map
+     * @param defaultKey      the key for a token that carries no {@code =}
+     * @return the parsed pairs, never {@code null}
+     * @throws IllegalStateException if a token without {@code =} is met while
+     *         {@code defaultKey} already has a value
+     */
+    static Map<String, String> parseEqualCommaPairs(String equalCommaPairs, String defaultKey) {
+        Map<String, String> r = new LinkedHashMap<>();
+        if (equalCommaPairs == null)
+            return r;
+
+        for (String s : equalCommaPairs.split(",")) {
+            if (s.trim().isEmpty())
+                continue;
+
+            int equal = s.indexOf('=');
+            if (equal < 0) {
+                if (r.containsKey(defaultKey))
+                    throw new IllegalStateException("More than one value for '" + defaultKey + "' in: " + equalCommaPairs);
+                r.put(defaultKey, s.trim());
+            } else {
+                r.put(s.substring(0, equal).trim(), s.substring(equal + 1).trim());
+            }
+        }
+        return r;
+    }
+
+    /**
+     * @return the value of {@code key} parsed as double, or {@code dft} if the key is absent
+     * @throws NumberFormatException if the value is present but not a number
+     */
+    static double parseDouble(Map<String, String> config, String key, double dft) {
+        if (!config.containsKey(key))
+            return dft;
+        return Double.parseDouble(config.get(key));
+    }
+
+    /**
+     * @return the value of {@code key} parsed with {@link Boolean#parseBoolean(String)},
+     *         or {@code dft} if the key is absent
+     */
+    static boolean parseBoolean(Map<String, String> config, String key, boolean dft) {
+        if (!config.containsKey(key))
+            return dft;
+        return Boolean.parseBoolean(config.get(key));
+    }
+
+    /**
+     * @return the value of {@code key} parsed as int, or {@code dft} if the key is absent
+     * @throws NumberFormatException if the value is present but not an integer
+     */
+    public static int parseInt(Map<String, String> config, String key, int dft) {
+        if (!config.containsKey(key))
+            return dft;
+        return Integer.parseInt(config.get(key));
+    }
+
+    /**
+     * @return the value of {@code key}, or {@code dft} if the key is absent
+     */
+    public static String parseString(Map<String, String> config, String key, String dft) {
+        if (!config.containsKey(key))
+            return dft;
+        return config.get(key);
+    }
+}