You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2016/12/16 08:07:15 UTC
[14/14] kylin git commit: halfway
halfway
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/e79626e7
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/e79626e7
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/e79626e7
Branch: refs/heads/KYLIN-2283
Commit: e79626e7c35a914d011f0425025f03e8fac1fbf9
Parents: f6208f8
Author: Yang Li <li...@apache.org>
Authored: Fri Dec 16 06:24:31 2016 +0800
Committer: Li Yang <li...@apache.org>
Committed: Fri Dec 16 16:06:48 2016 +0800
----------------------------------------------------------------------
.../kylin/source/datagen/ColumnGenConfig.java | 30 ++-
.../kylin/source/datagen/ColumnGenerator.java | 202 ++++++++++++++++++-
2 files changed, 222 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/e79626e7/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java
index 91f5366..f174eef 100644
--- a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java
+++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java
@@ -19,30 +19,34 @@
package org.apache.kylin.source.datagen;
import java.util.Arrays;
-import java.util.Collections;
-import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
-import java.util.Set;
import org.apache.kylin.metadata.model.ColumnDesc;
public class ColumnGenConfig {
+ public static final String $RANDOM = "${RANDOM}";
+
// discrete values
+ boolean isDiscrete;
List<String> values;
// random
boolean isRandom;
- int randCardinality;
+ String randFormat;
+ int randStart;
+ int randEnd;
// ID
boolean isID;
int idStart;
- // null handling
+ // general
+ int cardinality;
boolean genNull;
double genNullPct;
+ boolean order;
public ColumnGenConfig(ColumnDesc col) {
init(col.getName(), col.getDataGen());
@@ -53,17 +57,25 @@ public class ColumnGenConfig {
Map<String, String> config = Util.parseEqualCommaPairs(dataGen, "values");
values = Arrays.asList(Util.parseString(config, "values", "").split("|"));
+ if (values.size() == 1 && values.get(0).isEmpty())
+ values.set(0, $RANDOM);
if ("ID".equals(values.get(0))) {
isID = true;
idStart = (values.size() > 1) ? Integer.parseInt(values.get(1)) : 0;
- } else {
+ } else if (values.get(0).contains($RANDOM)) {
isRandom = true;
- randCardinality = Util.parseInt(config, "cardinality", guessCardinality(col));
+ randFormat = values.get(0);
+ randStart = (values.size() > 1) ? Integer.parseInt(values.get(1)) : 0;
+ randEnd = (values.size() > 2) ? Integer.parseInt(values.get(2)) : 0;
+ } else {
+ isDiscrete = true;
}
- genNull = Util.parseBoolean(config, "genNull", guessGenNull(col));
- genNullPct = Util.parseDouble(config, "genNullPct", 0.01);
+ cardinality = Util.parseInt(config, "card", guessCardinality(col));
+ genNull = Util.parseBoolean(config, "null", guessGenNull(col));
+ genNullPct = Util.parseDouble(config, "nullPct", 0.01);
+ order = Util.parseBoolean(config, "order", false);
}
private int guessCardinality(String col) {
http://git-wip-us.apache.org/repos/asf/kylin/blob/e79626e7/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java
index c4c4fe7..409a1e9 100644
--- a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java
+++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java
@@ -18,24 +18,224 @@
package org.apache.kylin.source.datagen;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
import java.util.Iterator;
+import java.util.List;
+import java.util.Random;
+import java.util.TreeSet;
+import org.apache.kylin.metadata.datatype.DataType;
import org.apache.kylin.metadata.model.ColumnDesc;
public class ColumnGenerator {
+ final private ColumnGenConfig conf;
final private ColumnDesc targetCol;
final private int targetRows;
final private ModelDataGenerator modelGen;
public ColumnGenerator(ColumnDesc col, int nRows, ModelDataGenerator modelGen) {
+ this.conf = new ColumnGenConfig(col); // generation settings for this column; generate() reads them to choose and wrap the value source
this.targetCol = col;
this.targetRows = nRows;
this.modelGen = modelGen;
}
public Iterator<String> generate() {
- return null;
+     // Stage 1: choose the raw value source according to the column config.
+     final Iterator<String> base;
+     if (conf.isID) {
+         base = new IDIter(conf.idStart);
+     } else if (conf.isRandom) {
+         // the upper bound is at least randStart + cardinality, even when randEnd is smaller
+         int randUpper = Math.max(conf.randEnd, conf.randStart + conf.cardinality);
+         base = new RandomIter(targetCol.getType(), conf.randFormat, conf.randStart, randUpper);
+     } else {
+         base = new DiscreteIter(conf.values);
+     }
+
+     // Stage 2: optional decorators, applied in a fixed order: cardinality cap, null injection, ordering.
+     Iterator<String> capped = conf.cardinality > 0 ? new CardinalityIter(base, conf.cardinality) : base;
+     Iterator<String> withNulls = conf.genNull ? new AddNullIter(capped, conf.genNullPct) : capped;
+     return conf.order ? new OrderIter(withNulls, targetRows) : withNulls;
}
+ private static class RandomIter implements Iterator<String> { // placeholder for the random-value source; not implemented yet
+
+ public RandomIter(DataType type, String format, int randStart, int randEnd) {
+ // TODO implement: all four arguments are currently ignored and nothing is stored
+ }
+
+ @Override
+ public boolean hasNext() {
+ // TODO implement: the stub always reports that no value is available
+ return false;
+ }
+
+ @Override
+ public String next() {
+ // TODO implement: the stub always hands back null instead of a generated value
+ return null;
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+
+ }
+
+ /** Endless sequence of consecutive integers, each rendered as a decimal string. */
+ private static class IDIter implements Iterator<String> {
+
+     // the number the next call to next() will hand out
+     int cursor;
+
+     public IDIter(int start) {
+         this.cursor = start;
+     }
+
+     /** Never runs dry: there is always another id. */
+     @Override
+     public boolean hasNext() {
+         return true;
+     }
+
+     /** Returns the current number as text, then advances by one. */
+     @Override
+     public String next() {
+         String id = String.valueOf(cursor);
+         cursor++;
+         return id;
+     }
+
+     /** Removal is not supported. */
+     @Override
+     public void remove() {
+         throw new UnsupportedOperationException();
+     }
+ }
+
+ /** Endless source that picks a uniformly random entry of a fixed value list on every call. */
+ private static class DiscreteIter implements Iterator<String> {
+
+     private List<String> candidates;
+     private Random picker = new Random();
+
+     public DiscreteIter(List<String> values) {
+         this.candidates = values;
+     }
+
+     /** Always true, even when the value list is empty. */
+     @Override
+     public boolean hasNext() {
+         return true;
+     }
+
+     /** Returns a random entry of the list, or null when the list has no entries. */
+     @Override
+     public String next() {
+         int size = candidates.size();
+         if (size == 0) {
+             return null;
+         }
+         return candidates.get(picker.nextInt(size));
+     }
+
+     /** Removal is not supported. */
+     @Override
+     public void remove() {
+         throw new UnsupportedOperationException();
+     }
+ }
+
+ /**
+  * Caps the number of distinct non-null values coming out of the wrapped iterator.
+  * While fewer than {@code card} distinct values are remembered, every value passes through
+  * unchanged and is remembered. Once the limit is reached, each value is replaced by the greatest
+  * remembered value that is less than or equal to it, or by the smallest remembered value when
+  * none qualifies. Null values pass through untouched and are never remembered.
+  */
+ private static class CardinalityIter implements Iterator<String> {
+
+     private final Iterator<String> input;
+     private final int card;
+     private final TreeSet<String> cache;
+
+     /**
+      * @param input the iterator whose values are capped
+      * @param card  maximum number of distinct non-null values to let through; must be positive
+      * @throws IllegalArgumentException if {@code card} is not positive
+      */
+     public CardinalityIter(Iterator<String> input, int card) {
+         // explicit check instead of assert: with assertions disabled a non-positive limit would
+         // only surface later as NoSuchElementException from cache.first() on the empty set
+         if (card <= 0) {
+             throw new IllegalArgumentException("card must be positive, got " + card);
+         }
+         this.input = input;
+         this.card = card;
+         this.cache = new TreeSet<String>();
+     }
+
+     /** Delegates to the wrapped iterator. */
+     @Override
+     public boolean hasNext() {
+         return input.hasNext();
+     }
+
+     /** Returns the next input value, folded onto a remembered value once the limit is reached. */
+     @Override
+     public String next() {
+         String r = input.next();
+
+         // a naturally ordered TreeSet rejects null in add() and floor(), so let nulls through as-is
+         if (r == null) {
+             return null;
+         }
+
+         if (cache.size() < card) {
+             cache.add(r);
+             return r;
+         }
+
+         String folded = cache.floor(r);
+         return folded == null ? cache.first() : folded;
+     }
+
+     /** Removal is not supported. */
+     @Override
+     public void remove() {
+         throw new UnsupportedOperationException();
+     }
+ }
+
+ /** Endless wrapper that randomly swaps values of another iterator for null. */
+ private static class AddNullIter implements Iterator<String> {
+
+     private Iterator<String> source;
+     private double nullRatio;
+     private Random dice = new Random();
+
+     public AddNullIter(Iterator<String> input, double nullPct) {
+         this.source = input;
+         this.nullRatio = nullPct;
+     }
+
+     /** Always true; once the wrapped iterator is exhausted, next() keeps returning null. */
+     @Override
+     public boolean hasNext() {
+         return true;
+     }
+
+     /**
+      * Draws a random number first; when it falls below the null ratio the result is null and the
+      * wrapped iterator is left untouched. Otherwise the next wrapped value is returned, or null
+      * when the wrapped iterator has nothing left.
+      */
+     @Override
+     public String next() {
+         boolean emitNull = dice.nextDouble() < nullRatio;
+         if (emitNull) {
+             return null;
+         }
+         if (!source.hasNext()) {
+             return null;
+         }
+         return source.next();
+     }
+
+     /** Removal is not supported. */
+     @Override
+     public void remove() {
+         throw new UnsupportedOperationException();
+     }
+ }
+
+ /**
+  * Reads up to {@code targetRows} values from another iterator when constructed and replays
+  * them in ascending natural order, with all null values placed first.
+  */
+ private static class OrderIter implements Iterator<String> {
+
+     private final Iterator<String> iter;
+
+     /**
+      * @param input      the iterator to drain; reading stops early if it runs out of values
+      * @param targetRows maximum number of values to read and replay
+      */
+     public OrderIter(Iterator<String> input, int targetRows) {
+         int capacity = Math.max(targetRows, 0);
+         ArrayList<String> nonNulls = new ArrayList<>(capacity);
+         int nullCount = 0;
+
+         // Collections.sort throws NullPointerException on null elements, so nulls
+         // are counted separately and re-inserted ahead of the sorted values.
+         for (int i = 0; i < targetRows && input.hasNext(); i++) {
+             String value = input.next();
+             if (value == null) {
+                 nullCount++;
+             } else {
+                 nonNulls.add(value);
+             }
+         }
+         Collections.sort(nonNulls);
+
+         ArrayList<String> cache = new ArrayList<>(nullCount + nonNulls.size());
+         cache.addAll(Collections.nCopies(nullCount, (String) null));
+         cache.addAll(nonNulls);
+
+         iter = cache.iterator();
+     }
+
+     /** True while buffered values remain. */
+     @Override
+     public boolean hasNext() {
+         return iter.hasNext();
+     }
+
+     /** Returns the next buffered value in sorted order. */
+     @Override
+     public String next() {
+         return iter.next();
+     }
+
+     /** Removal is not supported. */
+     @Override
+     public void remove() {
+         throw new UnsupportedOperationException();
+     }
+ }
}