You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2015/03/28 01:04:38 UTC
[01/50] incubator-kylin git commit: KYLIN-653 add a special
IIKeyValueCodecs and refactor FactDistinctColumnsMapper
Repository: incubator-kylin
Updated Branches:
refs/heads/streaming-localdict cc1fed44d -> 1ad301044
KYLIN-653 add a special IIKeyValueCodecs and refactor FactDistinctColumnsMapper
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/8e0695b2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/8e0695b2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/8e0695b2
Branch: refs/heads/streaming-localdict
Commit: 8e0695b26517d08675d7417c40ee773561c9e3cf
Parents: 9dd1512
Author: honma <ho...@ebay.com>
Authored: Thu Mar 26 15:16:41 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Thu Mar 26 15:17:32 2015 +0800
----------------------------------------------------------------------
.../invertedindex/model/IIKeyValueCodec.java | 91 ++++++++-------
.../model/IIKeyValueCodecWithState.java | 68 +++++++++++
.../hadoop/cube/FactDistinctColumnsMapper.java | 115 ++++---------------
.../cube/FactDistinctColumnsMapperBase.java | 81 +++++++++++++
4 files changed, 216 insertions(+), 139 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8e0695b2/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodec.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodec.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodec.java
index eedda4b..d9e20c4 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodec.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodec.java
@@ -37,42 +37,40 @@ import java.util.*;
*/
public class IIKeyValueCodec implements KeyValueCodec {
- public static final int SHARD_LEN = 2;
- public static final int TIMEPART_LEN = 8;
- public static final int COLNO_LEN = 2;
- private final TableRecordInfoDigest digest;
+ public static final int SHARD_LEN = 2;
+ public static final int TIMEPART_LEN = 8;
+ public static final int COLNO_LEN = 2;
+ protected final TableRecordInfoDigest digest;
public IIKeyValueCodec(TableRecordInfoDigest digest) {
this.digest = digest;
- }
+ }
@Override
- public Collection<IIRow> encodeKeyValue(Slice slice) {
- ArrayList<IIRow> result = Lists
- .newArrayList();
- ColumnValueContainer[] containers = slice.getColumnValueContainers();
- for (int col = 0; col < containers.length; col++) {
- if (containers[col] instanceof CompressedValueContainer) {
+ public Collection<IIRow> encodeKeyValue(Slice slice) {
+ ArrayList<IIRow> result = Lists.newArrayList();
+ ColumnValueContainer[] containers = slice.getColumnValueContainers();
+ for (int col = 0; col < containers.length; col++) {
+ if (containers[col] instanceof CompressedValueContainer) {
final IIRow row = collectKeyValues(slice, col, (CompressedValueContainer) containers[col]);
result.add(row);
} else {
- throw new IllegalArgumentException("Unknown container class "
- + containers[col].getClass());
+ throw new IllegalArgumentException("Unknown container class " + containers[col].getClass());
}
}
- return result;
- }
+ return result;
+ }
- private IIRow collectKeyValues(Slice slice, int col, CompressedValueContainer container) {
- ImmutableBytesWritable key = encodeKey(slice.getShard(), slice.getTimestamp(), col);
- ImmutableBytesWritable value = container.toBytes();
+ private IIRow collectKeyValues(Slice slice, int col, CompressedValueContainer container) {
+ ImmutableBytesWritable key = encodeKey(slice.getShard(), slice.getTimestamp(), col);
+ ImmutableBytesWritable value = container.toBytes();
final Dictionary<?> dictionary = slice.getLocalDictionaries().get(col);
if (dictionary == null) {
return new IIRow(key, value, new ImmutableBytesWritable(BytesUtil.EMPTY_BYTE_ARRAY));
} else {
return new IIRow(key, value, serialize(dictionary));
}
- }
+ }
private static Dictionary<?> deserialize(ImmutableBytesWritable dictBytes) {
try {
@@ -98,31 +96,31 @@ public class IIKeyValueCodec implements KeyValueCodec {
}
}
- ImmutableBytesWritable encodeKey(short shard, long timestamp, int col) {
- byte[] bytes = new byte[20];
- int len = encodeKey(shard, timestamp, col, bytes, 0);
- return new ImmutableBytesWritable(bytes, 0, len);
- }
+ ImmutableBytesWritable encodeKey(short shard, long timestamp, int col) {
+ byte[] bytes = new byte[20];
+ int len = encodeKey(shard, timestamp, col, bytes, 0);
+ return new ImmutableBytesWritable(bytes, 0, len);
+ }
- int encodeKey(short shard, long timestamp, int col, byte[] buf, int offset) {
- int i = offset;
+ int encodeKey(short shard, long timestamp, int col, byte[] buf, int offset) {
+ int i = offset;
- BytesUtil.writeUnsigned(shard, buf, i, SHARD_LEN);
- i += SHARD_LEN;
- BytesUtil.writeLong(timestamp, buf, i, TIMEPART_LEN);
- i += TIMEPART_LEN;
+ BytesUtil.writeUnsigned(shard, buf, i, SHARD_LEN);
+ i += SHARD_LEN;
+ BytesUtil.writeLong(timestamp, buf, i, TIMEPART_LEN);
+ i += TIMEPART_LEN;
- BytesUtil.writeUnsigned(col, buf, i, COLNO_LEN);
- i += COLNO_LEN;
+ BytesUtil.writeUnsigned(col, buf, i, COLNO_LEN);
+ i += COLNO_LEN;
- return i - offset;
- }
+ return i - offset;
+ }
@Override
- public Iterable<Slice> decodeKeyValue(Iterable<IIRow> kvs) {
+ public Iterable<Slice> decodeKeyValue(Iterable<IIRow> kvs) {
return new IIRowDecoder(digest, kvs.iterator());
-// return new Decoder(kvs, incompleteDigest);
- }
+ // return new Decoder(kvs, incompleteDigest);
+ }
private static TableRecordInfoDigest createDigest(int nColumns, boolean[] isMetric, String[] dataTypes, Map<Integer, Dictionary<?>> dictionaryMap) {
int[] dictMaxIds = new int[nColumns];
@@ -152,14 +150,16 @@ public class IIKeyValueCodec implements KeyValueCodec {
return new TableRecordInfoDigest(nColumns, byteFormLen, offsets, dictMaxIds, lengths, isMetric, dataTypes);
}
- private static class IIRowDecoder implements Iterable<Slice> {
+ protected static class IIRowDecoder implements Iterable<Slice> {
- private final TableRecordInfoDigest incompleteDigest;
- private final Iterator<IIRow> iterator;
+ protected final TableRecordInfoDigest incompleteDigest;
+ protected final Iterator<IIRow> iiRowIterator;
+ protected Iterator<IIRow> feedingIterator;//this is for extending
- private IIRowDecoder(TableRecordInfoDigest digest, Iterator<IIRow> iterator) {
+ protected IIRowDecoder(TableRecordInfoDigest digest, Iterator<IIRow> iiRowIterator) {
this.incompleteDigest = digest;
- this.iterator = iterator;
+ this.iiRowIterator = iiRowIterator;
+ this.feedingIterator = this.iiRowIterator;
}
@Override
@@ -167,7 +167,7 @@ public class IIKeyValueCodec implements KeyValueCodec {
return new Iterator<Slice>() {
@Override
public boolean hasNext() {
- return iterator.hasNext();
+ return iiRowIterator.hasNext();
}
@Override
@@ -181,8 +181,8 @@ public class IIKeyValueCodec implements KeyValueCodec {
short lastShard = 0;
long lastTimestamp = 0;
- while (iterator.hasNext() && columns < incompleteDigest.getColumnCount()) {
- final IIRow row = iterator.next();
+ while (feedingIterator.hasNext() && columns < incompleteDigest.getColumnCount()) {
+ final IIRow row = feedingIterator.next();
final ImmutableBytesWritable key = row.getKey();
int i = key.getOffset();
curShard = (short) BytesUtil.readUnsigned(key.get(), i, SHARD_LEN);
@@ -220,7 +220,6 @@ public class IIKeyValueCodec implements KeyValueCodec {
return slice;
}
-
@Override
public void remove() {
throw new UnsupportedOperationException();
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8e0695b2/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
new file mode 100644
index 0000000..a8e149a
--- /dev/null
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
@@ -0,0 +1,68 @@
+package org.apache.kylin.invertedindex.model;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+
+import com.google.common.base.Preconditions;
+import org.apache.kylin.invertedindex.index.Slice;
+import org.apache.kylin.invertedindex.index.TableRecordInfoDigest;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Created by Hongbin Ma(Binmahone) on 3/26/15.
+ */
+public class IIKeyValueCodecWithState extends IIKeyValueCodec {
+
+ public IIKeyValueCodecWithState(TableRecordInfoDigest digest) {
+ super(digest);
+ }
+
+ @Override
+ public Iterable<Slice> decodeKeyValue(Iterable<IIRow> kvs) {
+ return new IIRowDecoderWithState(digest, kvs.iterator());
+ }
+
+ protected static class IIRowDecoderWithState extends IIRowDecoder {
+
+ final ArrayList<IIRow> buffer = Lists.newArrayList();
+
+ private IIRowDecoderWithState(TableRecordInfoDigest digest, Iterator<IIRow> iiRowIterator) {
+ super(digest, iiRowIterator);
+ this.feedingIterator = buffer.iterator();
+ }
+
+ private Iterator<Slice> getSuperIterator() {
+ return super.iterator();
+ }
+
+ @Override
+ public Iterator<Slice> iterator() {
+ return new Iterator<Slice>() {
+ @Override
+ public boolean hasNext() {
+ while (buffer.size() < incompleteDigest.getColumnCount() && iiRowIterator.hasNext()) {
+ buffer.add(iiRowIterator.next());
+ }
+ return buffer.size() == incompleteDigest.getColumnCount();
+ }
+
+ @Override
+ public Slice next() {
+ while (buffer.size() < incompleteDigest.getColumnCount() && iiRowIterator.hasNext()) {
+ buffer.add(iiRowIterator.next());
+ }
+ Preconditions.checkArgument(buffer.size() == incompleteDigest.getColumnCount(), "not enough IIRows!");
+ Slice ret = IIRowDecoderWithState.this.getSuperIterator().next();
+ buffer.clear();
+ return ret;
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+ };
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8e0695b2/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapper.java
index d36fb95..3a50249 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapper.java
@@ -18,108 +18,53 @@
package org.apache.kylin.job.hadoop.cube;
-import java.io.ByteArrayOutputStream;
-import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
-import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
-import java.util.Set;
-import com.google.common.collect.Lists;
import org.apache.commons.lang3.StringUtils;
-import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.ShortWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
-
-import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.hll.HyperLogLogPlusCounter;
-import org.apache.kylin.common.mr.KylinMapper;
-import org.apache.kylin.cube.CubeInstance;
-import org.apache.kylin.cube.CubeManager;
-import org.apache.kylin.cube.cuboid.Cuboid;
import org.apache.kylin.cube.cuboid.CuboidScheduler;
-import org.apache.kylin.cube.model.CubeDesc;
-import org.apache.kylin.cube.model.RowKeyDesc;
-import org.apache.kylin.dict.DictionaryManager;
+import org.apache.kylin.cube.model.CubeJoinedFlatTableDesc;
import org.apache.kylin.dict.lookup.HiveTableReader;
import org.apache.kylin.job.constant.BatchConstants;
-import org.apache.kylin.job.hadoop.AbstractHadoopJob;
-import org.apache.kylin.cube.model.CubeJoinedFlatTableDesc;
-import org.apache.kylin.metadata.model.TblColRef;
+
+import com.google.common.collect.Lists;
/**
* @author yangli9
*/
-public class FactDistinctColumnsMapper<KEYIN> extends KylinMapper<KEYIN, HCatRecord, ShortWritable, Text> {
-
- private String cubeName;
- private CubeInstance cube;
- private CubeDesc cubeDesc;
- private int[] factDictCols;
+public class FactDistinctColumnsMapper<KEYIN> extends FactDistinctColumnsMapperBase<KEYIN, HCatRecord> {
+ private HCatSchema schema = null;
private CubeJoinedFlatTableDesc intermediateTableDesc;
- private ShortWritable outputKey = new ShortWritable();
- private Text outputValue = new Text();
- private int errorRecordCounter;
-
- private HCatSchema schema = null;
- private CuboidScheduler cuboidScheduler = null;
- private List<String> rowKeyValues = null;
- private HyperLogLogPlusCounter hll;
- private long baseCuboidId;
- private int nRowKey;
- private boolean collectStatistics = false;
+ protected boolean collectStatistics = false;
+ protected CuboidScheduler cuboidScheduler = null;
+ protected List<String> rowKeyValues = null;
+ protected HyperLogLogPlusCounter hll;
+ protected int nRowKey;
@Override
protected void setup(Context context) throws IOException {
- super.publishConfiguration(context.getConfiguration());
-
- Configuration conf = context.getConfiguration();
+ super.setup(context);
- KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(conf);
- cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
- collectStatistics = Boolean.parseBoolean(conf.get(BatchConstants.CFG_STATISTICS_ENABLED));
- cube = CubeManager.getInstance(config).getCube(cubeName);
- cubeDesc = cube.getDescriptor();
+ schema = HCatInputFormat.getTableSchema(context.getConfiguration());
intermediateTableDesc = new CubeJoinedFlatTableDesc(cubeDesc, null);
- cuboidScheduler = new CuboidScheduler(cubeDesc);
-
- baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
- Cuboid baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId);
- List<TblColRef> columns = baseCuboid.getColumns();
-
- ArrayList<Integer> factDictCols = new ArrayList<Integer>();
- RowKeyDesc rowkey = cubeDesc.getRowkey();
- DictionaryManager dictMgr = DictionaryManager.getInstance(config);
- for (int i = 0; i < columns.size(); i++) {
- TblColRef col = columns.get(i);
- if (rowkey.isUseDictionary(col) == false)
- continue;
-
- String scanTable = (String) dictMgr.decideSourceData(cubeDesc.getModel(), cubeDesc.getRowkey().getDictionary(col), col, null)[0];
- if (cubeDesc.getModel().isFactTable(scanTable)) {
- factDictCols.add(i);
- }
- }
- this.factDictCols = new int[factDictCols.size()];
- for (int i = 0; i < factDictCols.size(); i++)
- this.factDictCols[i] = factDictCols.get(i);
- schema = HCatInputFormat.getTableSchema(context.getConfiguration());
- rowKeyValues = Lists.newArrayList();
- nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length;
- if(collectStatistics) {
+ collectStatistics = Boolean.parseBoolean(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_ENABLED));
+ if (collectStatistics) {
+ cuboidScheduler = new CuboidScheduler(cubeDesc);
hll = new HyperLogLogPlusCounter(16);
+ rowKeyValues = Lists.newArrayList();
+ nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length;
}
}
@@ -127,7 +72,7 @@ public class FactDistinctColumnsMapper<KEYIN> extends KylinMapper<KEYIN, HCatRec
public void map(KEYIN key, HCatRecord record, Context context) throws IOException, InterruptedException {
try {
int[] flatTableIndexes = intermediateTableDesc.getRowKeyColumnIndexes();
- HCatFieldSchema fieldSchema = null;
+ HCatFieldSchema fieldSchema;
for (int i : factDictCols) {
outputKey.set((short) i);
fieldSchema = schema.get(flatTableIndexes[i]);
@@ -142,28 +87,12 @@ public class FactDistinctColumnsMapper<KEYIN> extends KylinMapper<KEYIN, HCatRec
handleErrorRecord(record, ex);
}
- if(collectStatistics) {
+ if (collectStatistics) {
String[] row = HiveTableReader.getRowAsStringArray(record);
putRowKeyToHLL(row, baseCuboidId);
}
}
- private void handleErrorRecord(HCatRecord record, Exception ex) throws IOException {
-
- System.err.println("Insane record: " + record.getAll());
- ex.printStackTrace(System.err);
-
- errorRecordCounter++;
- if (errorRecordCounter > BatchConstants.ERROR_RECORD_THRESHOLD) {
- if (ex instanceof IOException)
- throw (IOException) ex;
- else if (ex instanceof RuntimeException)
- throw (RuntimeException) ex;
- else
- throw new RuntimeException("", ex);
- }
- }
-
private void putRowKeyToHLL(String[] row, long cuboidId) {
rowKeyValues.clear();
long mask = Long.highestOneBit(baseCuboidId);
@@ -184,8 +113,9 @@ public class FactDistinctColumnsMapper<KEYIN> extends KylinMapper<KEYIN, HCatRec
}
- protected void cleanup(Mapper.Context context) throws IOException, InterruptedException {
- if(collectStatistics) {
+ @Override
+ protected void cleanup(Context context) throws IOException, InterruptedException {
+ if (collectStatistics) {
// output hll to reducer, key is -1
// keyBuf = Bytes.toBytes(-1);
outputKey.set((short) -1);
@@ -196,5 +126,4 @@ public class FactDistinctColumnsMapper<KEYIN> extends KylinMapper<KEYIN, HCatRec
}
}
-
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8e0695b2/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
new file mode 100644
index 0000000..603277c
--- /dev/null
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
@@ -0,0 +1,81 @@
+package org.apache.kylin.job.hadoop.cube;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.ShortWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hive.hcatalog.data.HCatRecord;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.mr.KylinMapper;
+import org.apache.kylin.cube.CubeInstance;
+import org.apache.kylin.cube.CubeManager;
+import org.apache.kylin.cube.cuboid.Cuboid;
+import org.apache.kylin.cube.model.CubeDesc;
+import org.apache.kylin.cube.model.RowKeyDesc;
+import org.apache.kylin.dict.DictionaryManager;
+import org.apache.kylin.job.constant.BatchConstants;
+import org.apache.kylin.job.hadoop.AbstractHadoopJob;
+import org.apache.kylin.metadata.model.TblColRef;
+
+/**
+ * Created by Hongbin Ma(Binmahone) on 3/26/15.
+ */
+public class FactDistinctColumnsMapperBase<KEYIN, VALUEIN> extends KylinMapper<KEYIN, VALUEIN, ShortWritable, Text> {
+
+ protected String cubeName;
+ protected CubeInstance cube;
+ protected CubeDesc cubeDesc;
+ protected long baseCuboidId;
+ protected List<TblColRef> columns;
+ protected ArrayList<Integer> factDictCols;
+
+ protected ShortWritable outputKey = new ShortWritable();
+ protected Text outputValue = new Text();
+ protected int errorRecordCounter =0;
+
+ @Override
+ protected void setup(Context context) throws IOException {
+ Configuration conf = context.getConfiguration();
+ publishConfiguration(conf);
+ KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(conf);
+
+ cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
+ cube = CubeManager.getInstance(config).getCube(cubeName);
+ cubeDesc = cube.getDescriptor();
+ baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
+ columns = Cuboid.findById(cubeDesc, baseCuboidId).getColumns();
+
+ factDictCols = new ArrayList<Integer>();
+ RowKeyDesc rowKey = cubeDesc.getRowkey();
+ DictionaryManager dictMgr = DictionaryManager.getInstance(config);
+ for (int i = 0; i < columns.size(); i++) {
+ TblColRef col = columns.get(i);
+ if (!rowKey.isUseDictionary(col))
+ continue;
+
+ String scanTable = (String) dictMgr.decideSourceData(cubeDesc.getModel(), cubeDesc.getRowkey().getDictionary(col), col, null)[0];
+ if (cubeDesc.getModel().isFactTable(scanTable)) {
+ factDictCols.add(i);
+ }
+ }
+ }
+
+ protected void handleErrorRecord(HCatRecord record, Exception ex) throws IOException {
+
+ System.err.println("Insane record: " + record.getAll());
+ ex.printStackTrace(System.err);
+
+ errorRecordCounter++;
+ if (errorRecordCounter > BatchConstants.ERROR_RECORD_THRESHOLD) {
+ if (ex instanceof IOException)
+ throw (IOException) ex;
+ else if (ex instanceof RuntimeException)
+ throw (RuntimeException) ex;
+ else
+ throw new RuntimeException("", ex);
+ }
+ }
+}
[10/50] incubator-kylin git commit: Merge branch
'streaming-localdict' of https://github.com/KylinOLAP/Kylin into
streaming-localdict
Posted by li...@apache.org.
Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/227edf72
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/227edf72
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/227edf72
Branch: refs/heads/streaming-localdict
Commit: 227edf7275b0261720a188b05181d35f85fb4f5a
Parents: 21b8f0f 3bf6b37
Author: qianhao.zhou <qi...@ebay.com>
Authored: Thu Mar 26 18:04:29 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Thu Mar 26 18:04:29 2015 +0800
----------------------------------------------------------------------
.../common/hll/HyperLogLogPlusCounter.java | 29 ++++++++++++++++++--
.../org/apache/kylin/common/util/BytesUtil.java | 16 +++++------
.../apache/kylin/common/util/BytesUtilTest.java | 20 ++++++++++++++
.../metadata/model_desc/kylin_sales_model.json | 17 ++++++++++++
.../kylin/metadata/model/DimensionDesc.java | 12 +++++---
5 files changed, 80 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
[20/50] incubator-kylin git commit: Merge branch
'streaming-localdict' of https://github.com/KylinOLAP/Kylin into
streaming-localdict
Posted by li...@apache.org.
Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict
Conflicts:
invertedindex/src/main/java/org/apache/kylin/invertedindex/index/BatchSliceBuilder.java
invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodec.java
job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidMapper.java
job/src/test/java/org/apache/kylin/job/BuildCubeWithStreamTest.java
storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
storage/src/main/java/org/apache/kylin/storage/gridtable/GTRecord.java
streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java
streaming/src/test/java/org/apache/kylin/streaming/KafkaConsumerTest.java
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/7f73abe5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/7f73abe5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/7f73abe5
Branch: refs/heads/streaming-localdict
Commit: 7f73abe5c53fc165ff01b920850fe4caf8ab9e0d
Parents: 959d031 7088724
Author: Li, Yang <ya...@ebay.com>
Authored: Fri Mar 27 11:39:24 2015 +0800
Committer: Li, Yang <ya...@ebay.com>
Committed: Fri Mar 27 11:39:24 2015 +0800
----------------------------------------------------------------------
.../common/hll/HyperLogLogPlusCounter.java | 29 ++-
.../kylin/common/persistence/ResourceStore.java | 1 +
.../org/apache/kylin/common/util/ByteArray.java | 2 +-
.../org/apache/kylin/common/util/BytesUtil.java | 16 +-
.../apache/kylin/common/util/BytesUtilTest.java | 20 ++
.../java/org/apache/kylin/dict/Dictionary.java | 31 ++--
.../org/apache/kylin/dict/TrieDictionary.java | 48 ++---
.../apache/kylin/dict/NumberDictionaryTest.java | 2 +-
.../metadata/model_desc/kylin_sales_model.json | 17 ++
.../localmeta/streaming/kafka_test.json | 15 ++
.../apache/kylin/invertedindex/IIInstance.java | 12 ++
.../apache/kylin/invertedindex/IIManager.java | 17 +-
.../invertedindex/index/BatchSliceBuilder.java | 8 +-
.../model/IIJoinedFlatTableDesc.java | 12 +-
.../invertedindex/model/IIKeyValueCodec.java | 91 +++++----
.../model/IIKeyValueCodecWithState.java | 68 +++++++
.../apache/kylin/invertedindex/model/IIRow.java | 13 ++
.../org/apache/kylin/job/JoinedFlatTable.java | 1 -
.../kylin/job/constant/BatchConstants.java | 5 +
.../kylin/job/constant/ExecutableConstants.java | 1 +
.../apache/kylin/job/cube/CubingJobBuilder.java | 85 ++++++---
.../kylin/job/hadoop/AbstractHadoopJob.java | 5 +-
.../kylin/job/hadoop/cube/BaseCuboidMapper.java | 2 +-
.../kylin/job/hadoop/cube/CubeHFileMapper.java | 2 +-
.../kylin/job/hadoop/cube/CuboidReducer.java | 2 +-
.../cube/FactDistinctColumnsCombiner.java | 26 ++-
.../job/hadoop/cube/FactDistinctColumnsJob.java | 14 +-
.../hadoop/cube/FactDistinctColumnsMapper.java | 139 --------------
.../cube/FactDistinctColumnsMapperBase.java | 81 ++++++++
.../hadoop/cube/FactDistinctColumnsReducer.java | 143 ++++++++++++---
.../cube/FactDistinctHiveColumnsMapper.java | 148 +++++++++++++++
.../cube/FactDistinctIIColumnsMapper.java | 129 +++++++++++++
.../job/hadoop/cube/MergeCuboidMapper.java | 2 +-
.../kylin/job/hadoop/cube/NDCuboidMapper.java | 2 +-
.../job/hadoop/cube/NewBaseCuboidMapper.java | 2 +-
.../hadoop/cubev2/BuildDictionaryMapper.java | 2 +-
.../kylin/job/hadoop/cubev2/InMemCuboidJob.java | 183 +++++++++++++++++++
.../job/hadoop/cubev2/InMemCuboidMapper.java | 163 ++++++++++-------
.../job/hadoop/cubev2/InMemCuboidReducer.java | 82 +++++++++
.../invertedindex/InvertedIndexMapper.java | 2 +-
.../invertedindex/InvertedIndexPartitioner.java | 2 +-
.../invertedindex/InvertedIndexReducer.java | 2 +-
.../kylin/job/streaming/StreamingBootstrap.java | 117 ++++++++++++
.../kylin/job/streaming/StreamingCLI.java | 71 +++++++
.../kylin/job/BuildCubeWithStreamTest.java | 4 +-
.../apache/kylin/job/IIStreamBuilderTest.java | 80 ++++++++
.../kylin/metadata/model/DimensionDesc.java | 12 +-
.../metadata/model/IJoinedFlatTableDesc.java | 2 -
.../metadata/model/IntermediateColumnDesc.java | 4 +
pom.xml | 1 +
.../gridtable/GTDictionaryCodeSystem.java | 72 +++++---
.../endpoint/HbaseServerKVIterator.java | 9 +-
streaming/pom.xml | 8 +
.../apache/kylin/streaming/BrokerConfig.java | 78 ++++++++
.../kylin/streaming/JsonStreamParser.java | 73 ++++++++
.../org/apache/kylin/streaming/KafkaConfig.java | 99 +++++-----
.../apache/kylin/streaming/KafkaConsumer.java | 22 +--
.../apache/kylin/streaming/KafkaRequester.java | 128 +++++++------
.../apache/kylin/streaming/StreamBuilder.java | 9 +
.../apache/kylin/streaming/StreamManager.java | 114 ++++++++++++
.../apache/kylin/streaming/StreamParser.java | 47 +++++
.../kylin/streaming/StringStreamParser.java | 55 ++++++
.../kylin/streaming/cube/CubeStreamBuilder.java | 37 ++--
.../invertedindex/IIStreamBuilder.java | 6 +-
.../kylin/streaming/EternalStreamProducer.java | 5 +-
.../apache/kylin/streaming/KafkaBaseTest.java | 23 ---
.../apache/kylin/streaming/KafkaConfigTest.java | 65 -------
.../kylin/streaming/KafkaConsumerTest.java | 8 +-
.../kylin/streaming/KafkaRequesterTest.java | 11 +-
.../kylin/streaming/Nous/NousMessageTest.java | 4 +-
.../kylin/streaming/OneOffStreamProducer.java | 3 +-
.../kylin/streaming/StreamManagerTest.java | 69 +++++++
.../invertedindex/IIStreamBuilderTest.java | 41 -----
.../invertedindex/PrintOutStreamBuilder.java | 67 +++++++
.../kafka_streaming_test/kafka.properties | 10 -
75 files changed, 2250 insertions(+), 731 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/7f73abe5/common/src/main/java/org/apache/kylin/common/util/ByteArray.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/7f73abe5/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
----------------------------------------------------------------------
diff --cc dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
index a931359,a931359..815b06d
--- a/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
+++ b/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
@@@ -73,15 -73,15 +73,16 @@@ abstract public class Dictionary<T> imp
}
/**
-- * Returns the ID integer of given value. In case of not found - if
-- * roundingFlag=0, throw IllegalArgumentException; - if roundingFlag<0, the
-- * closest smaller ID integer if exist; - if roundingFlag>0, the closest
-- * bigger ID integer if exist. The implementation often has cache, thus
-- * faster than the byte[] version getIdFromValueBytes()
++ * Returns the ID integer of given value. In case of not found
++ * - if roundingFlag=0, throw IllegalArgumentException;
++ * - if roundingFlag<0, the closest smaller ID integer if exist;
++ * - if roundingFlag>0, the closest bigger ID integer if exist.
++ *
++ * The implementation often has cache, thus faster than the byte[] version getIdFromValueBytes()
*
* @throws IllegalArgumentException
-- * if value is not found in dictionary and rounding is off or
-- * failed
++ * if value is not found in dictionary and rounding is off;
++ * or if rounding cannot find a smaller or bigger ID
*/
final public int getIdFromValue(T value, int roundingFlag) {
if (isNullObjectForm(value))
@@@ -119,16 -119,16 +120,16 @@@
}
/**
-- * A lower level API, return ID integer from raw value bytes. In case of not
-- * found - if roundingFlag=0, throw IllegalArgumentException; - if
-- * roundingFlag<0, the closest smaller ID integer if exist; - if
-- * roundingFlag>0, the closest bigger ID integer if exist. Bypassing the
-- * cache layer, this could be significantly slower than getIdFromValue(T
-- * value).
++ * A lower level API, return ID integer from raw value bytes. In case of not found
++ * - if roundingFlag=0, throw IllegalArgumentException;
++ * - if roundingFlag<0, the closest smaller ID integer if exist;
++ * - if roundingFlag>0, the closest bigger ID integer if exist.
++ *
++ * Bypassing the cache layer, this could be significantly slower than getIdFromValue(T value).
*
* @throws IllegalArgumentException
-- * if value is not found in dictionary and rounding is off or
-- * failed
++ * if value is not found in dictionary and rounding is off;
++ * or if rounding cannot find a smaller or bigger ID
*/
final public int getIdFromValueBytes(byte[] value, int offset, int len, int roundingFlag) {
if (isNullByteForm(value, offset, len))
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/7f73abe5/dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java
----------------------------------------------------------------------
diff --cc dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java
index ef845ce,ef845ce..bf40eac
--- a/dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java
+++ b/dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java
@@@ -185,9 -185,9 +185,9 @@@ public class TrieDictionary<T> extends
* @param inpEnd
* -- end of input
* @param roundingFlag
-- * -- =0: return -1 if not found -- <0: return closest smaller if
-- * not found, might be -1 -- >0: return closest bigger if not
-- * found, might be nValues
++ * -- =0: return -1 if not found
++ * -- <0: return closest smaller if not found, return -1
++ * -- >0: return closest bigger if not found, return nValues
*/
private int lookupSeqNoFromValue(int n, byte[] inp, int o, int inpEnd, int roundingFlag) {
if (inp.length == 0) // special 'empty' value
@@@ -199,11 -199,11 +199,8 @@@
// match the current node, note [0] of node's value has been matched
// when this node is selected by its parent
int p = n + firstByteOffset; // start of node's value
-- int end = p + BytesUtil.readUnsigned(trieBytes, p - 1, 1); // end of
-- // node's
-- // value
-- for (p++; p < end && o < inpEnd; p++, o++) { // note matching start
-- // from [1]
++ int end = p + BytesUtil.readUnsigned(trieBytes, p - 1, 1); // end of node's value
++ for (p++; p < end && o < inpEnd; p++, o++) { // note matching start from [1]
if (trieBytes[p] != inp[o]) {
int comp = BytesUtil.compareByteUnsigned(trieBytes[p], inp[o]);
if (comp < 0) {
@@@ -216,9 -216,9 +213,7 @@@
// node completely matched, is input all consumed?
boolean isEndOfValue = checkFlag(n, BIT_IS_END_OF_VALUE);
if (o == inpEnd) {
-- return p == end && isEndOfValue ? seq : roundSeqNo(roundingFlag, seq - 1, -1, seq); // input
-- // all
-- // matched
++ return p == end && isEndOfValue ? seq : roundSeqNo(roundingFlag, seq - 1, -1, seq); // input all matched
}
if (isEndOfValue)
seq++;
@@@ -226,9 -226,9 +221,7 @@@
// find a child to continue
int c = headSize + (BytesUtil.readUnsigned(trieBytes, n, sizeChildOffset) & childOffsetMask);
if (c == headSize) // has no children
-- return roundSeqNo(roundingFlag, seq - 1, -1, seq); // input only
-- // partially
-- // matched
++ return roundSeqNo(roundingFlag, seq - 1, -1, seq); // input only partially matched
byte inpByte = inp[o];
int comp;
while (true) {
@@@ -242,26 -242,26 +235,10 @@@
} else if (comp < 0) { // try next child
seq += BytesUtil.readUnsigned(trieBytes, c + sizeChildOffset, sizeNoValuesBeneath);
if (checkFlag(c, BIT_IS_LAST_CHILD))
-- return roundSeqNo(roundingFlag, seq - 1, -1, seq); // no
-- // child
-- // can
-- // match
-- // the
-- // next
-- // byte
-- // of
-- // input
++ return roundSeqNo(roundingFlag, seq - 1, -1, seq); // no child can match the next byte of input
c = p + BytesUtil.readUnsigned(trieBytes, p - 1, 1);
} else { // children are ordered by their first value byte
-- return roundSeqNo(roundingFlag, seq - 1, -1, seq); // no
-- // child
-- // can
-- // match
-- // the
-- // next
-- // byte
-- // of
-- // input
++ return roundSeqNo(roundingFlag, seq - 1, -1, seq); // no child can match the next byte of input
}
}
}
@@@ -279,9 -279,9 +256,7 @@@
@Override
final protected T getValueFromIdImpl(int id) {
if (enableCache) {
-- Object[] cache = idToValueCache.get(); // SoftReference to skip
-- // cache gracefully when
-- // short of memory
++ Object[] cache = idToValueCache.get(); // SoftReference to skip cache gracefully when short of memory
if (cache != null) {
int seq = calcSeqNoFromId(id);
if (seq < 0 || seq >= nValues)
@@@ -347,8 -347,8 +322,7 @@@
int nValuesBeneath;
while (true) {
nValuesBeneath = BytesUtil.readUnsigned(trieBytes, c + sizeChildOffset, sizeNoValuesBeneath);
-- if (seq - nValuesBeneath < 0) { // value is under this child,
-- // reset n and loop again
++ if (seq - nValuesBeneath < 0) { // value is under this child, reset n and loop again
n = c;
break;
} else { // go to next child
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/7f73abe5/dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java
----------------------------------------------------------------------
diff --cc dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java
index e6d2ee9,e6d2ee9..f9af244
--- a/dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java
+++ b/dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java
@@@ -122,7 -122,7 +122,7 @@@ public class NumberDictionaryTest
}
// test rounding
-- for (int i = 0; i < n; i++) {
++ for (int i = 0; i < n * 50; i++) {
String randStr = randNumber();
BigDecimal rand = new BigDecimal(randStr);
int binarySearch = Collections.binarySearch(sorted, rand);
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/7f73abe5/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/BuildDictionaryMapper.java
----------------------------------------------------------------------
diff --cc job/src/main/java/org/apache/kylin/job/hadoop/cubev2/BuildDictionaryMapper.java
index 4d66186,0000000..a2c2c3b
mode 100644,000000..100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/BuildDictionaryMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/BuildDictionaryMapper.java
@@@ -1,184 -1,0 +1,184 @@@
+package org.apache.kylin.job.hadoop.cubev2;
+
+import com.google.common.base.Function;
+import com.google.common.collect.*;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hive.hcatalog.data.HCatRecord;
+import org.apache.hive.hcatalog.data.schema.HCatSchema;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.hll.HyperLogLogPlusCounter;
+import org.apache.kylin.common.mr.KylinMapper;
+import org.apache.kylin.cube.CubeInstance;
+import org.apache.kylin.cube.CubeManager;
+import org.apache.kylin.cube.CubeSegment;
+import org.apache.kylin.cube.cuboid.Cuboid;
+import org.apache.kylin.cube.cuboid.CuboidScheduler;
+import org.apache.kylin.cube.model.CubeDesc;
+import org.apache.kylin.cube.model.CubeJoinedFlatTableDesc;
+import org.apache.kylin.dict.Dictionary;
+import org.apache.kylin.dict.DictionaryGenerator;
+import org.apache.kylin.dict.DictionaryInfo;
+import org.apache.kylin.dict.DictionaryInfoSerializer;
+import org.apache.kylin.dict.lookup.HiveTableReader;
+import org.apache.kylin.job.constant.BatchConstants;
+import org.apache.kylin.job.hadoop.AbstractHadoopJob;
+import org.apache.kylin.metadata.model.SegmentStatusEnum;
+import org.apache.kylin.metadata.model.TblColRef;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.annotation.Nullable;
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Created by shaoshi on 3/24/15.
+ */
+public class BuildDictionaryMapper<KEYIN> extends KylinMapper<KEYIN, HCatRecord, Text, Text> {
+
+ private static final Logger logger = LoggerFactory.getLogger(BuildDictionaryMapper.class);
+ private String cubeName;
+ private CubeInstance cube;
+ private CubeSegment cubeSegment;
+ private CubeDesc cubeDesc;
+
+ private HCatSchema schema = null;
+ private HyperLogLogPlusCounter hll;
+
+
+ private Text outputKey = new Text();
+ private Text outputValue = new Text();
+ private List<TblColRef> dimColumns;
+ private SetMultimap<Integer, String> columnDistinctValueMap;
+ private CuboidScheduler cuboidScheduler = null;
+ private CubeJoinedFlatTableDesc intermediateTableDesc;
+ private long baseCuboidId;
+ private List<String> rowKeyValues = null;
+ private int nRowKey;
+
+ @Override
+ protected void setup(Context context) throws IOException {
+ super.publishConfiguration(context.getConfiguration());
+
+ Configuration conf = context.getConfiguration();
+
- KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(conf);
++ KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
+ cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
+ cube = CubeManager.getInstance(config).getCube(cubeName);
+ String segmentName = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_NAME).toUpperCase();
+ cubeDesc = cube.getDescriptor();
+ cubeSegment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);
+ dimColumns = cubeDesc.listDimensionColumnsExcludingDerived();
+ hll = new HyperLogLogPlusCounter(16);
+ columnDistinctValueMap = HashMultimap.create(); // key is col, value is a set of string values
+ cuboidScheduler = new CuboidScheduler(cubeDesc);
+ intermediateTableDesc = new CubeJoinedFlatTableDesc(cube.getDescriptor(), cubeSegment);
+ baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
+ nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length;
+
+ rowKeyValues = Lists.newArrayList();
+ }
+
+ @Override
+ public void map(KEYIN key, HCatRecord record, Context context) throws IOException, InterruptedException {
+ String[] row = HiveTableReader.getRowAsStringArray(record);
+ buildDictAndCount(row);
+ }
+
+ protected void buildDictAndCount(String[] row) {
+ for (int i = 0; i < intermediateTableDesc.getRowKeyColumnIndexes().length; i++) {
+ columnDistinctValueMap.put(i, row[intermediateTableDesc.getRowKeyColumnIndexes()[i]]);
+ }
+
+ putRowKeyToHLL(row, baseCuboidId); // recursively put all possible row keys to hll
+ }
+
+ protected void cleanup(Mapper.Context context) throws IOException, InterruptedException {
+ Map<Integer, DictionaryInfo> dictionaries = buildDictionary();
+
+ DictionaryInfoSerializer dictionaryInfoSerializer = new DictionaryInfoSerializer();
+ Cuboid baseCuboid = Cuboid.findById(cubeDesc, this.baseCuboidId);
+ byte[] keyBuf;
+ // output dictionary to reducer, key is the index of the col on row key;
+ for (Integer rowKeyIndex : dictionaries.keySet()) {
+ keyBuf = Bytes.toBytes(rowKeyIndex);
+ outputKey.set(keyBuf);
+
+ //serialize the dictionary to bytes;
+ ByteArrayOutputStream buf = new ByteArrayOutputStream();
+ DataOutputStream dout = new DataOutputStream(buf);
+ dictionaryInfoSerializer.serialize(dictionaries.get(rowKeyIndex), dout);
+ dout.close();
+ buf.close();
+ byte[] dictionaryBytes = buf.toByteArray();
+ outputValue.set(dictionaryBytes);
+
+ context.write(outputKey, outputValue);
+ }
+
+ // output hll to reducer, key is -1
+ keyBuf = Bytes.toBytes(-1);
+ outputKey.set(keyBuf);
+ ByteBuffer hllBuf = ByteBuffer.allocate(1024 * 1024);
+ hll.writeRegisters(hllBuf);
+ outputValue.set(hllBuf.array());
+ outputKey.set(keyBuf, 0, keyBuf.length);
+ context.write(outputKey, outputValue);
+ }
+
+ private void putRowKeyToHLL(String[] row, long cuboidId) {
+ rowKeyValues.clear();
+ long mask = Long.highestOneBit(baseCuboidId);
+ // int actualLength = Long.SIZE - Long.numberOfLeadingZeros(baseCuboidId);
+ for (int i = 0; i < nRowKey; i++) {
+ if ((mask & cuboidId) == 1) {
+ rowKeyValues.add(row[intermediateTableDesc.getRowKeyColumnIndexes()[i]]);
+ }
+ mask = mask >> 1;
+ }
+
+ String key = StringUtils.join(rowKeyValues, ",");
+ hll.add(key);
+
+ Collection<Long> children = cuboidScheduler.getSpanningCuboid(cuboidId);
+ for (Long childId : children) {
+ putRowKeyToHLL(row, childId);
+ }
+
+ }
+
+ private Map<Integer, DictionaryInfo> buildDictionary() {
+ Map<Integer, DictionaryInfo> dictionaryMap = Maps.newHashMap();
+ for (int i = 0; i < intermediateTableDesc.getRowKeyColumnIndexes().length; i++) {
+ // dictionary
+ if (cubeDesc.getRowkey().isUseDictionary(i)) {
+ TblColRef col = cubeDesc.getRowkey().getRowKeyColumns()[i].getColRef();
+ Dictionary dict = DictionaryGenerator.buildDictionaryFromValueList(col.getType(), Collections2.transform(columnDistinctValueMap.get(i), new Function<String, byte[]>() {
+ @Nullable
+ @Override
+ public byte[] apply(String input) {
+ return input.getBytes();
+ }
+ }));
+
+ logger.info("Building dictionary for " + col);
+ DictionaryInfo dictInfo = new DictionaryInfo(col.getTable(), col.getName(), 0, col.getDatatype(), null, "");
+ dictInfo.setDictionaryObject(dict);
+ dictInfo.setDictionaryClass(dict.getClass().getName());
+ dictionaryMap.put(i, dictInfo);
+ }
+ }
+
+ return dictionaryMap;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/7f73abe5/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
----------------------------------------------------------------------
diff --cc storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
index 03c7541,45b5d5f..6f2d9ce
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
@@@ -15,12 -13,11 +13,12 @@@ import java.util.Map
/**
* Created by shaoshi on 3/23/15.
*/
++@SuppressWarnings({ "rawtypes", "unchecked" })
public class GTDictionaryCodeSystem implements IGTCodeSystem {
private GTInfo info;
- private BitSet encodedColumns = null;
private Map<Integer, Dictionary> dictionaryMaps = null; // key: column index; value: dictionary for this column;
- private Map<Integer, DataTypeSerializer> serializerMap = null; // column index; value: serializer for this column;
private IFilterCodeSystem<ByteArray> filterCS;
+ private DataTypeSerializer[] serializers;
public GTDictionaryCodeSystem(Map<Integer, Dictionary> dictionaryMaps) {
this.dictionaryMaps = dictionaryMaps;
@@@ -95,12 -87,7 +88,7 @@@
@Override
public Object decodeColumnValue(int col, ByteBuffer buf) {
- if (useDictionary(col)) {
- int id = BytesUtil.readUnsigned(buf, dictionaryMaps.get(col).getSizeOfId());
- return dictionaryMaps.get(col).getValueFromId(id);
- } else {
- return serializerMap.get(col).deserialize(buf);
- }
- return serializers[col].deserialize(buf);
++ return serializers[col].deserialize(buf);
}
@Override
[19/50] incubator-kylin git commit: KYLIN-625,
consider null & code system in GTRecord comparison
Posted by li...@apache.org.
KYLIN-625, consider null & code system in GTRecord comparison
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/959d031c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/959d031c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/959d031c
Branch: refs/heads/streaming-localdict
Commit: 959d031ce310921b0c77f173f3a55b449df54c60
Parents: 5dda35f
Author: Li, Yang <ya...@ebay.com>
Authored: Fri Mar 27 10:35:19 2015 +0800
Committer: Li, Yang <ya...@ebay.com>
Committed: Fri Mar 27 10:35:19 2015 +0800
----------------------------------------------------------------------
.../org/apache/kylin/common/util/ByteArray.java | 25 ++++++++++++++++----
.../kylin/storage/gridtable/GTRecord.java | 5 +++-
2 files changed, 24 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/959d031c/common/src/main/java/org/apache/kylin/common/util/ByteArray.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/kylin/common/util/ByteArray.java b/common/src/main/java/org/apache/kylin/common/util/ByteArray.java
index df107a5..8856fe8 100644
--- a/common/src/main/java/org/apache/kylin/common/util/ByteArray.java
+++ b/common/src/main/java/org/apache/kylin/common/util/ByteArray.java
@@ -30,7 +30,7 @@ public class ByteArray implements Comparable<ByteArray> {
public static ByteArray allocate(int length) {
return new ByteArray(new byte[length]);
}
-
+
public static ByteArray copyOf(byte[] array, int offset, int length) {
byte[] space = new byte[length];
System.arraycopy(array, offset, space, 0, length);
@@ -88,7 +88,7 @@ public class ByteArray implements Comparable<ByteArray> {
public void setLength(int length) {
this.length = length;
}
-
+
public ByteArray copy() {
ByteArray copy = new ByteArray(length);
copy.copyFrom(this);
@@ -111,7 +111,10 @@ public class ByteArray implements Comparable<ByteArray> {
@Override
public int hashCode() {
- return Bytes.hashCode(data, offset, length);
+ if (data == null)
+ return 0;
+ else
+ return Bytes.hashCode(data, offset, length);
}
@Override
@@ -123,12 +126,24 @@ public class ByteArray implements Comparable<ByteArray> {
if (getClass() != obj.getClass())
return false;
ByteArray o = (ByteArray) obj;
- return Bytes.equals(this.data, this.offset, this.length, o.data, o.offset, o.length);
+ if (this.data == null && o.data == null)
+ return true;
+ else if (this.data == null || o.data == null)
+ return false;
+ else
+ return Bytes.equals(this.data, this.offset, this.length, o.data, o.offset, o.length);
}
@Override
public int compareTo(ByteArray o) {
- return Bytes.compareTo(this.data, this.offset, this.length, o.data, o.offset, o.length);
+ if (this.data == null && o.data == null)
+ return 0;
+ else if (this.data == null)
+ return -1;
+ else if (o.data == null)
+ return 1;
+ else
+ return Bytes.compareTo(this.data, this.offset, this.length, o.data, o.offset, o.length);
}
@Override
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/959d031c/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRecord.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRecord.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRecord.java
index 6eb38a9..605a469 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRecord.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRecord.java
@@ -5,6 +5,7 @@ import java.util.Arrays;
import java.util.BitSet;
import org.apache.kylin.common.util.ByteArray;
+import org.apache.kylin.metadata.filter.IFilterCodeSystem;
public class GTRecord implements Comparable<GTRecord> {
@@ -128,11 +129,13 @@ public class GTRecord implements Comparable<GTRecord> {
@Override
public int compareTo(GTRecord o) {
+ assert this.info == o.info;
assert this.maskForEqualHashComp == o.maskForEqualHashComp; // reference equal for performance
+ IFilterCodeSystem<ByteArray> cs = info.codeSystem.getFilterCodeSystem();
int comp = 0;
for (int i = maskForEqualHashComp.nextSetBit(0); i >= 0; i = maskForEqualHashComp.nextSetBit(i + 1)) {
- comp = this.cols[i].compareTo(o.cols[i]);
+ comp = cs.compare(cols[i], o.cols[i]);
if (comp != 0)
return comp;
}
[39/50] incubator-kylin git commit: KYLIN-653 minor fix
Posted by li...@apache.org.
KYLIN-653 minor fix
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/f3a592b3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/f3a592b3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/f3a592b3
Branch: refs/heads/streaming-localdict
Commit: f3a592b33ab6d171eaf9062fac3025c893f576b2
Parents: 929b986
Author: honma <ho...@ebay.com>
Authored: Fri Mar 27 16:13:22 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Fri Mar 27 16:13:22 2015 +0800
----------------------------------------------------------------------
.../hadoop/cube/FactDistinctHiveColumnsMapper.java | 16 ++++++++--------
.../kylin/job/hadoop/invertedindex/II2CubeTest.java | 8 ++++----
2 files changed, 12 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/f3a592b3/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctHiveColumnsMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctHiveColumnsMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctHiveColumnsMapper.java
index 9e9c096..654bf4e 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctHiveColumnsMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctHiveColumnsMapper.java
@@ -18,8 +18,12 @@
package org.apache.kylin.job.hadoop.cube;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hive.hcatalog.data.HCatRecord;
@@ -27,17 +31,13 @@ import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
import org.apache.kylin.common.hll.HyperLogLogPlusCounter;
-import org.apache.kylin.cube.cuboid.Cuboid;
import org.apache.kylin.cube.cuboid.CuboidScheduler;
import org.apache.kylin.cube.model.CubeJoinedFlatTableDesc;
import org.apache.kylin.dict.lookup.HiveTableReader;
import org.apache.kylin.job.constant.BatchConstants;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
/**
* @author yangli9
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/f3a592b3/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java b/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
index 62cf6e8..97c71f8 100644
--- a/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
+++ b/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
@@ -13,7 +13,7 @@ import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.MutationSerialization;
import org.apache.hadoop.hbase.mapreduce.ResultSerialization;
-import org.apache.hadoop.io.ShortWritable;
+import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.types.Pair;
@@ -120,7 +120,7 @@ public class II2CubeTest extends LocalFileMetadataTestCase {
@Test
public void factDistinctIIColumnsMapperTest() throws IOException {
- MapDriver<ImmutableBytesWritable, Result, ShortWritable, Text> mapDriver;
+ MapDriver<ImmutableBytesWritable, Result, LongWritable, Text> mapDriver;
FactDistinctIIColumnsMapper mapper = new FactDistinctIIColumnsMapper();
mapDriver = MapDriver.newMapDriver(mapper);
@@ -135,9 +135,9 @@ public class II2CubeTest extends LocalFileMetadataTestCase {
}
})));
- List<Pair<ShortWritable, Text>> result = mapDriver.run();
+ List<Pair<LongWritable, Text>> result = mapDriver.run();
Set<String> lstgNames = Sets.newHashSet("FP-non GTC","ABIN");
- for(Pair<ShortWritable, Text> pair : result)
+ for(Pair<LongWritable, Text> pair : result)
{
Assert.assertEquals(pair.getFirst().get(),6);
Assert.assertTrue(lstgNames.contains(pair.getSecond().toString()));
[11/50] incubator-kylin git commit: fix
Posted by li...@apache.org.
fix
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/c3ff4f44
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/c3ff4f44
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/c3ff4f44
Branch: refs/heads/streaming-localdict
Commit: c3ff4f447f0884da9635c783ab5aa1d25243887b
Parents: 227edf7
Author: qianhao.zhou <qi...@ebay.com>
Authored: Thu Mar 26 19:38:38 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Thu Mar 26 19:38:38 2015 +0800
----------------------------------------------------------------------
.../apache/kylin/job/IIStreamBuilderTest.java | 80 ++++++++++++++++++++
.../kylin/streaming/StreamingBootstrap.java | 23 ++++--
.../apache/kylin/streaming/StreamingCLI.java | 3 +-
.../invertedindex/IIStreamBuilderTest.java | 41 ----------
4 files changed, 98 insertions(+), 49 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/c3ff4f44/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java b/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
new file mode 100644
index 0000000..35a0fe9
--- /dev/null
+++ b/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
@@ -0,0 +1,80 @@
+/*
+ *
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ *
+ * contributor license agreements. See the NOTICE file distributed with
+ *
+ * this work for additional information regarding copyright ownership.
+ *
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ *
+ * (the "License"); you may not use this file except in compliance with
+ *
+ * the License. You may obtain a copy of the License at
+ *
+ *
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ *
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ *
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *
+ * See the License for the specific language governing permissions and
+ *
+ * limitations under the License.
+ *
+ * /
+ */
+
+package org.apache.kylin.job;
+
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.util.AbstractKylinTestCase;
+import org.apache.kylin.common.util.ClassUtil;
+import org.apache.kylin.common.util.HBaseMetadataTestCase;
+import org.apache.kylin.streaming.StreamingBootstrap;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.File;
+
+/**
+ * Created by qianzhou on 3/6/15.
+ */
+public class IIStreamBuilderTest extends HBaseMetadataTestCase {
+
+ private KylinConfig kylinConfig;
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ ClassUtil.addClasspath(new File(HBaseMetadataTestCase.SANDBOX_TEST_DATA).getAbsolutePath());
+ System.setProperty("hdp.version", "2.2.0.0-2041"); // mapred-site.xml ref this
+ }
+
+ @Before
+ public void before() throws Exception {
+ HBaseMetadataTestCase.staticCreateTestMetadata(AbstractKylinTestCase.SANDBOX_TEST_DATA);
+ kylinConfig = KylinConfig.getInstanceFromEnv();
+ DeployUtil.initCliWorkDir();
+ DeployUtil.deployMetadata();
+ DeployUtil.overrideJobJarLocations();
+ }
+
+ @After
+ public void after() {
+ this.cleanupTestMetadata();
+ }
+
+ @Test
+ public void test() throws Exception {
+ StreamingBootstrap.getInstance(kylinConfig).startStreaming("eagle", 0);
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/c3ff4f44/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java b/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java
index 4b7c6b7..bd1ab42 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java
@@ -39,7 +39,6 @@ import kafka.api.OffsetRequest;
import kafka.cluster.Broker;
import kafka.javaapi.PartitionMetadata;
import org.apache.kylin.common.KylinConfig;
-import org.apache.kylin.invertedindex.IIDescManager;
import org.apache.kylin.invertedindex.IIInstance;
import org.apache.kylin.invertedindex.IIManager;
import org.apache.kylin.invertedindex.model.IIDesc;
@@ -54,11 +53,19 @@ import java.util.concurrent.Future;
*/
public class StreamingBootstrap {
- private static KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
- private static StreamManager streamManager = StreamManager.getInstance(kylinConfig);
- private static IIManager iiManager = IIManager.getInstance(kylinConfig);
- private static IIDescManager iiDescManager = IIDescManager.getInstance(kylinConfig);
+ private KylinConfig kylinConfig;
+ private StreamManager streamManager;
+ private IIManager iiManager;
+ public static StreamingBootstrap getInstance(KylinConfig kylinConfig) {
+ return new StreamingBootstrap(kylinConfig);
+ }
+
+ private StreamingBootstrap(KylinConfig kylinConfig) {
+ this.kylinConfig = kylinConfig;
+ this.streamManager = StreamManager.getInstance(kylinConfig);
+ this.iiManager = IIManager.getInstance(kylinConfig);
+ }
private static Broker getLeadBroker(KafkaConfig kafkaConfig, int partitionId) {
final PartitionMetadata partitionMetadata = KafkaRequester.getPartitionMetadata(kafkaConfig.getTopic(), partitionId, kafkaConfig.getBrokers(), kafkaConfig);
@@ -69,7 +76,7 @@ public class StreamingBootstrap {
}
}
- public static void startStreaming(String streamingConf, int partitionId) throws Exception {
+ public void startStreaming(String streamingConf, int partitionId) throws Exception {
final KafkaConfig kafkaConfig = streamManager.getKafkaConfig(streamingConf);
Preconditions.checkArgument(kafkaConfig != null, "cannot find kafka config:" + streamingConf);
final IIInstance ii = iiManager.getII(kafkaConfig.getIiName());
@@ -94,7 +101,9 @@ public class StreamingBootstrap {
};
final IIDesc desc = ii.getDescriptor();
Executors.newSingleThreadExecutor().submit(consumer);
- final Future<?> future = Executors.newSingleThreadExecutor().submit(new IIStreamBuilder(consumer.getStreamQueue(), ii.getSegments().get(0).getStorageLocationIdentifier(), desc, partitionId));
+ final IIStreamBuilder task = new IIStreamBuilder(consumer.getStreamQueue(), ii.getSegments().get(0).getStorageLocationIdentifier(), desc, partitionId);
+ task.setStreamParser(JsonStreamParser.instance);
+ final Future<?> future = Executors.newSingleThreadExecutor().submit(task);
future.get();
}
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/c3ff4f44/streaming/src/main/java/org/apache/kylin/streaming/StreamingCLI.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/StreamingCLI.java b/streaming/src/main/java/org/apache/kylin/streaming/StreamingCLI.java
index 70290f1..dac8ce0 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/StreamingCLI.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/StreamingCLI.java
@@ -35,6 +35,7 @@
package org.apache.kylin.streaming;
import org.apache.commons.lang3.StringUtils;
+import org.apache.kylin.common.KylinConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -53,7 +54,7 @@ public class StreamingCLI {
}
if (args[0].equals("start")) {
String kafkaConfName = args[1];
- StreamingBootstrap.startStreaming(kafkaConfName, 0);
+ StreamingBootstrap.getInstance(KylinConfig.getInstanceFromEnv()).startStreaming(kafkaConfName, 0);
} else if (args.equals("stop")) {
} else {
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/c3ff4f44/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilderTest.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilderTest.java b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilderTest.java
deleted file mode 100644
index 11b8868..0000000
--- a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilderTest.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- *
- *
- * Licensed to the Apache Software Foundation (ASF) under one or more
- *
- * contributor license agreements. See the NOTICE file distributed with
- *
- * this work for additional information regarding copyright ownership.
- *
- * The ASF licenses this file to You under the Apache License, Version 2.0
- *
- * (the "License"); you may not use this file except in compliance with
- *
- * the License. You may obtain a copy of the License at
- *
- *
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- *
- *
- * Unless required by applicable law or agreed to in writing, software
- *
- * distributed under the License is distributed on an "AS IS" BASIS,
- *
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *
- * See the License for the specific language governing permissions and
- *
- * limitations under the License.
- *
- * /
- */
-
-package org.apache.kylin.streaming.invertedindex;
-
-/**
- * Created by qianzhou on 3/6/15.
- */
-public class IIStreamBuilderTest {
-}
[28/50] incubator-kylin git commit: Merge branch
'streaming-localdict' of https://github.com/KylinOLAP/Kylin into
streaming-localdict
Posted by li...@apache.org.
Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/dee29553
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/dee29553
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/dee29553
Branch: refs/heads/streaming-localdict
Commit: dee295531f255f616abc4ae36575b75632a0d37b
Parents: b201040 bbbcae8
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 14:06:32 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 14:06:32 2015 +0800
----------------------------------------------------------------------
.../kylin/streaming/cube/CubeStreamBuilder.java | 4 +-
.../kylin/streaming/EternalStreamProducer.java | 2 +-
.../Nous/NousEternalStreamProducer.java | 46 --------
.../kylin/streaming/Nous/NousMessage.java | 118 -------------------
.../kylin/streaming/Nous/NousMessageTest.java | 31 -----
.../kylin/streaming/OneOffStreamProducer.java | 5 +-
.../nous/NousEternalStreamProducer.java | 46 ++++++++
.../kylin/streaming/nous/NousMessage.java | 118 +++++++++++++++++++
.../kylin/streaming/nous/NousMessageTest.java | 31 +++++
9 files changed, 202 insertions(+), 199 deletions(-)
----------------------------------------------------------------------
[07/50] incubator-kylin git commit: Bug fix in BytesUtil.writeUnsigned
Posted by li...@apache.org.
Bug fix in BytesUtil.writeUnsigned
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/d564876c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/d564876c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/d564876c
Branch: refs/heads/streaming-localdict
Commit: d564876c6776c91895f150e0b9512a18c1e34d35
Parents: 1153150
Author: Shao Feng, Shi <sh...@ebay.com>
Authored: Thu Mar 26 18:03:33 2015 +0800
Committer: Shao Feng, Shi <sh...@ebay.com>
Committed: Thu Mar 26 18:03:33 2015 +0800
----------------------------------------------------------------------
.../common/hll/HyperLogLogPlusCounter.java | 29 ++++++++++++++++++--
.../org/apache/kylin/common/util/BytesUtil.java | 16 +++++------
.../apache/kylin/common/util/BytesUtilTest.java | 20 ++++++++++++++
3 files changed, 55 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d564876c/common/src/main/java/org/apache/kylin/common/hll/HyperLogLogPlusCounter.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/kylin/common/hll/HyperLogLogPlusCounter.java b/common/src/main/java/org/apache/kylin/common/hll/HyperLogLogPlusCounter.java
index 686321b..d817bd2 100644
--- a/common/src/main/java/org/apache/kylin/common/hll/HyperLogLogPlusCounter.java
+++ b/common/src/main/java/org/apache/kylin/common/hll/HyperLogLogPlusCounter.java
@@ -186,7 +186,7 @@ public class HyperLogLogPlusCounter implements Comparable<HyperLogLogPlusCounter
BytesUtil.writeVInt(size, out);
for (int i = 0; i < m; i++) {
if (registers[i] > 0) {
- BytesUtil.writeUnsigned(i, indexLen, out);
+ writeUnsigned(i, indexLen, out);
out.put(registers[i]);
}
}
@@ -207,7 +207,7 @@ public class HyperLogLogPlusCounter implements Comparable<HyperLogLogPlusCounter
throw new IllegalArgumentException("register size (" + size + ") cannot be larger than m (" + m + ")");
int indexLen = getRegisterIndexSize();
for (int i = 0; i < size; i++) {
- int key = BytesUtil.readUnsigned(in, indexLen);
+ int key = readUnsigned(in, indexLen);
registers[key] = in.get();
}
} else { // array scheme
@@ -306,4 +306,29 @@ public class HyperLogLogPlusCounter implements Comparable<HyperLogLogPlusCounter
System.out.println("HLLC" + p + ",\t" + size + " bytes,\t68% err<" + er + "%" + ",\t95% err<" + er2 + "%" + ",\t99.7% err<" + er3 + "%");
}
}
+
+ /**
+ *
+ * @param num
+ * @param size
+ * @param out
+ */
+ public static void writeUnsigned(int num, int size, ByteBuffer out) {
+ for (int i = 0; i < size; i++) {
+ out.put((byte) num);
+ num >>>= 8;
+ }
+ }
+
+ public static int readUnsigned(ByteBuffer in, int size) {
+ int integer = 0;
+ int mask = 0xff;
+ int shift = 0;
+ for (int i = 0; i < size; i++) {
+ integer |= (in.get() << shift) & mask;
+ mask = mask << 8;
+ shift += 8;
+ }
+ return integer;
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d564876c/common/src/main/java/org/apache/kylin/common/util/BytesUtil.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/kylin/common/util/BytesUtil.java b/common/src/main/java/org/apache/kylin/common/util/BytesUtil.java
index dbd459d..ca1deaf 100644
--- a/common/src/main/java/org/apache/kylin/common/util/BytesUtil.java
+++ b/common/src/main/java/org/apache/kylin/common/util/BytesUtil.java
@@ -184,21 +184,21 @@ public class BytesUtil {
}
public static void writeUnsigned(int num, int size, ByteBuffer out) {
- for (int i = 0; i < size; i++) {
- out.put((byte) num);
- num >>>= 8;
+ int mask = 0xff << ((size - 1) * 8);
+ for (int i = size; i > 0; i--) {
+ int v = (num & mask) >> (i - 1) * 8;
+ out.put((byte) v);
+ mask = mask >> 8;
}
}
public static int readUnsigned(ByteBuffer in, int size) {
int integer = 0;
- int mask = 0xff;
- int shift = 0;
for (int i = 0; i < size; i++) {
- integer |= (in.get() << shift) & mask;
- mask = mask << 8;
- shift += 8;
+ integer = integer << 8;
+ integer += in.get();
}
+
return integer;
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d564876c/common/src/test/java/org/apache/kylin/common/util/BytesUtilTest.java
----------------------------------------------------------------------
diff --git a/common/src/test/java/org/apache/kylin/common/util/BytesUtilTest.java b/common/src/test/java/org/apache/kylin/common/util/BytesUtilTest.java
index 4c88cbe..e34f391 100644
--- a/common/src/test/java/org/apache/kylin/common/util/BytesUtilTest.java
+++ b/common/src/test/java/org/apache/kylin/common/util/BytesUtilTest.java
@@ -22,6 +22,7 @@ import junit.framework.TestCase;
import org.junit.Test;
import java.nio.ByteBuffer;
+import java.util.Arrays;
/**
* by honma
@@ -53,4 +54,23 @@ public class BytesUtilTest extends TestCase {
assertEquals(y[1], false);
}
+ @Test
+ public void testWriteReadUnsignedInt() {
+
+ int testInt = 735033;
+ ByteArray ba = new ByteArray(new byte[3]);
+ BytesUtil.writeUnsigned(testInt, 3, ba.asBuffer());
+
+ byte[] newBytes = new byte[3];
+ System.arraycopy(ba.array(), 0, newBytes, 0, 3);
+ int value = BytesUtil.readUnsigned(new ByteArray(newBytes).asBuffer(), 3);
+
+ assertEquals(value, testInt);
+
+ byte[] anOtherNewBytes = new byte[3];
+ BytesUtil.writeUnsigned(testInt, anOtherNewBytes, 0, 3);
+
+ assertTrue(Arrays.equals(anOtherNewBytes, ba.array()));
+ }
+
}
[38/50] incubator-kylin git commit: Merge branch
'streaming-localdict' of https://github.com/KylinOLAP/Kylin into
streaming-localdict
Posted by li...@apache.org.
Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/12920dc2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/12920dc2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/12920dc2
Branch: refs/heads/streaming-localdict
Commit: 12920dc236a61a3956d718151909e797c83c715e
Parents: 5837af0 929b986
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 16:10:23 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 16:10:23 2015 +0800
----------------------------------------------------------------------
.../invertedindex/index/RawTableRecord.java | 2 +
.../kylin/job/hadoop/cube/BaseCuboidJob.java | 2 +-
.../kylin/job/hadoop/cube/BaseCuboidMapper.java | 246 -------------------
.../job/hadoop/cube/BaseCuboidMapperBase.java | 205 ++++++++++++++++
.../job/hadoop/cube/HiveToBaseCuboidMapper.java | 49 ++++
.../job/hadoop/cube/IIToBaseCuboidMapper.java | 109 ++++++++
.../kylin/job/hadoop/cubev2/InMemCuboidJob.java | 5 -
.../cube/BaseCuboidMapperPerformanceTest.java | 65 -----
.../job/hadoop/cube/BaseCuboidMapperTest.java | 145 -----------
.../HiveToBaseCuboidMapperPerformanceTest.java | 65 +++++
.../hadoop/cube/HiveToBaseCuboidMapperTest.java | 145 +++++++++++
11 files changed, 576 insertions(+), 462 deletions(-)
----------------------------------------------------------------------
[24/50] incubator-kylin git commit: KYLIN-625,
refactor interface to use GTScanRange
Posted by li...@apache.org.
KYLIN-625, refactor interface to use GTScanRange
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/d1369339
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/d1369339
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/d1369339
Branch: refs/heads/streaming-localdict
Commit: d1369339d458dfc974de3f63ef3d7c496e910c8a
Parents: b38206d
Author: Li, Yang <ya...@ebay.com>
Authored: Fri Mar 27 13:25:47 2015 +0800
Committer: Li, Yang <ya...@ebay.com>
Committed: Fri Mar 27 13:25:47 2015 +0800
----------------------------------------------------------------------
.../job/hadoop/cubev2/InMemCuboidMapper.java | 2 +-
.../kylin/storage/gridtable/GTScanRange.java | 61 +++
.../storage/gridtable/GTScanRangePlanner.java | 474 +++++++++++++++++++
.../kylin/storage/gridtable/GTScanRequest.java | 22 +-
.../kylin/storage/gridtable/GridTableTest.java | 2 +-
5 files changed, 548 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d1369339/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidMapper.java
index 5a3565a..ebc65a1 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidMapper.java
@@ -164,7 +164,7 @@ public class InMemCuboidMapper<KEYIN> extends KylinMapper<KEYIN, HCatRecord, Tex
System.arraycopy(Bytes.toBytes(cuboidId), 0, keyBuf, 0, Bytes.toBytes(cuboidId).length);
GridTable gt = cuboidsMap.get(cuboidId);
- GTScanRequest req = new GTScanRequest(gt.getInfo(), null, null, null, null);
+ GTScanRequest req = new GTScanRequest(gt.getInfo(), null, null, null);
IGTScanner scanner = gt.scan(req);
int offSet = 0;
for (GTRecord record : scanner) {
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d1369339/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRange.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRange.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRange.java
new file mode 100644
index 0000000..08513f7
--- /dev/null
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRange.java
@@ -0,0 +1,61 @@
+package org.apache.kylin.storage.gridtable;
+
+import java.util.Collections;
+import java.util.List;
+
+public class GTScanRange {
+
+ final public GTRecord pkStart; // inclusive
+ final public GTRecord pkEnd; // inclusive
+ final public List<GTRecord> hbaseFuzzyKeys; // partial matching primary keys
+
+ public GTScanRange(GTRecord pkStart, GTRecord pkEnd) {
+ this(pkStart, pkEnd, null);
+ }
+
+ public GTScanRange(GTRecord pkStart, GTRecord pkEnd, List<GTRecord> hbaseFuzzyKeys) {
+ assert pkStart.info == pkEnd.info;
+ assert pkStart.maskForEqualHashComp() == pkStart.info.primaryKey;
+ assert pkEnd.maskForEqualHashComp() == pkEnd.info.primaryKey;
+ this.pkStart = pkStart;
+ this.pkEnd = pkEnd;
+ this.hbaseFuzzyKeys = hbaseFuzzyKeys == null ? Collections.<GTRecord>emptyList() : hbaseFuzzyKeys;
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + ((hbaseFuzzyKeys == null) ? 0 : hbaseFuzzyKeys.hashCode());
+ result = prime * result + ((pkEnd == null) ? 0 : pkEnd.hashCode());
+ result = prime * result + ((pkStart == null) ? 0 : pkStart.hashCode());
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj)
+ return true;
+ if (obj == null)
+ return false;
+ if (getClass() != obj.getClass())
+ return false;
+ GTScanRange other = (GTScanRange) obj;
+ if (hbaseFuzzyKeys == null) {
+ if (other.hbaseFuzzyKeys != null)
+ return false;
+ } else if (!hbaseFuzzyKeys.equals(other.hbaseFuzzyKeys))
+ return false;
+ if (pkEnd == null) {
+ if (other.pkEnd != null)
+ return false;
+ } else if (!pkEnd.equals(other.pkEnd))
+ return false;
+ if (pkStart == null) {
+ if (other.pkStart != null)
+ return false;
+ } else if (!pkStart.equals(other.pkStart))
+ return false;
+ return true;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d1369339/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRangePlanner.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRangePlanner.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRangePlanner.java
new file mode 100644
index 0000000..cc58253
--- /dev/null
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRangePlanner.java
@@ -0,0 +1,474 @@
+package org.apache.kylin.storage.gridtable;
+
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.kylin.common.util.ByteArray;
+import org.apache.kylin.metadata.filter.CompareTupleFilter;
+import org.apache.kylin.metadata.filter.IFilterCodeSystem;
+import org.apache.kylin.metadata.filter.LogicalTupleFilter;
+import org.apache.kylin.metadata.filter.TupleFilter;
+import org.apache.kylin.metadata.filter.TupleFilter.FilterOperatorEnum;
+import org.apache.kylin.metadata.model.TblColRef;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
+public class GTScanRangePlanner {
+
+ private static final int MAX_HBASE_FUZZY_KEYS = 100;
+
+ final private GTInfo info;
+ final private ComparatorEx<ByteArray> byteUnknownIsSmaller;
+ final private ComparatorEx<ByteArray> byteUnknownIsBigger;
+ final private ComparatorEx<GTRecord> recordUnknownIsSmaller;
+ final private ComparatorEx<GTRecord> recordUnknownIsBigger;
+
+ public GTScanRangePlanner(GTInfo info) {
+ this.info = info;
+
+ IFilterCodeSystem<ByteArray> cs = info.codeSystem.getFilterCodeSystem();
+ this.byteUnknownIsSmaller = byteComparatorTreatsUnknownSmaller(cs);
+ this.byteUnknownIsBigger = byteComparatorTreatsUnknownBigger(cs);
+ this.recordUnknownIsSmaller = recordComparatorTreatsUnknownSmaller(cs);
+ this.recordUnknownIsBigger = recordComparatorTreatsUnknownBigger(cs);
+ }
+
+ public List<GTScanRange> planScanRanges(TupleFilter filter, int maxRanges) {
+
+ TupleFilter flatFilter = flattenToOrAndFilter(filter);
+
+ List<Collection<ColumnRange>> orAndDimRanges = translateToOrAndDimRanges(flatFilter);
+
+ List<GTScanRange> scanRanges = Lists.newArrayListWithCapacity(orAndDimRanges.size());
+ for (Collection<ColumnRange> andDimRanges : orAndDimRanges) {
+ GTScanRange scanRange = newScanRange(andDimRanges);
+ scanRanges.add(scanRange);
+ }
+
+ List<GTScanRange> mergedRanges = mergeOverlapRanges(scanRanges);
+ mergedRanges = mergeTooManyRanges(mergedRanges, maxRanges);
+
+ return mergedRanges;
+ }
+
+ private GTScanRange newScanRange(Collection<ColumnRange> andDimRanges) {
+ GTRecord pkStart = new GTRecord(info);
+ GTRecord pkEnd = new GTRecord(info);
+ List<GTRecord> hbaseFuzzyKeys = Lists.newArrayList();
+
+ for (ColumnRange range : andDimRanges) {
+ int col = range.column.getColumn().getZeroBasedIndex();
+ if (info.primaryKey.get(col) == false)
+ continue;
+
+ pkStart.set(col, range.begin);
+ pkEnd.set(col, range.end);
+
+ BitSet fuzzyMask = new BitSet();
+ fuzzyMask.set(col);
+ for (ByteArray v : range.equals) {
+ GTRecord fuzzy = new GTRecord(info);
+ fuzzy.set(col, v);
+ fuzzy.maskForEqualHashComp(fuzzyMask);
+ hbaseFuzzyKeys.add(fuzzy);
+ }
+ }
+
+ pkStart.maskForEqualHashComp(info.primaryKey);
+ pkEnd.maskForEqualHashComp(info.primaryKey);
+ return new GTScanRange(pkStart, pkEnd, hbaseFuzzyKeys);
+ }
+
+ private TupleFilter flattenToOrAndFilter(TupleFilter filter) {
+ if (filter == null)
+ return null;
+
+ TupleFilter flatFilter = filter.flatFilter();
+
+ // normalize to OR-AND filter
+ if (flatFilter.getOperator() == FilterOperatorEnum.AND) {
+ LogicalTupleFilter f = new LogicalTupleFilter(FilterOperatorEnum.OR);
+ f.addChild(flatFilter);
+ flatFilter = f;
+ }
+
+ if (flatFilter.getOperator() != FilterOperatorEnum.OR)
+ throw new IllegalStateException();
+
+ return flatFilter;
+ }
+
+ private List<Collection<ColumnRange>> translateToOrAndDimRanges(TupleFilter flatFilter) {
+ List<Collection<ColumnRange>> result = Lists.newArrayList();
+
+ if (flatFilter == null) {
+ result.add(Collections.<ColumnRange> emptyList());
+ return result;
+ }
+
+ for (TupleFilter andFilter : flatFilter.getChildren()) {
+ if (andFilter.getOperator() != FilterOperatorEnum.AND)
+ throw new IllegalStateException("Filter should be AND instead of " + andFilter);
+
+ Collection<ColumnRange> andRanges = translateToAndDimRanges(andFilter.getChildren());
+ result.add(andRanges);
+ }
+
+ return preprocessConstantConditions(result);
+ }
+
+ private Collection<ColumnRange> translateToAndDimRanges(List<? extends TupleFilter> andFilters) {
+ Map<TblColRef, ColumnRange> rangeMap = new HashMap<TblColRef, ColumnRange>();
+ for (TupleFilter filter : andFilters) {
+ if ((filter instanceof CompareTupleFilter) == false) {
+ continue;
+ }
+
+ CompareTupleFilter comp = (CompareTupleFilter) filter;
+ if (comp.getColumn() == null) {
+ continue;
+ }
+
+ @SuppressWarnings("unchecked")
+ ColumnRange newRange = new ColumnRange(comp.getColumn(), (Set<ByteArray>) comp.getValues(), comp.getOperator());
+ ColumnRange existing = rangeMap.get(newRange.column);
+ if (existing == null) {
+ rangeMap.put(newRange.column, newRange);
+ } else {
+ existing.andMerge(newRange);
+ }
+ }
+ return rangeMap.values();
+ }
+
+ private List<Collection<ColumnRange>> preprocessConstantConditions(List<Collection<ColumnRange>> orAndRanges) {
+ boolean globalAlwaysTrue = false;
+ Iterator<Collection<ColumnRange>> iterator = orAndRanges.iterator();
+ while (iterator.hasNext()) {
+ Collection<ColumnRange> andRanges = iterator.next();
+ Iterator<ColumnRange> iterator2 = andRanges.iterator();
+ boolean hasAlwaysFalse = false;
+ while (iterator2.hasNext()) {
+ ColumnRange range = iterator2.next();
+ if (range.satisfyAll())
+ iterator2.remove();
+ else if (range.satisfyNone())
+ hasAlwaysFalse = true;
+ }
+ if (hasAlwaysFalse) {
+ iterator.remove();
+ } else if (andRanges.isEmpty()) {
+ globalAlwaysTrue = true;
+ break;
+ }
+ }
+ if (globalAlwaysTrue) {
+ orAndRanges.clear();
+ orAndRanges.add(Collections.<ColumnRange> emptyList());
+ }
+ return orAndRanges;
+ }
+
+ private List<GTScanRange> mergeOverlapRanges(List<GTScanRange> ranges) {
+ if (ranges.size() <= 1) {
+ return ranges;
+ }
+
+ // sort ranges by start key
+ Collections.sort(ranges, new Comparator<GTScanRange>() {
+ @Override
+ public int compare(GTScanRange a, GTScanRange b) {
+ return recordUnknownIsSmaller.compare(a.pkStart, b.pkStart);
+ }
+ });
+
+ // merge the overlap range
+ List<GTScanRange> mergedRanges = new ArrayList<GTScanRange>();
+ int mergeBeginIndex = 0;
+ GTRecord mergeEnd = ranges.get(0).pkEnd;
+ for (int index = 0; index < ranges.size(); index++) {
+ GTScanRange range = ranges.get(index);
+
+ // if overlap, swallow it
+ if (recordUnknownIsSmaller.min(range.pkStart, mergeEnd) == range.pkStart //
+ || recordUnknownIsBigger.max(mergeEnd, range.pkStart) == mergeEnd) {
+ mergeEnd = recordUnknownIsBigger.max(mergeEnd, range.pkEnd);
+ continue;
+ }
+
+ // not overlap, split here
+ GTScanRange mergedRange = mergeKeyRange(ranges.subList(mergeBeginIndex, index));
+ mergedRanges.add(mergedRange);
+
+ // start new split
+ mergeBeginIndex = index;
+ mergeEnd = recordUnknownIsBigger.max(mergeEnd, range.pkEnd);
+ }
+
+ // don't miss the last range
+ GTScanRange mergedRange = mergeKeyRange(ranges.subList(mergeBeginIndex, ranges.size()));
+ mergedRanges.add(mergedRange);
+
+ return mergedRanges;
+ }
+
+ private GTScanRange mergeKeyRange(List<GTScanRange> ranges) {
+ GTScanRange first = ranges.get(0);
+ if (ranges.size() == 1)
+ return first;
+
+ GTRecord start = first.pkStart;
+ GTRecord end = first.pkEnd;
+ List<GTRecord> newFuzzyKeys = new ArrayList<GTRecord>();
+
+ boolean hasNonFuzzyRange = false;
+ for (GTScanRange range : ranges) {
+ hasNonFuzzyRange = hasNonFuzzyRange || range.hbaseFuzzyKeys.isEmpty();
+ newFuzzyKeys.addAll(range.hbaseFuzzyKeys);
+ end = recordUnknownIsBigger.max(end, range.pkEnd);
+ }
+
+ // if any range is non-fuzzy, then all fuzzy keys must be cleared
+ // also too many fuzzy keys will slow down HBase scan
+ if (hasNonFuzzyRange || newFuzzyKeys.size() > MAX_HBASE_FUZZY_KEYS) {
+ newFuzzyKeys.clear();
+ }
+
+ return new GTScanRange(start, end, newFuzzyKeys);
+ }
+
+ private List<GTScanRange> mergeTooManyRanges(List<GTScanRange> ranges, int maxRanges) {
+ if (ranges.size() < maxRanges) {
+ return ranges;
+ }
+
+ // TODO: check the distance between range and merge the large distance range
+ List<GTScanRange> result = new ArrayList<GTScanRange>(1);
+ GTScanRange mergedRange = mergeKeyRange(ranges);
+ result.add(mergedRange);
+ return result;
+ }
+
+ private class ColumnRange {
+ private TblColRef column;
+ private ByteArray begin = new ByteArray();
+ private ByteArray end = new ByteArray();
+ private Set<ByteArray> equals;
+
+ public ColumnRange(TblColRef column, Set<ByteArray> values, FilterOperatorEnum op) {
+ this.column = column;
+
+ switch (op) {
+ case EQ:
+ case IN:
+ equals = new HashSet<ByteArray>(values);
+ refreshBeginEndFromEquals();
+ break;
+ case LT:
+ case LTE:
+ end = byteUnknownIsBigger.max(values);
+ break;
+ case GT:
+ case GTE:
+ begin = byteUnknownIsSmaller.min(values);
+ break;
+ case NEQ:
+ case NOTIN:
+ case ISNULL:
+ case ISNOTNULL:
+ // let Optiq filter it!
+ break;
+ default:
+ throw new UnsupportedOperationException(op.name());
+ }
+ }
+
+ void copy(TblColRef column, ByteArray beginValue, ByteArray endValue, Set<ByteArray> equalValues) {
+ this.column = column;
+ this.begin = beginValue;
+ this.end = endValue;
+ this.equals = equalValues;
+ }
+
+ private void refreshBeginEndFromEquals() {
+ this.begin = byteUnknownIsSmaller.min(this.equals);
+ this.end = byteUnknownIsBigger.max(this.equals);
+ }
+
+ public boolean satisfyAll() {
+ return begin.array() == null && end.array() == null; // the NEQ case
+ }
+
+ public boolean satisfyNone() {
+ if (equals != null) {
+ return equals.isEmpty();
+ } else if (begin.array() != null && end.array() != null) {
+ return info.codeSystem.getFilterCodeSystem().compare(begin, end) > 0;
+ } else {
+ return false;
+ }
+ }
+
+ public void andMerge(ColumnRange another) {
+ assert this.column.equals(another.column);
+
+ if (another.satisfyAll()) {
+ return;
+ }
+
+ if (this.satisfyAll()) {
+ copy(another.column, another.begin, another.end, another.equals);
+ return;
+ }
+
+ if (this.equals != null && another.equals != null) {
+ this.equals.retainAll(another.equals);
+ refreshBeginEndFromEquals();
+ return;
+ }
+
+ if (this.equals != null) {
+ this.equals = filter(this.equals, another.begin, another.end);
+ refreshBeginEndFromEquals();
+ return;
+ }
+
+ if (another.equals != null) {
+ this.equals = filter(another.equals, this.begin, this.end);
+ refreshBeginEndFromEquals();
+ return;
+ }
+
+ this.begin = byteUnknownIsSmaller.min(this.begin, another.begin);
+ this.end = byteUnknownIsBigger.max(this.end, another.end);
+ }
+
+ private Set<ByteArray> filter(Set<ByteArray> equalValues, ByteArray beginValue, ByteArray endValue) {
+ Set<ByteArray> result = Sets.newHashSetWithExpectedSize(equalValues.size());
+ for (ByteArray v : equalValues) {
+ if (byteUnknownIsSmaller.compare(beginValue, v) <= 0 && byteUnknownIsBigger.compare(v, endValue) <= 0) {
+ result.add(v);
+ }
+ }
+ return equalValues;
+ }
+
+ public String toString() {
+ if (equals == null) {
+ return column.getName() + " between " + begin + " and " + end;
+ } else {
+ return column.getName() + " in " + equals;
+ }
+ }
+ }
+
+ public static abstract class ComparatorEx<T> implements Comparator<T> {
+
+ public T min(Collection<T> v) {
+ if (v.size() < 0) {
+ return null;
+ }
+
+ Iterator<T> iterator = v.iterator();
+ T min = iterator.next();
+ while (iterator.hasNext()) {
+ min = min(min, iterator.next());
+ }
+ return min;
+ }
+
+ public T max(Collection<T> v) {
+ if (v.size() < 0) {
+ return null;
+ }
+
+ Iterator<T> iterator = v.iterator();
+ T max = iterator.next();
+ while (iterator.hasNext()) {
+ max = max(max, iterator.next());
+ }
+ return max;
+ }
+
+ public T min(T a, T b) {
+ return compare(a, b) <= 0 ? a : b;
+ }
+
+ public T max(T a, T b) {
+ return compare(a, b) >= 0 ? a : b;
+ }
+
+ public boolean between(T v, T start, T end) {
+ return compare(start, v) <= 0 && compare(v, end) <= 0;
+ }
+ }
+
+ public static ComparatorEx<ByteArray> byteComparatorTreatsUnknownSmaller(final IFilterCodeSystem<ByteArray> cs) {
+ return new ComparatorEx<ByteArray>() {
+ @Override
+ public int compare(ByteArray a, ByteArray b) {
+ if (a.array() == null)
+ return -1;
+ else if (b.array() == null)
+ return 1;
+ else
+ return cs.compare(a, b);
+ }
+ };
+ }
+
+ public static ComparatorEx<ByteArray> byteComparatorTreatsUnknownBigger(final IFilterCodeSystem<ByteArray> cs) {
+ return new ComparatorEx<ByteArray>() {
+ @Override
+ public int compare(ByteArray a, ByteArray b) {
+ if (a.array() == null)
+ return 1;
+ else if (b.array() == null)
+ return -1;
+ else
+ return cs.compare(a, b);
+ }
+ };
+ }
+
+ public static ComparatorEx<GTRecord> recordComparatorTreatsUnknownSmaller(IFilterCodeSystem<ByteArray> cs) {
+ return new RecordComparator(byteComparatorTreatsUnknownSmaller(cs));
+ }
+
+ public static ComparatorEx<GTRecord> recordComparatorTreatsUnknownBigger(IFilterCodeSystem<ByteArray> cs) {
+ return new RecordComparator(byteComparatorTreatsUnknownBigger(cs));
+ }
+
+ private static class RecordComparator extends ComparatorEx<GTRecord> {
+ final ComparatorEx<ByteArray> comparator;
+
+ RecordComparator(ComparatorEx<ByteArray> byteComparator) {
+ this.comparator = byteComparator;
+ }
+
+ @Override
+ public int compare(GTRecord a, GTRecord b) {
+ assert a.info == b.info;
+ assert a.maskForEqualHashComp() == b.maskForEqualHashComp();
+ BitSet mask = a.maskForEqualHashComp();
+
+ int comp = 0;
+ for (int i = mask.nextSetBit(0); i >= 0; i = mask.nextSetBit(i + 1)) {
+ comp = comparator.compare(a.cols[i], b.cols[i]);
+ if (comp != 0)
+ return comp;
+ }
+ return 0; // equals
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d1369339/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRequest.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRequest.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRequest.java
index 977363c..c92cba4 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRequest.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRequest.java
@@ -12,8 +12,7 @@ public class GTScanRequest {
// basic
private GTInfo info;
- private GTRecord pkStart; // inclusive
- private GTRecord pkEnd; // inclusive
+ private GTScanRange range;
private BitSet columns;
// optional filtering
@@ -25,23 +24,21 @@ public class GTScanRequest {
private String[] aggrMetricsFuncs;
public GTScanRequest(GTInfo info) {
- this(info, null, null, null, null);
+ this(info, null, null, null);
}
- public GTScanRequest(GTInfo info, GTRecord pkStart, GTRecord pkEnd, BitSet columns, TupleFilter filterPushDown) {
+ public GTScanRequest(GTInfo info, GTScanRange range, BitSet columns, TupleFilter filterPushDown) {
this.info = info;
- this.pkStart = pkStart;
- this.pkEnd = pkEnd;
+ this.range = range;
this.columns = columns;
this.filterPushDown = filterPushDown;
validate();
}
- public GTScanRequest(GTInfo info, GTRecord pkStart, GTRecord pkEnd, BitSet aggrGroupBy, BitSet aggrMetrics, //
+ public GTScanRequest(GTInfo info, GTScanRange range, BitSet aggrGroupBy, BitSet aggrMetrics, //
String[] aggrMetricsFuncs, TupleFilter filterPushDown) {
this.info = info;
- this.pkStart = pkStart;
- this.pkEnd = pkEnd;
+ this.range = range;
this.columns = new BitSet();
this.filterPushDown = filterPushDown;
@@ -53,6 +50,9 @@ public class GTScanRequest {
}
private void validate() {
+ if (range == null)
+ range = new GTScanRange(null, null);
+
if (columns == null)
columns = (BitSet) info.colAll.clone();
@@ -111,11 +111,11 @@ public class GTScanRequest {
}
public GTRecord getPkStart() {
- return pkStart;
+ return range.pkStart;
}
public GTRecord getPkEnd() {
- return pkEnd;
+ return range.pkEnd;
}
public BitSet getColumns() {
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d1369339/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java
----------------------------------------------------------------------
diff --git a/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java b/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java
index e2071d6..1a69138 100644
--- a/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java
+++ b/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java
@@ -80,7 +80,7 @@ public class GridTableTest {
}
private IGTScanner scanAndAggregate(GridTable table) throws IOException {
- GTScanRequest req = new GTScanRequest(table.getInfo(), null, null, setOf(0, 2), setOf(3, 4), new String[] { "count", "sum" }, null);
+ GTScanRequest req = new GTScanRequest(table.getInfo(), null, setOf(0, 2), setOf(3, 4), new String[] { "count", "sum" }, null);
IGTScanner scanner = table.scan(req);
int i = 0;
for (GTRecord r : scanner) {
[23/50] incubator-kylin git commit: KYLIN-625,
bug fix about dealing rounding when convert filter
Posted by li...@apache.org.
KYLIN-625, bug fix about dealing rounding when convert filter
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/b38206db
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/b38206db
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/b38206db
Branch: refs/heads/streaming-localdict
Commit: b38206db488e6aef66a4e224adf0acebd56612b7
Parents: 7f73abe
Author: Li, Yang <ya...@ebay.com>
Authored: Fri Mar 27 11:58:31 2015 +0800
Committer: Li, Yang <ya...@ebay.com>
Committed: Fri Mar 27 11:58:31 2015 +0800
----------------------------------------------------------------------
.../gridtable/GTDictionaryCodeSystem.java | 16 +++++++++---
.../apache/kylin/storage/gridtable/GTUtil.java | 26 ++++++++++----------
.../kylin/storage/gridtable/IGTCodeSystem.java | 25 +++++++++++++------
3 files changed, 44 insertions(+), 23 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/b38206db/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
index 6f2d9ce..ada4ed7 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
@@ -83,7 +83,12 @@ public class GTDictionaryCodeSystem implements IGTCodeSystem {
@Override
public void encodeColumnValue(int col, Object value, int roundingFlag, ByteBuffer buf) {
- throw new UnsupportedOperationException();
+ DataTypeSerializer serializer = serializers[col];
+ if (serializer instanceof DictionarySerializer) {
+ ((DictionarySerializer) serializer).serializeWithRounding(value, roundingFlag, buf);
+ } else {
+ serializer.serialize(value, buf);
+ }
}
@Override
@@ -103,10 +108,15 @@ public class GTDictionaryCodeSystem implements IGTCodeSystem {
this.dictionary = dictionary;
}
+ public void serializeWithRounding(Object value, int roundingFlag, ByteBuffer buf) {
+ int id = dictionary.getIdFromValue(value, roundingFlag);
+ BytesUtil.writeUnsigned(id, dictionary.getSizeOfId(), buf);
+ }
+
@Override
- public void serialize(Object value, ByteBuffer out) {
+ public void serialize(Object value, ByteBuffer buf) {
int id = dictionary.getIdFromValue(value);
- BytesUtil.writeUnsigned(id, dictionary.getSizeOfId(), out);
+ BytesUtil.writeUnsigned(id, dictionary.getSizeOfId(), buf);
}
@Override
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/b38206db/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java
index cf970b1..1fb0376 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java
@@ -122,7 +122,7 @@ public class GTUtil {
Set newValues = Sets.newHashSet();
for (Object value : constValues) {
code = translate(col, value, 0);
- if (!isDictNull(code))
+ if (code != null)
newValues.add(code);
}
if (newValues.isEmpty()) {
@@ -134,7 +134,7 @@ public class GTUtil {
break;
case NEQ:
code = translate(col, firstValue, 0);
- if (isDictNull(code)) {
+ if (code == null) {
result = ConstantTupleFilter.TRUE;
} else {
newCompareFilter.addChild(new ConstantTupleFilter(code));
@@ -143,7 +143,7 @@ public class GTUtil {
break;
case LT:
code = translate(col, firstValue, 1);
- if (isDictNull(code)) {
+ if (code == null) {
result = ConstantTupleFilter.TRUE;
} else {
newCompareFilter.addChild(new ConstantTupleFilter(code));
@@ -152,7 +152,7 @@ public class GTUtil {
break;
case LTE:
code = translate(col, firstValue, -1);
- if (isDictNull(code)) {
+ if (code == null) {
result = ConstantTupleFilter.FALSE;
} else {
newCompareFilter.addChild(new ConstantTupleFilter(code));
@@ -161,7 +161,7 @@ public class GTUtil {
break;
case GT:
code = translate(col, firstValue, -1);
- if (isDictNull(code)) {
+ if (code == null) {
result = ConstantTupleFilter.TRUE;
} else {
newCompareFilter.addChild(new ConstantTupleFilter(code));
@@ -170,7 +170,7 @@ public class GTUtil {
break;
case GTE:
code = translate(col, firstValue, 1);
- if (isDictNull(code)) {
+ if (code == null) {
result = ConstantTupleFilter.FALSE;
} else {
newCompareFilter.addChild(new ConstantTupleFilter(code));
@@ -183,16 +183,16 @@ public class GTUtil {
return result;
}
- private boolean isDictNull(ByteArray code) {
- return info.codeSystem.getFilterCodeSystem().isNull(code);
- }
-
transient ByteBuffer buf = ByteBuffer.allocate(info.maxRecordLength);
private ByteArray translate(int col, Object value, int roundingFlag) {
- buf.clear();
- info.codeSystem.encodeColumnValue(col, value, roundingFlag, buf);
- return ByteArray.copyOf(buf.array(), 0, buf.position());
+ try {
+ buf.clear();
+ info.codeSystem.encodeColumnValue(col, value, roundingFlag, buf);
+ return ByteArray.copyOf(buf.array(), 0, buf.position());
+ } catch (IllegalArgumentException ex) {
+ return null;
+ }
}
}, info.codeSystem.getFilterCodeSystem());
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/b38206db/storage/src/main/java/org/apache/kylin/storage/gridtable/IGTCodeSystem.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/IGTCodeSystem.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/IGTCodeSystem.java
index 87b6643..4182604 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/IGTCodeSystem.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/IGTCodeSystem.java
@@ -12,19 +12,30 @@ public interface IGTCodeSystem {
IFilterCodeSystem<ByteArray> getFilterCodeSystem();
- /** return the length of code starting at the specified buffer, buffer position must not change after return */
+ /** Return the length of code starting at the specified buffer, buffer position must not change after return */
int codeLength(int col, ByteBuffer buf);
- /** encode a value into code */
- void encodeColumnValue(int col, Object value, ByteBuffer buf);
+ /**
+ * Encode a value into code.
+ *
+ * @throws IllegalArgumentException if the value is not in dictionary
+ */
+ void encodeColumnValue(int col, Object value, ByteBuffer buf) throws IllegalArgumentException;
- /** encode a value into code, with option to floor rounding -1, no rounding 0, or ceiling rounding 1 */
- void encodeColumnValue(int col, Object value, int roundingFlag, ByteBuffer buf);
+ /**
+ * Encode a value into code, with option to floor rounding -1, no rounding 0, or ceiling rounding 1
+ *
+ * @throws IllegalArgumentException
+ * - if rounding=0 and the value is not in dictionary
+ * - if rounding=-1 and there's no equal or smaller value in dictionary
+ * - if rounding=1 and there's no equal or bigger value in dictionary
+ */
+ void encodeColumnValue(int col, Object value, int roundingFlag, ByteBuffer buf) throws IllegalArgumentException;
- /** decode a code into value */
+ /** Decode a code into value */
Object decodeColumnValue(int col, ByteBuffer buf);
- /** return an aggregator for metrics */
+ /** Return an aggregator for metrics */
MeasureAggregator<?> newMetricsAggregator(String aggrFunction, int col);
}
[16/50] incubator-kylin git commit: Small change.
Posted by li...@apache.org.
Small change.
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/0edf4004
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/0edf4004
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/0edf4004
Branch: refs/heads/streaming-localdict
Commit: 0edf4004a2125c01b2f3b8c151b899a03db4ae0c
Parents: 8d40a57
Author: Shao Feng, Shi <sh...@ebay.com>
Authored: Fri Mar 27 00:01:59 2015 +0800
Committer: Shao Feng, Shi <sh...@ebay.com>
Committed: Fri Mar 27 00:01:59 2015 +0800
----------------------------------------------------------------------
.../job/hadoop/cube/FactDistinctColumnsReducer.java | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/0edf4004/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java
index e1529d3..165f66c 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java
@@ -42,10 +42,7 @@ import org.apache.kylin.job.hadoop.AbstractHadoopJob;
import org.apache.kylin.metadata.model.TblColRef;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
/**
* @author yangli9
@@ -167,8 +164,13 @@ public class FactDistinctColumnsReducer extends KylinReducer<LongWritable, Text,
out.write(msg.getBytes());
out.write('\n');
out.write('\n');
-
- for (long i = 0; i < baseCuboidId; i++) {
+
+ List<Long> allCuboids = new ArrayList<Long>();
+ allCuboids.addAll(rowKeyCountInCuboids.keySet());
+ Collections.sort(allCuboids);
+ for (long i : allCuboids) {
+ if (i > baseCuboidId)
+ continue;
msg = "Cuboid " + i + " has " + rowKeyCountInCuboids.get(i) + " rows.";
out.write(msg.getBytes());
out.write('\n');
[30/50] incubator-kylin git commit: KYLIN-653 use a FIFOIterable to
solve ConcurrentModificationException
Posted by li...@apache.org.
KYLIN-653 use a FIFOIterable to solve ConcurrentModificationException
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/d09e00d6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/d09e00d6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/d09e00d6
Branch: refs/heads/streaming-localdict
Commit: d09e00d6c60e6d0e88c2512041c76456dd5fb64d
Parents: 4df0531
Author: honma <ho...@ebay.com>
Authored: Fri Mar 27 11:23:11 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Fri Mar 27 15:16:20 2015 +0800
----------------------------------------------------------------------
.../apache/kylin/common/util/FIFOIterable.java | 20 ++++++++++++
.../apache/kylin/common/util/FIFOIterator.java | 34 ++++++++++++++++++++
.../org/apache/kylin/common/util/BasicTest.java | 12 +++++--
.../model/IIKeyValueCodecWithState.java | 6 ++--
.../IIKeyValueCodecWithStateTest.java | 16 ++++++---
5 files changed, 80 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d09e00d6/common/src/main/java/org/apache/kylin/common/util/FIFOIterable.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/kylin/common/util/FIFOIterable.java b/common/src/main/java/org/apache/kylin/common/util/FIFOIterable.java
new file mode 100644
index 0000000..c0f7d68
--- /dev/null
+++ b/common/src/main/java/org/apache/kylin/common/util/FIFOIterable.java
@@ -0,0 +1,20 @@
+package org.apache.kylin.common.util;
+
+import java.util.Iterator;
+import java.util.Queue;
+
+/**
+ * Created by Hongbin Ma(Binmahone) on 3/27/15.
+ */
+public class FIFOIterable<T> implements Iterable<T> {
+ private Queue<T> q;
+
+ public FIFOIterable(Queue<T> q) {
+ this.q = q;
+ }
+
+ @Override
+ public Iterator<T> iterator() {
+ return new FIFOIterator<T>(q);
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d09e00d6/common/src/main/java/org/apache/kylin/common/util/FIFOIterator.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/kylin/common/util/FIFOIterator.java b/common/src/main/java/org/apache/kylin/common/util/FIFOIterator.java
new file mode 100644
index 0000000..6751cb0
--- /dev/null
+++ b/common/src/main/java/org/apache/kylin/common/util/FIFOIterator.java
@@ -0,0 +1,34 @@
+package org.apache.kylin.common.util;
+
+import java.util.Iterator;
+import java.util.Queue;
+
+/**
+ * Created by Hongbin Ma(Binmahone) on 3/27/15.
+ *
+ * Normal iterators in Collections are fail-safe,
+ * i.e. adding elements to a queue will break current iterator.
+ * The FIFOIterator is stateless, it only check the first element of a Queue
+ */
+public class FIFOIterator<T> implements Iterator<T> {
+ private Queue<T> q;
+
+ public FIFOIterator(Queue<T> q) {
+ this.q = q;
+ }
+
+ @Override
+ public boolean hasNext() {
+ return !q.isEmpty();
+ }
+
+ @Override
+ public T next() {
+ return q.poll();
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d09e00d6/common/src/test/java/org/apache/kylin/common/util/BasicTest.java
----------------------------------------------------------------------
diff --git a/common/src/test/java/org/apache/kylin/common/util/BasicTest.java b/common/src/test/java/org/apache/kylin/common/util/BasicTest.java
index a480ebd..0b92bf9 100644
--- a/common/src/test/java/org/apache/kylin/common/util/BasicTest.java
+++ b/common/src/test/java/org/apache/kylin/common/util/BasicTest.java
@@ -22,9 +22,10 @@ import java.io.IOException;
import java.nio.ByteBuffer;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
-import java.util.Calendar;
+import java.util.*;
import java.util.concurrent.*;
+import com.google.common.collect.Lists;
import org.apache.commons.configuration.ConfigurationException;
import org.junit.Ignore;
import org.junit.Test;
@@ -75,7 +76,7 @@ public class BasicTest {
a.setTimeInMillis(current);
b.set(a.get(Calendar.YEAR), a.get(Calendar.MONTH), a.get(Calendar.DAY_OF_MONTH), a.get(Calendar.HOUR_OF_DAY), a.get(Calendar.MINUTE));
- c.set(a.get(Calendar.YEAR), a.get(Calendar.MONTH), a.get(Calendar.DAY_OF_MONTH), a.get(Calendar.HOUR_OF_DAY),0);
+ c.set(a.get(Calendar.YEAR), a.get(Calendar.MONTH), a.get(Calendar.DAY_OF_MONTH), a.get(Calendar.HOUR_OF_DAY), 0);
System.out.println(time(b.getTimeInMillis()));
System.out.println(time(c.getTimeInMillis()));
@@ -85,6 +86,13 @@ public class BasicTest {
@Test
@Ignore("fix it later")
public void test2() throws IOException, ConfigurationException {
+ Queue<String> a = new LinkedList<>();
+ Iterator<String> i = new FIFOIterator<String>(a);
+ System.out.println(i.hasNext());
+ a.add("1");
+ System.out.println(i.hasNext());
+ System.out.println(i.next());
+
}
private static String time(long t) {
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d09e00d6/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
index e838283..82f1020 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
@@ -2,8 +2,10 @@ package org.apache.kylin.invertedindex.model;
import java.util.ArrayList;
import java.util.Iterator;
+import java.util.LinkedList;
import com.google.common.base.Preconditions;
+import org.apache.kylin.common.util.FIFOIterator;
import org.apache.kylin.invertedindex.index.Slice;
import org.apache.kylin.invertedindex.index.TableRecordInfoDigest;
@@ -25,12 +27,12 @@ public class IIKeyValueCodecWithState extends IIKeyValueCodec {
protected static class IIRowDecoderWithState extends IIRowDecoder {
- final ArrayList<IIRow> buffer = Lists.newArrayList();
+ final LinkedList<IIRow> buffer = Lists.newLinkedList();
private Iterator<Slice> superIterator = null;
private IIRowDecoderWithState(TableRecordInfoDigest digest, Iterator<IIRow> iiRowIterator) {
super(digest, iiRowIterator);
- this.feedingIterator = buffer.iterator();
+ this.feedingIterator = new FIFOIterator<>(buffer);
}
private Iterator<Slice> getSuperIterator() {
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d09e00d6/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java
index 25e250c..416d31a 100644
--- a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java
+++ b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java
@@ -8,6 +8,7 @@ import java.util.concurrent.LinkedBlockingQueue;
import javax.annotation.Nullable;
+import org.apache.kylin.common.util.FIFOIterable;
import org.apache.kylin.common.util.LocalFileMetadataTestCase;
import org.apache.kylin.invertedindex.IIInstance;
import org.apache.kylin.invertedindex.IIManager;
@@ -19,6 +20,7 @@ import org.apache.kylin.invertedindex.model.IIKeyValueCodecWithState;
import org.apache.kylin.invertedindex.model.IIRow;
import org.apache.kylin.invertedindex.model.KeyValueCodec;
import org.apache.kylin.streaming.Stream;
+import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
@@ -38,7 +40,7 @@ public class IIKeyValueCodecWithStateTest extends LocalFileMetadataTestCase {
final String[] inputs = new String[] { //
"FP-non GTC,0,15,145970,0,28,Toys,2008-10-08 07:18:40,USER_Y,Toys & Hobbies,Models & Kits,Automotive,0,Ebay,USER_S,15,Professional-Other,2012-08-16,2012-08-11,0,2012-08-16,145970,10000329,26.8551,0", //
- "ABIN,0,-99,43479,0,21,Photo,2012-09-11 20:26:04,USER_Y,Cameras & Photo,Film Photography,Other,0,Ebay,USER_S,-99,Not Applicable,2012-08-16,2012-08-11,2012-08-16,43479,10000807,26.2474,0", //
+ "ABIN,0,-99,43479,0,21,Photo,2012-09-11 20:26:04,USER_Y,Cameras & Photo,Film Photography,Other,0,Ebay,USER_S,-99,Not Applicable,2012-08-16,2012-08-11,0,2012-08-16,43479,10000807,26.2474,0", //
"ABIN,0,16,80053,0,12,Computers,2012-06-19 21:15:09,USER_Y,Computers/Tablets & Networking,MonitorProjectors & Accs,Monitors,0,Ebay,USER_S,16,Consumer-Other,2012-08-16,2012-08-11,0,2012-08-16,80053,10000261,94.2273,0" };
@Before
@@ -64,14 +66,19 @@ public class IIKeyValueCodecWithStateTest extends LocalFileMetadataTestCase {
future.get();
}
+ @After
+ public void after() throws Exception {
+ cleanupTestMetadata();
+ }
+
@Test
public void basicTest() {
- ArrayList<IIRow> buffer = Lists.newArrayList();
+ Queue<IIRow> buffer = Lists.newLinkedList();
+ FIFOIterable bufferIterable = new FIFOIterable(buffer);
TableRecordInfo info = new TableRecordInfo(iiDesc);
TableRecordInfoDigest digest = info.getDigest();
- int columnCount = digest.getColumnCount();
KeyValueCodec codec = new IIKeyValueCodecWithState(digest);
- Iterator<Slice> slices = codec.decodeKeyValue(buffer).iterator();
+ Iterator<Slice> slices = codec.decodeKeyValue(bufferIterable).iterator();
Assert.assertTrue(!slices.hasNext());
Assert.assertEquals(iiRowList.size(), digest.getColumnCount());
@@ -87,5 +94,6 @@ public class IIKeyValueCodecWithStateTest extends LocalFileMetadataTestCase {
}
Slice newSlice = slices.next();
+ Assert.assertEquals(newSlice.getLocalDictionaries().get(0).getSize(), 2);
}
}
[40/50] incubator-kylin git commit: refactor
Posted by li...@apache.org.
refactor
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/8e6afbf4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/8e6afbf4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/8e6afbf4
Branch: refs/heads/streaming-localdict
Commit: 8e6afbf44d5d4a96b0e55cf8c617fbcdb21a582e
Parents: 12920dc
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 16:50:50 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 16:50:50 2015 +0800
----------------------------------------------------------------------
.../invertedindex/ToyIIStreamBuilder.java | 36 ------
.../kylin/streaming/JsonStreamParser.java | 12 +-
.../apache/kylin/streaming/StreamParser.java | 4 +-
.../kylin/streaming/StringStreamParser.java | 2 +-
.../kylin/streaming/cube/CubeStreamBuilder.java | 2 +-
.../invertedindex/IIStreamBuilder.java | 120 +++++-------------
.../streaming/invertedindex/SliceBuilder.java | 126 +++++++++++++++++++
.../invertedindex/PrintOutStreamBuilder.java | 5 +-
8 files changed, 167 insertions(+), 140 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8e6afbf4/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/ToyIIStreamBuilder.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/ToyIIStreamBuilder.java b/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/ToyIIStreamBuilder.java
deleted file mode 100644
index 3e2a892..0000000
--- a/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/ToyIIStreamBuilder.java
+++ /dev/null
@@ -1,36 +0,0 @@
-package org.apache.kylin.job.hadoop.invertedindex;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.concurrent.BlockingQueue;
-
-import org.apache.kylin.invertedindex.index.Slice;
-import org.apache.kylin.invertedindex.index.TableRecordInfo;
-import org.apache.kylin.invertedindex.model.IIDesc;
-import org.apache.kylin.invertedindex.model.IIKeyValueCodec;
-import org.apache.kylin.invertedindex.model.IIRow;
-import org.apache.kylin.streaming.Stream;
-import org.apache.kylin.streaming.invertedindex.IIStreamBuilder;
-
-/**
- * Created by Hongbin Ma(Binmahone) on 3/26/15.
- *
- * A IIStreamBuilder that can hold all the built slices in form of IIRow
- * This is only for test use
- */
-public class ToyIIStreamBuilder extends IIStreamBuilder {
- private List<IIRow> result;
-
- public ToyIIStreamBuilder(BlockingQueue<Stream> queue, IIDesc desc, int partitionId, List<IIRow> result) {
- super(queue, null, desc, partitionId);
- this.result = result;
- }
-
- protected void outputSlice(Slice slice, TableRecordInfo tableRecordInfo) throws IOException {
- IIKeyValueCodec codec = new IIKeyValueCodec(tableRecordInfo.getDigest());
- for (IIRow iiRow : codec.encodeKeyValue(slice)) {
- result.add(iiRow);
- }
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8e6afbf4/streaming/src/main/java/org/apache/kylin/streaming/JsonStreamParser.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/JsonStreamParser.java b/streaming/src/main/java/org/apache/kylin/streaming/JsonStreamParser.java
index 5c8b49d..2912aa7 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/JsonStreamParser.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/JsonStreamParser.java
@@ -50,15 +50,17 @@ import java.util.Map;
*/
public final class JsonStreamParser implements StreamParser {
- public static final JsonStreamParser instance = new JsonStreamParser();
+ private final List<TblColRef> allColumns;
- private final JsonParser jsonParser = new JsonParser();
+ private static final JsonParser JSON_PARSER = new JsonParser();
- private JsonStreamParser(){}
+ public JsonStreamParser(List<TblColRef> allColumns){
+ this.allColumns = allColumns;
+ }
@Override
- public List<String> parse(Stream stream, List<TblColRef> allColumns) {
- final JsonObject root = jsonParser.parse(new String(stream.getRawData())).getAsJsonObject();
+ public List<String> parse(Stream stream) {
+ final JsonObject root = JSON_PARSER.parse(new String(stream.getRawData())).getAsJsonObject();
ArrayList<String> result = Lists.newArrayList();
for (TblColRef column : allColumns) {
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8e6afbf4/streaming/src/main/java/org/apache/kylin/streaming/StreamParser.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/StreamParser.java b/streaming/src/main/java/org/apache/kylin/streaming/StreamParser.java
index 9b41c95..c6b23ff 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/StreamParser.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/StreamParser.java
@@ -34,8 +34,6 @@
package org.apache.kylin.streaming;
-import org.apache.kylin.metadata.model.TblColRef;
-
import java.util.List;
/**
@@ -43,5 +41,5 @@ import java.util.List;
*/
public interface StreamParser {
- List<String> parse(Stream stream, List<TblColRef> allColumns);
+ List<String> parse(Stream stream);
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8e6afbf4/streaming/src/main/java/org/apache/kylin/streaming/StringStreamParser.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/StringStreamParser.java b/streaming/src/main/java/org/apache/kylin/streaming/StringStreamParser.java
index 3c62a3a..4fb26fa 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/StringStreamParser.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/StringStreamParser.java
@@ -49,7 +49,7 @@ public final class StringStreamParser implements StreamParser {
private StringStreamParser(){}
@Override
- public List<String> parse(Stream stream, List<TblColRef> allColumns) {
+ public List<String> parse(Stream stream) {
return Lists.newArrayList(new String(stream.getRawData()).split(","));
}
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8e6afbf4/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java b/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
index 5c2efdc..ba3f495 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
@@ -403,7 +403,7 @@ public class CubeStreamBuilder extends StreamBuilder {
}
private List<String> parseStream(Stream stream, CubeDesc desc) {
- return getStreamParser().parse(stream, Lists.newArrayList(desc.listAllColumns()));
+ return getStreamParser().parse(stream);
}
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8e6afbf4/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java b/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java
index 0cf3c77..72e23ff 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java
@@ -34,36 +34,27 @@
package org.apache.kylin.streaming.invertedindex;
-import com.google.common.base.Function;
import com.google.common.base.Stopwatch;
-import com.google.common.collect.Collections2;
-import com.google.common.collect.HashMultimap;
import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.kylin.dict.Dictionary;
-import org.apache.kylin.dict.DictionaryGenerator;
-import org.apache.kylin.invertedindex.index.BatchSliceBuilder;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.invertedindex.IIInstance;
+import org.apache.kylin.invertedindex.IIManager;
import org.apache.kylin.invertedindex.index.Slice;
-import org.apache.kylin.invertedindex.index.TableRecord;
-import org.apache.kylin.invertedindex.index.TableRecordInfo;
import org.apache.kylin.invertedindex.model.IIDesc;
import org.apache.kylin.invertedindex.model.IIKeyValueCodec;
import org.apache.kylin.invertedindex.model.IIRow;
-import org.apache.kylin.metadata.model.TblColRef;
import org.apache.kylin.streaming.Stream;
import org.apache.kylin.streaming.StreamBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import javax.annotation.Nullable;
import java.io.IOException;
import java.util.List;
-import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;
@@ -75,98 +66,36 @@ public class IIStreamBuilder extends StreamBuilder {
private static Logger logger = LoggerFactory.getLogger(IIStreamBuilder.class);
private final IIDesc desc;
+ private final IIInstance ii;
private final HTableInterface hTable;
- private final BatchSliceBuilder sliceBuilder;
-
- public IIStreamBuilder(BlockingQueue<Stream> queue, String hTableName, IIDesc desc, int partitionId) {
- super(queue, desc.getSliceSize());
- this.desc = desc;
+ private final SliceBuilder sliceBuilder;
+ private final int partitionId;
+
+ public IIStreamBuilder(BlockingQueue<Stream> queue, String hTableName, IIInstance iiInstance, int partitionId) {
+ super(queue, iiInstance.getDescriptor().getSliceSize());
+ this.ii = iiInstance;
+ this.desc = iiInstance.getDescriptor();
+ this.partitionId = partitionId;
try {
- if (hTableName != null) {
- this.hTable = HConnectionManager.createConnection(HBaseConfiguration.create()).getTable(hTableName);
- } else {
- this.hTable = null;
- }
+ this.hTable = HConnectionManager.createConnection(HBaseConfiguration.create()).getTable(hTableName);
} catch (IOException e) {
logger.error("cannot open htable name:" + hTableName, e);
throw new RuntimeException("cannot open htable name:" + hTableName, e);
}
- sliceBuilder = new BatchSliceBuilder(desc, (short) partitionId);
+ sliceBuilder = new SliceBuilder(desc, (short) partitionId);
}
@Override
protected void build(List<Stream> streamsToBuild) throws IOException {
logger.info("stream build start, size:" + streamsToBuild.size());
Stopwatch stopwatch = new Stopwatch().start();
- List<List<String>> table = Lists.transform(streamsToBuild, new Function<Stream, List<String>>() {
- @Nullable
- @Override
- public List<String> apply(@Nullable Stream input) {
- return parseStream(input, desc);
- }
- });
- final Map<Integer, Dictionary<?>> dictionaryMap = buildDictionary(table, desc);
- TableRecordInfo tableRecordInfo = new TableRecordInfo(desc, dictionaryMap);
- final Slice slice = buildSlice(table, sliceBuilder, tableRecordInfo, dictionaryMap);
+ final Slice slice = sliceBuilder.buildSlice(streamsToBuild, getStreamParser());
logger.info("slice info, shard:" + slice.getShard() + " timestamp:" + slice.getTimestamp() + " record count:" + slice.getRecordCount());
- outputSlice(slice, tableRecordInfo);
- submitOffset();
-
+ loadToHBase(hTable, slice, new IIKeyValueCodec(slice.getInfo()));
+ submitOffset(0);
stopwatch.stop();
- logger.info("stream build finished, size:" + streamsToBuild.size() + " elapsed time:" + stopwatch.elapsedTime(TimeUnit.MILLISECONDS) + TimeUnit.MILLISECONDS);
- }
-
- protected void outputSlice(Slice slice, TableRecordInfo tableRecordInfo) throws IOException {
- loadToHBase(hTable, slice, new IIKeyValueCodec(tableRecordInfo.getDigest()));
- }
-
- private Map<Integer, Dictionary<?>> buildDictionary(List<List<String>> table, IIDesc desc) {
- HashMultimap<TblColRef, String> valueMap = HashMultimap.create();
- final List<TblColRef> allColumns = desc.listAllColumns();
- for (List<String> row : table) {
- for (int i = 0; i < row.size(); i++) {
- String cell = row.get(i);
- if (!desc.isMetricsCol(i)) {
- valueMap.put(allColumns.get(i), cell);
- }
- }
- }
-
- Map<Integer, Dictionary<?>> result = Maps.newHashMap();
- for (TblColRef tblColRef : valueMap.keySet()) {
- result.put(desc.findColumn(tblColRef), //
- DictionaryGenerator.buildDictionaryFromValueList(//
- tblColRef.getType(), //
- Collections2.transform(valueMap.get(tblColRef), new Function<String, byte[]>() {
- @Nullable
- @Override
- public byte[] apply(String input) {
- return input.getBytes();
- }
- })));
- }
- return result;
- }
-
- private List<String> parseStream(Stream stream, IIDesc desc) {
- return getStreamParser().parse(stream, desc.listAllColumns());
- }
-
- private Slice buildSlice(List<List<String>> table, BatchSliceBuilder sliceBuilder, final TableRecordInfo tableRecordInfo, Map<Integer, Dictionary<?>> localDictionary) {
- final Slice slice = sliceBuilder.build(tableRecordInfo.getDigest(), Lists.transform(table, new Function<List<String>, TableRecord>() {
- @Nullable
- @Override
- public TableRecord apply(@Nullable List<String> input) {
- TableRecord result = tableRecordInfo.createTableRecord();
- for (int i = 0; i < input.size(); i++) {
- result.setValueString(i, input.get(i));
- }
- return result;
- }
- }));
- slice.setLocalDictionaries(localDictionary);
- return slice;
+ logger.info("stream build finished, size:" + streamsToBuild.size() + " elapsed time:" + stopwatch.elapsedTime(TimeUnit.MILLISECONDS) + " " + TimeUnit.MILLISECONDS);
}
private void loadToHBase(HTableInterface hTable, Slice slice, IIKeyValueCodec codec) throws IOException {
@@ -192,8 +121,17 @@ public class IIStreamBuilder extends StreamBuilder {
}
}
- private void submitOffset() {
-
+ private void submitOffset(long offset) {
+ final IIManager iiManager = IIManager.getInstance(KylinConfig.getInstanceFromEnv());
+ final IIInstance instance = iiManager.getII(ii.getName());
+ instance.getStreamOffsets().set(partitionId, offset);
+ try {
+ iiManager.updateII(instance);
+ logger.info("submit offset");
+ } catch (IOException e) {
+ logger.error("error submit offset: + " + offset, e);
+ throw new RuntimeException(e);
+ }
}
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8e6afbf4/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/SliceBuilder.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/SliceBuilder.java b/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/SliceBuilder.java
new file mode 100644
index 0000000..ac2ce0f
--- /dev/null
+++ b/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/SliceBuilder.java
@@ -0,0 +1,126 @@
+/*
+ *
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ *
+ * contributor license agreements. See the NOTICE file distributed with
+ *
+ * this work for additional information regarding copyright ownership.
+ *
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ *
+ * (the "License"); you may not use this file except in compliance with
+ *
+ * the License. You may obtain a copy of the License at
+ *
+ *
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ *
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ *
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *
+ * See the License for the specific language governing permissions and
+ *
+ * limitations under the License.
+ *
+ * /
+ */
+
+package org.apache.kylin.streaming.invertedindex;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Collections2;
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.kylin.dict.Dictionary;
+import org.apache.kylin.dict.DictionaryGenerator;
+import org.apache.kylin.invertedindex.index.BatchSliceBuilder;
+import org.apache.kylin.invertedindex.index.Slice;
+import org.apache.kylin.invertedindex.index.TableRecord;
+import org.apache.kylin.invertedindex.index.TableRecordInfo;
+import org.apache.kylin.invertedindex.model.IIDesc;
+import org.apache.kylin.metadata.model.TblColRef;
+import org.apache.kylin.streaming.Stream;
+import org.apache.kylin.streaming.StreamParser;
+
+import javax.annotation.Nullable;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Created by qianzhou on 3/27/15.
+ */
+public final class SliceBuilder {
+
+ public SliceBuilder(IIDesc desc, short shard){
+ this.iiDesc = desc;
+ this.sliceBuilder = new BatchSliceBuilder(desc, shard);
+ }
+
+ private final BatchSliceBuilder sliceBuilder;
+ private final IIDesc iiDesc;
+
+ public Slice buildSlice(List<Stream> streams, final StreamParser streamParser) {
+ List<List<String>> table = Lists.transform(streams, new Function<Stream, List<String>>() {
+ @Nullable
+ @Override
+ public List<String> apply(@Nullable Stream input) {
+ return streamParser.parse(input);
+ }
+ });
+ final Map<Integer, Dictionary<?>> dictionaryMap = buildDictionary(table, iiDesc);
+ TableRecordInfo tableRecordInfo = new TableRecordInfo(iiDesc, dictionaryMap);
+ return build(table, sliceBuilder, tableRecordInfo, dictionaryMap);
+ }
+
+ private Map<Integer, Dictionary<?>> buildDictionary(List<List<String>> table, IIDesc desc) {
+ HashMultimap<TblColRef, String> valueMap = HashMultimap.create();
+ final List<TblColRef> allColumns = desc.listAllColumns();
+ for (List<String> row : table) {
+ for (int i = 0; i < row.size(); i++) {
+ String cell = row.get(i);
+ if (!desc.isMetricsCol(i)) {
+ valueMap.put(allColumns.get(i), cell);
+ }
+ }
+ }
+
+ Map<Integer, Dictionary<?>> result = Maps.newHashMap();
+ for (TblColRef tblColRef : valueMap.keySet()) {
+ final Collection<byte[]> bytes = Collections2.transform(valueMap.get(tblColRef), new Function<String, byte[]>() {
+ @Nullable
+ @Override
+ public byte[] apply(String input) {
+ return input.getBytes();
+ }
+ });
+ final Dictionary<?> dict = DictionaryGenerator.buildDictionaryFromValueList(tblColRef.getType(), bytes);
+ result.put(desc.findColumn(tblColRef), dict);
+ }
+ return result;
+ }
+
+ private Slice build(List<List<String>> table, BatchSliceBuilder sliceBuilder, final TableRecordInfo tableRecordInfo, Map<Integer, Dictionary<?>> localDictionary) {
+ final Slice slice = sliceBuilder.build(tableRecordInfo.getDigest(), Lists.transform(table, new Function<List<String>, TableRecord>() {
+ @Nullable
+ @Override
+ public TableRecord apply(@Nullable List<String> input) {
+ TableRecord result = tableRecordInfo.createTableRecord();
+ for (int i = 0; i < input.size(); i++) {
+ result.setValueString(i, input.get(i));
+ }
+ return result;
+ }
+ }));
+ slice.setLocalDictionaries(localDictionary);
+ return slice;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8e6afbf4/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/PrintOutStreamBuilder.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/PrintOutStreamBuilder.java b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/PrintOutStreamBuilder.java
index e83bdc5..e5873c0 100644
--- a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/PrintOutStreamBuilder.java
+++ b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/PrintOutStreamBuilder.java
@@ -40,7 +40,6 @@ import org.apache.kylin.streaming.JsonStreamParser;
import org.apache.kylin.streaming.Stream;
import org.apache.kylin.streaming.StreamBuilder;
-import java.util.Collection;
import java.util.List;
import java.util.concurrent.BlockingQueue;
@@ -53,14 +52,14 @@ public class PrintOutStreamBuilder extends StreamBuilder {
public PrintOutStreamBuilder(BlockingQueue<Stream> streamQueue, int sliceSize, List<TblColRef> allColumns) {
super(streamQueue, sliceSize);
- setStreamParser(JsonStreamParser.instance);
+ setStreamParser(new JsonStreamParser(allColumns));
this.allColumns = allColumns;
}
@Override
protected void build(List<Stream> streamsToBuild) throws Exception {
for (Stream stream : streamsToBuild) {
- final List<String> row = getStreamParser().parse(stream, allColumns);
+ final List<String> row = getStreamParser().parse(stream);
System.out.println("offset:" + stream.getOffset() + " " + StringUtils.join(row, ","));
}
}
[06/50] incubator-kylin git commit: refactor
Posted by li...@apache.org.
refactor
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/9a1c4cb6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/9a1c4cb6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/9a1c4cb6
Branch: refs/heads/streaming-localdict
Commit: 9a1c4cb6b3dcb967ab017c23de76cb910a103cb9
Parents: 71324f4
Author: qianhao.zhou <qi...@ebay.com>
Authored: Thu Mar 26 17:49:39 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Thu Mar 26 17:49:39 2015 +0800
----------------------------------------------------------------------
.../apache/kylin/invertedindex/IIInstance.java | 12 +++
.../apache/kylin/invertedindex/IIManager.java | 17 ++--
.../invertedindex/index/BatchSliceBuilder.java | 6 +-
.../org/apache/kylin/streaming/KafkaConfig.java | 22 ++--
.../apache/kylin/streaming/KafkaConsumer.java | 17 ++--
.../kylin/streaming/StreamingBootstrap.java | 102 +++++++++++++++++++
.../apache/kylin/streaming/StreamingCLI.java | 70 +++++++++++++
.../kylin/streaming/KafkaConsumerTest.java | 2 +-
8 files changed, 210 insertions(+), 38 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9a1c4cb6/invertedindex/src/main/java/org/apache/kylin/invertedindex/IIInstance.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/IIInstance.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/IIInstance.java
index 7684699..fd300e0 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/IIInstance.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/IIInstance.java
@@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonManagedReference;
import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.collect.Lists;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.persistence.ResourceStore;
import org.apache.kylin.common.persistence.RootPersistentEntity;
@@ -79,6 +80,9 @@ public class IIInstance extends RootPersistentEntity implements IRealization {
@JsonProperty("segments")
private List<IISegment> segments = new ArrayList<IISegment>();
+ @JsonProperty("stream_offset")
+ private List<Long> streamOffsets = Lists.newArrayList();
+
@JsonProperty("create_time_utc")
private long createTimeUTC;
@@ -357,4 +361,12 @@ public class IIInstance extends RootPersistentEntity implements IRealization {
public void setCost(int cost) {
this.cost = cost;
}
+
+ public List<Long> getStreamOffsets() {
+ return streamOffsets;
+ }
+
+ public void setStreamOffsets(List<Long> streamOffsets) {
+ this.streamOffsets = streamOffsets;
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9a1c4cb6/invertedindex/src/main/java/org/apache/kylin/invertedindex/IIManager.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/IIManager.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/IIManager.java
index 6ebfbf8..b086d5d 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/IIManager.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/IIManager.java
@@ -240,19 +240,18 @@ public class IIManager implements IRealizationProvider {
}
private String generateStorageLocation() {
- String namePrefix = IRealizationConstants.IIHbaseStorageLocationPrefix;
- String tableName = "";
- do {
- StringBuffer sb = new StringBuffer();
- sb.append(namePrefix);
+ while (true) {
+ StringBuilder sb = new StringBuilder(IRealizationConstants.IIHbaseStorageLocationPrefix);
for (int i = 0; i < HBASE_TABLE_LENGTH; i++) {
int idx = (int) (Math.random() * ALPHA_NUM.length());
sb.append(ALPHA_NUM.charAt(idx));
}
- tableName = sb.toString();
- } while (this.usedStorageLocation.contains(tableName));
-
- return tableName;
+ if (usedStorageLocation.contains(sb.toString())) {
+ continue;
+ } else {
+ return sb.toString();
+ }
+ }
}
private void loadAllIIInstance() throws IOException {
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9a1c4cb6/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/BatchSliceBuilder.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/BatchSliceBuilder.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/BatchSliceBuilder.java
index 94b70c1..6ba328c 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/BatchSliceBuilder.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/BatchSliceBuilder.java
@@ -86,11 +86,7 @@ public class BatchSliceBuilder {
}
private long increaseSliceTimestamp(long timestamp) {
- if (timestamp < sliceTimestamp) {
- throw new IllegalStateException();
- }
-
- if (timestamp == sliceTimestamp) {
+ if (timestamp <= sliceTimestamp) {
return ++timestamp; // ensure slice timestamp increases
} else {
return timestamp;
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9a1c4cb6/streaming/src/main/java/org/apache/kylin/streaming/KafkaConfig.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/KafkaConfig.java b/streaming/src/main/java/org/apache/kylin/streaming/KafkaConfig.java
index b22c7e0..5194e9d 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/KafkaConfig.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/KafkaConfig.java
@@ -76,10 +76,8 @@ public class KafkaConfig extends RootPersistentEntity {
@JsonProperty("bufferSize")
private int bufferSize;
- @JsonProperty("iiDesc")
- private String iiDesc;
-
- private int partitionId;
+ @JsonProperty("iiName")
+ private String iiName;
public int getTimeout() {
return timeout;
@@ -121,14 +119,6 @@ public class KafkaConfig extends RootPersistentEntity {
this.topic = topic;
}
- public int getPartitionId() {
- return partitionId;
- }
-
- public void setPartitionId(int partitionId) {
- this.partitionId = partitionId;
- }
-
public void setBrokerConfigs(List<BrokerConfig> brokerConfigs) {
this.brokerConfigs = brokerConfigs;
}
@@ -143,6 +133,14 @@ public class KafkaConfig extends RootPersistentEntity {
});
}
+ public String getIiName() {
+ return iiName;
+ }
+
+ public void setIiName(String iiName) {
+ this.iiName = iiName;
+ }
+
public String getName() {
return name;
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9a1c4cb6/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java b/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
index 42a0f1f..910041c 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
@@ -58,15 +58,16 @@ public abstract class KafkaConsumer implements Runnable {
private KafkaConfig kafkaConfig;
private List<Broker> replicaBrokers;
- private AtomicLong offset = new AtomicLong();
+ private long offset;
private BlockingQueue<Stream> streamQueue;
private Logger logger;
- public KafkaConsumer(String topic, int partitionId, List<Broker> initialBrokers, KafkaConfig kafkaConfig) {
+ public KafkaConsumer(String topic, int partitionId, long startOffset, List<Broker> initialBrokers, KafkaConfig kafkaConfig) {
this.topic = topic;
this.partitionId = partitionId;
this.kafkaConfig = kafkaConfig;
+ offset = startOffset;
this.replicaBrokers = initialBrokers;
logger = LoggerFactory.getLogger("KafkaConsumer_" + topic + "_" + partitionId);
streamQueue = new ArrayBlockingQueue<Stream>(kafkaConfig.getMaxReadCount());
@@ -90,12 +91,6 @@ public abstract class KafkaConsumer implements Runnable {
public void run() {
try {
Broker leadBroker = getLeadBroker();
- if (leadBroker == null) {
- logger.warn("cannot find lead broker");
- } else {
- final long lastOffset = KafkaRequester.getLastOffset(topic, partitionId, OffsetRequest.EarliestTime(), leadBroker, kafkaConfig);
- offset.set(lastOffset);
- }
while (true) {
if (leadBroker == null) {
leadBroker = getLeadBroker();
@@ -105,9 +100,9 @@ public abstract class KafkaConsumer implements Runnable {
continue;
}
- final FetchResponse fetchResponse = KafkaRequester.fetchResponse(topic, partitionId, offset.get(), leadBroker, kafkaConfig);
+ final FetchResponse fetchResponse = KafkaRequester.fetchResponse(topic, partitionId, offset, leadBroker, kafkaConfig);
if (fetchResponse.errorCode(topic, partitionId) != 0) {
- logger.warn("fetch response offset:" + offset.get() + " errorCode:" + fetchResponse.errorCode(topic, partitionId));
+ logger.warn("fetch response offset:" + offset + " errorCode:" + fetchResponse.errorCode(topic, partitionId));
continue;
}
for (MessageAndOffset messageAndOffset : fetchResponse.messageSet(topic, partitionId)) {
@@ -117,7 +112,7 @@ public abstract class KafkaConsumer implements Runnable {
logger.error("error put streamQueue", e);
break;
}
- offset.incrementAndGet();
+ offset++;
}
}
} catch (Exception e) {
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9a1c4cb6/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java b/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java
new file mode 100644
index 0000000..4528a72
--- /dev/null
+++ b/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java
@@ -0,0 +1,102 @@
+/*
+ *
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ *
+ * contributor license agreements. See the NOTICE file distributed with
+ *
+ * this work for additional information regarding copyright ownership.
+ *
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ *
+ * (the "License"); you may not use this file except in compliance with
+ *
+ * the License. You may obtain a copy of the License at
+ *
+ *
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ *
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ *
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *
+ * See the License for the specific language governing permissions and
+ *
+ * limitations under the License.
+ *
+ * /
+ */
+
+package org.apache.kylin.streaming;
+
+import com.google.common.base.Preconditions;
+import kafka.api.OffsetRequest;
+import kafka.cluster.Broker;
+import kafka.javaapi.PartitionMetadata;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.invertedindex.IIDescManager;
+import org.apache.kylin.invertedindex.IIInstance;
+import org.apache.kylin.invertedindex.IIManager;
+import org.apache.kylin.invertedindex.model.IIDesc;
+
+import java.nio.ByteBuffer;
+import java.util.concurrent.Executors;
+
+/**
+ * Created by qianzhou on 3/26/15.
+ */
+public class StreamingBootstrap {
+
+ private static KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
+ private static StreamManager streamManager = StreamManager.getInstance(kylinConfig);
+ private static IIManager iiManager = IIManager.getInstance(kylinConfig);
+ private static IIDescManager iiDescManager = IIDescManager.getInstance(kylinConfig);
+
+
+ private static Broker getLeadBroker(KafkaConfig kafkaConfig, int partitionId) {
+ final PartitionMetadata partitionMetadata = KafkaRequester.getPartitionMetadata(kafkaConfig.getTopic(), partitionId, kafkaConfig.getBrokers(), kafkaConfig);
+ if (partitionMetadata != null && partitionMetadata.errorCode() == 0) {
+ return partitionMetadata.leader();
+ } else {
+ return null;
+ }
+ }
+
+ public static void startStreaming(String streamingConf, int partitionId) throws Exception {
+ final KafkaConfig kafkaConfig = streamManager.getKafkaConfig(streamingConf);
+ Preconditions.checkArgument(kafkaConfig != null, "cannot find kafka config:" + streamingConf);
+ final IIInstance ii = iiManager.getII(kafkaConfig.getIiName());
+ Preconditions.checkNotNull(ii);
+
+ final Broker leadBroker = getLeadBroker(kafkaConfig, partitionId);
+ Preconditions.checkState(leadBroker != null, "cannot find lead broker");
+ final long earliestOffset = KafkaRequester.getLastOffset(kafkaConfig.getTopic(), partitionId, OffsetRequest.EarliestTime(), leadBroker, kafkaConfig);
+ long streamOffset = ii.getStreamOffsets().get(partitionId);
+ if (streamOffset < earliestOffset) {
+ streamOffset = earliestOffset;
+ }
+
+
+ KafkaConsumer consumer = new KafkaConsumer(kafkaConfig.getTopic(), 0, streamOffset, kafkaConfig.getBrokers(), kafkaConfig) {
+ @Override
+ protected void consume(long offset, ByteBuffer payload) throws Exception {
+ byte[] bytes = new byte[payload.limit()];
+ payload.get(bytes);
+ getStreamQueue().put(new Stream(offset, bytes));
+ }
+ };
+ final IIDesc desc = ii.getDescriptor();
+ Executors.newSingleThreadExecutor().execute(consumer);
+ while (true) {
+ final Stream stream = consumer.getStreamQueue().poll();
+ if (stream != null) {
+ System.out.println("offset:" + stream.getOffset() + " content:" + new String(stream.getRawData()));
+ }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9a1c4cb6/streaming/src/main/java/org/apache/kylin/streaming/StreamingCLI.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/StreamingCLI.java b/streaming/src/main/java/org/apache/kylin/streaming/StreamingCLI.java
new file mode 100644
index 0000000..70290f1
--- /dev/null
+++ b/streaming/src/main/java/org/apache/kylin/streaming/StreamingCLI.java
@@ -0,0 +1,70 @@
+/*
+ *
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ *
+ * contributor license agreements. See the NOTICE file distributed with
+ *
+ * this work for additional information regarding copyright ownership.
+ *
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ *
+ * (the "License"); you may not use this file except in compliance with
+ *
+ * the License. You may obtain a copy of the License at
+ *
+ *
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ *
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ *
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *
+ * See the License for the specific language governing permissions and
+ *
+ * limitations under the License.
+ *
+ * /
+ */
+
+package org.apache.kylin.streaming;
+
+import org.apache.commons.lang3.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Created by qianzhou on 3/26/15.
+ */
+public class StreamingCLI {
+
+ private static final Logger logger = LoggerFactory.getLogger(StreamingCLI.class);
+
+ public static void main(String[] args) {
+ try {
+ if (args.length < 2) {
+ printArgsError(args);
+ return;
+ }
+ if (args[0].equals("start")) {
+ String kafkaConfName = args[1];
+ StreamingBootstrap.startStreaming(kafkaConfName, 0);
+ } else if (args.equals("stop")) {
+
+ } else {
+ printArgsError(args);
+ }
+ } catch (Exception e) {
+ }
+ }
+
+ private static void printArgsError(String[] args) {
+ logger.warn("invalid args:" + StringUtils.join(args, " "));
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9a1c4cb6/streaming/src/test/java/org/apache/kylin/streaming/KafkaConsumerTest.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/KafkaConsumerTest.java b/streaming/src/test/java/org/apache/kylin/streaming/KafkaConsumerTest.java
index c824c48..337dfc7 100644
--- a/streaming/src/test/java/org/apache/kylin/streaming/KafkaConsumerTest.java
+++ b/streaming/src/test/java/org/apache/kylin/streaming/KafkaConsumerTest.java
@@ -90,7 +90,7 @@ public class KafkaConsumerTest extends KafkaBaseTest {
final ExecutorService executorService = Executors.newFixedThreadPool(kafkaTopicMeta.getPartitionIds().size());
List<BlockingQueue<Stream>> queues = Lists.newArrayList();
for (Integer partitionId : kafkaTopicMeta.getPartitionIds()) {
- KafkaConsumer consumer = new KafkaConsumer(kafkaTopicMeta.getName(), partitionId, kafkaConfig.getBrokers(), kafkaConfig) {
+ KafkaConsumer consumer = new KafkaConsumer(kafkaTopicMeta.getName(), partitionId, 0, kafkaConfig.getBrokers(), kafkaConfig) {
@Override
protected void consume(long offset, ByteBuffer payload) throws Exception {
//TODO use ByteBuffer maybe
[50/50] incubator-kylin git commit: Merge branch
'streaming-localdict' of https://github.com/KylinOLAP/Kylin into
streaming-localdict
Posted by li...@apache.org.
Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict
Conflicts:
invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecord.java
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/1ad30104
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/1ad30104
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/1ad30104
Branch: refs/heads/streaming-localdict
Commit: 1ad301044e7ae2de2f05d55b98f54e7006de71f1
Parents: 48a7971 c043b85
Author: liyang@apache.org <ya...@D-SHC-00801746.corp.ebay.com>
Authored: Fri Mar 27 14:07:40 2015 +0100
Committer: liyang@apache.org <ya...@D-SHC-00801746.corp.ebay.com>
Committed: Fri Mar 27 14:07:40 2015 +0100
----------------------------------------------------------------------
bin/kylin.sh | 26 ++
.../apache/kylin/common/util/FIFOIterable.java | 20 ++
.../apache/kylin/common/util/FIFOIterator.java | 34 +++
.../org/apache/kylin/common/util/BasicTest.java | 12 +-
.../test_kylin_cube_with_slr_desc.json | 2 +-
.../invertedindex/index/RawTableRecord.java | 2 +
.../invertedindex/index/TableRecordInfo.java | 10 +-
.../kylin/invertedindex/model/IIDesc.java | 1 +
.../model/IIKeyValueCodecWithState.java | 24 +-
.../apache/kylin/invertedindex/model/IIRow.java | 10 +
.../apache/kylin/job/cube/CubingJobBuilder.java | 2 -
.../kylin/job/hadoop/cube/BaseCuboidJob.java | 2 +-
.../kylin/job/hadoop/cube/BaseCuboidMapper.java | 242 -------------------
.../job/hadoop/cube/BaseCuboidMapperBase.java | 205 ++++++++++++++++
.../cube/FactDistinctHiveColumnsMapper.java | 16 +-
.../cube/FactDistinctIIColumnsMapper.java | 28 +--
.../job/hadoop/cube/HiveToBaseCuboidMapper.java | 49 ++++
.../job/hadoop/cube/IIToBaseCuboidMapper.java | 109 +++++++++
.../kylin/job/hadoop/cubev2/InMemCuboidJob.java | 5 -
.../kylin/job/streaming/StreamingBootstrap.java | 45 +++-
.../kylin/job/streaming/StreamingCLI.java | 6 +-
.../kylin/job/BuildCubeWithEngineTest.java | 8 +-
.../apache/kylin/job/BuildIIWithStreamTest.java | 30 +--
.../apache/kylin/job/IIStreamBuilderTest.java | 48 +++-
.../cube/BaseCuboidMapperPerformanceTest.java | 65 -----
.../job/hadoop/cube/BaseCuboidMapperTest.java | 145 -----------
.../HiveToBaseCuboidMapperPerformanceTest.java | 65 +++++
.../hadoop/cube/HiveToBaseCuboidMapperTest.java | 145 +++++++++++
.../job/hadoop/invertedindex/II2CubeTest.java | 138 +++++++++++
streaming/pom.xml | 7 +
.../kylin/streaming/JsonStreamParser.java | 12 +-
.../apache/kylin/streaming/KafkaConsumer.java | 12 +-
.../java/org/apache/kylin/streaming/Stream.java | 2 +
.../apache/kylin/streaming/StreamParser.java | 4 +-
.../kylin/streaming/StringStreamParser.java | 2 +-
.../kylin/streaming/cube/CubeStreamBuilder.java | 22 +-
.../invertedindex/IIStreamBuilder.java | 107 +++-----
.../streaming/invertedindex/SliceBuilder.java | 126 ++++++++++
.../invertedindex/PrintOutStreamBuilder.java | 5 +-
39 files changed, 1136 insertions(+), 657 deletions(-)
----------------------------------------------------------------------
[27/50] incubator-kylin git commit: fix compile with level 1.6
Posted by li...@apache.org.
fix compile with level 1.6
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/bbbcae8f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/bbbcae8f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/bbbcae8f
Branch: refs/heads/streaming-localdict
Commit: bbbcae8f6730540e615764533c32a7c12693d5a0
Parents: 71bbd0c
Author: Li, Yang <ya...@ebay.com>
Authored: Fri Mar 27 14:02:44 2015 +0800
Committer: Li, Yang <ya...@ebay.com>
Committed: Fri Mar 27 14:02:44 2015 +0800
----------------------------------------------------------------------
.../kylin/streaming/cube/CubeStreamBuilder.java | 4 +-
.../kylin/streaming/EternalStreamProducer.java | 2 +-
.../Nous/NousEternalStreamProducer.java | 46 --------
.../kylin/streaming/Nous/NousMessage.java | 118 -------------------
.../kylin/streaming/Nous/NousMessageTest.java | 31 -----
.../kylin/streaming/OneOffStreamProducer.java | 5 +-
.../nous/NousEternalStreamProducer.java | 46 ++++++++
.../kylin/streaming/nous/NousMessage.java | 118 +++++++++++++++++++
.../kylin/streaming/nous/NousMessageTest.java | 31 +++++
9 files changed, 202 insertions(+), 199 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/bbbcae8f/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java b/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
index 912c3cd..9554797 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
@@ -178,7 +178,7 @@ public class CubeStreamBuilder extends StreamBuilder {
}
private void outputGT(GridTable gridTable) throws IOException {
- GTScanRequest req = new GTScanRequest(gridTable.getInfo(), null, null, null, null);
+ GTScanRequest req = new GTScanRequest(gridTable.getInfo(), null, null, null);
IGTScanner scanner = gridTable.scan(req);
for (GTRecord record : scanner) {
logger.debug(record.toString());
@@ -248,7 +248,7 @@ public class CubeStreamBuilder extends StreamBuilder {
}
private GridTable scanAndAggregateGridTable(GridTable gridTable, long cuboidId, BitSet aggregationColumns, BitSet measureColumns) throws IOException {
- GTScanRequest req = new GTScanRequest(gridTable.getInfo(), null, null, aggregationColumns, measureColumns, metricsAggrFuncs, null);
+ GTScanRequest req = new GTScanRequest(gridTable.getInfo(), null, aggregationColumns, measureColumns, metricsAggrFuncs, null);
IGTScanner scanner = gridTable.scan(req);
GridTable newGridTable = newGridTableByCuboidID(cuboidId);
GTBuilder builder = newGridTable.rebuild();
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/bbbcae8f/streaming/src/test/java/org/apache/kylin/streaming/EternalStreamProducer.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/EternalStreamProducer.java b/streaming/src/test/java/org/apache/kylin/streaming/EternalStreamProducer.java
index 7406c4c..07660d3 100644
--- a/streaming/src/test/java/org/apache/kylin/streaming/EternalStreamProducer.java
+++ b/streaming/src/test/java/org/apache/kylin/streaming/EternalStreamProducer.java
@@ -67,7 +67,7 @@ public class EternalStreamProducer {
scheduledExecutorService.scheduleAtFixedRate(new Thread(new Runnable() {
@Override
public void run() {
- final KeyedMessage<String, String> message = new KeyedMessage<>(kafkaConfig.getTopic(), getOneMessage());
+ final KeyedMessage<String, String> message = new KeyedMessage<String, String>(kafkaConfig.getTopic(), getOneMessage());
producer.send(message);
try {
Thread.sleep(100);
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/bbbcae8f/streaming/src/test/java/org/apache/kylin/streaming/Nous/NousEternalStreamProducer.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/Nous/NousEternalStreamProducer.java b/streaming/src/test/java/org/apache/kylin/streaming/Nous/NousEternalStreamProducer.java
deleted file mode 100644
index a93128f..0000000
--- a/streaming/src/test/java/org/apache/kylin/streaming/Nous/NousEternalStreamProducer.java
+++ /dev/null
@@ -1,46 +0,0 @@
-package org.apache.kylin.streaming.nous;
-
-import java.util.Calendar;
-import java.util.Random;
-
-import org.apache.commons.lang3.RandomStringUtils;
-import org.apache.kylin.common.util.JsonUtil;
-import org.apache.kylin.streaming.EternalStreamProducer;
-
-import com.fasterxml.jackson.core.JsonProcessingException;
-
-/**
- * Created by Hongbin Ma(Binmahone) on 3/16/15.
- */
-public class NousEternalStreamProducer extends EternalStreamProducer {
-
- /**
- * @param frequency records added per second, 100 for recommendation
- */
- public NousEternalStreamProducer(int frequency) {
- super(frequency);
- }
-
- @Override
- protected String getOneMessage() {
-
- Calendar currentTime = Calendar.getInstance();
- Calendar minuteStart = Calendar.getInstance();
- Calendar hourStart = Calendar.getInstance();
-
- currentTime.setTimeInMillis(System.currentTimeMillis());
- minuteStart.clear();
- hourStart.clear();
-
- minuteStart.set(currentTime.get(Calendar.YEAR), currentTime.get(Calendar.MONTH), currentTime.get(Calendar.DAY_OF_MONTH), currentTime.get(Calendar.HOUR_OF_DAY), currentTime.get(Calendar.MINUTE));
- hourStart.set(currentTime.get(Calendar.YEAR), currentTime.get(Calendar.MONTH), currentTime.get(Calendar.DAY_OF_MONTH), currentTime.get(Calendar.HOUR_OF_DAY), 0);
-
- Random r = new Random();
- NousMessage temp = new NousMessage(minuteStart.getTimeInMillis(), hourStart.getTimeInMillis(), RandomStringUtils.randomAlphabetic(1), RandomStringUtils.randomAlphabetic(1), RandomStringUtils.randomAlphabetic(1), RandomStringUtils.randomAlphabetic(1), RandomStringUtils.randomAlphabetic(1), r.nextInt(5), r.nextDouble() * 100, r.nextInt(2));
- try {
- return JsonUtil.writeValueAsIndentString(temp);
- } catch (JsonProcessingException e) {
- return "";
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/bbbcae8f/streaming/src/test/java/org/apache/kylin/streaming/Nous/NousMessage.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/Nous/NousMessage.java b/streaming/src/test/java/org/apache/kylin/streaming/Nous/NousMessage.java
deleted file mode 100644
index 3606514..0000000
--- a/streaming/src/test/java/org/apache/kylin/streaming/Nous/NousMessage.java
+++ /dev/null
@@ -1,118 +0,0 @@
-package org.apache.kylin.streaming.nous;
-
-import com.fasterxml.jackson.annotation.JsonAutoDetect;
-
-/**
- * Created by Hongbin Ma(Binmahone) on 3/16/15.
- *
- * The kafka message format for Nous
- */
-@JsonAutoDetect
-public class NousMessage {
- private long minute_start;
- private long hour_start;
- private String itm;
- private String t;
- private String sid;
- private String p;
- private String m;
- private long click;
- private double gmv;
- private long qty;
-
- public NousMessage() {
- }
-
- public NousMessage(long minute_start, long hour_start, String itm, String t, String sid, String p, String m, long click, double gmv, long qty) {
- this.minute_start = minute_start;
- this.hour_start = hour_start;
- this.itm = itm;
- this.t = t;
- this.sid = sid;
- this.p = p;
- this.m = m;
- this.click = click;
- this.gmv = gmv;
- this.qty = qty;
- }
-
- public long getMinute_start() {
- return minute_start;
- }
-
- public void setMinute_start(long minute_start) {
- this.minute_start = minute_start;
- }
-
- public long getHour_start() {
- return hour_start;
- }
-
- public void setHour_start(long hour_start) {
- this.hour_start = hour_start;
- }
-
- public String getItm() {
- return itm;
- }
-
- public void setItm(String itm) {
- this.itm = itm;
- }
-
- public String getT() {
- return t;
- }
-
- public void setT(String t) {
- this.t = t;
- }
-
- public String getSid() {
- return sid;
- }
-
- public void setSid(String sid) {
- this.sid = sid;
- }
-
- public String getP() {
- return p;
- }
-
- public void setP(String p) {
- this.p = p;
- }
-
- public String getM() {
- return m;
- }
-
- public void setM(String m) {
- this.m = m;
- }
-
- public long getClick() {
- return click;
- }
-
- public void setClick(long click) {
- this.click = click;
- }
-
- public double getGmv() {
- return gmv;
- }
-
- public void setGmv(double gmv) {
- this.gmv = gmv;
- }
-
- public long getQty() {
- return qty;
- }
-
- public void setQty(long qty) {
- this.qty = qty;
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/bbbcae8f/streaming/src/test/java/org/apache/kylin/streaming/Nous/NousMessageTest.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/Nous/NousMessageTest.java b/streaming/src/test/java/org/apache/kylin/streaming/Nous/NousMessageTest.java
deleted file mode 100644
index 6cfc8f5..0000000
--- a/streaming/src/test/java/org/apache/kylin/streaming/Nous/NousMessageTest.java
+++ /dev/null
@@ -1,31 +0,0 @@
-package org.apache.kylin.streaming.nous;
-
-import java.io.IOException;
-
-import org.apache.kylin.common.util.JsonUtil;
-import org.junit.Ignore;
-import org.junit.Test;
-
-import static org.junit.Assert.assertEquals;
-
-/**
- * Created by Hongbin Ma(Binmahone) on 3/16/15.
- */
-public class NousMessageTest {
- @Test
- public void testJson() throws IOException {
- NousMessage a = new NousMessage(1, 2, "a", "b", "c", "d", "e", 100, 200.0, 300);
- String x = JsonUtil.writeValueAsIndentString(a);
- NousMessage b = JsonUtil.readValue(x, NousMessage.class);
- assertEquals(100, b.getClick());
- }
-
- @Ignore("disable this producer since it will make number of messages in a topic agnostic ")
- @Test
- public void testProducer() throws IOException, InterruptedException {
- NousEternalStreamProducer p = new NousEternalStreamProducer(10);
- p.start();
- Thread.sleep(5000);
- p.stop();
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/bbbcae8f/streaming/src/test/java/org/apache/kylin/streaming/OneOffStreamProducer.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/OneOffStreamProducer.java b/streaming/src/test/java/org/apache/kylin/streaming/OneOffStreamProducer.java
index fbcf0a5..1f45cdb 100644
--- a/streaming/src/test/java/org/apache/kylin/streaming/OneOffStreamProducer.java
+++ b/streaming/src/test/java/org/apache/kylin/streaming/OneOffStreamProducer.java
@@ -36,16 +36,19 @@ package org.apache.kylin.streaming;
import com.google.common.base.Function;
import com.google.common.collect.Iterators;
+
import kafka.cluster.Broker;
import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;
+
import org.apache.commons.lang3.StringUtils;
import org.apache.kylin.common.KylinConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
+
import java.io.IOException;
import java.util.Properties;
@@ -87,7 +90,7 @@ public class OneOffStreamProducer {
public void run() {
int count = 0;
while (!stopped && count < sendCount) {
- final KeyedMessage<String, String> message = new KeyedMessage<>(kafkaConfig.getTopic(), "current time is:" + System.currentTimeMillis());
+ final KeyedMessage<String, String> message = new KeyedMessage<String, String>(kafkaConfig.getTopic(), "current time is:" + System.currentTimeMillis());
producer.send(message);
count++;
try {
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/bbbcae8f/streaming/src/test/java/org/apache/kylin/streaming/nous/NousEternalStreamProducer.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/nous/NousEternalStreamProducer.java b/streaming/src/test/java/org/apache/kylin/streaming/nous/NousEternalStreamProducer.java
new file mode 100644
index 0000000..a93128f
--- /dev/null
+++ b/streaming/src/test/java/org/apache/kylin/streaming/nous/NousEternalStreamProducer.java
@@ -0,0 +1,46 @@
+package org.apache.kylin.streaming.nous;
+
+import java.util.Calendar;
+import java.util.Random;
+
+import org.apache.commons.lang3.RandomStringUtils;
+import org.apache.kylin.common.util.JsonUtil;
+import org.apache.kylin.streaming.EternalStreamProducer;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+
+/**
+ * Created by Hongbin Ma(Binmahone) on 3/16/15.
+ */
+public class NousEternalStreamProducer extends EternalStreamProducer {
+
+ /**
+ * @param frequency records added per second, 100 for recommendation
+ */
+ public NousEternalStreamProducer(int frequency) {
+ super(frequency);
+ }
+
+ @Override
+ protected String getOneMessage() {
+
+ Calendar currentTime = Calendar.getInstance();
+ Calendar minuteStart = Calendar.getInstance();
+ Calendar hourStart = Calendar.getInstance();
+
+ currentTime.setTimeInMillis(System.currentTimeMillis());
+ minuteStart.clear();
+ hourStart.clear();
+
+ minuteStart.set(currentTime.get(Calendar.YEAR), currentTime.get(Calendar.MONTH), currentTime.get(Calendar.DAY_OF_MONTH), currentTime.get(Calendar.HOUR_OF_DAY), currentTime.get(Calendar.MINUTE));
+ hourStart.set(currentTime.get(Calendar.YEAR), currentTime.get(Calendar.MONTH), currentTime.get(Calendar.DAY_OF_MONTH), currentTime.get(Calendar.HOUR_OF_DAY), 0);
+
+ Random r = new Random();
+ NousMessage temp = new NousMessage(minuteStart.getTimeInMillis(), hourStart.getTimeInMillis(), RandomStringUtils.randomAlphabetic(1), RandomStringUtils.randomAlphabetic(1), RandomStringUtils.randomAlphabetic(1), RandomStringUtils.randomAlphabetic(1), RandomStringUtils.randomAlphabetic(1), r.nextInt(5), r.nextDouble() * 100, r.nextInt(2));
+ try {
+ return JsonUtil.writeValueAsIndentString(temp);
+ } catch (JsonProcessingException e) {
+ return "";
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/bbbcae8f/streaming/src/test/java/org/apache/kylin/streaming/nous/NousMessage.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/nous/NousMessage.java b/streaming/src/test/java/org/apache/kylin/streaming/nous/NousMessage.java
new file mode 100644
index 0000000..3606514
--- /dev/null
+++ b/streaming/src/test/java/org/apache/kylin/streaming/nous/NousMessage.java
@@ -0,0 +1,118 @@
+package org.apache.kylin.streaming.nous;
+
+import com.fasterxml.jackson.annotation.JsonAutoDetect;
+
+/**
+ * Created by Hongbin Ma(Binmahone) on 3/16/15.
+ *
+ * The kafka message format for Nous
+ */
+@JsonAutoDetect
+public class NousMessage {
+ private long minute_start;
+ private long hour_start;
+ private String itm;
+ private String t;
+ private String sid;
+ private String p;
+ private String m;
+ private long click;
+ private double gmv;
+ private long qty;
+
+ public NousMessage() {
+ }
+
+ public NousMessage(long minute_start, long hour_start, String itm, String t, String sid, String p, String m, long click, double gmv, long qty) {
+ this.minute_start = minute_start;
+ this.hour_start = hour_start;
+ this.itm = itm;
+ this.t = t;
+ this.sid = sid;
+ this.p = p;
+ this.m = m;
+ this.click = click;
+ this.gmv = gmv;
+ this.qty = qty;
+ }
+
+ public long getMinute_start() {
+ return minute_start;
+ }
+
+ public void setMinute_start(long minute_start) {
+ this.minute_start = minute_start;
+ }
+
+ public long getHour_start() {
+ return hour_start;
+ }
+
+ public void setHour_start(long hour_start) {
+ this.hour_start = hour_start;
+ }
+
+ public String getItm() {
+ return itm;
+ }
+
+ public void setItm(String itm) {
+ this.itm = itm;
+ }
+
+ public String getT() {
+ return t;
+ }
+
+ public void setT(String t) {
+ this.t = t;
+ }
+
+ public String getSid() {
+ return sid;
+ }
+
+ public void setSid(String sid) {
+ this.sid = sid;
+ }
+
+ public String getP() {
+ return p;
+ }
+
+ public void setP(String p) {
+ this.p = p;
+ }
+
+ public String getM() {
+ return m;
+ }
+
+ public void setM(String m) {
+ this.m = m;
+ }
+
+ public long getClick() {
+ return click;
+ }
+
+ public void setClick(long click) {
+ this.click = click;
+ }
+
+ public double getGmv() {
+ return gmv;
+ }
+
+ public void setGmv(double gmv) {
+ this.gmv = gmv;
+ }
+
+ public long getQty() {
+ return qty;
+ }
+
+ public void setQty(long qty) {
+ this.qty = qty;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/bbbcae8f/streaming/src/test/java/org/apache/kylin/streaming/nous/NousMessageTest.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/nous/NousMessageTest.java b/streaming/src/test/java/org/apache/kylin/streaming/nous/NousMessageTest.java
new file mode 100644
index 0000000..6cfc8f5
--- /dev/null
+++ b/streaming/src/test/java/org/apache/kylin/streaming/nous/NousMessageTest.java
@@ -0,0 +1,31 @@
+package org.apache.kylin.streaming.nous;
+
+import java.io.IOException;
+
+import org.apache.kylin.common.util.JsonUtil;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Created by Hongbin Ma(Binmahone) on 3/16/15.
+ */
+public class NousMessageTest {
+ @Test
+ public void testJson() throws IOException {
+ NousMessage a = new NousMessage(1, 2, "a", "b", "c", "d", "e", 100, 200.0, 300);
+ String x = JsonUtil.writeValueAsIndentString(a);
+ NousMessage b = JsonUtil.readValue(x, NousMessage.class);
+ assertEquals(100, b.getClick());
+ }
+
+ @Ignore("disable this producer since it will make number of messages in a topic agnostic ")
+ @Test
+ public void testProducer() throws IOException, InterruptedException {
+ NousEternalStreamProducer p = new NousEternalStreamProducer(10);
+ p.start();
+ Thread.sleep(5000);
+ p.stop();
+ }
+}
[47/50] incubator-kylin git commit: KYLIN-625, filter convert pass
Posted by li...@apache.org.
KYLIN-625, filter convert pass
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/d7fc2312
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/d7fc2312
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/d7fc2312
Branch: refs/heads/streaming-localdict
Commit: d7fc2312c4c800cb2fec5264b6997feda6527521
Parents: 24acccc
Author: Li, Yang <ya...@ebay.com>
Authored: Fri Mar 27 18:26:39 2015 +0800
Committer: Li, Yang <ya...@ebay.com>
Committed: Fri Mar 27 18:26:39 2015 +0800
----------------------------------------------------------------------
.../org/apache/kylin/common/util/ByteArray.java | 9 +-
.../metadata/filter/ColumnTupleFilter.java | 6 +-
.../metadata/filter/CompareTupleFilter.java | 2 +-
.../metadata/filter/ConstantTupleFilter.java | 4 +-
.../metadata/filter/LogicalTupleFilter.java | 2 +-
.../metadata/serializer/DataTypeSerializer.java | 3 +-
.../gridtable/GTDictionaryCodeSystem.java | 47 +++-
.../kylin/storage/gridtable/GTRecord.java | 29 ++-
.../storage/gridtable/GTSampleCodeSystem.java | 5 +-
.../kylin/storage/gridtable/GTScanRange.java | 11 +-
.../kylin/storage/gridtable/GTScanRequest.java | 8 +-
.../apache/kylin/storage/gridtable/GTUtil.java | 35 ++-
.../storage/gridtable/DictGridTableTest.java | 214 +++++++++++++++++++
.../storage/gridtable/GTInvertedIndexTest.java | 165 --------------
.../kylin/storage/gridtable/GridTableTest.java | 208 ------------------
.../storage/gridtable/SimpleGridTableTest.java | 208 ++++++++++++++++++
.../gridtable/SimpleInvertedIndexTest.java | 165 ++++++++++++++
17 files changed, 690 insertions(+), 431 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/common/src/main/java/org/apache/kylin/common/util/ByteArray.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/kylin/common/util/ByteArray.java b/common/src/main/java/org/apache/kylin/common/util/ByteArray.java
index d09b350..8c6ae91 100644
--- a/common/src/main/java/org/apache/kylin/common/util/ByteArray.java
+++ b/common/src/main/java/org/apache/kylin/common/util/ByteArray.java
@@ -30,7 +30,7 @@ public class ByteArray implements Comparable<ByteArray> {
public static ByteArray allocate(int length) {
return new ByteArray(new byte[length]);
}
-
+
public static ByteArray copyOf(byte[] array, int offset, int length) {
byte[] space = new byte[length];
System.arraycopy(array, offset, space, 0, length);
@@ -52,7 +52,7 @@ public class ByteArray implements Comparable<ByteArray> {
}
public ByteArray(byte[] data) {
- set(data, 0, data.length);
+ set(data, 0, data == null ? 0 : data.length);
}
public ByteArray(byte[] data, int offset, int length) {
@@ -148,7 +148,10 @@ public class ByteArray implements Comparable<ByteArray> {
@Override
public String toString() {
- return Bytes.toString(data, offset, length);
+ if (data == null)
+ return null;
+ else
+ return Bytes.toStringBinary(data, offset, length);
}
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/metadata/src/main/java/org/apache/kylin/metadata/filter/ColumnTupleFilter.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/filter/ColumnTupleFilter.java b/metadata/src/main/java/org/apache/kylin/metadata/filter/ColumnTupleFilter.java
index f689ccb..fde41b1 100644
--- a/metadata/src/main/java/org/apache/kylin/metadata/filter/ColumnTupleFilter.java
+++ b/metadata/src/main/java/org/apache/kylin/metadata/filter/ColumnTupleFilter.java
@@ -63,7 +63,7 @@ public class ColumnTupleFilter extends TupleFilter {
@Override
public String toString() {
- return "ColumnFilter [column=" + columnRef + "]";
+ return "" + columnRef;
}
@Override
@@ -79,7 +79,7 @@ public class ColumnTupleFilter extends TupleFilter {
@Override
public Collection<?> getValues() {
- this.values.set(0, (String) this.tupleValue);
+ this.values.set(0, this.tupleValue);
return this.values;
}
@@ -114,7 +114,7 @@ public class ColumnTupleFilter extends TupleFilter {
table = new TableDesc();
table.setName(tableName);
}
-
+
column.setId(BytesUtil.readUTFString(buffer));
column.setName(BytesUtil.readUTFString(buffer));
column.setDatatype(BytesUtil.readUTFString(buffer));
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/metadata/src/main/java/org/apache/kylin/metadata/filter/CompareTupleFilter.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/filter/CompareTupleFilter.java b/metadata/src/main/java/org/apache/kylin/metadata/filter/CompareTupleFilter.java
index 2b68469..57b50b7 100644
--- a/metadata/src/main/java/org/apache/kylin/metadata/filter/CompareTupleFilter.java
+++ b/metadata/src/main/java/org/apache/kylin/metadata/filter/CompareTupleFilter.java
@@ -126,7 +126,7 @@ public class CompareTupleFilter extends TupleFilter {
@Override
public String toString() {
- return "CompareFilter [" + column + " " + operator + " " + conditionValues + ", children=" + children + "]";
+ return column + " " + operator + " " + conditionValues;
}
// TODO requires generalize, currently only evaluates COLUMN {op} CONST
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/metadata/src/main/java/org/apache/kylin/metadata/filter/ConstantTupleFilter.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/filter/ConstantTupleFilter.java b/metadata/src/main/java/org/apache/kylin/metadata/filter/ConstantTupleFilter.java
index f372b4a..cc3add2 100644
--- a/metadata/src/main/java/org/apache/kylin/metadata/filter/ConstantTupleFilter.java
+++ b/metadata/src/main/java/org/apache/kylin/metadata/filter/ConstantTupleFilter.java
@@ -34,7 +34,7 @@ import org.apache.kylin.metadata.tuple.IEvaluatableTuple;
public class ConstantTupleFilter extends TupleFilter {
public static final ConstantTupleFilter FALSE = new ConstantTupleFilter();
- public static final ConstantTupleFilter TRUE = new ConstantTupleFilter("TRUE");
+ public static final ConstantTupleFilter TRUE = new ConstantTupleFilter((Object) null); // not sure of underlying code system, null is the only value that applies to all types
private Collection<Object> constantValues;
@@ -60,7 +60,7 @@ public class ConstantTupleFilter extends TupleFilter {
@Override
public String toString() {
- return "ConstantFilter [constant=" + constantValues + "]";
+ return "" + constantValues;
}
@Override
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/metadata/src/main/java/org/apache/kylin/metadata/filter/LogicalTupleFilter.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/filter/LogicalTupleFilter.java b/metadata/src/main/java/org/apache/kylin/metadata/filter/LogicalTupleFilter.java
index 1844392..4d38565 100644
--- a/metadata/src/main/java/org/apache/kylin/metadata/filter/LogicalTupleFilter.java
+++ b/metadata/src/main/java/org/apache/kylin/metadata/filter/LogicalTupleFilter.java
@@ -67,7 +67,7 @@ public class LogicalTupleFilter extends TupleFilter {
@Override
public String toString() {
- return "LogicalFilter [operator=" + operator + ", children=" + children + "]";
+ return operator + " " + children;
}
@Override
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/metadata/src/main/java/org/apache/kylin/metadata/serializer/DataTypeSerializer.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/serializer/DataTypeSerializer.java b/metadata/src/main/java/org/apache/kylin/metadata/serializer/DataTypeSerializer.java
index 63d4ddd..aafb1c2 100644
--- a/metadata/src/main/java/org/apache/kylin/metadata/serializer/DataTypeSerializer.java
+++ b/metadata/src/main/java/org/apache/kylin/metadata/serializer/DataTypeSerializer.java
@@ -70,9 +70,10 @@ abstract public class DataTypeSerializer<T> implements BytesSerializer<T> {
/** peek into buffer and return the length of serialization */
abstract public int peekLength(ByteBuffer in);
- /** convert from String to obj */
+ /** convert from String to obj (string often come as byte[] in mapred) */
abstract public T valueOf(byte[] value);
+ /** convert from String to obj */
public T valueOf(String value) {
try {
return valueOf(value.getBytes("UTF-8"));
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
index ada4ed7..c94c604 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
@@ -2,10 +2,13 @@ package org.apache.kylin.storage.gridtable;
import org.apache.kylin.common.util.ByteArray;
import org.apache.kylin.common.util.BytesUtil;
+import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.dict.Dictionary;
import org.apache.kylin.metadata.filter.IFilterCodeSystem;
import org.apache.kylin.metadata.measure.MeasureAggregator;
import org.apache.kylin.metadata.serializer.DataTypeSerializer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.nio.ByteBuffer;
import java.util.Map;
@@ -15,13 +18,15 @@ import java.util.Map;
*/
@SuppressWarnings({ "rawtypes", "unchecked" })
public class GTDictionaryCodeSystem implements IGTCodeSystem {
+ private static final Logger logger = LoggerFactory.getLogger(CubeManager.class);
+
private GTInfo info;
- private Map<Integer, Dictionary> dictionaryMaps = null; // key: column index; value: dictionary for this column;
+ private Map<Integer, Dictionary> dictionaryMap = null; // key: column index; value: dictionary for this column;
private IFilterCodeSystem<ByteArray> filterCS;
private DataTypeSerializer[] serializers;
- public GTDictionaryCodeSystem(Map<Integer, Dictionary> dictionaryMaps) {
- this.dictionaryMaps = dictionaryMaps;
+ public GTDictionaryCodeSystem(Map<Integer, Dictionary> dictionaryMap) {
+ this.dictionaryMap = dictionaryMap;
}
@Override
@@ -30,8 +35,8 @@ public class GTDictionaryCodeSystem implements IGTCodeSystem {
serializers = new DataTypeSerializer[info.nColumns];
for (int i = 0; i < info.nColumns; i++) {
- if (dictionaryMaps.get(i) != null) {
- serializers[i] = new DictionarySerializer(dictionaryMaps.get(i));
+ if (dictionaryMap.get(i) != null) {
+ serializers[i] = new DictionarySerializer(dictionaryMap.get(i));
} else {
serializers[i] = DataTypeSerializer.create(info.colTypes[i]);
}
@@ -56,7 +61,10 @@ public class GTDictionaryCodeSystem implements IGTCodeSystem {
@Override
public void serialize(ByteArray code, ByteBuffer buffer) {
- BytesUtil.writeByteArray(code.array(), code.offset(), code.length(), buffer);
+ if (code == null)
+ BytesUtil.writeByteArray(null, 0, 0, buffer);
+ else
+ BytesUtil.writeByteArray(code.array(), code.offset(), code.length(), buffer);
}
@Override
@@ -78,16 +86,33 @@ public class GTDictionaryCodeSystem implements IGTCodeSystem {
@Override
public void encodeColumnValue(int col, Object value, ByteBuffer buf) {
- serializers[col].serialize(value, buf);
+ encodeColumnValue(col, value, 0, buf);
}
@Override
public void encodeColumnValue(int col, Object value, int roundingFlag, ByteBuffer buf) {
+ // this is a bit too complicated, but encoding only happens once at build time, so it is OK
DataTypeSerializer serializer = serializers[col];
- if (serializer instanceof DictionarySerializer) {
- ((DictionarySerializer) serializer).serializeWithRounding(value, roundingFlag, buf);
- } else {
- serializer.serialize(value, buf);
+ try {
+ if (serializer instanceof DictionarySerializer) {
+ ((DictionarySerializer) serializer).serializeWithRounding(value, roundingFlag, buf);
+ } else {
+ serializer.serialize(value, buf);
+ }
+ } catch (ClassCastException ex) {
+ // try convert string into a correct object type
+ try {
+ if (value instanceof String) {
+ Object converted = serializer.valueOf((String) value);
+ if ((converted instanceof String) == false) {
+ encodeColumnValue(col, converted, roundingFlag, buf);
+ return;
+ }
+ }
+ } catch (Throwable e) {
+ logger.error("Fail to encode value '" + value + "'", e);
+ }
+ throw ex;
}
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRecord.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRecord.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRecord.java
index 605a469..aeefc2b 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRecord.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRecord.java
@@ -21,7 +21,7 @@ public class GTRecord implements Comparable<GTRecord> {
this.cols[i] = new ByteArray();
this.maskForEqualHashComp = info.colAll;
}
-
+
public ByteArray get(int i) {
return cols[i];
}
@@ -51,16 +51,17 @@ public class GTRecord implements Comparable<GTRecord> {
/** decode and return the values of this record */
public Object[] getValues() {
- return getValues(new Object[info.nColumns]);
+ return getValues(info.colAll, new Object[info.nColumns]);
}
/** decode and return the values of this record */
- public Object[] getValues(Object[] result) {
- for (int i = 0; i < info.nColumns; i++) {
- if (cols[i].array() == null)
+ public Object[] getValues(BitSet selectedColumns, Object[] result) {
+ assert selectedColumns.cardinality() <= result.length;
+ for (int i = 0, c = selectedColumns.nextSetBit(0); c >= 0; i++, c = selectedColumns.nextSetBit(c + 1)) {
+ if (cols[c].array() == null)
result[i] = null;
else
- result[i] = info.codeSystem.decodeColumnValue(i, cols[i].asBuffer());
+ result[i] = info.codeSystem.decodeColumnValue(c, cols[c].asBuffer());
}
return result;
}
@@ -92,11 +93,11 @@ public class GTRecord implements Comparable<GTRecord> {
public BitSet maskForEqualHashComp() {
return maskForEqualHashComp;
}
-
+
public void maskForEqualHashComp(BitSet set) {
this.maskForEqualHashComp = set;
}
-
+
@Override
public boolean equals(Object obj) {
if (this == obj)
@@ -132,7 +133,7 @@ public class GTRecord implements Comparable<GTRecord> {
assert this.info == o.info;
assert this.maskForEqualHashComp == o.maskForEqualHashComp; // reference equal for performance
IFilterCodeSystem<ByteArray> cs = info.codeSystem.getFilterCodeSystem();
-
+
int comp = 0;
for (int i = maskForEqualHashComp.nextSetBit(0); i >= 0; i = maskForEqualHashComp.nextSetBit(i + 1)) {
comp = cs.compare(cols[i], o.cols[i]);
@@ -141,10 +142,16 @@ public class GTRecord implements Comparable<GTRecord> {
}
return comp;
}
-
+
@Override
public String toString() {
- return Arrays.toString(getValues());
+ return toString(info.colAll);
+ }
+
+ public String toString(BitSet selectedColumns) {
+ Object[] values = new Object[selectedColumns.cardinality()];
+ getValues(selectedColumns, values);
+ return Arrays.toString(values);
}
// ============================================================================
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/storage/src/main/java/org/apache/kylin/storage/gridtable/GTSampleCodeSystem.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTSampleCodeSystem.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTSampleCodeSystem.java
index 083d8c2..aea4e49 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTSampleCodeSystem.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTSampleCodeSystem.java
@@ -54,7 +54,10 @@ public class GTSampleCodeSystem implements IGTCodeSystem {
@Override
public void serialize(ByteArray code, ByteBuffer buffer) {
- BytesUtil.writeByteArray(code.array(), code.offset(), code.length(), buffer);
+ if (code == null)
+ BytesUtil.writeByteArray(null, 0, 0, buffer);
+ else
+ BytesUtil.writeByteArray(code.array(), code.offset(), code.length(), buffer);
}
@Override
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRange.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRange.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRange.java
index 08513f7..b09a01d 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRange.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRange.java
@@ -14,12 +14,9 @@ public class GTScanRange {
}
public GTScanRange(GTRecord pkStart, GTRecord pkEnd, List<GTRecord> hbaseFuzzyKeys) {
- assert pkStart.info == pkEnd.info;
- assert pkStart.maskForEqualHashComp() == pkStart.info.primaryKey;
- assert pkEnd.maskForEqualHashComp() == pkEnd.info.primaryKey;
this.pkStart = pkStart;
this.pkEnd = pkEnd;
- this.hbaseFuzzyKeys = hbaseFuzzyKeys == null ? Collections.<GTRecord>emptyList() : hbaseFuzzyKeys;
+ this.hbaseFuzzyKeys = hbaseFuzzyKeys == null ? Collections.<GTRecord> emptyList() : hbaseFuzzyKeys;
}
@Override
@@ -58,4 +55,10 @@ public class GTScanRange {
return false;
return true;
}
+
+ @Override
+ public String toString() {
+ return (pkStart == null ? "null" : pkStart.toString(pkStart.info.primaryKey)) //
+ + "-" + (pkEnd == null ? "null" : pkEnd.toString(pkEnd.info.primaryKey));
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRequest.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRequest.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRequest.java
index c92cba4..b71032c 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRequest.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRequest.java
@@ -1,5 +1,6 @@
package org.apache.kylin.storage.gridtable;
+import java.util.Arrays;
import java.util.BitSet;
import java.util.Set;
@@ -87,7 +88,7 @@ public class GTScanRequest {
// un-evaluatable filter must be removed
if (TupleFilter.isEvaluableRecursively(filterPushDown) == false) {
Set<TblColRef> unevaluableColumns = Sets.newHashSet();
- filterPushDown = GTUtil.convertFilterUnevaluatable(filterPushDown, unevaluableColumns);
+ filterPushDown = GTUtil.convertFilterUnevaluatable(filterPushDown, info, unevaluableColumns);
// columns in un-evaluatable filter must be returned without loss so upper layer can do final evaluation
if (hasAggregation()) {
@@ -138,4 +139,9 @@ public class GTScanRequest {
return aggrMetricsFuncs;
}
+ @Override
+ public String toString() {
+ return "GTScanRequest [range=" + range + ", columns=" + columns + ", filterPushDown=" + filterPushDown + ", aggrGroupBy=" + aggrGroupBy + ", aggrMetrics=" + aggrMetrics + ", aggrMetricsFuncs=" + Arrays.toString(aggrMetricsFuncs) + "]";
+ }
+
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java
index 1fb0376..7d042eb 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java
@@ -34,18 +34,18 @@ public class GTUtil {
return new TblColRef(desc);
}
- public static TupleFilter convertFilterUnevaluatable(TupleFilter rootFilter, //
- final Set<TblColRef> unevaluatableColumnCollector) {
- return convertFilter(rootFilter, null, null, false, unevaluatableColumnCollector);
+ public static TupleFilter convertFilterUnevaluatable(TupleFilter rootFilter, GTInfo info, //
+ Set<TblColRef> unevaluatableColumnCollector) {
+ return convertFilter(rootFilter, info, null, false, unevaluatableColumnCollector);
}
- public static TupleFilter convertFilterConstants(TupleFilter rootFilter, final GTInfo info) {
+ public static TupleFilter convertFilterConstants(TupleFilter rootFilter, GTInfo info) {
return convertFilter(rootFilter, info, null, true, null);
}
- public static TupleFilter convertFilterColumnsAndConstants(TupleFilter rootFilter, final GTInfo info, //
- final Map<TblColRef, Integer> colMapping, //
- final Set<TblColRef> unevaluatableColumnCollector) {
+ public static TupleFilter convertFilterColumnsAndConstants(TupleFilter rootFilter, GTInfo info, //
+ Map<TblColRef, Integer> colMapping, //
+ Set<TblColRef> unevaluatableColumnCollector) {
return convertFilter(rootFilter, info, colMapping, true, unevaluatableColumnCollector);
}
@@ -68,6 +68,12 @@ public class GTUtil {
return ConstantTupleFilter.TRUE;
}
+ // shortcut for unEvaluatable filter
+ if (filter.isEvaluable() == false) {
+ TupleFilter.collectColumns(filter, unevaluatableColumnCollector);
+ return ConstantTupleFilter.TRUE;
+ }
+
// map to column onto grid table
if (colMapping != null && filter instanceof ColumnTupleFilter) {
ColumnTupleFilter colFilter = (ColumnTupleFilter) filter;
@@ -75,18 +81,9 @@ public class GTUtil {
return new ColumnTupleFilter(info.colRef(gtColIdx));
}
- // below consider compare filter only
- if (filter instanceof CompareTupleFilter) {
-
- // shortcut for unEvaluatable compare filter
- if (TupleFilter.isEvaluableRecursively(filter) == false) {
- TupleFilter.collectColumns(filter, unevaluatableColumnCollector);
- return ConstantTupleFilter.TRUE;
- }
-
- if (encodeConstants) {
- return encodeConstants((CompareTupleFilter) filter);
- }
+ // encode constants
+ if (encodeConstants && filter instanceof CompareTupleFilter) {
+ return encodeConstants((CompareTupleFilter) filter);
}
return filter;
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/storage/src/test/java/org/apache/kylin/storage/gridtable/DictGridTableTest.java
----------------------------------------------------------------------
diff --git a/storage/src/test/java/org/apache/kylin/storage/gridtable/DictGridTableTest.java b/storage/src/test/java/org/apache/kylin/storage/gridtable/DictGridTableTest.java
new file mode 100644
index 0000000..46ec66c
--- /dev/null
+++ b/storage/src/test/java/org/apache/kylin/storage/gridtable/DictGridTableTest.java
@@ -0,0 +1,214 @@
+package org.apache.kylin.storage.gridtable;
+
+import static org.junit.Assert.*;
+
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.nio.ByteBuffer;
+import java.util.BitSet;
+import java.util.Map;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.kylin.common.util.ByteArray;
+import org.apache.kylin.dict.Dictionary;
+import org.apache.kylin.dict.NumberDictionaryBuilder;
+import org.apache.kylin.dict.StringBytesConverter;
+import org.apache.kylin.dict.TrieDictionaryBuilder;
+import org.apache.kylin.metadata.filter.ColumnTupleFilter;
+import org.apache.kylin.metadata.filter.CompareTupleFilter;
+import org.apache.kylin.metadata.filter.ConstantTupleFilter;
+import org.apache.kylin.metadata.filter.ExtractTupleFilter;
+import org.apache.kylin.metadata.filter.LogicalTupleFilter;
+import org.apache.kylin.metadata.filter.TupleFilter;
+import org.apache.kylin.metadata.filter.TupleFilter.FilterOperatorEnum;
+import org.apache.kylin.metadata.model.DataType;
+import org.apache.kylin.metadata.model.TblColRef;
+import org.apache.kylin.storage.gridtable.GTInfo.Builder;
+import org.apache.kylin.storage.gridtable.memstore.GTSimpleMemStore;
+import org.junit.Test;
+
+import com.google.common.collect.Maps;
+
+public class DictGridTableTest {
+
+ @Test
+ public void test() throws IOException {
+ GridTable table = newTestTable();
+ verifyFirstRow(table);
+ verifyScanWithUnevaluatableFilter(table);
+ verifyScanWithEvaluatableFilter(table);
+ }
+
+ private void verifyFirstRow(GridTable table) throws IOException {
+ doScanAndVerify(table, new GTScanRequest(table.getInfo()), "[1421193600000, 30, Yang, 10, 10.5]");
+ }
+
+ private void verifyScanWithUnevaluatableFilter(GridTable table) throws IOException {
+ GTInfo info = table.getInfo();
+
+ CompareTupleFilter fcomp = compare(info.colRef(0), FilterOperatorEnum.GT, enc(info, 0, "2015-01-14"));
+ ExtractTupleFilter funevaluatable = unevaluatable(info.colRef(1));
+ LogicalTupleFilter filter = and(fcomp, funevaluatable);
+
+ GTScanRequest req = new GTScanRequest(info, null, setOf(0), setOf(3), new String[] { "sum" }, filter);
+ // note the unEvaluatable column 1 in filter is added to group by
+ assertEquals("GTScanRequest [range=null-null, columns={0, 1, 3}, filterPushDown=AND [NULL.GT_MOCKUP_TABLE.1 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], [null]], aggrGroupBy={0, 1}, aggrMetrics={3}, aggrMetricsFuncs=[sum]]", req.toString());
+
+ doScanAndVerify(table, req, "[1421280000000, 20, null, 20, null]");
+ }
+
+ private void verifyScanWithEvaluatableFilter(GridTable table) throws IOException {
+ GTInfo info = table.getInfo();
+
+ CompareTupleFilter fcomp1 = compare(info.colRef(0), FilterOperatorEnum.GT, enc(info, 0, "2015-01-14"));
+ CompareTupleFilter fcomp2 = compare(info.colRef(1), FilterOperatorEnum.GT, enc(info, 1, "10"));
+ LogicalTupleFilter filter = and(fcomp1, fcomp2);
+
+ GTScanRequest req = new GTScanRequest(info, null, setOf(0), setOf(3), new String[] { "sum" }, filter);
+ // note the evaluatable column 1 in filter is added to returned columns but not in group by
+ assertEquals("GTScanRequest [range=null-null, columns={0, 1, 3}, filterPushDown=AND [NULL.GT_MOCKUP_TABLE.1 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], NULL.GT_MOCKUP_TABLE.2 GT [\\x00]], aggrGroupBy={0}, aggrMetrics={3}, aggrMetricsFuncs=[sum]]", req.toString());
+
+ doScanAndVerify(table, req, "[1421280000000, 30, null, 30, null]", "[1421366400000, 20, null, 40, null]");
+ }
+
+ private void doScanAndVerify(GridTable table, GTScanRequest req, String... verifyRows) throws IOException {
+ System.out.println(req);
+ IGTScanner scanner = table.scan(req);
+ int i = 0;
+ for (GTRecord r : scanner) {
+ System.out.println(r);
+ if (verifyRows != null && i < verifyRows.length) {
+ assertEquals(verifyRows[i], r.toString());
+ }
+ i++;
+ }
+ scanner.close();
+ }
+
+ private Object enc(GTInfo info, int col, String value) {
+ ByteBuffer buf = ByteBuffer.allocate(info.maxRecordLength);
+ info.codeSystem.encodeColumnValue(col, value, buf);
+ return ByteArray.copyOf(buf.array(), buf.arrayOffset(), buf.position());
+ }
+
+ private ExtractTupleFilter unevaluatable(TblColRef col) {
+ ExtractTupleFilter r = new ExtractTupleFilter(FilterOperatorEnum.EXTRACT);
+ r.addChild(new ColumnTupleFilter(col));
+ return r;
+ }
+
+ private CompareTupleFilter compare(TblColRef col, FilterOperatorEnum op, Object value) {
+ CompareTupleFilter result = new CompareTupleFilter(op);
+ result.addChild(new ColumnTupleFilter(col));
+ result.addChild(new ConstantTupleFilter(value));
+ return result;
+ }
+
+ private LogicalTupleFilter and(TupleFilter... children) {
+ return logic(FilterOperatorEnum.AND, children);
+ }
+
+ private LogicalTupleFilter or(TupleFilter... children) {
+ return logic(FilterOperatorEnum.AND, children);
+ }
+
+ private LogicalTupleFilter not(TupleFilter child) {
+ return logic(FilterOperatorEnum.AND, child);
+ }
+
+ private LogicalTupleFilter logic(FilterOperatorEnum op, TupleFilter... children) {
+ LogicalTupleFilter result = new LogicalTupleFilter(op);
+ for (TupleFilter c : children) {
+ result.addChild(c);
+ }
+ return result;
+ }
+
+ static GridTable newTestTable() throws IOException {
+ GTInfo info = newInfo();
+ GTSimpleMemStore store = new GTSimpleMemStore(info);
+ GridTable table = new GridTable(info, store);
+
+ GTRecord r = new GTRecord(table.getInfo());
+ GTBuilder builder = table.rebuild();
+
+ builder.write(r.setValues("2015-01-14", "30", "Yang", new LongWritable(10), new BigDecimal("10.5")));
+ builder.write(r.setValues("2015-01-14", "30", "Luke", new LongWritable(10), new BigDecimal("10.5")));
+ builder.write(r.setValues("2015-01-15", "30", "Xu", new LongWritable(10), new BigDecimal("10.5")));
+ builder.write(r.setValues("2015-01-15", "20", "Dong", new LongWritable(10), new BigDecimal("10.5")));
+ builder.write(r.setValues("2015-01-15", "20", "Jason", new LongWritable(10), new BigDecimal("10.5")));
+ builder.write(r.setValues("2015-01-16", "20", "Mahone", new LongWritable(10), new BigDecimal("10.5")));
+ builder.write(r.setValues("2015-01-16", "30", "Shaofeng", new LongWritable(10), new BigDecimal("10.5")));
+ builder.write(r.setValues("2015-01-16", "20", "Qianhao", new LongWritable(10), new BigDecimal("10.5")));
+ builder.write(r.setValues("2015-01-16", "30", "George", new LongWritable(10), new BigDecimal("10.5")));
+ builder.write(r.setValues("2015-01-17", "10", "Kejia", new LongWritable(10), new BigDecimal("10.5")));
+ builder.close();
+
+ return table;
+ }
+
+ static GTInfo newInfo() {
+ Builder builder = GTInfo.builder();
+ builder.setCodeSystem(newDictCodeSystem());
+ builder.setColumns( //
+ DataType.getInstance("timestamp"), //
+ DataType.getInstance("integer"), //
+ DataType.getInstance("varchar"), //
+ DataType.getInstance("bigint"), //
+ DataType.getInstance("decimal") //
+ );
+ builder.setPrimaryKey(setOf(0));
+ builder.setColumnPreferIndex(setOf(0));
+ builder.enableColumnBlock(new BitSet[] { setOf(0, 1, 2), setOf(3, 4) });
+ builder.enableRowBlock(4);
+ GTInfo info = builder.build();
+ return info;
+ }
+
+ @SuppressWarnings("rawtypes")
+ private static GTDictionaryCodeSystem newDictCodeSystem() {
+ Map<Integer, Dictionary> dictionaryMap = Maps.newHashMap();
+ dictionaryMap.put(1, newDictionaryOfInteger());
+ dictionaryMap.put(2, newDictionaryOfString());
+ return new GTDictionaryCodeSystem(dictionaryMap);
+ }
+
+ @SuppressWarnings("rawtypes")
+ private static Dictionary newDictionaryOfString() {
+ TrieDictionaryBuilder<String> builder = new TrieDictionaryBuilder<>(new StringBytesConverter());
+ builder.addValue("Dong");
+ builder.addValue("George");
+ builder.addValue("Jason");
+ builder.addValue("Kejia");
+ builder.addValue("Luke");
+ builder.addValue("Mahone");
+ builder.addValue("Qianhao");
+ builder.addValue("Shaofeng");
+ builder.addValue("Xu");
+ builder.addValue("Yang");
+ return builder.build(0);
+ }
+
+ @SuppressWarnings("rawtypes")
+ private static Dictionary newDictionaryOfInteger() {
+ NumberDictionaryBuilder<String> builder = new NumberDictionaryBuilder<>(new StringBytesConverter());
+ builder.addValue("10");
+ builder.addValue("20");
+ builder.addValue("30");
+ builder.addValue("40");
+ builder.addValue("50");
+ builder.addValue("60");
+ builder.addValue("70");
+ builder.addValue("80");
+ builder.addValue("90");
+ builder.addValue("100");
+ return builder.build(0);
+ }
+
+ private static BitSet setOf(int... values) {
+ BitSet set = new BitSet();
+ for (int i : values)
+ set.set(i);
+ return set;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/storage/src/test/java/org/apache/kylin/storage/gridtable/GTInvertedIndexTest.java
----------------------------------------------------------------------
diff --git a/storage/src/test/java/org/apache/kylin/storage/gridtable/GTInvertedIndexTest.java b/storage/src/test/java/org/apache/kylin/storage/gridtable/GTInvertedIndexTest.java
deleted file mode 100644
index 1460039..0000000
--- a/storage/src/test/java/org/apache/kylin/storage/gridtable/GTInvertedIndexTest.java
+++ /dev/null
@@ -1,165 +0,0 @@
-package org.apache.kylin.storage.gridtable;
-
-import static org.junit.Assert.*;
-import it.uniroma3.mat.extendedset.intset.ConciseSet;
-
-import java.math.BigDecimal;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.kylin.common.util.ByteArray;
-import org.apache.kylin.metadata.filter.ColumnTupleFilter;
-import org.apache.kylin.metadata.filter.CompareTupleFilter;
-import org.apache.kylin.metadata.filter.ConstantTupleFilter;
-import org.apache.kylin.metadata.filter.LogicalTupleFilter;
-import org.apache.kylin.metadata.filter.TupleFilter;
-import org.apache.kylin.metadata.filter.TupleFilter.FilterOperatorEnum;
-import org.apache.kylin.metadata.model.TblColRef;
-import org.apache.kylin.metadata.serializer.StringSerializer;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-
-public class GTInvertedIndexTest {
-
- GTInfo info;
- GTInvertedIndex index;
- ArrayList<CompareTupleFilter> basicFilters = Lists.newArrayList();
- ArrayList<ConciseSet> basicResults = Lists.newArrayList();
-
- public GTInvertedIndexTest() {
-
- info = GridTableTest.advancedInfo();
- TblColRef colA = info.colRef(0);
-
- // block i contains value "i", the last is NULL
- index = new GTInvertedIndex(info);
- GTRowBlock mockBlock = GTRowBlock.allocate(info);
- GTRowBlock.Writer writer = mockBlock.getWriter();
- GTRecord record = new GTRecord(info);
- for (int i = 0; i < 10; i++) {
- record.setValues(i < 9 ? "" + i : null, "", "", new LongWritable(0), new BigDecimal(0));
- for (int j = 0; j < info.getRowBlockSize(); j++) {
- writer.append(record);
- }
- writer.readyForFlush();
- index.add(mockBlock);
-
- writer.clearForNext();
- }
-
- basicFilters.add(compare(colA, FilterOperatorEnum.ISNULL));
- basicResults.add(set(9));
-
- basicFilters.add(compare(colA, FilterOperatorEnum.ISNOTNULL));
- basicResults.add(set(0, 1, 2, 3, 4, 5, 6, 7, 8, 9));
-
- basicFilters.add(compare(colA, FilterOperatorEnum.EQ, 0));
- basicResults.add(set(0));
-
- basicFilters.add(compare(colA, FilterOperatorEnum.NEQ, 0));
- basicResults.add(set(0, 1, 2, 3, 4, 5, 6, 7, 8, 9));
-
- basicFilters.add(compare(colA, FilterOperatorEnum.IN, 0, 5));
- basicResults.add(set(0, 5));
-
- basicFilters.add(compare(colA, FilterOperatorEnum.NOTIN, 0, 5));
- basicResults.add(set(0, 1, 2, 3, 4, 5, 6, 7, 8, 9));
-
- basicFilters.add(compare(colA, FilterOperatorEnum.LT, 3));
- basicResults.add(set(0, 1, 2));
-
- basicFilters.add(compare(colA, FilterOperatorEnum.LTE, 3));
- basicResults.add(set(0, 1, 2, 3));
-
- basicFilters.add(compare(colA, FilterOperatorEnum.GT, 3));
- basicResults.add(set(4, 5, 6, 7, 8));
-
- basicFilters.add(compare(colA, FilterOperatorEnum.GTE, 3));
- basicResults.add(set(3, 4, 5, 6, 7, 8));
- }
-
- @Test
- public void testBasics() {
- for (int i = 0; i < basicFilters.size(); i++) {
- assertEquals(basicResults.get(i), index.filter(basicFilters.get(i)));
- }
- }
-
- @Test
- public void testLogicalAnd() {
- for (int i = 0; i < basicFilters.size(); i++) {
- for (int j = 0; j < basicFilters.size(); j++) {
- LogicalTupleFilter f = logical(FilterOperatorEnum.AND, basicFilters.get(i), basicFilters.get(j));
- ConciseSet r = basicResults.get(i).clone();
- r.retainAll(basicResults.get(j));
- assertEquals(r, index.filter(f));
- }
- }
- }
-
- @Test
- public void testLogicalOr() {
- for (int i = 0; i < basicFilters.size(); i++) {
- for (int j = 0; j < basicFilters.size(); j++) {
- LogicalTupleFilter f = logical(FilterOperatorEnum.OR, basicFilters.get(i), basicFilters.get(j));
- ConciseSet r = basicResults.get(i).clone();
- r.addAll(basicResults.get(j));
- assertEquals(r, index.filter(f));
- }
- }
- }
-
- @Test
- public void testNotEvaluable() {
- ConciseSet all = set(0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
-
- CompareTupleFilter notEvaluable = compare(info.colRef(1), FilterOperatorEnum.EQ, 0);
- assertEquals(all, index.filter(notEvaluable));
-
- LogicalTupleFilter or = logical(FilterOperatorEnum.OR, basicFilters.get(0), notEvaluable);
- assertEquals(all, index.filter(or));
-
- LogicalTupleFilter and = logical(FilterOperatorEnum.AND, basicFilters.get(0), notEvaluable);
- assertEquals(basicResults.get(0), index.filter(and));
- }
-
- public static CompareTupleFilter compare(TblColRef col, TupleFilter.FilterOperatorEnum op, int... ids) {
- CompareTupleFilter filter = new CompareTupleFilter(op);
- filter.addChild(columnFilter(col));
- for (int i : ids) {
- filter.addChild(constFilter(i));
- }
- return filter;
- }
-
- public static LogicalTupleFilter logical(TupleFilter.FilterOperatorEnum op, TupleFilter... filters) {
- LogicalTupleFilter filter = new LogicalTupleFilter(op);
- for (TupleFilter f : filters)
- filter.addChild(f);
- return filter;
- }
-
- public static ColumnTupleFilter columnFilter(TblColRef col) {
- return new ColumnTupleFilter(col);
- }
-
- public static ConstantTupleFilter constFilter(int id) {
- byte[] space = new byte[10];
- ByteBuffer buf = ByteBuffer.wrap(space);
- StringSerializer stringSerializer = new StringSerializer();
- stringSerializer.serialize("" + id, buf);
- ByteArray data = new ByteArray(buf.array(), buf.arrayOffset(), buf.position());
- return new ConstantTupleFilter(data);
- }
-
- public static ConciseSet set(int... ints) {
- ConciseSet set = new ConciseSet();
- for (int i : ints)
- set.add(i);
- return set;
- }
-
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java
----------------------------------------------------------------------
diff --git a/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java b/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java
deleted file mode 100644
index 6561c6e..0000000
--- a/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java
+++ /dev/null
@@ -1,208 +0,0 @@
-package org.apache.kylin.storage.gridtable;
-
-import static org.junit.Assert.*;
-
-import java.io.IOException;
-import java.math.BigDecimal;
-import java.util.BitSet;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.kylin.metadata.model.DataType;
-import org.apache.kylin.storage.gridtable.GTInfo.Builder;
-import org.apache.kylin.storage.gridtable.memstore.GTSimpleMemStore;
-import org.junit.Test;
-
-public class GridTableTest {
-
- @Test
- public void testBasics() throws IOException {
- GTInfo info = basicInfo();
- GTSimpleMemStore store = new GTSimpleMemStore(info);
- GridTable table = new GridTable(info, store);
-
- GTBuilder builder = rebuild(table);
- IGTScanner scanner = scan(table);
- assertEquals(builder.getWrittenRowBlockCount(), scanner.getScannedRowBlockCount());
- assertEquals(builder.getWrittenRowCount(), scanner.getScannedRowCount());
- }
-
- @Test
- public void testAdvanced() throws IOException {
- GTInfo info = advancedInfo();
- GTSimpleMemStore store = new GTSimpleMemStore(info);
- GridTable table = new GridTable(info, store);
-
- GTBuilder builder = rebuild(table);
- IGTScanner scanner = scan(table);
- assertEquals(builder.getWrittenRowBlockCount(), scanner.getScannedRowBlockCount());
- assertEquals(builder.getWrittenRowCount(), scanner.getScannedRowCount());
- }
-
- @Test
- public void testAggregate() throws IOException {
- GTInfo info = advancedInfo();
- GTSimpleMemStore store = new GTSimpleMemStore(info);
- GridTable table = new GridTable(info, store);
-
- GTBuilder builder = rebuild(table);
- IGTScanner scanner = scanAndAggregate(table);
- assertEquals(builder.getWrittenRowBlockCount(), scanner.getScannedRowBlockCount());
- assertEquals(builder.getWrittenRowCount(), scanner.getScannedRowCount());
- }
-
- @Test
- public void testAppend() throws IOException {
- GTInfo info = advancedInfo();
- GTSimpleMemStore store = new GTSimpleMemStore(info);
- GridTable table = new GridTable(info, store);
-
- rebuildViaAppend(table);
- IGTScanner scanner = scan(table);
- assertEquals(3, scanner.getScannedRowBlockCount());
- assertEquals(10, scanner.getScannedRowCount());
- }
-
- private IGTScanner scan(GridTable table) throws IOException {
- GTScanRequest req = new GTScanRequest(table.getInfo());
- IGTScanner scanner = table.scan(req);
- for (GTRecord r : scanner) {
- Object[] v = r.getValues();
- assertTrue(((String) v[0]).startsWith("2015-"));
- assertTrue(((String) v[2]).equals("Food"));
- assertTrue(((LongWritable) v[3]).get() == 10);
- assertTrue(((BigDecimal) v[4]).doubleValue() == 10.5);
- System.out.println(r);
- }
- scanner.close();
- System.out.println("Scanned Row Block Count: " + scanner.getScannedRowBlockCount());
- System.out.println("Scanned Row Count: " + scanner.getScannedRowCount());
- return scanner;
- }
-
- private IGTScanner scanAndAggregate(GridTable table) throws IOException {
- GTScanRequest req = new GTScanRequest(table.getInfo(), null, setOf(0, 2), setOf(3, 4), new String[] { "count", "sum" }, null);
- IGTScanner scanner = table.scan(req);
- int i = 0;
- for (GTRecord r : scanner) {
- Object[] v = r.getValues();
- switch (i) {
- case 0:
- assertTrue(((LongWritable) v[3]).get() == 20);
- assertTrue(((BigDecimal) v[4]).doubleValue() == 21.0);
- break;
- case 1:
- assertTrue(((LongWritable) v[3]).get() == 30);
- assertTrue(((BigDecimal) v[4]).doubleValue() == 31.5);
- break;
- case 2:
- assertTrue(((LongWritable) v[3]).get() == 40);
- assertTrue(((BigDecimal) v[4]).doubleValue() == 42.0);
- break;
- case 3:
- assertTrue(((LongWritable) v[3]).get() == 10);
- assertTrue(((BigDecimal) v[4]).doubleValue() == 10.5);
- break;
- default:
- fail();
- }
- i++;
- System.out.println(r);
- }
- scanner.close();
- System.out.println("Scanned Row Block Count: " + scanner.getScannedRowBlockCount());
- System.out.println("Scanned Row Count: " + scanner.getScannedRowCount());
- return scanner;
- }
-
- static GTBuilder rebuild(GridTable table) throws IOException {
- GTRecord r = new GTRecord(table.getInfo());
- GTBuilder builder = table.rebuild();
-
- builder.write(r.setValues("2015-01-14", "Yang", "Food", new LongWritable(10), new BigDecimal("10.5")));
- builder.write(r.setValues("2015-01-14", "Luke", "Food", new LongWritable(10), new BigDecimal("10.5")));
- builder.write(r.setValues("2015-01-15", "Xu", "Food", new LongWritable(10), new BigDecimal("10.5")));
- builder.write(r.setValues("2015-01-15", "Dong", "Food", new LongWritable(10), new BigDecimal("10.5")));
- builder.write(r.setValues("2015-01-15", "Jason", "Food", new LongWritable(10), new BigDecimal("10.5")));
- builder.write(r.setValues("2015-01-16", "Mahone", "Food", new LongWritable(10), new BigDecimal("10.5")));
- builder.write(r.setValues("2015-01-16", "Shaofeng", "Food", new LongWritable(10), new BigDecimal("10.5")));
- builder.write(r.setValues("2015-01-16", "Qianhao", "Food", new LongWritable(10), new BigDecimal("10.5")));
- builder.write(r.setValues("2015-01-16", "George", "Food", new LongWritable(10), new BigDecimal("10.5")));
- builder.write(r.setValues("2015-01-17", "Kejia", "Food", new LongWritable(10), new BigDecimal("10.5")));
- builder.close();
-
- System.out.println("Written Row Block Count: " + builder.getWrittenRowBlockCount());
- System.out.println("Written Row Count: " + builder.getWrittenRowCount());
- return builder;
- }
-
- static void rebuildViaAppend(GridTable table) throws IOException {
- GTRecord r = new GTRecord(table.getInfo());
- GTBuilder builder;
-
- builder = table.append();
- builder.write(r.setValues("2015-01-14", "Yang", "Food", new LongWritable(10), new BigDecimal("10.5")));
- builder.write(r.setValues("2015-01-14", "Luke", "Food", new LongWritable(10), new BigDecimal("10.5")));
- builder.write(r.setValues("2015-01-15", "Xu", "Food", new LongWritable(10), new BigDecimal("10.5")));
- builder.write(r.setValues("2015-01-15", "Dong", "Food", new LongWritable(10), new BigDecimal("10.5")));
- builder.close();
- System.out.println("Written Row Block Count: " + builder.getWrittenRowBlockCount());
- System.out.println("Written Row Count: " + builder.getWrittenRowCount());
-
- builder = table.append();
- builder.write(r.setValues("2015-01-15", "Jason", "Food", new LongWritable(10), new BigDecimal("10.5")));
- builder.write(r.setValues("2015-01-16", "Mahone", "Food", new LongWritable(10), new BigDecimal("10.5")));
- builder.write(r.setValues("2015-01-16", "Shaofeng", "Food", new LongWritable(10), new BigDecimal("10.5")));
- builder.close();
- System.out.println("Written Row Block Count: " + builder.getWrittenRowBlockCount());
- System.out.println("Written Row Count: " + builder.getWrittenRowCount());
-
- builder = table.append();
- builder.write(r.setValues("2015-01-16", "Qianhao", "Food", new LongWritable(10), new BigDecimal("10.5")));
- builder.write(r.setValues("2015-01-16", "George", "Food", new LongWritable(10), new BigDecimal("10.5")));
- builder.close();
- System.out.println("Written Row Block Count: " + builder.getWrittenRowBlockCount());
- System.out.println("Written Row Count: " + builder.getWrittenRowCount());
-
- builder = table.append();
- builder.write(r.setValues("2015-01-17", "Kejia", "Food", new LongWritable(10), new BigDecimal("10.5")));
- builder.close();
- System.out.println("Written Row Block Count: " + builder.getWrittenRowBlockCount());
- System.out.println("Written Row Count: " + builder.getWrittenRowCount());
- }
-
- static GTInfo basicInfo() {
- Builder builder = infoBuilder();
- GTInfo info = builder.build();
- return info;
- }
-
- static GTInfo advancedInfo() {
- Builder builder = infoBuilder();
- builder.enableColumnBlock(new BitSet[] { setOf(0, 1, 2), setOf(3, 4) });
- builder.enableRowBlock(4);
- GTInfo info = builder.build();
- return info;
- }
-
- private static Builder infoBuilder() {
- Builder builder = GTInfo.builder();
- builder.setCodeSystem(new GTSampleCodeSystem());
- builder.setColumns( //
- DataType.getInstance("varchar"), //
- DataType.getInstance("varchar"), //
- DataType.getInstance("varchar"), //
- DataType.getInstance("bigint"), //
- DataType.getInstance("decimal") //
- );
- builder.setPrimaryKey(setOf(0));
- builder.setColumnPreferIndex(setOf(0));
- return builder;
- }
-
- private static BitSet setOf(int... values) {
- BitSet set = new BitSet();
- for (int i : values)
- set.set(i);
- return set;
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/storage/src/test/java/org/apache/kylin/storage/gridtable/SimpleGridTableTest.java
----------------------------------------------------------------------
diff --git a/storage/src/test/java/org/apache/kylin/storage/gridtable/SimpleGridTableTest.java b/storage/src/test/java/org/apache/kylin/storage/gridtable/SimpleGridTableTest.java
new file mode 100644
index 0000000..c5878b3
--- /dev/null
+++ b/storage/src/test/java/org/apache/kylin/storage/gridtable/SimpleGridTableTest.java
@@ -0,0 +1,208 @@
+package org.apache.kylin.storage.gridtable;
+
+import static org.junit.Assert.*;
+
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.util.BitSet;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.kylin.metadata.model.DataType;
+import org.apache.kylin.storage.gridtable.GTInfo.Builder;
+import org.apache.kylin.storage.gridtable.memstore.GTSimpleMemStore;
+import org.junit.Test;
+
+public class SimpleGridTableTest {
+
+ @Test
+ public void testBasics() throws IOException {
+ GTInfo info = basicInfo();
+ GTSimpleMemStore store = new GTSimpleMemStore(info);
+ GridTable table = new GridTable(info, store);
+
+ GTBuilder builder = rebuild(table);
+ IGTScanner scanner = scan(table);
+ assertEquals(builder.getWrittenRowBlockCount(), scanner.getScannedRowBlockCount());
+ assertEquals(builder.getWrittenRowCount(), scanner.getScannedRowCount());
+ }
+
+ @Test
+ public void testAdvanced() throws IOException {
+ GTInfo info = advancedInfo();
+ GTSimpleMemStore store = new GTSimpleMemStore(info);
+ GridTable table = new GridTable(info, store);
+
+ GTBuilder builder = rebuild(table);
+ IGTScanner scanner = scan(table);
+ assertEquals(builder.getWrittenRowBlockCount(), scanner.getScannedRowBlockCount());
+ assertEquals(builder.getWrittenRowCount(), scanner.getScannedRowCount());
+ }
+
+ @Test
+ public void testAggregate() throws IOException {
+ GTInfo info = advancedInfo();
+ GTSimpleMemStore store = new GTSimpleMemStore(info);
+ GridTable table = new GridTable(info, store);
+
+ GTBuilder builder = rebuild(table);
+ IGTScanner scanner = scanAndAggregate(table);
+ assertEquals(builder.getWrittenRowBlockCount(), scanner.getScannedRowBlockCount());
+ assertEquals(builder.getWrittenRowCount(), scanner.getScannedRowCount());
+ }
+
+ @Test
+ public void testAppend() throws IOException {
+ GTInfo info = advancedInfo();
+ GTSimpleMemStore store = new GTSimpleMemStore(info);
+ GridTable table = new GridTable(info, store);
+
+ rebuildViaAppend(table);
+ IGTScanner scanner = scan(table);
+ assertEquals(3, scanner.getScannedRowBlockCount());
+ assertEquals(10, scanner.getScannedRowCount());
+ }
+
+ private IGTScanner scan(GridTable table) throws IOException {
+ GTScanRequest req = new GTScanRequest(table.getInfo());
+ IGTScanner scanner = table.scan(req);
+ for (GTRecord r : scanner) {
+ Object[] v = r.getValues();
+ assertTrue(((String) v[0]).startsWith("2015-"));
+ assertTrue(((String) v[2]).equals("Food"));
+ assertTrue(((LongWritable) v[3]).get() == 10);
+ assertTrue(((BigDecimal) v[4]).doubleValue() == 10.5);
+ System.out.println(r);
+ }
+ scanner.close();
+ System.out.println("Scanned Row Block Count: " + scanner.getScannedRowBlockCount());
+ System.out.println("Scanned Row Count: " + scanner.getScannedRowCount());
+ return scanner;
+ }
+
+ private IGTScanner scanAndAggregate(GridTable table) throws IOException {
+ GTScanRequest req = new GTScanRequest(table.getInfo(), null, setOf(0, 2), setOf(3, 4), new String[] { "count", "sum" }, null);
+ IGTScanner scanner = table.scan(req);
+ int i = 0;
+ for (GTRecord r : scanner) {
+ Object[] v = r.getValues();
+ switch (i) {
+ case 0:
+ assertTrue(((LongWritable) v[3]).get() == 20);
+ assertTrue(((BigDecimal) v[4]).doubleValue() == 21.0);
+ break;
+ case 1:
+ assertTrue(((LongWritable) v[3]).get() == 30);
+ assertTrue(((BigDecimal) v[4]).doubleValue() == 31.5);
+ break;
+ case 2:
+ assertTrue(((LongWritable) v[3]).get() == 40);
+ assertTrue(((BigDecimal) v[4]).doubleValue() == 42.0);
+ break;
+ case 3:
+ assertTrue(((LongWritable) v[3]).get() == 10);
+ assertTrue(((BigDecimal) v[4]).doubleValue() == 10.5);
+ break;
+ default:
+ fail();
+ }
+ i++;
+ System.out.println(r);
+ }
+ scanner.close();
+ System.out.println("Scanned Row Block Count: " + scanner.getScannedRowBlockCount());
+ System.out.println("Scanned Row Count: " + scanner.getScannedRowCount());
+ return scanner;
+ }
+
+ static GTBuilder rebuild(GridTable table) throws IOException {
+ GTRecord r = new GTRecord(table.getInfo());
+ GTBuilder builder = table.rebuild();
+
+ builder.write(r.setValues("2015-01-14", "Yang", "Food", new LongWritable(10), new BigDecimal("10.5")));
+ builder.write(r.setValues("2015-01-14", "Luke", "Food", new LongWritable(10), new BigDecimal("10.5")));
+ builder.write(r.setValues("2015-01-15", "Xu", "Food", new LongWritable(10), new BigDecimal("10.5")));
+ builder.write(r.setValues("2015-01-15", "Dong", "Food", new LongWritable(10), new BigDecimal("10.5")));
+ builder.write(r.setValues("2015-01-15", "Jason", "Food", new LongWritable(10), new BigDecimal("10.5")));
+ builder.write(r.setValues("2015-01-16", "Mahone", "Food", new LongWritable(10), new BigDecimal("10.5")));
+ builder.write(r.setValues("2015-01-16", "Shaofeng", "Food", new LongWritable(10), new BigDecimal("10.5")));
+ builder.write(r.setValues("2015-01-16", "Qianhao", "Food", new LongWritable(10), new BigDecimal("10.5")));
+ builder.write(r.setValues("2015-01-16", "George", "Food", new LongWritable(10), new BigDecimal("10.5")));
+ builder.write(r.setValues("2015-01-17", "Kejia", "Food", new LongWritable(10), new BigDecimal("10.5")));
+ builder.close();
+
+ System.out.println("Written Row Block Count: " + builder.getWrittenRowBlockCount());
+ System.out.println("Written Row Count: " + builder.getWrittenRowCount());
+ return builder;
+ }
+
+ static void rebuildViaAppend(GridTable table) throws IOException {
+ GTRecord r = new GTRecord(table.getInfo());
+ GTBuilder builder;
+
+ builder = table.append();
+ builder.write(r.setValues("2015-01-14", "Yang", "Food", new LongWritable(10), new BigDecimal("10.5")));
+ builder.write(r.setValues("2015-01-14", "Luke", "Food", new LongWritable(10), new BigDecimal("10.5")));
+ builder.write(r.setValues("2015-01-15", "Xu", "Food", new LongWritable(10), new BigDecimal("10.5")));
+ builder.write(r.setValues("2015-01-15", "Dong", "Food", new LongWritable(10), new BigDecimal("10.5")));
+ builder.close();
+ System.out.println("Written Row Block Count: " + builder.getWrittenRowBlockCount());
+ System.out.println("Written Row Count: " + builder.getWrittenRowCount());
+
+ builder = table.append();
+ builder.write(r.setValues("2015-01-15", "Jason", "Food", new LongWritable(10), new BigDecimal("10.5")));
+ builder.write(r.setValues("2015-01-16", "Mahone", "Food", new LongWritable(10), new BigDecimal("10.5")));
+ builder.write(r.setValues("2015-01-16", "Shaofeng", "Food", new LongWritable(10), new BigDecimal("10.5")));
+ builder.close();
+ System.out.println("Written Row Block Count: " + builder.getWrittenRowBlockCount());
+ System.out.println("Written Row Count: " + builder.getWrittenRowCount());
+
+ builder = table.append();
+ builder.write(r.setValues("2015-01-16", "Qianhao", "Food", new LongWritable(10), new BigDecimal("10.5")));
+ builder.write(r.setValues("2015-01-16", "George", "Food", new LongWritable(10), new BigDecimal("10.5")));
+ builder.close();
+ System.out.println("Written Row Block Count: " + builder.getWrittenRowBlockCount());
+ System.out.println("Written Row Count: " + builder.getWrittenRowCount());
+
+ builder = table.append();
+ builder.write(r.setValues("2015-01-17", "Kejia", "Food", new LongWritable(10), new BigDecimal("10.5")));
+ builder.close();
+ System.out.println("Written Row Block Count: " + builder.getWrittenRowBlockCount());
+ System.out.println("Written Row Count: " + builder.getWrittenRowCount());
+ }
+
+ static GTInfo basicInfo() {
+ Builder builder = infoBuilder();
+ GTInfo info = builder.build();
+ return info;
+ }
+
+ static GTInfo advancedInfo() {
+ Builder builder = infoBuilder();
+ builder.enableColumnBlock(new BitSet[] { setOf(0, 1, 2), setOf(3, 4) });
+ builder.enableRowBlock(4);
+ GTInfo info = builder.build();
+ return info;
+ }
+
+ private static Builder infoBuilder() {
+ Builder builder = GTInfo.builder();
+ builder.setCodeSystem(new GTSampleCodeSystem());
+ builder.setColumns( //
+ DataType.getInstance("varchar"), //
+ DataType.getInstance("varchar"), //
+ DataType.getInstance("varchar"), //
+ DataType.getInstance("bigint"), //
+ DataType.getInstance("decimal") //
+ );
+ builder.setPrimaryKey(setOf(0));
+ builder.setColumnPreferIndex(setOf(0));
+ return builder;
+ }
+
+ private static BitSet setOf(int... values) {
+ BitSet set = new BitSet();
+ for (int i : values)
+ set.set(i);
+ return set;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/storage/src/test/java/org/apache/kylin/storage/gridtable/SimpleInvertedIndexTest.java
----------------------------------------------------------------------
diff --git a/storage/src/test/java/org/apache/kylin/storage/gridtable/SimpleInvertedIndexTest.java b/storage/src/test/java/org/apache/kylin/storage/gridtable/SimpleInvertedIndexTest.java
new file mode 100644
index 0000000..f96b709
--- /dev/null
+++ b/storage/src/test/java/org/apache/kylin/storage/gridtable/SimpleInvertedIndexTest.java
@@ -0,0 +1,165 @@
+package org.apache.kylin.storage.gridtable;
+
+import static org.junit.Assert.*;
+import it.uniroma3.mat.extendedset.intset.ConciseSet;
+
+import java.math.BigDecimal;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.kylin.common.util.ByteArray;
+import org.apache.kylin.metadata.filter.ColumnTupleFilter;
+import org.apache.kylin.metadata.filter.CompareTupleFilter;
+import org.apache.kylin.metadata.filter.ConstantTupleFilter;
+import org.apache.kylin.metadata.filter.LogicalTupleFilter;
+import org.apache.kylin.metadata.filter.TupleFilter;
+import org.apache.kylin.metadata.filter.TupleFilter.FilterOperatorEnum;
+import org.apache.kylin.metadata.model.TblColRef;
+import org.apache.kylin.metadata.serializer.StringSerializer;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public class SimpleInvertedIndexTest {
+
+ GTInfo info;
+ GTInvertedIndex index;
+ ArrayList<CompareTupleFilter> basicFilters = Lists.newArrayList();
+ ArrayList<ConciseSet> basicResults = Lists.newArrayList();
+
+ public SimpleInvertedIndexTest() {
+
+ info = SimpleGridTableTest.advancedInfo();
+ TblColRef colA = info.colRef(0);
+
+ // block i contains value "i", the last is NULL
+ index = new GTInvertedIndex(info);
+ GTRowBlock mockBlock = GTRowBlock.allocate(info);
+ GTRowBlock.Writer writer = mockBlock.getWriter();
+ GTRecord record = new GTRecord(info);
+ for (int i = 0; i < 10; i++) {
+ record.setValues(i < 9 ? "" + i : null, "", "", new LongWritable(0), new BigDecimal(0));
+ for (int j = 0; j < info.getRowBlockSize(); j++) {
+ writer.append(record);
+ }
+ writer.readyForFlush();
+ index.add(mockBlock);
+
+ writer.clearForNext();
+ }
+
+ basicFilters.add(compare(colA, FilterOperatorEnum.ISNULL));
+ basicResults.add(set(9));
+
+ basicFilters.add(compare(colA, FilterOperatorEnum.ISNOTNULL));
+ basicResults.add(set(0, 1, 2, 3, 4, 5, 6, 7, 8, 9));
+
+ basicFilters.add(compare(colA, FilterOperatorEnum.EQ, 0));
+ basicResults.add(set(0));
+
+ basicFilters.add(compare(colA, FilterOperatorEnum.NEQ, 0));
+ basicResults.add(set(0, 1, 2, 3, 4, 5, 6, 7, 8, 9));
+
+ basicFilters.add(compare(colA, FilterOperatorEnum.IN, 0, 5));
+ basicResults.add(set(0, 5));
+
+ basicFilters.add(compare(colA, FilterOperatorEnum.NOTIN, 0, 5));
+ basicResults.add(set(0, 1, 2, 3, 4, 5, 6, 7, 8, 9));
+
+ basicFilters.add(compare(colA, FilterOperatorEnum.LT, 3));
+ basicResults.add(set(0, 1, 2));
+
+ basicFilters.add(compare(colA, FilterOperatorEnum.LTE, 3));
+ basicResults.add(set(0, 1, 2, 3));
+
+ basicFilters.add(compare(colA, FilterOperatorEnum.GT, 3));
+ basicResults.add(set(4, 5, 6, 7, 8));
+
+ basicFilters.add(compare(colA, FilterOperatorEnum.GTE, 3));
+ basicResults.add(set(3, 4, 5, 6, 7, 8));
+ }
+
+ @Test
+ public void testBasics() {
+ for (int i = 0; i < basicFilters.size(); i++) {
+ assertEquals(basicResults.get(i), index.filter(basicFilters.get(i)));
+ }
+ }
+
+ @Test
+ public void testLogicalAnd() {
+ for (int i = 0; i < basicFilters.size(); i++) {
+ for (int j = 0; j < basicFilters.size(); j++) {
+ LogicalTupleFilter f = logical(FilterOperatorEnum.AND, basicFilters.get(i), basicFilters.get(j));
+ ConciseSet r = basicResults.get(i).clone();
+ r.retainAll(basicResults.get(j));
+ assertEquals(r, index.filter(f));
+ }
+ }
+ }
+
+ @Test
+ public void testLogicalOr() {
+ for (int i = 0; i < basicFilters.size(); i++) {
+ for (int j = 0; j < basicFilters.size(); j++) {
+ LogicalTupleFilter f = logical(FilterOperatorEnum.OR, basicFilters.get(i), basicFilters.get(j));
+ ConciseSet r = basicResults.get(i).clone();
+ r.addAll(basicResults.get(j));
+ assertEquals(r, index.filter(f));
+ }
+ }
+ }
+
+ @Test
+ public void testNotEvaluable() {
+ ConciseSet all = set(0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
+
+ CompareTupleFilter notEvaluable = compare(info.colRef(1), FilterOperatorEnum.EQ, 0);
+ assertEquals(all, index.filter(notEvaluable));
+
+ LogicalTupleFilter or = logical(FilterOperatorEnum.OR, basicFilters.get(0), notEvaluable);
+ assertEquals(all, index.filter(or));
+
+ LogicalTupleFilter and = logical(FilterOperatorEnum.AND, basicFilters.get(0), notEvaluable);
+ assertEquals(basicResults.get(0), index.filter(and));
+ }
+
+ public static CompareTupleFilter compare(TblColRef col, TupleFilter.FilterOperatorEnum op, int... ids) {
+ CompareTupleFilter filter = new CompareTupleFilter(op);
+ filter.addChild(columnFilter(col));
+ for (int i : ids) {
+ filter.addChild(constFilter(i));
+ }
+ return filter;
+ }
+
+ public static LogicalTupleFilter logical(TupleFilter.FilterOperatorEnum op, TupleFilter... filters) {
+ LogicalTupleFilter filter = new LogicalTupleFilter(op);
+ for (TupleFilter f : filters)
+ filter.addChild(f);
+ return filter;
+ }
+
+ public static ColumnTupleFilter columnFilter(TblColRef col) {
+ return new ColumnTupleFilter(col);
+ }
+
+ public static ConstantTupleFilter constFilter(int id) {
+ byte[] space = new byte[10];
+ ByteBuffer buf = ByteBuffer.wrap(space);
+ StringSerializer stringSerializer = new StringSerializer();
+ stringSerializer.serialize("" + id, buf);
+ ByteArray data = new ByteArray(buf.array(), buf.arrayOffset(), buf.position());
+ return new ConstantTupleFilter(data);
+ }
+
+ public static ConciseSet set(int... ints) {
+ ConciseSet set = new ConciseSet();
+ for (int i : ints)
+ set.add(i);
+ return set;
+ }
+
+
+}
[36/50] incubator-kylin git commit: Merge branch
'streaming-localdict' of https://github.com/KylinOLAP/Kylin into
streaming-localdict
Posted by li...@apache.org.
Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/5837af0f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/5837af0f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/5837af0f
Branch: refs/heads/streaming-localdict
Commit: 5837af0f426233150d61cb6cb40ab756ed34a8df
Parents: b979dfa d1c115d
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 15:36:01 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 15:36:01 2015 +0800
----------------------------------------------------------------------
.../apache/kylin/common/util/FIFOIterable.java | 20 +++
.../apache/kylin/common/util/FIFOIterator.java | 34 +++++
.../org/apache/kylin/common/util/BasicTest.java | 12 +-
.../test_kylin_cube_with_slr_desc.json | 2 +-
.../kylin/invertedindex/index/TableRecord.java | 5 +-
.../invertedindex/index/TableRecordInfo.java | 10 +-
.../kylin/invertedindex/model/IIDesc.java | 1 +
.../model/IIKeyValueCodecWithState.java | 24 ++-
.../apache/kylin/invertedindex/model/IIRow.java | 10 ++
.../kylin/job/hadoop/cube/BaseCuboidMapper.java | 20 ++-
.../cube/FactDistinctIIColumnsMapper.java | 28 ++--
.../kylin/job/BuildCubeWithEngineTest.java | 1 -
.../job/hadoop/invertedindex/II2CubeTest.java | 146 +++++++++++++++++++
.../invertedindex/ToyIIStreamBuilder.java | 36 +++++
streaming/pom.xml | 7 +
.../kylin/streaming/cube/CubeStreamBuilder.java | 20 +--
.../invertedindex/IIStreamBuilder.java | 33 +++--
17 files changed, 342 insertions(+), 67 deletions(-)
----------------------------------------------------------------------
[14/50] incubator-kylin git commit: Merge branch
'streaming-localdict' of https://github.com/KylinOLAP/Kylin into
streaming-localdict
Posted by li...@apache.org.
Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/7658a500
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/7658a500
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/7658a500
Branch: refs/heads/streaming-localdict
Commit: 7658a5001d0ba835beedca76b4b8687bcfd03bc8
Parents: d4a271d c3ff4f4
Author: Shao Feng, Shi <sh...@ebay.com>
Authored: Thu Mar 26 23:16:06 2015 +0800
Committer: Shao Feng, Shi <sh...@ebay.com>
Committed: Thu Mar 26 23:16:06 2015 +0800
----------------------------------------------------------------------
.../apache/kylin/job/IIStreamBuilderTest.java | 80 ++++++++++++++++++++
.../apache/kylin/streaming/KafkaConsumer.java | 5 +-
.../kylin/streaming/StreamingBootstrap.java | 33 ++++----
.../apache/kylin/streaming/StreamingCLI.java | 3 +-
.../invertedindex/IIStreamBuilder.java | 4 +-
.../invertedindex/IIStreamBuilderTest.java | 41 ----------
6 files changed, 107 insertions(+), 59 deletions(-)
----------------------------------------------------------------------
[45/50] incubator-kylin git commit: fix
Posted by li...@apache.org.
fix
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/b5a78a60
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/b5a78a60
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/b5a78a60
Branch: refs/heads/streaming-localdict
Commit: b5a78a600cbd294ce1457bde93c1d682064b40d1
Parents: d72f2e6
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 18:18:21 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 18:18:21 2015 +0800
----------------------------------------------------------------------
bin/kylin.sh | 2 +-
.../kylin/job/streaming/StreamingCLI.java | 2 +
.../apache/kylin/job/IIStreamBuilderTest.java | 48 ++++++++++++++++----
3 files changed, 41 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/b5a78a60/bin/kylin.sh
----------------------------------------------------------------------
diff --git a/bin/kylin.sh b/bin/kylin.sh
index 95568e1..c300915 100644
--- a/bin/kylin.sh
+++ b/bin/kylin.sh
@@ -99,7 +99,7 @@ then
-Dorg.apache.catalina.connector.CoyoteAdapter.ALLOW_BACKSLASH=true \
-Dkylin.hive.dependency=${hive_dependency} \
-Dspring.profiles.active=${spring_profile} \
- org.apache.hadoop.util.RunJar ${KYLIN_HOME}/lib/kylin-job-*.jar org.apache.kylin.job.streaming.StreamingCLI start $2 > ${tomcat_root}/logs/kylin.log 2>&1 & echo $! > ${KYLIN_HOME}/$2 &
+ org.apache.hadoop.util.RunJar ${KYLIN_HOME}/lib/kylin-job-*.jar org.apache.kylin.job.streaming.StreamingCLI start $2 > ${KYLIN_HOME}/logs/kylin.log 2>&1 & echo $! > ${KYLIN_HOME}$2 &
echo "streaming started $2"
exit 0
else
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/b5a78a60/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java b/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
index 219ca41..1d6994f 100644
--- a/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
+++ b/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
@@ -59,6 +59,8 @@ public class StreamingCLI {
printArgsError(args);
}
} catch (Exception e) {
+ logger.error("error start streaming", e);
+ System.exit(-1);
}
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/b5a78a60/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java b/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
index d42da33..bafcb61 100644
--- a/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
+++ b/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
@@ -34,23 +34,27 @@
package org.apache.kylin.job;
+import org.apache.hadoop.util.ToolRunner;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.AbstractKylinTestCase;
import org.apache.kylin.common.util.ClassUtil;
import org.apache.kylin.common.util.HBaseMetadataTestCase;
+import org.apache.kylin.job.hadoop.cube.StorageCleanupJob;
import org.apache.kylin.job.streaming.StreamingBootstrap;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Test;
+import org.junit.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.File;
+import java.io.IOException;
/**
* Created by qianzhou on 3/6/15.
*/
public class IIStreamBuilderTest extends HBaseMetadataTestCase {
+ private static final Logger logger = LoggerFactory.getLogger(IIStreamBuilderTest.class);
+
private KylinConfig kylinConfig;
@BeforeClass
@@ -59,6 +63,30 @@ public class IIStreamBuilderTest extends HBaseMetadataTestCase {
System.setProperty("hdp.version", "2.2.0.0-2041"); // mapred-site.xml ref this
}
+ @AfterClass
+ public static void afterClass() throws Exception {
+// backup();
+ }
+
+ private static void backup() throws Exception {
+ int exitCode = cleanupOldStorage();
+ if (exitCode == 0) {
+ exportHBaseData();
+ }
+ }
+
+ private static int cleanupOldStorage() throws Exception {
+ String[] args = {"--delete", "true"};
+
+ int exitCode = ToolRunner.run(new StorageCleanupJob(), args);
+ return exitCode;
+ }
+
+ private static void exportHBaseData() throws IOException {
+ ExportHBaseData export = new ExportHBaseData();
+ export.exportTables();
+ }
+
@Before
public void before() throws Exception {
HBaseMetadataTestCase.staticCreateTestMetadata(AbstractKylinTestCase.SANDBOX_TEST_DATA);
@@ -68,13 +96,13 @@ public class IIStreamBuilderTest extends HBaseMetadataTestCase {
DeployUtil.overrideJobJarLocations();
}
- @After
- public void after() {
- this.cleanupTestMetadata();
- }
-
@Test
public void test() throws Exception {
- StreamingBootstrap.getInstance(kylinConfig).startStreaming("eagle", 0);
+// final StreamingBootstrap bootstrap = StreamingBootstrap.getInstance(kylinConfig);
+// bootstrap.start("eagle", 0);
+// Thread.sleep(30 * 60 * 1000);
+// logger.info("time is up, stop streaming");
+// bootstrap.stop();
+// Thread.sleep(5 * 1000);
}
}
[42/50] incubator-kylin git commit: refactor
Posted by li...@apache.org.
refactor
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/a36d4166
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/a36d4166
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/a36d4166
Branch: refs/heads/streaming-localdict
Commit: a36d416602885ea546ea6a388ba2f3eb78188df7
Parents: ea96dc5
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 16:55:44 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 16:55:44 2015 +0800
----------------------------------------------------------------------
.../kylin/job/streaming/StreamingBootstrap.java | 5 +-
.../apache/kylin/job/BuildIIWithStreamTest.java | 4 +-
.../job/hadoop/invertedindex/II2CubeTest.java | 58 ++++++++------------
3 files changed, 28 insertions(+), 39 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/a36d4166/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java b/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
index 5d1673c..f6abad7 100644
--- a/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
+++ b/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
@@ -123,11 +123,10 @@ public class StreamingBootstrap {
getStreamQueue().put(new Stream(offset, bytes));
}
};
- final IIDesc desc = ii.getDescriptor();
kafkaConsumers.put(getKey(streaming, partitionId), consumer);
- final IIStreamBuilder task = new IIStreamBuilder(consumer.getStreamQueue(), iiSegment.getStorageLocationIdentifier(), desc, partitionId);
- task.setStreamParser(JsonStreamParser.instance);
+ final IIStreamBuilder task = new IIStreamBuilder(consumer.getStreamQueue(), iiSegment.getStorageLocationIdentifier(), iiSegment.getIIInstance(), partitionId);
+ task.setStreamParser(new JsonStreamParser(ii.getDescriptor().listAllColumns()));
Executors.newSingleThreadExecutor().submit(consumer);
Executors.newSingleThreadExecutor().submit(task).get();
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/a36d4166/job/src/test/java/org/apache/kylin/job/BuildIIWithStreamTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/BuildIIWithStreamTest.java b/job/src/test/java/org/apache/kylin/job/BuildIIWithStreamTest.java
index dae2d03..a3a7489 100644
--- a/job/src/test/java/org/apache/kylin/job/BuildIIWithStreamTest.java
+++ b/job/src/test/java/org/apache/kylin/job/BuildIIWithStreamTest.java
@@ -215,14 +215,14 @@ public class BuildIIWithStreamTest {
ExecutorService executorService = Executors.newSingleThreadExecutor();
- final IIStreamBuilder streamBuilder = new IIStreamBuilder(queue, segment.getStorageLocationIdentifier(), desc, 0);
+ final IIStreamBuilder streamBuilder = new IIStreamBuilder(queue, segment.getStorageLocationIdentifier(), segment.getIIInstance(), 0);
int count = 0;
while (reader.next()) {
queue.put(parse(reader.getRow()));
count++;
}
logger.info("total record count:" + count + " htable:" + segment.getStorageLocationIdentifier());
- queue.put(new Stream(-1, null));
+ queue.put(Stream.EOF);
final Future<?> future = executorService.submit(streamBuilder);
try {
future.get();
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/a36d4166/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java b/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
index 97c71f8..da1cb18 100644
--- a/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
+++ b/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
@@ -1,14 +1,9 @@
package org.apache.kylin.job.hadoop.invertedindex;
-import java.io.IOException;
-import java.util.*;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.LinkedBlockingQueue;
-
-import javax.annotation.Nullable;
-
+import com.google.common.base.Function;
+import com.google.common.collect.Collections2;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.MutationSerialization;
@@ -24,22 +19,20 @@ import org.apache.kylin.invertedindex.IIManager;
import org.apache.kylin.invertedindex.index.Slice;
import org.apache.kylin.invertedindex.index.TableRecordInfo;
import org.apache.kylin.invertedindex.index.TableRecordInfoDigest;
-import org.apache.kylin.invertedindex.model.IIDesc;
-import org.apache.kylin.invertedindex.model.IIKeyValueCodecWithState;
-import org.apache.kylin.invertedindex.model.IIRow;
-import org.apache.kylin.invertedindex.model.KeyValueCodec;
+import org.apache.kylin.invertedindex.model.*;
import org.apache.kylin.job.constant.BatchConstants;
import org.apache.kylin.job.hadoop.cube.FactDistinctIIColumnsMapper;
import org.apache.kylin.streaming.Stream;
+import org.apache.kylin.streaming.StringStreamParser;
+import org.apache.kylin.streaming.invertedindex.SliceBuilder;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
-import com.google.common.base.Function;
-import com.google.common.collect.Collections2;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
+import javax.annotation.Nullable;
+import java.io.IOException;
+import java.util.*;
/**
* Created by Hongbin Ma(Binmahone) on 3/26/15.
@@ -53,10 +46,10 @@ public class II2CubeTest extends LocalFileMetadataTestCase {
List<IIRow> iiRows;
- final String[] inputs = new String[] { //
- "FP-non GTC,0,15,145970,0,28,Toys,2008-10-08 07:18:40,USER_Y,Toys & Hobbies,Models & Kits,Automotive,0,Ebay,USER_S,15,Professional-Other,2012-08-16,2012-08-11,0,2012-08-16,145970,10000329,26.8551,0", //
+ final String[] inputs = new String[]{ //
+ "FP-non GTC,0,15,145970,0,28,Toys,2008-10-08 07:18:40,USER_Y,Toys & Hobbies,Models & Kits,Automotive,0,Ebay,USER_S,15,Professional-Other,2012-08-16,2012-08-11,0,2012-08-16,145970,10000329,26.8551,0", //
"ABIN,0,-99,43479,0,21,Photo,2012-09-11 20:26:04,USER_Y,Cameras & Photo,Film Photography,Other,0,Ebay,USER_S,-99,Not Applicable,2012-08-16,2012-08-11,0,2012-08-16,43479,10000807,26.2474,0", //
- "ABIN,0,16,80053,0,12,Computers,2012-06-19 21:15:09,USER_Y,Computers/Tablets & Networking,MonitorProjectors & Accs,Monitors,0,Ebay,USER_S,16,Consumer-Other,2012-08-16,2012-08-11,0,2012-08-16,80053,10000261,94.2273,0" };
+ "ABIN,0,16,80053,0,12,Computers,2012-06-19 21:15:09,USER_Y,Computers/Tablets & Networking,MonitorProjectors & Accs,Monitors,0,Ebay,USER_S,16,Consumer-Other,2012-08-16,2012-08-11,0,2012-08-16,80053,10000261,94.2273,0"};
@Before
public void setUp() throws Exception {
@@ -64,22 +57,20 @@ public class II2CubeTest extends LocalFileMetadataTestCase {
this.ii = IIManager.getInstance(getTestConfig()).getII(iiName);
this.iiDesc = ii.getDescriptor();
- Collection<?> streams = Collections2.transform(Arrays.asList(inputs), new Function<String, Stream>() {
+ List<Stream> streams = Lists.transform(Arrays.asList(inputs), new Function<String, Stream>() {
@Nullable
@Override
public Stream apply(String input) {
- return new Stream(0, input.getBytes());
+ return new Stream(System.currentTimeMillis(), input.getBytes());
}
});
- LinkedBlockingQueue q = new LinkedBlockingQueue();
- q.addAll(streams);
- q.put(new Stream(-1, null));//a stop sign for builder
iiRows = Lists.newArrayList();
- ToyIIStreamBuilder builder = new ToyIIStreamBuilder(q, iiDesc, 0, iiRows);
- ExecutorService executorService = Executors.newSingleThreadExecutor();
- Future<?> future = executorService.submit(builder);
- future.get();
+ final Slice slice = new SliceBuilder(iiDesc, (short) 0).buildSlice(streams, StringStreamParser.instance);
+ IIKeyValueCodec codec = new IIKeyValueCodec(slice.getInfo());
+ for (IIRow iiRow : codec.encodeKeyValue(slice)) {
+ iiRows.add(iiRow);
+ }
}
@@ -131,15 +122,14 @@ public class II2CubeTest extends LocalFileMetadataTestCase {
@Nullable
@Override
public Pair<ImmutableBytesWritable, Result> apply(@Nullable IIRow input) {
- return new Pair<ImmutableBytesWritable, Result>(new ImmutableBytesWritable(new byte[] { 1 }), Result.create(input.makeCells()));
+ return new Pair<ImmutableBytesWritable, Result>(new ImmutableBytesWritable(new byte[]{1}), Result.create(input.makeCells()));
}
})));
List<Pair<LongWritable, Text>> result = mapDriver.run();
- Set<String> lstgNames = Sets.newHashSet("FP-non GTC","ABIN");
- for(Pair<LongWritable, Text> pair : result)
- {
- Assert.assertEquals(pair.getFirst().get(),6);
+ Set<String> lstgNames = Sets.newHashSet("FP-non GTC", "ABIN");
+ for (Pair<LongWritable, Text> pair : result) {
+ Assert.assertEquals(pair.getFirst().get(), 6);
Assert.assertTrue(lstgNames.contains(pair.getSecond().toString()));
}
}
[31/50] incubator-kylin git commit: KYLIN-653 checking
Posted by li...@apache.org.
KYLIN-653 checking
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/fc5ab528
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/fc5ab528
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/fc5ab528
Branch: refs/heads/streaming-localdict
Commit: fc5ab528fcdb1d8fd90f861e4f937c353c09bbab
Parents: d09e00d
Author: honma <ho...@ebay.com>
Authored: Fri Mar 27 11:31:57 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Fri Mar 27 15:16:20 2015 +0800
----------------------------------------------------------------------
.../invertedindex/model/IIKeyValueCodecWithState.java | 12 ++++++++++--
.../job/hadoop/cube/FactDistinctIIColumnsMapper.java | 6 ++++--
.../invertedindex/IIKeyValueCodecWithStateTest.java | 4 ++++
3 files changed, 18 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/fc5ab528/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
index 82f1020..29ffd40 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
@@ -1,14 +1,14 @@
package org.apache.kylin.invertedindex.model;
-import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
-import com.google.common.base.Preconditions;
+import org.apache.kylin.common.util.FIFOIterable;
import org.apache.kylin.common.util.FIFOIterator;
import org.apache.kylin.invertedindex.index.Slice;
import org.apache.kylin.invertedindex.index.TableRecordInfoDigest;
+import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
/**
@@ -20,8 +20,16 @@ public class IIKeyValueCodecWithState extends IIKeyValueCodec {
super(digest);
}
+ /**
+ *
+ * @param kvs kvs must be a {@link org.apache.kylin.common.util.FIFOIterable } to avoid {@link java.util.ConcurrentModificationException}.
+ * @return
+ */
@Override
public Iterable<Slice> decodeKeyValue(Iterable<IIRow> kvs) {
+ if (!(kvs instanceof FIFOIterable)) {
+ throw new IllegalArgumentException("kvs must be a {@link org.apache.kylin.common.util.FIFOIterable } to avoid {@link java.util.ConcurrentModificationException}.");
+ }
return new IIRowDecoderWithState(digest, kvs.iterator());
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/fc5ab528/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
index 705e272..6a236fd 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
@@ -22,12 +22,14 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
+import java.util.Queue;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.util.FIFOIterable;
import org.apache.kylin.dict.Dictionary;
import org.apache.kylin.invertedindex.IIInstance;
import org.apache.kylin.invertedindex.IIManager;
@@ -50,7 +52,7 @@ import com.google.common.collect.Lists;
public class FactDistinctIIColumnsMapper extends FactDistinctColumnsMapperBase<ImmutableBytesWritable, Result> {
private IIJoinedFlatTableDesc intermediateTableDesc;
- private ArrayList<IIRow> buffer = Lists.newArrayList();
+ private Queue<IIRow> buffer = Lists.newLinkedList();
private Iterator<Slice> slices;
private String iiName;
@@ -73,7 +75,7 @@ public class FactDistinctIIColumnsMapper extends FactDistinctColumnsMapperBase<I
intermediateTableDesc = new IIJoinedFlatTableDesc(iiDesc);
TableRecordInfo info = new TableRecordInfo(iiDesc);
KeyValueCodec codec = new IIKeyValueCodecWithState(info.getDigest());
- slices = codec.decodeKeyValue(buffer).iterator();
+ slices = codec.decodeKeyValue(new FIFOIterable<IIRow>(buffer)).iterator();
baseCuboidCol2FlattenTableCol = new int[factDictCols.size()];
for (int i = 0; i < factDictCols.size(); ++i) {
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/fc5ab528/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java
index 416d31a..5ade5f1 100644
--- a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java
+++ b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java
@@ -71,6 +71,10 @@ public class IIKeyValueCodecWithStateTest extends LocalFileMetadataTestCase {
cleanupTestMetadata();
}
+ /**
+ * simulate stream building into slices, and encode the slice into IIRows.
+ * Then reconstruct the IIRows to slice.
+ */
@Test
public void basicTest() {
Queue<IIRow> buffer = Lists.newLinkedList();
[02/50] incubator-kylin git commit: KYLIN-630 add distinct column
mapper for II storage
Posted by li...@apache.org.
KYLIN-630 add distinct column mapper for II storage
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/1b52438e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/1b52438e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/1b52438e
Branch: refs/heads/streaming-localdict
Commit: 1b52438e2eec3dd271b66b6a6352ccf1bc0278d3
Parents: 8e0695b
Author: honma <ho...@ebay.com>
Authored: Thu Mar 26 16:03:30 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Thu Mar 26 16:03:30 2015 +0800
----------------------------------------------------------------------
.../model/IIJoinedFlatTableDesc.java | 12 +-
.../apache/kylin/invertedindex/model/IIRow.java | 13 ++
.../org/apache/kylin/job/JoinedFlatTable.java | 1 -
.../kylin/job/hadoop/AbstractHadoopJob.java | 2 +-
.../kylin/job/hadoop/cube/CubeHFileMapper.java | 2 +-
.../kylin/job/hadoop/cube/CuboidReducer.java | 2 +-
.../job/hadoop/cube/FactDistinctColumnsJob.java | 2 +-
.../hadoop/cube/FactDistinctColumnsMapper.java | 129 -------------------
.../cube/FactDistinctColumnsMapperBase.java | 2 +-
.../hadoop/cube/FactDistinctColumnsReducer.java | 2 +-
.../cube/FactDistinctHiveColumnsMapper.java | 129 +++++++++++++++++++
.../cube/FactDistinctIIColumnsMapper.java | 129 +++++++++++++++++++
.../job/hadoop/cube/MergeCuboidMapper.java | 2 +-
.../kylin/job/hadoop/cube/NDCuboidMapper.java | 2 +-
.../job/hadoop/cube/NewBaseCuboidMapper.java | 2 +-
.../job/hadoop/cubev2/InMemCuboidMapper.java | 2 +-
.../job/hadoop/cubev2/InMemCuboidReducer.java | 2 +-
.../invertedindex/InvertedIndexMapper.java | 2 +-
.../invertedindex/InvertedIndexPartitioner.java | 2 +-
.../invertedindex/InvertedIndexReducer.java | 2 +-
.../metadata/model/IJoinedFlatTableDesc.java | 2 -
.../metadata/model/IntermediateColumnDesc.java | 4 +
.../endpoint/HbaseServerKVIterator.java | 9 +-
23 files changed, 296 insertions(+), 160 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIJoinedFlatTableDesc.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIJoinedFlatTableDesc.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIJoinedFlatTableDesc.java
index 44114da..14934dc 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIJoinedFlatTableDesc.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIJoinedFlatTableDesc.java
@@ -19,13 +19,13 @@
package org.apache.kylin.invertedindex.model;
import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-import com.google.common.collect.Lists;
+import org.apache.kylin.metadata.model.DataModelDesc;
+import org.apache.kylin.metadata.model.IJoinedFlatTableDesc;
+import org.apache.kylin.metadata.model.IntermediateColumnDesc;
+import org.apache.kylin.metadata.model.TblColRef;
-import org.apache.kylin.invertedindex.model.IIDesc;
-import org.apache.kylin.metadata.model.*;
+import com.google.common.collect.Lists;
/**
* Created by Hongbin Ma(Binmahone) on 12/30/14.
@@ -35,7 +35,6 @@ public class IIJoinedFlatTableDesc implements IJoinedFlatTableDesc {
private IIDesc iiDesc;
private String tableName;
private List<IntermediateColumnDesc> columnList = Lists.newArrayList();
- private Map<String, String> tableAliasMap;
public IIJoinedFlatTableDesc(IIDesc iiDesc) {
this.iiDesc = iiDesc;
@@ -57,6 +56,7 @@ public class IIJoinedFlatTableDesc implements IJoinedFlatTableDesc {
return tableName + "_" + jobUUID.replace("-", "_");
}
+ @Override
public List<IntermediateColumnDesc> getColumnList() {
return columnList;
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIRow.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIRow.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIRow.java
index aba4fff..f3d398a 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIRow.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIRow.java
@@ -34,7 +34,9 @@
package org.apache.kylin.invertedindex.model;
+import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.kylin.common.util.BytesUtil;
/**
* Created by qianzhou on 3/10/15.
@@ -50,6 +52,7 @@ public final class IIRow {
this.value = value;
this.dictionary = dictionary;
}
+
public IIRow() {
this(new ImmutableBytesWritable(), new ImmutableBytesWritable(), new ImmutableBytesWritable());
}
@@ -61,7 +64,17 @@ public final class IIRow {
public ImmutableBytesWritable getValue() {
return value;
}
+
public ImmutableBytesWritable getDictionary() {
return dictionary;
}
+
+ public void updateWith(Cell c) {
+ if (BytesUtil.compareBytes(IIDesc.HBASE_QUALIFIER_BYTES, 0, c.getQualifierArray(), c.getQualifierOffset(), IIDesc.HBASE_QUALIFIER_BYTES.length) == 0) {
+ this.getKey().set(c.getRowArray(), c.getRowOffset(), c.getRowLength());
+ this.getValue().set(c.getValueArray(), c.getValueOffset(), c.getValueLength());
+ } else if (BytesUtil.compareBytes(IIDesc.HBASE_DICTIONARY_BYTES, 0, c.getQualifierArray(), c.getQualifierOffset(), IIDesc.HBASE_DICTIONARY_BYTES.length) == 0) {
+ this.getDictionary().set(c.getValueArray(), c.getValueOffset(), c.getValueLength());
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java b/job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java
index cc3dc1b..100fbca 100644
--- a/job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java
+++ b/job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java
@@ -27,7 +27,6 @@ import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
-import org.apache.kylin.cube.model.DimensionDesc;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/AbstractHadoopJob.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/AbstractHadoopJob.java b/job/src/main/java/org/apache/kylin/job/hadoop/AbstractHadoopJob.java
index 038fe2f..9f73488 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/AbstractHadoopJob.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/AbstractHadoopJob.java
@@ -330,7 +330,7 @@ public abstract class AbstractHadoopJob extends Configured implements Tool {
return input.getSplits(job).size();
}
- public static KylinConfig loadKylinPropsAndMetadata(Configuration conf) throws IOException {
+ public static KylinConfig loadKylinPropsAndMetadata() throws IOException {
File metaDir = new File("meta");
System.setProperty(KylinConfig.KYLIN_CONF, metaDir.getAbsolutePath());
logger.info("The absolute path for meta dir is " + metaDir.getAbsolutePath());
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/cube/CubeHFileMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/CubeHFileMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/CubeHFileMapper.java
index 17dc24e..1236f8c 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/CubeHFileMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/CubeHFileMapper.java
@@ -62,7 +62,7 @@ public class CubeHFileMapper extends KylinMapper<Text, Text, ImmutableBytesWrita
super.publishConfiguration(context.getConfiguration());
cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME);
- KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(context.getConfiguration());
+ KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
CubeManager cubeMgr = CubeManager.getInstance(config);
cubeDesc = cubeMgr.getCube(cubeName).getDescriptor();
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/cube/CuboidReducer.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/CuboidReducer.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/CuboidReducer.java
index 7181fa1..b747dff 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/CuboidReducer.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/CuboidReducer.java
@@ -64,7 +64,7 @@ public class CuboidReducer extends KylinReducer<Text, Text, Text, Text> {
super.publishConfiguration(context.getConfiguration());
cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase();
- KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(context.getConfiguration());
+ KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
cubeDesc = CubeManager.getInstance(config).getCube(cubeName).getDescriptor();
measuresDescs = cubeDesc.getMeasures();
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsJob.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsJob.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsJob.java
index 094014e..17c5e9b 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsJob.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsJob.java
@@ -101,7 +101,7 @@ public class FactDistinctColumnsJob extends AbstractHadoopJob {
dbTableNames[1]);
job.setInputFormatClass(HCatInputFormat.class);
- job.setMapperClass(FactDistinctColumnsMapper.class);
+ job.setMapperClass(FactDistinctHiveColumnsMapper.class);
job.setCombinerClass(FactDistinctColumnsCombiner.class);
job.setMapOutputKeyClass(ShortWritable.class);
job.setMapOutputValueClass(Text.class);
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapper.java
deleted file mode 100644
index 3a50249..0000000
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapper.java
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.job.hadoop.cube;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.Collection;
-import java.util.List;
-
-import org.apache.commons.lang3.StringUtils;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hive.hcatalog.data.HCatRecord;
-import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
-import org.apache.hive.hcatalog.data.schema.HCatSchema;
-import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
-import org.apache.kylin.common.hll.HyperLogLogPlusCounter;
-import org.apache.kylin.cube.cuboid.CuboidScheduler;
-import org.apache.kylin.cube.model.CubeJoinedFlatTableDesc;
-import org.apache.kylin.dict.lookup.HiveTableReader;
-import org.apache.kylin.job.constant.BatchConstants;
-
-import com.google.common.collect.Lists;
-
-/**
- * @author yangli9
- */
-public class FactDistinctColumnsMapper<KEYIN> extends FactDistinctColumnsMapperBase<KEYIN, HCatRecord> {
-
- private HCatSchema schema = null;
- private CubeJoinedFlatTableDesc intermediateTableDesc;
-
- protected boolean collectStatistics = false;
- protected CuboidScheduler cuboidScheduler = null;
- protected List<String> rowKeyValues = null;
- protected HyperLogLogPlusCounter hll;
- protected int nRowKey;
-
- @Override
- protected void setup(Context context) throws IOException {
- super.setup(context);
-
- schema = HCatInputFormat.getTableSchema(context.getConfiguration());
- intermediateTableDesc = new CubeJoinedFlatTableDesc(cubeDesc, null);
-
-
- collectStatistics = Boolean.parseBoolean(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_ENABLED));
- if (collectStatistics) {
- cuboidScheduler = new CuboidScheduler(cubeDesc);
- hll = new HyperLogLogPlusCounter(16);
- rowKeyValues = Lists.newArrayList();
- nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length;
- }
- }
-
- @Override
- public void map(KEYIN key, HCatRecord record, Context context) throws IOException, InterruptedException {
- try {
- int[] flatTableIndexes = intermediateTableDesc.getRowKeyColumnIndexes();
- HCatFieldSchema fieldSchema;
- for (int i : factDictCols) {
- outputKey.set((short) i);
- fieldSchema = schema.get(flatTableIndexes[i]);
- Object fieldValue = record.get(fieldSchema.getName(), schema);
- if (fieldValue == null)
- continue;
- byte[] bytes = Bytes.toBytes(fieldValue.toString());
- outputValue.set(bytes, 0, bytes.length);
- context.write(outputKey, outputValue);
- }
- } catch (Exception ex) {
- handleErrorRecord(record, ex);
- }
-
- if (collectStatistics) {
- String[] row = HiveTableReader.getRowAsStringArray(record);
- putRowKeyToHLL(row, baseCuboidId);
- }
- }
-
- private void putRowKeyToHLL(String[] row, long cuboidId) {
- rowKeyValues.clear();
- long mask = Long.highestOneBit(baseCuboidId);
- for (int i = 0; i < nRowKey; i++) {
- if ((mask & cuboidId) == 1) {
- rowKeyValues.add(row[intermediateTableDesc.getRowKeyColumnIndexes()[i]]);
- }
- mask = mask >> 1;
- }
-
- String key = StringUtils.join(rowKeyValues, ",");
- hll.add(key);
-
- Collection<Long> children = cuboidScheduler.getSpanningCuboid(cuboidId);
- for (Long childId : children) {
- putRowKeyToHLL(row, childId);
- }
-
- }
-
- @Override
- protected void cleanup(Context context) throws IOException, InterruptedException {
- if (collectStatistics) {
- // output hll to reducer, key is -1
- // keyBuf = Bytes.toBytes(-1);
- outputKey.set((short) -1);
- ByteBuffer hllBuf = ByteBuffer.allocate(64 * 1024);
- hll.writeRegisters(hllBuf);
- outputValue.set(hllBuf.array());
- context.write(outputKey, outputValue);
- }
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
index 603277c..c0455ff 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
@@ -40,7 +40,7 @@ public class FactDistinctColumnsMapperBase<KEYIN, VALUEIN> extends KylinMapper<K
protected void setup(Context context) throws IOException {
Configuration conf = context.getConfiguration();
publishConfiguration(conf);
- KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(conf);
+ KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
cube = CubeManager.getInstance(config).getCube(cubeName);
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java
index 383def4..2052d08 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java
@@ -61,7 +61,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<ShortWritable, Text
super.publishConfiguration(context.getConfiguration());
Configuration conf = context.getConfiguration();
- KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(conf);
+ KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
String cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
CubeDesc cubeDesc = cube.getDescriptor();
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctHiveColumnsMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctHiveColumnsMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctHiveColumnsMapper.java
new file mode 100644
index 0000000..64ae353
--- /dev/null
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctHiveColumnsMapper.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.job.hadoop.cube;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hive.hcatalog.data.HCatRecord;
+import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
+import org.apache.hive.hcatalog.data.schema.HCatSchema;
+import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.kylin.common.hll.HyperLogLogPlusCounter;
+import org.apache.kylin.cube.cuboid.CuboidScheduler;
+import org.apache.kylin.cube.model.CubeJoinedFlatTableDesc;
+import org.apache.kylin.dict.lookup.HiveTableReader;
+import org.apache.kylin.job.constant.BatchConstants;
+
+import com.google.common.collect.Lists;
+
+/**
+ * @author yangli9
+ */
+public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMapperBase<KEYIN, HCatRecord> {
+
+ private HCatSchema schema = null;
+ private CubeJoinedFlatTableDesc intermediateTableDesc;
+
+ protected boolean collectStatistics = false;
+ protected CuboidScheduler cuboidScheduler = null;
+ protected List<String> rowKeyValues = null;
+ protected HyperLogLogPlusCounter hll;
+ protected int nRowKey;
+
+ @Override
+ protected void setup(Context context) throws IOException {
+ super.setup(context);
+
+ schema = HCatInputFormat.getTableSchema(context.getConfiguration());
+ intermediateTableDesc = new CubeJoinedFlatTableDesc(cubeDesc, null);
+
+
+ collectStatistics = Boolean.parseBoolean(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_ENABLED));
+ if (collectStatistics) {
+ cuboidScheduler = new CuboidScheduler(cubeDesc);
+ hll = new HyperLogLogPlusCounter(16);
+ rowKeyValues = Lists.newArrayList();
+ nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length;
+ }
+ }
+
+ @Override
+ public void map(KEYIN key, HCatRecord record, Context context) throws IOException, InterruptedException {
+ try {
+ int[] flatTableIndexes = intermediateTableDesc.getRowKeyColumnIndexes();
+ HCatFieldSchema fieldSchema;
+ for (int i : factDictCols) {
+ outputKey.set((short) i);
+ fieldSchema = schema.get(flatTableIndexes[i]);
+ Object fieldValue = record.get(fieldSchema.getName(), schema);
+ if (fieldValue == null)
+ continue;
+ byte[] bytes = Bytes.toBytes(fieldValue.toString());
+ outputValue.set(bytes, 0, bytes.length);
+ context.write(outputKey, outputValue);
+ }
+ } catch (Exception ex) {
+ handleErrorRecord(record, ex);
+ }
+
+ if (collectStatistics) {
+ String[] row = HiveTableReader.getRowAsStringArray(record);
+ putRowKeyToHLL(row, baseCuboidId);
+ }
+ }
+
+ private void putRowKeyToHLL(String[] row, long cuboidId) {
+ rowKeyValues.clear();
+ long mask = Long.highestOneBit(baseCuboidId);
+ for (int i = 0; i < nRowKey; i++) {
+ if ((mask & cuboidId) == 1) {
+ rowKeyValues.add(row[intermediateTableDesc.getRowKeyColumnIndexes()[i]]);
+ }
+ mask = mask >> 1;
+ }
+
+ String key = StringUtils.join(rowKeyValues, ",");
+ hll.add(key);
+
+ Collection<Long> children = cuboidScheduler.getSpanningCuboid(cuboidId);
+ for (Long childId : children) {
+ putRowKeyToHLL(row, childId);
+ }
+
+ }
+
+ @Override
+ protected void cleanup(Context context) throws IOException, InterruptedException {
+ if (collectStatistics) {
+ // output hll to reducer, key is -1
+ // keyBuf = Bytes.toBytes(-1);
+ outputKey.set((short) -1);
+ ByteBuffer hllBuf = ByteBuffer.allocate(64 * 1024);
+ hll.writeRegisters(hllBuf);
+ outputValue.set(hllBuf.array());
+ context.write(outputKey, outputValue);
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
new file mode 100644
index 0000000..75e127e
--- /dev/null
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.job.hadoop.cube;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.dict.Dictionary;
+import org.apache.kylin.invertedindex.IIInstance;
+import org.apache.kylin.invertedindex.IIManager;
+import org.apache.kylin.invertedindex.index.RawTableRecord;
+import org.apache.kylin.invertedindex.index.Slice;
+import org.apache.kylin.invertedindex.index.TableRecordInfo;
+import org.apache.kylin.invertedindex.model.*;
+import org.apache.kylin.job.constant.BatchConstants;
+import org.apache.kylin.job.hadoop.AbstractHadoopJob;
+import org.apache.kylin.metadata.model.IntermediateColumnDesc;
+import org.apache.kylin.metadata.model.TblColRef;
+
+import com.google.common.base.Predicate;
+import com.google.common.collect.Iterators;
+import com.google.common.collect.Lists;
+
+/**
+ * @author yangli9
+ */
+public class FactDistinctIIColumnsMapper extends FactDistinctColumnsMapperBase<ImmutableBytesWritable, Result> {
+
+ private IIJoinedFlatTableDesc intermediateTableDesc;
+ private ArrayList<IIRow> buffer = Lists.newArrayList();
+ private Iterable<Slice> slices;
+
+ private String iiName;
+ private IIInstance ii;
+ private IIDesc iiDesc;
+
+ private int[] baseCuboidCol2FlattenTableCol;
+
+ @Override
+ protected void setup(Context context) throws IOException {
+ super.setup(context);
+
+ Configuration conf = context.getConfiguration();
+ KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
+
+ iiName = conf.get(BatchConstants.CFG_II_NAME);
+ ii = IIManager.getInstance(config).getII(iiName);
+ iiDesc = ii.getDescriptor();
+
+ intermediateTableDesc = new IIJoinedFlatTableDesc(iiDesc);
+ TableRecordInfo info = new TableRecordInfo(iiDesc);
+ KeyValueCodec codec = new IIKeyValueCodecWithState(info.getDigest());
+ slices = codec.decodeKeyValue(buffer);
+
+ baseCuboidCol2FlattenTableCol = new int[factDictCols.size()];
+ for (int i = 0; i < factDictCols.size(); ++i) {
+ int index = findTblCol(intermediateTableDesc.getColumnList(), columns.get(factDictCols.get(i)));
+ baseCuboidCol2FlattenTableCol[i] = index;
+ }
+ }
+
+ private int findTblCol(List<IntermediateColumnDesc> columns, final TblColRef col) {
+ return Iterators.indexOf(columns.iterator(), new Predicate<IntermediateColumnDesc>() {
+ @Override
+ public boolean apply(IntermediateColumnDesc input) {
+ return input.getColRef().equals(col);
+ }
+ });
+ }
+
+ @Override
+ public void map(ImmutableBytesWritable key, Result cells, Context context) throws IOException, InterruptedException {
+ IIRow iiRow = new IIRow();
+ for (Cell c : cells.rawCells()) {
+ iiRow.updateWith(c);
+ }
+ buffer.add(iiRow);
+
+ if (slices.iterator().hasNext()) {
+ byte[] vBytesBuffer = null;
+ Slice slice = slices.iterator().next();
+
+ for (RawTableRecord record : slice) {
+ for (int i = 0; i < factDictCols.size(); ++i) {
+ int baseCuboidIndex = factDictCols.get(i);
+ outputKey.set((short) baseCuboidIndex);
+ int indexInRecord = baseCuboidCol2FlattenTableCol[i];
+
+ Dictionary<?> dictionary = slice.getLocalDictionaries().get(indexInRecord);
+ if (vBytesBuffer == null || dictionary.getSizeOfValue() > vBytesBuffer.length) {
+ vBytesBuffer = new byte[dictionary.getSizeOfValue() * 2];
+ }
+
+ int vid = record.getValueID(baseCuboidIndex);
+ if (vid == dictionary.nullId()) {
+ continue;
+ }
+ int vBytesSize = dictionary.getValueBytesFromId(vid, vBytesBuffer, 0);
+
+ outputValue.set(vBytesBuffer, 0, vBytesSize);
+ context.write(outputKey, outputValue);
+ }
+ }
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/cube/MergeCuboidMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/MergeCuboidMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/MergeCuboidMapper.java
index 431f2b7..417e996 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/MergeCuboidMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/MergeCuboidMapper.java
@@ -114,7 +114,7 @@ public class MergeCuboidMapper extends KylinMapper<Text, Text, Text, Text> {
cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase();
segmentName = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_NAME).toUpperCase();
- config = AbstractHadoopJob.loadKylinPropsAndMetadata(context.getConfiguration());
+ config = AbstractHadoopJob.loadKylinPropsAndMetadata();
cubeManager = CubeManager.getInstance(config);
cube = cubeManager.getCube(cubeName);
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/cube/NDCuboidMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/NDCuboidMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/NDCuboidMapper.java
index dc65baa..e476bd7 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/NDCuboidMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/NDCuboidMapper.java
@@ -68,7 +68,7 @@ public class NDCuboidMapper extends KylinMapper<Text, Text, Text, Text> {
cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase();
segmentName = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_NAME).toUpperCase();
- KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(context.getConfiguration());
+ KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
CubeSegment cubeSegment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/cube/NewBaseCuboidMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/NewBaseCuboidMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/NewBaseCuboidMapper.java
index e75457e..79c334c 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/NewBaseCuboidMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/NewBaseCuboidMapper.java
@@ -125,7 +125,7 @@ public class NewBaseCuboidMapper<KEYIN> extends KylinMapper<KEYIN, Text, Text, T
cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase();
segmentName = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_NAME);
- KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(context.getConfiguration());
+ KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
metadataManager = MetadataManager.getInstance(config);
cube = CubeManager.getInstance(config).getCube(cubeName);
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidMapper.java
index a58369f..5a3565a 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidMapper.java
@@ -81,7 +81,7 @@ public class InMemCuboidMapper<KEYIN> extends KylinMapper<KEYIN, HCatRecord, Tex
Configuration conf = context.getConfiguration();
- KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(conf);
+ KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
cube = CubeManager.getInstance(config).getCube(cubeName);
cubeDesc = cube.getDescriptor();
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidReducer.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidReducer.java b/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidReducer.java
index de2539c..48fe3a1 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidReducer.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidReducer.java
@@ -44,7 +44,7 @@ public class InMemCuboidReducer extends KylinReducer<Text, Text, Text, Text> {
super.publishConfiguration(context.getConfiguration());
cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase();
- KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(context.getConfiguration());
+ KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
cubeDesc = CubeManager.getInstance(config).getCube(cubeName).getDescriptor();
measuresDescs = cubeDesc.getMeasures();
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexMapper.java
index 735a945..0344043 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexMapper.java
@@ -58,7 +58,7 @@ public class InvertedIndexMapper<KEYIN> extends KylinMapper<KEYIN, HCatRecord, L
Configuration conf = context.getConfiguration();
- KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(conf);
+ KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
IIManager mgr = IIManager.getInstance(config);
IIInstance ii = mgr.getII(conf.get(BatchConstants.CFG_II_NAME));
IISegment seg = ii.getSegment(conf.get(BatchConstants.CFG_II_SEGMENT_NAME), SegmentStatusEnum.NEW);
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexPartitioner.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexPartitioner.java b/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexPartitioner.java
index 141565f..fa4dccf 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexPartitioner.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexPartitioner.java
@@ -54,7 +54,7 @@ public class InvertedIndexPartitioner extends Partitioner<LongWritable, Immutabl
public void setConf(Configuration conf) {
this.conf = conf;
try {
- KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(conf);
+ KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
IIManager mgr = IIManager.getInstance(config);
IIInstance ii = mgr.getII(conf.get(BatchConstants.CFG_II_NAME));
IISegment seg = ii.getSegment(conf.get(BatchConstants.CFG_II_SEGMENT_NAME), SegmentStatusEnum.NEW);
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexReducer.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexReducer.java b/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexReducer.java
index a1c0811..9f238b0 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexReducer.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexReducer.java
@@ -53,7 +53,7 @@ public class InvertedIndexReducer extends KylinReducer<LongWritable, ImmutableBy
super.publishConfiguration(context.getConfiguration());
Configuration conf = context.getConfiguration();
- KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(conf);
+ KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
IIManager mgr = IIManager.getInstance(config);
IIInstance ii = mgr.getII(conf.get(BatchConstants.CFG_II_NAME));
IISegment seg = ii.getSegment(conf.get(BatchConstants.CFG_II_SEGMENT_NAME), SegmentStatusEnum.NEW);
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/metadata/src/main/java/org/apache/kylin/metadata/model/IJoinedFlatTableDesc.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/model/IJoinedFlatTableDesc.java b/metadata/src/main/java/org/apache/kylin/metadata/model/IJoinedFlatTableDesc.java
index 917e12b..abf87b7 100644
--- a/metadata/src/main/java/org/apache/kylin/metadata/model/IJoinedFlatTableDesc.java
+++ b/metadata/src/main/java/org/apache/kylin/metadata/model/IJoinedFlatTableDesc.java
@@ -20,8 +20,6 @@ package org.apache.kylin.metadata.model;
import java.util.List;
-import org.apache.kylin.metadata.model.DataModelDesc;
-
/**
* Created by Hongbin Ma(Binmahone) on 12/30/14.
*/
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/metadata/src/main/java/org/apache/kylin/metadata/model/IntermediateColumnDesc.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/model/IntermediateColumnDesc.java b/metadata/src/main/java/org/apache/kylin/metadata/model/IntermediateColumnDesc.java
index 860773c..1b931a0 100644
--- a/metadata/src/main/java/org/apache/kylin/metadata/model/IntermediateColumnDesc.java
+++ b/metadata/src/main/java/org/apache/kylin/metadata/model/IntermediateColumnDesc.java
@@ -36,6 +36,10 @@ public class IntermediateColumnDesc {
return id;
}
+ public TblColRef getColRef() {
+ return this.colRef;
+ }
+
public String getColumnName() {
return colRef.getName();
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/storage/src/main/java/org/apache/kylin/storage/hbase/coprocessor/endpoint/HbaseServerKVIterator.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/hbase/coprocessor/endpoint/HbaseServerKVIterator.java b/storage/src/main/java/org/apache/kylin/storage/hbase/coprocessor/endpoint/HbaseServerKVIterator.java
index a7b67d8..5d9f633 100644
--- a/storage/src/main/java/org/apache/kylin/storage/hbase/coprocessor/endpoint/HbaseServerKVIterator.java
+++ b/storage/src/main/java/org/apache/kylin/storage/hbase/coprocessor/endpoint/HbaseServerKVIterator.java
@@ -63,7 +63,6 @@ public class HbaseServerKVIterator implements Iterable<IIRow>, Closeable {
this.regionScanner = innerScanner;
}
-
@Override
public boolean hasNext() {
return hasMore;
@@ -81,12 +80,7 @@ public class HbaseServerKVIterator implements Iterable<IIRow>, Closeable {
throw new IllegalStateException("Hbase row contains less than 1 cell");
}
for (Cell c : results) {
- if (BytesUtil.compareBytes(IIDesc.HBASE_QUALIFIER_BYTES, 0, c.getQualifierArray(), c.getQualifierOffset(), IIDesc.HBASE_QUALIFIER_BYTES.length) == 0) {
- row.getKey().set(c.getRowArray(), c.getRowOffset(), c.getRowLength());
- row.getValue().set(c.getValueArray(), c.getValueOffset(), c.getValueLength());
- } else if (BytesUtil.compareBytes(IIDesc.HBASE_DICTIONARY_BYTES, 0, c.getQualifierArray(), c.getQualifierOffset(), IIDesc.HBASE_DICTIONARY_BYTES.length) == 0) {
- row.getDictionary().set(c.getValueArray(), c.getValueOffset(), c.getValueLength());
- }
+ row.updateWith(c);
}
return row;
}
@@ -97,7 +91,6 @@ public class HbaseServerKVIterator implements Iterable<IIRow>, Closeable {
}
}
-
@Override
public Iterator<IIRow> iterator() {
return new IIRowIterator(innerScanner);
[34/50] incubator-kylin git commit: KYLIN-653 minor change
Posted by li...@apache.org.
KYLIN-653 minor change
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/d1c115d8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/d1c115d8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/d1c115d8
Branch: refs/heads/streaming-localdict
Commit: d1c115d8242b4fc819ab36153d8bffa89a265631
Parents: 0f8b7a4
Author: honma <ho...@ebay.com>
Authored: Fri Mar 27 15:11:06 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Fri Mar 27 15:17:00 2015 +0800
----------------------------------------------------------------------
.../org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d1c115d8/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java b/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
index 6832dcf..62cf6e8 100644
--- a/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
+++ b/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
@@ -9,7 +9,6 @@ import java.util.concurrent.LinkedBlockingQueue;
import javax.annotation.Nullable;
-import com.google.common.collect.Sets;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.MutationSerialization;
@@ -40,6 +39,7 @@ import org.junit.Test;
import com.google.common.base.Function;
import com.google.common.collect.Collections2;
import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
/**
* Created by Hongbin Ma(Binmahone) on 3/26/15.
[17/50] incubator-kylin git commit: refactor
Posted by li...@apache.org.
refactor
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/2b5495ce
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/2b5495ce
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/2b5495ce
Branch: refs/heads/streaming-localdict
Commit: 2b5495ce1debe21be361e942428cfff0bd1dff36
Parents: c3ff4f4
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 10:05:20 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 10:05:20 2015 +0800
----------------------------------------------------------------------
.../invertedindex/index/BatchSliceBuilder.java | 2 +-
.../kylin/job/streaming/StreamingBootstrap.java | 117 +++++++++++++++++
.../kylin/job/streaming/StreamingCLI.java | 71 ++++++++++
.../apache/kylin/job/IIStreamBuilderTest.java | 2 +-
pom.xml | 1 +
.../apache/kylin/streaming/KafkaRequester.java | 128 +++++++++++--------
.../kylin/streaming/StreamingBootstrap.java | 109 ----------------
.../apache/kylin/streaming/StreamingCLI.java | 71 ----------
8 files changed, 265 insertions(+), 236 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/2b5495ce/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/BatchSliceBuilder.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/BatchSliceBuilder.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/BatchSliceBuilder.java
index 6ba328c..037dd6c 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/BatchSliceBuilder.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/BatchSliceBuilder.java
@@ -87,7 +87,7 @@ public class BatchSliceBuilder {
private long increaseSliceTimestamp(long timestamp) {
if (timestamp <= sliceTimestamp) {
- return ++timestamp; // ensure slice timestamp increases
+ return sliceTimestamp+1; // ensure slice timestamp increases
} else {
return timestamp;
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/2b5495ce/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java b/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
new file mode 100644
index 0000000..ddaae29
--- /dev/null
+++ b/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
@@ -0,0 +1,117 @@
+/*
+ *
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ *
+ * contributor license agreements. See the NOTICE file distributed with
+ *
+ * this work for additional information regarding copyright ownership.
+ *
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ *
+ * (the "License"); you may not use this file except in compliance with
+ *
+ * the License. You may obtain a copy of the License at
+ *
+ *
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ *
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ *
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *
+ * See the License for the specific language governing permissions and
+ *
+ * limitations under the License.
+ *
+ * /
+ */
+
+package org.apache.kylin.job.streaming;
+
+import com.google.common.base.Preconditions;
+import kafka.api.OffsetRequest;
+import kafka.cluster.Broker;
+import kafka.javaapi.PartitionMetadata;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.invertedindex.IIInstance;
+import org.apache.kylin.invertedindex.IIManager;
+import org.apache.kylin.invertedindex.IISegment;
+import org.apache.kylin.invertedindex.model.IIDesc;
+import org.apache.kylin.job.hadoop.invertedindex.IICreateHTableJob;
+import org.apache.kylin.streaming.*;
+import org.apache.kylin.streaming.invertedindex.IIStreamBuilder;
+
+import java.nio.ByteBuffer;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
+/**
+ * Created by qianzhou on 3/26/15.
+ */
+public class StreamingBootstrap {
+
+ private KylinConfig kylinConfig;
+ private StreamManager streamManager;
+ private IIManager iiManager;
+
+ public static StreamingBootstrap getInstance(KylinConfig kylinConfig) {
+ return new StreamingBootstrap(kylinConfig);
+ }
+
+ private StreamingBootstrap(KylinConfig kylinConfig) {
+ this.kylinConfig = kylinConfig;
+ this.streamManager = StreamManager.getInstance(kylinConfig);
+ this.iiManager = IIManager.getInstance(kylinConfig);
+ }
+
+ private static Broker getLeadBroker(KafkaConfig kafkaConfig, int partitionId) {
+ final PartitionMetadata partitionMetadata = KafkaRequester.getPartitionMetadata(kafkaConfig.getTopic(), partitionId, kafkaConfig.getBrokers(), kafkaConfig);
+ if (partitionMetadata != null && partitionMetadata.errorCode() == 0) {
+ return partitionMetadata.leader();
+ } else {
+ return null;
+ }
+ }
+
+ public void startStreaming(String streamingConf, int partitionId) throws Exception {
+ final KafkaConfig kafkaConfig = streamManager.getKafkaConfig(streamingConf);
+ Preconditions.checkArgument(kafkaConfig != null, "cannot find kafka config:" + streamingConf);
+ final IIInstance ii = iiManager.getII(kafkaConfig.getIiName());
+ Preconditions.checkNotNull(ii);
+ Preconditions.checkArgument(ii.getSegments().size() > 0);
+ final IISegment iiSegment = ii.getSegments().get(0);
+
+ final Broker leadBroker = getLeadBroker(kafkaConfig, partitionId);
+ Preconditions.checkState(leadBroker != null, "cannot find lead broker");
+ final long earliestOffset = KafkaRequester.getLastOffset(kafkaConfig.getTopic(), partitionId, OffsetRequest.EarliestTime(), leadBroker, kafkaConfig);
+ long streamOffset = ii.getStreamOffsets().get(partitionId);
+ if (streamOffset < earliestOffset) {
+ streamOffset = earliestOffset;
+ }
+
+
+ KafkaConsumer consumer = new KafkaConsumer(kafkaConfig.getTopic(), 0, streamOffset, kafkaConfig.getBrokers(), kafkaConfig) {
+ @Override
+ protected void consume(long offset, ByteBuffer payload) throws Exception {
+ byte[] bytes = new byte[payload.limit()];
+ payload.get(bytes);
+ getStreamQueue().put(new Stream(offset, bytes));
+ }
+ };
+ final IIDesc desc = ii.getDescriptor();
+
+ Executors.newSingleThreadExecutor().submit(consumer);
+ final IIStreamBuilder task = new IIStreamBuilder(consumer.getStreamQueue(), iiSegment.getStorageLocationIdentifier(), desc, partitionId);
+ task.setStreamParser(JsonStreamParser.instance);
+ final Future<?> future = Executors.newSingleThreadExecutor().submit(task);
+ future.get();
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/2b5495ce/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java b/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
new file mode 100644
index 0000000..8813cb3
--- /dev/null
+++ b/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
@@ -0,0 +1,71 @@
+/*
+ *
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ *
+ * contributor license agreements. See the NOTICE file distributed with
+ *
+ * this work for additional information regarding copyright ownership.
+ *
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ *
+ * (the "License"); you may not use this file except in compliance with
+ *
+ * the License. You may obtain a copy of the License at
+ *
+ *
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ *
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ *
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *
+ * See the License for the specific language governing permissions and
+ *
+ * limitations under the License.
+ *
+ * /
+ */
+
+package org.apache.kylin.job.streaming;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.kylin.common.KylinConfig;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Created by qianzhou on 3/26/15.
+ */
+public class StreamingCLI {
+
+ private static final Logger logger = LoggerFactory.getLogger(StreamingCLI.class);
+
+ public static void main(String[] args) {
+ try {
+ if (args.length < 2) {
+ printArgsError(args);
+ return;
+ }
+ if (args[0].equals("start")) {
+ String kafkaConfName = args[1];
+ StreamingBootstrap.getInstance(KylinConfig.getInstanceFromEnv()).startStreaming(kafkaConfName, 0);
+ } else if (args.equals("stop")) {
+
+ } else {
+ printArgsError(args);
+ }
+ } catch (Exception e) {
+ }
+ }
+
+ private static void printArgsError(String[] args) {
+ logger.warn("invalid args:" + StringUtils.join(args, " "));
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/2b5495ce/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java b/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
index 35a0fe9..d42da33 100644
--- a/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
+++ b/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
@@ -38,7 +38,7 @@ import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.AbstractKylinTestCase;
import org.apache.kylin.common.util.ClassUtil;
import org.apache.kylin.common.util.HBaseMetadataTestCase;
-import org.apache.kylin.streaming.StreamingBootstrap;
+import org.apache.kylin.job.streaming.StreamingBootstrap;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/2b5495ce/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 38d6220..064ea11 100644
--- a/pom.xml
+++ b/pom.xml
@@ -616,6 +616,7 @@
<exclude>**/BuildCubeWithEngineTest.java</exclude>
<exclude>**/BuildIIWithEngineTest.java</exclude>
<exclude>**/BuildIIWithStreamTest.java</exclude>
+ <exclude>**/IIStreamBuilderTest.java</exclude>
<exclude>**/SampleCubeSetupTest.java</exclude>
<exclude>**/KylinQueryTest.java</exclude>
<exclude>**/Kafka*Test.java</exclude>
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/2b5495ce/streaming/src/main/java/org/apache/kylin/streaming/KafkaRequester.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/KafkaRequester.java b/streaming/src/main/java/org/apache/kylin/streaming/KafkaRequester.java
index 699c0ed..ce87047 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/KafkaRequester.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/KafkaRequester.java
@@ -59,53 +59,65 @@ public final class KafkaRequester {
private static final Logger logger = LoggerFactory.getLogger(KafkaRequester.class);
public static TopicMeta getKafkaTopicMeta(KafkaConfig kafkaConfig) {
- SimpleConsumer consumer;
+ SimpleConsumer consumer = null;
for (Broker broker : kafkaConfig.getBrokers()) {
- consumer = new SimpleConsumer(broker.host(), broker.port(), kafkaConfig.getTimeout(), kafkaConfig.getBufferSize(), "topic_meta_lookup");
- List<String> topics = Collections.singletonList(kafkaConfig.getTopic());
- TopicMetadataRequest req = new TopicMetadataRequest(topics);
- TopicMetadataResponse resp = consumer.send(req);
- final List<TopicMetadata> topicMetadatas = resp.topicsMetadata();
- if (topicMetadatas.size() != 1) {
- break;
- }
- final TopicMetadata topicMetadata = topicMetadatas.get(0);
- if (topicMetadata.errorCode() != 0) {
- break;
- }
- List<Integer> partitionIds = Lists.transform(topicMetadata.partitionsMetadata(), new Function<PartitionMetadata, Integer>() {
- @Nullable
- @Override
- public Integer apply(PartitionMetadata partitionMetadata) {
- return partitionMetadata.partitionId();
+ try {
+ consumer = new SimpleConsumer(broker.host(), broker.port(), kafkaConfig.getTimeout(), kafkaConfig.getBufferSize(), "topic_meta_lookup");
+ List<String> topics = Collections.singletonList(kafkaConfig.getTopic());
+ TopicMetadataRequest req = new TopicMetadataRequest(topics);
+ TopicMetadataResponse resp = consumer.send(req);
+ final List<TopicMetadata> topicMetadatas = resp.topicsMetadata();
+ if (topicMetadatas.size() != 1) {
+ break;
+ }
+ final TopicMetadata topicMetadata = topicMetadatas.get(0);
+ if (topicMetadata.errorCode() != 0) {
+ break;
}
- });
- return new TopicMeta(kafkaConfig.getTopic(), partitionIds);
+ List<Integer> partitionIds = Lists.transform(topicMetadata.partitionsMetadata(), new Function<PartitionMetadata, Integer>() {
+ @Nullable
+ @Override
+ public Integer apply(PartitionMetadata partitionMetadata) {
+ return partitionMetadata.partitionId();
+ }
+ });
+ return new TopicMeta(kafkaConfig.getTopic(), partitionIds);
+ } finally {
+ if (consumer != null) {
+ consumer.close();
+ }
+ }
}
logger.debug("cannot find topic:" + kafkaConfig.getTopic());
return null;
}
public static PartitionMetadata getPartitionMetadata(String topic, int partitionId, List<Broker> brokers, KafkaConfig kafkaConfig) {
- SimpleConsumer consumer;
+ SimpleConsumer consumer = null;
for (Broker broker : brokers) {
- consumer = new SimpleConsumer(broker.host(), broker.port(), kafkaConfig.getTimeout(), kafkaConfig.getBufferSize(), "topic_meta_lookup");
- List<String> topics = Collections.singletonList(topic);
- TopicMetadataRequest req = new TopicMetadataRequest(topics);
- TopicMetadataResponse resp = consumer.send(req);
- final List<TopicMetadata> topicMetadatas = resp.topicsMetadata();
- if (topicMetadatas.size() != 1) {
- logger.warn("invalid topicMetadata size:" + topicMetadatas.size());
- break;
- }
- final TopicMetadata topicMetadata = topicMetadatas.get(0);
- if (topicMetadata.errorCode() != 0) {
- logger.warn("fetching topicMetadata with errorCode:" + topicMetadata.errorCode());
- break;
- }
- for (PartitionMetadata partitionMetadata : topicMetadata.partitionsMetadata()) {
- if (partitionMetadata.partitionId() == partitionId) {
- return partitionMetadata;
+ try {
+ consumer = new SimpleConsumer(broker.host(), broker.port(), kafkaConfig.getTimeout(), kafkaConfig.getBufferSize(), "topic_meta_lookup");
+ List<String> topics = Collections.singletonList(topic);
+ TopicMetadataRequest req = new TopicMetadataRequest(topics);
+ TopicMetadataResponse resp = consumer.send(req);
+ final List<TopicMetadata> topicMetadatas = resp.topicsMetadata();
+ if (topicMetadatas.size() != 1) {
+ logger.warn("invalid topicMetadata size:" + topicMetadatas.size());
+ break;
+ }
+ final TopicMetadata topicMetadata = topicMetadatas.get(0);
+ if (topicMetadata.errorCode() != 0) {
+ logger.warn("fetching topicMetadata with errorCode:" + topicMetadata.errorCode());
+ break;
+ }
+ for (PartitionMetadata partitionMetadata : topicMetadata.partitionsMetadata()) {
+ if (partitionMetadata.partitionId() == partitionId) {
+ return partitionMetadata;
+ }
+ }
+ } finally {
+ if (consumer != null) {
+ consumer.close();
}
}
}
@@ -116,30 +128,38 @@ public final class KafkaRequester {
public static FetchResponse fetchResponse(String topic, int partitionId, long offset, Broker broker, KafkaConfig kafkaConfig) {
final String clientName = "client_" + topic + "_" + partitionId;
SimpleConsumer consumer = new SimpleConsumer(broker.host(), broker.port(), kafkaConfig.getTimeout(), kafkaConfig.getBufferSize(), clientName);
- kafka.api.FetchRequest req = new FetchRequestBuilder()
- .clientId(clientName)
- .addFetch(topic, partitionId, offset, kafkaConfig.getMaxReadCount()) // Note: this fetchSize of 100000 might need to be increased if large batches are written to Kafka
- .build();
- return consumer.fetch(req);
+ try {
+ kafka.api.FetchRequest req = new FetchRequestBuilder()
+ .clientId(clientName)
+ .addFetch(topic, partitionId, offset, kafkaConfig.getMaxReadCount()) // Note: this fetchSize of 100000 might need to be increased if large batches are written to Kafka
+ .build();
+ return consumer.fetch(req);
+ } finally {
+ consumer.close();
+ }
}
public static long getLastOffset(String topic, int partitionId,
long whichTime, Broker broker, KafkaConfig kafkaConfig) {
String clientName = "client_" + topic + "_" + partitionId;
SimpleConsumer consumer = new SimpleConsumer(broker.host(), broker.port(), kafkaConfig.getTimeout(), kafkaConfig.getBufferSize(), clientName);
- TopicAndPartition topicAndPartition = new TopicAndPartition(topic, partitionId);
- Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo = new HashMap<TopicAndPartition, PartitionOffsetRequestInfo>();
- requestInfo.put(topicAndPartition, new PartitionOffsetRequestInfo(whichTime, 1));
- kafka.javaapi.OffsetRequest request = new kafka.javaapi.OffsetRequest(
- requestInfo, kafka.api.OffsetRequest.CurrentVersion(), clientName);
- OffsetResponse response = consumer.getOffsetsBefore(request);
+ try {
+ TopicAndPartition topicAndPartition = new TopicAndPartition(topic, partitionId);
+ Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo = new HashMap<TopicAndPartition, PartitionOffsetRequestInfo>();
+ requestInfo.put(topicAndPartition, new PartitionOffsetRequestInfo(whichTime, 1));
+ kafka.javaapi.OffsetRequest request = new kafka.javaapi.OffsetRequest(
+ requestInfo, kafka.api.OffsetRequest.CurrentVersion(), clientName);
+ OffsetResponse response = consumer.getOffsetsBefore(request);
- if (response.hasError()) {
- System.out.println("Error fetching data Offset Data the Broker. Reason: " + response.errorCode(topic, partitionId));
- return 0;
+ if (response.hasError()) {
+ System.out.println("Error fetching data Offset Data the Broker. Reason: " + response.errorCode(topic, partitionId));
+ return 0;
+ }
+ long[] offsets = response.offsets(topic, partitionId);
+ return offsets[0];
+ } finally {
+ consumer.close();
}
- long[] offsets = response.offsets(topic, partitionId);
- return offsets[0];
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/2b5495ce/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java b/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java
deleted file mode 100644
index bd1ab42..0000000
--- a/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- *
- *
- * Licensed to the Apache Software Foundation (ASF) under one or more
- *
- * contributor license agreements. See the NOTICE file distributed with
- *
- * this work for additional information regarding copyright ownership.
- *
- * The ASF licenses this file to You under the Apache License, Version 2.0
- *
- * (the "License"); you may not use this file except in compliance with
- *
- * the License. You may obtain a copy of the License at
- *
- *
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- *
- *
- * Unless required by applicable law or agreed to in writing, software
- *
- * distributed under the License is distributed on an "AS IS" BASIS,
- *
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *
- * See the License for the specific language governing permissions and
- *
- * limitations under the License.
- *
- * /
- */
-
-package org.apache.kylin.streaming;
-
-import com.google.common.base.Preconditions;
-import kafka.api.OffsetRequest;
-import kafka.cluster.Broker;
-import kafka.javaapi.PartitionMetadata;
-import org.apache.kylin.common.KylinConfig;
-import org.apache.kylin.invertedindex.IIInstance;
-import org.apache.kylin.invertedindex.IIManager;
-import org.apache.kylin.invertedindex.model.IIDesc;
-import org.apache.kylin.streaming.invertedindex.IIStreamBuilder;
-
-import java.nio.ByteBuffer;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-
-/**
- * Created by qianzhou on 3/26/15.
- */
-public class StreamingBootstrap {
-
- private KylinConfig kylinConfig;
- private StreamManager streamManager;
- private IIManager iiManager;
-
- public static StreamingBootstrap getInstance(KylinConfig kylinConfig) {
- return new StreamingBootstrap(kylinConfig);
- }
-
- private StreamingBootstrap(KylinConfig kylinConfig) {
- this.kylinConfig = kylinConfig;
- this.streamManager = StreamManager.getInstance(kylinConfig);
- this.iiManager = IIManager.getInstance(kylinConfig);
- }
-
- private static Broker getLeadBroker(KafkaConfig kafkaConfig, int partitionId) {
- final PartitionMetadata partitionMetadata = KafkaRequester.getPartitionMetadata(kafkaConfig.getTopic(), partitionId, kafkaConfig.getBrokers(), kafkaConfig);
- if (partitionMetadata != null && partitionMetadata.errorCode() == 0) {
- return partitionMetadata.leader();
- } else {
- return null;
- }
- }
-
- public void startStreaming(String streamingConf, int partitionId) throws Exception {
- final KafkaConfig kafkaConfig = streamManager.getKafkaConfig(streamingConf);
- Preconditions.checkArgument(kafkaConfig != null, "cannot find kafka config:" + streamingConf);
- final IIInstance ii = iiManager.getII(kafkaConfig.getIiName());
- Preconditions.checkNotNull(ii);
-
- final Broker leadBroker = getLeadBroker(kafkaConfig, partitionId);
- Preconditions.checkState(leadBroker != null, "cannot find lead broker");
- final long earliestOffset = KafkaRequester.getLastOffset(kafkaConfig.getTopic(), partitionId, OffsetRequest.EarliestTime(), leadBroker, kafkaConfig);
- long streamOffset = ii.getStreamOffsets().get(partitionId);
- if (streamOffset < earliestOffset) {
- streamOffset = earliestOffset;
- }
-
-
- KafkaConsumer consumer = new KafkaConsumer(kafkaConfig.getTopic(), 0, streamOffset, kafkaConfig.getBrokers(), kafkaConfig) {
- @Override
- protected void consume(long offset, ByteBuffer payload) throws Exception {
- byte[] bytes = new byte[payload.limit()];
- payload.get(bytes);
- getStreamQueue().put(new Stream(offset, bytes));
- }
- };
- final IIDesc desc = ii.getDescriptor();
- Executors.newSingleThreadExecutor().submit(consumer);
- final IIStreamBuilder task = new IIStreamBuilder(consumer.getStreamQueue(), ii.getSegments().get(0).getStorageLocationIdentifier(), desc, partitionId);
- task.setStreamParser(JsonStreamParser.instance);
- final Future<?> future = Executors.newSingleThreadExecutor().submit(task);
- future.get();
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/2b5495ce/streaming/src/main/java/org/apache/kylin/streaming/StreamingCLI.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/StreamingCLI.java b/streaming/src/main/java/org/apache/kylin/streaming/StreamingCLI.java
deleted file mode 100644
index dac8ce0..0000000
--- a/streaming/src/main/java/org/apache/kylin/streaming/StreamingCLI.java
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- *
- *
- * Licensed to the Apache Software Foundation (ASF) under one or more
- *
- * contributor license agreements. See the NOTICE file distributed with
- *
- * this work for additional information regarding copyright ownership.
- *
- * The ASF licenses this file to You under the Apache License, Version 2.0
- *
- * (the "License"); you may not use this file except in compliance with
- *
- * the License. You may obtain a copy of the License at
- *
- *
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- *
- *
- * Unless required by applicable law or agreed to in writing, software
- *
- * distributed under the License is distributed on an "AS IS" BASIS,
- *
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *
- * See the License for the specific language governing permissions and
- *
- * limitations under the License.
- *
- * /
- */
-
-package org.apache.kylin.streaming;
-
-import org.apache.commons.lang3.StringUtils;
-import org.apache.kylin.common.KylinConfig;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Created by qianzhou on 3/26/15.
- */
-public class StreamingCLI {
-
- private static final Logger logger = LoggerFactory.getLogger(StreamingCLI.class);
-
- public static void main(String[] args) {
- try {
- if (args.length < 2) {
- printArgsError(args);
- return;
- }
- if (args[0].equals("start")) {
- String kafkaConfName = args[1];
- StreamingBootstrap.getInstance(KylinConfig.getInstanceFromEnv()).startStreaming(kafkaConfName, 0);
- } else if (args.equals("stop")) {
-
- } else {
- printArgsError(args);
- }
- } catch (Exception e) {
- }
- }
-
- private static void printArgsError(String[] args) {
- logger.warn("invalid args:" + StringUtils.join(args, " "));
- }
-
-}
[08/50] incubator-kylin git commit: Merge branch
'streaming-localdict' of https://github.com/KylinOLAP/Kylin into
streaming-localdict
Posted by li...@apache.org.
Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/3bf6b377
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/3bf6b377
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/3bf6b377
Branch: refs/heads/streaming-localdict
Commit: 3bf6b377fe419371a4d53ef79af2c69a7f872713
Parents: d564876 9a1c4cb
Author: Shao Feng, Shi <sh...@ebay.com>
Authored: Thu Mar 26 18:04:03 2015 +0800
Committer: Shao Feng, Shi <sh...@ebay.com>
Committed: Thu Mar 26 18:04:03 2015 +0800
----------------------------------------------------------------------
.../kylin/common/persistence/ResourceStore.java | 2 +-
.../localmeta/streaming/kafka_test.json | 15 ++
.../apache/kylin/invertedindex/IIInstance.java | 12 ++
.../apache/kylin/invertedindex/IIManager.java | 17 +-
.../invertedindex/index/BatchSliceBuilder.java | 6 +-
.../model/IIJoinedFlatTableDesc.java | 12 +-
.../invertedindex/model/IIKeyValueCodec.java | 91 +++++----
.../model/IIKeyValueCodecWithState.java | 68 +++++++
.../apache/kylin/invertedindex/model/IIRow.java | 13 ++
.../org/apache/kylin/job/JoinedFlatTable.java | 1 -
.../kylin/job/hadoop/AbstractHadoopJob.java | 2 +-
.../kylin/job/hadoop/cube/BaseCuboidMapper.java | 2 +-
.../kylin/job/hadoop/cube/CubeHFileMapper.java | 2 +-
.../kylin/job/hadoop/cube/CuboidReducer.java | 2 +-
.../job/hadoop/cube/FactDistinctColumnsJob.java | 2 +-
.../hadoop/cube/FactDistinctColumnsMapper.java | 200 -------------------
.../cube/FactDistinctColumnsMapperBase.java | 81 ++++++++
.../hadoop/cube/FactDistinctColumnsReducer.java | 2 +-
.../cube/FactDistinctHiveColumnsMapper.java | 129 ++++++++++++
.../cube/FactDistinctIIColumnsMapper.java | 129 ++++++++++++
.../job/hadoop/cube/MergeCuboidMapper.java | 2 +-
.../kylin/job/hadoop/cube/NDCuboidMapper.java | 2 +-
.../job/hadoop/cube/NewBaseCuboidMapper.java | 2 +-
.../job/hadoop/cubev2/InMemCuboidMapper.java | 2 +-
.../job/hadoop/cubev2/InMemCuboidReducer.java | 2 +-
.../invertedindex/InvertedIndexMapper.java | 2 +-
.../invertedindex/InvertedIndexPartitioner.java | 2 +-
.../invertedindex/InvertedIndexReducer.java | 2 +-
.../metadata/model/IJoinedFlatTableDesc.java | 2 -
.../metadata/model/IntermediateColumnDesc.java | 4 +
.../endpoint/HbaseServerKVIterator.java | 9 +-
streaming/pom.xml | 8 +
.../kylin/streaming/JsonStreamParser.java | 73 +++++++
.../org/apache/kylin/streaming/KafkaConfig.java | 19 +-
.../apache/kylin/streaming/KafkaConsumer.java | 17 +-
.../apache/kylin/streaming/StreamBuilder.java | 9 +
.../apache/kylin/streaming/StreamManager.java | 18 +-
.../apache/kylin/streaming/StreamParser.java | 47 +++++
.../kylin/streaming/StreamingBootstrap.java | 102 ++++++++++
.../apache/kylin/streaming/StreamingCLI.java | 70 +++++++
.../kylin/streaming/StringStreamParser.java | 55 +++++
.../kylin/streaming/cube/CubeStreamBuilder.java | 2 +-
.../invertedindex/IIStreamBuilder.java | 2 +-
.../kylin/streaming/EternalStreamProducer.java | 3 +-
.../apache/kylin/streaming/KafkaBaseTest.java | 22 --
.../apache/kylin/streaming/KafkaConfigTest.java | 50 -----
.../kylin/streaming/KafkaConsumerTest.java | 8 +-
.../kylin/streaming/KafkaRequesterTest.java | 9 +
.../kylin/streaming/Nous/NousMessageTest.java | 4 +-
.../kylin/streaming/OneOffStreamProducer.java | 2 +-
.../kylin/streaming/StreamManagerTest.java | 69 +++++++
.../invertedindex/PrintOutStreamBuilder.java | 67 +++++++
.../kafka_streaming_test/kafka.properties | 10 -
53 files changed, 1068 insertions(+), 417 deletions(-)
----------------------------------------------------------------------
[13/50] incubator-kylin git commit: Collect cuboid statistics during
fetching distinct columns;
Posted by li...@apache.org.
Collect cuboid statistics during fetching distinct columns;
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/d4a271df
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/d4a271df
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/d4a271df
Branch: refs/heads/streaming-localdict
Commit: d4a271df9d9b055e44d1a6fc1e3cc3055e14c2bd
Parents: 7360f5b
Author: Shao Feng, Shi <sh...@ebay.com>
Authored: Thu Mar 26 23:15:57 2015 +0800
Committer: Shao Feng, Shi <sh...@ebay.com>
Committed: Thu Mar 26 23:15:57 2015 +0800
----------------------------------------------------------------------
.../cube/FactDistinctColumnsMapperBase.java | 5 +-
.../hadoop/cube/FactDistinctColumnsReducer.java | 53 ++++++++++++++------
.../cube/FactDistinctHiveColumnsMapper.java | 51 +++++++++++++------
3 files changed, 77 insertions(+), 32 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d4a271df/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
index c0455ff..9945769 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
@@ -5,6 +5,7 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.ShortWritable;
import org.apache.hadoop.io.Text;
import org.apache.hive.hcatalog.data.HCatRecord;
@@ -23,7 +24,7 @@ import org.apache.kylin.metadata.model.TblColRef;
/**
* Created by Hongbin Ma(Binmahone) on 3/26/15.
*/
-public class FactDistinctColumnsMapperBase<KEYIN, VALUEIN> extends KylinMapper<KEYIN, VALUEIN, ShortWritable, Text> {
+public class FactDistinctColumnsMapperBase<KEYIN, VALUEIN> extends KylinMapper<KEYIN, VALUEIN, LongWritable, Text> {
protected String cubeName;
protected CubeInstance cube;
@@ -32,7 +33,7 @@ public class FactDistinctColumnsMapperBase<KEYIN, VALUEIN> extends KylinMapper<K
protected List<TblColRef> columns;
protected ArrayList<Integer> factDictCols;
- protected ShortWritable outputKey = new ShortWritable();
+ protected LongWritable outputKey = new LongWritable();
protected Text outputValue = new Text();
protected int errorRecordCounter =0;
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d4a271df/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java
index 2052d08..e1529d3 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java
@@ -19,13 +19,14 @@
package org.apache.kylin.job.hadoop.cube;
import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.ShortWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.kylin.common.KylinConfig;
@@ -44,17 +45,20 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
+import java.util.Map;
/**
* @author yangli9
*/
-public class FactDistinctColumnsReducer extends KylinReducer<ShortWritable, Text, NullWritable, Text> {
+public class FactDistinctColumnsReducer extends KylinReducer<LongWritable, Text, NullWritable, Text> {
private List<TblColRef> columnList = new ArrayList<TblColRef>();
private boolean collectStatistics = false;
private String statisticsOutput = null;
private List<Long> rowKeyCountInMappers;
- private HyperLogLogPlusCounter totalHll;
+ private Map<Long, Long> rowKeyCountInCuboids;
+ protected Map<Long, HyperLogLogPlusCounter> cuboidHLLMap = null;
+ protected long baseCuboidId;
@Override
protected void setup(Context context) throws IOException {
@@ -66,23 +70,24 @@ public class FactDistinctColumnsReducer extends KylinReducer<ShortWritable, Text
CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
CubeDesc cubeDesc = cube.getDescriptor();
- long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
+ baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
Cuboid baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId);
columnList = baseCuboid.getColumns();
collectStatistics = Boolean.parseBoolean(conf.get(BatchConstants.CFG_STATISTICS_ENABLED));
statisticsOutput = conf.get(BatchConstants.CFG_STATISTICS_OUTPUT);
if (collectStatistics) {
- totalHll = new HyperLogLogPlusCounter(16);
rowKeyCountInMappers = Lists.newArrayList();
+ rowKeyCountInCuboids = Maps.newHashMap();
+ cuboidHLLMap = Maps.newHashMap();
}
}
@Override
- public void reduce(ShortWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
+ public void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
if (key.get() >= 0) {
- TblColRef col = columnList.get(key.get());
+ TblColRef col = columnList.get((int) key.get());
HashSet<ByteArray> set = new HashSet<ByteArray>();
for (Text textValue : values) {
@@ -105,26 +110,38 @@ public class FactDistinctColumnsReducer extends KylinReducer<ShortWritable, Text
}
} else {
// for hll
+ long cuboidId = 0 - key.get();
+
for (Text value : values) {
HyperLogLogPlusCounter hll = new HyperLogLogPlusCounter(16);
ByteArray byteArray = new ByteArray(value.getBytes());
hll.readRegisters(byteArray.asBuffer());
- rowKeyCountInMappers.add(hll.getCountEstimate());
- // merge the hll with total hll
- totalHll.merge(hll);
+ if (cuboidId > baseCuboidId) {
+ // if this is the summary info from a mapper, record the number before merge
+ rowKeyCountInMappers.add(hll.getCountEstimate());
+ }
+
+ if (cuboidHLLMap.get(cuboidId) != null) {
+ hll.merge(cuboidHLLMap.get(cuboidId));
+ }
+ cuboidHLLMap.put(cuboidId, hll);
}
}
}
protected void cleanup(Reducer.Context context) throws IOException, InterruptedException {
+
+ for (Long cuboidId : cuboidHLLMap.keySet()) {
+ rowKeyCountInCuboids.put(cuboidId, cuboidHLLMap.get(cuboidId).getCountEstimate());
+ }
+
//output the hll info;
if (collectStatistics) {
Configuration conf = context.getConfiguration();
FileSystem fs = FileSystem.get(conf);
- String outputPath = conf.get(BatchConstants.CFG_STATISTICS_OUTPUT);
- FSDataOutputStream out = fs.create(new Path(outputPath, BatchConstants.CFG_STATISTICS_CUBE_ESTIMATION));
+ FSDataOutputStream out = fs.create(new Path(statisticsOutput, BatchConstants.CFG_STATISTICS_CUBE_ESTIMATION));
try {
long totalSum = 0;
@@ -141,13 +158,21 @@ public class FactDistinctColumnsReducer extends KylinReducer<ShortWritable, Text
out.write('\n');
- msg = "The merged cube segment has " + totalHll.getCountEstimate() + " rows.";
+ long grantTotal = rowKeyCountInCuboids.get(baseCuboidId + 1);
+ msg = "The merged cube has " + grantTotal + " rows.";
out.write(msg.getBytes());
out.write('\n');
- msg = "The compaction rate is " + (totalHll.getCountEstimate()) + "/" + totalSum + " = " + (totalHll.getCountEstimate() * 100.0) / totalSum + "%.";
+ msg = "The compaction rate is " + (grantTotal) + "/" + totalSum + " = " + (grantTotal * 100.0) / totalSum + "%.";
out.write(msg.getBytes());
out.write('\n');
+ out.write('\n');
+
+ for (long i = 0; i < baseCuboidId; i++) {
+ msg = "Cuboid " + i + " has " + rowKeyCountInCuboids.get(i) + " rows.";
+ out.write(msg.getBytes());
+ out.write('\n');
+ }
} finally {
out.close();
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d4a271df/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctHiveColumnsMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctHiveColumnsMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctHiveColumnsMapper.java
index 64ae353..9e9c096 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctHiveColumnsMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctHiveColumnsMapper.java
@@ -18,11 +18,8 @@
package org.apache.kylin.job.hadoop.cube;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.Collection;
-import java.util.List;
-
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hive.hcatalog.data.HCatRecord;
@@ -30,12 +27,17 @@ import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
import org.apache.kylin.common.hll.HyperLogLogPlusCounter;
+import org.apache.kylin.cube.cuboid.Cuboid;
import org.apache.kylin.cube.cuboid.CuboidScheduler;
import org.apache.kylin.cube.model.CubeJoinedFlatTableDesc;
import org.apache.kylin.dict.lookup.HiveTableReader;
import org.apache.kylin.job.constant.BatchConstants;
-import com.google.common.collect.Lists;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
/**
* @author yangli9
@@ -48,7 +50,8 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap
protected boolean collectStatistics = false;
protected CuboidScheduler cuboidScheduler = null;
protected List<String> rowKeyValues = null;
- protected HyperLogLogPlusCounter hll;
+ protected Map<Long, HyperLogLogPlusCounter> cuboidHLLMap = null;
+ protected HyperLogLogPlusCounter totalHll = null;
protected int nRowKey;
@Override
@@ -58,11 +61,11 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap
schema = HCatInputFormat.getTableSchema(context.getConfiguration());
intermediateTableDesc = new CubeJoinedFlatTableDesc(cubeDesc, null);
-
collectStatistics = Boolean.parseBoolean(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_ENABLED));
if (collectStatistics) {
cuboidScheduler = new CuboidScheduler(cubeDesc);
- hll = new HyperLogLogPlusCounter(16);
+ cuboidHLLMap = Maps.newHashMap();
+ totalHll = new HyperLogLogPlusCounter(16);
rowKeyValues = Lists.newArrayList();
nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length;
}
@@ -74,7 +77,7 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap
int[] flatTableIndexes = intermediateTableDesc.getRowKeyColumnIndexes();
HCatFieldSchema fieldSchema;
for (int i : factDictCols) {
- outputKey.set((short) i);
+ outputKey.set((long) i);
fieldSchema = schema.get(flatTableIndexes[i]);
Object fieldValue = record.get(fieldSchema.getName(), schema);
if (fieldValue == null)
@@ -103,8 +106,13 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap
mask = mask >> 1;
}
- String key = StringUtils.join(rowKeyValues, ",");
- hll.add(key);
+ HyperLogLogPlusCounter hll = cuboidHLLMap.get(cuboidId);
+ if (hll == null) {
+ hll = new HyperLogLogPlusCounter(16);
+ cuboidHLLMap.put(cuboidId, hll);
+ }
+
+ hll.add(StringUtils.join(rowKeyValues, ","));
Collection<Long> children = cuboidScheduler.getSpanningCuboid(cuboidId);
for (Long childId : children) {
@@ -116,11 +124,22 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
if (collectStatistics) {
- // output hll to reducer, key is -1
- // keyBuf = Bytes.toBytes(-1);
- outputKey.set((short) -1);
+
+ // output each cuboid's hll to reducer, key is 0 - cuboidId
+ for (Long cuboidId : cuboidHLLMap.keySet()) {
+ HyperLogLogPlusCounter hll = cuboidHLLMap.get(cuboidId);
+ totalHll.merge(hll); // merge each cuboid's counter to the total hll
+ outputKey.set(0 - cuboidId);
+ ByteBuffer hllBuf = ByteBuffer.allocate(64 * 1024);
+ hll.writeRegisters(hllBuf);
+ outputValue.set(hllBuf.array());
+ context.write(outputKey, outputValue);
+ }
+
+ //output the total hll for this mapper;
+ outputKey.set(0 - baseCuboidId - 1);
ByteBuffer hllBuf = ByteBuffer.allocate(64 * 1024);
- hll.writeRegisters(hllBuf);
+ totalHll.writeRegisters(hllBuf);
outputValue.set(hllBuf.array());
context.write(outputKey, outputValue);
}
[21/50] incubator-kylin git commit: refactor
Posted by li...@apache.org.
refactor
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/b6b3388c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/b6b3388c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/b6b3388c
Branch: refs/heads/streaming-localdict
Commit: b6b3388ce2239fe36f60f8aad2349081813b10f7
Parents: 7088724
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 11:57:27 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 11:57:27 2015 +0800
----------------------------------------------------------------------
.../kylin/job/streaming/StreamingBootstrap.java | 35 +++++++++++++++-----
.../kylin/job/streaming/StreamingCLI.java | 2 +-
.../apache/kylin/job/BuildIIWithStreamTest.java | 26 +++++----------
.../apache/kylin/streaming/KafkaConsumer.java | 8 ++++-
.../java/org/apache/kylin/streaming/Stream.java | 2 ++
5 files changed, 45 insertions(+), 28 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/b6b3388c/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java b/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
index ddaae29..65b23c4 100644
--- a/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
+++ b/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
@@ -35,6 +35,7 @@
package org.apache.kylin.job.streaming;
import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
import kafka.api.OffsetRequest;
import kafka.cluster.Broker;
import kafka.javaapi.PartitionMetadata;
@@ -50,8 +51,8 @@ import org.apache.kylin.streaming.*;
import org.apache.kylin.streaming.invertedindex.IIStreamBuilder;
import java.nio.ByteBuffer;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
+import java.util.Map;
+import java.util.concurrent.*;
/**
* Created by qianzhou on 3/26/15.
@@ -62,6 +63,8 @@ public class StreamingBootstrap {
private StreamManager streamManager;
private IIManager iiManager;
+ private Map<String, KafkaConsumer> kafkaConsumers = Maps.newConcurrentMap();
+
public static StreamingBootstrap getInstance(KylinConfig kylinConfig) {
return new StreamingBootstrap(kylinConfig);
}
@@ -81,9 +84,17 @@ public class StreamingBootstrap {
}
}
- public void startStreaming(String streamingConf, int partitionId) throws Exception {
- final KafkaConfig kafkaConfig = streamManager.getKafkaConfig(streamingConf);
- Preconditions.checkArgument(kafkaConfig != null, "cannot find kafka config:" + streamingConf);
+ public void stop(String streaming, int partitionId) throws Exception {
+ final KafkaConsumer consumer = kafkaConsumers.remove(getKey(streaming, partitionId));
+ if (consumer != null) {
+ consumer.stop();
+ consumer.getStreamQueue().put(Stream.EOF);
+ }
+ }
+
+ public void start(String streaming, int partitionId) throws Exception {
+ final KafkaConfig kafkaConfig = streamManager.getKafkaConfig(streaming);
+ Preconditions.checkArgument(kafkaConfig != null, "cannot find kafka config:" + streaming);
final IIInstance ii = iiManager.getII(kafkaConfig.getIiName());
Preconditions.checkNotNull(ii);
Preconditions.checkArgument(ii.getSegments().size() > 0);
@@ -96,7 +107,8 @@ public class StreamingBootstrap {
if (streamOffset < earliestOffset) {
streamOffset = earliestOffset;
}
-
+ String[] args = new String[]{"-iiname", kafkaConfig.getIiName(), "-htablename", iiSegment.getStorageLocationIdentifier()};
+ ToolRunner.run(new IICreateHTableJob(), args);
KafkaConsumer consumer = new KafkaConsumer(kafkaConfig.getTopic(), 0, streamOffset, kafkaConfig.getBrokers(), kafkaConfig) {
@Override
@@ -107,11 +119,16 @@ public class StreamingBootstrap {
}
};
final IIDesc desc = ii.getDescriptor();
+ kafkaConsumers.put(getKey(streaming, partitionId), consumer);
- Executors.newSingleThreadExecutor().submit(consumer);
final IIStreamBuilder task = new IIStreamBuilder(consumer.getStreamQueue(), iiSegment.getStorageLocationIdentifier(), desc, partitionId);
task.setStreamParser(JsonStreamParser.instance);
- final Future<?> future = Executors.newSingleThreadExecutor().submit(task);
- future.get();
+
+ Executors.newSingleThreadExecutor().submit(consumer);
+ Executors.newSingleThreadExecutor().submit(task);
+ }
+
+ private String getKey(String streaming, int partitionId) {
+ return streaming + "_" + partitionId;
}
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/b6b3388c/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java b/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
index 8813cb3..4977339 100644
--- a/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
+++ b/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
@@ -54,7 +54,7 @@ public class StreamingCLI {
}
if (args[0].equals("start")) {
String kafkaConfName = args[1];
- StreamingBootstrap.getInstance(KylinConfig.getInstanceFromEnv()).startStreaming(kafkaConfName, 0);
+ StreamingBootstrap.getInstance(KylinConfig.getInstanceFromEnv()).start(kafkaConfName, 0);
} else if (args.equals("stop")) {
} else {
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/b6b3388c/job/src/test/java/org/apache/kylin/job/BuildIIWithStreamTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/BuildIIWithStreamTest.java b/job/src/test/java/org/apache/kylin/job/BuildIIWithStreamTest.java
index 04a53f7..dae2d03 100644
--- a/job/src/test/java/org/apache/kylin/job/BuildIIWithStreamTest.java
+++ b/job/src/test/java/org/apache/kylin/job/BuildIIWithStreamTest.java
@@ -34,7 +34,6 @@
package org.apache.kylin.job;
-import com.google.common.collect.Lists;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;
@@ -59,14 +58,19 @@ import org.apache.kylin.metadata.model.TblColRef;
import org.apache.kylin.metadata.realization.RealizationStatusEnum;
import org.apache.kylin.streaming.Stream;
import org.apache.kylin.streaming.invertedindex.IIStreamBuilder;
-import org.junit.*;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.text.SimpleDateFormat;
-import java.util.*;
+import java.util.List;
+import java.util.TimeZone;
+import java.util.UUID;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
@@ -111,7 +115,7 @@ public class BuildIIWithStreamTest {
}
@AfterClass
- public static void after() throws Exception {
+ public static void afterClass() throws Exception {
backup();
}
@@ -213,22 +217,10 @@ public class BuildIIWithStreamTest {
ExecutorService executorService = Executors.newSingleThreadExecutor();
final IIStreamBuilder streamBuilder = new IIStreamBuilder(queue, segment.getStorageLocationIdentifier(), desc, 0);
int count = 0;
- List<String[]> rawData = Lists.newArrayList();
while (reader.next()) {
- desc.getTimestampColumn();
- rawData.add(reader.getRow());
+ queue.put(parse(reader.getRow()));
count++;
}
- final int timestampColumn = desc.getTimestampColumn();
- Collections.sort(rawData, new Comparator<String[]>() {
- @Override
- public int compare(String[] o1, String[] o2) {
- return o1[timestampColumn].compareTo(o2[timestampColumn]);
- }
- });
- for (String[] row : rawData) {
- queue.put(parse(row));
- }
logger.info("total record count:" + count + " htable:" + segment.getStorageLocationIdentifier());
queue.put(new Stream(-1, null));
final Future<?> future = executorService.submit(streamBuilder);
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/b6b3388c/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java b/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
index 18c8403..b083dea 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
@@ -64,6 +64,8 @@ public abstract class KafkaConsumer implements Runnable {
private Logger logger;
+ private volatile boolean stop = false;
+
public KafkaConsumer(String topic, int partitionId, long startOffset, List<Broker> initialBrokers, KafkaConfig kafkaConfig) {
this.topic = topic;
this.partitionId = partitionId;
@@ -92,7 +94,7 @@ public abstract class KafkaConsumer implements Runnable {
public void run() {
try {
Broker leadBroker = getLeadBroker();
- while (true) {
+ while (!stop) {
if (leadBroker == null) {
leadBroker = getLeadBroker();
}
@@ -123,4 +125,8 @@ public abstract class KafkaConsumer implements Runnable {
protected abstract void consume(long offset, ByteBuffer payload) throws Exception;
+ public void stop() {
+ this.stop = true;
+ }
+
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/b6b3388c/streaming/src/main/java/org/apache/kylin/streaming/Stream.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/Stream.java b/streaming/src/main/java/org/apache/kylin/streaming/Stream.java
index 2c6a86c..d337c4c 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/Stream.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/Stream.java
@@ -42,6 +42,8 @@ public class Stream {
private long offset;
private byte[] rawData;
+ public static final Stream EOF = new Stream(-1, new byte[0]);
+
public Stream(long offset, byte[] rawData) {
this.offset = offset;
this.rawData = rawData;
[15/50] incubator-kylin git commit: Use LongWritable as key type in
fact distinct job.
Posted by li...@apache.org.
Use LongWritable as key type in fact distinct job.
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/8d40a578
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/8d40a578
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/8d40a578
Branch: refs/heads/streaming-localdict
Commit: 8d40a578170da66c61503d7b42fe70c3a930dadd
Parents: 7658a50
Author: Shao Feng, Shi <sh...@ebay.com>
Authored: Thu Mar 26 23:32:56 2015 +0800
Committer: Shao Feng, Shi <sh...@ebay.com>
Committed: Thu Mar 26 23:32:56 2015 +0800
----------------------------------------------------------------------
.../job/hadoop/cube/FactDistinctColumnsCombiner.java | 6 +++---
.../kylin/job/hadoop/cube/FactDistinctColumnsJob.java | 4 ++--
.../job/hadoop/cube/FactDistinctColumnsMapperBase.java | 11 +++++------
3 files changed, 10 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8d40a578/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsCombiner.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsCombiner.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsCombiner.java
index c0cdd46..59ccd5a 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsCombiner.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsCombiner.java
@@ -21,9 +21,9 @@ package org.apache.kylin.job.hadoop.cube;
import java.io.IOException;
import java.util.HashSet;
+import org.apache.hadoop.io.LongWritable;
import org.apache.kylin.common.mr.KylinReducer;
import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.ShortWritable;
import org.apache.hadoop.io.Text;
import org.apache.kylin.common.util.ByteArray;
@@ -31,7 +31,7 @@ import org.apache.kylin.common.util.ByteArray;
/**
* @author yangli9
*/
-public class FactDistinctColumnsCombiner extends KylinReducer<ShortWritable, Text, ShortWritable, Text> {
+public class FactDistinctColumnsCombiner extends KylinReducer<LongWritable, Text, LongWritable, Text> {
private Text outputValue = new Text();
@@ -41,7 +41,7 @@ public class FactDistinctColumnsCombiner extends KylinReducer<ShortWritable, Tex
}
@Override
- public void reduce(ShortWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
+ public void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
if(key.get() >= 0) {
HashSet<ByteArray> set = new HashSet<ByteArray>();
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8d40a578/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsJob.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsJob.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsJob.java
index 17c5e9b..5903c7b 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsJob.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsJob.java
@@ -22,8 +22,8 @@ import java.io.IOException;
import org.apache.commons.cli.Options;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.ShortWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
@@ -103,7 +103,7 @@ public class FactDistinctColumnsJob extends AbstractHadoopJob {
job.setInputFormatClass(HCatInputFormat.class);
job.setMapperClass(FactDistinctHiveColumnsMapper.class);
job.setCombinerClass(FactDistinctColumnsCombiner.class);
- job.setMapOutputKeyClass(ShortWritable.class);
+ job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Text.class);
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8d40a578/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
index 9945769..2f046ab 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
@@ -1,12 +1,7 @@
package org.apache.kylin.job.hadoop.cube;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.ShortWritable;
import org.apache.hadoop.io.Text;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.kylin.common.KylinConfig;
@@ -21,6 +16,10 @@ import org.apache.kylin.job.constant.BatchConstants;
import org.apache.kylin.job.hadoop.AbstractHadoopJob;
import org.apache.kylin.metadata.model.TblColRef;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
/**
* Created by Hongbin Ma(Binmahone) on 3/26/15.
*/
@@ -35,7 +34,7 @@ public class FactDistinctColumnsMapperBase<KEYIN, VALUEIN> extends KylinMapper<K
protected LongWritable outputKey = new LongWritable();
protected Text outputValue = new Text();
- protected int errorRecordCounter =0;
+ protected int errorRecordCounter = 0;
@Override
protected void setup(Context context) throws IOException {
[33/50] incubator-kylin git commit: KYLIN-653 adding streaming build
test cases
Posted by li...@apache.org.
KYLIN-653 adding streaming build test cases
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/4df05317
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/4df05317
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/4df05317
Branch: refs/heads/streaming-localdict
Commit: 4df05317e1a754d1b1e422fdf5df580b2fa3366d
Parents: bbbcae8
Author: honma <ho...@ebay.com>
Authored: Fri Mar 27 09:49:52 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Fri Mar 27 15:16:20 2015 +0800
----------------------------------------------------------------------
.../kylin/invertedindex/index/TableRecord.java | 5 +-
.../invertedindex/index/TableRecordInfo.java | 10 +--
.../kylin/invertedindex/model/IIDesc.java | 1 +
.../model/IIKeyValueCodecWithState.java | 6 +-
.../kylin/job/hadoop/cube/BaseCuboidMapper.java | 20 +++--
.../cube/FactDistinctIIColumnsMapper.java | 9 +-
.../kylin/job/BuildCubeWithEngineTest.java | 1 -
.../invertedindex/IIStreamBuilder.java | 33 ++++---
.../IIKeyValueCodecWithStateTest.java | 91 ++++++++++++++++++++
.../invertedindex/ToyIIStreamBuilder.java | 35 ++++++++
10 files changed, 177 insertions(+), 34 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4df05317/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecord.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecord.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecord.java
index ce1b7e0..78cea3d 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecord.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecord.java
@@ -18,15 +18,12 @@
package org.apache.kylin.invertedindex.index;
-import com.google.common.collect.Lists;
-import org.apache.kylin.dict.DateStrDictionary;
import org.apache.commons.lang.ObjectUtils;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.io.LongWritable;
+import org.apache.kylin.dict.DateStrDictionary;
import org.apache.kylin.dict.Dictionary;
-import java.util.List;
-
/**
* @author yangli9, honma
* <p/>
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4df05317/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecordInfo.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecordInfo.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecordInfo.java
index 3136ebb..9a08e64 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecordInfo.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecordInfo.java
@@ -18,19 +18,17 @@
package org.apache.kylin.invertedindex.index;
-import com.google.common.collect.Maps;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
import org.apache.kylin.dict.Dictionary;
import org.apache.kylin.invertedindex.IISegment;
import org.apache.kylin.invertedindex.model.IIDesc;
import org.apache.kylin.metadata.measure.fixedlen.FixedLenMeasureCodec;
-import org.apache.kylin.metadata.model.ColumnDesc;
import org.apache.kylin.metadata.model.DataType;
import org.apache.kylin.metadata.model.TblColRef;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-
/**
* @author yangli9
* <p/>
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4df05317/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIDesc.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIDesc.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIDesc.java
index cda3c4d..17edb86 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIDesc.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIDesc.java
@@ -319,6 +319,7 @@ public class IIDesc extends RootPersistentEntity {
return sliceSize;
}
+
public String getSignature() {
return signature;
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4df05317/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
index a8e149a..e838283 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
@@ -26,6 +26,7 @@ public class IIKeyValueCodecWithState extends IIKeyValueCodec {
protected static class IIRowDecoderWithState extends IIRowDecoder {
final ArrayList<IIRow> buffer = Lists.newArrayList();
+ private Iterator<Slice> superIterator = null;
private IIRowDecoderWithState(TableRecordInfoDigest digest, Iterator<IIRow> iiRowIterator) {
super(digest, iiRowIterator);
@@ -33,7 +34,10 @@ public class IIKeyValueCodecWithState extends IIKeyValueCodec {
}
private Iterator<Slice> getSuperIterator() {
- return super.iterator();
+ if (superIterator == null) {
+ superIterator = super.iterator();
+ }
+ return superIterator;
}
@Override
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4df05317/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java
index 41b21a7..a023c0c 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java
@@ -210,20 +210,24 @@ public class BaseCuboidMapper<KEYIN> extends KylinMapper<KEYIN, Text, Text, Text
try {
bytesSplitter.split(value.getBytes(), value.getLength(), byteRowDelimiter);
- intermediateTableDesc.sanityCheck(bytesSplitter);
+ outputKV(context);
- byte[] rowKey = buildKey(bytesSplitter.getSplitBuffers());
- outputKey.set(rowKey, 0, rowKey.length);
-
- buildValue(bytesSplitter.getSplitBuffers());
- outputValue.set(valueBuf.array(), 0, valueBuf.position());
-
- context.write(outputKey, outputValue);
} catch (Exception ex) {
handleErrorRecord(bytesSplitter, ex);
}
}
+ private void outputKV(Context context) throws IOException, InterruptedException {
+ intermediateTableDesc.sanityCheck(bytesSplitter);
+
+ byte[] rowKey = buildKey(bytesSplitter.getSplitBuffers());
+ outputKey.set(rowKey, 0, rowKey.length);
+
+ buildValue(bytesSplitter.getSplitBuffers());
+ outputValue.set(valueBuf.array(), 0, valueBuf.position());
+ context.write(outputKey, outputValue);
+ }
+
private void handleErrorRecord(BytesSplitter bytesSplitter, Exception ex) throws IOException {
System.err.println("Insane record: " + bytesSplitter);
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4df05317/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
index 75e127e..705e272 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
@@ -20,6 +20,7 @@ package org.apache.kylin.job.hadoop.cube;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
@@ -50,7 +51,7 @@ public class FactDistinctIIColumnsMapper extends FactDistinctColumnsMapperBase<I
private IIJoinedFlatTableDesc intermediateTableDesc;
private ArrayList<IIRow> buffer = Lists.newArrayList();
- private Iterable<Slice> slices;
+ private Iterator<Slice> slices;
private String iiName;
private IIInstance ii;
@@ -72,7 +73,7 @@ public class FactDistinctIIColumnsMapper extends FactDistinctColumnsMapperBase<I
intermediateTableDesc = new IIJoinedFlatTableDesc(iiDesc);
TableRecordInfo info = new TableRecordInfo(iiDesc);
KeyValueCodec codec = new IIKeyValueCodecWithState(info.getDigest());
- slices = codec.decodeKeyValue(buffer);
+ slices = codec.decodeKeyValue(buffer).iterator();
baseCuboidCol2FlattenTableCol = new int[factDictCols.size()];
for (int i = 0; i < factDictCols.size(); ++i) {
@@ -98,9 +99,9 @@ public class FactDistinctIIColumnsMapper extends FactDistinctColumnsMapperBase<I
}
buffer.add(iiRow);
- if (slices.iterator().hasNext()) {
+ if (slices.hasNext()) {
byte[] vBytesBuffer = null;
- Slice slice = slices.iterator().next();
+ Slice slice = slices.next();
for (RawTableRecord record : slice) {
for (int i = 0; i < factDictCols.size(); ++i) {
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4df05317/job/src/test/java/org/apache/kylin/job/BuildCubeWithEngineTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/BuildCubeWithEngineTest.java b/job/src/test/java/org/apache/kylin/job/BuildCubeWithEngineTest.java
index ce70f2c..a33dab5 100644
--- a/job/src/test/java/org/apache/kylin/job/BuildCubeWithEngineTest.java
+++ b/job/src/test/java/org/apache/kylin/job/BuildCubeWithEngineTest.java
@@ -107,7 +107,6 @@ public class BuildCubeWithEngineTest {
jobService.deleteJob(jobId);
}
}
-
}
@After
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4df05317/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java b/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java
index f9adefe..0cf3c77 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java
@@ -82,7 +82,11 @@ public class IIStreamBuilder extends StreamBuilder {
super(queue, desc.getSliceSize());
this.desc = desc;
try {
- this.hTable = HConnectionManager.createConnection(HBaseConfiguration.create()).getTable(hTableName);
+ if (hTableName != null) {
+ this.hTable = HConnectionManager.createConnection(HBaseConfiguration.create()).getTable(hTableName);
+ } else {
+ this.hTable = null;
+ }
} catch (IOException e) {
logger.error("cannot open htable name:" + hTableName, e);
throw new RuntimeException("cannot open htable name:" + hTableName, e);
@@ -105,12 +109,18 @@ public class IIStreamBuilder extends StreamBuilder {
TableRecordInfo tableRecordInfo = new TableRecordInfo(desc, dictionaryMap);
final Slice slice = buildSlice(table, sliceBuilder, tableRecordInfo, dictionaryMap);
logger.info("slice info, shard:" + slice.getShard() + " timestamp:" + slice.getTimestamp() + " record count:" + slice.getRecordCount());
- loadToHBase(hTable, slice, new IIKeyValueCodec(tableRecordInfo.getDigest()));
+
+ outputSlice(slice, tableRecordInfo);
submitOffset();
+
stopwatch.stop();
logger.info("stream build finished, size:" + streamsToBuild.size() + " elapsed time:" + stopwatch.elapsedTime(TimeUnit.MILLISECONDS) + TimeUnit.MILLISECONDS);
}
+ protected void outputSlice(Slice slice, TableRecordInfo tableRecordInfo) throws IOException {
+ loadToHBase(hTable, slice, new IIKeyValueCodec(tableRecordInfo.getDigest()));
+ }
+
private Map<Integer, Dictionary<?>> buildDictionary(List<List<String>> table, IIDesc desc) {
HashMultimap<TblColRef, String> valueMap = HashMultimap.create();
final List<TblColRef> allColumns = desc.listAllColumns();
@@ -122,15 +132,19 @@ public class IIStreamBuilder extends StreamBuilder {
}
}
}
+
Map<Integer, Dictionary<?>> result = Maps.newHashMap();
for (TblColRef tblColRef : valueMap.keySet()) {
- result.put(desc.findColumn(tblColRef), DictionaryGenerator.buildDictionaryFromValueList(tblColRef.getType(), Collections2.transform(valueMap.get(tblColRef), new Function<String, byte[]>() {
- @Nullable
- @Override
- public byte[] apply(String input) {
- return input.getBytes();
- }
- })));
+ result.put(desc.findColumn(tblColRef), //
+ DictionaryGenerator.buildDictionaryFromValueList(//
+ tblColRef.getType(), //
+ Collections2.transform(valueMap.get(tblColRef), new Function<String, byte[]>() {
+ @Nullable
+ @Override
+ public byte[] apply(String input) {
+ return input.getBytes();
+ }
+ })));
}
return result;
}
@@ -178,7 +192,6 @@ public class IIStreamBuilder extends StreamBuilder {
}
}
-
private void submitOffset() {
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4df05317/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java
new file mode 100644
index 0000000..25e250c
--- /dev/null
+++ b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java
@@ -0,0 +1,91 @@
+package org.apache.kylin.streaming.invertedindex;
+
+import java.util.*;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.LinkedBlockingQueue;
+
+import javax.annotation.Nullable;
+
+import org.apache.kylin.common.util.LocalFileMetadataTestCase;
+import org.apache.kylin.invertedindex.IIInstance;
+import org.apache.kylin.invertedindex.IIManager;
+import org.apache.kylin.invertedindex.index.Slice;
+import org.apache.kylin.invertedindex.index.TableRecordInfo;
+import org.apache.kylin.invertedindex.index.TableRecordInfoDigest;
+import org.apache.kylin.invertedindex.model.IIDesc;
+import org.apache.kylin.invertedindex.model.IIKeyValueCodecWithState;
+import org.apache.kylin.invertedindex.model.IIRow;
+import org.apache.kylin.invertedindex.model.KeyValueCodec;
+import org.apache.kylin.streaming.Stream;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Collections2;
+import com.google.common.collect.Lists;
+
+/**
+ * Created by Hongbin Ma(Binmahone) on 3/26/15.
+ */
+public class IIKeyValueCodecWithStateTest extends LocalFileMetadataTestCase {
+
+ IIInstance ii;
+ IIDesc iiDesc;
+ List<IIRow> iiRowList = Lists.newArrayList();
+
+ final String[] inputs = new String[] { //
+ "FP-non GTC,0,15,145970,0,28,Toys,2008-10-08 07:18:40,USER_Y,Toys & Hobbies,Models & Kits,Automotive,0,Ebay,USER_S,15,Professional-Other,2012-08-16,2012-08-11,0,2012-08-16,145970,10000329,26.8551,0", //
+ "ABIN,0,-99,43479,0,21,Photo,2012-09-11 20:26:04,USER_Y,Cameras & Photo,Film Photography,Other,0,Ebay,USER_S,-99,Not Applicable,2012-08-16,2012-08-11,2012-08-16,43479,10000807,26.2474,0", //
+ "ABIN,0,16,80053,0,12,Computers,2012-06-19 21:15:09,USER_Y,Computers/Tablets & Networking,MonitorProjectors & Accs,Monitors,0,Ebay,USER_S,16,Consumer-Other,2012-08-16,2012-08-11,0,2012-08-16,80053,10000261,94.2273,0" };
+
+ @Before
+ public void setUp() throws Exception {
+ this.createTestMetadata();
+ this.ii = IIManager.getInstance(getTestConfig()).getII("test_kylin_ii_inner_join");
+ this.iiDesc = ii.getDescriptor();
+
+ Collection<?> streams = Collections2.transform(Arrays.asList(inputs), new Function<String, Stream>() {
+ @Nullable
+ @Override
+ public Stream apply(String input) {
+ return new Stream(0, input.getBytes());
+ }
+ });
+ LinkedBlockingQueue q = new LinkedBlockingQueue();
+ q.addAll(streams);
+ q.put(new Stream(-1, null));//a stop sign for builder
+
+ ToyIIStreamBuilder builder = new ToyIIStreamBuilder(q, iiDesc, 0, iiRowList);
+ ExecutorService executorService = Executors.newSingleThreadExecutor();
+ Future<?> future = executorService.submit(builder);
+ future.get();
+ }
+
+ @Test
+ public void basicTest() {
+ ArrayList<IIRow> buffer = Lists.newArrayList();
+ TableRecordInfo info = new TableRecordInfo(iiDesc);
+ TableRecordInfoDigest digest = info.getDigest();
+ int columnCount = digest.getColumnCount();
+ KeyValueCodec codec = new IIKeyValueCodecWithState(digest);
+ Iterator<Slice> slices = codec.decodeKeyValue(buffer).iterator();
+
+ Assert.assertTrue(!slices.hasNext());
+ Assert.assertEquals(iiRowList.size(), digest.getColumnCount());
+
+ for (int i = 0; i < digest.getColumnCount(); ++i) {
+ buffer.add(iiRowList.get(i));
+
+ if (i != digest.getColumnCount() - 1) {
+ Assert.assertTrue(!slices.hasNext());
+ } else {
+ Assert.assertTrue(slices.hasNext());
+ }
+ }
+
+ Slice newSlice = slices.next();
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4df05317/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/ToyIIStreamBuilder.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/ToyIIStreamBuilder.java b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/ToyIIStreamBuilder.java
new file mode 100644
index 0000000..161b6f6
--- /dev/null
+++ b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/ToyIIStreamBuilder.java
@@ -0,0 +1,35 @@
+package org.apache.kylin.streaming.invertedindex;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.BlockingQueue;
+
+import org.apache.kylin.invertedindex.index.Slice;
+import org.apache.kylin.invertedindex.index.TableRecordInfo;
+import org.apache.kylin.invertedindex.model.IIDesc;
+import org.apache.kylin.invertedindex.model.IIKeyValueCodec;
+import org.apache.kylin.invertedindex.model.IIRow;
+import org.apache.kylin.streaming.Stream;
+
+/**
+ * Created by Hongbin Ma(Binmahone) on 3/26/15.
+ *
+ * A IIStreamBuilder that can hold all the built slices in form of IIRow
+ * This is only for test use
+ */
+public class ToyIIStreamBuilder extends IIStreamBuilder {
+ private List<IIRow> result;
+
+ public ToyIIStreamBuilder(BlockingQueue<Stream> queue, IIDesc desc, int partitionId, List<IIRow> result) {
+ super(queue, null, desc, partitionId);
+ this.result = result;
+ }
+
+ protected void outputSlice(Slice slice, TableRecordInfo tableRecordInfo) throws IOException {
+ IIKeyValueCodec codec = new IIKeyValueCodec(tableRecordInfo.getDigest());
+ for (IIRow iiRow : codec.encodeKeyValue(slice)) {
+ result.add(iiRow);
+ }
+ }
+
+}
[44/50] incubator-kylin git commit: KYLIN-653 minor change
Posted by li...@apache.org.
KYLIN-653 minor change
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/cff578a7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/cff578a7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/cff578a7
Branch: refs/heads/streaming-localdict
Commit: cff578a7df8e75b07caf4f21803f0426aa94485e
Parents: a36d416
Author: honma <ho...@ebay.com>
Authored: Fri Mar 27 18:05:23 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Fri Mar 27 18:06:19 2015 +0800
----------------------------------------------------------------------
.../main/java/org/apache/kylin/job/cube/CubingJobBuilder.java | 2 --
.../java/org/apache/kylin/job/BuildCubeWithEngineTest.java | 7 ++-----
.../apache/kylin/job/hadoop/invertedindex/II2CubeTest.java | 2 ++
3 files changed, 4 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/cff578a7/job/src/main/java/org/apache/kylin/job/cube/CubingJobBuilder.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/cube/CubingJobBuilder.java b/job/src/main/java/org/apache/kylin/job/cube/CubingJobBuilder.java
index 278f4dd..7cde298 100644
--- a/job/src/main/java/org/apache/kylin/job/cube/CubingJobBuilder.java
+++ b/job/src/main/java/org/apache/kylin/job/cube/CubingJobBuilder.java
@@ -170,9 +170,7 @@ public final class CubingJobBuilder extends AbstractJobBuilder {
final AbstractExecutable intermediateHiveTableStep = createIntermediateHiveTableStep(intermediateTableDesc, jobId);
result.addTask(intermediateHiveTableStep);
-
result.addTask(createFactDistinctColumnsStep(seg, intermediateHiveTableName, jobId));
-
result.addTask(createBuildDictionaryStep(seg, factDistinctColumnsPath));
MapReduceExecutable baseCuboidStep = null;
if(!useImMemCubing) {
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/cff578a7/job/src/test/java/org/apache/kylin/job/BuildCubeWithEngineTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/BuildCubeWithEngineTest.java b/job/src/test/java/org/apache/kylin/job/BuildCubeWithEngineTest.java
index a33dab5..dc2f74f 100644
--- a/job/src/test/java/org/apache/kylin/job/BuildCubeWithEngineTest.java
+++ b/job/src/test/java/org/apache/kylin/job/BuildCubeWithEngineTest.java
@@ -51,16 +51,13 @@ import static org.junit.Assert.assertEquals;
public class BuildCubeWithEngineTest {
- private JobEngineConfig jobEngineConfig;
+ private static final Log logger = LogFactory.getLog(BuildCubeWithEngineTest.class);
+ private JobEngineConfig jobEngineConfig;
private CubeManager cubeManager;
-
private DefaultScheduler scheduler;
-
protected ExecutableManager jobService;
- private static final Log logger = LogFactory.getLog(BuildCubeWithEngineTest.class);
-
protected void waitForJob(String jobId) {
while (true) {
AbstractExecutable job = jobService.getJob(jobId);
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/cff578a7/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java b/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
index da1cb18..080da1b 100644
--- a/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
+++ b/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
@@ -22,6 +22,7 @@ import org.apache.kylin.invertedindex.index.TableRecordInfoDigest;
import org.apache.kylin.invertedindex.model.*;
import org.apache.kylin.job.constant.BatchConstants;
import org.apache.kylin.job.hadoop.cube.FactDistinctIIColumnsMapper;
+import org.apache.kylin.job.hadoop.cube.IIToBaseCuboidMapper;
import org.apache.kylin.streaming.Stream;
import org.apache.kylin.streaming.StringStreamParser;
import org.apache.kylin.streaming.invertedindex.SliceBuilder;
@@ -133,4 +134,5 @@ public class II2CubeTest extends LocalFileMetadataTestCase {
Assert.assertTrue(lstgNames.contains(pair.getSecond().toString()));
}
}
+
}
[41/50] incubator-kylin git commit: Merge branch
'streaming-localdict' of https://github.com/KylinOLAP/Kylin into
streaming-localdict
Posted by li...@apache.org.
Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/ea96dc54
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/ea96dc54
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/ea96dc54
Branch: refs/heads/streaming-localdict
Commit: ea96dc5453553d40a31e6fce7f5d7f489514f883
Parents: 8e6afbf f3a592b
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 16:51:20 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 16:51:20 2015 +0800
----------------------------------------------------------------------
.../hadoop/cube/FactDistinctHiveColumnsMapper.java | 16 ++++++++--------
.../kylin/job/hadoop/invertedindex/II2CubeTest.java | 8 ++++----
2 files changed, 12 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
[22/50] incubator-kylin git commit: Merge branch
'streaming-localdict' of https://github.com/KylinOLAP/Kylin into
streaming-localdict
Posted by li...@apache.org.
Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/3d3cee84
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/3d3cee84
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/3d3cee84
Branch: refs/heads/streaming-localdict
Commit: 3d3cee8475a335f984283d584962dd25f7f00754
Parents: b6b3388 7f73abe
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 11:58:26 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 11:58:26 2015 +0800
----------------------------------------------------------------------
.../org/apache/kylin/common/util/ByteArray.java | 23 ++-
.../java/org/apache/kylin/dict/Dictionary.java | 31 ++--
.../org/apache/kylin/dict/TrieDictionary.java | 48 ++---
.../apache/kylin/dict/NumberDictionaryTest.java | 2 +-
.../hadoop/cubev2/BuildDictionaryMapper.java | 184 +++++++++++++++++++
.../gridtable/GTDictionaryCodeSystem.java | 3 +-
.../kylin/storage/gridtable/GTRecord.java | 5 +-
.../kafka_streaming_test/eagle.properties | 10 +
8 files changed, 247 insertions(+), 59 deletions(-)
----------------------------------------------------------------------
[12/50] incubator-kylin git commit: Refine GTDictionaryCodeSystem.java
Posted by li...@apache.org.
Refine GTDictionaryCodeSystem.java
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/7360f5bd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/7360f5bd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/7360f5bd
Branch: refs/heads/streaming-localdict
Commit: 7360f5bd61f28a38e16d61f0b1388024ebe51fdd
Parents: 3bf6b37
Author: Shao Feng, Shi <sh...@ebay.com>
Authored: Thu Mar 26 21:47:51 2015 +0800
Committer: Shao Feng, Shi <sh...@ebay.com>
Committed: Thu Mar 26 21:47:51 2015 +0800
----------------------------------------------------------------------
.../gridtable/GTDictionaryCodeSystem.java | 68 +++++++++++---------
1 file changed, 38 insertions(+), 30 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/7360f5bd/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
index cff108a..45b5d5f 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
@@ -1,6 +1,5 @@
package org.apache.kylin.storage.gridtable;
-import com.google.common.collect.Maps;
import org.apache.kylin.common.util.ByteArray;
import org.apache.kylin.common.util.BytesUtil;
import org.apache.kylin.dict.Dictionary;
@@ -9,7 +8,6 @@ import org.apache.kylin.metadata.measure.MeasureAggregator;
import org.apache.kylin.metadata.serializer.DataTypeSerializer;
import java.nio.ByteBuffer;
-import java.util.BitSet;
import java.util.Map;
/**
@@ -17,10 +15,9 @@ import java.util.Map;
*/
public class GTDictionaryCodeSystem implements IGTCodeSystem {
private GTInfo info;
- private BitSet encodedColumns = null;
private Map<Integer, Dictionary> dictionaryMaps = null; // key: column index; value: dictionary for this column;
- private Map<Integer, DataTypeSerializer> serializerMap = null; // column index; value: serializer for this column;
private IFilterCodeSystem<ByteArray> filterCS;
+ private DataTypeSerializer[] serializers;
public GTDictionaryCodeSystem(Map<Integer, Dictionary> dictionaryMaps) {
this.dictionaryMaps = dictionaryMaps;
@@ -29,15 +26,13 @@ public class GTDictionaryCodeSystem implements IGTCodeSystem {
@Override
public void init(GTInfo info) {
this.info = info;
- encodedColumns = new BitSet();
- for (Integer index : dictionaryMaps.keySet()) {
- encodedColumns.set(index);
- }
- serializerMap = Maps.newHashMap();
+ serializers = new DataTypeSerializer[info.nColumns];
for (int i = 0; i < info.nColumns; i++) {
- if (!encodedColumns.get(i)) {
- serializerMap.put(i, DataTypeSerializer.create(info.colTypes[i]));
+ if (dictionaryMaps.get(i) != null) {
+ serializers[i] = new DictionarySerializer(dictionaryMaps.get(i));
+ } else {
+ serializers[i] = DataTypeSerializer.create(info.colTypes[i]);
}
}
@@ -77,35 +72,22 @@ public class GTDictionaryCodeSystem implements IGTCodeSystem {
@Override
public int codeLength(int col, ByteBuffer buf) {
- if (useDictionary(col))
- return dictionaryMaps.get(col).getSizeOfId();
- else
- return serializerMap.get(col).peekLength(buf);
+ return serializers[col].peekLength(buf);
}
@Override
public void encodeColumnValue(int col, Object value, ByteBuffer buf) {
- if (useDictionary(col)) {
- int id = dictionaryMaps.get(col).getIdFromValue(value);
- BytesUtil.writeUnsigned(id, dictionaryMaps.get(col).getSizeOfId(), buf);
- } else {
- serializerMap.get(col).serialize(value, buf);
- }
+ serializers[col].serialize(value, buf);
}
@Override
public void encodeColumnValue(int col, Object value, int roundingFlag, ByteBuffer buf) {
-
+ throw new UnsupportedOperationException();
}
@Override
public Object decodeColumnValue(int col, ByteBuffer buf) {
- if (useDictionary(col)) {
- int id = BytesUtil.readUnsigned(buf, dictionaryMaps.get(col).getSizeOfId());
- return dictionaryMaps.get(col).getValueFromId(id);
- } else {
- return serializerMap.get(col).deserialize(buf);
- }
+ return serializers[col].deserialize(buf);
}
@Override
@@ -113,7 +95,33 @@ public class GTDictionaryCodeSystem implements IGTCodeSystem {
return MeasureAggregator.create(aggrFunction, info.colTypes[col].toString());
}
- private boolean useDictionary(int col) {
- return encodedColumns.get(col);
+ class DictionarySerializer extends DataTypeSerializer {
+ private Dictionary dictionary;
+
+ DictionarySerializer(Dictionary dictionary) {
+ this.dictionary = dictionary;
+ }
+
+ @Override
+ public void serialize(Object value, ByteBuffer out) {
+ int id = dictionary.getIdFromValue(value);
+ BytesUtil.writeUnsigned(id, dictionary.getSizeOfId(), out);
+ }
+
+ @Override
+ public Object deserialize(ByteBuffer in) {
+ int id = BytesUtil.readUnsigned(in, dictionary.getSizeOfId());
+ return dictionary.getValueFromId(id);
+ }
+
+ @Override
+ public int peekLength(ByteBuffer in) {
+ return dictionary.getSizeOfId();
+ }
+
+ @Override
+ public Object valueOf(byte[] value) {
+ throw new UnsupportedOperationException();
+ }
}
}
[09/50] incubator-kylin git commit: refactor
Posted by li...@apache.org.
refactor
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/21b8f0f6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/21b8f0f6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/21b8f0f6
Branch: refs/heads/streaming-localdict
Commit: 21b8f0f6f43bec5caba0d7c5bbac2f47a5aef27a
Parents: 9a1c4cb
Author: qianhao.zhou <qi...@ebay.com>
Authored: Thu Mar 26 18:04:14 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Thu Mar 26 18:04:14 2015 +0800
----------------------------------------------------------------------
.../java/org/apache/kylin/streaming/KafkaConsumer.java | 5 +++--
.../org/apache/kylin/streaming/StreamingBootstrap.java | 12 +++++-------
.../kylin/streaming/invertedindex/IIStreamBuilder.java | 4 ++--
3 files changed, 10 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/21b8f0f6/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java b/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
index 910041c..18c8403 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
@@ -46,6 +46,7 @@ import java.nio.ByteBuffer;
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicLong;
/**
@@ -59,7 +60,7 @@ public abstract class KafkaConsumer implements Runnable {
private KafkaConfig kafkaConfig;
private List<Broker> replicaBrokers;
private long offset;
- private BlockingQueue<Stream> streamQueue;
+ private LinkedBlockingQueue<Stream> streamQueue;
private Logger logger;
@@ -70,7 +71,7 @@ public abstract class KafkaConsumer implements Runnable {
offset = startOffset;
this.replicaBrokers = initialBrokers;
logger = LoggerFactory.getLogger("KafkaConsumer_" + topic + "_" + partitionId);
- streamQueue = new ArrayBlockingQueue<Stream>(kafkaConfig.getMaxReadCount());
+ streamQueue = new LinkedBlockingQueue<Stream>(kafkaConfig.getMaxReadCount());
}
public BlockingQueue<Stream> getStreamQueue() {
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/21b8f0f6/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java b/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java
index 4528a72..4b7c6b7 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java
@@ -43,9 +43,11 @@ import org.apache.kylin.invertedindex.IIDescManager;
import org.apache.kylin.invertedindex.IIInstance;
import org.apache.kylin.invertedindex.IIManager;
import org.apache.kylin.invertedindex.model.IIDesc;
+import org.apache.kylin.streaming.invertedindex.IIStreamBuilder;
import java.nio.ByteBuffer;
import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
/**
* Created by qianzhou on 3/26/15.
@@ -91,12 +93,8 @@ public class StreamingBootstrap {
}
};
final IIDesc desc = ii.getDescriptor();
- Executors.newSingleThreadExecutor().execute(consumer);
- while (true) {
- final Stream stream = consumer.getStreamQueue().poll();
- if (stream != null) {
- System.out.println("offset:" + stream.getOffset() + " content:" + new String(stream.getRawData()));
- }
- }
+ Executors.newSingleThreadExecutor().submit(consumer);
+ final Future<?> future = Executors.newSingleThreadExecutor().submit(new IIStreamBuilder(consumer.getStreamQueue(), ii.getSegments().get(0).getStorageLocationIdentifier(), desc, partitionId));
+ future.get();
}
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/21b8f0f6/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java b/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java
index 9724ba7..f9adefe 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java
@@ -64,7 +64,7 @@ import javax.annotation.Nullable;
import java.io.IOException;
import java.util.List;
import java.util.Map;
-import java.util.concurrent.LinkedBlockingDeque;
+import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;
/**
@@ -78,7 +78,7 @@ public class IIStreamBuilder extends StreamBuilder {
private final HTableInterface hTable;
private final BatchSliceBuilder sliceBuilder;
- public IIStreamBuilder(LinkedBlockingDeque<Stream> queue, String hTableName, IIDesc desc, int partitionId) {
+ public IIStreamBuilder(BlockingQueue<Stream> queue, String hTableName, IIDesc desc, int partitionId) {
super(queue, desc.getSliceSize());
this.desc = desc;
try {
[35/50] incubator-kylin git commit: refactor
Posted by li...@apache.org.
refactor
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/b979dfae
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/b979dfae
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/b979dfae
Branch: refs/heads/streaming-localdict
Commit: b979dfaea999b548e089a0e41d38e164e7b46662
Parents: dee2955
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 15:35:57 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 15:35:57 2015 +0800
----------------------------------------------------------------------
.../kylin/job/streaming/StreamingBootstrap.java | 17 +++++++++++------
.../apache/kylin/job/streaming/StreamingCLI.java | 2 --
.../org/apache/kylin/streaming/KafkaConsumer.java | 10 ++++------
3 files changed, 15 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/b979dfae/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java b/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
index 65b23c4..5d1673c 100644
--- a/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
+++ b/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
@@ -66,7 +66,14 @@ public class StreamingBootstrap {
private Map<String, KafkaConsumer> kafkaConsumers = Maps.newConcurrentMap();
public static StreamingBootstrap getInstance(KylinConfig kylinConfig) {
- return new StreamingBootstrap(kylinConfig);
+ final StreamingBootstrap bootstrap = new StreamingBootstrap(kylinConfig);
+ Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+ @Override
+ public void run() {
+ bootstrap.stop();
+ }
+ }));
+ return bootstrap;
}
private StreamingBootstrap(KylinConfig kylinConfig) {
@@ -84,11 +91,9 @@ public class StreamingBootstrap {
}
}
- public void stop(String streaming, int partitionId) throws Exception {
- final KafkaConsumer consumer = kafkaConsumers.remove(getKey(streaming, partitionId));
- if (consumer != null) {
+ public void stop() {
+ for (KafkaConsumer consumer : kafkaConsumers.values()) {
consumer.stop();
- consumer.getStreamQueue().put(Stream.EOF);
}
}
@@ -125,7 +130,7 @@ public class StreamingBootstrap {
task.setStreamParser(JsonStreamParser.instance);
Executors.newSingleThreadExecutor().submit(consumer);
- Executors.newSingleThreadExecutor().submit(task);
+ Executors.newSingleThreadExecutor().submit(task).get();
}
private String getKey(String streaming, int partitionId) {
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/b979dfae/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java b/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
index 4977339..219ca41 100644
--- a/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
+++ b/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
@@ -55,8 +55,6 @@ public class StreamingCLI {
if (args[0].equals("start")) {
String kafkaConfName = args[1];
StreamingBootstrap.getInstance(KylinConfig.getInstanceFromEnv()).start(kafkaConfName, 0);
- } else if (args.equals("stop")) {
-
} else {
printArgsError(args);
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/b979dfae/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java b/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
index b083dea..868673d 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
@@ -34,7 +34,6 @@
package org.apache.kylin.streaming;
-import kafka.api.OffsetRequest;
import kafka.cluster.Broker;
import kafka.javaapi.FetchResponse;
import kafka.javaapi.PartitionMetadata;
@@ -44,10 +43,8 @@ import org.slf4j.LoggerFactory;
import java.nio.ByteBuffer;
import java.util.List;
-import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.atomic.AtomicLong;
/**
* Created by qianzhou on 2/15/15.
@@ -64,7 +61,7 @@ public abstract class KafkaConsumer implements Runnable {
private Logger logger;
- private volatile boolean stop = false;
+ private volatile boolean isRunning = true;
public KafkaConsumer(String topic, int partitionId, long startOffset, List<Broker> initialBrokers, KafkaConfig kafkaConfig) {
this.topic = topic;
@@ -94,7 +91,7 @@ public abstract class KafkaConsumer implements Runnable {
public void run() {
try {
Broker leadBroker = getLeadBroker();
- while (!stop) {
+ while (isRunning) {
if (leadBroker == null) {
leadBroker = getLeadBroker();
}
@@ -118,6 +115,7 @@ public abstract class KafkaConsumer implements Runnable {
offset++;
}
}
+ getStreamQueue().put(Stream.EOF);
} catch (Exception e) {
logger.error("consumer has encountered an error", e);
}
@@ -126,7 +124,7 @@ public abstract class KafkaConsumer implements Runnable {
protected abstract void consume(long offset, ByteBuffer payload) throws Exception;
public void stop() {
- this.stop = true;
+ this.isRunning = false;
}
}
[04/50] incubator-kylin git commit: refactor
Posted by li...@apache.org.
refactor
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/56d57a2d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/56d57a2d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/56d57a2d
Branch: refs/heads/streaming-localdict
Commit: 56d57a2d5940a19c575125a9aff073235355b4c3
Parents: 9dd1512
Author: qianhao.zhou <qi...@ebay.com>
Authored: Thu Mar 26 16:21:41 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Thu Mar 26 16:21:41 2015 +0800
----------------------------------------------------------------------
.../org/apache/kylin/streaming/JsonStreamParser.java | 11 +++++++++--
.../java/org/apache/kylin/streaming/KafkaConfig.java | 3 +++
.../java/org/apache/kylin/streaming/StreamParser.java | 3 +--
.../org/apache/kylin/streaming/StringStreamParser.java | 2 +-
.../apache/kylin/streaming/cube/CubeStreamBuilder.java | 2 +-
.../streaming/invertedindex/PrintOutStreamBuilder.java | 6 +++---
6 files changed, 18 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/56d57a2d/streaming/src/main/java/org/apache/kylin/streaming/JsonStreamParser.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/JsonStreamParser.java b/streaming/src/main/java/org/apache/kylin/streaming/JsonStreamParser.java
index cb43dc6..5c8b49d 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/JsonStreamParser.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/JsonStreamParser.java
@@ -35,6 +35,7 @@
package org.apache.kylin.streaming;
import com.google.common.collect.Lists;
+import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import org.apache.kylin.metadata.model.TblColRef;
@@ -42,6 +43,7 @@ import org.apache.kylin.metadata.model.TblColRef;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
+import java.util.Map;
/**
* Created by qianzhou on 3/25/15.
@@ -55,11 +57,16 @@ public final class JsonStreamParser implements StreamParser {
private JsonStreamParser(){}
@Override
- public List<String> parse(Stream stream, Collection<TblColRef> allColumns) {
+ public List<String> parse(Stream stream, List<TblColRef> allColumns) {
final JsonObject root = jsonParser.parse(new String(stream.getRawData())).getAsJsonObject();
ArrayList<String> result = Lists.newArrayList();
+
for (TblColRef column : allColumns) {
- result.add(root.get(column.getName()).getAsString());
+ for (Map.Entry<String, JsonElement> entry : root.entrySet()) {
+ if (entry.getKey().equalsIgnoreCase(column.getName())) {
+ result.add(entry.getValue().getAsString());
+ }
+ }
}
return result;
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/56d57a2d/streaming/src/main/java/org/apache/kylin/streaming/KafkaConfig.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/KafkaConfig.java b/streaming/src/main/java/org/apache/kylin/streaming/KafkaConfig.java
index ee5a96a..b22c7e0 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/KafkaConfig.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/KafkaConfig.java
@@ -76,6 +76,9 @@ public class KafkaConfig extends RootPersistentEntity {
@JsonProperty("bufferSize")
private int bufferSize;
+ @JsonProperty("iiDesc")
+ private String iiDesc;
+
private int partitionId;
public int getTimeout() {
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/56d57a2d/streaming/src/main/java/org/apache/kylin/streaming/StreamParser.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/StreamParser.java b/streaming/src/main/java/org/apache/kylin/streaming/StreamParser.java
index 0c59151..9b41c95 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/StreamParser.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/StreamParser.java
@@ -36,7 +36,6 @@ package org.apache.kylin.streaming;
import org.apache.kylin.metadata.model.TblColRef;
-import java.util.Collection;
import java.util.List;
/**
@@ -44,5 +43,5 @@ import java.util.List;
*/
public interface StreamParser {
- List<String> parse(Stream stream, Collection<TblColRef> allColumns);
+ List<String> parse(Stream stream, List<TblColRef> allColumns);
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/56d57a2d/streaming/src/main/java/org/apache/kylin/streaming/StringStreamParser.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/StringStreamParser.java b/streaming/src/main/java/org/apache/kylin/streaming/StringStreamParser.java
index 7611869..3c62a3a 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/StringStreamParser.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/StringStreamParser.java
@@ -49,7 +49,7 @@ public final class StringStreamParser implements StreamParser {
private StringStreamParser(){}
@Override
- public List<String> parse(Stream stream, Collection<TblColRef> allColumns) {
+ public List<String> parse(Stream stream, List<TblColRef> allColumns) {
return Lists.newArrayList(new String(stream.getRawData()).split(","));
}
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/56d57a2d/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java b/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
index 9429033..912c3cd 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
@@ -410,7 +410,7 @@ public class CubeStreamBuilder extends StreamBuilder {
}
private List<String> parseStream(Stream stream, CubeDesc desc) {
- return getStreamParser().parse(stream, desc.listAllColumns());
+ return getStreamParser().parse(stream, Lists.newArrayList(desc.listAllColumns()));
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/56d57a2d/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/PrintOutStreamBuilder.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/PrintOutStreamBuilder.java b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/PrintOutStreamBuilder.java
index 43aa0a5..e83bdc5 100644
--- a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/PrintOutStreamBuilder.java
+++ b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/PrintOutStreamBuilder.java
@@ -49,9 +49,9 @@ import java.util.concurrent.BlockingQueue;
*/
public class PrintOutStreamBuilder extends StreamBuilder {
- private final Collection<TblColRef> allColumns;
+ private final List<TblColRef> allColumns;
- public PrintOutStreamBuilder(BlockingQueue<Stream> streamQueue, int sliceSize, Collection<TblColRef> allColumns) {
+ public PrintOutStreamBuilder(BlockingQueue<Stream> streamQueue, int sliceSize, List<TblColRef> allColumns) {
super(streamQueue, sliceSize);
setStreamParser(JsonStreamParser.instance);
this.allColumns = allColumns;
@@ -61,7 +61,7 @@ public class PrintOutStreamBuilder extends StreamBuilder {
protected void build(List<Stream> streamsToBuild) throws Exception {
for (Stream stream : streamsToBuild) {
final List<String> row = getStreamParser().parse(stream, allColumns);
- System.out.println(StringUtils.join(row, ","));
+ System.out.println("offset:" + stream.getOffset() + " " + StringUtils.join(row, ","));
}
}
}
[43/50] incubator-kylin git commit: add streaming shell
Posted by li...@apache.org.
add streaming shell
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/d72f2e67
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/d72f2e67
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/d72f2e67
Branch: refs/heads/streaming-localdict
Commit: d72f2e679571d6b6ba5baade8b49f6c15b9adcf8
Parents: a36d416
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 17:07:10 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 17:07:10 2015 +0800
----------------------------------------------------------------------
bin/kylin.sh | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d72f2e67/bin/kylin.sh
----------------------------------------------------------------------
diff --git a/bin/kylin.sh b/bin/kylin.sh
index 179fa47..95568e1 100644
--- a/bin/kylin.sh
+++ b/bin/kylin.sh
@@ -76,6 +76,32 @@ then
fi
rm ${KYLIN_HOME}/pid
exit 0
+elif [ $1 == "streaming" ]
+then
+ useSandbox=`cat ${KYLIN_HOME}/conf/kylin.properties | grep 'kylin.sandbox' | awk -F '=' '{print $2}'`
+ spring_profile="default"
+ if [ "$useSandbox" = "true" ]
+ then spring_profile="sandbox"
+ fi
+
+ #retrive $hive_dependency
+ source ${dir}/find-hive-dependency.sh
+ #retrive $KYLIN_EXTRA_START_OPTS
+ if [ -f "${dir}/setenv.sh" ]
+ then source ${dir}/setenv.sh
+ fi
+
+ export HBASE_CLASSPATH=$hive_dependency:${HBASE_CLASSPATH}
+ export JAVA_OPTS="-Xms2048M -Xmx2048M"
+
+ hbase ${KYLIN_EXTRA_START_OPTS} \
+ -Djava.util.logging.manager=org.apache.juli.ClassLoaderLogManager \
+ -Dorg.apache.catalina.connector.CoyoteAdapter.ALLOW_BACKSLASH=true \
+ -Dkylin.hive.dependency=${hive_dependency} \
+ -Dspring.profiles.active=${spring_profile} \
+ org.apache.hadoop.util.RunJar ${KYLIN_HOME}/lib/kylin-job-*.jar org.apache.kylin.job.streaming.StreamingCLI start $2 > ${tomcat_root}/logs/kylin.log 2>&1 & echo $! > ${KYLIN_HOME}/$2 &
+ echo "streaming started $2"
+ exit 0
else
echo "usage: kylin.sh start or kylin.sh stop"
exit 1
[46/50] incubator-kylin git commit: remove compilation error
Posted by li...@apache.org.
remove compilation error
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/a3ff2d9f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/a3ff2d9f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/a3ff2d9f
Branch: refs/heads/streaming-localdict
Commit: a3ff2d9ffeb65bd58aa4a81b562dcb6fa9fc5a60
Parents: cff578a
Author: honma <ho...@ebay.com>
Authored: Fri Mar 27 18:26:06 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Fri Mar 27 18:26:06 2015 +0800
----------------------------------------------------------------------
job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/a3ff2d9f/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java b/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
index d42da33..3e352ff 100644
--- a/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
+++ b/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
@@ -75,6 +75,6 @@ public class IIStreamBuilderTest extends HBaseMetadataTestCase {
@Test
public void test() throws Exception {
- StreamingBootstrap.getInstance(kylinConfig).startStreaming("eagle", 0);
+ //StreamingBootstrap.getInstance(kylinConfig).startStreaming("eagle", 0);
}
}
[05/50] incubator-kylin git commit: Merge branch
'streaming-localdict' of https://github.com/KylinOLAP/Kylin into
streaming-localdict
Posted by li...@apache.org.
Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/71324f4c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/71324f4c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/71324f4c
Branch: refs/heads/streaming-localdict
Commit: 71324f4cc5b168a2fee318b84c167d17a72c08fb
Parents: 56d57a2 c8f4c2a
Author: qianhao.zhou <qi...@ebay.com>
Authored: Thu Mar 26 16:21:47 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Thu Mar 26 16:21:47 2015 +0800
----------------------------------------------------------------------
.../model/IIJoinedFlatTableDesc.java | 12 +-
.../invertedindex/model/IIKeyValueCodec.java | 91 +++++----
.../model/IIKeyValueCodecWithState.java | 68 +++++++
.../apache/kylin/invertedindex/model/IIRow.java | 13 ++
.../org/apache/kylin/job/JoinedFlatTable.java | 1 -
.../kylin/job/hadoop/AbstractHadoopJob.java | 2 +-
.../kylin/job/hadoop/cube/BaseCuboidMapper.java | 2 +-
.../kylin/job/hadoop/cube/CubeHFileMapper.java | 2 +-
.../kylin/job/hadoop/cube/CuboidReducer.java | 2 +-
.../job/hadoop/cube/FactDistinctColumnsJob.java | 2 +-
.../hadoop/cube/FactDistinctColumnsMapper.java | 200 -------------------
.../cube/FactDistinctColumnsMapperBase.java | 81 ++++++++
.../hadoop/cube/FactDistinctColumnsReducer.java | 2 +-
.../cube/FactDistinctHiveColumnsMapper.java | 129 ++++++++++++
.../cube/FactDistinctIIColumnsMapper.java | 129 ++++++++++++
.../job/hadoop/cube/MergeCuboidMapper.java | 2 +-
.../kylin/job/hadoop/cube/NDCuboidMapper.java | 2 +-
.../job/hadoop/cube/NewBaseCuboidMapper.java | 2 +-
.../job/hadoop/cubev2/InMemCuboidMapper.java | 2 +-
.../job/hadoop/cubev2/InMemCuboidReducer.java | 2 +-
.../invertedindex/InvertedIndexMapper.java | 2 +-
.../invertedindex/InvertedIndexPartitioner.java | 2 +-
.../invertedindex/InvertedIndexReducer.java | 2 +-
.../metadata/model/IJoinedFlatTableDesc.java | 2 -
.../metadata/model/IntermediateColumnDesc.java | 4 +
.../endpoint/HbaseServerKVIterator.java | 9 +-
26 files changed, 490 insertions(+), 277 deletions(-)
----------------------------------------------------------------------
[49/50] incubator-kylin git commit: KYLIN-625,
filter constants convert pass
Posted by li...@apache.org.
KYLIN-625, filter constants convert pass
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/48a79714
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/48a79714
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/48a79714
Branch: refs/heads/streaming-localdict
Commit: 48a797149b604d0f58f6b450bde2c4fc3c75937e
Parents: d7fc231
Author: Li, Yang <ya...@ebay.com>
Authored: Fri Mar 27 21:03:13 2015 +0800
Committer: Li, Yang <ya...@ebay.com>
Committed: Fri Mar 27 21:03:13 2015 +0800
----------------------------------------------------------------------
.../apache/kylin/metadata/model/ColumnDesc.java | 10 +++++++
.../apache/kylin/metadata/model/TableDesc.java | 7 +++++
.../apache/kylin/storage/gridtable/GTUtil.java | 12 ++-------
.../storage/gridtable/DictGridTableTest.java | 28 ++++++++++++++++++--
4 files changed, 45 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/48a79714/metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java b/metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java
index 95b320c..194b650 100644
--- a/metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java
+++ b/metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java
@@ -21,6 +21,7 @@ package org.apache.kylin.metadata.model;
import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
import com.fasterxml.jackson.annotation.JsonProperty;
+
import org.apache.commons.lang.StringUtils;
/**
@@ -131,4 +132,13 @@ public class ColumnDesc {
return "ColumnDesc [name=" + name + ",table=" + table.getIdentity() + "]";
}
+ public static ColumnDesc mockup(TableDesc table, int oneBasedColumnIndex, String name, String datatype) {
+ ColumnDesc desc = new ColumnDesc();
+ String id = "" + oneBasedColumnIndex;
+ desc.setId(id);
+ desc.setName(name);
+ desc.setDatatype(datatype);
+ desc.init(table);
+ return desc;
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/48a79714/metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java b/metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java
index 6db1202..6934ae9 100644
--- a/metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java
+++ b/metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java
@@ -162,4 +162,11 @@ public class TableDesc extends RootPersistentEntity {
public String toString() {
return "TableDesc [database=" + getDatabase() + " name=" + name + "]";
}
+
+ /** create a mockup table for unit test */
+ public static TableDesc mockup(String tableName) {
+ TableDesc mockup = new TableDesc();
+ mockup.setName(tableName);
+ return mockup;
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/48a79714/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java
index 7d042eb..94e5206 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java
@@ -19,18 +19,10 @@ import com.google.common.collect.Sets;
public class GTUtil {
- static final TableDesc MOCKUP_TABLE = new TableDesc();
- static {
- MOCKUP_TABLE.setName("GT_MOCKUP_TABLE");
- }
+ static final TableDesc MOCKUP_TABLE = TableDesc.mockup("GT_MOCKUP_TABLE");
static TblColRef tblColRef(int col, String datatype) {
- ColumnDesc desc = new ColumnDesc();
- String id = "" + (col + 1);
- desc.setId(id);
- desc.setName(id);
- desc.setDatatype(datatype);
- desc.init(MOCKUP_TABLE);
+ ColumnDesc desc = ColumnDesc.mockup(MOCKUP_TABLE, col + 1, "" + col, datatype);
return new TblColRef(desc);
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/48a79714/storage/src/test/java/org/apache/kylin/storage/gridtable/DictGridTableTest.java
----------------------------------------------------------------------
diff --git a/storage/src/test/java/org/apache/kylin/storage/gridtable/DictGridTableTest.java b/storage/src/test/java/org/apache/kylin/storage/gridtable/DictGridTableTest.java
index 46ec66c..a3de8b8 100644
--- a/storage/src/test/java/org/apache/kylin/storage/gridtable/DictGridTableTest.java
+++ b/storage/src/test/java/org/apache/kylin/storage/gridtable/DictGridTableTest.java
@@ -21,7 +21,9 @@ import org.apache.kylin.metadata.filter.ExtractTupleFilter;
import org.apache.kylin.metadata.filter.LogicalTupleFilter;
import org.apache.kylin.metadata.filter.TupleFilter;
import org.apache.kylin.metadata.filter.TupleFilter.FilterOperatorEnum;
+import org.apache.kylin.metadata.model.ColumnDesc;
import org.apache.kylin.metadata.model.DataType;
+import org.apache.kylin.metadata.model.TableDesc;
import org.apache.kylin.metadata.model.TblColRef;
import org.apache.kylin.storage.gridtable.GTInfo.Builder;
import org.apache.kylin.storage.gridtable.memstore.GTSimpleMemStore;
@@ -37,6 +39,7 @@ public class DictGridTableTest {
verifyFirstRow(table);
verifyScanWithUnevaluatableFilter(table);
verifyScanWithEvaluatableFilter(table);
+ verifyConvertFilterConstants(table);
}
private void verifyFirstRow(GridTable table) throws IOException {
@@ -51,8 +54,9 @@ public class DictGridTableTest {
LogicalTupleFilter filter = and(fcomp, funevaluatable);
GTScanRequest req = new GTScanRequest(info, null, setOf(0), setOf(3), new String[] { "sum" }, filter);
+
// note the unEvaluatable column 1 in filter is added to group by
- assertEquals("GTScanRequest [range=null-null, columns={0, 1, 3}, filterPushDown=AND [NULL.GT_MOCKUP_TABLE.1 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], [null]], aggrGroupBy={0, 1}, aggrMetrics={3}, aggrMetricsFuncs=[sum]]", req.toString());
+ assertEquals("GTScanRequest [range=null-null, columns={0, 1, 3}, filterPushDown=AND [NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], [null]], aggrGroupBy={0, 1}, aggrMetrics={3}, aggrMetricsFuncs=[sum]]", req.toString());
doScanAndVerify(table, req, "[1421280000000, 20, null, 20, null]");
}
@@ -65,12 +69,32 @@ public class DictGridTableTest {
LogicalTupleFilter filter = and(fcomp1, fcomp2);
GTScanRequest req = new GTScanRequest(info, null, setOf(0), setOf(3), new String[] { "sum" }, filter);
+
// note the evaluatable column 1 in filter is added to returned columns but not in group by
- assertEquals("GTScanRequest [range=null-null, columns={0, 1, 3}, filterPushDown=AND [NULL.GT_MOCKUP_TABLE.1 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], NULL.GT_MOCKUP_TABLE.2 GT [\\x00]], aggrGroupBy={0}, aggrMetrics={3}, aggrMetricsFuncs=[sum]]", req.toString());
+ assertEquals("GTScanRequest [range=null-null, columns={0, 1, 3}, filterPushDown=AND [NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], NULL.GT_MOCKUP_TABLE.1 GT [\\x00]], aggrGroupBy={0}, aggrMetrics={3}, aggrMetricsFuncs=[sum]]", req.toString());
doScanAndVerify(table, req, "[1421280000000, 30, null, 30, null]", "[1421366400000, 20, null, 40, null]");
}
+ private void verifyConvertFilterConstants(GridTable table) {
+ GTInfo info = table.getInfo();
+
+ TableDesc extTable = TableDesc.mockup("ext");
+ TblColRef extColA = new TblColRef(ColumnDesc.mockup(extTable, 1, "A", "timestamp"));
+ TblColRef extColB = new TblColRef(ColumnDesc.mockup(extTable, 2, "B", "integer"));
+
+ CompareTupleFilter fcomp1 = compare(extColA, FilterOperatorEnum.GT, "2015-01-14");
+ CompareTupleFilter fcomp2 = compare(extColB, FilterOperatorEnum.EQ, "10");
+ LogicalTupleFilter filter = and(fcomp1, fcomp2);
+
+ Map<TblColRef, Integer> colMapping = Maps.newHashMap();
+ colMapping.put(extColA, 0);
+ colMapping.put(extColB, 1);
+
+ TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
+ assertEquals("AND [NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], NULL.GT_MOCKUP_TABLE.1 EQ [\\x00]]", newFilter.toString());
+ }
+
private void doScanAndVerify(GridTable table, GTScanRequest req, String... verifyRows) throws IOException {
System.out.println(req);
IGTScanner scanner = table.scan(req);
[26/50] incubator-kylin git commit: Merge branch
'streaming-localdict' of https://github.com/KylinOLAP/Kylin into
streaming-localdict
Posted by li...@apache.org.
Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/b2010404
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/b2010404
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/b2010404
Branch: refs/heads/streaming-localdict
Commit: b20104040c96fd76d419f2773aa3d00f997350ad
Parents: 3d3cee8 71bbd0c
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 13:57:35 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 13:57:35 2015 +0800
----------------------------------------------------------------------
.../apache/kylin/dict/DateStrDictionary.java | 12 +-
.../job/hadoop/cubev2/InMemCuboidMapper.java | 2 +-
.../gridtable/GTDictionaryCodeSystem.java | 16 +-
.../kylin/storage/gridtable/GTScanRange.java | 61 +++
.../storage/gridtable/GTScanRangePlanner.java | 474 +++++++++++++++++++
.../kylin/storage/gridtable/GTScanRequest.java | 22 +-
.../apache/kylin/storage/gridtable/GTUtil.java | 26 +-
.../kylin/storage/gridtable/IGTCodeSystem.java | 25 +-
.../kylin/storage/gridtable/GridTableTest.java | 2 +-
9 files changed, 603 insertions(+), 37 deletions(-)
----------------------------------------------------------------------
[48/50] incubator-kylin git commit: fix
Posted by li...@apache.org.
fix
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/c043b858
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/c043b858
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/c043b858
Branch: refs/heads/streaming-localdict
Commit: c043b8588673ef282759b0144d2448dddd13145e
Parents: b5a78a6 a3ff2d9
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 18:27:03 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 18:27:03 2015 +0800
----------------------------------------------------------------------
.../org/apache/kylin/job/cube/CubingJobBuilder.java | 2 --
.../org/apache/kylin/job/BuildCubeWithEngineTest.java | 7 ++-----
.../java/org/apache/kylin/job/IIStreamBuilderTest.java | 12 ++++++------
.../kylin/job/hadoop/invertedindex/II2CubeTest.java | 2 ++
4 files changed, 10 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/c043b858/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
----------------------------------------------------------------------
diff --cc job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
index bafcb61,3e352ff..d15d1e5
--- a/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
+++ b/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
@@@ -96,13 -68,13 +96,13 @@@ public class IIStreamBuilderTest extend
DeployUtil.overrideJobJarLocations();
}
- @After
- public void after() {
- this.cleanupTestMetadata();
- }
-
@Test
public void test() throws Exception {
- // final StreamingBootstrap bootstrap = StreamingBootstrap.getInstance(kylinConfig);
- // bootstrap.start("eagle", 0);
- // Thread.sleep(30 * 60 * 1000);
- // logger.info("time is up, stop streaming");
- // bootstrap.stop();
- // Thread.sleep(5 * 1000);
- //StreamingBootstrap.getInstance(kylinConfig).startStreaming("eagle", 0);
++ final StreamingBootstrap bootstrap = StreamingBootstrap.getInstance(kylinConfig);
++ bootstrap.start("eagle", 0);
++ Thread.sleep(30 * 60 * 1000);
++ logger.info("time is up, stop streaming");
++ bootstrap.stop();
++ Thread.sleep(5 * 1000);
}
}
[18/50] incubator-kylin git commit: Merge branch
'streaming-localdict' of https://github.com/KylinOLAP/Kylin into
streaming-localdict
Posted by li...@apache.org.
Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/70887247
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/70887247
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/70887247
Branch: refs/heads/streaming-localdict
Commit: 70887247acfa9b2bcf3241f8beb4fa97e14a1607
Parents: 2b5495c 0edf400
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 10:05:44 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 10:05:44 2015 +0800
----------------------------------------------------------------------
.../cube/FactDistinctColumnsCombiner.java | 6 +-
.../job/hadoop/cube/FactDistinctColumnsJob.java | 4 +-
.../cube/FactDistinctColumnsMapperBase.java | 16 ++---
.../hadoop/cube/FactDistinctColumnsReducer.java | 61 +++++++++++++-----
.../cube/FactDistinctHiveColumnsMapper.java | 51 ++++++++++-----
.../gridtable/GTDictionaryCodeSystem.java | 68 +++++++++++---------
6 files changed, 130 insertions(+), 76 deletions(-)
----------------------------------------------------------------------
[29/50] incubator-kylin git commit: add serializer for
Date/Time/Timestamp
Posted by li...@apache.org.
add serializer for Date/Time/Timestamp
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/24accccc
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/24accccc
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/24accccc
Branch: refs/heads/streaming-localdict
Commit: 24accccc59009dd305cd70fb96cfe3160ad8ffa1
Parents: bbbcae8
Author: Li, Yang <ya...@ebay.com>
Authored: Fri Mar 27 14:37:49 2015 +0800
Committer: Li, Yang <ya...@ebay.com>
Committed: Fri Mar 27 14:37:49 2015 +0800
----------------------------------------------------------------------
.../apache/kylin/dict/DateStrDictionary.java | 73 +--------
.../kylin/invertedindex/index/TableRecord.java | 7 +-
.../metadata/serializer/DataTypeSerializer.java | 4 +-
.../metadata/serializer/DateTimeSerializer.java | 39 +++++
.../metadata/tool/HiveSourceTableLoader.java | 155 -------------------
.../apache/kylin/metadata/util/DateFormat.java | 76 +++++++++
.../metadata/util/HiveSourceTableLoader.java | 155 +++++++++++++++++++
.../tool/HiveSourceTableLoaderTest.java | 2 +-
.../apache/kylin/rest/service/CubeService.java | 2 +-
.../kylin/storage/hbase/HBaseKeyRange.java | 12 +-
.../org/apache/kylin/storage/tuple/Tuple.java | 6 +-
.../kylin/storage/gridtable/GridTableTest.java | 8 +-
12 files changed, 292 insertions(+), 247 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/24accccc/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
----------------------------------------------------------------------
diff --git a/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java b/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
index 95f67ff..4523e67 100644
--- a/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
+++ b/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
@@ -18,17 +18,14 @@
package org.apache.kylin.dict;
+import static org.apache.kylin.metadata.util.DateFormat.*;
+
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
import java.util.Date;
-import java.util.Map;
-import java.util.TimeZone;
-import java.util.concurrent.ConcurrentHashMap;
import org.apache.commons.lang.StringUtils;
@@ -44,74 +41,8 @@ import org.apache.commons.lang.StringUtils;
*/
public class DateStrDictionary extends Dictionary<String> {
- static final String DEFAULT_DATE_PATTERN = "yyyy-MM-dd";
- static final String DEFAULT_DATETIME_PATTERN_WITHOUT_MILLISECONDS = "yyyy-MM-dd HH:mm:ss";
- static final String DEFAULT_DATETIME_PATTERN_WITH_MILLISECONDS = "yyyy-MM-dd HH:mm:ss.SSS";
-
static final int ID_9999_12_31 = 3652426; // assume 0 based
- static final private Map<String, ThreadLocal<SimpleDateFormat>> threadLocalMap = new ConcurrentHashMap<String, ThreadLocal<SimpleDateFormat>>();
-
- static SimpleDateFormat getDateFormat(String datePattern) {
- ThreadLocal<SimpleDateFormat> formatThreadLocal = threadLocalMap.get(datePattern);
- if (formatThreadLocal == null) {
- threadLocalMap.put(datePattern, formatThreadLocal = new ThreadLocal<SimpleDateFormat>());
- }
- SimpleDateFormat format = formatThreadLocal.get();
- if (format == null) {
- format = new SimpleDateFormat(datePattern);
- format.setTimeZone(TimeZone.getTimeZone("GMT")); // NOTE: this must be GMT to calculate epoch date correctly
- formatThreadLocal.set(format);
- }
- return format;
- }
-
- public static String dateToString(Date date) {
- return dateToString(date, DEFAULT_DATETIME_PATTERN_WITHOUT_MILLISECONDS);
- }
-
- public static String dateToString(Date date, String pattern) {
- return getDateFormat(pattern).format(date);
- }
-
- public static Date stringToDate(String str) {
- return stringToDate(str, DEFAULT_DATE_PATTERN);
- }
-
- public static Date stringToDate(String str, String pattern) {
- Date date = null;
- try {
- date = getDateFormat(pattern).parse(str);
- } catch (ParseException e) {
- throw new IllegalArgumentException("'" + str + "' is not a valid date of pattern '" + pattern + "'", e);
- }
- return date;
- }
-
- public static long stringToMillis(String str) {
- if (isAllDigits(str)) {
- return Long.parseLong(str);
- } else if (str.length() == 10) {
- return stringToDate(str, DEFAULT_DATE_PATTERN).getTime();
- } else if (str.length() == 19) {
- return stringToDate(str, DEFAULT_DATETIME_PATTERN_WITHOUT_MILLISECONDS).getTime();
- } else if (str.length() == 23) {
- return stringToDate(str, DEFAULT_DATETIME_PATTERN_WITH_MILLISECONDS).getTime();
- } else {
- throw new IllegalArgumentException("there is no valid date pattern for:" + str);
- }
- }
-
- private static boolean isAllDigits(String str) {
- for (int i = 0, n = str.length(); i < n; i++) {
- if (Character.isDigit(str.charAt(i)) == false)
- return false;
- }
- return true;
- }
-
- // ============================================================================
-
private String pattern;
private int baseId;
private int maxId;
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/24accccc/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecord.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecord.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecord.java
index ce1b7e0..15869f9 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecord.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecord.java
@@ -18,14 +18,11 @@
package org.apache.kylin.invertedindex.index;
-import com.google.common.collect.Lists;
-import org.apache.kylin.dict.DateStrDictionary;
import org.apache.commons.lang.ObjectUtils;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.kylin.dict.Dictionary;
-
-import java.util.List;
+import org.apache.kylin.metadata.util.DateFormat;
/**
* @author yangli9, honma
@@ -67,7 +64,7 @@ public class TableRecord implements Cloneable {
public long getTimestamp() {
String str = getValueString(info.getTimestampColumn());
- return DateStrDictionary.stringToMillis(str);
+ return DateFormat.stringToMillis(str);
}
public int length(int col) {
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/24accccc/metadata/src/main/java/org/apache/kylin/metadata/serializer/DataTypeSerializer.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/serializer/DataTypeSerializer.java b/metadata/src/main/java/org/apache/kylin/metadata/serializer/DataTypeSerializer.java
index 094c2f1..63d4ddd 100644
--- a/metadata/src/main/java/org/apache/kylin/metadata/serializer/DataTypeSerializer.java
+++ b/metadata/src/main/java/org/apache/kylin/metadata/serializer/DataTypeSerializer.java
@@ -42,7 +42,9 @@ abstract public class DataTypeSerializer<T> implements BytesSerializer<T> {
implementations.put("integer", LongSerializer.class);
implementations.put("int", LongSerializer.class);
implementations.put("smallint", LongSerializer.class);
- implementations.put("date", StringSerializer.class);
+ implementations.put("date", DateTimeSerializer.class);
+ implementations.put("datetime", DateTimeSerializer.class);
+ implementations.put("timestamp", DateTimeSerializer.class);
}
public static DataTypeSerializer<?> create(String dataType) {
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/24accccc/metadata/src/main/java/org/apache/kylin/metadata/serializer/DateTimeSerializer.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/serializer/DateTimeSerializer.java b/metadata/src/main/java/org/apache/kylin/metadata/serializer/DateTimeSerializer.java
new file mode 100644
index 0000000..465c158
--- /dev/null
+++ b/metadata/src/main/java/org/apache/kylin/metadata/serializer/DateTimeSerializer.java
@@ -0,0 +1,39 @@
+package org.apache.kylin.metadata.serializer;
+
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.kylin.metadata.util.DateFormat;
+
+public class DateTimeSerializer extends DataTypeSerializer<LongWritable> {
+
+ // avoid mass object creation
+ LongWritable current = new LongWritable();
+
+ @Override
+ public void serialize(LongWritable value, ByteBuffer out) {
+ out.putLong(value.get());
+ }
+
+ @Override
+ public LongWritable deserialize(ByteBuffer in) {
+ current.set(in.getLong());
+ return current;
+ }
+
+ @Override
+ public int peekLength(ByteBuffer in) {
+ return 8;
+ }
+
+ @Override
+ public LongWritable valueOf(byte[] value) {
+ if (value == null)
+ current.set(0L);
+ else
+ current.set(DateFormat.stringToMillis(Bytes.toString(value)));
+ return current;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/24accccc/metadata/src/main/java/org/apache/kylin/metadata/tool/HiveSourceTableLoader.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/tool/HiveSourceTableLoader.java b/metadata/src/main/java/org/apache/kylin/metadata/tool/HiveSourceTableLoader.java
deleted file mode 100644
index 5297188..0000000
--- a/metadata/src/main/java/org/apache/kylin/metadata/tool/HiveSourceTableLoader.java
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.metadata.tool;
-
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
-import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.metastore.api.Table;
-import org.apache.kylin.common.KylinConfig;
-import org.apache.kylin.common.util.HadoopUtil;
-import org.apache.kylin.common.util.HiveClient;
-import org.apache.kylin.metadata.MetadataConstants;
-import org.apache.kylin.metadata.MetadataManager;
-import org.apache.kylin.metadata.model.ColumnDesc;
-import org.apache.kylin.metadata.model.TableDesc;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.util.*;
-
-/**
- * Management class to sync hive table metadata with command See main method for
- * how to use the class
- *
- * @author jianliu
- */
-public class HiveSourceTableLoader {
-
- @SuppressWarnings("unused")
- private static final Logger logger = LoggerFactory.getLogger(HiveSourceTableLoader.class);
-
- public static final String OUTPUT_SURFIX = "json";
- public static final String TABLE_FOLDER_NAME = "table";
- public static final String TABLE_EXD_FOLDER_NAME = "table_exd";
-
- public static Set<String> reloadHiveTables(String[] hiveTables, KylinConfig config) throws IOException {
-
- Map<String, Set<String>> db2tables = Maps.newHashMap();
- for (String table : hiveTables) {
- String[] parts = HadoopUtil.parseHiveTableName(table);
- Set<String> set = db2tables.get(parts[0]);
- if (set == null) {
- set = Sets.newHashSet();
- db2tables.put(parts[0], set);
- }
- set.add(parts[1]);
- }
-
- // extract from hive
- Set<String> loadedTables = Sets.newHashSet();
- for (String database : db2tables.keySet()) {
- List<String> loaded = extractHiveTables(database, db2tables.get(database), config);
- loadedTables.addAll(loaded);
- }
-
- return loadedTables;
- }
-
- private static List<String> extractHiveTables(String database, Set<String> tables, KylinConfig config) throws IOException {
-
- List<String> loadedTables = Lists.newArrayList();
- MetadataManager metaMgr = MetadataManager.getInstance(KylinConfig.getInstanceFromEnv());
- for (String tableName : tables) {
- Table table = null;
- HiveClient hiveClient = new HiveClient();
- List<FieldSchema> partitionFields = null;
- List<FieldSchema> fields = null;
- try {
- table = hiveClient.getHiveTable(database, tableName);
- partitionFields = table.getPartitionKeys();
- fields = hiveClient.getHiveTableFields(database, tableName);
- } catch (Exception e) {
- e.printStackTrace();
- throw new IOException(e);
- }
-
- if (fields != null && partitionFields != null && partitionFields.size() > 0) {
- fields.addAll(partitionFields);
- }
-
- long tableSize = hiveClient.getFileSizeForTable(table);
- long tableFileNum = hiveClient.getFileNumberForTable(table);
- TableDesc tableDesc = metaMgr.getTableDesc(database + "." + tableName);
- if (tableDesc == null) {
- tableDesc = new TableDesc();
- tableDesc.setDatabase(database.toUpperCase());
- tableDesc.setName(tableName.toUpperCase());
- tableDesc.setUuid(UUID.randomUUID().toString());
- tableDesc.setLastModified(0);
- }
-
- int columnNumber = fields.size();
- List<ColumnDesc> columns = new ArrayList<ColumnDesc>(columnNumber);
- for (int i = 0; i < columnNumber; i++) {
- FieldSchema field = fields.get(i);
- ColumnDesc cdesc = new ColumnDesc();
- cdesc.setName(field.getName().toUpperCase());
- cdesc.setDatatype(field.getType());
- cdesc.setId(String.valueOf(i + 1));
- columns.add(cdesc);
- }
- tableDesc.setColumns(columns.toArray(new ColumnDesc[columnNumber]));
-
- StringBuffer partitionColumnString = new StringBuffer();
- for (int i = 0, n = partitionFields.size(); i < n; i++) {
- if (i > 0)
- partitionColumnString.append(", ");
- partitionColumnString.append(partitionFields.get(i).getName().toUpperCase());
- }
-
- Map<String, String> map = metaMgr.getTableDescExd(tableDesc.getIdentity());
-
- if (map == null) {
- map = Maps.newHashMap();
- }
- map.put(MetadataConstants.TABLE_EXD_TABLENAME, table.getTableName());
- map.put(MetadataConstants.TABLE_EXD_LOCATION, table.getSd().getLocation());
- map.put(MetadataConstants.TABLE_EXD_IF, table.getSd().getInputFormat());
- map.put(MetadataConstants.TABLE_EXD_OF, table.getSd().getOutputFormat());
- map.put(MetadataConstants.TABLE_EXD_OWNER, table.getOwner());
- map.put(MetadataConstants.TABLE_EXD_LAT, String.valueOf(table.getLastAccessTime()));
- map.put(MetadataConstants.TABLE_EXD_PC, partitionColumnString.toString());
- map.put(MetadataConstants.TABLE_EXD_TFS, String.valueOf(tableSize));
- map.put(MetadataConstants.TABLE_EXD_TNF, String.valueOf(tableFileNum));
- map.put(MetadataConstants.TABLE_EXD_PARTITIONED, Boolean.valueOf(partitionFields != null && partitionFields.size() > 0).toString());
-
- metaMgr.saveSourceTable(tableDesc);
- metaMgr.saveTableExd(tableDesc.getIdentity(), map);
- loadedTables.add(tableDesc.getIdentity());
- }
-
-
- return loadedTables;
- }
-
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/24accccc/metadata/src/main/java/org/apache/kylin/metadata/util/DateFormat.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/util/DateFormat.java b/metadata/src/main/java/org/apache/kylin/metadata/util/DateFormat.java
new file mode 100644
index 0000000..c0967e3
--- /dev/null
+++ b/metadata/src/main/java/org/apache/kylin/metadata/util/DateFormat.java
@@ -0,0 +1,76 @@
+package org.apache.kylin.metadata.util;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Map;
+import java.util.TimeZone;
+import java.util.concurrent.ConcurrentHashMap;
+
+public class DateFormat {
+
+ public static final String DEFAULT_DATE_PATTERN = "yyyy-MM-dd";
+ public static final String DEFAULT_DATETIME_PATTERN_WITHOUT_MILLISECONDS = "yyyy-MM-dd HH:mm:ss";
+ public static final String DEFAULT_DATETIME_PATTERN_WITH_MILLISECONDS = "yyyy-MM-dd HH:mm:ss.SSS";
+
+ static final private Map<String, ThreadLocal<SimpleDateFormat>> threadLocalMap = new ConcurrentHashMap<String, ThreadLocal<SimpleDateFormat>>();
+
+ static SimpleDateFormat getDateFormat(String datePattern) {
+ ThreadLocal<SimpleDateFormat> formatThreadLocal = threadLocalMap.get(datePattern);
+ if (formatThreadLocal == null) {
+ threadLocalMap.put(datePattern, formatThreadLocal = new ThreadLocal<SimpleDateFormat>());
+ }
+ SimpleDateFormat format = formatThreadLocal.get();
+ if (format == null) {
+ format = new SimpleDateFormat(datePattern);
+ format.setTimeZone(TimeZone.getTimeZone("GMT")); // NOTE: this must be GMT to calculate epoch date correctly
+ formatThreadLocal.set(format);
+ }
+ return format;
+ }
+
+ public static String dateToString(Date date) {
+ return dateToString(date, DEFAULT_DATETIME_PATTERN_WITHOUT_MILLISECONDS);
+ }
+
+ public static String dateToString(Date date, String pattern) {
+ return getDateFormat(pattern).format(date);
+ }
+
+ public static Date stringToDate(String str) {
+ return stringToDate(str, DEFAULT_DATE_PATTERN);
+ }
+
+ public static Date stringToDate(String str, String pattern) {
+ Date date = null;
+ try {
+ date = getDateFormat(pattern).parse(str);
+ } catch (ParseException e) {
+ throw new IllegalArgumentException("'" + str + "' is not a valid date of pattern '" + pattern + "'", e);
+ }
+ return date;
+ }
+
+ public static long stringToMillis(String str) {
+ if (isAllDigits(str)) {
+ return Long.parseLong(str);
+ } else if (str.length() == 10) {
+ return stringToDate(str, DEFAULT_DATE_PATTERN).getTime();
+ } else if (str.length() == 19) {
+ return stringToDate(str, DEFAULT_DATETIME_PATTERN_WITHOUT_MILLISECONDS).getTime();
+ } else if (str.length() == 23) {
+ return stringToDate(str, DEFAULT_DATETIME_PATTERN_WITH_MILLISECONDS).getTime();
+ } else {
+ throw new IllegalArgumentException("there is no valid date pattern for:" + str);
+ }
+ }
+
+ private static boolean isAllDigits(String str) {
+ for (int i = 0, n = str.length(); i < n; i++) {
+ if (Character.isDigit(str.charAt(i)) == false)
+ return false;
+ }
+ return true;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/24accccc/metadata/src/main/java/org/apache/kylin/metadata/util/HiveSourceTableLoader.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/util/HiveSourceTableLoader.java b/metadata/src/main/java/org/apache/kylin/metadata/util/HiveSourceTableLoader.java
new file mode 100644
index 0000000..fe5c2b3
--- /dev/null
+++ b/metadata/src/main/java/org/apache/kylin/metadata/util/HiveSourceTableLoader.java
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.metadata.util;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.util.HadoopUtil;
+import org.apache.kylin.common.util.HiveClient;
+import org.apache.kylin.metadata.MetadataConstants;
+import org.apache.kylin.metadata.MetadataManager;
+import org.apache.kylin.metadata.model.ColumnDesc;
+import org.apache.kylin.metadata.model.TableDesc;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.*;
+
+/**
+ * Management class to sync hive table metadata with command See main method for
+ * how to use the class
+ *
+ * @author jianliu
+ */
+public class HiveSourceTableLoader {
+
+ @SuppressWarnings("unused")
+ private static final Logger logger = LoggerFactory.getLogger(HiveSourceTableLoader.class);
+
+ public static final String OUTPUT_SURFIX = "json";
+ public static final String TABLE_FOLDER_NAME = "table";
+ public static final String TABLE_EXD_FOLDER_NAME = "table_exd";
+
+ public static Set<String> reloadHiveTables(String[] hiveTables, KylinConfig config) throws IOException {
+
+ Map<String, Set<String>> db2tables = Maps.newHashMap();
+ for (String table : hiveTables) {
+ String[] parts = HadoopUtil.parseHiveTableName(table);
+ Set<String> set = db2tables.get(parts[0]);
+ if (set == null) {
+ set = Sets.newHashSet();
+ db2tables.put(parts[0], set);
+ }
+ set.add(parts[1]);
+ }
+
+ // extract from hive
+ Set<String> loadedTables = Sets.newHashSet();
+ for (String database : db2tables.keySet()) {
+ List<String> loaded = extractHiveTables(database, db2tables.get(database), config);
+ loadedTables.addAll(loaded);
+ }
+
+ return loadedTables;
+ }
+
+ private static List<String> extractHiveTables(String database, Set<String> tables, KylinConfig config) throws IOException {
+
+ List<String> loadedTables = Lists.newArrayList();
+ MetadataManager metaMgr = MetadataManager.getInstance(KylinConfig.getInstanceFromEnv());
+ for (String tableName : tables) {
+ Table table = null;
+ HiveClient hiveClient = new HiveClient();
+ List<FieldSchema> partitionFields = null;
+ List<FieldSchema> fields = null;
+ try {
+ table = hiveClient.getHiveTable(database, tableName);
+ partitionFields = table.getPartitionKeys();
+ fields = hiveClient.getHiveTableFields(database, tableName);
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IOException(e);
+ }
+
+ if (fields != null && partitionFields != null && partitionFields.size() > 0) {
+ fields.addAll(partitionFields);
+ }
+
+ long tableSize = hiveClient.getFileSizeForTable(table);
+ long tableFileNum = hiveClient.getFileNumberForTable(table);
+ TableDesc tableDesc = metaMgr.getTableDesc(database + "." + tableName);
+ if (tableDesc == null) {
+ tableDesc = new TableDesc();
+ tableDesc.setDatabase(database.toUpperCase());
+ tableDesc.setName(tableName.toUpperCase());
+ tableDesc.setUuid(UUID.randomUUID().toString());
+ tableDesc.setLastModified(0);
+ }
+
+ int columnNumber = fields.size();
+ List<ColumnDesc> columns = new ArrayList<ColumnDesc>(columnNumber);
+ for (int i = 0; i < columnNumber; i++) {
+ FieldSchema field = fields.get(i);
+ ColumnDesc cdesc = new ColumnDesc();
+ cdesc.setName(field.getName().toUpperCase());
+ cdesc.setDatatype(field.getType());
+ cdesc.setId(String.valueOf(i + 1));
+ columns.add(cdesc);
+ }
+ tableDesc.setColumns(columns.toArray(new ColumnDesc[columnNumber]));
+
+ StringBuffer partitionColumnString = new StringBuffer();
+ for (int i = 0, n = partitionFields.size(); i < n; i++) {
+ if (i > 0)
+ partitionColumnString.append(", ");
+ partitionColumnString.append(partitionFields.get(i).getName().toUpperCase());
+ }
+
+ Map<String, String> map = metaMgr.getTableDescExd(tableDesc.getIdentity());
+
+ if (map == null) {
+ map = Maps.newHashMap();
+ }
+ map.put(MetadataConstants.TABLE_EXD_TABLENAME, table.getTableName());
+ map.put(MetadataConstants.TABLE_EXD_LOCATION, table.getSd().getLocation());
+ map.put(MetadataConstants.TABLE_EXD_IF, table.getSd().getInputFormat());
+ map.put(MetadataConstants.TABLE_EXD_OF, table.getSd().getOutputFormat());
+ map.put(MetadataConstants.TABLE_EXD_OWNER, table.getOwner());
+ map.put(MetadataConstants.TABLE_EXD_LAT, String.valueOf(table.getLastAccessTime()));
+ map.put(MetadataConstants.TABLE_EXD_PC, partitionColumnString.toString());
+ map.put(MetadataConstants.TABLE_EXD_TFS, String.valueOf(tableSize));
+ map.put(MetadataConstants.TABLE_EXD_TNF, String.valueOf(tableFileNum));
+ map.put(MetadataConstants.TABLE_EXD_PARTITIONED, Boolean.valueOf(partitionFields != null && partitionFields.size() > 0).toString());
+
+ metaMgr.saveSourceTable(tableDesc);
+ metaMgr.saveTableExd(tableDesc.getIdentity(), map);
+ loadedTables.add(tableDesc.getIdentity());
+ }
+
+
+ return loadedTables;
+ }
+
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/24accccc/metadata/src/test/java/org/apache/kylin/metadata/tool/HiveSourceTableLoaderTest.java
----------------------------------------------------------------------
diff --git a/metadata/src/test/java/org/apache/kylin/metadata/tool/HiveSourceTableLoaderTest.java b/metadata/src/test/java/org/apache/kylin/metadata/tool/HiveSourceTableLoaderTest.java
index cd773ba..1f48b77 100644
--- a/metadata/src/test/java/org/apache/kylin/metadata/tool/HiveSourceTableLoaderTest.java
+++ b/metadata/src/test/java/org/apache/kylin/metadata/tool/HiveSourceTableLoaderTest.java
@@ -26,9 +26,9 @@ import java.util.Set;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
-
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.HBaseMetadataTestCase;
+import org.apache.kylin.metadata.util.HiveSourceTableLoader;
public class HiveSourceTableLoaderTest extends HBaseMetadataTestCase {
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/24accccc/server/src/main/java/org/apache/kylin/rest/service/CubeService.java
----------------------------------------------------------------------
diff --git a/server/src/main/java/org/apache/kylin/rest/service/CubeService.java b/server/src/main/java/org/apache/kylin/rest/service/CubeService.java
index de97a7b..d786b1e 100644
--- a/server/src/main/java/org/apache/kylin/rest/service/CubeService.java
+++ b/server/src/main/java/org/apache/kylin/rest/service/CubeService.java
@@ -44,7 +44,7 @@ import org.apache.kylin.metadata.project.ProjectManager;
import org.apache.kylin.metadata.project.RealizationEntry;
import org.apache.kylin.metadata.realization.RealizationStatusEnum;
import org.apache.kylin.metadata.realization.RealizationType;
-import org.apache.kylin.metadata.tool.HiveSourceTableLoader;
+import org.apache.kylin.metadata.util.HiveSourceTableLoader;
import org.apache.kylin.rest.constant.Constant;
import org.apache.kylin.rest.controller.QueryController;
import org.apache.kylin.rest.exception.InternalErrorException;
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/24accccc/storage/src/main/java/org/apache/kylin/storage/hbase/HBaseKeyRange.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/hbase/HBaseKeyRange.java b/storage/src/main/java/org/apache/kylin/storage/hbase/HBaseKeyRange.java
index 1c81eac..e766317 100644
--- a/storage/src/main/java/org/apache/kylin/storage/hbase/HBaseKeyRange.java
+++ b/storage/src/main/java/org/apache/kylin/storage/hbase/HBaseKeyRange.java
@@ -26,9 +26,6 @@ import java.util.Set;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
-
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
import org.apache.kylin.common.util.BytesUtil;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.cube.cuboid.Cuboid;
@@ -37,8 +34,11 @@ import org.apache.kylin.cube.kv.FuzzyKeyEncoder;
import org.apache.kylin.cube.kv.FuzzyMaskEncoder;
import org.apache.kylin.cube.kv.RowConstants;
import org.apache.kylin.cube.model.CubeDesc;
-import org.apache.kylin.dict.DateStrDictionary;
import org.apache.kylin.metadata.model.TblColRef;
+import org.apache.kylin.metadata.util.DateFormat;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
/**
*
@@ -135,10 +135,10 @@ public class HBaseKeyRange implements Comparable<HBaseKeyRange> {
private void initPartitionRange(ColumnValueRange dimRange) {
if (null != dimRange.getBeginValue()) {
- this.partitionColumnStartDate = DateStrDictionary.stringToDate(dimRange.getBeginValue()).getTime();
+ this.partitionColumnStartDate = DateFormat.stringToDate(dimRange.getBeginValue()).getTime();
}
if (null != dimRange.getEndValue()) {
- this.partitionColumnEndDate = DateStrDictionary.stringToDate(dimRange.getEndValue()).getTime();
+ this.partitionColumnEndDate = DateFormat.stringToDate(dimRange.getEndValue()).getTime();
}
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/24accccc/storage/src/main/java/org/apache/kylin/storage/tuple/Tuple.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/tuple/Tuple.java b/storage/src/main/java/org/apache/kylin/storage/tuple/Tuple.java
index dd904d4..2d18597 100644
--- a/storage/src/main/java/org/apache/kylin/storage/tuple/Tuple.java
+++ b/storage/src/main/java/org/apache/kylin/storage/tuple/Tuple.java
@@ -26,10 +26,10 @@ import org.apache.kylin.common.util.Array;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.cube.model.CubeDesc.DeriveInfo;
-import org.apache.kylin.dict.DateStrDictionary;
import org.apache.kylin.dict.lookup.LookupStringTable;
import org.apache.kylin.metadata.model.TblColRef;
import org.apache.kylin.metadata.tuple.ITuple;
+import org.apache.kylin.metadata.util.DateFormat;
/**
* @author xjiang
@@ -133,7 +133,7 @@ public class Tuple implements ITuple {
// TODO use data type enum instead of string comparison
if ("date".equals(dataType)) {
// convert epoch time
- Date dateValue = DateStrDictionary.stringToDate(strValue); // NOTE: forces GMT timezone
+ Date dateValue = DateFormat.stringToDate(strValue); // NOTE: forces GMT timezone
long millis = dateValue.getTime();
long days = millis / (1000 * 3600 * 24);
return Integer.valueOf((int) days); // Optiq expects Integer instead of Long. by honma
@@ -150,7 +150,7 @@ public class Tuple implements ITuple {
} else if ("decimal".equals(dataType)) {
return new BigDecimal(strValue);
} else if ("timestamp".equals(dataType)) {
- return Long.valueOf(DateStrDictionary.stringToMillis(strValue));
+ return Long.valueOf(DateFormat.stringToMillis(strValue));
} else {
return strValue;
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/24accccc/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java
----------------------------------------------------------------------
diff --git a/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java b/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java
index 1a69138..6561c6e 100644
--- a/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java
+++ b/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java
@@ -114,7 +114,7 @@ public class GridTableTest {
return scanner;
}
- private GTBuilder rebuild(GridTable table) throws IOException {
+ static GTBuilder rebuild(GridTable table) throws IOException {
GTRecord r = new GTRecord(table.getInfo());
GTBuilder builder = table.rebuild();
@@ -135,7 +135,7 @@ public class GridTableTest {
return builder;
}
- private void rebuildViaAppend(GridTable table) throws IOException {
+ static void rebuildViaAppend(GridTable table) throws IOException {
GTRecord r = new GTRecord(table.getInfo());
GTBuilder builder;
@@ -170,13 +170,13 @@ public class GridTableTest {
System.out.println("Written Row Count: " + builder.getWrittenRowCount());
}
- public static GTInfo basicInfo() {
+ static GTInfo basicInfo() {
Builder builder = infoBuilder();
GTInfo info = builder.build();
return info;
}
- public static GTInfo advancedInfo() {
+ static GTInfo advancedInfo() {
Builder builder = infoBuilder();
builder.enableColumnBlock(new BitSet[] { setOf(0, 1, 2), setOf(3, 4) });
builder.enableRowBlock(4);
[32/50] incubator-kylin git commit: KYLIN-653 fact distinct mapper
for II test passed
Posted by li...@apache.org.
KYLIN-653 fact distinct mapper for II test passed
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/0f8b7a46
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/0f8b7a46
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/0f8b7a46
Branch: refs/heads/streaming-localdict
Commit: 0f8b7a4689cde3e4844132efba8665cf0362bf60
Parents: fc5ab52
Author: honma <ho...@ebay.com>
Authored: Fri Mar 27 14:52:46 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Fri Mar 27 15:16:20 2015 +0800
----------------------------------------------------------------------
.../org/apache/kylin/common/util/BasicTest.java | 2 -
.../test_kylin_cube_with_slr_desc.json | 2 +-
.../apache/kylin/invertedindex/model/IIRow.java | 10 ++
.../cube/FactDistinctIIColumnsMapper.java | 15 +-
.../job/hadoop/invertedindex/II2CubeTest.java | 146 +++++++++++++++++++
.../invertedindex/ToyIIStreamBuilder.java | 36 +++++
streaming/pom.xml | 7 +
.../kylin/streaming/cube/CubeStreamBuilder.java | 20 +--
.../IIKeyValueCodecWithStateTest.java | 103 -------------
.../invertedindex/ToyIIStreamBuilder.java | 35 -----
10 files changed, 211 insertions(+), 165 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/0f8b7a46/common/src/test/java/org/apache/kylin/common/util/BasicTest.java
----------------------------------------------------------------------
diff --git a/common/src/test/java/org/apache/kylin/common/util/BasicTest.java b/common/src/test/java/org/apache/kylin/common/util/BasicTest.java
index 0b92bf9..068ebbf 100644
--- a/common/src/test/java/org/apache/kylin/common/util/BasicTest.java
+++ b/common/src/test/java/org/apache/kylin/common/util/BasicTest.java
@@ -23,9 +23,7 @@ import java.nio.ByteBuffer;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.*;
-import java.util.concurrent.*;
-import com.google.common.collect.Lists;
import org.apache.commons.configuration.ConfigurationException;
import org.junit.Ignore;
import org.junit.Test;
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/0f8b7a46/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_with_slr_desc.json
----------------------------------------------------------------------
diff --git a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_with_slr_desc.json b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_with_slr_desc.json
index c4d55f4..5a1049c 100644
--- a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_with_slr_desc.json
+++ b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_with_slr_desc.json
@@ -135,7 +135,7 @@
}, {
"column" : "lstg_format_name",
"length" : 12,
- "dictionary" : null,
+ "dictionary" : "true",
"mandatory" : false
}, {
"column" : "lstg_site_id",
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/0f8b7a46/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIRow.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIRow.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIRow.java
index f3d398a..273d1e6 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIRow.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIRow.java
@@ -34,10 +34,14 @@
package org.apache.kylin.invertedindex.model;
+import com.google.common.collect.Lists;
import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.kylin.common.util.BytesUtil;
+import java.util.List;
+
/**
* Created by qianzhou on 3/10/15.
*/
@@ -77,4 +81,10 @@ public final class IIRow {
this.getDictionary().set(c.getValueArray(), c.getValueOffset(), c.getValueLength());
}
}
+
+ public List<Cell> makeCells() {
+ Cell a = new KeyValue(this.getKey().copyBytes(), IIDesc.HBASE_FAMILY_BYTES, IIDesc.HBASE_QUALIFIER_BYTES, this.getValue().copyBytes());
+ Cell b = new KeyValue(this.getKey().copyBytes(), IIDesc.HBASE_FAMILY_BYTES, IIDesc.HBASE_DICTIONARY_BYTES, this.getDictionary().copyBytes());
+ return Lists.newArrayList(a, b);
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/0f8b7a46/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
index 6a236fd..75709f6 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
@@ -51,14 +51,9 @@ import com.google.common.collect.Lists;
*/
public class FactDistinctIIColumnsMapper extends FactDistinctColumnsMapperBase<ImmutableBytesWritable, Result> {
- private IIJoinedFlatTableDesc intermediateTableDesc;
private Queue<IIRow> buffer = Lists.newLinkedList();
private Iterator<Slice> slices;
- private String iiName;
- private IIInstance ii;
- private IIDesc iiDesc;
-
private int[] baseCuboidCol2FlattenTableCol;
@Override
@@ -68,11 +63,11 @@ public class FactDistinctIIColumnsMapper extends FactDistinctColumnsMapperBase<I
Configuration conf = context.getConfiguration();
KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
- iiName = conf.get(BatchConstants.CFG_II_NAME);
- ii = IIManager.getInstance(config).getII(iiName);
- iiDesc = ii.getDescriptor();
+ String iiName = conf.get(BatchConstants.CFG_II_NAME);
+ IIInstance ii = IIManager.getInstance(config).getII(iiName);
+ IIDesc iiDesc = ii.getDescriptor();
- intermediateTableDesc = new IIJoinedFlatTableDesc(iiDesc);
+ IIJoinedFlatTableDesc intermediateTableDesc = new IIJoinedFlatTableDesc(iiDesc);
TableRecordInfo info = new TableRecordInfo(iiDesc);
KeyValueCodec codec = new IIKeyValueCodecWithState(info.getDigest());
slices = codec.decodeKeyValue(new FIFOIterable<IIRow>(buffer)).iterator();
@@ -116,7 +111,7 @@ public class FactDistinctIIColumnsMapper extends FactDistinctColumnsMapperBase<I
vBytesBuffer = new byte[dictionary.getSizeOfValue() * 2];
}
- int vid = record.getValueID(baseCuboidIndex);
+ int vid = record.getValueID(indexInRecord);
if (vid == dictionary.nullId()) {
continue;
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/0f8b7a46/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java b/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
new file mode 100644
index 0000000..6832dcf
--- /dev/null
+++ b/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
@@ -0,0 +1,146 @@
+package org.apache.kylin.job.hadoop.invertedindex;
+
+import java.io.IOException;
+import java.util.*;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.LinkedBlockingQueue;
+
+import javax.annotation.Nullable;
+
+import com.google.common.collect.Sets;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.MutationSerialization;
+import org.apache.hadoop.hbase.mapreduce.ResultSerialization;
+import org.apache.hadoop.io.ShortWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mrunit.mapreduce.MapDriver;
+import org.apache.hadoop.mrunit.types.Pair;
+import org.apache.kylin.common.util.FIFOIterable;
+import org.apache.kylin.common.util.LocalFileMetadataTestCase;
+import org.apache.kylin.invertedindex.IIInstance;
+import org.apache.kylin.invertedindex.IIManager;
+import org.apache.kylin.invertedindex.index.Slice;
+import org.apache.kylin.invertedindex.index.TableRecordInfo;
+import org.apache.kylin.invertedindex.index.TableRecordInfoDigest;
+import org.apache.kylin.invertedindex.model.IIDesc;
+import org.apache.kylin.invertedindex.model.IIKeyValueCodecWithState;
+import org.apache.kylin.invertedindex.model.IIRow;
+import org.apache.kylin.invertedindex.model.KeyValueCodec;
+import org.apache.kylin.job.constant.BatchConstants;
+import org.apache.kylin.job.hadoop.cube.FactDistinctIIColumnsMapper;
+import org.apache.kylin.streaming.Stream;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Collections2;
+import com.google.common.collect.Lists;
+
+/**
+ * Created by Hongbin Ma(Binmahone) on 3/26/15.
+ */
+public class II2CubeTest extends LocalFileMetadataTestCase {
+
+ String iiName = "test_kylin_ii_inner_join";
+ IIInstance ii;
+ IIDesc iiDesc;
+ String cubeName = "test_kylin_cube_with_slr_empty";
+
+ List<IIRow> iiRows;
+
+ final String[] inputs = new String[] { //
+ "FP-non GTC,0,15,145970,0,28,Toys,2008-10-08 07:18:40,USER_Y,Toys & Hobbies,Models & Kits,Automotive,0,Ebay,USER_S,15,Professional-Other,2012-08-16,2012-08-11,0,2012-08-16,145970,10000329,26.8551,0", //
+ "ABIN,0,-99,43479,0,21,Photo,2012-09-11 20:26:04,USER_Y,Cameras & Photo,Film Photography,Other,0,Ebay,USER_S,-99,Not Applicable,2012-08-16,2012-08-11,0,2012-08-16,43479,10000807,26.2474,0", //
+ "ABIN,0,16,80053,0,12,Computers,2012-06-19 21:15:09,USER_Y,Computers/Tablets & Networking,MonitorProjectors & Accs,Monitors,0,Ebay,USER_S,16,Consumer-Other,2012-08-16,2012-08-11,0,2012-08-16,80053,10000261,94.2273,0" };
+
+ @Before
+ public void setUp() throws Exception {
+ this.createTestMetadata();
+ this.ii = IIManager.getInstance(getTestConfig()).getII(iiName);
+ this.iiDesc = ii.getDescriptor();
+
+ Collection<?> streams = Collections2.transform(Arrays.asList(inputs), new Function<String, Stream>() {
+ @Nullable
+ @Override
+ public Stream apply(String input) {
+ return new Stream(0, input.getBytes());
+ }
+ });
+ LinkedBlockingQueue q = new LinkedBlockingQueue();
+ q.addAll(streams);
+ q.put(new Stream(-1, null));//a stop sign for builder
+
+ iiRows = Lists.newArrayList();
+ ToyIIStreamBuilder builder = new ToyIIStreamBuilder(q, iiDesc, 0, iiRows);
+ ExecutorService executorService = Executors.newSingleThreadExecutor();
+ Future<?> future = executorService.submit(builder);
+ future.get();
+
+ }
+
+ @After
+ public void after() throws Exception {
+ cleanupTestMetadata();
+ }
+
+ /**
+ * simulate stream building into slices, and encode the slice into IIRows.
+ * Then reconstruct the IIRows to slice.
+ */
+ @Test
+ public void basicTest() {
+ Queue<IIRow> buffer = Lists.newLinkedList();
+ FIFOIterable bufferIterable = new FIFOIterable(buffer);
+ TableRecordInfo info = new TableRecordInfo(iiDesc);
+ TableRecordInfoDigest digest = info.getDigest();
+ KeyValueCodec codec = new IIKeyValueCodecWithState(digest);
+ Iterator<Slice> slices = codec.decodeKeyValue(bufferIterable).iterator();
+
+ Assert.assertTrue(!slices.hasNext());
+ Assert.assertEquals(iiRows.size(), digest.getColumnCount());
+
+ for (int i = 0; i < digest.getColumnCount(); ++i) {
+ buffer.add(iiRows.get(i));
+
+ if (i != digest.getColumnCount() - 1) {
+ Assert.assertTrue(!slices.hasNext());
+ } else {
+ Assert.assertTrue(slices.hasNext());
+ }
+ }
+
+ Slice newSlice = slices.next();
+ Assert.assertEquals(newSlice.getLocalDictionaries().get(0).getSize(), 2);
+ }
+
+ @Test
+ public void factDistinctIIColumnsMapperTest() throws IOException {
+ MapDriver<ImmutableBytesWritable, Result, ShortWritable, Text> mapDriver;
+ FactDistinctIIColumnsMapper mapper = new FactDistinctIIColumnsMapper();
+ mapDriver = MapDriver.newMapDriver(mapper);
+
+ mapDriver.getConfiguration().set(BatchConstants.CFG_II_NAME, iiName);
+ mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
+ mapDriver.getConfiguration().setStrings("io.serializations", mapDriver.getConfiguration().get("io.serializations"), MutationSerialization.class.getName(), ResultSerialization.class.getName());
+ mapDriver.addAll(Lists.newArrayList(Collections2.transform(iiRows, new Function<IIRow, Pair<ImmutableBytesWritable, Result>>() {
+ @Nullable
+ @Override
+ public Pair<ImmutableBytesWritable, Result> apply(@Nullable IIRow input) {
+ return new Pair<ImmutableBytesWritable, Result>(new ImmutableBytesWritable(new byte[] { 1 }), Result.create(input.makeCells()));
+ }
+ })));
+
+ List<Pair<ShortWritable, Text>> result = mapDriver.run();
+ Set<String> lstgNames = Sets.newHashSet("FP-non GTC","ABIN");
+ for(Pair<ShortWritable, Text> pair : result)
+ {
+ Assert.assertEquals(pair.getFirst().get(),6);
+ Assert.assertTrue(lstgNames.contains(pair.getSecond().toString()));
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/0f8b7a46/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/ToyIIStreamBuilder.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/ToyIIStreamBuilder.java b/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/ToyIIStreamBuilder.java
new file mode 100644
index 0000000..3e2a892
--- /dev/null
+++ b/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/ToyIIStreamBuilder.java
@@ -0,0 +1,36 @@
+package org.apache.kylin.job.hadoop.invertedindex;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.BlockingQueue;
+
+import org.apache.kylin.invertedindex.index.Slice;
+import org.apache.kylin.invertedindex.index.TableRecordInfo;
+import org.apache.kylin.invertedindex.model.IIDesc;
+import org.apache.kylin.invertedindex.model.IIKeyValueCodec;
+import org.apache.kylin.invertedindex.model.IIRow;
+import org.apache.kylin.streaming.Stream;
+import org.apache.kylin.streaming.invertedindex.IIStreamBuilder;
+
+/**
+ * Created by Hongbin Ma(Binmahone) on 3/26/15.
+ *
+ * A IIStreamBuilder that can hold all the built slices in form of IIRow
+ * This is only for test use
+ */
+public class ToyIIStreamBuilder extends IIStreamBuilder {
+ private List<IIRow> result;
+
+ public ToyIIStreamBuilder(BlockingQueue<Stream> queue, IIDesc desc, int partitionId, List<IIRow> result) {
+ super(queue, null, desc, partitionId);
+ this.result = result;
+ }
+
+ protected void outputSlice(Slice slice, TableRecordInfo tableRecordInfo) throws IOException {
+ IIKeyValueCodec codec = new IIKeyValueCodec(tableRecordInfo.getDigest());
+ for (IIRow iiRow : codec.encodeKeyValue(slice)) {
+ result.add(iiRow);
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/0f8b7a46/streaming/pom.xml
----------------------------------------------------------------------
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 650c9ac..0c084d5 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -16,6 +16,13 @@
<dependency>
+ <groupId>org.apache.mrunit</groupId>
+ <artifactId>mrunit</artifactId>
+ <classifier>hadoop2</classifier>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
<groupId>org.apache.kylin</groupId>
<artifactId>kylin-invertedindex</artifactId>
<version>${project.parent.version}</version>
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/0f8b7a46/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java b/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
index 9554797..5c2efdc 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
@@ -156,7 +156,6 @@ public class CubeStreamBuilder extends StreamBuilder {
logger.info("Totally " + generatedCuboids.size() + " cuboids be calculated, takes " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
}
-
private void calculateCuboid(GridTable parentCuboid, long parentCuboidId, long cuboidId, Map<Long, GridTable> result) throws IOException {
GridTable thisCuboid;
@@ -220,7 +219,6 @@ public class CubeStreamBuilder extends StreamBuilder {
return gridTable;
}
-
private GridTable aggregateCuboid(GridTable parentCuboid, long parentCuboidId, long cuboidId) throws IOException {
//logger.info("Calculating cuboid " + cuboidId + " from parent " + parentCuboidId);
Pair<BitSet, BitSet> columnBitSets = getDimensionAndMetricColumBitSet(parentCuboidId);
@@ -281,14 +279,12 @@ public class CubeStreamBuilder extends StreamBuilder {
}
private Pair<BitSet, BitSet> getDimensionAndMetricColumBitSet(long cuboidId) {
- BitSet bitSet = BitSet.valueOf(new long[]{cuboidId});
+ BitSet bitSet = BitSet.valueOf(new long[] { cuboidId });
BitSet dimension = new BitSet();
dimension.set(0, bitSet.cardinality());
BitSet metrics = new BitSet();
metrics.set(bitSet.cardinality(), bitSet.cardinality() + this.measureNumber);
- return new Pair<BitSet, BitSet>(
- dimension, metrics
- );
+ return new Pair<BitSet, BitSet>(dimension, metrics);
}
private Object[] buildKey(List<String> row, DataTypeSerializer[] serializers) {
@@ -302,7 +298,6 @@ public class CubeStreamBuilder extends StreamBuilder {
return key;
}
-
private Object[] buildValue(List<String> row) {
Object[] values = new Object[desc.getMeasures().size()];
@@ -340,11 +335,10 @@ public class CubeStreamBuilder extends StreamBuilder {
return values;
}
-
private GTInfo newGTInfo(long cuboidID) {
Pair<BitSet, BitSet> dimensionMetricsBitSet = getDimensionAndMetricColumBitSet(cuboidID);
GTInfo.Builder builder = infoBuilder(cuboidID);
- builder.enableColumnBlock(new BitSet[]{dimensionMetricsBitSet.getFirst(), dimensionMetricsBitSet.getSecond()});
+ builder.enableColumnBlock(new BitSet[] { dimensionMetricsBitSet.getFirst(), dimensionMetricsBitSet.getSecond() });
builder.setPrimaryKey(dimensionMetricsBitSet.getFirst());
GTInfo info = builder.build();
return info;
@@ -374,7 +368,6 @@ public class CubeStreamBuilder extends StreamBuilder {
return builder;
}
-
private void buildDictionary(List<List<String>> table, CubeDesc desc, Map<TblColRef, Dictionary> dictionaryMap) {
SetMultimap<TblColRef, String> valueMap = HashMultimap.create();
@@ -399,9 +392,9 @@ public class CubeStreamBuilder extends StreamBuilder {
}));
logger.info("Building dictionary for " + col);
-// DictionaryInfo dictInfo = new DictionaryInfo(col.getTable(), col.getName(), 0, col.getDatatype(), null, "");
-// dictInfo.setDictionaryObject(dict);
-// dictInfo.setDictionaryClass(dict.getClass().getName());
+ // DictionaryInfo dictInfo = new DictionaryInfo(col.getTable(), col.getName(), 0, col.getDatatype(), null, "");
+ // dictInfo.setDictionaryObject(dict);
+ // dictInfo.setDictionaryClass(dict.getClass().getName());
dictionaryMap.put(col, dict);
}
}
@@ -413,5 +406,4 @@ public class CubeStreamBuilder extends StreamBuilder {
return getStreamParser().parse(stream, Lists.newArrayList(desc.listAllColumns()));
}
-
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/0f8b7a46/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java
deleted file mode 100644
index 5ade5f1..0000000
--- a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java
+++ /dev/null
@@ -1,103 +0,0 @@
-package org.apache.kylin.streaming.invertedindex;
-
-import java.util.*;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.LinkedBlockingQueue;
-
-import javax.annotation.Nullable;
-
-import org.apache.kylin.common.util.FIFOIterable;
-import org.apache.kylin.common.util.LocalFileMetadataTestCase;
-import org.apache.kylin.invertedindex.IIInstance;
-import org.apache.kylin.invertedindex.IIManager;
-import org.apache.kylin.invertedindex.index.Slice;
-import org.apache.kylin.invertedindex.index.TableRecordInfo;
-import org.apache.kylin.invertedindex.index.TableRecordInfoDigest;
-import org.apache.kylin.invertedindex.model.IIDesc;
-import org.apache.kylin.invertedindex.model.IIKeyValueCodecWithState;
-import org.apache.kylin.invertedindex.model.IIRow;
-import org.apache.kylin.invertedindex.model.KeyValueCodec;
-import org.apache.kylin.streaming.Stream;
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-
-import com.google.common.base.Function;
-import com.google.common.collect.Collections2;
-import com.google.common.collect.Lists;
-
-/**
- * Created by Hongbin Ma(Binmahone) on 3/26/15.
- */
-public class IIKeyValueCodecWithStateTest extends LocalFileMetadataTestCase {
-
- IIInstance ii;
- IIDesc iiDesc;
- List<IIRow> iiRowList = Lists.newArrayList();
-
- final String[] inputs = new String[] { //
- "FP-non GTC,0,15,145970,0,28,Toys,2008-10-08 07:18:40,USER_Y,Toys & Hobbies,Models & Kits,Automotive,0,Ebay,USER_S,15,Professional-Other,2012-08-16,2012-08-11,0,2012-08-16,145970,10000329,26.8551,0", //
- "ABIN,0,-99,43479,0,21,Photo,2012-09-11 20:26:04,USER_Y,Cameras & Photo,Film Photography,Other,0,Ebay,USER_S,-99,Not Applicable,2012-08-16,2012-08-11,0,2012-08-16,43479,10000807,26.2474,0", //
- "ABIN,0,16,80053,0,12,Computers,2012-06-19 21:15:09,USER_Y,Computers/Tablets & Networking,MonitorProjectors & Accs,Monitors,0,Ebay,USER_S,16,Consumer-Other,2012-08-16,2012-08-11,0,2012-08-16,80053,10000261,94.2273,0" };
-
- @Before
- public void setUp() throws Exception {
- this.createTestMetadata();
- this.ii = IIManager.getInstance(getTestConfig()).getII("test_kylin_ii_inner_join");
- this.iiDesc = ii.getDescriptor();
-
- Collection<?> streams = Collections2.transform(Arrays.asList(inputs), new Function<String, Stream>() {
- @Nullable
- @Override
- public Stream apply(String input) {
- return new Stream(0, input.getBytes());
- }
- });
- LinkedBlockingQueue q = new LinkedBlockingQueue();
- q.addAll(streams);
- q.put(new Stream(-1, null));//a stop sign for builder
-
- ToyIIStreamBuilder builder = new ToyIIStreamBuilder(q, iiDesc, 0, iiRowList);
- ExecutorService executorService = Executors.newSingleThreadExecutor();
- Future<?> future = executorService.submit(builder);
- future.get();
- }
-
- @After
- public void after() throws Exception {
- cleanupTestMetadata();
- }
-
- /**
- * simulate stream building into slices, and encode the slice into IIRows.
- * Then reconstruct the IIRows to slice.
- */
- @Test
- public void basicTest() {
- Queue<IIRow> buffer = Lists.newLinkedList();
- FIFOIterable bufferIterable = new FIFOIterable(buffer);
- TableRecordInfo info = new TableRecordInfo(iiDesc);
- TableRecordInfoDigest digest = info.getDigest();
- KeyValueCodec codec = new IIKeyValueCodecWithState(digest);
- Iterator<Slice> slices = codec.decodeKeyValue(bufferIterable).iterator();
-
- Assert.assertTrue(!slices.hasNext());
- Assert.assertEquals(iiRowList.size(), digest.getColumnCount());
-
- for (int i = 0; i < digest.getColumnCount(); ++i) {
- buffer.add(iiRowList.get(i));
-
- if (i != digest.getColumnCount() - 1) {
- Assert.assertTrue(!slices.hasNext());
- } else {
- Assert.assertTrue(slices.hasNext());
- }
- }
-
- Slice newSlice = slices.next();
- Assert.assertEquals(newSlice.getLocalDictionaries().get(0).getSize(), 2);
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/0f8b7a46/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/ToyIIStreamBuilder.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/ToyIIStreamBuilder.java b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/ToyIIStreamBuilder.java
deleted file mode 100644
index 161b6f6..0000000
--- a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/ToyIIStreamBuilder.java
+++ /dev/null
@@ -1,35 +0,0 @@
-package org.apache.kylin.streaming.invertedindex;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.concurrent.BlockingQueue;
-
-import org.apache.kylin.invertedindex.index.Slice;
-import org.apache.kylin.invertedindex.index.TableRecordInfo;
-import org.apache.kylin.invertedindex.model.IIDesc;
-import org.apache.kylin.invertedindex.model.IIKeyValueCodec;
-import org.apache.kylin.invertedindex.model.IIRow;
-import org.apache.kylin.streaming.Stream;
-
-/**
- * Created by Hongbin Ma(Binmahone) on 3/26/15.
- *
- * A IIStreamBuilder that can hold all the built slices in form of IIRow
- * This is only for test use
- */
-public class ToyIIStreamBuilder extends IIStreamBuilder {
- private List<IIRow> result;
-
- public ToyIIStreamBuilder(BlockingQueue<Stream> queue, IIDesc desc, int partitionId, List<IIRow> result) {
- super(queue, null, desc, partitionId);
- this.result = result;
- }
-
- protected void outputSlice(Slice slice, TableRecordInfo tableRecordInfo) throws IOException {
- IIKeyValueCodec codec = new IIKeyValueCodec(tableRecordInfo.getDigest());
- for (IIRow iiRow : codec.encodeKeyValue(slice)) {
- result.add(iiRow);
- }
- }
-
-}
[03/50] incubator-kylin git commit: KYLIN-653 quick fix compile
Posted by li...@apache.org.
KYLIN-653 quick fix compile
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/c8f4c2a5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/c8f4c2a5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/c8f4c2a5
Branch: refs/heads/streaming-localdict
Commit: c8f4c2a513ac51621046541a02a92fea9d41c7af
Parents: 1b52438
Author: honma <ho...@ebay.com>
Authored: Thu Mar 26 16:09:33 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Thu Mar 26 16:09:33 2015 +0800
----------------------------------------------------------------------
.../java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/c8f4c2a5/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java
index 0a163e2..41b21a7 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java
@@ -96,7 +96,7 @@ public class BaseCuboidMapper<KEYIN> extends KylinMapper<KEYIN, Text, Text, Text
byteRowDelimiter = Bytes.toBytes(intermediateTableRowDelimiter)[0];
- KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(context.getConfiguration());
+ KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
cube = CubeManager.getInstance(config).getCube(cubeName);
cubeDesc = cube.getDescriptor();
[37/50] incubator-kylin git commit: KYLIN-653 add ii2basecuboid mapper
Posted by li...@apache.org.
KYLIN-653 add ii2basecuboid mapper
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/929b986d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/929b986d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/929b986d
Branch: refs/heads/streaming-localdict
Commit: 929b986d6d7396204d443aa6e420dd745a217611
Parents: d1c115d
Author: honma <ho...@ebay.com>
Authored: Fri Mar 27 15:56:10 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Fri Mar 27 15:56:10 2015 +0800
----------------------------------------------------------------------
.../invertedindex/index/RawTableRecord.java | 2 +
.../kylin/job/hadoop/cube/BaseCuboidJob.java | 2 +-
.../kylin/job/hadoop/cube/BaseCuboidMapper.java | 246 -------------------
.../job/hadoop/cube/BaseCuboidMapperBase.java | 205 ++++++++++++++++
.../job/hadoop/cube/HiveToBaseCuboidMapper.java | 49 ++++
.../job/hadoop/cube/IIToBaseCuboidMapper.java | 109 ++++++++
.../kylin/job/hadoop/cubev2/InMemCuboidJob.java | 5 -
.../cube/BaseCuboidMapperPerformanceTest.java | 65 -----
.../job/hadoop/cube/BaseCuboidMapperTest.java | 145 -----------
.../HiveToBaseCuboidMapperPerformanceTest.java | 65 +++++
.../hadoop/cube/HiveToBaseCuboidMapperTest.java | 145 +++++++++++
11 files changed, 576 insertions(+), 462 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/929b986d/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/RawTableRecord.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/RawTableRecord.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/RawTableRecord.java
index 895fd4f..ccfc5b1 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/RawTableRecord.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/RawTableRecord.java
@@ -18,6 +18,7 @@
package org.apache.kylin.invertedindex.index;
+import com.google.common.base.Preconditions;
import org.apache.kylin.common.util.BytesUtil;
import org.apache.kylin.dict.Dictionary;
import org.apache.kylin.metadata.measure.fixedlen.FixedLenMeasureCodec;
@@ -100,6 +101,7 @@ public class RawTableRecord implements Cloneable {
bytes.set(buf, digest.offset(col), digest.length(col));
}
+
@Override
public Object clone() {
return new RawTableRecord(this);
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/929b986d/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidJob.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidJob.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidJob.java
index 5f7802a..06046c5 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidJob.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidJob.java
@@ -27,7 +27,7 @@ import org.apache.hadoop.util.ToolRunner;
public class BaseCuboidJob extends CuboidJob {
public BaseCuboidJob() {
- this.setMapperClass(BaseCuboidMapper.class);
+ this.setMapperClass(HiveToBaseCuboidMapper.class);
}
public static void main(String[] args) throws Exception {
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/929b986d/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java
deleted file mode 100644
index a023c0c..0000000
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java
+++ /dev/null
@@ -1,246 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.job.hadoop.cube;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.Text;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.google.common.collect.Lists;
-import org.apache.kylin.common.KylinConfig;
-import org.apache.kylin.common.mr.KylinMapper;
-import org.apache.kylin.cube.CubeInstance;
-import org.apache.kylin.cube.CubeManager;
-import org.apache.kylin.cube.CubeSegment;
-import org.apache.kylin.common.util.BytesSplitter;
-import org.apache.kylin.common.util.SplittedBytes;
-import org.apache.kylin.cube.cuboid.Cuboid;
-import org.apache.kylin.cube.kv.AbstractRowKeyEncoder;
-import org.apache.kylin.cube.kv.RowConstants;
-import org.apache.kylin.metadata.measure.MeasureCodec;
-import org.apache.kylin.cube.model.CubeDesc;
-import org.apache.kylin.metadata.model.MeasureDesc;
-import org.apache.kylin.job.constant.BatchConstants;
-import org.apache.kylin.job.hadoop.AbstractHadoopJob;
-import org.apache.kylin.cube.model.CubeJoinedFlatTableDesc;
-import org.apache.kylin.metadata.model.FunctionDesc;
-import org.apache.kylin.metadata.model.ParameterDesc;
-import org.apache.kylin.metadata.model.SegmentStatusEnum;
-
-/**
- * @author George Song (ysong1)
- */
-public class BaseCuboidMapper<KEYIN> extends KylinMapper<KEYIN, Text, Text, Text> {
-
- private static final Logger logger = LoggerFactory.getLogger(BaseCuboidMapper.class);
-
- public static final byte[] HIVE_NULL = Bytes.toBytes("\\N");
- public static final byte[] ONE = Bytes.toBytes("1");
-
- private String cubeName;
- private String segmentName;
- private Cuboid baseCuboid;
- private CubeInstance cube;
- private CubeDesc cubeDesc;
- private CubeSegment cubeSegment;
- private List<byte[]> nullBytes;
-
- private CubeJoinedFlatTableDesc intermediateTableDesc;
- private String intermediateTableRowDelimiter;
- private byte byteRowDelimiter;
-
- private int counter;
- private int errorRecordCounter;
- private Text outputKey = new Text();
- private Text outputValue = new Text();
- private Object[] measures;
- private byte[][] keyBytesBuf;
- private ByteBuffer valueBuf = ByteBuffer.allocate(RowConstants.ROWVALUE_BUFFER_SIZE);
-
- private BytesSplitter bytesSplitter;
- private AbstractRowKeyEncoder rowKeyEncoder;
- private MeasureCodec measureCodec;
-
- @Override
- protected void setup(Context context) throws IOException {
- super.publishConfiguration(context.getConfiguration());
-
- cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase();
- segmentName = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_NAME);
- intermediateTableRowDelimiter = context.getConfiguration().get(BatchConstants.CFG_CUBE_INTERMEDIATE_TABLE_ROW_DELIMITER, Character.toString(BatchConstants.INTERMEDIATE_TABLE_ROW_DELIMITER));
- if (Bytes.toBytes(intermediateTableRowDelimiter).length > 1) {
- throw new RuntimeException("Expected delimiter byte length is 1, but got " + Bytes.toBytes(intermediateTableRowDelimiter).length);
- }
-
- byteRowDelimiter = Bytes.toBytes(intermediateTableRowDelimiter)[0];
-
- KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
-
- cube = CubeManager.getInstance(config).getCube(cubeName);
- cubeDesc = cube.getDescriptor();
- cubeSegment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);
-
- long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
- baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId);
-
- intermediateTableDesc = new CubeJoinedFlatTableDesc(cube.getDescriptor(), cubeSegment);
-
- bytesSplitter = new BytesSplitter(200, 4096);
- rowKeyEncoder = AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid);
-
- measureCodec = new MeasureCodec(cubeDesc.getMeasures());
- measures = new Object[cubeDesc.getMeasures().size()];
-
- int colCount = cubeDesc.getRowkey().getRowKeyColumns().length;
- keyBytesBuf = new byte[colCount][];
-
- initNullBytes();
- }
-
- private void initNullBytes() {
- nullBytes = Lists.newArrayList();
- nullBytes.add(HIVE_NULL);
- String[] nullStrings = cubeDesc.getNullStrings();
- if (nullStrings != null) {
- for (String s : nullStrings) {
- nullBytes.add(Bytes.toBytes(s));
- }
- }
- }
-
- private boolean isNull(byte[] v) {
- for (byte[] nullByte : nullBytes) {
- if (Bytes.equals(v, nullByte))
- return true;
- }
- return false;
- }
-
- private byte[] buildKey(SplittedBytes[] splitBuffers) {
- int[] rowKeyColumnIndexes = intermediateTableDesc.getRowKeyColumnIndexes();
- for (int i = 0; i < baseCuboid.getColumns().size(); i++) {
- int index = rowKeyColumnIndexes[i];
- keyBytesBuf[i] = Arrays.copyOf(splitBuffers[index].value, splitBuffers[index].length);
- if (isNull(keyBytesBuf[i])) {
- keyBytesBuf[i] = null;
- }
- }
- return rowKeyEncoder.encode(keyBytesBuf);
- }
-
- private void buildValue(SplittedBytes[] splitBuffers) {
-
- for (int i = 0; i < measures.length; i++) {
- byte[] valueBytes = getValueBytes(splitBuffers, i);
- measures[i] = measureCodec.getSerializer(i).valueOf(valueBytes);
- }
-
- valueBuf.clear();
- measureCodec.encode(measures, valueBuf);
- }
-
- private byte[] getValueBytes(SplittedBytes[] splitBuffers, int measureIdx) {
- MeasureDesc desc = cubeDesc.getMeasures().get(measureIdx);
- FunctionDesc func = desc.getFunction();
- ParameterDesc paramDesc = func.getParameter();
- int[] flatTableIdx = intermediateTableDesc.getMeasureColumnIndexes()[measureIdx];
-
- byte[] result = null;
-
- // constant
- if (flatTableIdx == null) {
- result = Bytes.toBytes(paramDesc.getValue());
- }
- // column values
- else {
- // for multiple columns, their values are joined
- for (int i = 0; i < flatTableIdx.length; i++) {
- SplittedBytes split = splitBuffers[flatTableIdx[i]];
- if (result == null) {
- result = Arrays.copyOf(split.value, split.length);
- } else {
- byte[] newResult = new byte[result.length + split.length];
- System.arraycopy(result, 0, newResult, 0, result.length);
- System.arraycopy(split.value, 0, newResult, result.length, split.length);
- result = newResult;
- }
- }
- }
-
- if (func.isCount() || func.isHolisticCountDistinct()) {
- // note for holistic count distinct, this value will be ignored
- result = ONE;
- }
-
- if (isNull(result)) {
- result = null;
- }
-
- return result;
- }
-
- @Override
- public void map(KEYIN key, Text value, Context context) throws IOException, InterruptedException {
- counter++;
- if (counter % BatchConstants.COUNTER_MAX == 0) {
- logger.info("Handled " + counter + " records!");
- }
-
- try {
- bytesSplitter.split(value.getBytes(), value.getLength(), byteRowDelimiter);
- outputKV(context);
-
- } catch (Exception ex) {
- handleErrorRecord(bytesSplitter, ex);
- }
- }
-
- private void outputKV(Context context) throws IOException, InterruptedException {
- intermediateTableDesc.sanityCheck(bytesSplitter);
-
- byte[] rowKey = buildKey(bytesSplitter.getSplitBuffers());
- outputKey.set(rowKey, 0, rowKey.length);
-
- buildValue(bytesSplitter.getSplitBuffers());
- outputValue.set(valueBuf.array(), 0, valueBuf.position());
- context.write(outputKey, outputValue);
- }
-
- private void handleErrorRecord(BytesSplitter bytesSplitter, Exception ex) throws IOException {
-
- System.err.println("Insane record: " + bytesSplitter);
- ex.printStackTrace(System.err);
-
- errorRecordCounter++;
- if (errorRecordCounter > BatchConstants.ERROR_RECORD_THRESHOLD) {
- if (ex instanceof IOException)
- throw (IOException) ex;
- else if (ex instanceof RuntimeException)
- throw (RuntimeException) ex;
- else
- throw new RuntimeException("", ex);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/929b986d/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapperBase.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapperBase.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapperBase.java
new file mode 100644
index 0000000..e2972dc
--- /dev/null
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapperBase.java
@@ -0,0 +1,205 @@
+package org.apache.kylin.job.hadoop.cube;
+
+import com.google.common.collect.Lists;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.Text;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.mr.KylinMapper;
+import org.apache.kylin.common.util.BytesSplitter;
+import org.apache.kylin.common.util.SplittedBytes;
+import org.apache.kylin.cube.CubeInstance;
+import org.apache.kylin.cube.CubeManager;
+import org.apache.kylin.cube.CubeSegment;
+import org.apache.kylin.cube.cuboid.Cuboid;
+import org.apache.kylin.cube.kv.AbstractRowKeyEncoder;
+import org.apache.kylin.cube.kv.RowConstants;
+import org.apache.kylin.cube.model.CubeDesc;
+import org.apache.kylin.cube.model.CubeJoinedFlatTableDesc;
+import org.apache.kylin.job.constant.BatchConstants;
+import org.apache.kylin.job.hadoop.AbstractHadoopJob;
+import org.apache.kylin.metadata.measure.MeasureCodec;
+import org.apache.kylin.metadata.model.FunctionDesc;
+import org.apache.kylin.metadata.model.MeasureDesc;
+import org.apache.kylin.metadata.model.ParameterDesc;
+import org.apache.kylin.metadata.model.SegmentStatusEnum;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Created by Hongbin Ma(Binmahone) on 3/27/15.
+ */
+public class BaseCuboidMapperBase<KEYIN, VALUEIN> extends KylinMapper<KEYIN, VALUEIN, Text, Text> {
+ protected static final Logger logger = LoggerFactory.getLogger(HiveToBaseCuboidMapper.class);
+ public static final byte[] HIVE_NULL = Bytes.toBytes("\\N");
+ public static final byte[] ONE = Bytes.toBytes("1");
+ protected String cubeName;
+ protected String segmentName;
+ protected Cuboid baseCuboid;
+ protected CubeInstance cube;
+ protected CubeDesc cubeDesc;
+ protected CubeSegment cubeSegment;
+ protected List<byte[]> nullBytes;
+ protected CubeJoinedFlatTableDesc intermediateTableDesc;
+ protected String intermediateTableRowDelimiter;
+ protected byte byteRowDelimiter;
+ protected int counter;
+ protected Object[] measures;
+ protected byte[][] keyBytesBuf;
+ protected BytesSplitter bytesSplitter;
+ protected AbstractRowKeyEncoder rowKeyEncoder;
+ protected MeasureCodec measureCodec;
+ private int errorRecordCounter;
+ private Text outputKey = new Text();
+ private Text outputValue = new Text();
+ private ByteBuffer valueBuf = ByteBuffer.allocate(RowConstants.ROWVALUE_BUFFER_SIZE);
+
+ @Override
+ protected void setup(Context context) throws IOException {
+ super.publishConfiguration(context.getConfiguration());
+
+ cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase();
+ segmentName = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_NAME);
+ intermediateTableRowDelimiter = context.getConfiguration().get(BatchConstants.CFG_CUBE_INTERMEDIATE_TABLE_ROW_DELIMITER, Character.toString(BatchConstants.INTERMEDIATE_TABLE_ROW_DELIMITER));
+ if (Bytes.toBytes(intermediateTableRowDelimiter).length > 1) {
+ throw new RuntimeException("Expected delimiter byte length is 1, but got " + Bytes.toBytes(intermediateTableRowDelimiter).length);
+ }
+
+ byteRowDelimiter = Bytes.toBytes(intermediateTableRowDelimiter)[0];
+
+ KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
+
+ cube = CubeManager.getInstance(config).getCube(cubeName);
+ cubeDesc = cube.getDescriptor();
+ cubeSegment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);
+
+ long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
+ baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId);
+
+ intermediateTableDesc = new CubeJoinedFlatTableDesc(cube.getDescriptor(), cubeSegment);
+
+ bytesSplitter = new BytesSplitter(200, 4096);
+ rowKeyEncoder = AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid);
+
+ measureCodec = new MeasureCodec(cubeDesc.getMeasures());
+ measures = new Object[cubeDesc.getMeasures().size()];
+
+ int colCount = cubeDesc.getRowkey().getRowKeyColumns().length;
+ keyBytesBuf = new byte[colCount][];
+
+ initNullBytes();
+ }
+
+ private void initNullBytes() {
+ nullBytes = Lists.newArrayList();
+ nullBytes.add(HIVE_NULL);
+ String[] nullStrings = cubeDesc.getNullStrings();
+ if (nullStrings != null) {
+ for (String s : nullStrings) {
+ nullBytes.add(Bytes.toBytes(s));
+ }
+ }
+ }
+
+ private boolean isNull(byte[] v) {
+ for (byte[] nullByte : nullBytes) {
+ if (Bytes.equals(v, nullByte))
+ return true;
+ }
+ return false;
+ }
+
+ private byte[] buildKey(SplittedBytes[] splitBuffers) {
+ int[] rowKeyColumnIndexes = intermediateTableDesc.getRowKeyColumnIndexes();
+ for (int i = 0; i < baseCuboid.getColumns().size(); i++) {
+ int index = rowKeyColumnIndexes[i];
+ keyBytesBuf[i] = Arrays.copyOf(splitBuffers[index].value, splitBuffers[index].length);
+ if (isNull(keyBytesBuf[i])) {
+ keyBytesBuf[i] = null;
+ }
+ }
+ return rowKeyEncoder.encode(keyBytesBuf);
+ }
+
+ private void buildValue(SplittedBytes[] splitBuffers) {
+
+ for (int i = 0; i < measures.length; i++) {
+ byte[] valueBytes = getValueBytes(splitBuffers, i);
+ measures[i] = measureCodec.getSerializer(i).valueOf(valueBytes);
+ }
+
+ valueBuf.clear();
+ measureCodec.encode(measures, valueBuf);
+ }
+
+ private byte[] getValueBytes(SplittedBytes[] splitBuffers, int measureIdx) {
+ MeasureDesc desc = cubeDesc.getMeasures().get(measureIdx);
+ FunctionDesc func = desc.getFunction();
+ ParameterDesc paramDesc = func.getParameter();
+ int[] flatTableIdx = intermediateTableDesc.getMeasureColumnIndexes()[measureIdx];
+
+ byte[] result = null;
+
+ // constant
+ if (flatTableIdx == null) {
+ result = Bytes.toBytes(paramDesc.getValue());
+ }
+ // column values
+ else {
+ // for multiple columns, their values are joined
+ for (int i = 0; i < flatTableIdx.length; i++) {
+ SplittedBytes split = splitBuffers[flatTableIdx[i]];
+ if (result == null) {
+ result = Arrays.copyOf(split.value, split.length);
+ } else {
+ byte[] newResult = new byte[result.length + split.length];
+ System.arraycopy(result, 0, newResult, 0, result.length);
+ System.arraycopy(split.value, 0, newResult, result.length, split.length);
+ result = newResult;
+ }
+ }
+ }
+
+ if (func.isCount() || func.isHolisticCountDistinct()) {
+ // note for holistic count distinct, this value will be ignored
+ result = ONE;
+ }
+
+ if (isNull(result)) {
+ result = null;
+ }
+
+ return result;
+ }
+
+ protected void outputKV(Context context) throws IOException, InterruptedException {
+ intermediateTableDesc.sanityCheck(bytesSplitter);
+
+ byte[] rowKey = buildKey(bytesSplitter.getSplitBuffers());
+ outputKey.set(rowKey, 0, rowKey.length);
+
+ buildValue(bytesSplitter.getSplitBuffers());
+ outputValue.set(valueBuf.array(), 0, valueBuf.position());
+ context.write(outputKey, outputValue);
+ }
+
+ protected void handleErrorRecord(BytesSplitter bytesSplitter, Exception ex) throws IOException {
+
+ System.err.println("Insane record: " + bytesSplitter);
+ ex.printStackTrace(System.err);
+
+ errorRecordCounter++;
+ if (errorRecordCounter > BatchConstants.ERROR_RECORD_THRESHOLD) {
+ if (ex instanceof IOException)
+ throw (IOException) ex;
+ else if (ex instanceof RuntimeException)
+ throw (RuntimeException) ex;
+ else
+ throw new RuntimeException("", ex);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/929b986d/job/src/main/java/org/apache/kylin/job/hadoop/cube/HiveToBaseCuboidMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/HiveToBaseCuboidMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/HiveToBaseCuboidMapper.java
new file mode 100644
index 0000000..599dde8
--- /dev/null
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/HiveToBaseCuboidMapper.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.job.hadoop.cube;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.Text;
+import org.apache.kylin.job.constant.BatchConstants;
+
+/**
+ * @author George Song (ysong1)
+ */
+public class HiveToBaseCuboidMapper<KEYIN> extends BaseCuboidMapperBase<KEYIN, Text> {
+
+ @Override
+ public void map(KEYIN key, Text value, Context context) throws IOException, InterruptedException {
+ counter++;
+ if (counter % BatchConstants.COUNTER_MAX == 0) {
+ logger.info("Handled " + counter + " records!");
+ }
+
+ try {
+ //put a record into the shared bytesSplitter
+ bytesSplitter.split(value.getBytes(), value.getLength(), byteRowDelimiter);
+ //take care of the data in bytesSplitter
+ outputKV(context);
+
+ } catch (Exception ex) {
+ handleErrorRecord(bytesSplitter, ex);
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/929b986d/job/src/main/java/org/apache/kylin/job/hadoop/cube/IIToBaseCuboidMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/IIToBaseCuboidMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/IIToBaseCuboidMapper.java
new file mode 100644
index 0000000..68886c0
--- /dev/null
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/IIToBaseCuboidMapper.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.job.hadoop.cube;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Queue;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.util.FIFOIterable;
+import org.apache.kylin.common.util.SplittedBytes;
+import org.apache.kylin.dict.Dictionary;
+import org.apache.kylin.invertedindex.IIInstance;
+import org.apache.kylin.invertedindex.IIManager;
+import org.apache.kylin.invertedindex.index.RawTableRecord;
+import org.apache.kylin.invertedindex.index.Slice;
+import org.apache.kylin.invertedindex.index.TableRecordInfo;
+import org.apache.kylin.invertedindex.index.TableRecordInfoDigest;
+import org.apache.kylin.invertedindex.model.*;
+import org.apache.kylin.job.constant.BatchConstants;
+import org.apache.kylin.job.hadoop.AbstractHadoopJob;
+
+/**
+ * honma
+ */
+public class IIToBaseCuboidMapper extends BaseCuboidMapperBase<ImmutableBytesWritable, Result> {
+ private Queue<IIRow> buffer = Lists.newLinkedList();
+ private Iterator<Slice> slices;
+
+ @Override
+ protected void setup(Context context) throws IOException {
+ super.setup(context);
+
+ Configuration conf = context.getConfiguration();
+ KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
+
+ String iiName = conf.get(BatchConstants.CFG_II_NAME);
+ IIInstance ii = IIManager.getInstance(config).getII(iiName);
+ IIDesc iiDesc = ii.getDescriptor();
+
+ TableRecordInfo info = new TableRecordInfo(iiDesc);
+ KeyValueCodec codec = new IIKeyValueCodecWithState(info.getDigest());
+ slices = codec.decodeKeyValue(new FIFOIterable<IIRow>(buffer)).iterator();
+ }
+
+ @Override
+ public void map(ImmutableBytesWritable key, Result cells, Context context) throws IOException, InterruptedException {
+ try {
+ IIRow iiRow = new IIRow();
+ for (Cell c : cells.rawCells()) {
+ iiRow.updateWith(c);
+ }
+ buffer.add(iiRow);
+
+ if (slices.hasNext()) {
+ Slice slice = slices.next();
+ TableRecordInfoDigest localDigest = slice.getInfo();
+ for (RawTableRecord record : slice) {
+
+ counter++;
+ if (counter % BatchConstants.COUNTER_MAX == 0) {
+ logger.info("Handled " + counter + " records!");
+ }
+
+ for (int indexInRecord = 0; indexInRecord < localDigest.getColumnCount(); ++indexInRecord) {
+ SplittedBytes columnBuffer = bytesSplitter.getSplitBuffer(indexInRecord);
+ if (!localDigest.isMetrics(indexInRecord)) {
+ String v = record.getValueMetric(indexInRecord);
+ byte[] metricBytes = v.getBytes();
+ System.arraycopy(metricBytes, 0, columnBuffer.value, 0, metricBytes.length);
+ columnBuffer.length = metricBytes.length;
+ } else {
+ Dictionary<?> dictionary = slice.getLocalDictionaries().get(indexInRecord);
+ Preconditions.checkArgument(columnBuffer.value.length > dictionary.getSizeOfValue(), "Column length too big");
+ int vid = record.getValueID(indexInRecord);
+ columnBuffer.length = dictionary.getValueBytesFromId(vid, columnBuffer.value, 0);
+ }
+ }
+
+ outputKV(context);
+ }
+ }
+ } catch (Exception ex) {
+ handleErrorRecord(bytesSplitter, ex);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/929b986d/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidJob.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidJob.java b/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidJob.java
index f83e9d7..7a7c62e 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidJob.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidJob.java
@@ -23,10 +23,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.ToolRunner;
@@ -40,8 +37,6 @@ import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.job.constant.BatchConstants;
import org.apache.kylin.job.exception.JobException;
import org.apache.kylin.job.hadoop.AbstractHadoopJob;
-import org.apache.kylin.job.hadoop.cube.BaseCuboidMapper;
-import org.apache.kylin.job.hadoop.cube.CuboidJob;
import org.apache.kylin.job.hadoop.cube.CuboidReducer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/929b986d/job/src/test/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapperPerformanceTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapperPerformanceTest.java b/job/src/test/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapperPerformanceTest.java
deleted file mode 100644
index 7826e86..0000000
--- a/job/src/test/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapperPerformanceTest.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.job.hadoop.cube;
-
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.SequenceFile.Reader;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.Mapper.Context;
-import org.apache.hadoop.util.ReflectionUtils;
-import org.junit.Ignore;
-import org.junit.Test;
-
-/**
- * @author yangli9
- *
- */
-@SuppressWarnings({ "rawtypes", "unchecked" })
-public class BaseCuboidMapperPerformanceTest {
-
- String metadataUrl = "hbase:yadesk00:2181:/hbase-unsecure";
- String cubeName = "test_kylin_cube_with_slr";
- Path srcPath = new Path("/download/test_kylin_cube_with_slr_intermediate_table_64mb.seq");
-
- @Ignore("convenient trial tool for dev")
- @Test
- public void test() throws IOException, InterruptedException {
- Configuration hconf = new Configuration();
- BaseCuboidMapper mapper = new BaseCuboidMapper();
- Context context = MockupMapContext.create(hconf, metadataUrl, cubeName, null);
-
- mapper.setup(context);
-
- Reader reader = new Reader(hconf, SequenceFile.Reader.file(srcPath));
- Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), hconf);
- Text value = new Text();
-
- while (reader.next(key, value)) {
- mapper.map(key, value, context);
- }
-
- reader.close();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/929b986d/job/src/test/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapperTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapperTest.java b/job/src/test/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapperTest.java
deleted file mode 100644
index c3632b7..0000000
--- a/job/src/test/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapperTest.java
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.job.hadoop.cube;
-
-import static org.junit.Assert.*;
-
-import java.io.File;
-import java.math.BigDecimal;
-import java.util.List;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mrunit.mapreduce.MapDriver;
-import org.apache.hadoop.mrunit.types.Pair;
-import org.apache.kylin.job.constant.BatchConstants;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
-
-import org.apache.kylin.common.util.LocalFileMetadataTestCase;
-import org.apache.kylin.cube.CubeInstance;
-import org.apache.kylin.cube.CubeManager;
-import org.apache.kylin.cube.kv.RowKeyDecoder;
-import org.apache.kylin.metadata.measure.MeasureCodec;
-import org.apache.kylin.metadata.model.MeasureDesc;
-
-/**
- * @author George Song (ysong1)
- *
- */
-public class BaseCuboidMapperTest extends LocalFileMetadataTestCase {
-
- MapDriver<Text, Text, Text, Text> mapDriver;
- String localTempDir = System.getProperty("java.io.tmpdir") + File.separator;
-
- @Before
- public void setUp() throws Exception {
- createTestMetadata();
-
- // hack for distributed cache
- FileUtils.deleteDirectory(new File("../job/meta"));
- FileUtils.copyDirectory(new File(getTestConfig().getMetadataUrl()), new File("../job/meta"));
-
- BaseCuboidMapper<Text> mapper = new BaseCuboidMapper<Text>();
- mapDriver = MapDriver.newMapDriver(mapper);
- }
-
- @After
- public void after() throws Exception {
- cleanupTestMetadata();
- FileUtils.deleteDirectory(new File("../job/meta"));
- }
-
- @Test
- public void testMapperWithHeader() throws Exception {
- String cubeName = "test_kylin_cube_with_slr_1_new_segment";
- String segmentName = "20130331080000_20131212080000";
- mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
- mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
- // mapDriver.getConfiguration().set(BatchConstants.CFG_METADATA_URL,
- // metadata);
- mapDriver.withInput(new Text("key"), new Text("2012-12-15118480Health & BeautyFragrancesWomenAuction15123456789132.33"));
- List<Pair<Text, Text>> result = mapDriver.run();
-
- CubeManager cubeMgr = CubeManager.getInstance(getTestConfig());
- CubeInstance cube = cubeMgr.getCube(cubeName);
-
- assertEquals(1, result.size());
- Text rowkey = result.get(0).getFirst();
- byte[] key = rowkey.getBytes();
- byte[] header = Bytes.head(key, 26);
- byte[] sellerId = Bytes.tail(header, 18);
- byte[] cuboidId = Bytes.head(header, 8);
- byte[] restKey = Bytes.tail(key, rowkey.getLength() - 26);
-
- RowKeyDecoder decoder = new RowKeyDecoder(cube.getFirstSegment());
- decoder.decode(key);
- assertEquals("[123456789, 2012-12-15, 11848, Health & Beauty, Fragrances, Women, Auction, 0, 15]", decoder.getValues().toString());
-
- assertTrue(Bytes.toString(sellerId).startsWith("123456789"));
- assertEquals(511, Bytes.toLong(cuboidId));
- assertEquals(22, restKey.length);
-
- verifyMeasures(cube.getDescriptor().getMeasures(), result.get(0).getSecond(), "132.33", "132.33", "132.33");
- }
-
- private void verifyMeasures(List<MeasureDesc> measures, Text valueBytes, String m1, String m2, String m3) {
- MeasureCodec codec = new MeasureCodec(measures);
- Object[] values = new Object[measures.size()];
- codec.decode(valueBytes, values);
- assertTrue(new BigDecimal(m1).equals(values[0]));
- assertTrue(new BigDecimal(m2).equals(values[1]));
- assertTrue(new BigDecimal(m3).equals(values[2]));
- }
-
- @Test
- public void testMapperWithNull() throws Exception {
- String cubeName = "test_kylin_cube_with_slr_1_new_segment";
- String segmentName = "20130331080000_20131212080000";
- mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
- mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
- // mapDriver.getConfiguration().set(BatchConstants.CFG_METADATA_URL,
- // metadata);
- mapDriver.withInput(new Text("key"), new Text("2012-12-15118480Health & BeautyFragrances\\NAuction15123456789\\N"));
- List<Pair<Text, Text>> result = mapDriver.run();
-
- CubeManager cubeMgr = CubeManager.getInstance(getTestConfig());
- CubeInstance cube = cubeMgr.getCube(cubeName);
-
- assertEquals(1, result.size());
- Text rowkey = result.get(0).getFirst();
- byte[] key = rowkey.getBytes();
- byte[] header = Bytes.head(key, 26);
- byte[] sellerId = Bytes.tail(header, 18);
- byte[] cuboidId = Bytes.head(header, 8);
- byte[] restKey = Bytes.tail(key, rowkey.getLength() - 26);
-
- RowKeyDecoder decoder = new RowKeyDecoder(cube.getFirstSegment());
- decoder.decode(key);
- assertEquals("[123456789, 2012-12-15, 11848, Health & Beauty, Fragrances, null, Auction, 0, 15]", decoder.getValues().toString());
-
- assertTrue(Bytes.toString(sellerId).startsWith("123456789"));
- assertEquals(511, Bytes.toLong(cuboidId));
- assertEquals(22, restKey.length);
-
- verifyMeasures(cube.getDescriptor().getMeasures(), result.get(0).getSecond(), "0", "0", "0");
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/929b986d/job/src/test/java/org/apache/kylin/job/hadoop/cube/HiveToBaseCuboidMapperPerformanceTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/hadoop/cube/HiveToBaseCuboidMapperPerformanceTest.java b/job/src/test/java/org/apache/kylin/job/hadoop/cube/HiveToBaseCuboidMapperPerformanceTest.java
new file mode 100644
index 0000000..cf9cfe0
--- /dev/null
+++ b/job/src/test/java/org/apache/kylin/job/hadoop/cube/HiveToBaseCuboidMapperPerformanceTest.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.job.hadoop.cube;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.SequenceFile.Reader;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.Mapper.Context;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * @author yangli9
+ *
+ */
+@SuppressWarnings({ "rawtypes", "unchecked" })
+public class HiveToBaseCuboidMapperPerformanceTest {
+
+ String metadataUrl = "hbase:yadesk00:2181:/hbase-unsecure";
+ String cubeName = "test_kylin_cube_with_slr";
+ Path srcPath = new Path("/download/test_kylin_cube_with_slr_intermediate_table_64mb.seq");
+
+ @Ignore("convenient trial tool for dev")
+ @Test
+ public void test() throws IOException, InterruptedException {
+ Configuration hconf = new Configuration();
+ HiveToBaseCuboidMapper mapper = new HiveToBaseCuboidMapper();
+ Context context = MockupMapContext.create(hconf, metadataUrl, cubeName, null);
+
+ mapper.setup(context);
+
+ Reader reader = new Reader(hconf, SequenceFile.Reader.file(srcPath));
+ Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), hconf);
+ Text value = new Text();
+
+ while (reader.next(key, value)) {
+ mapper.map(key, value, context);
+ }
+
+ reader.close();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/929b986d/job/src/test/java/org/apache/kylin/job/hadoop/cube/HiveToBaseCuboidMapperTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/hadoop/cube/HiveToBaseCuboidMapperTest.java b/job/src/test/java/org/apache/kylin/job/hadoop/cube/HiveToBaseCuboidMapperTest.java
new file mode 100644
index 0000000..f906fcb
--- /dev/null
+++ b/job/src/test/java/org/apache/kylin/job/hadoop/cube/HiveToBaseCuboidMapperTest.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.job.hadoop.cube;
+
+import static org.junit.Assert.*;
+
+import java.io.File;
+import java.math.BigDecimal;
+import java.util.List;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mrunit.mapreduce.MapDriver;
+import org.apache.hadoop.mrunit.types.Pair;
+import org.apache.kylin.job.constant.BatchConstants;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import org.apache.kylin.common.util.LocalFileMetadataTestCase;
+import org.apache.kylin.cube.CubeInstance;
+import org.apache.kylin.cube.CubeManager;
+import org.apache.kylin.cube.kv.RowKeyDecoder;
+import org.apache.kylin.metadata.measure.MeasureCodec;
+import org.apache.kylin.metadata.model.MeasureDesc;
+
+/**
+ * @author George Song (ysong1)
+ *
+ */
+public class HiveToBaseCuboidMapperTest extends LocalFileMetadataTestCase {
+
+ MapDriver<Text, Text, Text, Text> mapDriver;
+ String localTempDir = System.getProperty("java.io.tmpdir") + File.separator;
+
+ @Before
+ public void setUp() throws Exception {
+ createTestMetadata();
+
+ // hack for distributed cache
+ FileUtils.deleteDirectory(new File("../job/meta"));
+ FileUtils.copyDirectory(new File(getTestConfig().getMetadataUrl()), new File("../job/meta"));
+
+ HiveToBaseCuboidMapper<Text> mapper = new HiveToBaseCuboidMapper<Text>();
+ mapDriver = MapDriver.newMapDriver(mapper);
+ }
+
+ @After
+ public void after() throws Exception {
+ cleanupTestMetadata();
+ FileUtils.deleteDirectory(new File("../job/meta"));
+ }
+
+ @Test
+ public void testMapperWithHeader() throws Exception {
+ String cubeName = "test_kylin_cube_with_slr_1_new_segment";
+ String segmentName = "20130331080000_20131212080000";
+ mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
+ mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
+ // mapDriver.getConfiguration().set(BatchConstants.CFG_METADATA_URL,
+ // metadata);
+ mapDriver.withInput(new Text("key"), new Text("2012-12-15118480Health & BeautyFragrancesWomenAuction15123456789132.33"));
+ List<Pair<Text, Text>> result = mapDriver.run();
+
+ CubeManager cubeMgr = CubeManager.getInstance(getTestConfig());
+ CubeInstance cube = cubeMgr.getCube(cubeName);
+
+ assertEquals(1, result.size());
+ Text rowkey = result.get(0).getFirst();
+ byte[] key = rowkey.getBytes();
+ byte[] header = Bytes.head(key, 26);
+ byte[] sellerId = Bytes.tail(header, 18);
+ byte[] cuboidId = Bytes.head(header, 8);
+ byte[] restKey = Bytes.tail(key, rowkey.getLength() - 26);
+
+ RowKeyDecoder decoder = new RowKeyDecoder(cube.getFirstSegment());
+ decoder.decode(key);
+ assertEquals("[123456789, 2012-12-15, 11848, Health & Beauty, Fragrances, Women, Auction, 0, 15]", decoder.getValues().toString());
+
+ assertTrue(Bytes.toString(sellerId).startsWith("123456789"));
+ assertEquals(511, Bytes.toLong(cuboidId));
+ assertEquals(22, restKey.length);
+
+ verifyMeasures(cube.getDescriptor().getMeasures(), result.get(0).getSecond(), "132.33", "132.33", "132.33");
+ }
+
+ private void verifyMeasures(List<MeasureDesc> measures, Text valueBytes, String m1, String m2, String m3) {
+ MeasureCodec codec = new MeasureCodec(measures);
+ Object[] values = new Object[measures.size()];
+ codec.decode(valueBytes, values);
+ assertTrue(new BigDecimal(m1).equals(values[0]));
+ assertTrue(new BigDecimal(m2).equals(values[1]));
+ assertTrue(new BigDecimal(m3).equals(values[2]));
+ }
+
+ @Test
+ public void testMapperWithNull() throws Exception {
+ String cubeName = "test_kylin_cube_with_slr_1_new_segment";
+ String segmentName = "20130331080000_20131212080000";
+ mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
+ mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
+ // mapDriver.getConfiguration().set(BatchConstants.CFG_METADATA_URL,
+ // metadata);
+ mapDriver.withInput(new Text("key"), new Text("2012-12-15118480Health & BeautyFragrances\\NAuction15123456789\\N"));
+ List<Pair<Text, Text>> result = mapDriver.run();
+
+ CubeManager cubeMgr = CubeManager.getInstance(getTestConfig());
+ CubeInstance cube = cubeMgr.getCube(cubeName);
+
+ assertEquals(1, result.size());
+ Text rowkey = result.get(0).getFirst();
+ byte[] key = rowkey.getBytes();
+ byte[] header = Bytes.head(key, 26);
+ byte[] sellerId = Bytes.tail(header, 18);
+ byte[] cuboidId = Bytes.head(header, 8);
+ byte[] restKey = Bytes.tail(key, rowkey.getLength() - 26);
+
+ RowKeyDecoder decoder = new RowKeyDecoder(cube.getFirstSegment());
+ decoder.decode(key);
+ assertEquals("[123456789, 2012-12-15, 11848, Health & Beauty, Fragrances, null, Auction, 0, 15]", decoder.getValues().toString());
+
+ assertTrue(Bytes.toString(sellerId).startsWith("123456789"));
+ assertEquals(511, Bytes.toLong(cuboidId));
+ assertEquals(22, restKey.length);
+
+ verifyMeasures(cube.getDescriptor().getMeasures(), result.get(0).getSecond(), "0", "0", "0");
+ }
+}
[25/50] incubator-kylin git commit: accept bigint as II record
timestamp
Posted by li...@apache.org.
accept bigint as II record timestamp
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/71bbd0c6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/71bbd0c6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/71bbd0c6
Branch: refs/heads/streaming-localdict
Commit: 71bbd0c6ceabdb53fdf485da35b60508578f9bd2
Parents: d136933
Author: Li, Yang <ya...@ebay.com>
Authored: Fri Mar 27 13:55:39 2015 +0800
Committer: Li, Yang <ya...@ebay.com>
Committed: Fri Mar 27 13:55:39 2015 +0800
----------------------------------------------------------------------
.../java/org/apache/kylin/dict/DateStrDictionary.java | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/71bbd0c6/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
----------------------------------------------------------------------
diff --git a/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java b/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
index 7cace15..95f67ff 100644
--- a/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
+++ b/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
@@ -89,7 +89,9 @@ public class DateStrDictionary extends Dictionary<String> {
}
public static long stringToMillis(String str) {
- if (str.length() == 10) {
+ if (isAllDigits(str)) {
+ return Long.parseLong(str);
+ } else if (str.length() == 10) {
return stringToDate(str, DEFAULT_DATE_PATTERN).getTime();
} else if (str.length() == 19) {
return stringToDate(str, DEFAULT_DATETIME_PATTERN_WITHOUT_MILLISECONDS).getTime();
@@ -99,6 +101,14 @@ public class DateStrDictionary extends Dictionary<String> {
throw new IllegalArgumentException("there is no valid date pattern for:" + str);
}
}
+
+ private static boolean isAllDigits(String str) {
+ for (int i = 0, n = str.length(); i < n; i++) {
+ if (Character.isDigit(str.charAt(i)) == false)
+ return false;
+ }
+ return true;
+ }
// ============================================================================