Posted to commits@kylin.apache.org by li...@apache.org on 2015/03/28 01:04:38 UTC

[01/50] incubator-kylin git commit: KYLIN-653 add a special IIKeyValueCodec and refactor FactDistinctColumnsMapper

Repository: incubator-kylin
Updated Branches:
  refs/heads/streaming-localdict cc1fed44d -> 1ad301044


KYLIN-653 add a special IIKeyValueCodec and refactor FactDistinctColumnsMapper


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/8e0695b2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/8e0695b2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/8e0695b2

Branch: refs/heads/streaming-localdict
Commit: 8e0695b26517d08675d7417c40ee773561c9e3cf
Parents: 9dd1512
Author: honma <ho...@ebay.com>
Authored: Thu Mar 26 15:16:41 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Thu Mar 26 15:17:32 2015 +0800

----------------------------------------------------------------------
 .../invertedindex/model/IIKeyValueCodec.java    |  91 ++++++++-------
 .../model/IIKeyValueCodecWithState.java         |  68 +++++++++++
 .../hadoop/cube/FactDistinctColumnsMapper.java  | 115 ++++---------------
 .../cube/FactDistinctColumnsMapperBase.java     |  81 +++++++++++++
 4 files changed, 216 insertions(+), 139 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8e0695b2/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodec.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodec.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodec.java
index eedda4b..d9e20c4 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodec.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodec.java
@@ -37,42 +37,40 @@ import java.util.*;
  */
 public class IIKeyValueCodec implements KeyValueCodec {
 
-	public static final int SHARD_LEN = 2;
-	public static final int TIMEPART_LEN = 8;
-	public static final int COLNO_LEN = 2;
-    private final TableRecordInfoDigest digest;
+    public static final int SHARD_LEN = 2;
+    public static final int TIMEPART_LEN = 8;
+    public static final int COLNO_LEN = 2;
+    protected final TableRecordInfoDigest digest;
 
     public IIKeyValueCodec(TableRecordInfoDigest digest) {
         this.digest = digest;
-	}
+    }
 
     @Override
-	public Collection<IIRow> encodeKeyValue(Slice slice) {
-		ArrayList<IIRow> result = Lists
-				.newArrayList();
-		ColumnValueContainer[] containers = slice.getColumnValueContainers();
-		for (int col = 0; col < containers.length; col++) {
-			if (containers[col] instanceof CompressedValueContainer) {
+    public Collection<IIRow> encodeKeyValue(Slice slice) {
+        ArrayList<IIRow> result = Lists.newArrayList();
+        ColumnValueContainer[] containers = slice.getColumnValueContainers();
+        for (int col = 0; col < containers.length; col++) {
+            if (containers[col] instanceof CompressedValueContainer) {
                 final IIRow row = collectKeyValues(slice, col, (CompressedValueContainer) containers[col]);
                 result.add(row);
             } else {
-                throw new IllegalArgumentException("Unknown container class "
-						+ containers[col].getClass());
+                throw new IllegalArgumentException("Unknown container class " + containers[col].getClass());
             }
         }
-		return result;
-	}
+        return result;
+    }
 
-	private IIRow collectKeyValues(Slice slice, int col, CompressedValueContainer container) {
-		ImmutableBytesWritable key = encodeKey(slice.getShard(), slice.getTimestamp(), col);
-		ImmutableBytesWritable value = container.toBytes();
+    private IIRow collectKeyValues(Slice slice, int col, CompressedValueContainer container) {
+        ImmutableBytesWritable key = encodeKey(slice.getShard(), slice.getTimestamp(), col);
+        ImmutableBytesWritable value = container.toBytes();
         final Dictionary<?> dictionary = slice.getLocalDictionaries().get(col);
         if (dictionary == null) {
             return new IIRow(key, value, new ImmutableBytesWritable(BytesUtil.EMPTY_BYTE_ARRAY));
         } else {
             return new IIRow(key, value, serialize(dictionary));
         }
-	}
+    }
 
     private static Dictionary<?> deserialize(ImmutableBytesWritable dictBytes) {
         try {
@@ -98,31 +96,31 @@ public class IIKeyValueCodec implements KeyValueCodec {
         }
     }
 
-	ImmutableBytesWritable encodeKey(short shard, long timestamp, int col) {
-		byte[] bytes = new byte[20];
-		int len = encodeKey(shard, timestamp, col, bytes, 0);
-		return new ImmutableBytesWritable(bytes, 0, len);
-	}
+    ImmutableBytesWritable encodeKey(short shard, long timestamp, int col) {
+        byte[] bytes = new byte[20];
+        int len = encodeKey(shard, timestamp, col, bytes, 0);
+        return new ImmutableBytesWritable(bytes, 0, len);
+    }
 
-	int encodeKey(short shard, long timestamp, int col, byte[] buf, int offset) {
-		int i = offset;
+    int encodeKey(short shard, long timestamp, int col, byte[] buf, int offset) {
+        int i = offset;
 
-		BytesUtil.writeUnsigned(shard, buf, i, SHARD_LEN);
-		i += SHARD_LEN;
-		BytesUtil.writeLong(timestamp, buf, i, TIMEPART_LEN);
-		i += TIMEPART_LEN;
+        BytesUtil.writeUnsigned(shard, buf, i, SHARD_LEN);
+        i += SHARD_LEN;
+        BytesUtil.writeLong(timestamp, buf, i, TIMEPART_LEN);
+        i += TIMEPART_LEN;
 
-		BytesUtil.writeUnsigned(col, buf, i, COLNO_LEN);
-		i += COLNO_LEN;
+        BytesUtil.writeUnsigned(col, buf, i, COLNO_LEN);
+        i += COLNO_LEN;
 
-		return i - offset;
-	}
+        return i - offset;
+    }
 
     @Override
-	public Iterable<Slice> decodeKeyValue(Iterable<IIRow> kvs) {
+    public Iterable<Slice> decodeKeyValue(Iterable<IIRow> kvs) {
         return new IIRowDecoder(digest, kvs.iterator());
-//		return new Decoder(kvs, incompleteDigest);
-	}
+        //		return new Decoder(kvs, incompleteDigest);
+    }
 
     private static TableRecordInfoDigest createDigest(int nColumns, boolean[] isMetric, String[] dataTypes, Map<Integer, Dictionary<?>> dictionaryMap) {
         int[] dictMaxIds = new int[nColumns];
@@ -152,14 +150,16 @@ public class IIKeyValueCodec implements KeyValueCodec {
         return new TableRecordInfoDigest(nColumns, byteFormLen, offsets, dictMaxIds, lengths, isMetric, dataTypes);
     }
 
-    private static class IIRowDecoder implements Iterable<Slice> {
+    protected static class IIRowDecoder implements Iterable<Slice> {
 
-        private final TableRecordInfoDigest incompleteDigest;
-        private final Iterator<IIRow> iterator;
+        protected final TableRecordInfoDigest incompleteDigest;
+        protected final Iterator<IIRow> iiRowIterator;
+        protected Iterator<IIRow> feedingIterator; // left non-final so subclasses can redirect the feeding source
 
-        private IIRowDecoder(TableRecordInfoDigest digest, Iterator<IIRow> iterator) {
+        protected IIRowDecoder(TableRecordInfoDigest digest, Iterator<IIRow> iiRowIterator) {
             this.incompleteDigest = digest;
-            this.iterator = iterator;
+            this.iiRowIterator = iiRowIterator;
+            this.feedingIterator = this.iiRowIterator;
         }
 
         @Override
@@ -167,7 +167,7 @@ public class IIKeyValueCodec implements KeyValueCodec {
             return new Iterator<Slice>() {
                 @Override
                 public boolean hasNext() {
-                    return iterator.hasNext();
+                    return iiRowIterator.hasNext();
                 }
 
                 @Override
@@ -181,8 +181,8 @@ public class IIKeyValueCodec implements KeyValueCodec {
                     short lastShard = 0;
                     long lastTimestamp = 0;
 
-                    while (iterator.hasNext() && columns < incompleteDigest.getColumnCount()) {
-                        final IIRow row = iterator.next();
+                    while (feedingIterator.hasNext() && columns < incompleteDigest.getColumnCount()) {
+                        final IIRow row = feedingIterator.next();
                         final ImmutableBytesWritable key = row.getKey();
                         int i = key.getOffset();
                         curShard = (short) BytesUtil.readUnsigned(key.get(), i, SHARD_LEN);
@@ -220,7 +220,6 @@ public class IIKeyValueCodec implements KeyValueCodec {
                     return slice;
                 }
 
-
                 @Override
                 public void remove() {
                     throw new UnsupportedOperationException();
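
A note on the key layout above: encodeKey packs a 2-byte shard, an 8-byte time partition and a 2-byte column number into the first 12 bytes of a 20-byte buffer. The standalone sketch below reproduces the same fixed-width layout with java.nio.ByteBuffer instead of Kylin's BytesUtil; the class and method names are illustrative only, not part of the codebase.

import java.nio.ByteBuffer;

// Illustrative sketch of the 12-byte II key layout (shard + time partition + column number).
// Uses ByteBuffer rather than BytesUtil; IIKeyLayoutSketch is a hypothetical name.
public class IIKeyLayoutSketch {

    static final int SHARD_LEN = 2;    // matches IIKeyValueCodec.SHARD_LEN
    static final int TIMEPART_LEN = 8; // matches IIKeyValueCodec.TIMEPART_LEN
    static final int COLNO_LEN = 2;    // matches IIKeyValueCodec.COLNO_LEN

    static byte[] encodeKey(short shard, long timestamp, int col) {
        ByteBuffer buf = ByteBuffer.allocate(SHARD_LEN + TIMEPART_LEN + COLNO_LEN);
        buf.putShort(shard);       // 2-byte shard, big-endian
        buf.putLong(timestamp);    // 8-byte time partition
        buf.putShort((short) col); // 2-byte column number
        return buf.array();
    }

    public static void main(String[] args) {
        byte[] key = encodeKey((short) 3, System.currentTimeMillis(), 5);
        System.out.println("key length = " + key.length); // prints 12
    }
}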

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8e0695b2/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
new file mode 100644
index 0000000..a8e149a
--- /dev/null
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
@@ -0,0 +1,68 @@
+package org.apache.kylin.invertedindex.model;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+
+import com.google.common.base.Preconditions;
+import org.apache.kylin.invertedindex.index.Slice;
+import org.apache.kylin.invertedindex.index.TableRecordInfoDigest;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Created by Hongbin Ma(Binmahone) on 3/26/15.
+ */
+public class IIKeyValueCodecWithState extends IIKeyValueCodec {
+
+    public IIKeyValueCodecWithState(TableRecordInfoDigest digest) {
+        super(digest);
+    }
+
+    @Override
+    public Iterable<Slice> decodeKeyValue(Iterable<IIRow> kvs) {
+        return new IIRowDecoderWithState(digest, kvs.iterator());
+    }
+
+    protected static class IIRowDecoderWithState extends IIRowDecoder {
+
+        final ArrayList<IIRow> buffer = Lists.newArrayList();
+
+        private IIRowDecoderWithState(TableRecordInfoDigest digest, Iterator<IIRow> iiRowIterator) {
+            super(digest, iiRowIterator);
+            this.feedingIterator = buffer.iterator();
+        }
+
+        private Iterator<Slice> getSuperIterator() {
+            return super.iterator();
+        }
+
+        @Override
+        public Iterator<Slice> iterator() {
+            return new Iterator<Slice>() {
+                @Override
+                public boolean hasNext() {
+                    while (buffer.size() < incompleteDigest.getColumnCount() && iiRowIterator.hasNext()) {
+                        buffer.add(iiRowIterator.next());
+                    }
+                    return buffer.size() == incompleteDigest.getColumnCount();
+                }
+
+                @Override
+                public Slice next() {
+                    while (buffer.size() < incompleteDigest.getColumnCount() && iiRowIterator.hasNext()) {
+                        buffer.add(iiRowIterator.next());
+                    }
+                    Preconditions.checkArgument(buffer.size() == incompleteDigest.getColumnCount(), "not enough IIRows!");
+                    Slice ret = IIRowDecoderWithState.this.getSuperIterator().next();
+                    buffer.clear();
+                    return ret;
+                }
+
+                @Override
+                public void remove() {
+                    throw new UnsupportedOperationException();
+                }
+            };
+        }
+    }
+}
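
The decoder added above buffers IIRows until it holds exactly one row per column (incompleteDigest.getColumnCount()), then hands the buffered rows to the parent decoder through feedingIterator and clears the buffer for the next slice. Below is a minimal standalone sketch of that buffering pattern, with String rows and a List standing in for Slice; the types and names are simplified placeholders, not Kylin classes.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

// Buffer rows from an upstream iterator until one row per column is collected,
// then emit a single combined "slice" and reset the buffer.
public class BufferedDecoderSketch {

    static Iterator<List<String>> decode(final Iterator<String> rows, final int columnCount) {
        final List<String> buffer = new ArrayList<String>();
        return new Iterator<List<String>>() {
            @Override
            public boolean hasNext() {
                while (buffer.size() < columnCount && rows.hasNext()) {
                    buffer.add(rows.next());
                }
                return buffer.size() == columnCount;
            }

            @Override
            public List<String> next() {
                if (!hasNext()) {
                    throw new IllegalStateException("not enough rows for a full slice");
                }
                List<String> slice = new ArrayList<String>(buffer); // one row per column
                buffer.clear();                                     // reset state for the next slice
                return slice;
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException();
            }
        };
    }

    public static void main(String[] args) {
        Iterator<List<String>> slices = decode(Arrays.asList("c0", "c1", "c2", "c0", "c1", "c2").iterator(), 3);
        while (slices.hasNext()) {
            System.out.println(slices.next()); // prints [c0, c1, c2] twice
        }
    }
}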

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8e0695b2/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapper.java
index d36fb95..3a50249 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapper.java
@@ -18,108 +18,53 @@
 
 package org.apache.kylin.job.hadoop.cube;
 
-import java.io.ByteArrayOutputStream;
-import java.io.DataOutputStream;
 import java.io.IOException;
 import java.nio.ByteBuffer;
-import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
-import java.util.Set;
 
-import com.google.common.collect.Lists;
 import org.apache.commons.lang3.StringUtils;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.ShortWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hive.hcatalog.data.HCatRecord;
 import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
 import org.apache.hive.hcatalog.data.schema.HCatSchema;
 import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
-
-import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.hll.HyperLogLogPlusCounter;
-import org.apache.kylin.common.mr.KylinMapper;
-import org.apache.kylin.cube.CubeInstance;
-import org.apache.kylin.cube.CubeManager;
-import org.apache.kylin.cube.cuboid.Cuboid;
 import org.apache.kylin.cube.cuboid.CuboidScheduler;
-import org.apache.kylin.cube.model.CubeDesc;
-import org.apache.kylin.cube.model.RowKeyDesc;
-import org.apache.kylin.dict.DictionaryManager;
+import org.apache.kylin.cube.model.CubeJoinedFlatTableDesc;
 import org.apache.kylin.dict.lookup.HiveTableReader;
 import org.apache.kylin.job.constant.BatchConstants;
-import org.apache.kylin.job.hadoop.AbstractHadoopJob;
-import org.apache.kylin.cube.model.CubeJoinedFlatTableDesc;
-import org.apache.kylin.metadata.model.TblColRef;
+
+import com.google.common.collect.Lists;
 
 /**
  * @author yangli9
  */
-public class FactDistinctColumnsMapper<KEYIN> extends KylinMapper<KEYIN, HCatRecord, ShortWritable, Text> {
-
-    private String cubeName;
-    private CubeInstance cube;
-    private CubeDesc cubeDesc;
-    private int[] factDictCols;
+public class FactDistinctColumnsMapper<KEYIN> extends FactDistinctColumnsMapperBase<KEYIN, HCatRecord> {
 
+    private HCatSchema schema = null;
     private CubeJoinedFlatTableDesc intermediateTableDesc;
 
-    private ShortWritable outputKey = new ShortWritable();
-    private Text outputValue = new Text();
-    private int errorRecordCounter;
-
-    private HCatSchema schema = null;
-    private CuboidScheduler cuboidScheduler = null;
-    private List<String> rowKeyValues = null;
-    private HyperLogLogPlusCounter hll;
-    private long baseCuboidId;
-    private int nRowKey;
-    private boolean collectStatistics = false;
+    protected boolean collectStatistics = false;
+    protected CuboidScheduler cuboidScheduler = null;
+    protected List<String> rowKeyValues = null;
+    protected HyperLogLogPlusCounter hll;
+    protected int nRowKey;
 
     @Override
     protected void setup(Context context) throws IOException {
-        super.publishConfiguration(context.getConfiguration());
-
-        Configuration conf = context.getConfiguration();
+        super.setup(context);
 
-        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(conf);
-        cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
-        collectStatistics = Boolean.parseBoolean(conf.get(BatchConstants.CFG_STATISTICS_ENABLED));
-        cube = CubeManager.getInstance(config).getCube(cubeName);
-        cubeDesc = cube.getDescriptor();
+        schema = HCatInputFormat.getTableSchema(context.getConfiguration());
         intermediateTableDesc = new CubeJoinedFlatTableDesc(cubeDesc, null);
-        cuboidScheduler = new CuboidScheduler(cubeDesc);
-
-        baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
-        Cuboid baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId);
-        List<TblColRef> columns = baseCuboid.getColumns();
-
-        ArrayList<Integer> factDictCols = new ArrayList<Integer>();
-        RowKeyDesc rowkey = cubeDesc.getRowkey();
-        DictionaryManager dictMgr = DictionaryManager.getInstance(config);
-        for (int i = 0; i < columns.size(); i++) {
-            TblColRef col = columns.get(i);
-            if (rowkey.isUseDictionary(col) == false)
-                continue;
-
-            String scanTable = (String) dictMgr.decideSourceData(cubeDesc.getModel(), cubeDesc.getRowkey().getDictionary(col), col, null)[0];
-            if (cubeDesc.getModel().isFactTable(scanTable)) {
-                factDictCols.add(i);
-            }
-        }
-        this.factDictCols = new int[factDictCols.size()];
-        for (int i = 0; i < factDictCols.size(); i++)
-            this.factDictCols[i] = factDictCols.get(i);
 
-        schema = HCatInputFormat.getTableSchema(context.getConfiguration());
-        rowKeyValues = Lists.newArrayList();
-        nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length;
 
-        if(collectStatistics) {
+        collectStatistics = Boolean.parseBoolean(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_ENABLED));
+        if (collectStatistics) {
+            cuboidScheduler = new CuboidScheduler(cubeDesc);
             hll = new HyperLogLogPlusCounter(16);
+            rowKeyValues = Lists.newArrayList();
+            nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length;
         }
     }
 
@@ -127,7 +72,7 @@ public class FactDistinctColumnsMapper<KEYIN> extends KylinMapper<KEYIN, HCatRec
     public void map(KEYIN key, HCatRecord record, Context context) throws IOException, InterruptedException {
         try {
             int[] flatTableIndexes = intermediateTableDesc.getRowKeyColumnIndexes();
-            HCatFieldSchema fieldSchema = null;
+            HCatFieldSchema fieldSchema;
             for (int i : factDictCols) {
                 outputKey.set((short) i);
                 fieldSchema = schema.get(flatTableIndexes[i]);
@@ -142,28 +87,12 @@ public class FactDistinctColumnsMapper<KEYIN> extends KylinMapper<KEYIN, HCatRec
             handleErrorRecord(record, ex);
         }
 
-        if(collectStatistics) {
+        if (collectStatistics) {
             String[] row = HiveTableReader.getRowAsStringArray(record);
             putRowKeyToHLL(row, baseCuboidId);
         }
     }
 
-    private void handleErrorRecord(HCatRecord record, Exception ex) throws IOException {
-
-        System.err.println("Insane record: " + record.getAll());
-        ex.printStackTrace(System.err);
-
-        errorRecordCounter++;
-        if (errorRecordCounter > BatchConstants.ERROR_RECORD_THRESHOLD) {
-            if (ex instanceof IOException)
-                throw (IOException) ex;
-            else if (ex instanceof RuntimeException)
-                throw (RuntimeException) ex;
-            else
-                throw new RuntimeException("", ex);
-        }
-    }
-
     private void putRowKeyToHLL(String[] row, long cuboidId) {
         rowKeyValues.clear();
         long mask = Long.highestOneBit(baseCuboidId);
@@ -184,8 +113,9 @@ public class FactDistinctColumnsMapper<KEYIN> extends KylinMapper<KEYIN, HCatRec
 
     }
 
-    protected void cleanup(Mapper.Context context) throws IOException, InterruptedException {
-        if(collectStatistics) {
+    @Override
+    protected void cleanup(Context context) throws IOException, InterruptedException {
+        if (collectStatistics) {
             // output hll to reducer, key is -1
             // keyBuf = Bytes.toBytes(-1);
             outputKey.set((short) -1);
@@ -196,5 +126,4 @@ public class FactDistinctColumnsMapper<KEYIN> extends KylinMapper<KEYIN, HCatRec
         }
     }
 
-
 }
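
When statistics collection is enabled, the mapper above estimates distinct row-key combinations with a HyperLogLogPlusCounter(16): putRowKeyToHLL concatenates the row-key columns whose bit is set in the cuboid id and recurses over the spanning cuboids from CuboidScheduler. The sketch below illustrates only the bit-mask selection of columns per cuboid; a HashSet stands in for the HLL counter, the cuboid ids are hard-coded instead of coming from a scheduler, and the bit test is written as != 0 to reflect the intent of the mask check.

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

// For each (row, cuboid) pair, build the concatenated key of the row-key columns whose
// bit is set in the cuboid id, and count distinct keys per cuboid. Exact counting with
// a HashSet replaces the HyperLogLog estimate used in the real mapper.
public class CuboidCardinalitySketch {

    public static void main(String[] args) {
        String[][] rows = { { "a", "x", "1" }, { "a", "y", "1" }, { "b", "x", "2" } };
        long baseCuboidId = 0b111;                         // three row-key columns, all in the base cuboid
        long[] cuboidIds = { baseCuboidId, 0b110, 0b011 }; // a few example cuboids
        Map<Long, Set<String>> distinctPerCuboid = new HashMap<Long, Set<String>>();

        for (String[] row : rows) {
            for (long cuboidId : cuboidIds) {
                StringBuilder key = new StringBuilder();
                long mask = Long.highestOneBit(baseCuboidId);
                for (int i = 0; i < row.length; i++) {     // walk row-key columns, high bit to low bit
                    if ((mask & cuboidId) != 0) {
                        key.append(row[i]).append(',');
                    }
                    mask >>= 1;
                }
                Set<String> distinct = distinctPerCuboid.get(cuboidId);
                if (distinct == null) {
                    distinct = new HashSet<String>();
                    distinctPerCuboid.put(cuboidId, distinct);
                }
                distinct.add(key.toString());
            }
        }
        System.out.println(distinctPerCuboid);             // distinct row-key combinations per cuboid id
    }
}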

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8e0695b2/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
new file mode 100644
index 0000000..603277c
--- /dev/null
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
@@ -0,0 +1,81 @@
+package org.apache.kylin.job.hadoop.cube;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.ShortWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hive.hcatalog.data.HCatRecord;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.mr.KylinMapper;
+import org.apache.kylin.cube.CubeInstance;
+import org.apache.kylin.cube.CubeManager;
+import org.apache.kylin.cube.cuboid.Cuboid;
+import org.apache.kylin.cube.model.CubeDesc;
+import org.apache.kylin.cube.model.RowKeyDesc;
+import org.apache.kylin.dict.DictionaryManager;
+import org.apache.kylin.job.constant.BatchConstants;
+import org.apache.kylin.job.hadoop.AbstractHadoopJob;
+import org.apache.kylin.metadata.model.TblColRef;
+
+/**
+ * Created by Hongbin Ma(Binmahone) on 3/26/15.
+ */
+public class FactDistinctColumnsMapperBase<KEYIN, VALUEIN> extends KylinMapper<KEYIN, VALUEIN, ShortWritable, Text> {
+
+    protected String cubeName;
+    protected CubeInstance cube;
+    protected CubeDesc cubeDesc;
+    protected long baseCuboidId;
+    protected List<TblColRef> columns;
+    protected ArrayList<Integer> factDictCols;
+
+    protected ShortWritable outputKey = new ShortWritable();
+    protected Text outputValue = new Text();
+    protected int errorRecordCounter = 0;
+
+    @Override
+    protected void setup(Context context) throws IOException {
+        Configuration conf = context.getConfiguration();
+        publishConfiguration(conf);
+        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(conf);
+
+        cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
+        cube = CubeManager.getInstance(config).getCube(cubeName);
+        cubeDesc = cube.getDescriptor();
+        baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
+        columns = Cuboid.findById(cubeDesc, baseCuboidId).getColumns();
+
+        factDictCols = new ArrayList<Integer>();
+        RowKeyDesc rowKey = cubeDesc.getRowkey();
+        DictionaryManager dictMgr = DictionaryManager.getInstance(config);
+        for (int i = 0; i < columns.size(); i++) {
+            TblColRef col = columns.get(i);
+            if (!rowKey.isUseDictionary(col))
+                continue;
+
+            String scanTable = (String) dictMgr.decideSourceData(cubeDesc.getModel(), cubeDesc.getRowkey().getDictionary(col), col, null)[0];
+            if (cubeDesc.getModel().isFactTable(scanTable)) {
+                factDictCols.add(i);
+            }
+        }
+    }
+
+    protected void handleErrorRecord(HCatRecord record, Exception ex) throws IOException {
+
+        System.err.println("Insane record: " + record.getAll());
+        ex.printStackTrace(System.err);
+
+        errorRecordCounter++;
+        if (errorRecordCounter > BatchConstants.ERROR_RECORD_THRESHOLD) {
+            if (ex instanceof IOException)
+                throw (IOException) ex;
+            else if (ex instanceof RuntimeException)
+                throw (RuntimeException) ex;
+            else
+                throw new RuntimeException("", ex);
+        }
+    }
+}


[10/50] incubator-kylin git commit: Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict

Posted by li...@apache.org.
Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/227edf72
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/227edf72
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/227edf72

Branch: refs/heads/streaming-localdict
Commit: 227edf7275b0261720a188b05181d35f85fb4f5a
Parents: 21b8f0f 3bf6b37
Author: qianhao.zhou <qi...@ebay.com>
Authored: Thu Mar 26 18:04:29 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Thu Mar 26 18:04:29 2015 +0800

----------------------------------------------------------------------
 .../common/hll/HyperLogLogPlusCounter.java      | 29 ++++++++++++++++++--
 .../org/apache/kylin/common/util/BytesUtil.java | 16 +++++------
 .../apache/kylin/common/util/BytesUtilTest.java | 20 ++++++++++++++
 .../metadata/model_desc/kylin_sales_model.json  | 17 ++++++++++++
 .../kylin/metadata/model/DimensionDesc.java     | 12 +++++---
 5 files changed, 80 insertions(+), 14 deletions(-)
----------------------------------------------------------------------



[20/50] incubator-kylin git commit: Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict

Posted by li...@apache.org.
Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict

Conflicts:
	invertedindex/src/main/java/org/apache/kylin/invertedindex/index/BatchSliceBuilder.java
	invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodec.java
	job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidMapper.java
	job/src/test/java/org/apache/kylin/job/BuildCubeWithStreamTest.java
	storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
	storage/src/main/java/org/apache/kylin/storage/gridtable/GTRecord.java
	streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
	streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java
	streaming/src/test/java/org/apache/kylin/streaming/KafkaConsumerTest.java


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/7f73abe5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/7f73abe5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/7f73abe5

Branch: refs/heads/streaming-localdict
Commit: 7f73abe5c53fc165ff01b920850fe4caf8ab9e0d
Parents: 959d031 7088724
Author: Li, Yang <ya...@ebay.com>
Authored: Fri Mar 27 11:39:24 2015 +0800
Committer: Li, Yang <ya...@ebay.com>
Committed: Fri Mar 27 11:39:24 2015 +0800

----------------------------------------------------------------------
 .../common/hll/HyperLogLogPlusCounter.java      |  29 ++-
 .../kylin/common/persistence/ResourceStore.java |   1 +
 .../org/apache/kylin/common/util/ByteArray.java |   2 +-
 .../org/apache/kylin/common/util/BytesUtil.java |  16 +-
 .../apache/kylin/common/util/BytesUtilTest.java |  20 ++
 .../java/org/apache/kylin/dict/Dictionary.java  |  31 ++--
 .../org/apache/kylin/dict/TrieDictionary.java   |  48 ++---
 .../apache/kylin/dict/NumberDictionaryTest.java |   2 +-
 .../metadata/model_desc/kylin_sales_model.json  |  17 ++
 .../localmeta/streaming/kafka_test.json         |  15 ++
 .../apache/kylin/invertedindex/IIInstance.java  |  12 ++
 .../apache/kylin/invertedindex/IIManager.java   |  17 +-
 .../invertedindex/index/BatchSliceBuilder.java  |   8 +-
 .../model/IIJoinedFlatTableDesc.java            |  12 +-
 .../invertedindex/model/IIKeyValueCodec.java    |  91 +++++----
 .../model/IIKeyValueCodecWithState.java         |  68 +++++++
 .../apache/kylin/invertedindex/model/IIRow.java |  13 ++
 .../org/apache/kylin/job/JoinedFlatTable.java   |   1 -
 .../kylin/job/constant/BatchConstants.java      |   5 +
 .../kylin/job/constant/ExecutableConstants.java |   1 +
 .../apache/kylin/job/cube/CubingJobBuilder.java |  85 ++++++---
 .../kylin/job/hadoop/AbstractHadoopJob.java     |   5 +-
 .../kylin/job/hadoop/cube/BaseCuboidMapper.java |   2 +-
 .../kylin/job/hadoop/cube/CubeHFileMapper.java  |   2 +-
 .../kylin/job/hadoop/cube/CuboidReducer.java    |   2 +-
 .../cube/FactDistinctColumnsCombiner.java       |  26 ++-
 .../job/hadoop/cube/FactDistinctColumnsJob.java |  14 +-
 .../hadoop/cube/FactDistinctColumnsMapper.java  | 139 --------------
 .../cube/FactDistinctColumnsMapperBase.java     |  81 ++++++++
 .../hadoop/cube/FactDistinctColumnsReducer.java | 143 ++++++++++++---
 .../cube/FactDistinctHiveColumnsMapper.java     | 148 +++++++++++++++
 .../cube/FactDistinctIIColumnsMapper.java       | 129 +++++++++++++
 .../job/hadoop/cube/MergeCuboidMapper.java      |   2 +-
 .../kylin/job/hadoop/cube/NDCuboidMapper.java   |   2 +-
 .../job/hadoop/cube/NewBaseCuboidMapper.java    |   2 +-
 .../hadoop/cubev2/BuildDictionaryMapper.java    |   2 +-
 .../kylin/job/hadoop/cubev2/InMemCuboidJob.java | 183 +++++++++++++++++++
 .../job/hadoop/cubev2/InMemCuboidMapper.java    | 163 ++++++++++-------
 .../job/hadoop/cubev2/InMemCuboidReducer.java   |  82 +++++++++
 .../invertedindex/InvertedIndexMapper.java      |   2 +-
 .../invertedindex/InvertedIndexPartitioner.java |   2 +-
 .../invertedindex/InvertedIndexReducer.java     |   2 +-
 .../kylin/job/streaming/StreamingBootstrap.java | 117 ++++++++++++
 .../kylin/job/streaming/StreamingCLI.java       |  71 +++++++
 .../kylin/job/BuildCubeWithStreamTest.java      |   4 +-
 .../apache/kylin/job/IIStreamBuilderTest.java   |  80 ++++++++
 .../kylin/metadata/model/DimensionDesc.java     |  12 +-
 .../metadata/model/IJoinedFlatTableDesc.java    |   2 -
 .../metadata/model/IntermediateColumnDesc.java  |   4 +
 pom.xml                                         |   1 +
 .../gridtable/GTDictionaryCodeSystem.java       |  72 +++++---
 .../endpoint/HbaseServerKVIterator.java         |   9 +-
 streaming/pom.xml                               |   8 +
 .../apache/kylin/streaming/BrokerConfig.java    |  78 ++++++++
 .../kylin/streaming/JsonStreamParser.java       |  73 ++++++++
 .../org/apache/kylin/streaming/KafkaConfig.java |  99 +++++-----
 .../apache/kylin/streaming/KafkaConsumer.java   |  22 +--
 .../apache/kylin/streaming/KafkaRequester.java  | 128 +++++++------
 .../apache/kylin/streaming/StreamBuilder.java   |   9 +
 .../apache/kylin/streaming/StreamManager.java   | 114 ++++++++++++
 .../apache/kylin/streaming/StreamParser.java    |  47 +++++
 .../kylin/streaming/StringStreamParser.java     |  55 ++++++
 .../kylin/streaming/cube/CubeStreamBuilder.java |  37 ++--
 .../invertedindex/IIStreamBuilder.java          |   6 +-
 .../kylin/streaming/EternalStreamProducer.java  |   5 +-
 .../apache/kylin/streaming/KafkaBaseTest.java   |  23 ---
 .../apache/kylin/streaming/KafkaConfigTest.java |  65 -------
 .../kylin/streaming/KafkaConsumerTest.java      |   8 +-
 .../kylin/streaming/KafkaRequesterTest.java     |  11 +-
 .../kylin/streaming/Nous/NousMessageTest.java   |   4 +-
 .../kylin/streaming/OneOffStreamProducer.java   |   3 +-
 .../kylin/streaming/StreamManagerTest.java      |  69 +++++++
 .../invertedindex/IIStreamBuilderTest.java      |  41 -----
 .../invertedindex/PrintOutStreamBuilder.java    |  67 +++++++
 .../kafka_streaming_test/kafka.properties       |  10 -
 75 files changed, 2250 insertions(+), 731 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/7f73abe5/common/src/main/java/org/apache/kylin/common/util/ByteArray.java
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/7f73abe5/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
----------------------------------------------------------------------
diff --cc dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
index a931359,a931359..815b06d
--- a/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
+++ b/dictionary/src/main/java/org/apache/kylin/dict/Dictionary.java
@@@ -73,15 -73,15 +73,16 @@@ abstract public class Dictionary<T> imp
      }
  
      /**
--     * Returns the ID integer of given value. In case of not found - if
--     * roundingFlag=0, throw IllegalArgumentException; - if roundingFlag<0, the
--     * closest smaller ID integer if exist; - if roundingFlag>0, the closest
--     * bigger ID integer if exist. The implementation often has cache, thus
--     * faster than the byte[] version getIdFromValueBytes()
++     * Returns the ID integer of given value. In case of not found
++     * - if roundingFlag=0, throw IllegalArgumentException;
++     * - if roundingFlag<0, the closest smaller ID integer if exist;
++     * - if roundingFlag>0, the closest bigger ID integer if exist.
++     * 
++     * The implementation often has cache, thus faster than the byte[] version getIdFromValueBytes()
       * 
       * @throws IllegalArgumentException
--     *             if value is not found in dictionary and rounding is off or
--     *             failed
++     *             if value is not found in dictionary and rounding is off;
++     *             or if rounding cannot find a smaller or bigger ID
       */
      final public int getIdFromValue(T value, int roundingFlag) {
          if (isNullObjectForm(value))
@@@ -119,16 -119,16 +120,16 @@@
      }
  
      /**
--     * A lower level API, return ID integer from raw value bytes. In case of not
--     * found - if roundingFlag=0, throw IllegalArgumentException; - if
--     * roundingFlag<0, the closest smaller ID integer if exist; - if
--     * roundingFlag>0, the closest bigger ID integer if exist. Bypassing the
--     * cache layer, this could be significantly slower than getIdFromValue(T
--     * value).
++     * A lower level API, return ID integer from raw value bytes. In case of not found 
++     * - if roundingFlag=0, throw IllegalArgumentException; 
++     * - if roundingFlag<0, the closest smaller ID integer if exist; 
++     * - if roundingFlag>0, the closest bigger ID integer if exist.
++     * 
++     * Bypassing the cache layer, this could be significantly slower than getIdFromValue(T value).
       * 
       * @throws IllegalArgumentException
--     *             if value is not found in dictionary and rounding is off or
--     *             failed
++     *             if value is not found in dictionary and rounding is off;
++     *             or if rounding cannot find a smaller or bigger ID
       */
      final public int getIdFromValueBytes(byte[] value, int offset, int len, int roundingFlag) {
          if (isNullByteForm(value, offset, len))
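
The reformatted javadoc above spells out the rounding contract of getIdFromValue / getIdFromValueBytes: an exact match returns its id; otherwise roundingFlag=0 throws, roundingFlag<0 returns the closest smaller id if one exists, and roundingFlag>0 returns the closest bigger id if one exists. The following sketch illustrates that contract on a plain sorted list; it is not Kylin's TrieDictionary, and the helper name is made up for the example.

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

// Model of the documented rounding behaviour: ids are positions in a sorted value list.
public class RoundingLookupSketch {

    static int getIdFromValue(List<String> sortedValues, String value, int roundingFlag) {
        int pos = Collections.binarySearch(sortedValues, value);
        if (pos >= 0)
            return pos;                           // exact hit
        int insertion = -pos - 1;                 // where the value would be inserted
        if (roundingFlag == 0)
            throw new IllegalArgumentException("Not found: " + value);
        if (roundingFlag < 0) {
            if (insertion == 0)                   // rounding cannot find a smaller id
                throw new IllegalArgumentException("No smaller value than: " + value);
            return insertion - 1;                 // closest smaller id
        }
        if (insertion == sortedValues.size())     // rounding cannot find a bigger id
            throw new IllegalArgumentException("No bigger value than: " + value);
        return insertion;                         // closest bigger id
    }

    public static void main(String[] args) {
        List<String> dict = Arrays.asList("apple", "banana", "cherry");
        System.out.println(getIdFromValue(dict, "banana", 0));     // 1, exact match
        System.out.println(getIdFromValue(dict, "blueberry", -1)); // 1, rounds down to "banana"
        System.out.println(getIdFromValue(dict, "blueberry", 1));  // 2, rounds up to "cherry"
    }
}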

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/7f73abe5/dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java
----------------------------------------------------------------------
diff --cc dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java
index ef845ce,ef845ce..bf40eac
--- a/dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java
+++ b/dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java
@@@ -185,9 -185,9 +185,9 @@@ public class TrieDictionary<T> extends 
       * @param inpEnd
       *            -- end of input
       * @param roundingFlag
--     *            -- =0: return -1 if not found -- <0: return closest smaller if
--     *            not found, might be -1 -- >0: return closest bigger if not
--     *            found, might be nValues
++     *            -- =0: return -1 if not found
++     *            -- <0: return closest smaller if not found, return -1
++     *            -- >0: return closest bigger if not found, return nValues
       */
      private int lookupSeqNoFromValue(int n, byte[] inp, int o, int inpEnd, int roundingFlag) {
          if (inp.length == 0) // special 'empty' value
@@@ -199,11 -199,11 +199,8 @@@
              // match the current node, note [0] of node's value has been matched
              // when this node is selected by its parent
              int p = n + firstByteOffset; // start of node's value
--            int end = p + BytesUtil.readUnsigned(trieBytes, p - 1, 1); // end of
--                                                                       // node's
--                                                                       // value
--            for (p++; p < end && o < inpEnd; p++, o++) { // note matching start
--                                                         // from [1]
++            int end = p + BytesUtil.readUnsigned(trieBytes, p - 1, 1); // end of node's value
++            for (p++; p < end && o < inpEnd; p++, o++) { // note matching start from [1]
                  if (trieBytes[p] != inp[o]) {
                      int comp = BytesUtil.compareByteUnsigned(trieBytes[p], inp[o]);
                      if (comp < 0) {
@@@ -216,9 -216,9 +213,7 @@@
              // node completely matched, is input all consumed?
              boolean isEndOfValue = checkFlag(n, BIT_IS_END_OF_VALUE);
              if (o == inpEnd) {
--                return p == end && isEndOfValue ? seq : roundSeqNo(roundingFlag, seq - 1, -1, seq); // input
--                                                                                                    // all
--                                                                                                    // matched
++                return p == end && isEndOfValue ? seq : roundSeqNo(roundingFlag, seq - 1, -1, seq); // input all matched
              }
              if (isEndOfValue)
                  seq++;
@@@ -226,9 -226,9 +221,7 @@@
              // find a child to continue
              int c = headSize + (BytesUtil.readUnsigned(trieBytes, n, sizeChildOffset) & childOffsetMask);
              if (c == headSize) // has no children
--                return roundSeqNo(roundingFlag, seq - 1, -1, seq); // input only
--                                                                   // partially
--                                                                   // matched
++                return roundSeqNo(roundingFlag, seq - 1, -1, seq); // input only partially matched
              byte inpByte = inp[o];
              int comp;
              while (true) {
@@@ -242,26 -242,26 +235,10 @@@
                  } else if (comp < 0) { // try next child
                      seq += BytesUtil.readUnsigned(trieBytes, c + sizeChildOffset, sizeNoValuesBeneath);
                      if (checkFlag(c, BIT_IS_LAST_CHILD))
--                        return roundSeqNo(roundingFlag, seq - 1, -1, seq); // no
--                                                                           // child
--                                                                           // can
--                                                                           // match
--                                                                           // the
--                                                                           // next
--                                                                           // byte
--                                                                           // of
--                                                                           // input
++                        return roundSeqNo(roundingFlag, seq - 1, -1, seq); // no child can match the next byte of input
                      c = p + BytesUtil.readUnsigned(trieBytes, p - 1, 1);
                  } else { // children are ordered by their first value byte
--                    return roundSeqNo(roundingFlag, seq - 1, -1, seq); // no
--                                                                       // child
--                                                                       // can
--                                                                       // match
--                                                                       // the
--                                                                       // next
--                                                                       // byte
--                                                                       // of
--                                                                       // input
++                    return roundSeqNo(roundingFlag, seq - 1, -1, seq); // no child can match the next byte of input
                  }
              }
          }
@@@ -279,9 -279,9 +256,7 @@@
      @Override
      final protected T getValueFromIdImpl(int id) {
          if (enableCache) {
--            Object[] cache = idToValueCache.get(); // SoftReference to skip
--                                                   // cache gracefully when
--                                                   // short of memory
++            Object[] cache = idToValueCache.get(); // SoftReference to skip cache gracefully when short of memory
              if (cache != null) {
                  int seq = calcSeqNoFromId(id);
                  if (seq < 0 || seq >= nValues)
@@@ -347,8 -347,8 +322,7 @@@
              int nValuesBeneath;
              while (true) {
                  nValuesBeneath = BytesUtil.readUnsigned(trieBytes, c + sizeChildOffset, sizeNoValuesBeneath);
--                if (seq - nValuesBeneath < 0) { // value is under this child,
--                                                // reset n and loop again
++                if (seq - nValuesBeneath < 0) { // value is under this child, reset n and loop again
                      n = c;
                      break;
                  } else { // go to next child

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/7f73abe5/dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java
----------------------------------------------------------------------
diff --cc dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java
index e6d2ee9,e6d2ee9..f9af244
--- a/dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java
+++ b/dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java
@@@ -122,7 -122,7 +122,7 @@@ public class NumberDictionaryTest 
          }
  
          // test rounding
--        for (int i = 0; i < n; i++) {
++        for (int i = 0; i < n * 50; i++) {
              String randStr = randNumber();
              BigDecimal rand = new BigDecimal(randStr);
              int binarySearch = Collections.binarySearch(sorted, rand);

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/7f73abe5/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/BuildDictionaryMapper.java
----------------------------------------------------------------------
diff --cc job/src/main/java/org/apache/kylin/job/hadoop/cubev2/BuildDictionaryMapper.java
index 4d66186,0000000..a2c2c3b
mode 100644,000000..100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/BuildDictionaryMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/BuildDictionaryMapper.java
@@@ -1,184 -1,0 +1,184 @@@
 +package org.apache.kylin.job.hadoop.cubev2;
 +
 +import com.google.common.base.Function;
 +import com.google.common.collect.*;
 +import org.apache.commons.lang3.StringUtils;
 +import org.apache.hadoop.conf.Configuration;
 +import org.apache.hadoop.hbase.util.Bytes;
 +import org.apache.hadoop.io.Text;
 +import org.apache.hadoop.mapreduce.Mapper;
 +import org.apache.hive.hcatalog.data.HCatRecord;
 +import org.apache.hive.hcatalog.data.schema.HCatSchema;
 +import org.apache.kylin.common.KylinConfig;
 +import org.apache.kylin.common.hll.HyperLogLogPlusCounter;
 +import org.apache.kylin.common.mr.KylinMapper;
 +import org.apache.kylin.cube.CubeInstance;
 +import org.apache.kylin.cube.CubeManager;
 +import org.apache.kylin.cube.CubeSegment;
 +import org.apache.kylin.cube.cuboid.Cuboid;
 +import org.apache.kylin.cube.cuboid.CuboidScheduler;
 +import org.apache.kylin.cube.model.CubeDesc;
 +import org.apache.kylin.cube.model.CubeJoinedFlatTableDesc;
 +import org.apache.kylin.dict.Dictionary;
 +import org.apache.kylin.dict.DictionaryGenerator;
 +import org.apache.kylin.dict.DictionaryInfo;
 +import org.apache.kylin.dict.DictionaryInfoSerializer;
 +import org.apache.kylin.dict.lookup.HiveTableReader;
 +import org.apache.kylin.job.constant.BatchConstants;
 +import org.apache.kylin.job.hadoop.AbstractHadoopJob;
 +import org.apache.kylin.metadata.model.SegmentStatusEnum;
 +import org.apache.kylin.metadata.model.TblColRef;
 +import org.slf4j.Logger;
 +import org.slf4j.LoggerFactory;
 +
 +import javax.annotation.Nullable;
 +import java.io.ByteArrayOutputStream;
 +import java.io.DataOutputStream;
 +import java.io.IOException;
 +import java.nio.ByteBuffer;
 +import java.util.Collection;
 +import java.util.List;
 +import java.util.Map;
 +
 +/**
 + * Created by shaoshi on 3/24/15.
 + */
 +public class BuildDictionaryMapper<KEYIN> extends KylinMapper<KEYIN, HCatRecord, Text, Text> {
 +
 +    private static final Logger logger = LoggerFactory.getLogger(BuildDictionaryMapper.class);
 +    private String cubeName;
 +    private CubeInstance cube;
 +    private CubeSegment cubeSegment;
 +    private CubeDesc cubeDesc;
 +
 +    private HCatSchema schema = null;
 +    private HyperLogLogPlusCounter hll;
 +
 +
 +    private Text outputKey = new Text();
 +    private Text outputValue = new Text();
 +    private List<TblColRef> dimColumns;
 +    private SetMultimap<Integer, String> columnDistinctValueMap;
 +    private CuboidScheduler cuboidScheduler = null;
 +    private CubeJoinedFlatTableDesc intermediateTableDesc;
 +    private long baseCuboidId;
 +    private List<String> rowKeyValues = null;
 +    private int nRowKey;
 +
 +    @Override
 +    protected void setup(Context context) throws IOException {
 +        super.publishConfiguration(context.getConfiguration());
 +
 +        Configuration conf = context.getConfiguration();
 +
-         KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(conf);
++        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
 +        cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
 +        cube = CubeManager.getInstance(config).getCube(cubeName);
 +        String segmentName = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_NAME).toUpperCase();
 +        cubeDesc = cube.getDescriptor();
 +        cubeSegment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);
 +        dimColumns = cubeDesc.listDimensionColumnsExcludingDerived();
 +        hll = new HyperLogLogPlusCounter(16);
 +        columnDistinctValueMap = HashMultimap.create(); // key is col, value is a set of string values
 +        cuboidScheduler = new CuboidScheduler(cubeDesc);
 +        intermediateTableDesc = new CubeJoinedFlatTableDesc(cube.getDescriptor(), cubeSegment);
 +        baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
 +        nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length;
 +
 +        rowKeyValues = Lists.newArrayList();
 +    }
 +
 +    @Override
 +    public void map(KEYIN key, HCatRecord record, Context context) throws IOException, InterruptedException {
 +        String[] row = HiveTableReader.getRowAsStringArray(record);
 +        buildDictAndCount(row);
 +    }
 +
 +    protected void buildDictAndCount(String[] row) {
 +        for (int i = 0; i < intermediateTableDesc.getRowKeyColumnIndexes().length; i++) {
 +            columnDistinctValueMap.put(i, row[intermediateTableDesc.getRowKeyColumnIndexes()[i]]);
 +        }
 +
 +        putRowKeyToHLL(row, baseCuboidId); // recursively put all possible row keys to hll
 +    }
 +
 +    protected void cleanup(Mapper.Context context) throws IOException, InterruptedException {
 +        Map<Integer, DictionaryInfo> dictionaries = buildDictionary();
 +
 +        DictionaryInfoSerializer dictionaryInfoSerializer = new DictionaryInfoSerializer();
 +        Cuboid baseCuboid = Cuboid.findById(cubeDesc, this.baseCuboidId);
 +        byte[] keyBuf;
 +        // output dictionary to reducer, key is the index of the col on row key;
 +        for (Integer rowKeyIndex : dictionaries.keySet()) {
 +            keyBuf = Bytes.toBytes(rowKeyIndex);
 +            outputKey.set(keyBuf);
 +
 +            //serialize the dictionary to bytes;
 +            ByteArrayOutputStream buf = new ByteArrayOutputStream();
 +            DataOutputStream dout = new DataOutputStream(buf);
 +            dictionaryInfoSerializer.serialize(dictionaries.get(rowKeyIndex), dout);
 +            dout.close();
 +            buf.close();
 +            byte[] dictionaryBytes = buf.toByteArray();
 +            outputValue.set(dictionaryBytes);
 +
 +            context.write(outputKey, outputValue);
 +        }
 +
 +        // output hll to reducer, key is -1
 +        keyBuf = Bytes.toBytes(-1);
 +        outputKey.set(keyBuf);
 +        ByteBuffer hllBuf = ByteBuffer.allocate(1024 * 1024);
 +        hll.writeRegisters(hllBuf);
 +        outputValue.set(hllBuf.array());
 +        outputKey.set(keyBuf, 0, keyBuf.length);
 +        context.write(outputKey, outputValue);
 +    }
 +
 +    private void putRowKeyToHLL(String[] row, long cuboidId) {
 +        rowKeyValues.clear();
 +        long mask = Long.highestOneBit(baseCuboidId);
 +        // int actualLength = Long.SIZE - Long.numberOfLeadingZeros(baseCuboidId);
 +        for (int i = 0; i < nRowKey; i++) {
 +            if ((mask & cuboidId) != 0) {
 +                rowKeyValues.add(row[intermediateTableDesc.getRowKeyColumnIndexes()[i]]);
 +            }
 +            mask = mask >> 1;
 +        }
 +
 +        String key = StringUtils.join(rowKeyValues, ",");
 +        hll.add(key);
 +
 +        Collection<Long> children = cuboidScheduler.getSpanningCuboid(cuboidId);
 +        for (Long childId : children) {
 +            putRowKeyToHLL(row, childId);
 +        }
 +
 +    }
 +
 +    private Map<Integer, DictionaryInfo> buildDictionary() {
 +        Map<Integer, DictionaryInfo> dictionaryMap = Maps.newHashMap();
 +        for (int i = 0; i < intermediateTableDesc.getRowKeyColumnIndexes().length; i++) {
 +            // dictionary
 +            if (cubeDesc.getRowkey().isUseDictionary(i)) {
 +                TblColRef col = cubeDesc.getRowkey().getRowKeyColumns()[i].getColRef();
 +                Dictionary dict = DictionaryGenerator.buildDictionaryFromValueList(col.getType(), Collections2.transform(columnDistinctValueMap.get(i), new Function<String, byte[]>() {
 +                    @Nullable
 +                    @Override
 +                    public byte[] apply(String input) {
 +                        return input.getBytes();
 +                    }
 +                }));
 +
 +                logger.info("Building dictionary for " + col);
 +                DictionaryInfo dictInfo = new DictionaryInfo(col.getTable(), col.getName(), 0, col.getDatatype(), null, "");
 +                dictInfo.setDictionaryObject(dict);
 +                dictInfo.setDictionaryClass(dict.getClass().getName());
 +                dictionaryMap.put(i, dictInfo);
 +            }
 +        }
 +
 +        return dictionaryMap;
 +    }
 +
 +}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/7f73abe5/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
----------------------------------------------------------------------
diff --cc storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
index 03c7541,45b5d5f..6f2d9ce
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
@@@ -15,12 -13,11 +13,12 @@@ import java.util.Map
  /**
   * Created by shaoshi on 3/23/15.
   */
++@SuppressWarnings({ "rawtypes", "unchecked" })
  public class GTDictionaryCodeSystem implements IGTCodeSystem {
      private GTInfo info;
-     private BitSet encodedColumns = null;
      private Map<Integer, Dictionary> dictionaryMaps = null; // key: column index; value: dictionary for this column;
-     private Map<Integer, DataTypeSerializer> serializerMap = null; // column index; value: serializer for this column;
      private IFilterCodeSystem<ByteArray> filterCS;
+     private DataTypeSerializer[] serializers;
  
      public GTDictionaryCodeSystem(Map<Integer, Dictionary> dictionaryMaps) {
          this.dictionaryMaps = dictionaryMaps;
@@@ -95,12 -87,7 +88,7 @@@
  
      @Override
      public Object decodeColumnValue(int col, ByteBuffer buf) {
-         if (useDictionary(col)) {
-             int id = BytesUtil.readUnsigned(buf, dictionaryMaps.get(col).getSizeOfId());
-             return dictionaryMaps.get(col).getValueFromId(id);
-         } else {
-             return serializerMap.get(col).deserialize(buf);
-         }
 -       return serializers[col].deserialize(buf);
++        return serializers[col].deserialize(buf);
      }
  
      @Override


[19/50] incubator-kylin git commit: KYLIN-625, consider null & code system in GTRecord comparison

Posted by li...@apache.org.
KYLIN-625, consider null & code system in GTRecord comparison


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/959d031c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/959d031c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/959d031c

Branch: refs/heads/streaming-localdict
Commit: 959d031ce310921b0c77f173f3a55b449df54c60
Parents: 5dda35f
Author: Li, Yang <ya...@ebay.com>
Authored: Fri Mar 27 10:35:19 2015 +0800
Committer: Li, Yang <ya...@ebay.com>
Committed: Fri Mar 27 10:35:19 2015 +0800

----------------------------------------------------------------------
 .../org/apache/kylin/common/util/ByteArray.java | 25 ++++++++++++++++----
 .../kylin/storage/gridtable/GTRecord.java       |  5 +++-
 2 files changed, 24 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/959d031c/common/src/main/java/org/apache/kylin/common/util/ByteArray.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/kylin/common/util/ByteArray.java b/common/src/main/java/org/apache/kylin/common/util/ByteArray.java
index df107a5..8856fe8 100644
--- a/common/src/main/java/org/apache/kylin/common/util/ByteArray.java
+++ b/common/src/main/java/org/apache/kylin/common/util/ByteArray.java
@@ -30,7 +30,7 @@ public class ByteArray implements Comparable<ByteArray> {
     public static ByteArray allocate(int length) {
         return new ByteArray(new byte[length]);
     }
-    
+
     public static ByteArray copyOf(byte[] array, int offset, int length) {
         byte[] space = new byte[length];
         System.arraycopy(array, offset, space, 0, length);
@@ -88,7 +88,7 @@ public class ByteArray implements Comparable<ByteArray> {
     public void setLength(int length) {
         this.length = length;
     }
-    
+
     public ByteArray copy() {
         ByteArray copy = new ByteArray(length);
         copy.copyFrom(this);
@@ -111,7 +111,10 @@ public class ByteArray implements Comparable<ByteArray> {
 
     @Override
     public int hashCode() {
-        return Bytes.hashCode(data, offset, length);
+        if (data == null)
+            return 0;
+        else
+            return Bytes.hashCode(data, offset, length);
     }
 
     @Override
@@ -123,12 +126,24 @@ public class ByteArray implements Comparable<ByteArray> {
         if (getClass() != obj.getClass())
             return false;
         ByteArray o = (ByteArray) obj;
-        return Bytes.equals(this.data, this.offset, this.length, o.data, o.offset, o.length);
+        if (this.data == null && o.data == null)
+            return true;
+        else if (this.data == null || o.data == null)
+            return false;
+        else
+            return Bytes.equals(this.data, this.offset, this.length, o.data, o.offset, o.length);
     }
 
     @Override
     public int compareTo(ByteArray o) {
-        return Bytes.compareTo(this.data, this.offset, this.length, o.data, o.offset, o.length);
+        if (this.data == null && o.data == null)
+            return 0;
+        else if (this.data == null)
+            return -1;
+        else if (o.data == null)
+            return 1;
+        else
+            return Bytes.compareTo(this.data, this.offset, this.length, o.data, o.offset, o.length);
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/959d031c/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRecord.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRecord.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRecord.java
index 6eb38a9..605a469 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRecord.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRecord.java
@@ -5,6 +5,7 @@ import java.util.Arrays;
 import java.util.BitSet;
 
 import org.apache.kylin.common.util.ByteArray;
+import org.apache.kylin.metadata.filter.IFilterCodeSystem;
 
 public class GTRecord implements Comparable<GTRecord> {
 
@@ -128,11 +129,13 @@ public class GTRecord implements Comparable<GTRecord> {
 
     @Override
     public int compareTo(GTRecord o) {
+        assert this.info == o.info;
         assert this.maskForEqualHashComp == o.maskForEqualHashComp; // reference equal for performance
+        IFilterCodeSystem<ByteArray> cs = info.codeSystem.getFilterCodeSystem();
         
         int comp = 0;
         for (int i = maskForEqualHashComp.nextSetBit(0); i >= 0; i = maskForEqualHashComp.nextSetBit(i + 1)) {
-            comp = this.cols[i].compareTo(o.cols[i]);
+            comp = cs.compare(cols[i], o.cols[i]);
             if (comp != 0)
                 return comp;
         }
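
The net effect of the ByteArray change is that a ByteArray backed by a null array can now be hashed, compared for equality and sorted without throwing, while GTRecord.compareTo delegates per-column comparison to the GTInfo's IFilterCodeSystem instead of raw byte order. A minimal sketch of the new ByteArray behaviour, assuming the no-arg ByteArray constructor (used elsewhere in this patch series) leaves the backing array null:

    import org.apache.kylin.common.util.ByteArray;

    public class ByteArrayNullDemo {
        public static void main(String[] args) {
            ByteArray nullBacked = new ByteArray();                  // assumed: backing array stays null
            ByteArray abc = new ByteArray(new byte[] { 1, 2, 3 });

            System.out.println(nullBacked.hashCode());               // 0 rather than an NPE
            System.out.println(nullBacked.equals(new ByteArray()));  // true: both arrays null
            System.out.println(nullBacked.compareTo(abc) < 0);       // true: null sorts first
        }
    }

Note that GTRecord.compareTo now also asserts the two records share the same GTInfo, since the comparison is defined by that info's code system.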


[39/50] incubator-kylin git commit: KYLIN-653 minor fix

Posted by li...@apache.org.
KYLIN-653 minor fix


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/f3a592b3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/f3a592b3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/f3a592b3

Branch: refs/heads/streaming-localdict
Commit: f3a592b33ab6d171eaf9062fac3025c893f576b2
Parents: 929b986
Author: honma <ho...@ebay.com>
Authored: Fri Mar 27 16:13:22 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Fri Mar 27 16:13:22 2015 +0800

----------------------------------------------------------------------
 .../hadoop/cube/FactDistinctHiveColumnsMapper.java  | 16 ++++++++--------
 .../kylin/job/hadoop/invertedindex/II2CubeTest.java |  8 ++++----
 2 files changed, 12 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/f3a592b3/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctHiveColumnsMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctHiveColumnsMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctHiveColumnsMapper.java
index 9e9c096..654bf4e 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctHiveColumnsMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctHiveColumnsMapper.java
@@ -18,8 +18,12 @@
 
 package org.apache.kylin.job.hadoop.cube;
 
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hive.hcatalog.data.HCatRecord;
@@ -27,17 +31,13 @@ import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
 import org.apache.hive.hcatalog.data.schema.HCatSchema;
 import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
 import org.apache.kylin.common.hll.HyperLogLogPlusCounter;
-import org.apache.kylin.cube.cuboid.Cuboid;
 import org.apache.kylin.cube.cuboid.CuboidScheduler;
 import org.apache.kylin.cube.model.CubeJoinedFlatTableDesc;
 import org.apache.kylin.dict.lookup.HiveTableReader;
 import org.apache.kylin.job.constant.BatchConstants;
 
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
 
 /**
  * @author yangli9

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/f3a592b3/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java b/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
index 62cf6e8..97c71f8 100644
--- a/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
+++ b/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
@@ -13,7 +13,7 @@ import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.hbase.mapreduce.MutationSerialization;
 import org.apache.hadoop.hbase.mapreduce.ResultSerialization;
-import org.apache.hadoop.io.ShortWritable;
+import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mrunit.mapreduce.MapDriver;
 import org.apache.hadoop.mrunit.types.Pair;
@@ -120,7 +120,7 @@ public class II2CubeTest extends LocalFileMetadataTestCase {
 
     @Test
     public void factDistinctIIColumnsMapperTest() throws IOException {
-        MapDriver<ImmutableBytesWritable, Result, ShortWritable, Text> mapDriver;
+        MapDriver<ImmutableBytesWritable, Result, LongWritable, Text> mapDriver;
         FactDistinctIIColumnsMapper mapper = new FactDistinctIIColumnsMapper();
         mapDriver = MapDriver.newMapDriver(mapper);
 
@@ -135,9 +135,9 @@ public class II2CubeTest extends LocalFileMetadataTestCase {
             }
         })));
 
-        List<Pair<ShortWritable, Text>> result = mapDriver.run();
+        List<Pair<LongWritable, Text>> result = mapDriver.run();
         Set<String> lstgNames = Sets.newHashSet("FP-non GTC","ABIN");
-        for(Pair<ShortWritable, Text> pair : result)
+        for(Pair<LongWritable, Text> pair : result)
         {
             Assert.assertEquals(pair.getFirst().get(),6);
             Assert.assertTrue(lstgNames.contains(pair.getSecond().toString()));


[11/50] incubator-kylin git commit: fix

Posted by li...@apache.org.
fix


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/c3ff4f44
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/c3ff4f44
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/c3ff4f44

Branch: refs/heads/streaming-localdict
Commit: c3ff4f447f0884da9635c783ab5aa1d25243887b
Parents: 227edf7
Author: qianhao.zhou <qi...@ebay.com>
Authored: Thu Mar 26 19:38:38 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Thu Mar 26 19:38:38 2015 +0800

----------------------------------------------------------------------
 .../apache/kylin/job/IIStreamBuilderTest.java   | 80 ++++++++++++++++++++
 .../kylin/streaming/StreamingBootstrap.java     | 23 ++++--
 .../apache/kylin/streaming/StreamingCLI.java    |  3 +-
 .../invertedindex/IIStreamBuilderTest.java      | 41 ----------
 4 files changed, 98 insertions(+), 49 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/c3ff4f44/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java b/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
new file mode 100644
index 0000000..35a0fe9
--- /dev/null
+++ b/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
@@ -0,0 +1,80 @@
+/*
+ *
+ *
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *
+ *  contributor license agreements. See the NOTICE file distributed with
+ *
+ *  this work for additional information regarding copyright ownership.
+ *
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *
+ *  (the "License"); you may not use this file except in compliance with
+ *
+ *  the License. You may obtain a copy of the License at
+ *
+ *
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *
+ *  See the License for the specific language governing permissions and
+ *
+ *  limitations under the License.
+ *
+ * /
+ */
+
+package org.apache.kylin.job;
+
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.util.AbstractKylinTestCase;
+import org.apache.kylin.common.util.ClassUtil;
+import org.apache.kylin.common.util.HBaseMetadataTestCase;
+import org.apache.kylin.streaming.StreamingBootstrap;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.File;
+
+/**
+ * Created by qianzhou on 3/6/15.
+ */
+public class IIStreamBuilderTest extends HBaseMetadataTestCase {
+
+    private KylinConfig kylinConfig;
+
+    @BeforeClass
+    public static void beforeClass() throws Exception {
+        ClassUtil.addClasspath(new File(HBaseMetadataTestCase.SANDBOX_TEST_DATA).getAbsolutePath());
+        System.setProperty("hdp.version", "2.2.0.0-2041"); // mapred-site.xml ref this
+    }
+
+    @Before
+    public void before() throws Exception {
+        HBaseMetadataTestCase.staticCreateTestMetadata(AbstractKylinTestCase.SANDBOX_TEST_DATA);
+        kylinConfig = KylinConfig.getInstanceFromEnv();
+        DeployUtil.initCliWorkDir();
+        DeployUtil.deployMetadata();
+        DeployUtil.overrideJobJarLocations();
+    }
+
+    @After
+    public void after() {
+        this.cleanupTestMetadata();
+    }
+
+    @Test
+    public void test() throws Exception {
+        StreamingBootstrap.getInstance(kylinConfig).startStreaming("eagle", 0);
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/c3ff4f44/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java b/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java
index 4b7c6b7..bd1ab42 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java
@@ -39,7 +39,6 @@ import kafka.api.OffsetRequest;
 import kafka.cluster.Broker;
 import kafka.javaapi.PartitionMetadata;
 import org.apache.kylin.common.KylinConfig;
-import org.apache.kylin.invertedindex.IIDescManager;
 import org.apache.kylin.invertedindex.IIInstance;
 import org.apache.kylin.invertedindex.IIManager;
 import org.apache.kylin.invertedindex.model.IIDesc;
@@ -54,11 +53,19 @@ import java.util.concurrent.Future;
  */
 public class StreamingBootstrap {
 
-    private static KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
-    private static StreamManager streamManager = StreamManager.getInstance(kylinConfig);
-    private static IIManager iiManager = IIManager.getInstance(kylinConfig);
-    private static IIDescManager iiDescManager = IIDescManager.getInstance(kylinConfig);
+    private KylinConfig kylinConfig;
+    private StreamManager streamManager;
+    private IIManager iiManager;
 
+    public static StreamingBootstrap getInstance(KylinConfig kylinConfig) {
+        return new StreamingBootstrap(kylinConfig);
+    }
+
+    private StreamingBootstrap(KylinConfig kylinConfig) {
+        this.kylinConfig = kylinConfig;
+        this.streamManager = StreamManager.getInstance(kylinConfig);
+        this.iiManager = IIManager.getInstance(kylinConfig);
+    }
 
     private static Broker getLeadBroker(KafkaConfig kafkaConfig, int partitionId) {
         final PartitionMetadata partitionMetadata = KafkaRequester.getPartitionMetadata(kafkaConfig.getTopic(), partitionId, kafkaConfig.getBrokers(), kafkaConfig);
@@ -69,7 +76,7 @@ public class StreamingBootstrap {
         }
     }
 
-    public static void startStreaming(String streamingConf, int partitionId) throws Exception {
+    public void startStreaming(String streamingConf, int partitionId) throws Exception {
         final KafkaConfig kafkaConfig = streamManager.getKafkaConfig(streamingConf);
         Preconditions.checkArgument(kafkaConfig != null, "cannot find kafka config:" + streamingConf);
         final IIInstance ii = iiManager.getII(kafkaConfig.getIiName());
@@ -94,7 +101,9 @@ public class StreamingBootstrap {
         };
         final IIDesc desc = ii.getDescriptor();
         Executors.newSingleThreadExecutor().submit(consumer);
-        final Future<?> future = Executors.newSingleThreadExecutor().submit(new IIStreamBuilder(consumer.getStreamQueue(), ii.getSegments().get(0).getStorageLocationIdentifier(), desc, partitionId));
+        final IIStreamBuilder task = new IIStreamBuilder(consumer.getStreamQueue(), ii.getSegments().get(0).getStorageLocationIdentifier(), desc, partitionId);
+        task.setStreamParser(JsonStreamParser.instance);
+        final Future<?> future = Executors.newSingleThreadExecutor().submit(task);
         future.get();
     }
 }

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/c3ff4f44/streaming/src/main/java/org/apache/kylin/streaming/StreamingCLI.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/StreamingCLI.java b/streaming/src/main/java/org/apache/kylin/streaming/StreamingCLI.java
index 70290f1..dac8ce0 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/StreamingCLI.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/StreamingCLI.java
@@ -35,6 +35,7 @@
 package org.apache.kylin.streaming;
 
 import org.apache.commons.lang3.StringUtils;
+import org.apache.kylin.common.KylinConfig;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -53,7 +54,7 @@ public class StreamingCLI {
             }
             if (args[0].equals("start")) {
                 String kafkaConfName = args[1];
-                StreamingBootstrap.startStreaming(kafkaConfName, 0);
+                StreamingBootstrap.getInstance(KylinConfig.getInstanceFromEnv()).startStreaming(kafkaConfName, 0);
             } else if (args.equals("stop")) {
 
             } else {

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/c3ff4f44/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilderTest.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilderTest.java b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilderTest.java
deleted file mode 100644
index 11b8868..0000000
--- a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilderTest.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- *
- *
- *  Licensed to the Apache Software Foundation (ASF) under one or more
- *
- *  contributor license agreements. See the NOTICE file distributed with
- *
- *  this work for additional information regarding copyright ownership.
- *
- *  The ASF licenses this file to You under the Apache License, Version 2.0
- *
- *  (the "License"); you may not use this file except in compliance with
- *
- *  the License. You may obtain a copy of the License at
- *
- *
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- *
- *
- *  Unless required by applicable law or agreed to in writing, software
- *
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *
- *  See the License for the specific language governing permissions and
- *
- *  limitations under the License.
- *
- * /
- */
-
-package org.apache.kylin.streaming.invertedindex;
-
-/**
- * Created by qianzhou on 3/6/15.
- */
-public class IIStreamBuilderTest {
-}
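
The refactor replaces the static singletons in StreamingBootstrap with an instance created per KylinConfig, which is what lets the new IIStreamBuilderTest above drive it against a sandbox configuration. A sketch of the resulting call pattern (the "eagle" kafka config name and partition 0 are the values used in the test; the class name is illustrative only):

    import org.apache.kylin.common.KylinConfig;
    import org.apache.kylin.streaming.StreamingBootstrap;

    public class StartStreamingDemo {
        public static void main(String[] args) throws Exception {
            KylinConfig config = KylinConfig.getInstanceFromEnv();
            // one bootstrap instance per config, no shared static state
            StreamingBootstrap.getInstance(config).startStreaming("eagle", 0);
        }
    }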


[28/50] incubator-kylin git commit: Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict

Posted by li...@apache.org.
Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/dee29553
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/dee29553
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/dee29553

Branch: refs/heads/streaming-localdict
Commit: dee295531f255f616abc4ae36575b75632a0d37b
Parents: b201040 bbbcae8
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 14:06:32 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 14:06:32 2015 +0800

----------------------------------------------------------------------
 .../kylin/streaming/cube/CubeStreamBuilder.java |   4 +-
 .../kylin/streaming/EternalStreamProducer.java  |   2 +-
 .../Nous/NousEternalStreamProducer.java         |  46 --------
 .../kylin/streaming/Nous/NousMessage.java       | 118 -------------------
 .../kylin/streaming/Nous/NousMessageTest.java   |  31 -----
 .../kylin/streaming/OneOffStreamProducer.java   |   5 +-
 .../nous/NousEternalStreamProducer.java         |  46 ++++++++
 .../kylin/streaming/nous/NousMessage.java       | 118 +++++++++++++++++++
 .../kylin/streaming/nous/NousMessageTest.java   |  31 +++++
 9 files changed, 202 insertions(+), 199 deletions(-)
----------------------------------------------------------------------



[07/50] incubator-kylin git commit: Bug fix in BytesUtil.writeUnsigned

Posted by li...@apache.org.
Bug fix in BytesUtil.writeUnsigned

Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/d564876c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/d564876c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/d564876c

Branch: refs/heads/streaming-localdict
Commit: d564876c6776c91895f150e0b9512a18c1e34d35
Parents: 1153150
Author: Shao Feng, Shi <sh...@ebay.com>
Authored: Thu Mar 26 18:03:33 2015 +0800
Committer: Shao Feng, Shi <sh...@ebay.com>
Committed: Thu Mar 26 18:03:33 2015 +0800

----------------------------------------------------------------------
 .../common/hll/HyperLogLogPlusCounter.java      | 29 ++++++++++++++++++--
 .../org/apache/kylin/common/util/BytesUtil.java | 16 +++++------
 .../apache/kylin/common/util/BytesUtilTest.java | 20 ++++++++++++++
 3 files changed, 55 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d564876c/common/src/main/java/org/apache/kylin/common/hll/HyperLogLogPlusCounter.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/kylin/common/hll/HyperLogLogPlusCounter.java b/common/src/main/java/org/apache/kylin/common/hll/HyperLogLogPlusCounter.java
index 686321b..d817bd2 100644
--- a/common/src/main/java/org/apache/kylin/common/hll/HyperLogLogPlusCounter.java
+++ b/common/src/main/java/org/apache/kylin/common/hll/HyperLogLogPlusCounter.java
@@ -186,7 +186,7 @@ public class HyperLogLogPlusCounter implements Comparable<HyperLogLogPlusCounter
             BytesUtil.writeVInt(size, out);
             for (int i = 0; i < m; i++) {
                 if (registers[i] > 0) {
-                    BytesUtil.writeUnsigned(i, indexLen, out);
+                    writeUnsigned(i, indexLen, out);
                     out.put(registers[i]);
                 }
             }
@@ -207,7 +207,7 @@ public class HyperLogLogPlusCounter implements Comparable<HyperLogLogPlusCounter
                 throw new IllegalArgumentException("register size (" + size + ") cannot be larger than m (" + m + ")");
             int indexLen = getRegisterIndexSize();
             for (int i = 0; i < size; i++) {
-                int key = BytesUtil.readUnsigned(in, indexLen);
+                int key = readUnsigned(in, indexLen);
                 registers[key] = in.get();
             }
         } else { // array scheme
@@ -306,4 +306,29 @@ public class HyperLogLogPlusCounter implements Comparable<HyperLogLogPlusCounter
             System.out.println("HLLC" + p + ",\t" + size + " bytes,\t68% err<" + er + "%" + ",\t95% err<" + er2 + "%" + ",\t99.7% err<" + er3 + "%");
         }
     }
+
+    /**
+     *
+     * @param num
+     * @param size
+     * @param out
+     */
+    public static void writeUnsigned(int num, int size, ByteBuffer out) {
+        for (int i = 0; i < size; i++) {
+            out.put((byte) num);
+            num >>>= 8;
+        }
+    }
+
+    public static int readUnsigned(ByteBuffer in, int size) {
+        int integer = 0;
+        int mask = 0xff;
+        int shift = 0;
+        for (int i = 0; i < size; i++) {
+            integer |= (in.get() << shift) & mask;
+            mask = mask << 8;
+            shift += 8;
+        }
+        return integer;
+    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d564876c/common/src/main/java/org/apache/kylin/common/util/BytesUtil.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/kylin/common/util/BytesUtil.java b/common/src/main/java/org/apache/kylin/common/util/BytesUtil.java
index dbd459d..ca1deaf 100644
--- a/common/src/main/java/org/apache/kylin/common/util/BytesUtil.java
+++ b/common/src/main/java/org/apache/kylin/common/util/BytesUtil.java
@@ -184,21 +184,21 @@ public class BytesUtil {
     }
 
     public static void writeUnsigned(int num, int size, ByteBuffer out) {
-        for (int i = 0; i < size; i++) {
-            out.put((byte) num);
-            num >>>= 8;
+        int mask = 0xff << ((size - 1) * 8);
+        for (int i = size; i > 0; i--) {
+            int v = (num & mask) >> (i - 1) * 8;
+            out.put((byte) v);
+            mask = mask >> 8;
         }
     }
 
     public static int readUnsigned(ByteBuffer in, int size) {
         int integer = 0;
-        int mask = 0xff;
-        int shift = 0;
         for (int i = 0; i < size; i++) {
-            integer |= (in.get() << shift) & mask;
-            mask = mask << 8;
-            shift += 8;
+            integer = integer << 8;
+            integer += in.get();
         }
+
         return integer;
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d564876c/common/src/test/java/org/apache/kylin/common/util/BytesUtilTest.java
----------------------------------------------------------------------
diff --git a/common/src/test/java/org/apache/kylin/common/util/BytesUtilTest.java b/common/src/test/java/org/apache/kylin/common/util/BytesUtilTest.java
index 4c88cbe..e34f391 100644
--- a/common/src/test/java/org/apache/kylin/common/util/BytesUtilTest.java
+++ b/common/src/test/java/org/apache/kylin/common/util/BytesUtilTest.java
@@ -22,6 +22,7 @@ import junit.framework.TestCase;
 import org.junit.Test;
 
 import java.nio.ByteBuffer;
+import java.util.Arrays;
 
 /**
  * by honma
@@ -53,4 +54,23 @@ public class BytesUtilTest extends TestCase {
         assertEquals(y[1], false);
     }
 
+    @Test
+    public void testWriteReadUnsignedInt() {
+
+        int testInt = 735033;
+        ByteArray ba = new ByteArray(new byte[3]);
+        BytesUtil.writeUnsigned(testInt, 3, ba.asBuffer());
+
+        byte[] newBytes = new byte[3];
+        System.arraycopy(ba.array(), 0, newBytes, 0, 3);
+        int value = BytesUtil.readUnsigned(new ByteArray(newBytes).asBuffer(), 3);
+
+        assertEquals(value, testInt);
+
+        byte[] anOtherNewBytes = new byte[3];
+        BytesUtil.writeUnsigned(testInt, anOtherNewBytes, 0, 3);
+
+        assertTrue(Arrays.equals(anOtherNewBytes, ba.array()));
+    }
+
 }
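
After this fix BytesUtil.writeUnsigned emits the most significant byte first and readUnsigned accumulates bytes in the same order, so the pair round-trips as the new unit test verifies. A small sketch of the round trip (same value as in BytesUtilTest; the class name is illustrative only):

    import java.nio.ByteBuffer;

    import org.apache.kylin.common.util.BytesUtil;

    public class WriteUnsignedDemo {
        public static void main(String[] args) {
            ByteBuffer buf = ByteBuffer.allocate(3);
            BytesUtil.writeUnsigned(735033, 3, buf);   // 735033 = 0x0B3739, written as 0B 37 39
            buf.flip();                                // rewind for reading
            int back = BytesUtil.readUnsigned(buf, 3);
            System.out.println(back == 735033);        // true
        }
    }

HyperLogLogPlusCounter keeps local copies of the previous helpers and calls those for its own registers, presumably so that counters serialized with the old byte order keep their existing format.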


[38/50] incubator-kylin git commit: Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict

Posted by li...@apache.org.
Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/12920dc2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/12920dc2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/12920dc2

Branch: refs/heads/streaming-localdict
Commit: 12920dc236a61a3956d718151909e797c83c715e
Parents: 5837af0 929b986
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 16:10:23 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 16:10:23 2015 +0800

----------------------------------------------------------------------
 .../invertedindex/index/RawTableRecord.java     |   2 +
 .../kylin/job/hadoop/cube/BaseCuboidJob.java    |   2 +-
 .../kylin/job/hadoop/cube/BaseCuboidMapper.java | 246 -------------------
 .../job/hadoop/cube/BaseCuboidMapperBase.java   | 205 ++++++++++++++++
 .../job/hadoop/cube/HiveToBaseCuboidMapper.java |  49 ++++
 .../job/hadoop/cube/IIToBaseCuboidMapper.java   | 109 ++++++++
 .../kylin/job/hadoop/cubev2/InMemCuboidJob.java |   5 -
 .../cube/BaseCuboidMapperPerformanceTest.java   |  65 -----
 .../job/hadoop/cube/BaseCuboidMapperTest.java   | 145 -----------
 .../HiveToBaseCuboidMapperPerformanceTest.java  |  65 +++++
 .../hadoop/cube/HiveToBaseCuboidMapperTest.java | 145 +++++++++++
 11 files changed, 576 insertions(+), 462 deletions(-)
----------------------------------------------------------------------



[24/50] incubator-kylin git commit: KYLIN-625, refactor interface to use GTScanRange

Posted by li...@apache.org.
KYLIN-625, refactor interface to use GTScanRange


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/d1369339
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/d1369339
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/d1369339

Branch: refs/heads/streaming-localdict
Commit: d1369339d458dfc974de3f63ef3d7c496e910c8a
Parents: b38206d
Author: Li, Yang <ya...@ebay.com>
Authored: Fri Mar 27 13:25:47 2015 +0800
Committer: Li, Yang <ya...@ebay.com>
Committed: Fri Mar 27 13:25:47 2015 +0800

----------------------------------------------------------------------
 .../job/hadoop/cubev2/InMemCuboidMapper.java    |   2 +-
 .../kylin/storage/gridtable/GTScanRange.java    |  61 +++
 .../storage/gridtable/GTScanRangePlanner.java   | 474 +++++++++++++++++++
 .../kylin/storage/gridtable/GTScanRequest.java  |  22 +-
 .../kylin/storage/gridtable/GridTableTest.java  |   2 +-
 5 files changed, 548 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d1369339/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidMapper.java
index 5a3565a..ebc65a1 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidMapper.java
@@ -164,7 +164,7 @@ public class InMemCuboidMapper<KEYIN> extends KylinMapper<KEYIN, HCatRecord, Tex
             System.arraycopy(Bytes.toBytes(cuboidId), 0, keyBuf, 0, Bytes.toBytes(cuboidId).length);
 
             GridTable gt = cuboidsMap.get(cuboidId);
-            GTScanRequest req = new GTScanRequest(gt.getInfo(), null, null, null, null);
+            GTScanRequest req = new GTScanRequest(gt.getInfo(), null, null, null);
             IGTScanner scanner = gt.scan(req);
             int offSet = 0;
             for (GTRecord record : scanner) {

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d1369339/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRange.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRange.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRange.java
new file mode 100644
index 0000000..08513f7
--- /dev/null
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRange.java
@@ -0,0 +1,61 @@
+package org.apache.kylin.storage.gridtable;
+
+import java.util.Collections;
+import java.util.List;
+
+public class GTScanRange {
+
+    final public GTRecord pkStart; // inclusive
+    final public GTRecord pkEnd; // inclusive
+    final public List<GTRecord> hbaseFuzzyKeys; // partial matching primary keys
+
+    public GTScanRange(GTRecord pkStart, GTRecord pkEnd) {
+        this(pkStart, pkEnd, null);
+    }
+
+    public GTScanRange(GTRecord pkStart, GTRecord pkEnd, List<GTRecord> hbaseFuzzyKeys) {
+        assert pkStart.info == pkEnd.info;
+        assert pkStart.maskForEqualHashComp() == pkStart.info.primaryKey;
+        assert pkEnd.maskForEqualHashComp() == pkEnd.info.primaryKey;
+        this.pkStart = pkStart;
+        this.pkEnd = pkEnd;
+        this.hbaseFuzzyKeys = hbaseFuzzyKeys == null ? Collections.<GTRecord>emptyList() : hbaseFuzzyKeys;
+    }
+
+    @Override
+    public int hashCode() {
+        final int prime = 31;
+        int result = 1;
+        result = prime * result + ((hbaseFuzzyKeys == null) ? 0 : hbaseFuzzyKeys.hashCode());
+        result = prime * result + ((pkEnd == null) ? 0 : pkEnd.hashCode());
+        result = prime * result + ((pkStart == null) ? 0 : pkStart.hashCode());
+        return result;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj)
+            return true;
+        if (obj == null)
+            return false;
+        if (getClass() != obj.getClass())
+            return false;
+        GTScanRange other = (GTScanRange) obj;
+        if (hbaseFuzzyKeys == null) {
+            if (other.hbaseFuzzyKeys != null)
+                return false;
+        } else if (!hbaseFuzzyKeys.equals(other.hbaseFuzzyKeys))
+            return false;
+        if (pkEnd == null) {
+            if (other.pkEnd != null)
+                return false;
+        } else if (!pkEnd.equals(other.pkEnd))
+            return false;
+        if (pkStart == null) {
+            if (other.pkStart != null)
+                return false;
+        } else if (!pkStart.equals(other.pkStart))
+            return false;
+        return true;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d1369339/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRangePlanner.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRangePlanner.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRangePlanner.java
new file mode 100644
index 0000000..cc58253
--- /dev/null
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRangePlanner.java
@@ -0,0 +1,474 @@
+package org.apache.kylin.storage.gridtable;
+
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.kylin.common.util.ByteArray;
+import org.apache.kylin.metadata.filter.CompareTupleFilter;
+import org.apache.kylin.metadata.filter.IFilterCodeSystem;
+import org.apache.kylin.metadata.filter.LogicalTupleFilter;
+import org.apache.kylin.metadata.filter.TupleFilter;
+import org.apache.kylin.metadata.filter.TupleFilter.FilterOperatorEnum;
+import org.apache.kylin.metadata.model.TblColRef;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
+public class GTScanRangePlanner {
+
+    private static final int MAX_HBASE_FUZZY_KEYS = 100;
+    
+    final private GTInfo info;
+    final private ComparatorEx<ByteArray> byteUnknownIsSmaller;
+    final private ComparatorEx<ByteArray> byteUnknownIsBigger;
+    final private ComparatorEx<GTRecord> recordUnknownIsSmaller;
+    final private ComparatorEx<GTRecord> recordUnknownIsBigger;
+
+    public GTScanRangePlanner(GTInfo info) {
+        this.info = info;
+        
+        IFilterCodeSystem<ByteArray> cs = info.codeSystem.getFilterCodeSystem();
+        this.byteUnknownIsSmaller = byteComparatorTreatsUnknownSmaller(cs);
+        this.byteUnknownIsBigger = byteComparatorTreatsUnknownBigger(cs);
+        this.recordUnknownIsSmaller = recordComparatorTreatsUnknownSmaller(cs);
+        this.recordUnknownIsBigger = recordComparatorTreatsUnknownBigger(cs);
+    }
+
+    public List<GTScanRange> planScanRanges(TupleFilter filter, int maxRanges) {
+
+        TupleFilter flatFilter = flattenToOrAndFilter(filter);
+
+        List<Collection<ColumnRange>> orAndDimRanges = translateToOrAndDimRanges(flatFilter);
+
+        List<GTScanRange> scanRanges = Lists.newArrayListWithCapacity(orAndDimRanges.size());
+        for (Collection<ColumnRange> andDimRanges : orAndDimRanges) {
+            GTScanRange scanRange = newScanRange(andDimRanges);
+            scanRanges.add(scanRange);
+        }
+
+        List<GTScanRange> mergedRanges = mergeOverlapRanges(scanRanges);
+        mergedRanges = mergeTooManyRanges(mergedRanges, maxRanges);
+
+        return mergedRanges;
+    }
+
+    private GTScanRange newScanRange(Collection<ColumnRange> andDimRanges) {
+        GTRecord pkStart = new GTRecord(info);
+        GTRecord pkEnd = new GTRecord(info);
+        List<GTRecord> hbaseFuzzyKeys = Lists.newArrayList();
+
+        for (ColumnRange range : andDimRanges) {
+            int col = range.column.getColumn().getZeroBasedIndex();
+            if (info.primaryKey.get(col) == false)
+                continue;
+
+            pkStart.set(col, range.begin);
+            pkEnd.set(col, range.end);
+
+            BitSet fuzzyMask = new BitSet();
+            fuzzyMask.set(col);
+            for (ByteArray v : range.equals) {
+                GTRecord fuzzy = new GTRecord(info);
+                fuzzy.set(col, v);
+                fuzzy.maskForEqualHashComp(fuzzyMask);
+                hbaseFuzzyKeys.add(fuzzy);
+            }
+        }
+
+        pkStart.maskForEqualHashComp(info.primaryKey);
+        pkEnd.maskForEqualHashComp(info.primaryKey);
+        return new GTScanRange(pkStart, pkEnd, hbaseFuzzyKeys);
+    }
+
+    private TupleFilter flattenToOrAndFilter(TupleFilter filter) {
+        if (filter == null)
+            return null;
+
+        TupleFilter flatFilter = filter.flatFilter();
+
+        // normalize to OR-AND filter
+        if (flatFilter.getOperator() == FilterOperatorEnum.AND) {
+            LogicalTupleFilter f = new LogicalTupleFilter(FilterOperatorEnum.OR);
+            f.addChild(flatFilter);
+            flatFilter = f;
+        }
+
+        if (flatFilter.getOperator() != FilterOperatorEnum.OR)
+            throw new IllegalStateException();
+
+        return flatFilter;
+    }
+
+    private List<Collection<ColumnRange>> translateToOrAndDimRanges(TupleFilter flatFilter) {
+        List<Collection<ColumnRange>> result = Lists.newArrayList();
+
+        if (flatFilter == null) {
+            result.add(Collections.<ColumnRange> emptyList());
+            return result;
+        }
+
+        for (TupleFilter andFilter : flatFilter.getChildren()) {
+            if (andFilter.getOperator() != FilterOperatorEnum.AND)
+                throw new IllegalStateException("Filter should be AND instead of " + andFilter);
+
+            Collection<ColumnRange> andRanges = translateToAndDimRanges(andFilter.getChildren());
+            result.add(andRanges);
+        }
+
+        return preprocessConstantConditions(result);
+    }
+
+    private Collection<ColumnRange> translateToAndDimRanges(List<? extends TupleFilter> andFilters) {
+        Map<TblColRef, ColumnRange> rangeMap = new HashMap<TblColRef, ColumnRange>();
+        for (TupleFilter filter : andFilters) {
+            if ((filter instanceof CompareTupleFilter) == false) {
+                continue;
+            }
+
+            CompareTupleFilter comp = (CompareTupleFilter) filter;
+            if (comp.getColumn() == null) {
+                continue;
+            }
+
+            @SuppressWarnings("unchecked")
+            ColumnRange newRange = new ColumnRange(comp.getColumn(), (Set<ByteArray>) comp.getValues(), comp.getOperator());
+            ColumnRange existing = rangeMap.get(newRange.column);
+            if (existing == null) {
+                rangeMap.put(newRange.column, newRange);
+            } else {
+                existing.andMerge(newRange);
+            }
+        }
+        return rangeMap.values();
+    }
+
+    private List<Collection<ColumnRange>> preprocessConstantConditions(List<Collection<ColumnRange>> orAndRanges) {
+        boolean globalAlwaysTrue = false;
+        Iterator<Collection<ColumnRange>> iterator = orAndRanges.iterator();
+        while (iterator.hasNext()) {
+            Collection<ColumnRange> andRanges = iterator.next();
+            Iterator<ColumnRange> iterator2 = andRanges.iterator();
+            boolean hasAlwaysFalse = false;
+            while (iterator2.hasNext()) {
+                ColumnRange range = iterator2.next();
+                if (range.satisfyAll())
+                    iterator2.remove();
+                else if (range.satisfyNone())
+                    hasAlwaysFalse = true;
+            }
+            if (hasAlwaysFalse) {
+                iterator.remove();
+            } else if (andRanges.isEmpty()) {
+                globalAlwaysTrue = true;
+                break;
+            }
+        }
+        if (globalAlwaysTrue) {
+            orAndRanges.clear();
+            orAndRanges.add(Collections.<ColumnRange> emptyList());
+        }
+        return orAndRanges;
+    }
+
+    private List<GTScanRange> mergeOverlapRanges(List<GTScanRange> ranges) {
+        if (ranges.size() <= 1) {
+            return ranges;
+        }
+
+        // sort ranges by start key
+        Collections.sort(ranges, new Comparator<GTScanRange>() {
+            @Override
+            public int compare(GTScanRange a, GTScanRange b) {
+                return recordUnknownIsSmaller.compare(a.pkStart, b.pkStart);
+            }
+        });
+
+        // merge the overlap range
+        List<GTScanRange> mergedRanges = new ArrayList<GTScanRange>();
+        int mergeBeginIndex = 0;
+        GTRecord mergeEnd = ranges.get(0).pkEnd;
+        for (int index = 0; index < ranges.size(); index++) {
+            GTScanRange range = ranges.get(index);
+            
+            // if overlap, swallow it
+            if (recordUnknownIsSmaller.min(range.pkStart, mergeEnd) == range.pkStart //
+                    || recordUnknownIsBigger.max(mergeEnd, range.pkStart) == mergeEnd) {
+                mergeEnd = recordUnknownIsBigger.max(mergeEnd, range.pkEnd);
+                continue;
+            }
+            
+            // not overlap, split here
+            GTScanRange mergedRange = mergeKeyRange(ranges.subList(mergeBeginIndex, index));
+            mergedRanges.add(mergedRange);
+            
+            // start new split
+            mergeBeginIndex = index;
+            mergeEnd = recordUnknownIsBigger.max(mergeEnd, range.pkEnd);
+        }
+        
+        // don't miss the last range
+        GTScanRange mergedRange = mergeKeyRange(ranges.subList(mergeBeginIndex, ranges.size()));
+        mergedRanges.add(mergedRange);
+        
+        return mergedRanges;
+    }
+
+    private GTScanRange mergeKeyRange(List<GTScanRange> ranges) {
+        GTScanRange first = ranges.get(0);
+        if (ranges.size() == 1)
+            return first;
+
+        GTRecord start = first.pkStart;
+        GTRecord end = first.pkEnd;
+        List<GTRecord> newFuzzyKeys = new ArrayList<GTRecord>();
+
+        boolean hasNonFuzzyRange = false;
+        for (GTScanRange range : ranges) {
+            hasNonFuzzyRange = hasNonFuzzyRange || range.hbaseFuzzyKeys.isEmpty();
+            newFuzzyKeys.addAll(range.hbaseFuzzyKeys);
+            end = recordUnknownIsBigger.max(end, range.pkEnd);
+        }
+
+        // if any range is non-fuzzy, then all fuzzy keys must be cleared
+        // also too many fuzzy keys will slow down HBase scan
+        if (hasNonFuzzyRange || newFuzzyKeys.size() > MAX_HBASE_FUZZY_KEYS) {
+            newFuzzyKeys.clear();
+        }
+
+        return new GTScanRange(start, end, newFuzzyKeys);
+    }
+
+    private List<GTScanRange> mergeTooManyRanges(List<GTScanRange> ranges, int maxRanges) {
+        if (ranges.size() < maxRanges) {
+            return ranges;
+        }
+        
+        // TODO: check the distance between range and merge the large distance range
+        List<GTScanRange> result = new ArrayList<GTScanRange>(1);
+        GTScanRange mergedRange = mergeKeyRange(ranges);
+        result.add(mergedRange);
+        return result;
+    }
+
+    private class ColumnRange {
+        private TblColRef column;
+        private ByteArray begin = new ByteArray();
+        private ByteArray end = new ByteArray();
+        private Set<ByteArray> equals;
+
+        public ColumnRange(TblColRef column, Set<ByteArray> values, FilterOperatorEnum op) {
+            this.column = column;
+
+            switch (op) {
+            case EQ:
+            case IN:
+                equals = new HashSet<ByteArray>(values);
+                refreshBeginEndFromEquals();
+                break;
+            case LT:
+            case LTE:
+                end = byteUnknownIsBigger.max(values);
+                break;
+            case GT:
+            case GTE:
+                begin = byteUnknownIsSmaller.min(values);
+                break;
+            case NEQ:
+            case NOTIN:
+            case ISNULL:
+            case ISNOTNULL:
+                // let Optiq filter it!
+                break;
+            default:
+                throw new UnsupportedOperationException(op.name());
+            }
+        }
+
+        void copy(TblColRef column, ByteArray beginValue, ByteArray endValue, Set<ByteArray> equalValues) {
+            this.column = column;
+            this.begin = beginValue;
+            this.end = endValue;
+            this.equals = equalValues;
+        }
+
+        private void refreshBeginEndFromEquals() {
+            this.begin = byteUnknownIsSmaller.min(this.equals);
+            this.end = byteUnknownIsBigger.max(this.equals);
+        }
+
+        public boolean satisfyAll() {
+            return begin.array() == null && end.array() == null; // the NEQ case
+        }
+
+        public boolean satisfyNone() {
+            if (equals != null) {
+                return equals.isEmpty();
+            } else if (begin.array() != null && end.array() != null) {
+                return info.codeSystem.getFilterCodeSystem().compare(begin, end) > 0;
+            } else {
+                return false;
+            }
+        }
+
+        public void andMerge(ColumnRange another) {
+            assert this.column.equals(another.column);
+
+            if (another.satisfyAll()) {
+                return;
+            }
+
+            if (this.satisfyAll()) {
+                copy(another.column, another.begin, another.end, another.equals);
+                return;
+            }
+
+            if (this.equals != null && another.equals != null) {
+                this.equals.retainAll(another.equals);
+                refreshBeginEndFromEquals();
+                return;
+            }
+
+            if (this.equals != null) {
+                this.equals = filter(this.equals, another.begin, another.end);
+                refreshBeginEndFromEquals();
+                return;
+            }
+
+            if (another.equals != null) {
+                this.equals = filter(another.equals, this.begin, this.end);
+                refreshBeginEndFromEquals();
+                return;
+            }
+
+            this.begin = byteUnknownIsSmaller.min(this.begin, another.begin);
+            this.end = byteUnknownIsBigger.max(this.end, another.end);
+        }
+
+        private Set<ByteArray> filter(Set<ByteArray> equalValues, ByteArray beginValue, ByteArray endValue) {
+            Set<ByteArray> result = Sets.newHashSetWithExpectedSize(equalValues.size());
+            for (ByteArray v : equalValues) {
+                if (byteUnknownIsSmaller.compare(beginValue, v) <= 0 && byteUnknownIsBigger.compare(v, endValue) <= 0) {
+                    result.add(v);
+                }
+            }
+            return equalValues;
+        }
+
+        public String toString() {
+            if (equals == null) {
+                return column.getName() + " between " + begin + " and " + end;
+            } else {
+                return column.getName() + " in " + equals;
+            }
+        }
+    }
+
+    public static abstract class ComparatorEx<T> implements Comparator<T> {
+
+        public T min(Collection<T> v) {
+            if (v.size() < 0) {
+                return null;
+            }
+
+            Iterator<T> iterator = v.iterator();
+            T min = iterator.next();
+            while (iterator.hasNext()) {
+                min = min(min, iterator.next());
+            }
+            return min;
+        }
+
+        public T max(Collection<T> v) {
+            if (v.size() < 0) {
+                return null;
+            }
+
+            Iterator<T> iterator = v.iterator();
+            T max = iterator.next();
+            while (iterator.hasNext()) {
+                max = max(max, iterator.next());
+            }
+            return max;
+        }
+
+        public T min(T a, T b) {
+            return compare(a, b) <= 0 ? a : b;
+        }
+
+        public T max(T a, T b) {
+            return compare(a, b) >= 0 ? a : b;
+        }
+
+        public boolean between(T v, T start, T end) {
+            return compare(start, v) <= 0 && compare(v, end) <= 0;
+        }
+    }
+
+    public static ComparatorEx<ByteArray> byteComparatorTreatsUnknownSmaller(final IFilterCodeSystem<ByteArray> cs) {
+        return new ComparatorEx<ByteArray>() {
+            @Override
+            public int compare(ByteArray a, ByteArray b) {
+                if (a.array() == null)
+                    return -1;
+                else if (b.array() == null)
+                    return 1;
+                else
+                    return cs.compare(a, b);
+            }
+        };
+    }
+
+    public static ComparatorEx<ByteArray> byteComparatorTreatsUnknownBigger(final IFilterCodeSystem<ByteArray> cs) {
+        return new ComparatorEx<ByteArray>() {
+            @Override
+            public int compare(ByteArray a, ByteArray b) {
+                if (a.array() == null)
+                    return 1;
+                else if (b.array() == null)
+                    return -1;
+                else
+                    return cs.compare(a, b);
+            }
+        };
+    }
+
+    public static ComparatorEx<GTRecord> recordComparatorTreatsUnknownSmaller(IFilterCodeSystem<ByteArray> cs) {
+        return new RecordComparator(byteComparatorTreatsUnknownSmaller(cs));
+    }
+
+    public static ComparatorEx<GTRecord> recordComparatorTreatsUnknownBigger(IFilterCodeSystem<ByteArray> cs) {
+        return new RecordComparator(byteComparatorTreatsUnknownBigger(cs));
+    }
+
+    private static class RecordComparator extends ComparatorEx<GTRecord> {
+        final ComparatorEx<ByteArray> comparator;
+
+        RecordComparator(ComparatorEx<ByteArray> byteComparator) {
+            this.comparator = byteComparator;
+        }
+
+        @Override
+        public int compare(GTRecord a, GTRecord b) {
+            assert a.info == b.info;
+            assert a.maskForEqualHashComp() == b.maskForEqualHashComp();
+            BitSet mask = a.maskForEqualHashComp();
+
+            int comp = 0;
+            for (int i = mask.nextSetBit(0); i >= 0; i = mask.nextSetBit(i + 1)) {
+                comp = comparator.compare(a.cols[i], b.cols[i]);
+                if (comp != 0)
+                    return comp;
+            }
+            return 0; // equals
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d1369339/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRequest.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRequest.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRequest.java
index 977363c..c92cba4 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRequest.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRequest.java
@@ -12,8 +12,7 @@ public class GTScanRequest {
 
     // basic
     private GTInfo info;
-    private GTRecord pkStart; // inclusive
-    private GTRecord pkEnd; // inclusive
+    private GTScanRange range;
     private BitSet columns;
 
     // optional filtering
@@ -25,23 +24,21 @@ public class GTScanRequest {
     private String[] aggrMetricsFuncs;
     
     public GTScanRequest(GTInfo info) {
-        this(info, null, null, null, null);
+        this(info, null, null, null);
     }
 
-    public GTScanRequest(GTInfo info, GTRecord pkStart, GTRecord pkEnd, BitSet columns, TupleFilter filterPushDown) {
+    public GTScanRequest(GTInfo info, GTScanRange range, BitSet columns, TupleFilter filterPushDown) {
         this.info = info;
-        this.pkStart = pkStart;
-        this.pkEnd = pkEnd;
+        this.range = range;
         this.columns = columns;
         this.filterPushDown = filterPushDown;
         validate();
     }
     
-    public GTScanRequest(GTInfo info, GTRecord pkStart, GTRecord pkEnd, BitSet aggrGroupBy, BitSet aggrMetrics, //
+    public GTScanRequest(GTInfo info, GTScanRange range, BitSet aggrGroupBy, BitSet aggrMetrics, //
             String[] aggrMetricsFuncs, TupleFilter filterPushDown) {
         this.info = info;
-        this.pkStart = pkStart;
-        this.pkEnd = pkEnd;
+        this.range = range;
         this.columns = new BitSet();
         this.filterPushDown = filterPushDown;
         
@@ -53,6 +50,9 @@ public class GTScanRequest {
     }
     
     private void validate() {
+        if (range == null)
+            range = new GTScanRange(null, null);
+        
         if (columns == null)
             columns = (BitSet) info.colAll.clone();
         
@@ -111,11 +111,11 @@ public class GTScanRequest {
     }
 
     public GTRecord getPkStart() {
-        return pkStart;
+        return range.pkStart;
     }
 
     public GTRecord getPkEnd() {
-        return pkEnd;
+        return range.pkEnd;
     }
 
     public BitSet getColumns() {

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d1369339/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java
----------------------------------------------------------------------
diff --git a/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java b/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java
index e2071d6..1a69138 100644
--- a/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java
+++ b/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java
@@ -80,7 +80,7 @@ public class GridTableTest {
     }
 
     private IGTScanner scanAndAggregate(GridTable table) throws IOException {
-        GTScanRequest req = new GTScanRequest(table.getInfo(), null, null, setOf(0, 2), setOf(3, 4), new String[] { "count", "sum" }, null);
+        GTScanRequest req = new GTScanRequest(table.getInfo(), null, setOf(0, 2), setOf(3, 4), new String[] { "count", "sum" }, null);
         IGTScanner scanner = table.scan(req);
         int i = 0;
         for (GTRecord r : scanner) {
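
With the new signature, callers build a GTScanRange, or let GTScanRangePlanner derive a list of ranges from a filter, and pass that to GTScanRequest instead of raw start/end records. A sketch of the intended flow under those assumptions (method name is illustrative; imports from the gridtable and metadata.filter packages are omitted):

    // Plan ranges from a pushed-down filter, then issue one scan request per range.
    private void scanWithRanges(GridTable gridTable, TupleFilter filter, int maxRanges) throws IOException {
        GTInfo info = gridTable.getInfo();
        GTScanRangePlanner planner = new GTScanRangePlanner(info);
        for (GTScanRange range : planner.planScanRanges(filter, maxRanges)) {
            // null columns selects all columns (see GTScanRequest.validate above)
            GTScanRequest req = new GTScanRequest(info, range, null, filter);
            IGTScanner scanner = gridTable.scan(req);
            for (GTRecord record : scanner) {
                // consume the record
            }
        }
    }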


[23/50] incubator-kylin git commit: KYLIN-625, bug fix for handling rounding when converting filters

Posted by li...@apache.org.
KYLIN-625, bug fix for handling rounding when converting filters


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/b38206db
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/b38206db
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/b38206db

Branch: refs/heads/streaming-localdict
Commit: b38206db488e6aef66a4e224adf0acebd56612b7
Parents: 7f73abe
Author: Li, Yang <ya...@ebay.com>
Authored: Fri Mar 27 11:58:31 2015 +0800
Committer: Li, Yang <ya...@ebay.com>
Committed: Fri Mar 27 11:58:31 2015 +0800

----------------------------------------------------------------------
 .../gridtable/GTDictionaryCodeSystem.java       | 16 +++++++++---
 .../apache/kylin/storage/gridtable/GTUtil.java  | 26 ++++++++++----------
 .../kylin/storage/gridtable/IGTCodeSystem.java  | 25 +++++++++++++------
 3 files changed, 44 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/b38206db/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
index 6f2d9ce..ada4ed7 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
@@ -83,7 +83,12 @@ public class GTDictionaryCodeSystem implements IGTCodeSystem {
 
     @Override
     public void encodeColumnValue(int col, Object value, int roundingFlag, ByteBuffer buf) {
-        throw new UnsupportedOperationException();
+        DataTypeSerializer serializer = serializers[col];
+        if (serializer instanceof DictionarySerializer) {
+            ((DictionarySerializer) serializer).serializeWithRounding(value,  roundingFlag, buf);
+        } else {
+            serializer.serialize(value,  buf);
+        }
     }
 
     @Override
@@ -103,10 +108,15 @@ public class GTDictionaryCodeSystem implements IGTCodeSystem {
             this.dictionary = dictionary;
         }
 
+        public void serializeWithRounding(Object value, int roundingFlag, ByteBuffer buf) {
+            int id = dictionary.getIdFromValue(value, roundingFlag);
+            BytesUtil.writeUnsigned(id, dictionary.getSizeOfId(), buf);
+        }
+
         @Override
-        public void serialize(Object value, ByteBuffer out) {
+        public void serialize(Object value, ByteBuffer buf) {
             int id = dictionary.getIdFromValue(value);
-            BytesUtil.writeUnsigned(id, dictionary.getSizeOfId(), out);
+            BytesUtil.writeUnsigned(id, dictionary.getSizeOfId(), buf);
         }
 
         @Override

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/b38206db/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java
index cf970b1..1fb0376 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java
@@ -122,7 +122,7 @@ public class GTUtil {
                     Set newValues = Sets.newHashSet();
                     for (Object value : constValues) {
                         code = translate(col, value, 0);
-                        if (!isDictNull(code))
+                        if (code != null)
                             newValues.add(code);
                     }
                     if (newValues.isEmpty()) {
@@ -134,7 +134,7 @@ public class GTUtil {
                     break;
                 case NEQ:
                     code = translate(col, firstValue, 0);
-                    if (isDictNull(code)) {
+                    if (code == null) {
                         result = ConstantTupleFilter.TRUE;
                     } else {
                         newCompareFilter.addChild(new ConstantTupleFilter(code));
@@ -143,7 +143,7 @@ public class GTUtil {
                     break;
                 case LT:
                     code = translate(col, firstValue, 1);
-                    if (isDictNull(code)) {
+                    if (code == null) {
                         result = ConstantTupleFilter.TRUE;
                     } else {
                         newCompareFilter.addChild(new ConstantTupleFilter(code));
@@ -152,7 +152,7 @@ public class GTUtil {
                     break;
                 case LTE:
                     code = translate(col, firstValue, -1);
-                    if (isDictNull(code)) {
+                    if (code == null) {
                         result = ConstantTupleFilter.FALSE;
                     } else {
                         newCompareFilter.addChild(new ConstantTupleFilter(code));
@@ -161,7 +161,7 @@ public class GTUtil {
                     break;
                 case GT:
                     code = translate(col, firstValue, -1);
-                    if (isDictNull(code)) {
+                    if (code == null) {
                         result = ConstantTupleFilter.TRUE;
                     } else {
                         newCompareFilter.addChild(new ConstantTupleFilter(code));
@@ -170,7 +170,7 @@ public class GTUtil {
                     break;
                 case GTE:
                     code = translate(col, firstValue, 1);
-                    if (isDictNull(code)) {
+                    if (code == null) {
                         result = ConstantTupleFilter.FALSE;
                     } else {
                         newCompareFilter.addChild(new ConstantTupleFilter(code));
@@ -183,16 +183,16 @@ public class GTUtil {
                 return result;
             }
 
-            private boolean isDictNull(ByteArray code) {
-                return info.codeSystem.getFilterCodeSystem().isNull(code);
-            }
-
             transient ByteBuffer buf = ByteBuffer.allocate(info.maxRecordLength);
 
             private ByteArray translate(int col, Object value, int roundingFlag) {
-                buf.clear();
-                info.codeSystem.encodeColumnValue(col, value, roundingFlag, buf);
-                return ByteArray.copyOf(buf.array(), 0, buf.position());
+                try {
+                    buf.clear();
+                    info.codeSystem.encodeColumnValue(col, value, roundingFlag, buf);
+                    return ByteArray.copyOf(buf.array(), 0, buf.position());
+                } catch (IllegalArgumentException ex) {
+                    return null;
+                }
             }
         }, info.codeSystem.getFilterCodeSystem());
 

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/b38206db/storage/src/main/java/org/apache/kylin/storage/gridtable/IGTCodeSystem.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/IGTCodeSystem.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/IGTCodeSystem.java
index 87b6643..4182604 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/IGTCodeSystem.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/IGTCodeSystem.java
@@ -12,19 +12,30 @@ public interface IGTCodeSystem {
 
     IFilterCodeSystem<ByteArray> getFilterCodeSystem();
     
-    /** return the length of code starting at the specified buffer, buffer position must not change after return */
+    /** Return the length of code starting at the specified buffer, buffer position must not change after return */
     int codeLength(int col, ByteBuffer buf);
     
-    /** encode a value into code */
-    void encodeColumnValue(int col, Object value, ByteBuffer buf);
+    /**
+     * Encode a value into code.
+     * 
+     * @throws IllegalArgumentException if the value is not in dictionary
+     */
+    void encodeColumnValue(int col, Object value, ByteBuffer buf) throws IllegalArgumentException;
     
-    /** encode a value into code, with option to floor rounding -1, no rounding 0,  or ceiling rounding 1 */
-    void encodeColumnValue(int col, Object value, int roundingFlag, ByteBuffer buf);
+    /**
+     * Encode a value into code, with a rounding option: -1 for floor rounding, 0 for no rounding, 1 for ceiling rounding.
+     * 
+     * @throws IllegalArgumentException
+     * - if rounding=0 and the value is not in dictionary
+     * - if rounding=-1 and there's no equal or smaller value in dictionary
+     * - if rounding=1 and there's no equal or bigger value in dictionary
+     */
+    void encodeColumnValue(int col, Object value, int roundingFlag, ByteBuffer buf) throws IllegalArgumentException;
     
-    /** decode a code into value */
+    /** Decode a code into value */
     Object decodeColumnValue(int col, ByteBuffer buf);
     
-    /** return an aggregator for metrics */
+    /** Return an aggregator for metrics */
     MeasureAggregator<?> newMetricsAggregator(String aggrFunction, int col);
     
 }

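The operator-to-rounding-flag mapping used in the GTUtil change above follows from order-preserving dictionary codes: for "col < v" an out-of-dictionary v can be replaced by the next bigger entry, for "col <= v" by the next smaller one, and symmetrically for > and >=. A hypothetical helper summarising those flags, assuming the operator enum referenced by CompareTupleFilter (an illustration only, not part of the commit):

    static int roundingFlagFor(TupleFilter.FilterOperatorEnum op) {
        switch (op) {
            case EQ:
            case NEQ:
            case IN:  return 0;    // exact code, or "value not in dictionary"
            case LT:
            case GTE: return 1;    // a missing value is replaced by the next bigger code
            case LTE:
            case GT:  return -1;   // a missing value is replaced by the next smaller code
            default:  throw new IllegalArgumentException("unexpected operator: " + op);
        }
    }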

[16/50] incubator-kylin git commit: Small change.

Posted by li...@apache.org.
Small change.

Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/0edf4004
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/0edf4004
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/0edf4004

Branch: refs/heads/streaming-localdict
Commit: 0edf4004a2125c01b2f3b8c151b899a03db4ae0c
Parents: 8d40a57
Author: Shao Feng, Shi <sh...@ebay.com>
Authored: Fri Mar 27 00:01:59 2015 +0800
Committer: Shao Feng, Shi <sh...@ebay.com>
Committed: Fri Mar 27 00:01:59 2015 +0800

----------------------------------------------------------------------
 .../job/hadoop/cube/FactDistinctColumnsReducer.java   | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/0edf4004/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java
index e1529d3..165f66c 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java
@@ -42,10 +42,7 @@ import org.apache.kylin.job.hadoop.AbstractHadoopJob;
 import org.apache.kylin.metadata.model.TblColRef;
 
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
 
 /**
  * @author yangli9
@@ -167,8 +164,13 @@ public class FactDistinctColumnsReducer extends KylinReducer<LongWritable, Text,
                 out.write(msg.getBytes());
                 out.write('\n');
                 out.write('\n');
-                
-                for (long i = 0; i < baseCuboidId; i++) {
+
+                List<Long> allCuboids = new ArrayList<Long>();
+                allCuboids.addAll(rowKeyCountInCuboids.keySet());
+                Collections.sort(allCuboids);
+                for (long i : allCuboids) {
+                    if (i > baseCuboidId)
+                        continue;
                     msg = "Cuboid " + i + " has " + rowKeyCountInCuboids.get(i) + " rows.";
                     out.write(msg.getBytes());
                     out.write('\n');

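In effect, the report now iterates only the cuboid ids that actually appear in the map, in ascending order, instead of every id from 0 to baseCuboidId. A self-contained sketch of that logic (the map name mirrors the field above; everything else is assumed for illustration):

    import java.util.*;

    static List<String> cuboidReport(Map<Long, Long> rowKeyCountInCuboids, long baseCuboidId) {
        List<Long> allCuboids = new ArrayList<Long>(rowKeyCountInCuboids.keySet());
        Collections.sort(allCuboids);
        List<String> lines = new ArrayList<String>();
        for (long i : allCuboids) {
            if (i > baseCuboidId)
                continue;                          // ignore ids beyond the base cuboid
            lines.add("Cuboid " + i + " has " + rowKeyCountInCuboids.get(i) + " rows.");
        }
        return lines;
    }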

[30/50] incubator-kylin git commit: KYLIN-653 use a FIFOIterable to solve ConcurrentModificationException

Posted by li...@apache.org.
KYLIN-653 use a FIFOIterable to solve ConcurrentModificationException


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/d09e00d6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/d09e00d6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/d09e00d6

Branch: refs/heads/streaming-localdict
Commit: d09e00d6c60e6d0e88c2512041c76456dd5fb64d
Parents: 4df0531
Author: honma <ho...@ebay.com>
Authored: Fri Mar 27 11:23:11 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Fri Mar 27 15:16:20 2015 +0800

----------------------------------------------------------------------
 .../apache/kylin/common/util/FIFOIterable.java  | 20 ++++++++++++
 .../apache/kylin/common/util/FIFOIterator.java  | 34 ++++++++++++++++++++
 .../org/apache/kylin/common/util/BasicTest.java | 12 +++++--
 .../model/IIKeyValueCodecWithState.java         |  6 ++--
 .../IIKeyValueCodecWithStateTest.java           | 16 ++++++---
 5 files changed, 80 insertions(+), 8 deletions(-)
----------------------------------------------------------------------

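A short, runnable illustration of the problem this commit addresses (the class name and sample values are assumptions): an iterator obtained from a LinkedList before elements are added fails fast on the next call, while the FIFOIterator introduced below simply polls whatever is currently at the head of the queue.

    import java.util.*;
    import org.apache.kylin.common.util.FIFOIterator;

    public class FifoDemo {
        public static void main(String[] args) {
            Queue<String> q = new LinkedList<String>();

            Iterator<String> plain = q.iterator();
            q.add("a");
            // plain.next() would now throw ConcurrentModificationException

            Iterator<String> fifo = new FIFOIterator<String>(q);
            q.add("b");
            System.out.println(fifo.next());   // "a" - the iterator keeps no cursor state
            System.out.println(fifo.next());   // "b" - elements added later are still visible
        }
    }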

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d09e00d6/common/src/main/java/org/apache/kylin/common/util/FIFOIterable.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/kylin/common/util/FIFOIterable.java b/common/src/main/java/org/apache/kylin/common/util/FIFOIterable.java
new file mode 100644
index 0000000..c0f7d68
--- /dev/null
+++ b/common/src/main/java/org/apache/kylin/common/util/FIFOIterable.java
@@ -0,0 +1,20 @@
+package org.apache.kylin.common.util;
+
+import java.util.Iterator;
+import java.util.Queue;
+
+/**
+ * Created by Hongbin Ma(Binmahone) on 3/27/15.
+ */
+public class FIFOIterable<T> implements Iterable<T> {
+    private Queue<T> q;
+
+    public FIFOIterable(Queue<T> q) {
+        this.q = q;
+    }
+
+    @Override
+    public Iterator<T> iterator() {
+        return new FIFOIterator<T>(q);
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d09e00d6/common/src/main/java/org/apache/kylin/common/util/FIFOIterator.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/kylin/common/util/FIFOIterator.java b/common/src/main/java/org/apache/kylin/common/util/FIFOIterator.java
new file mode 100644
index 0000000..6751cb0
--- /dev/null
+++ b/common/src/main/java/org/apache/kylin/common/util/FIFOIterator.java
@@ -0,0 +1,34 @@
+package org.apache.kylin.common.util;
+
+import java.util.Iterator;
+import java.util.Queue;
+
+/**
+ * Created by Hongbin Ma(Binmahone) on 3/27/15.
+ *
+ * Normal iterators in the JDK Collections are fail-fast,
+ * i.e. adding elements to a queue breaks any iterator already obtained from it.
+ * The FIFOIterator is stateless: it only checks the first element of a Queue.
+ */
+public class FIFOIterator<T> implements Iterator<T> {
+    private Queue<T> q;
+
+    public FIFOIterator(Queue<T> q) {
+        this.q = q;
+    }
+
+    @Override
+    public boolean hasNext() {
+        return !q.isEmpty();
+    }
+
+    @Override
+    public T next() {
+        return q.poll();
+    }
+
+    @Override
+    public void remove() {
+        throw new UnsupportedOperationException();
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d09e00d6/common/src/test/java/org/apache/kylin/common/util/BasicTest.java
----------------------------------------------------------------------
diff --git a/common/src/test/java/org/apache/kylin/common/util/BasicTest.java b/common/src/test/java/org/apache/kylin/common/util/BasicTest.java
index a480ebd..0b92bf9 100644
--- a/common/src/test/java/org/apache/kylin/common/util/BasicTest.java
+++ b/common/src/test/java/org/apache/kylin/common/util/BasicTest.java
@@ -22,9 +22,10 @@ import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.text.DateFormat;
 import java.text.SimpleDateFormat;
-import java.util.Calendar;
+import java.util.*;
 import java.util.concurrent.*;
 
+import com.google.common.collect.Lists;
 import org.apache.commons.configuration.ConfigurationException;
 import org.junit.Ignore;
 import org.junit.Test;
@@ -75,7 +76,7 @@ public class BasicTest {
 
         a.setTimeInMillis(current);
         b.set(a.get(Calendar.YEAR), a.get(Calendar.MONTH), a.get(Calendar.DAY_OF_MONTH), a.get(Calendar.HOUR_OF_DAY), a.get(Calendar.MINUTE));
-        c.set(a.get(Calendar.YEAR), a.get(Calendar.MONTH), a.get(Calendar.DAY_OF_MONTH), a.get(Calendar.HOUR_OF_DAY),0);
+        c.set(a.get(Calendar.YEAR), a.get(Calendar.MONTH), a.get(Calendar.DAY_OF_MONTH), a.get(Calendar.HOUR_OF_DAY), 0);
 
         System.out.println(time(b.getTimeInMillis()));
         System.out.println(time(c.getTimeInMillis()));
@@ -85,6 +86,13 @@ public class BasicTest {
     @Test
     @Ignore("fix it later")
     public void test2() throws IOException, ConfigurationException {
+        Queue<String> a = new LinkedList<>();
+        Iterator<String> i = new FIFOIterator<String>(a);
+        System.out.println(i.hasNext());
+        a.add("1");
+        System.out.println(i.hasNext());
+        System.out.println(i.next());
+
     }
 
     private static String time(long t) {

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d09e00d6/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
index e838283..82f1020 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
@@ -2,8 +2,10 @@ package org.apache.kylin.invertedindex.model;
 
 import java.util.ArrayList;
 import java.util.Iterator;
+import java.util.LinkedList;
 
 import com.google.common.base.Preconditions;
+import org.apache.kylin.common.util.FIFOIterator;
 import org.apache.kylin.invertedindex.index.Slice;
 import org.apache.kylin.invertedindex.index.TableRecordInfoDigest;
 
@@ -25,12 +27,12 @@ public class IIKeyValueCodecWithState extends IIKeyValueCodec {
 
     protected static class IIRowDecoderWithState extends IIRowDecoder {
 
-        final ArrayList<IIRow> buffer = Lists.newArrayList();
+        final LinkedList<IIRow> buffer = Lists.newLinkedList();
         private Iterator<Slice> superIterator = null;
 
         private IIRowDecoderWithState(TableRecordInfoDigest digest, Iterator<IIRow> iiRowIterator) {
             super(digest, iiRowIterator);
-            this.feedingIterator = buffer.iterator();
+            this.feedingIterator = new FIFOIterator<>(buffer);
         }
 
         private Iterator<Slice> getSuperIterator() {

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d09e00d6/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java
index 25e250c..416d31a 100644
--- a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java
+++ b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java
@@ -8,6 +8,7 @@ import java.util.concurrent.LinkedBlockingQueue;
 
 import javax.annotation.Nullable;
 
+import org.apache.kylin.common.util.FIFOIterable;
 import org.apache.kylin.common.util.LocalFileMetadataTestCase;
 import org.apache.kylin.invertedindex.IIInstance;
 import org.apache.kylin.invertedindex.IIManager;
@@ -19,6 +20,7 @@ import org.apache.kylin.invertedindex.model.IIKeyValueCodecWithState;
 import org.apache.kylin.invertedindex.model.IIRow;
 import org.apache.kylin.invertedindex.model.KeyValueCodec;
 import org.apache.kylin.streaming.Stream;
+import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
@@ -38,7 +40,7 @@ public class IIKeyValueCodecWithStateTest extends LocalFileMetadataTestCase {
 
     final String[] inputs = new String[] { //
     "FP-non GTC,0,15,145970,0,28,Toys,2008-10-08 07:18:40,USER_Y,Toys & Hobbies,Models & Kits,Automotive,0,Ebay,USER_S,15,Professional-Other,2012-08-16,2012-08-11,0,2012-08-16,145970,10000329,26.8551,0", //
-            "ABIN,0,-99,43479,0,21,Photo,2012-09-11 20:26:04,USER_Y,Cameras & Photo,Film Photography,Other,0,Ebay,USER_S,-99,Not Applicable,2012-08-16,2012-08-11,2012-08-16,43479,10000807,26.2474,0", //
+            "ABIN,0,-99,43479,0,21,Photo,2012-09-11 20:26:04,USER_Y,Cameras & Photo,Film Photography,Other,0,Ebay,USER_S,-99,Not Applicable,2012-08-16,2012-08-11,0,2012-08-16,43479,10000807,26.2474,0", //
             "ABIN,0,16,80053,0,12,Computers,2012-06-19 21:15:09,USER_Y,Computers/Tablets & Networking,MonitorProjectors & Accs,Monitors,0,Ebay,USER_S,16,Consumer-Other,2012-08-16,2012-08-11,0,2012-08-16,80053,10000261,94.2273,0" };
 
     @Before
@@ -64,14 +66,19 @@ public class IIKeyValueCodecWithStateTest extends LocalFileMetadataTestCase {
         future.get();
     }
 
+    @After
+    public void after() throws Exception {
+        cleanupTestMetadata();
+    }
+
     @Test
     public void basicTest() {
-        ArrayList<IIRow> buffer = Lists.newArrayList();
+        Queue<IIRow> buffer = Lists.newLinkedList();
+        FIFOIterable bufferIterable = new FIFOIterable(buffer);
         TableRecordInfo info = new TableRecordInfo(iiDesc);
         TableRecordInfoDigest digest = info.getDigest();
-        int columnCount = digest.getColumnCount();
         KeyValueCodec codec = new IIKeyValueCodecWithState(digest);
-        Iterator<Slice> slices = codec.decodeKeyValue(buffer).iterator();
+        Iterator<Slice> slices = codec.decodeKeyValue(bufferIterable).iterator();
 
         Assert.assertTrue(!slices.hasNext());
         Assert.assertEquals(iiRowList.size(), digest.getColumnCount());
@@ -87,5 +94,6 @@ public class IIKeyValueCodecWithStateTest extends LocalFileMetadataTestCase {
         }
 
         Slice newSlice = slices.next();
+        Assert.assertEquals(newSlice.getLocalDictionaries().get(0).getSize(), 2);
     }
 }


[40/50] incubator-kylin git commit: refactor

Posted by li...@apache.org.
refactor


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/8e6afbf4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/8e6afbf4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/8e6afbf4

Branch: refs/heads/streaming-localdict
Commit: 8e6afbf44d5d4a96b0e55cf8c617fbcdb21a582e
Parents: 12920dc
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 16:50:50 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 16:50:50 2015 +0800

----------------------------------------------------------------------
 .../invertedindex/ToyIIStreamBuilder.java       |  36 ------
 .../kylin/streaming/JsonStreamParser.java       |  12 +-
 .../apache/kylin/streaming/StreamParser.java    |   4 +-
 .../kylin/streaming/StringStreamParser.java     |   2 +-
 .../kylin/streaming/cube/CubeStreamBuilder.java |   2 +-
 .../invertedindex/IIStreamBuilder.java          | 120 +++++-------------
 .../streaming/invertedindex/SliceBuilder.java   | 126 +++++++++++++++++++
 .../invertedindex/PrintOutStreamBuilder.java    |   5 +-
 8 files changed, 167 insertions(+), 140 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8e6afbf4/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/ToyIIStreamBuilder.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/ToyIIStreamBuilder.java b/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/ToyIIStreamBuilder.java
deleted file mode 100644
index 3e2a892..0000000
--- a/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/ToyIIStreamBuilder.java
+++ /dev/null
@@ -1,36 +0,0 @@
-package org.apache.kylin.job.hadoop.invertedindex;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.concurrent.BlockingQueue;
-
-import org.apache.kylin.invertedindex.index.Slice;
-import org.apache.kylin.invertedindex.index.TableRecordInfo;
-import org.apache.kylin.invertedindex.model.IIDesc;
-import org.apache.kylin.invertedindex.model.IIKeyValueCodec;
-import org.apache.kylin.invertedindex.model.IIRow;
-import org.apache.kylin.streaming.Stream;
-import org.apache.kylin.streaming.invertedindex.IIStreamBuilder;
-
-/**
- * Created by Hongbin Ma(Binmahone) on 3/26/15.
- *
- * A IIStreamBuilder that can hold all the built slices in form of IIRow
- * This is only for test use
- */
-public class ToyIIStreamBuilder extends IIStreamBuilder {
-    private List<IIRow> result;
-
-    public ToyIIStreamBuilder(BlockingQueue<Stream> queue, IIDesc desc, int partitionId, List<IIRow> result) {
-        super(queue, null, desc, partitionId);
-        this.result = result;
-    }
-
-    protected void outputSlice(Slice slice, TableRecordInfo tableRecordInfo) throws IOException {
-        IIKeyValueCodec codec = new IIKeyValueCodec(tableRecordInfo.getDigest());
-        for (IIRow iiRow : codec.encodeKeyValue(slice)) {
-            result.add(iiRow);
-        }
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8e6afbf4/streaming/src/main/java/org/apache/kylin/streaming/JsonStreamParser.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/JsonStreamParser.java b/streaming/src/main/java/org/apache/kylin/streaming/JsonStreamParser.java
index 5c8b49d..2912aa7 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/JsonStreamParser.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/JsonStreamParser.java
@@ -50,15 +50,17 @@ import java.util.Map;
  */
 public final class JsonStreamParser implements StreamParser {
 
-    public static final JsonStreamParser instance = new JsonStreamParser();
+    private final List<TblColRef> allColumns;
 
-    private final JsonParser jsonParser = new JsonParser();
+    private static final JsonParser JSON_PARSER = new JsonParser();
 
-    private JsonStreamParser(){}
+    public JsonStreamParser(List<TblColRef> allColumns){
+        this.allColumns = allColumns;
+    }
 
     @Override
-    public List<String> parse(Stream stream, List<TblColRef> allColumns) {
-        final JsonObject root = jsonParser.parse(new String(stream.getRawData())).getAsJsonObject();
+    public List<String> parse(Stream stream) {
+        final JsonObject root = JSON_PARSER.parse(new String(stream.getRawData())).getAsJsonObject();
         ArrayList<String> result = Lists.newArrayList();
 
         for (TblColRef column : allColumns) {

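With this change the column list is bound once, when the parser is constructed, instead of being passed on every parse() call. A hedged usage fragment (iiDesc and stream are assumed to be supplied by the caller):

    StreamParser parser = new JsonStreamParser(iiDesc.listAllColumns());
    List<String> row = parser.parse(stream);   // one value per column, in column order
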
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8e6afbf4/streaming/src/main/java/org/apache/kylin/streaming/StreamParser.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/StreamParser.java b/streaming/src/main/java/org/apache/kylin/streaming/StreamParser.java
index 9b41c95..c6b23ff 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/StreamParser.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/StreamParser.java
@@ -34,8 +34,6 @@
 
 package org.apache.kylin.streaming;
 
-import org.apache.kylin.metadata.model.TblColRef;
-
 import java.util.List;
 
 /**
@@ -43,5 +41,5 @@ import java.util.List;
  */
 public interface StreamParser {
 
-    List<String> parse(Stream stream, List<TblColRef> allColumns);
+    List<String> parse(Stream stream);
 }

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8e6afbf4/streaming/src/main/java/org/apache/kylin/streaming/StringStreamParser.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/StringStreamParser.java b/streaming/src/main/java/org/apache/kylin/streaming/StringStreamParser.java
index 3c62a3a..4fb26fa 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/StringStreamParser.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/StringStreamParser.java
@@ -49,7 +49,7 @@ public final class StringStreamParser implements StreamParser {
 
     private StringStreamParser(){}
     @Override
-    public List<String> parse(Stream stream, List<TblColRef> allColumns) {
+    public List<String> parse(Stream stream) {
         return Lists.newArrayList(new String(stream.getRawData()).split(","));
     }
 }

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8e6afbf4/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java b/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
index 5c2efdc..ba3f495 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
@@ -403,7 +403,7 @@ public class CubeStreamBuilder extends StreamBuilder {
     }
 
     private List<String> parseStream(Stream stream, CubeDesc desc) {
-        return getStreamParser().parse(stream, Lists.newArrayList(desc.listAllColumns()));
+        return getStreamParser().parse(stream);
     }
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8e6afbf4/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java b/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java
index 0cf3c77..72e23ff 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java
@@ -34,36 +34,27 @@
 
 package org.apache.kylin.streaming.invertedindex;
 
-import com.google.common.base.Function;
 import com.google.common.base.Stopwatch;
-import com.google.common.collect.Collections2;
-import com.google.common.collect.HashMultimap;
 import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.client.HConnectionManager;
 import org.apache.hadoop.hbase.client.HTableInterface;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.kylin.dict.Dictionary;
-import org.apache.kylin.dict.DictionaryGenerator;
-import org.apache.kylin.invertedindex.index.BatchSliceBuilder;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.invertedindex.IIInstance;
+import org.apache.kylin.invertedindex.IIManager;
 import org.apache.kylin.invertedindex.index.Slice;
-import org.apache.kylin.invertedindex.index.TableRecord;
-import org.apache.kylin.invertedindex.index.TableRecordInfo;
 import org.apache.kylin.invertedindex.model.IIDesc;
 import org.apache.kylin.invertedindex.model.IIKeyValueCodec;
 import org.apache.kylin.invertedindex.model.IIRow;
-import org.apache.kylin.metadata.model.TblColRef;
 import org.apache.kylin.streaming.Stream;
 import org.apache.kylin.streaming.StreamBuilder;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import javax.annotation.Nullable;
 import java.io.IOException;
 import java.util.List;
-import java.util.Map;
 import java.util.concurrent.BlockingQueue;
 import java.util.concurrent.TimeUnit;
 
@@ -75,98 +66,36 @@ public class IIStreamBuilder extends StreamBuilder {
     private static Logger logger = LoggerFactory.getLogger(IIStreamBuilder.class);
 
     private final IIDesc desc;
+    private final IIInstance ii;
     private final HTableInterface hTable;
-    private final BatchSliceBuilder sliceBuilder;
-
-    public IIStreamBuilder(BlockingQueue<Stream> queue, String hTableName, IIDesc desc, int partitionId) {
-        super(queue, desc.getSliceSize());
-        this.desc = desc;
+    private final SliceBuilder sliceBuilder;
+    private final int partitionId;
+
+    public IIStreamBuilder(BlockingQueue<Stream> queue, String hTableName, IIInstance iiInstance, int partitionId) {
+        super(queue, iiInstance.getDescriptor().getSliceSize());
+        this.ii = iiInstance;
+        this.desc = iiInstance.getDescriptor();
+        this.partitionId = partitionId;
         try {
-            if (hTableName != null) {
-                this.hTable = HConnectionManager.createConnection(HBaseConfiguration.create()).getTable(hTableName);
-            } else {
-                this.hTable = null;
-            }
+            this.hTable = HConnectionManager.createConnection(HBaseConfiguration.create()).getTable(hTableName);
         } catch (IOException e) {
             logger.error("cannot open htable name:" + hTableName, e);
             throw new RuntimeException("cannot open htable name:" + hTableName, e);
         }
-        sliceBuilder = new BatchSliceBuilder(desc, (short) partitionId);
+        sliceBuilder = new SliceBuilder(desc, (short) partitionId);
     }
 
     @Override
     protected void build(List<Stream> streamsToBuild) throws IOException {
         logger.info("stream build start, size:" + streamsToBuild.size());
         Stopwatch stopwatch = new Stopwatch().start();
-        List<List<String>> table = Lists.transform(streamsToBuild, new Function<Stream, List<String>>() {
-            @Nullable
-            @Override
-            public List<String> apply(@Nullable Stream input) {
-                return parseStream(input, desc);
-            }
-        });
-        final Map<Integer, Dictionary<?>> dictionaryMap = buildDictionary(table, desc);
-        TableRecordInfo tableRecordInfo = new TableRecordInfo(desc, dictionaryMap);
-        final Slice slice = buildSlice(table, sliceBuilder, tableRecordInfo, dictionaryMap);
+        final Slice slice = sliceBuilder.buildSlice(streamsToBuild, getStreamParser());
         logger.info("slice info, shard:" + slice.getShard() + " timestamp:" + slice.getTimestamp() + " record count:" + slice.getRecordCount());
 
-        outputSlice(slice, tableRecordInfo);
-        submitOffset();
-
+        loadToHBase(hTable, slice, new IIKeyValueCodec(slice.getInfo()));
+        submitOffset(0);
         stopwatch.stop();
-        logger.info("stream build finished, size:" + streamsToBuild.size() + " elapsed time:" + stopwatch.elapsedTime(TimeUnit.MILLISECONDS) + TimeUnit.MILLISECONDS);
-    }
-
-    protected void outputSlice(Slice slice, TableRecordInfo tableRecordInfo) throws IOException {
-        loadToHBase(hTable, slice, new IIKeyValueCodec(tableRecordInfo.getDigest()));
-    }
-
-    private Map<Integer, Dictionary<?>> buildDictionary(List<List<String>> table, IIDesc desc) {
-        HashMultimap<TblColRef, String> valueMap = HashMultimap.create();
-        final List<TblColRef> allColumns = desc.listAllColumns();
-        for (List<String> row : table) {
-            for (int i = 0; i < row.size(); i++) {
-                String cell = row.get(i);
-                if (!desc.isMetricsCol(i)) {
-                    valueMap.put(allColumns.get(i), cell);
-                }
-            }
-        }
-
-        Map<Integer, Dictionary<?>> result = Maps.newHashMap();
-        for (TblColRef tblColRef : valueMap.keySet()) {
-            result.put(desc.findColumn(tblColRef), //
-                    DictionaryGenerator.buildDictionaryFromValueList(//
-                            tblColRef.getType(), //
-                            Collections2.transform(valueMap.get(tblColRef), new Function<String, byte[]>() {
-                                @Nullable
-                                @Override
-                                public byte[] apply(String input) {
-                                    return input.getBytes();
-                                }
-                            })));
-        }
-        return result;
-    }
-
-    private List<String> parseStream(Stream stream, IIDesc desc) {
-        return getStreamParser().parse(stream, desc.listAllColumns());
-    }
-
-    private Slice buildSlice(List<List<String>> table, BatchSliceBuilder sliceBuilder, final TableRecordInfo tableRecordInfo, Map<Integer, Dictionary<?>> localDictionary) {
-        final Slice slice = sliceBuilder.build(tableRecordInfo.getDigest(), Lists.transform(table, new Function<List<String>, TableRecord>() {
-            @Nullable
-            @Override
-            public TableRecord apply(@Nullable List<String> input) {
-                TableRecord result = tableRecordInfo.createTableRecord();
-                for (int i = 0; i < input.size(); i++) {
-                    result.setValueString(i, input.get(i));
-                }
-                return result;
-            }
-        }));
-        slice.setLocalDictionaries(localDictionary);
-        return slice;
+        logger.info("stream build finished, size:" + streamsToBuild.size() + " elapsed time:" + stopwatch.elapsedTime(TimeUnit.MILLISECONDS) + " " + TimeUnit.MILLISECONDS);
     }
 
     private void loadToHBase(HTableInterface hTable, Slice slice, IIKeyValueCodec codec) throws IOException {
@@ -192,8 +121,17 @@ public class IIStreamBuilder extends StreamBuilder {
         }
     }
 
-    private void submitOffset() {
-
+    private void submitOffset(long offset) {
+        final IIManager iiManager = IIManager.getInstance(KylinConfig.getInstanceFromEnv());
+        final IIInstance instance = iiManager.getII(ii.getName());
+        instance.getStreamOffsets().set(partitionId, offset);
+        try {
+            iiManager.updateII(instance);
+            logger.info("submit offset");
+        } catch (IOException e) {
+            logger.error("error submit offset: + " + offset, e);
+            throw new RuntimeException(e);
+        }
     }
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8e6afbf4/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/SliceBuilder.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/SliceBuilder.java b/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/SliceBuilder.java
new file mode 100644
index 0000000..ac2ce0f
--- /dev/null
+++ b/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/SliceBuilder.java
@@ -0,0 +1,126 @@
+/*
+ *
+ *
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *
+ *  contributor license agreements. See the NOTICE file distributed with
+ *
+ *  this work for additional information regarding copyright ownership.
+ *
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *
+ *  (the "License"); you may not use this file except in compliance with
+ *
+ *  the License. You may obtain a copy of the License at
+ *
+ *
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *
+ *  See the License for the specific language governing permissions and
+ *
+ *  limitations under the License.
+ *
+ * /
+ */
+
+package org.apache.kylin.streaming.invertedindex;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Collections2;
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.kylin.dict.Dictionary;
+import org.apache.kylin.dict.DictionaryGenerator;
+import org.apache.kylin.invertedindex.index.BatchSliceBuilder;
+import org.apache.kylin.invertedindex.index.Slice;
+import org.apache.kylin.invertedindex.index.TableRecord;
+import org.apache.kylin.invertedindex.index.TableRecordInfo;
+import org.apache.kylin.invertedindex.model.IIDesc;
+import org.apache.kylin.metadata.model.TblColRef;
+import org.apache.kylin.streaming.Stream;
+import org.apache.kylin.streaming.StreamParser;
+
+import javax.annotation.Nullable;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Created by qianzhou on 3/27/15.
+ */
+public final class SliceBuilder {
+
+    public SliceBuilder(IIDesc desc, short shard){
+        this.iiDesc = desc;
+        this.sliceBuilder = new BatchSliceBuilder(desc, shard);
+    }
+
+    private final BatchSliceBuilder sliceBuilder;
+    private final IIDesc iiDesc;
+
+    public Slice buildSlice(List<Stream> streams, final StreamParser streamParser) {
+        List<List<String>> table = Lists.transform(streams, new Function<Stream, List<String>>() {
+            @Nullable
+            @Override
+            public List<String> apply(@Nullable Stream input) {
+                return streamParser.parse(input);
+            }
+        });
+        final Map<Integer, Dictionary<?>> dictionaryMap = buildDictionary(table, iiDesc);
+        TableRecordInfo tableRecordInfo = new TableRecordInfo(iiDesc, dictionaryMap);
+        return build(table, sliceBuilder, tableRecordInfo, dictionaryMap);
+    }
+
+    private Map<Integer, Dictionary<?>> buildDictionary(List<List<String>> table, IIDesc desc) {
+        HashMultimap<TblColRef, String> valueMap = HashMultimap.create();
+        final List<TblColRef> allColumns = desc.listAllColumns();
+        for (List<String> row : table) {
+            for (int i = 0; i < row.size(); i++) {
+                String cell = row.get(i);
+                if (!desc.isMetricsCol(i)) {
+                    valueMap.put(allColumns.get(i), cell);
+                }
+            }
+        }
+
+        Map<Integer, Dictionary<?>> result = Maps.newHashMap();
+        for (TblColRef tblColRef : valueMap.keySet()) {
+            final Collection<byte[]> bytes = Collections2.transform(valueMap.get(tblColRef), new Function<String, byte[]>() {
+                @Nullable
+                @Override
+                public byte[] apply(String input) {
+                    return input.getBytes();
+                }
+            });
+            final Dictionary<?> dict = DictionaryGenerator.buildDictionaryFromValueList(tblColRef.getType(), bytes);
+            result.put(desc.findColumn(tblColRef), dict);
+        }
+        return result;
+    }
+
+    private Slice build(List<List<String>> table, BatchSliceBuilder sliceBuilder, final TableRecordInfo tableRecordInfo, Map<Integer, Dictionary<?>> localDictionary) {
+        final Slice slice = sliceBuilder.build(tableRecordInfo.getDigest(), Lists.transform(table, new Function<List<String>, TableRecord>() {
+            @Nullable
+            @Override
+            public TableRecord apply(@Nullable List<String> input) {
+                TableRecord result = tableRecordInfo.createTableRecord();
+                for (int i = 0; i < input.size(); i++) {
+                    result.setValueString(i, input.get(i));
+                }
+                return result;
+            }
+        }));
+        slice.setLocalDictionaries(localDictionary);
+        return slice;
+    }
+}

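A hedged usage fragment for the extracted SliceBuilder, mirroring IIStreamBuilder.build() above (iiDesc, partitionId, streamsToBuild and parser are assumed to be supplied by the caller):

    SliceBuilder sliceBuilder = new SliceBuilder(iiDesc, (short) partitionId);
    Slice slice = sliceBuilder.buildSlice(streamsToBuild, parser);
    // the per-batch dictionaries travel with the slice itself
    Map<Integer, Dictionary<?>> localDicts = slice.getLocalDictionaries();
    // and the slice can be encoded to HBase rows with a codec built from its own digest
    Collection<IIRow> rows = new IIKeyValueCodec(slice.getInfo()).encodeKeyValue(slice);
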
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8e6afbf4/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/PrintOutStreamBuilder.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/PrintOutStreamBuilder.java b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/PrintOutStreamBuilder.java
index e83bdc5..e5873c0 100644
--- a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/PrintOutStreamBuilder.java
+++ b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/PrintOutStreamBuilder.java
@@ -40,7 +40,6 @@ import org.apache.kylin.streaming.JsonStreamParser;
 import org.apache.kylin.streaming.Stream;
 import org.apache.kylin.streaming.StreamBuilder;
 
-import java.util.Collection;
 import java.util.List;
 import java.util.concurrent.BlockingQueue;
 
@@ -53,14 +52,14 @@ public class PrintOutStreamBuilder extends StreamBuilder {
 
     public PrintOutStreamBuilder(BlockingQueue<Stream> streamQueue, int sliceSize, List<TblColRef> allColumns) {
         super(streamQueue, sliceSize);
-        setStreamParser(JsonStreamParser.instance);
+        setStreamParser(new JsonStreamParser(allColumns));
         this.allColumns = allColumns;
     }
 
     @Override
     protected void build(List<Stream> streamsToBuild) throws Exception {
         for (Stream stream : streamsToBuild) {
-            final List<String> row = getStreamParser().parse(stream, allColumns);
+            final List<String> row = getStreamParser().parse(stream);
             System.out.println("offset:" + stream.getOffset() + " " + StringUtils.join(row, ","));
         }
     }


[06/50] incubator-kylin git commit: refactor

Posted by li...@apache.org.
refactor


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/9a1c4cb6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/9a1c4cb6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/9a1c4cb6

Branch: refs/heads/streaming-localdict
Commit: 9a1c4cb6b3dcb967ab017c23de76cb910a103cb9
Parents: 71324f4
Author: qianhao.zhou <qi...@ebay.com>
Authored: Thu Mar 26 17:49:39 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Thu Mar 26 17:49:39 2015 +0800

----------------------------------------------------------------------
 .../apache/kylin/invertedindex/IIInstance.java  |  12 +++
 .../apache/kylin/invertedindex/IIManager.java   |  17 ++--
 .../invertedindex/index/BatchSliceBuilder.java  |   6 +-
 .../org/apache/kylin/streaming/KafkaConfig.java |  22 ++--
 .../apache/kylin/streaming/KafkaConsumer.java   |  17 ++--
 .../kylin/streaming/StreamingBootstrap.java     | 102 +++++++++++++++++++
 .../apache/kylin/streaming/StreamingCLI.java    |  70 +++++++++++++
 .../kylin/streaming/KafkaConsumerTest.java      |   2 +-
 8 files changed, 210 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9a1c4cb6/invertedindex/src/main/java/org/apache/kylin/invertedindex/IIInstance.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/IIInstance.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/IIInstance.java
index 7684699..fd300e0 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/IIInstance.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/IIInstance.java
@@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
 import com.fasterxml.jackson.annotation.JsonIgnore;
 import com.fasterxml.jackson.annotation.JsonManagedReference;
 import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.collect.Lists;
 import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.persistence.ResourceStore;
 import org.apache.kylin.common.persistence.RootPersistentEntity;
@@ -79,6 +80,9 @@ public class IIInstance extends RootPersistentEntity implements IRealization {
     @JsonProperty("segments")
     private List<IISegment> segments = new ArrayList<IISegment>();
 
+    @JsonProperty("stream_offset")
+    private List<Long> streamOffsets = Lists.newArrayList();
+
     @JsonProperty("create_time_utc")
     private long createTimeUTC;
 
@@ -357,4 +361,12 @@ public class IIInstance extends RootPersistentEntity implements IRealization {
     public void setCost(int cost) {
         this.cost = cost;
     }
+
+    public List<Long> getStreamOffsets() {
+        return streamOffsets;
+    }
+
+    public void setStreamOffsets(List<Long> streamOffsets) {
+        this.streamOffsets = streamOffsets;
+    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9a1c4cb6/invertedindex/src/main/java/org/apache/kylin/invertedindex/IIManager.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/IIManager.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/IIManager.java
index 6ebfbf8..b086d5d 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/IIManager.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/IIManager.java
@@ -240,19 +240,18 @@ public class IIManager implements IRealizationProvider {
     }
 
     private String generateStorageLocation() {
-        String namePrefix = IRealizationConstants.IIHbaseStorageLocationPrefix;
-        String tableName = "";
-        do {
-            StringBuffer sb = new StringBuffer();
-            sb.append(namePrefix);
+        while (true) {
+            StringBuilder sb = new StringBuilder(IRealizationConstants.IIHbaseStorageLocationPrefix);
             for (int i = 0; i < HBASE_TABLE_LENGTH; i++) {
                 int idx = (int) (Math.random() * ALPHA_NUM.length());
                 sb.append(ALPHA_NUM.charAt(idx));
             }
-            tableName = sb.toString();
-        } while (this.usedStorageLocation.contains(tableName));
-
-        return tableName;
+            if (usedStorageLocation.contains(sb.toString())) {
+                continue;
+            } else {
+                return sb.toString();
+            }
+        }
     }
 
     private void loadAllIIInstance() throws IOException {

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9a1c4cb6/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/BatchSliceBuilder.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/BatchSliceBuilder.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/BatchSliceBuilder.java
index 94b70c1..6ba328c 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/BatchSliceBuilder.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/BatchSliceBuilder.java
@@ -86,11 +86,7 @@ public class BatchSliceBuilder {
     }
 
     private long increaseSliceTimestamp(long timestamp) {
-        if (timestamp < sliceTimestamp) {
-            throw new IllegalStateException();
-        }
-
-        if (timestamp == sliceTimestamp) {
+        if (timestamp <= sliceTimestamp) {
             return ++timestamp; // ensure slice timestamp increases
         } else {
             return timestamp;

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9a1c4cb6/streaming/src/main/java/org/apache/kylin/streaming/KafkaConfig.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/KafkaConfig.java b/streaming/src/main/java/org/apache/kylin/streaming/KafkaConfig.java
index b22c7e0..5194e9d 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/KafkaConfig.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/KafkaConfig.java
@@ -76,10 +76,8 @@ public class KafkaConfig extends RootPersistentEntity {
     @JsonProperty("bufferSize")
     private int bufferSize;
 
-    @JsonProperty("iiDesc")
-    private String iiDesc;
-
-    private int partitionId;
+    @JsonProperty("iiName")
+    private String iiName;
 
     public int getTimeout() {
         return timeout;
@@ -121,14 +119,6 @@ public class KafkaConfig extends RootPersistentEntity {
         this.topic = topic;
     }
 
-    public int getPartitionId() {
-        return partitionId;
-    }
-
-    public void setPartitionId(int partitionId) {
-        this.partitionId = partitionId;
-    }
-
     public void setBrokerConfigs(List<BrokerConfig> brokerConfigs) {
         this.brokerConfigs = brokerConfigs;
     }
@@ -143,6 +133,14 @@ public class KafkaConfig extends RootPersistentEntity {
         });
     }
 
+    public String getIiName() {
+        return iiName;
+    }
+
+    public void setIiName(String iiName) {
+        this.iiName = iiName;
+    }
+
     public String getName() {
         return name;
     }

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9a1c4cb6/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java b/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
index 42a0f1f..910041c 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
@@ -58,15 +58,16 @@ public abstract class KafkaConsumer implements Runnable {
 
     private KafkaConfig kafkaConfig;
     private List<Broker> replicaBrokers;
-    private AtomicLong offset = new AtomicLong();
+    private long offset;
     private BlockingQueue<Stream> streamQueue;
 
     private Logger logger;
 
-    public KafkaConsumer(String topic, int partitionId, List<Broker> initialBrokers, KafkaConfig kafkaConfig) {
+    public KafkaConsumer(String topic, int partitionId, long startOffset, List<Broker> initialBrokers, KafkaConfig kafkaConfig) {
         this.topic = topic;
         this.partitionId = partitionId;
         this.kafkaConfig = kafkaConfig;
+        offset = startOffset;
         this.replicaBrokers = initialBrokers;
         logger = LoggerFactory.getLogger("KafkaConsumer_" + topic + "_" + partitionId);
         streamQueue = new ArrayBlockingQueue<Stream>(kafkaConfig.getMaxReadCount());
@@ -90,12 +91,6 @@ public abstract class KafkaConsumer implements Runnable {
     public void run() {
         try {
             Broker leadBroker = getLeadBroker();
-            if (leadBroker == null) {
-                logger.warn("cannot find lead broker");
-            } else {
-                final long lastOffset = KafkaRequester.getLastOffset(topic, partitionId, OffsetRequest.EarliestTime(), leadBroker, kafkaConfig);
-                offset.set(lastOffset);
-            }
             while (true) {
                 if (leadBroker == null) {
                     leadBroker = getLeadBroker();
@@ -105,9 +100,9 @@ public abstract class KafkaConsumer implements Runnable {
                     continue;
                 }
 
-                final FetchResponse fetchResponse = KafkaRequester.fetchResponse(topic, partitionId, offset.get(), leadBroker, kafkaConfig);
+                final FetchResponse fetchResponse = KafkaRequester.fetchResponse(topic, partitionId, offset, leadBroker, kafkaConfig);
                 if (fetchResponse.errorCode(topic, partitionId) != 0) {
-                    logger.warn("fetch response offset:" + offset.get() + " errorCode:" + fetchResponse.errorCode(topic, partitionId));
+                    logger.warn("fetch response offset:" + offset + " errorCode:" + fetchResponse.errorCode(topic, partitionId));
                     continue;
                 }
                 for (MessageAndOffset messageAndOffset : fetchResponse.messageSet(topic, partitionId)) {
@@ -117,7 +112,7 @@ public abstract class KafkaConsumer implements Runnable {
                         logger.error("error put streamQueue", e);
                         break;
                     }
-                    offset.incrementAndGet();
+                    offset++;
                 }
             }
         } catch (Exception e) {

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9a1c4cb6/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java b/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java
new file mode 100644
index 0000000..4528a72
--- /dev/null
+++ b/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java
@@ -0,0 +1,102 @@
+/*
+ *
+ *
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *
+ *  contributor license agreements. See the NOTICE file distributed with
+ *
+ *  this work for additional information regarding copyright ownership.
+ *
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *
+ *  (the "License"); you may not use this file except in compliance with
+ *
+ *  the License. You may obtain a copy of the License at
+ *
+ *
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *
+ *  See the License for the specific language governing permissions and
+ *
+ *  limitations under the License.
+ *
+ * /
+ */
+
+package org.apache.kylin.streaming;
+
+import com.google.common.base.Preconditions;
+import kafka.api.OffsetRequest;
+import kafka.cluster.Broker;
+import kafka.javaapi.PartitionMetadata;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.invertedindex.IIDescManager;
+import org.apache.kylin.invertedindex.IIInstance;
+import org.apache.kylin.invertedindex.IIManager;
+import org.apache.kylin.invertedindex.model.IIDesc;
+
+import java.nio.ByteBuffer;
+import java.util.concurrent.Executors;
+
+/**
+ * Created by qianzhou on 3/26/15.
+ */
+public class StreamingBootstrap {
+
+    private static KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
+    private static StreamManager streamManager = StreamManager.getInstance(kylinConfig);
+    private static IIManager iiManager = IIManager.getInstance(kylinConfig);
+    private static IIDescManager iiDescManager = IIDescManager.getInstance(kylinConfig);
+
+
+    private static Broker getLeadBroker(KafkaConfig kafkaConfig, int partitionId) {
+        final PartitionMetadata partitionMetadata = KafkaRequester.getPartitionMetadata(kafkaConfig.getTopic(), partitionId, kafkaConfig.getBrokers(), kafkaConfig);
+        if (partitionMetadata != null && partitionMetadata.errorCode() == 0) {
+            return partitionMetadata.leader();
+        } else {
+            return null;
+        }
+    }
+
+    public static void startStreaming(String streamingConf, int partitionId) throws Exception {
+        final KafkaConfig kafkaConfig = streamManager.getKafkaConfig(streamingConf);
+        Preconditions.checkArgument(kafkaConfig != null, "cannot find kafka config:" + streamingConf);
+        final IIInstance ii = iiManager.getII(kafkaConfig.getIiName());
+        Preconditions.checkNotNull(ii);
+
+        final Broker leadBroker = getLeadBroker(kafkaConfig, partitionId);
+        Preconditions.checkState(leadBroker != null, "cannot find lead broker");
+        final long earliestOffset = KafkaRequester.getLastOffset(kafkaConfig.getTopic(), partitionId, OffsetRequest.EarliestTime(), leadBroker, kafkaConfig);
+        long streamOffset = ii.getStreamOffsets().get(partitionId);
+        if (streamOffset < earliestOffset) {
+            streamOffset = earliestOffset;
+        }
+
+
+        KafkaConsumer consumer = new KafkaConsumer(kafkaConfig.getTopic(), 0, streamOffset, kafkaConfig.getBrokers(), kafkaConfig) {
+            @Override
+            protected void consume(long offset, ByteBuffer payload) throws Exception {
+                byte[] bytes = new byte[payload.limit()];
+                payload.get(bytes);
+                getStreamQueue().put(new Stream(offset, bytes));
+            }
+        };
+        final IIDesc desc = ii.getDescriptor();
+        Executors.newSingleThreadExecutor().execute(consumer);
+        while (true) {
+            final Stream stream = consumer.getStreamQueue().poll();
+            if (stream != null) {
+                System.out.println("offset:" + stream.getOffset() + " content:" + new String(stream.getRawData()));
+            }
+        }
+    }
+}
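For orientation, a minimal hedged sketch of how this bootstrap is meant to be driven; the config name "my_kafka_config" is only an illustration and must match a KafkaConfig registered through StreamManager, and startStreaming as written above loops forever:

    import org.apache.kylin.streaming.StreamingBootstrap;

    // hypothetical driver class, not part of this commit
    public class StreamingBootstrapExample {
        public static void main(String[] args) throws Exception {
            // looks up the KafkaConfig, finds the lead broker of partition 0,
            // then consumes from the II's saved stream offset, falling back to the earliest offset in Kafka
            StreamingBootstrap.startStreaming("my_kafka_config", 0);
        }
    }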

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9a1c4cb6/streaming/src/main/java/org/apache/kylin/streaming/StreamingCLI.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/StreamingCLI.java b/streaming/src/main/java/org/apache/kylin/streaming/StreamingCLI.java
new file mode 100644
index 0000000..70290f1
--- /dev/null
+++ b/streaming/src/main/java/org/apache/kylin/streaming/StreamingCLI.java
@@ -0,0 +1,70 @@
+/*
+ *
+ *
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *
+ *  contributor license agreements. See the NOTICE file distributed with
+ *
+ *  this work for additional information regarding copyright ownership.
+ *
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *
+ *  (the "License"); you may not use this file except in compliance with
+ *
+ *  the License. You may obtain a copy of the License at
+ *
+ *
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *
+ *  See the License for the specific language governing permissions and
+ *
+ *  limitations under the License.
+ *
+ *
+ */
+
+package org.apache.kylin.streaming;
+
+import org.apache.commons.lang3.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Created by qianzhou on 3/26/15.
+ */
+public class StreamingCLI {
+
+    private static final Logger logger = LoggerFactory.getLogger(StreamingCLI.class);
+
+    public static void main(String[] args) {
+        try {
+            if (args.length < 2) {
+                printArgsError(args);
+                return;
+            }
+            if (args[0].equals("start")) {
+                String kafkaConfName = args[1];
+                StreamingBootstrap.startStreaming(kafkaConfName, 0);
+            } else if (args[0].equals("stop")) {
+
+            } else {
+                printArgsError(args);
+            }
+        } catch (Exception e) {
+            logger.error("error while running StreamingCLI", e);
+        }
+    }
+
+    private static void printArgsError(String[] args) {
+        logger.warn("invalid args:" + StringUtils.join(args, " "));
+    }
+
+}
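For reference, the same entry point exercised programmatically; the argument values below are placeholders, and only the "start" branch does anything in this commit (the partition id is fixed to 0 inside the CLI):

    import org.apache.kylin.streaming.StreamingCLI;

    // hypothetical usage, not part of this commit
    public class StreamingCLIExample {
        public static void main(String[] args) {
            // equivalent to launching the CLI with the arguments: start my_kafka_config
            StreamingCLI.main(new String[] { "start", "my_kafka_config" });
        }
    }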

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9a1c4cb6/streaming/src/test/java/org/apache/kylin/streaming/KafkaConsumerTest.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/KafkaConsumerTest.java b/streaming/src/test/java/org/apache/kylin/streaming/KafkaConsumerTest.java
index c824c48..337dfc7 100644
--- a/streaming/src/test/java/org/apache/kylin/streaming/KafkaConsumerTest.java
+++ b/streaming/src/test/java/org/apache/kylin/streaming/KafkaConsumerTest.java
@@ -90,7 +90,7 @@ public class KafkaConsumerTest extends KafkaBaseTest {
         final ExecutorService executorService = Executors.newFixedThreadPool(kafkaTopicMeta.getPartitionIds().size());
         List<BlockingQueue<Stream>> queues = Lists.newArrayList();
         for (Integer partitionId : kafkaTopicMeta.getPartitionIds()) {
-            KafkaConsumer consumer = new KafkaConsumer(kafkaTopicMeta.getName(), partitionId, kafkaConfig.getBrokers(), kafkaConfig) {
+            KafkaConsumer consumer = new KafkaConsumer(kafkaTopicMeta.getName(), partitionId, 0, kafkaConfig.getBrokers(), kafkaConfig) {
                 @Override
                 protected void consume(long offset, ByteBuffer payload) throws Exception {
                     //TODO use ByteBuffer maybe


[50/50] incubator-kylin git commit: Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict

Posted by li...@apache.org.
Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict

Conflicts:
	invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecord.java


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/1ad30104
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/1ad30104
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/1ad30104

Branch: refs/heads/streaming-localdict
Commit: 1ad301044e7ae2de2f05d55b98f54e7006de71f1
Parents: 48a7971 c043b85
Author: liyang@apache.org <ya...@D-SHC-00801746.corp.ebay.com>
Authored: Fri Mar 27 14:07:40 2015 +0100
Committer: liyang@apache.org <ya...@D-SHC-00801746.corp.ebay.com>
Committed: Fri Mar 27 14:07:40 2015 +0100

----------------------------------------------------------------------
 bin/kylin.sh                                    |  26 ++
 .../apache/kylin/common/util/FIFOIterable.java  |  20 ++
 .../apache/kylin/common/util/FIFOIterator.java  |  34 +++
 .../org/apache/kylin/common/util/BasicTest.java |  12 +-
 .../test_kylin_cube_with_slr_desc.json          |   2 +-
 .../invertedindex/index/RawTableRecord.java     |   2 +
 .../invertedindex/index/TableRecordInfo.java    |  10 +-
 .../kylin/invertedindex/model/IIDesc.java       |   1 +
 .../model/IIKeyValueCodecWithState.java         |  24 +-
 .../apache/kylin/invertedindex/model/IIRow.java |  10 +
 .../apache/kylin/job/cube/CubingJobBuilder.java |   2 -
 .../kylin/job/hadoop/cube/BaseCuboidJob.java    |   2 +-
 .../kylin/job/hadoop/cube/BaseCuboidMapper.java | 242 -------------------
 .../job/hadoop/cube/BaseCuboidMapperBase.java   | 205 ++++++++++++++++
 .../cube/FactDistinctHiveColumnsMapper.java     |  16 +-
 .../cube/FactDistinctIIColumnsMapper.java       |  28 +--
 .../job/hadoop/cube/HiveToBaseCuboidMapper.java |  49 ++++
 .../job/hadoop/cube/IIToBaseCuboidMapper.java   | 109 +++++++++
 .../kylin/job/hadoop/cubev2/InMemCuboidJob.java |   5 -
 .../kylin/job/streaming/StreamingBootstrap.java |  45 +++-
 .../kylin/job/streaming/StreamingCLI.java       |   6 +-
 .../kylin/job/BuildCubeWithEngineTest.java      |   8 +-
 .../apache/kylin/job/BuildIIWithStreamTest.java |  30 +--
 .../apache/kylin/job/IIStreamBuilderTest.java   |  48 +++-
 .../cube/BaseCuboidMapperPerformanceTest.java   |  65 -----
 .../job/hadoop/cube/BaseCuboidMapperTest.java   | 145 -----------
 .../HiveToBaseCuboidMapperPerformanceTest.java  |  65 +++++
 .../hadoop/cube/HiveToBaseCuboidMapperTest.java | 145 +++++++++++
 .../job/hadoop/invertedindex/II2CubeTest.java   | 138 +++++++++++
 streaming/pom.xml                               |   7 +
 .../kylin/streaming/JsonStreamParser.java       |  12 +-
 .../apache/kylin/streaming/KafkaConsumer.java   |  12 +-
 .../java/org/apache/kylin/streaming/Stream.java |   2 +
 .../apache/kylin/streaming/StreamParser.java    |   4 +-
 .../kylin/streaming/StringStreamParser.java     |   2 +-
 .../kylin/streaming/cube/CubeStreamBuilder.java |  22 +-
 .../invertedindex/IIStreamBuilder.java          | 107 +++-----
 .../streaming/invertedindex/SliceBuilder.java   | 126 ++++++++++
 .../invertedindex/PrintOutStreamBuilder.java    |   5 +-
 39 files changed, 1136 insertions(+), 657 deletions(-)
----------------------------------------------------------------------



[27/50] incubator-kylin git commit: fix compile with level 1.6

Posted by li...@apache.org.
fix compile with level 1.6


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/bbbcae8f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/bbbcae8f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/bbbcae8f

Branch: refs/heads/streaming-localdict
Commit: bbbcae8f6730540e615764533c32a7c12693d5a0
Parents: 71bbd0c
Author: Li, Yang <ya...@ebay.com>
Authored: Fri Mar 27 14:02:44 2015 +0800
Committer: Li, Yang <ya...@ebay.com>
Committed: Fri Mar 27 14:02:44 2015 +0800

----------------------------------------------------------------------
 .../kylin/streaming/cube/CubeStreamBuilder.java |   4 +-
 .../kylin/streaming/EternalStreamProducer.java  |   2 +-
 .../Nous/NousEternalStreamProducer.java         |  46 --------
 .../kylin/streaming/Nous/NousMessage.java       | 118 -------------------
 .../kylin/streaming/Nous/NousMessageTest.java   |  31 -----
 .../kylin/streaming/OneOffStreamProducer.java   |   5 +-
 .../nous/NousEternalStreamProducer.java         |  46 ++++++++
 .../kylin/streaming/nous/NousMessage.java       | 118 +++++++++++++++++++
 .../kylin/streaming/nous/NousMessageTest.java   |  31 +++++
 9 files changed, 202 insertions(+), 199 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/bbbcae8f/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java b/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
index 912c3cd..9554797 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
@@ -178,7 +178,7 @@ public class CubeStreamBuilder extends StreamBuilder {
     }
 
     private void outputGT(GridTable gridTable) throws IOException {
-        GTScanRequest req = new GTScanRequest(gridTable.getInfo(), null, null, null, null);
+        GTScanRequest req = new GTScanRequest(gridTable.getInfo(), null, null, null);
         IGTScanner scanner = gridTable.scan(req);
         for (GTRecord record : scanner) {
             logger.debug(record.toString());
@@ -248,7 +248,7 @@ public class CubeStreamBuilder extends StreamBuilder {
     }
 
     private GridTable scanAndAggregateGridTable(GridTable gridTable, long cuboidId, BitSet aggregationColumns, BitSet measureColumns) throws IOException {
-        GTScanRequest req = new GTScanRequest(gridTable.getInfo(), null, null, aggregationColumns, measureColumns, metricsAggrFuncs, null);
+        GTScanRequest req = new GTScanRequest(gridTable.getInfo(), null, aggregationColumns, measureColumns, metricsAggrFuncs, null);
         IGTScanner scanner = gridTable.scan(req);
         GridTable newGridTable = newGridTableByCuboidID(cuboidId);
         GTBuilder builder = newGridTable.rebuild();

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/bbbcae8f/streaming/src/test/java/org/apache/kylin/streaming/EternalStreamProducer.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/EternalStreamProducer.java b/streaming/src/test/java/org/apache/kylin/streaming/EternalStreamProducer.java
index 7406c4c..07660d3 100644
--- a/streaming/src/test/java/org/apache/kylin/streaming/EternalStreamProducer.java
+++ b/streaming/src/test/java/org/apache/kylin/streaming/EternalStreamProducer.java
@@ -67,7 +67,7 @@ public class EternalStreamProducer {
         scheduledExecutorService.scheduleAtFixedRate(new Thread(new Runnable() {
             @Override
             public void run() {
-                final KeyedMessage<String, String> message = new KeyedMessage<>(kafkaConfig.getTopic(), getOneMessage());
+                final KeyedMessage<String, String> message = new KeyedMessage<String, String>(kafkaConfig.getTopic(), getOneMessage());
                 producer.send(message);
                 try {
                     Thread.sleep(100);

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/bbbcae8f/streaming/src/test/java/org/apache/kylin/streaming/Nous/NousEternalStreamProducer.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/Nous/NousEternalStreamProducer.java b/streaming/src/test/java/org/apache/kylin/streaming/Nous/NousEternalStreamProducer.java
deleted file mode 100644
index a93128f..0000000
--- a/streaming/src/test/java/org/apache/kylin/streaming/Nous/NousEternalStreamProducer.java
+++ /dev/null
@@ -1,46 +0,0 @@
-package org.apache.kylin.streaming.nous;
-
-import java.util.Calendar;
-import java.util.Random;
-
-import org.apache.commons.lang3.RandomStringUtils;
-import org.apache.kylin.common.util.JsonUtil;
-import org.apache.kylin.streaming.EternalStreamProducer;
-
-import com.fasterxml.jackson.core.JsonProcessingException;
-
-/**
- * Created by Hongbin Ma(Binmahone) on 3/16/15.
- */
-public class NousEternalStreamProducer extends EternalStreamProducer {
-
-    /**
-     * @param frequency records added per second, 100 for recommendation
-     */
-    public NousEternalStreamProducer(int frequency) {
-        super(frequency);
-    }
-
-    @Override
-    protected String getOneMessage() {
-
-        Calendar currentTime = Calendar.getInstance();
-        Calendar minuteStart = Calendar.getInstance();
-        Calendar hourStart = Calendar.getInstance();
-
-        currentTime.setTimeInMillis(System.currentTimeMillis());
-        minuteStart.clear();
-        hourStart.clear();
-
-        minuteStart.set(currentTime.get(Calendar.YEAR), currentTime.get(Calendar.MONTH), currentTime.get(Calendar.DAY_OF_MONTH), currentTime.get(Calendar.HOUR_OF_DAY), currentTime.get(Calendar.MINUTE));
-        hourStart.set(currentTime.get(Calendar.YEAR), currentTime.get(Calendar.MONTH), currentTime.get(Calendar.DAY_OF_MONTH), currentTime.get(Calendar.HOUR_OF_DAY), 0);
-
-        Random r = new Random();
-        NousMessage temp = new NousMessage(minuteStart.getTimeInMillis(), hourStart.getTimeInMillis(), RandomStringUtils.randomAlphabetic(1), RandomStringUtils.randomAlphabetic(1), RandomStringUtils.randomAlphabetic(1), RandomStringUtils.randomAlphabetic(1), RandomStringUtils.randomAlphabetic(1), r.nextInt(5), r.nextDouble() * 100, r.nextInt(2));
-        try {
-            return JsonUtil.writeValueAsIndentString(temp);
-        } catch (JsonProcessingException e) {
-            return "";
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/bbbcae8f/streaming/src/test/java/org/apache/kylin/streaming/Nous/NousMessage.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/Nous/NousMessage.java b/streaming/src/test/java/org/apache/kylin/streaming/Nous/NousMessage.java
deleted file mode 100644
index 3606514..0000000
--- a/streaming/src/test/java/org/apache/kylin/streaming/Nous/NousMessage.java
+++ /dev/null
@@ -1,118 +0,0 @@
-package org.apache.kylin.streaming.nous;
-
-import com.fasterxml.jackson.annotation.JsonAutoDetect;
-
-/**
- * Created by Hongbin Ma(Binmahone) on 3/16/15.
- *
- * The kafka message format for Nous
- */
-@JsonAutoDetect
-public class NousMessage {
-    private long minute_start;
-    private long hour_start;
-    private String itm;
-    private String t;
-    private String sid;
-    private String p;
-    private String m;
-    private long click;
-    private double gmv;
-    private long qty;
-
-    public NousMessage() {
-    }
-
-    public NousMessage(long minute_start, long hour_start, String itm, String t, String sid, String p, String m, long click, double gmv, long qty) {
-        this.minute_start = minute_start;
-        this.hour_start = hour_start;
-        this.itm = itm;
-        this.t = t;
-        this.sid = sid;
-        this.p = p;
-        this.m = m;
-        this.click = click;
-        this.gmv = gmv;
-        this.qty = qty;
-    }
-
-    public long getMinute_start() {
-        return minute_start;
-    }
-
-    public void setMinute_start(long minute_start) {
-        this.minute_start = minute_start;
-    }
-
-    public long getHour_start() {
-        return hour_start;
-    }
-
-    public void setHour_start(long hour_start) {
-        this.hour_start = hour_start;
-    }
-
-    public String getItm() {
-        return itm;
-    }
-
-    public void setItm(String itm) {
-        this.itm = itm;
-    }
-
-    public String getT() {
-        return t;
-    }
-
-    public void setT(String t) {
-        this.t = t;
-    }
-
-    public String getSid() {
-        return sid;
-    }
-
-    public void setSid(String sid) {
-        this.sid = sid;
-    }
-
-    public String getP() {
-        return p;
-    }
-
-    public void setP(String p) {
-        this.p = p;
-    }
-
-    public String getM() {
-        return m;
-    }
-
-    public void setM(String m) {
-        this.m = m;
-    }
-
-    public long getClick() {
-        return click;
-    }
-
-    public void setClick(long click) {
-        this.click = click;
-    }
-
-    public double getGmv() {
-        return gmv;
-    }
-
-    public void setGmv(double gmv) {
-        this.gmv = gmv;
-    }
-
-    public long getQty() {
-        return qty;
-    }
-
-    public void setQty(long qty) {
-        this.qty = qty;
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/bbbcae8f/streaming/src/test/java/org/apache/kylin/streaming/Nous/NousMessageTest.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/Nous/NousMessageTest.java b/streaming/src/test/java/org/apache/kylin/streaming/Nous/NousMessageTest.java
deleted file mode 100644
index 6cfc8f5..0000000
--- a/streaming/src/test/java/org/apache/kylin/streaming/Nous/NousMessageTest.java
+++ /dev/null
@@ -1,31 +0,0 @@
-package org.apache.kylin.streaming.nous;
-
-import java.io.IOException;
-
-import org.apache.kylin.common.util.JsonUtil;
-import org.junit.Ignore;
-import org.junit.Test;
-
-import static org.junit.Assert.assertEquals;
-
-/**
- * Created by Hongbin Ma(Binmahone) on 3/16/15.
- */
-public class NousMessageTest {
-    @Test
-    public void testJson() throws IOException {
-        NousMessage a = new NousMessage(1, 2, "a", "b", "c", "d", "e", 100, 200.0, 300);
-        String x = JsonUtil.writeValueAsIndentString(a);
-        NousMessage b = JsonUtil.readValue(x, NousMessage.class);
-        assertEquals(100, b.getClick());
-    }
-
-    @Ignore("disable this producer since it will make number of messages in a topic agnostic ")
-    @Test
-    public void testProducer() throws IOException, InterruptedException {
-        NousEternalStreamProducer p = new NousEternalStreamProducer(10);
-        p.start();
-        Thread.sleep(5000);
-        p.stop();
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/bbbcae8f/streaming/src/test/java/org/apache/kylin/streaming/OneOffStreamProducer.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/OneOffStreamProducer.java b/streaming/src/test/java/org/apache/kylin/streaming/OneOffStreamProducer.java
index fbcf0a5..1f45cdb 100644
--- a/streaming/src/test/java/org/apache/kylin/streaming/OneOffStreamProducer.java
+++ b/streaming/src/test/java/org/apache/kylin/streaming/OneOffStreamProducer.java
@@ -36,16 +36,19 @@ package org.apache.kylin.streaming;
 
 import com.google.common.base.Function;
 import com.google.common.collect.Iterators;
+
 import kafka.cluster.Broker;
 import kafka.javaapi.producer.Producer;
 import kafka.producer.KeyedMessage;
 import kafka.producer.ProducerConfig;
+
 import org.apache.commons.lang3.StringUtils;
 import org.apache.kylin.common.KylinConfig;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import javax.annotation.Nullable;
+
 import java.io.IOException;
 import java.util.Properties;
 
@@ -87,7 +90,7 @@ public class OneOffStreamProducer {
             public void run() {
                 int count = 0;
                 while (!stopped && count < sendCount) {
-                    final KeyedMessage<String, String> message = new KeyedMessage<>(kafkaConfig.getTopic(), "current time is:" + System.currentTimeMillis());
+                    final KeyedMessage<String, String> message = new KeyedMessage<String, String>(kafkaConfig.getTopic(), "current time is:" + System.currentTimeMillis());
                     producer.send(message);
                     count++;
                     try {

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/bbbcae8f/streaming/src/test/java/org/apache/kylin/streaming/nous/NousEternalStreamProducer.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/nous/NousEternalStreamProducer.java b/streaming/src/test/java/org/apache/kylin/streaming/nous/NousEternalStreamProducer.java
new file mode 100644
index 0000000..a93128f
--- /dev/null
+++ b/streaming/src/test/java/org/apache/kylin/streaming/nous/NousEternalStreamProducer.java
@@ -0,0 +1,46 @@
+package org.apache.kylin.streaming.nous;
+
+import java.util.Calendar;
+import java.util.Random;
+
+import org.apache.commons.lang3.RandomStringUtils;
+import org.apache.kylin.common.util.JsonUtil;
+import org.apache.kylin.streaming.EternalStreamProducer;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+
+/**
+ * Created by Hongbin Ma(Binmahone) on 3/16/15.
+ */
+public class NousEternalStreamProducer extends EternalStreamProducer {
+
+    /**
+     * @param frequency records added per second, 100 for recommendation
+     */
+    public NousEternalStreamProducer(int frequency) {
+        super(frequency);
+    }
+
+    @Override
+    protected String getOneMessage() {
+
+        Calendar currentTime = Calendar.getInstance();
+        Calendar minuteStart = Calendar.getInstance();
+        Calendar hourStart = Calendar.getInstance();
+
+        currentTime.setTimeInMillis(System.currentTimeMillis());
+        minuteStart.clear();
+        hourStart.clear();
+
+        minuteStart.set(currentTime.get(Calendar.YEAR), currentTime.get(Calendar.MONTH), currentTime.get(Calendar.DAY_OF_MONTH), currentTime.get(Calendar.HOUR_OF_DAY), currentTime.get(Calendar.MINUTE));
+        hourStart.set(currentTime.get(Calendar.YEAR), currentTime.get(Calendar.MONTH), currentTime.get(Calendar.DAY_OF_MONTH), currentTime.get(Calendar.HOUR_OF_DAY), 0);
+
+        Random r = new Random();
+        NousMessage temp = new NousMessage(minuteStart.getTimeInMillis(), hourStart.getTimeInMillis(), RandomStringUtils.randomAlphabetic(1), RandomStringUtils.randomAlphabetic(1), RandomStringUtils.randomAlphabetic(1), RandomStringUtils.randomAlphabetic(1), RandomStringUtils.randomAlphabetic(1), r.nextInt(5), r.nextDouble() * 100, r.nextInt(2));
+        try {
+            return JsonUtil.writeValueAsIndentString(temp);
+        } catch (JsonProcessingException e) {
+            return "";
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/bbbcae8f/streaming/src/test/java/org/apache/kylin/streaming/nous/NousMessage.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/nous/NousMessage.java b/streaming/src/test/java/org/apache/kylin/streaming/nous/NousMessage.java
new file mode 100644
index 0000000..3606514
--- /dev/null
+++ b/streaming/src/test/java/org/apache/kylin/streaming/nous/NousMessage.java
@@ -0,0 +1,118 @@
+package org.apache.kylin.streaming.nous;
+
+import com.fasterxml.jackson.annotation.JsonAutoDetect;
+
+/**
+ * Created by Hongbin Ma(Binmahone) on 3/16/15.
+ *
+ * The kafka message format for Nous
+ */
+@JsonAutoDetect
+public class NousMessage {
+    private long minute_start;
+    private long hour_start;
+    private String itm;
+    private String t;
+    private String sid;
+    private String p;
+    private String m;
+    private long click;
+    private double gmv;
+    private long qty;
+
+    public NousMessage() {
+    }
+
+    public NousMessage(long minute_start, long hour_start, String itm, String t, String sid, String p, String m, long click, double gmv, long qty) {
+        this.minute_start = minute_start;
+        this.hour_start = hour_start;
+        this.itm = itm;
+        this.t = t;
+        this.sid = sid;
+        this.p = p;
+        this.m = m;
+        this.click = click;
+        this.gmv = gmv;
+        this.qty = qty;
+    }
+
+    public long getMinute_start() {
+        return minute_start;
+    }
+
+    public void setMinute_start(long minute_start) {
+        this.minute_start = minute_start;
+    }
+
+    public long getHour_start() {
+        return hour_start;
+    }
+
+    public void setHour_start(long hour_start) {
+        this.hour_start = hour_start;
+    }
+
+    public String getItm() {
+        return itm;
+    }
+
+    public void setItm(String itm) {
+        this.itm = itm;
+    }
+
+    public String getT() {
+        return t;
+    }
+
+    public void setT(String t) {
+        this.t = t;
+    }
+
+    public String getSid() {
+        return sid;
+    }
+
+    public void setSid(String sid) {
+        this.sid = sid;
+    }
+
+    public String getP() {
+        return p;
+    }
+
+    public void setP(String p) {
+        this.p = p;
+    }
+
+    public String getM() {
+        return m;
+    }
+
+    public void setM(String m) {
+        this.m = m;
+    }
+
+    public long getClick() {
+        return click;
+    }
+
+    public void setClick(long click) {
+        this.click = click;
+    }
+
+    public double getGmv() {
+        return gmv;
+    }
+
+    public void setGmv(double gmv) {
+        this.gmv = gmv;
+    }
+
+    public long getQty() {
+        return qty;
+    }
+
+    public void setQty(long qty) {
+        this.qty = qty;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/bbbcae8f/streaming/src/test/java/org/apache/kylin/streaming/nous/NousMessageTest.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/nous/NousMessageTest.java b/streaming/src/test/java/org/apache/kylin/streaming/nous/NousMessageTest.java
new file mode 100644
index 0000000..6cfc8f5
--- /dev/null
+++ b/streaming/src/test/java/org/apache/kylin/streaming/nous/NousMessageTest.java
@@ -0,0 +1,31 @@
+package org.apache.kylin.streaming.nous;
+
+import java.io.IOException;
+
+import org.apache.kylin.common.util.JsonUtil;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Created by Hongbin Ma(Binmahone) on 3/16/15.
+ */
+public class NousMessageTest {
+    @Test
+    public void testJson() throws IOException {
+        NousMessage a = new NousMessage(1, 2, "a", "b", "c", "d", "e", 100, 200.0, 300);
+        String x = JsonUtil.writeValueAsIndentString(a);
+        NousMessage b = JsonUtil.readValue(x, NousMessage.class);
+        assertEquals(100, b.getClick());
+    }
+
+    @Ignore("disable this producer since it will make number of messages in a topic agnostic ")
+    @Test
+    public void testProducer() throws IOException, InterruptedException {
+        NousEternalStreamProducer p = new NousEternalStreamProducer(10);
+        p.start();
+        Thread.sleep(5000);
+        p.stop();
+    }
+}


[47/50] incubator-kylin git commit: KYLIN-625, filter convert pass

Posted by li...@apache.org.
KYLIN-625, filter convert pass


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/d7fc2312
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/d7fc2312
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/d7fc2312

Branch: refs/heads/streaming-localdict
Commit: d7fc2312c4c800cb2fec5264b6997feda6527521
Parents: 24acccc
Author: Li, Yang <ya...@ebay.com>
Authored: Fri Mar 27 18:26:39 2015 +0800
Committer: Li, Yang <ya...@ebay.com>
Committed: Fri Mar 27 18:26:39 2015 +0800

----------------------------------------------------------------------
 .../org/apache/kylin/common/util/ByteArray.java |   9 +-
 .../metadata/filter/ColumnTupleFilter.java      |   6 +-
 .../metadata/filter/CompareTupleFilter.java     |   2 +-
 .../metadata/filter/ConstantTupleFilter.java    |   4 +-
 .../metadata/filter/LogicalTupleFilter.java     |   2 +-
 .../metadata/serializer/DataTypeSerializer.java |   3 +-
 .../gridtable/GTDictionaryCodeSystem.java       |  47 +++-
 .../kylin/storage/gridtable/GTRecord.java       |  29 ++-
 .../storage/gridtable/GTSampleCodeSystem.java   |   5 +-
 .../kylin/storage/gridtable/GTScanRange.java    |  11 +-
 .../kylin/storage/gridtable/GTScanRequest.java  |   8 +-
 .../apache/kylin/storage/gridtable/GTUtil.java  |  35 ++-
 .../storage/gridtable/DictGridTableTest.java    | 214 +++++++++++++++++++
 .../storage/gridtable/GTInvertedIndexTest.java  | 165 --------------
 .../kylin/storage/gridtable/GridTableTest.java  | 208 ------------------
 .../storage/gridtable/SimpleGridTableTest.java  | 208 ++++++++++++++++++
 .../gridtable/SimpleInvertedIndexTest.java      | 165 ++++++++++++++
 17 files changed, 690 insertions(+), 431 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/common/src/main/java/org/apache/kylin/common/util/ByteArray.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/kylin/common/util/ByteArray.java b/common/src/main/java/org/apache/kylin/common/util/ByteArray.java
index d09b350..8c6ae91 100644
--- a/common/src/main/java/org/apache/kylin/common/util/ByteArray.java
+++ b/common/src/main/java/org/apache/kylin/common/util/ByteArray.java
@@ -30,7 +30,7 @@ public class ByteArray implements Comparable<ByteArray> {
     public static ByteArray allocate(int length) {
         return new ByteArray(new byte[length]);
     }
-    
+
     public static ByteArray copyOf(byte[] array, int offset, int length) {
         byte[] space = new byte[length];
         System.arraycopy(array, offset, space, 0, length);
@@ -52,7 +52,7 @@ public class ByteArray implements Comparable<ByteArray> {
     }
 
     public ByteArray(byte[] data) {
-        set(data, 0, data.length);
+        set(data, 0, data == null ? 0 : data.length);
     }
 
     public ByteArray(byte[] data, int offset, int length) {
@@ -148,7 +148,10 @@ public class ByteArray implements Comparable<ByteArray> {
 
     @Override
     public String toString() {
-        return Bytes.toString(data, offset, length);
+        if (data == null)
+            return null;
+        else
+            return Bytes.toStringBinary(data, offset, length);
     }
 
 }
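Taken together, the two hunks above make ByteArray tolerate a null backing array and render binary content safely. A small hedged illustration of the resulting behaviour (the expected output noted in the comments is an assumption based on HBase's Bytes.toStringBinary):

    import org.apache.kylin.common.util.ByteArray;

    // illustrative only, not part of this commit
    public class ByteArrayNullExample {
        public static void main(String[] args) {
            ByteArray empty = new ByteArray((byte[]) null);
            // the constructor now records length 0 instead of throwing on a null array,
            // and toString() returns null rather than failing
            System.out.println(empty.toString());

            ByteArray bytes = new ByteArray(new byte[] { 0x00, 0x41 });
            // toStringBinary keeps non-printable bytes visible, e.g. "\x00A"
            System.out.println(bytes.toString());
        }
    }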

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/metadata/src/main/java/org/apache/kylin/metadata/filter/ColumnTupleFilter.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/filter/ColumnTupleFilter.java b/metadata/src/main/java/org/apache/kylin/metadata/filter/ColumnTupleFilter.java
index f689ccb..fde41b1 100644
--- a/metadata/src/main/java/org/apache/kylin/metadata/filter/ColumnTupleFilter.java
+++ b/metadata/src/main/java/org/apache/kylin/metadata/filter/ColumnTupleFilter.java
@@ -63,7 +63,7 @@ public class ColumnTupleFilter extends TupleFilter {
 
     @Override
     public String toString() {
-        return "ColumnFilter [column=" + columnRef + "]";
+        return "" + columnRef;
     }
 
     @Override
@@ -79,7 +79,7 @@ public class ColumnTupleFilter extends TupleFilter {
 
     @Override
     public Collection<?> getValues() {
-        this.values.set(0, (String) this.tupleValue);
+        this.values.set(0, this.tupleValue);
         return this.values;
     }
 
@@ -114,7 +114,7 @@ public class ColumnTupleFilter extends TupleFilter {
             table = new TableDesc();
             table.setName(tableName);
         }
-        
+
         column.setId(BytesUtil.readUTFString(buffer));
         column.setName(BytesUtil.readUTFString(buffer));
         column.setDatatype(BytesUtil.readUTFString(buffer));

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/metadata/src/main/java/org/apache/kylin/metadata/filter/CompareTupleFilter.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/filter/CompareTupleFilter.java b/metadata/src/main/java/org/apache/kylin/metadata/filter/CompareTupleFilter.java
index 2b68469..57b50b7 100644
--- a/metadata/src/main/java/org/apache/kylin/metadata/filter/CompareTupleFilter.java
+++ b/metadata/src/main/java/org/apache/kylin/metadata/filter/CompareTupleFilter.java
@@ -126,7 +126,7 @@ public class CompareTupleFilter extends TupleFilter {
 
     @Override
     public String toString() {
-        return "CompareFilter [" + column + " " + operator + " " + conditionValues + ", children=" + children + "]";
+        return column + " " + operator + " " + conditionValues;
     }
 
     // TODO requires generalize, currently only evaluates COLUMN {op} CONST

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/metadata/src/main/java/org/apache/kylin/metadata/filter/ConstantTupleFilter.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/filter/ConstantTupleFilter.java b/metadata/src/main/java/org/apache/kylin/metadata/filter/ConstantTupleFilter.java
index f372b4a..cc3add2 100644
--- a/metadata/src/main/java/org/apache/kylin/metadata/filter/ConstantTupleFilter.java
+++ b/metadata/src/main/java/org/apache/kylin/metadata/filter/ConstantTupleFilter.java
@@ -34,7 +34,7 @@ import org.apache.kylin.metadata.tuple.IEvaluatableTuple;
 public class ConstantTupleFilter extends TupleFilter {
 
     public static final ConstantTupleFilter FALSE = new ConstantTupleFilter();
-    public static final ConstantTupleFilter TRUE = new ConstantTupleFilter("TRUE");
+    public static final ConstantTupleFilter TRUE = new ConstantTupleFilter((Object) null); // not sure of underlying code system, null is the only value that applies to all types
 
     private Collection<Object> constantValues;
 
@@ -60,7 +60,7 @@ public class ConstantTupleFilter extends TupleFilter {
 
     @Override
     public String toString() {
-        return "ConstantFilter [constant=" + constantValues + "]";
+        return "" + constantValues;
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/metadata/src/main/java/org/apache/kylin/metadata/filter/LogicalTupleFilter.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/filter/LogicalTupleFilter.java b/metadata/src/main/java/org/apache/kylin/metadata/filter/LogicalTupleFilter.java
index 1844392..4d38565 100644
--- a/metadata/src/main/java/org/apache/kylin/metadata/filter/LogicalTupleFilter.java
+++ b/metadata/src/main/java/org/apache/kylin/metadata/filter/LogicalTupleFilter.java
@@ -67,7 +67,7 @@ public class LogicalTupleFilter extends TupleFilter {
 
     @Override
     public String toString() {
-        return "LogicalFilter [operator=" + operator + ", children=" + children + "]";
+        return operator + " " + children;
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/metadata/src/main/java/org/apache/kylin/metadata/serializer/DataTypeSerializer.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/serializer/DataTypeSerializer.java b/metadata/src/main/java/org/apache/kylin/metadata/serializer/DataTypeSerializer.java
index 63d4ddd..aafb1c2 100644
--- a/metadata/src/main/java/org/apache/kylin/metadata/serializer/DataTypeSerializer.java
+++ b/metadata/src/main/java/org/apache/kylin/metadata/serializer/DataTypeSerializer.java
@@ -70,9 +70,10 @@ abstract public class DataTypeSerializer<T> implements BytesSerializer<T> {
     /** peek into buffer and return the length of serialization */
     abstract public int peekLength(ByteBuffer in);
     
-    /** convert from String to obj */
+    /** convert from String to obj (string often come as byte[] in mapred) */
     abstract public T valueOf(byte[] value);
     
+    /** convert from String to obj */
     public T valueOf(String value) {
         try {
             return valueOf(value.getBytes("UTF-8"));

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
index ada4ed7..c94c604 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
@@ -2,10 +2,13 @@ package org.apache.kylin.storage.gridtable;
 
 import org.apache.kylin.common.util.ByteArray;
 import org.apache.kylin.common.util.BytesUtil;
+import org.apache.kylin.cube.CubeManager;
 import org.apache.kylin.dict.Dictionary;
 import org.apache.kylin.metadata.filter.IFilterCodeSystem;
 import org.apache.kylin.metadata.measure.MeasureAggregator;
 import org.apache.kylin.metadata.serializer.DataTypeSerializer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.nio.ByteBuffer;
 import java.util.Map;
@@ -15,13 +18,15 @@ import java.util.Map;
  */
 @SuppressWarnings({ "rawtypes", "unchecked" })
 public class GTDictionaryCodeSystem implements IGTCodeSystem {
+    private static final Logger logger = LoggerFactory.getLogger(GTDictionaryCodeSystem.class);
+
     private GTInfo info;
-    private Map<Integer, Dictionary> dictionaryMaps = null; // key: column index; value: dictionary for this column;
+    private Map<Integer, Dictionary> dictionaryMap = null; // key: column index; value: dictionary for this column;
     private IFilterCodeSystem<ByteArray> filterCS;
     private DataTypeSerializer[] serializers;
 
-    public GTDictionaryCodeSystem(Map<Integer, Dictionary> dictionaryMaps) {
-        this.dictionaryMaps = dictionaryMaps;
+    public GTDictionaryCodeSystem(Map<Integer, Dictionary> dictionaryMap) {
+        this.dictionaryMap = dictionaryMap;
     }
 
     @Override
@@ -30,8 +35,8 @@ public class GTDictionaryCodeSystem implements IGTCodeSystem {
 
         serializers = new DataTypeSerializer[info.nColumns];
         for (int i = 0; i < info.nColumns; i++) {
-            if (dictionaryMaps.get(i) != null) {
-                serializers[i] = new DictionarySerializer(dictionaryMaps.get(i));
+            if (dictionaryMap.get(i) != null) {
+                serializers[i] = new DictionarySerializer(dictionaryMap.get(i));
             } else {
                 serializers[i] = DataTypeSerializer.create(info.colTypes[i]);
             }
@@ -56,7 +61,10 @@ public class GTDictionaryCodeSystem implements IGTCodeSystem {
 
             @Override
             public void serialize(ByteArray code, ByteBuffer buffer) {
-                BytesUtil.writeByteArray(code.array(), code.offset(), code.length(), buffer);
+                if (code == null)
+                    BytesUtil.writeByteArray(null, 0, 0, buffer);
+                else
+                    BytesUtil.writeByteArray(code.array(), code.offset(), code.length(), buffer);
             }
 
             @Override
@@ -78,16 +86,33 @@ public class GTDictionaryCodeSystem implements IGTCodeSystem {
 
     @Override
     public void encodeColumnValue(int col, Object value, ByteBuffer buf) {
-        serializers[col].serialize(value, buf);
+        encodeColumnValue(col, value, 0, buf);
     }
 
     @Override
     public void encodeColumnValue(int col, Object value, int roundingFlag, ByteBuffer buf) {
+        // this is a bit too complicated, but encoding only happens once at build time, so it is OK
         DataTypeSerializer serializer = serializers[col];
-        if (serializer instanceof DictionarySerializer) {
-            ((DictionarySerializer) serializer).serializeWithRounding(value,  roundingFlag, buf);
-        } else {
-            serializer.serialize(value,  buf);
+        try {
+            if (serializer instanceof DictionarySerializer) {
+                ((DictionarySerializer) serializer).serializeWithRounding(value, roundingFlag, buf);
+            } else {
+                serializer.serialize(value, buf);
+            }
+        } catch (ClassCastException ex) {
+            // try convert string into a correct object type
+            try {
+                if (value instanceof String) {
+                    Object converted = serializer.valueOf((String) value);
+                    if ((converted instanceof String) == false) {
+                        encodeColumnValue(col, converted, roundingFlag, buf);
+                        return;
+                    }
+                }
+            } catch (Throwable e) {
+                logger.error("Fail to encode value '" + value + "'", e);
+            }
+            throw ex;
         }
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRecord.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRecord.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRecord.java
index 605a469..aeefc2b 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRecord.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRecord.java
@@ -21,7 +21,7 @@ public class GTRecord implements Comparable<GTRecord> {
             this.cols[i] = new ByteArray();
         this.maskForEqualHashComp = info.colAll;
     }
-    
+
     public ByteArray get(int i) {
         return cols[i];
     }
@@ -51,16 +51,17 @@ public class GTRecord implements Comparable<GTRecord> {
 
     /** decode and return the values of this record */
     public Object[] getValues() {
-        return getValues(new Object[info.nColumns]);
+        return getValues(info.colAll, new Object[info.nColumns]);
     }
 
     /** decode and return the values of this record */
-    public Object[] getValues(Object[] result) {
-        for (int i = 0; i < info.nColumns; i++) {
-            if (cols[i].array() == null)
+    public Object[] getValues(BitSet selectedColumns, Object[] result) {
+        assert selectedColumns.cardinality() <= result.length;
+        for (int i = 0, c = selectedColumns.nextSetBit(0); c >= 0; i++, c = selectedColumns.nextSetBit(c + 1)) {
+            if (cols[c].array() == null)
                 result[i] = null;
             else
-                result[i] = info.codeSystem.decodeColumnValue(i, cols[i].asBuffer());
+                result[i] = info.codeSystem.decodeColumnValue(c, cols[c].asBuffer());
         }
         return result;
     }
@@ -92,11 +93,11 @@ public class GTRecord implements Comparable<GTRecord> {
     public BitSet maskForEqualHashComp() {
         return maskForEqualHashComp;
     }
-    
+
     public void maskForEqualHashComp(BitSet set) {
         this.maskForEqualHashComp = set;
     }
-    
+
     @Override
     public boolean equals(Object obj) {
         if (this == obj)
@@ -132,7 +133,7 @@ public class GTRecord implements Comparable<GTRecord> {
         assert this.info == o.info;
         assert this.maskForEqualHashComp == o.maskForEqualHashComp; // reference equal for performance
         IFilterCodeSystem<ByteArray> cs = info.codeSystem.getFilterCodeSystem();
-        
+
         int comp = 0;
         for (int i = maskForEqualHashComp.nextSetBit(0); i >= 0; i = maskForEqualHashComp.nextSetBit(i + 1)) {
             comp = cs.compare(cols[i], o.cols[i]);
@@ -141,10 +142,16 @@ public class GTRecord implements Comparable<GTRecord> {
         }
         return comp;
     }
-    
+
     @Override
     public String toString() {
-        return Arrays.toString(getValues());
+        return toString(info.colAll);
+    }
+    
+    public String toString(BitSet selectedColumns) {
+        Object[] values = new Object[selectedColumns.cardinality()];
+        getValues(selectedColumns, values);
+        return Arrays.toString(values);
     }
 
     // ============================================================================
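The reworked getValues and toString now take a BitSet of selected columns and pack the decoded values from index 0. A hedged sketch of decoding just two columns of an already-populated record (the record itself is assumed to exist; nothing here is project code):

    import java.util.BitSet;
    import org.apache.kylin.storage.gridtable.GTRecord;

    public class GTRecordSelectedColumnsSketch {
        static Object[] decodeSelected(GTRecord record) {
            BitSet selected = new BitSet();
            selected.set(0);
            selected.set(3);
            Object[] values = new Object[selected.cardinality()];
            record.getValues(selected, values);            // decoded values are packed from index 0
            System.out.println(record.toString(selected)); // prints only the selected columns
            return values;
        }
    }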

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/storage/src/main/java/org/apache/kylin/storage/gridtable/GTSampleCodeSystem.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTSampleCodeSystem.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTSampleCodeSystem.java
index 083d8c2..aea4e49 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTSampleCodeSystem.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTSampleCodeSystem.java
@@ -54,7 +54,10 @@ public class GTSampleCodeSystem implements IGTCodeSystem {
 
             @Override
             public void serialize(ByteArray code, ByteBuffer buffer) {
-                BytesUtil.writeByteArray(code.array(), code.offset(), code.length(), buffer);
+                if (code == null)
+                    BytesUtil.writeByteArray(null, 0, 0, buffer);
+                else
+                    BytesUtil.writeByteArray(code.array(), code.offset(), code.length(), buffer);
             }
 
             @Override

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRange.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRange.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRange.java
index 08513f7..b09a01d 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRange.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRange.java
@@ -14,12 +14,9 @@ public class GTScanRange {
     }
 
     public GTScanRange(GTRecord pkStart, GTRecord pkEnd, List<GTRecord> hbaseFuzzyKeys) {
-        assert pkStart.info == pkEnd.info;
-        assert pkStart.maskForEqualHashComp() == pkStart.info.primaryKey;
-        assert pkEnd.maskForEqualHashComp() == pkEnd.info.primaryKey;
         this.pkStart = pkStart;
         this.pkEnd = pkEnd;
-        this.hbaseFuzzyKeys = hbaseFuzzyKeys == null ? Collections.<GTRecord>emptyList() : hbaseFuzzyKeys;
+        this.hbaseFuzzyKeys = hbaseFuzzyKeys == null ? Collections.<GTRecord> emptyList() : hbaseFuzzyKeys;
     }
 
     @Override
@@ -58,4 +55,10 @@ public class GTScanRange {
             return false;
         return true;
     }
+
+    @Override
+    public String toString() {
+        return (pkStart == null ? "null" : pkStart.toString(pkStart.info.primaryKey)) //
+                + "-" + (pkEnd == null ? "null" : pkEnd.toString(pkEnd.info.primaryKey));
+    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRequest.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRequest.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRequest.java
index c92cba4..b71032c 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRequest.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTScanRequest.java
@@ -1,5 +1,6 @@
 package org.apache.kylin.storage.gridtable;
 
+import java.util.Arrays;
 import java.util.BitSet;
 import java.util.Set;
 
@@ -87,7 +88,7 @@ public class GTScanRequest {
         // un-evaluatable filter must be removed
         if (TupleFilter.isEvaluableRecursively(filterPushDown) == false) {
             Set<TblColRef> unevaluableColumns = Sets.newHashSet();
-            filterPushDown = GTUtil.convertFilterUnevaluatable(filterPushDown, unevaluableColumns);
+            filterPushDown = GTUtil.convertFilterUnevaluatable(filterPushDown, info, unevaluableColumns);
 
             // columns in un-evaluatable filter must be returned without loss so upper layer can do final evaluation
             if (hasAggregation()) {
@@ -138,4 +139,9 @@ public class GTScanRequest {
         return aggrMetricsFuncs;
     }
 
+    @Override
+    public String toString() {
+        return "GTScanRequest [range=" + range + ", columns=" + columns + ", filterPushDown=" + filterPushDown + ", aggrGroupBy=" + aggrGroupBy + ", aggrMetrics=" + aggrMetrics + ", aggrMetricsFuncs=" + Arrays.toString(aggrMetricsFuncs) + "]";
+    }
+
 }

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java
index 1fb0376..7d042eb 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java
@@ -34,18 +34,18 @@ public class GTUtil {
         return new TblColRef(desc);
     }
 
-    public static TupleFilter convertFilterUnevaluatable(TupleFilter rootFilter, //
-            final Set<TblColRef> unevaluatableColumnCollector) {
-        return convertFilter(rootFilter, null, null, false, unevaluatableColumnCollector);
+    public static TupleFilter convertFilterUnevaluatable(TupleFilter rootFilter, GTInfo info, //
+            Set<TblColRef> unevaluatableColumnCollector) {
+        return convertFilter(rootFilter, info, null, false, unevaluatableColumnCollector);
     }
 
-    public static TupleFilter convertFilterConstants(TupleFilter rootFilter, final GTInfo info) {
+    public static TupleFilter convertFilterConstants(TupleFilter rootFilter, GTInfo info) {
         return convertFilter(rootFilter, info, null, true, null);
     }
 
-    public static TupleFilter convertFilterColumnsAndConstants(TupleFilter rootFilter, final GTInfo info, //
-            final Map<TblColRef, Integer> colMapping, //
-            final Set<TblColRef> unevaluatableColumnCollector) {
+    public static TupleFilter convertFilterColumnsAndConstants(TupleFilter rootFilter, GTInfo info, //
+            Map<TblColRef, Integer> colMapping, //
+            Set<TblColRef> unevaluatableColumnCollector) {
         return convertFilter(rootFilter, info, colMapping, true, unevaluatableColumnCollector);
     }
 
@@ -68,6 +68,12 @@ public class GTUtil {
                     return ConstantTupleFilter.TRUE;
                 }
 
+                // shortcut for unEvaluatable filter
+                if (filter.isEvaluable() == false) {
+                    TupleFilter.collectColumns(filter, unevaluatableColumnCollector);
+                    return ConstantTupleFilter.TRUE;
+                }
+
                 // map to column onto grid table
                 if (colMapping != null && filter instanceof ColumnTupleFilter) {
                     ColumnTupleFilter colFilter = (ColumnTupleFilter) filter;
@@ -75,18 +81,9 @@ public class GTUtil {
                     return new ColumnTupleFilter(info.colRef(gtColIdx));
                 }
 
-                // below consider compare filter only
-                if (filter instanceof CompareTupleFilter) {
-
-                    // shortcut for unEvaluatable compare filter
-                    if (TupleFilter.isEvaluableRecursively(filter) == false) {
-                        TupleFilter.collectColumns(filter, unevaluatableColumnCollector);
-                        return ConstantTupleFilter.TRUE;
-                    }
-
-                    if (encodeConstants) {
-                        return encodeConstants((CompareTupleFilter) filter);
-                    }
+                // encode constants
+                if (encodeConstants && filter instanceof CompareTupleFilter) {
+                    return encodeConstants((CompareTupleFilter) filter);
                 }
 
                 return filter;
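The relocated un-evaluatable shortcut and the new explicit GTInfo parameter change how callers invoke the conversion helpers. A hedged sketch of the intended call pattern for convertFilterColumnsAndConstants, based only on the signatures above (the filter, GTInfo and column mapping are assumed to be prepared by the caller):

    import java.util.Map;
    import java.util.Set;
    import com.google.common.collect.Sets;
    import org.apache.kylin.metadata.filter.TupleFilter;
    import org.apache.kylin.metadata.model.TblColRef;
    import org.apache.kylin.storage.gridtable.GTInfo;
    import org.apache.kylin.storage.gridtable.GTUtil;

    public class FilterPushDownSketch {
        static TupleFilter pushDown(TupleFilter filter, GTInfo info, Map<TblColRef, Integer> colMapping) {
            Set<TblColRef> unevaluatable = Sets.newHashSet();
            TupleFilter converted = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, unevaluatable);
            // columns collected in 'unevaluatable' must still be returned to the upper layer for final evaluation
            return converted;
        }
    }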

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/storage/src/test/java/org/apache/kylin/storage/gridtable/DictGridTableTest.java
----------------------------------------------------------------------
diff --git a/storage/src/test/java/org/apache/kylin/storage/gridtable/DictGridTableTest.java b/storage/src/test/java/org/apache/kylin/storage/gridtable/DictGridTableTest.java
new file mode 100644
index 0000000..46ec66c
--- /dev/null
+++ b/storage/src/test/java/org/apache/kylin/storage/gridtable/DictGridTableTest.java
@@ -0,0 +1,214 @@
+package org.apache.kylin.storage.gridtable;
+
+import static org.junit.Assert.*;
+
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.nio.ByteBuffer;
+import java.util.BitSet;
+import java.util.Map;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.kylin.common.util.ByteArray;
+import org.apache.kylin.dict.Dictionary;
+import org.apache.kylin.dict.NumberDictionaryBuilder;
+import org.apache.kylin.dict.StringBytesConverter;
+import org.apache.kylin.dict.TrieDictionaryBuilder;
+import org.apache.kylin.metadata.filter.ColumnTupleFilter;
+import org.apache.kylin.metadata.filter.CompareTupleFilter;
+import org.apache.kylin.metadata.filter.ConstantTupleFilter;
+import org.apache.kylin.metadata.filter.ExtractTupleFilter;
+import org.apache.kylin.metadata.filter.LogicalTupleFilter;
+import org.apache.kylin.metadata.filter.TupleFilter;
+import org.apache.kylin.metadata.filter.TupleFilter.FilterOperatorEnum;
+import org.apache.kylin.metadata.model.DataType;
+import org.apache.kylin.metadata.model.TblColRef;
+import org.apache.kylin.storage.gridtable.GTInfo.Builder;
+import org.apache.kylin.storage.gridtable.memstore.GTSimpleMemStore;
+import org.junit.Test;
+
+import com.google.common.collect.Maps;
+
+public class DictGridTableTest {
+
+    @Test
+    public void test() throws IOException {
+        GridTable table = newTestTable();
+        verifyFirstRow(table);
+        verifyScanWithUnevaluatableFilter(table);
+        verifyScanWithEvaluatableFilter(table);
+    }
+
+    private void verifyFirstRow(GridTable table) throws IOException {
+        doScanAndVerify(table, new GTScanRequest(table.getInfo()), "[1421193600000, 30, Yang, 10, 10.5]");
+    }
+
+    private void verifyScanWithUnevaluatableFilter(GridTable table) throws IOException {
+        GTInfo info = table.getInfo();
+
+        CompareTupleFilter fcomp = compare(info.colRef(0), FilterOperatorEnum.GT, enc(info, 0, "2015-01-14"));
+        ExtractTupleFilter funevaluatable = unevaluatable(info.colRef(1));
+        LogicalTupleFilter filter = and(fcomp, funevaluatable);
+
+        GTScanRequest req = new GTScanRequest(info, null, setOf(0), setOf(3), new String[] { "sum" }, filter);
+        // note the unEvaluatable column 1 in filter is added to group by
+        assertEquals("GTScanRequest [range=null-null, columns={0, 1, 3}, filterPushDown=AND [NULL.GT_MOCKUP_TABLE.1 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], [null]], aggrGroupBy={0, 1}, aggrMetrics={3}, aggrMetricsFuncs=[sum]]", req.toString());
+        
+        doScanAndVerify(table, req, "[1421280000000, 20, null, 20, null]");
+    }
+    
+    private void verifyScanWithEvaluatableFilter(GridTable table) throws IOException {
+        GTInfo info = table.getInfo();
+
+        CompareTupleFilter fcomp1 = compare(info.colRef(0), FilterOperatorEnum.GT, enc(info, 0, "2015-01-14"));
+        CompareTupleFilter fcomp2 = compare(info.colRef(1), FilterOperatorEnum.GT, enc(info, 1, "10"));
+        LogicalTupleFilter filter = and(fcomp1, fcomp2);
+
+        GTScanRequest req = new GTScanRequest(info, null, setOf(0), setOf(3), new String[] { "sum" }, filter);
+        // note the evaluatable column 1 in filter is added to returned columns but not in group by
+        assertEquals("GTScanRequest [range=null-null, columns={0, 1, 3}, filterPushDown=AND [NULL.GT_MOCKUP_TABLE.1 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], NULL.GT_MOCKUP_TABLE.2 GT [\\x00]], aggrGroupBy={0}, aggrMetrics={3}, aggrMetricsFuncs=[sum]]", req.toString());
+        
+        doScanAndVerify(table, req, "[1421280000000, 30, null, 30, null]", "[1421366400000, 20, null, 40, null]");
+    }
+
+    private void doScanAndVerify(GridTable table, GTScanRequest req, String... verifyRows) throws IOException {
+        System.out.println(req);
+        IGTScanner scanner = table.scan(req);
+        int i = 0;
+        for (GTRecord r : scanner) {
+            System.out.println(r);
+            if (verifyRows != null && i < verifyRows.length) {
+                assertEquals(verifyRows[i], r.toString());
+            }
+            i++;
+        }
+        scanner.close();
+    }
+
+    private Object enc(GTInfo info, int col, String value) {
+        ByteBuffer buf = ByteBuffer.allocate(info.maxRecordLength);
+        info.codeSystem.encodeColumnValue(col, value, buf);
+        return ByteArray.copyOf(buf.array(), buf.arrayOffset(), buf.position());
+    }
+
+    private ExtractTupleFilter unevaluatable(TblColRef col) {
+        ExtractTupleFilter r = new ExtractTupleFilter(FilterOperatorEnum.EXTRACT);
+        r.addChild(new ColumnTupleFilter(col));
+        return r;
+    }
+
+    private CompareTupleFilter compare(TblColRef col, FilterOperatorEnum op, Object value) {
+        CompareTupleFilter result = new CompareTupleFilter(op);
+        result.addChild(new ColumnTupleFilter(col));
+        result.addChild(new ConstantTupleFilter(value));
+        return result;
+    }
+
+    private LogicalTupleFilter and(TupleFilter... children) {
+        return logic(FilterOperatorEnum.AND, children);
+    }
+
+    private LogicalTupleFilter or(TupleFilter... children) {
+        return logic(FilterOperatorEnum.OR, children);
+    }
+
+    private LogicalTupleFilter not(TupleFilter child) {
+        return logic(FilterOperatorEnum.NOT, child);
+    }
+
+    private LogicalTupleFilter logic(FilterOperatorEnum op, TupleFilter... children) {
+        LogicalTupleFilter result = new LogicalTupleFilter(op);
+        for (TupleFilter c : children) {
+            result.addChild(c);
+        }
+        return result;
+    }
+
+    static GridTable newTestTable() throws IOException {
+        GTInfo info = newInfo();
+        GTSimpleMemStore store = new GTSimpleMemStore(info);
+        GridTable table = new GridTable(info, store);
+
+        GTRecord r = new GTRecord(table.getInfo());
+        GTBuilder builder = table.rebuild();
+
+        builder.write(r.setValues("2015-01-14", "30", "Yang", new LongWritable(10), new BigDecimal("10.5")));
+        builder.write(r.setValues("2015-01-14", "30", "Luke", new LongWritable(10), new BigDecimal("10.5")));
+        builder.write(r.setValues("2015-01-15", "30", "Xu", new LongWritable(10), new BigDecimal("10.5")));
+        builder.write(r.setValues("2015-01-15", "20", "Dong", new LongWritable(10), new BigDecimal("10.5")));
+        builder.write(r.setValues("2015-01-15", "20", "Jason", new LongWritable(10), new BigDecimal("10.5")));
+        builder.write(r.setValues("2015-01-16", "20", "Mahone", new LongWritable(10), new BigDecimal("10.5")));
+        builder.write(r.setValues("2015-01-16", "30", "Shaofeng", new LongWritable(10), new BigDecimal("10.5")));
+        builder.write(r.setValues("2015-01-16", "20", "Qianhao", new LongWritable(10), new BigDecimal("10.5")));
+        builder.write(r.setValues("2015-01-16", "30", "George", new LongWritable(10), new BigDecimal("10.5")));
+        builder.write(r.setValues("2015-01-17", "10", "Kejia", new LongWritable(10), new BigDecimal("10.5")));
+        builder.close();
+
+        return table;
+    }
+
+    static GTInfo newInfo() {
+        Builder builder = GTInfo.builder();
+        builder.setCodeSystem(newDictCodeSystem());
+        builder.setColumns( //
+                DataType.getInstance("timestamp"), //
+                DataType.getInstance("integer"), //
+                DataType.getInstance("varchar"), //
+                DataType.getInstance("bigint"), //
+                DataType.getInstance("decimal") //
+        );
+        builder.setPrimaryKey(setOf(0));
+        builder.setColumnPreferIndex(setOf(0));
+        builder.enableColumnBlock(new BitSet[] { setOf(0, 1, 2), setOf(3, 4) });
+        builder.enableRowBlock(4);
+        GTInfo info = builder.build();
+        return info;
+    }
+
+    @SuppressWarnings("rawtypes")
+    private static GTDictionaryCodeSystem newDictCodeSystem() {
+        Map<Integer, Dictionary> dictionaryMap = Maps.newHashMap();
+        dictionaryMap.put(1, newDictionaryOfInteger());
+        dictionaryMap.put(2, newDictionaryOfString());
+        return new GTDictionaryCodeSystem(dictionaryMap);
+    }
+
+    @SuppressWarnings("rawtypes")
+    private static Dictionary newDictionaryOfString() {
+        TrieDictionaryBuilder<String> builder = new TrieDictionaryBuilder<>(new StringBytesConverter());
+        builder.addValue("Dong");
+        builder.addValue("George");
+        builder.addValue("Jason");
+        builder.addValue("Kejia");
+        builder.addValue("Luke");
+        builder.addValue("Mahone");
+        builder.addValue("Qianhao");
+        builder.addValue("Shaofeng");
+        builder.addValue("Xu");
+        builder.addValue("Yang");
+        return builder.build(0);
+    }
+
+    @SuppressWarnings("rawtypes")
+    private static Dictionary newDictionaryOfInteger() {
+        NumberDictionaryBuilder<String> builder = new NumberDictionaryBuilder<>(new StringBytesConverter());
+        builder.addValue("10");
+        builder.addValue("20");
+        builder.addValue("30");
+        builder.addValue("40");
+        builder.addValue("50");
+        builder.addValue("60");
+        builder.addValue("70");
+        builder.addValue("80");
+        builder.addValue("90");
+        builder.addValue("100");
+        return builder.build(0);
+    }
+
+    private static BitSet setOf(int... values) {
+        BitSet set = new BitSet();
+        for (int i : values)
+            set.set(i);
+        return set;
+    }
+}
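
For context on the dictionary-backed code system exercised by this test: newDictCodeSystem() puts a NumberDictionary on column 1 and a TrieDictionary on column 2, so those cell values are stored as compact dictionary IDs rather than raw bytes. Below is a minimal standalone sketch of that value-to-ID round trip. It reuses the builder calls shown above and assumes the usual Dictionary lookup methods (getIdFromValue / getValueFromId); it is an illustration, not part of this patch.

    import org.apache.kylin.dict.Dictionary;
    import org.apache.kylin.dict.StringBytesConverter;
    import org.apache.kylin.dict.TrieDictionaryBuilder;

    public class DictRoundTripSketch {
        public static void main(String[] args) {
            // Same construction as newDictionaryOfString() in the test above.
            TrieDictionaryBuilder<String> builder = new TrieDictionaryBuilder<>(new StringBytesConverter());
            builder.addValue("Dong");
            builder.addValue("Yang");
            Dictionary<String> dict = builder.build(0); // 0 = base ID

            // Assumed lookup API: each distinct value maps to a small integer ID and back.
            int id = dict.getIdFromValue("Yang");
            System.out.println(id + " -> " + dict.getValueFromId(id));
        }
    }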

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/storage/src/test/java/org/apache/kylin/storage/gridtable/GTInvertedIndexTest.java
----------------------------------------------------------------------
diff --git a/storage/src/test/java/org/apache/kylin/storage/gridtable/GTInvertedIndexTest.java b/storage/src/test/java/org/apache/kylin/storage/gridtable/GTInvertedIndexTest.java
deleted file mode 100644
index 1460039..0000000
--- a/storage/src/test/java/org/apache/kylin/storage/gridtable/GTInvertedIndexTest.java
+++ /dev/null
@@ -1,165 +0,0 @@
-package org.apache.kylin.storage.gridtable;
-
-import static org.junit.Assert.*;
-import it.uniroma3.mat.extendedset.intset.ConciseSet;
-
-import java.math.BigDecimal;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.kylin.common.util.ByteArray;
-import org.apache.kylin.metadata.filter.ColumnTupleFilter;
-import org.apache.kylin.metadata.filter.CompareTupleFilter;
-import org.apache.kylin.metadata.filter.ConstantTupleFilter;
-import org.apache.kylin.metadata.filter.LogicalTupleFilter;
-import org.apache.kylin.metadata.filter.TupleFilter;
-import org.apache.kylin.metadata.filter.TupleFilter.FilterOperatorEnum;
-import org.apache.kylin.metadata.model.TblColRef;
-import org.apache.kylin.metadata.serializer.StringSerializer;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-
-public class GTInvertedIndexTest {
-
-    GTInfo info;
-    GTInvertedIndex index;
-    ArrayList<CompareTupleFilter> basicFilters = Lists.newArrayList();
-    ArrayList<ConciseSet> basicResults = Lists.newArrayList();
-
-    public GTInvertedIndexTest() {
-        
-        info = GridTableTest.advancedInfo();
-        TblColRef colA = info.colRef(0);
-        
-        // block i contains value "i", the last is NULL
-        index = new GTInvertedIndex(info);
-        GTRowBlock mockBlock = GTRowBlock.allocate(info);
-        GTRowBlock.Writer writer = mockBlock.getWriter();
-        GTRecord record = new GTRecord(info);
-        for (int i = 0; i < 10; i++) {
-            record.setValues(i < 9 ? "" + i : null, "", "", new LongWritable(0), new BigDecimal(0));
-            for (int j = 0; j < info.getRowBlockSize(); j++) {
-                writer.append(record);
-            }
-            writer.readyForFlush();
-            index.add(mockBlock);
-            
-            writer.clearForNext();
-        }
-        
-        basicFilters.add(compare(colA, FilterOperatorEnum.ISNULL));
-        basicResults.add(set(9));
-
-        basicFilters.add(compare(colA, FilterOperatorEnum.ISNOTNULL));
-        basicResults.add(set(0, 1, 2, 3, 4, 5, 6, 7, 8, 9));
-
-        basicFilters.add(compare(colA, FilterOperatorEnum.EQ, 0));
-        basicResults.add(set(0));
-
-        basicFilters.add(compare(colA, FilterOperatorEnum.NEQ, 0));
-        basicResults.add(set(0, 1, 2, 3, 4, 5, 6, 7, 8, 9));
-
-        basicFilters.add(compare(colA, FilterOperatorEnum.IN, 0, 5));
-        basicResults.add(set(0, 5));
-
-        basicFilters.add(compare(colA, FilterOperatorEnum.NOTIN, 0, 5));
-        basicResults.add(set(0, 1, 2, 3, 4, 5, 6, 7, 8, 9));
-
-        basicFilters.add(compare(colA, FilterOperatorEnum.LT, 3));
-        basicResults.add(set(0, 1, 2));
-
-        basicFilters.add(compare(colA, FilterOperatorEnum.LTE, 3));
-        basicResults.add(set(0, 1, 2, 3));
-
-        basicFilters.add(compare(colA, FilterOperatorEnum.GT, 3));
-        basicResults.add(set(4, 5, 6, 7, 8));
-
-        basicFilters.add(compare(colA, FilterOperatorEnum.GTE, 3));
-        basicResults.add(set(3, 4, 5, 6, 7, 8));
-    }
-
-    @Test
-    public void testBasics() {
-        for (int i = 0; i < basicFilters.size(); i++) {
-            assertEquals(basicResults.get(i), index.filter(basicFilters.get(i)));
-        }
-    }
-
-    @Test
-    public void testLogicalAnd() {
-        for (int i = 0; i < basicFilters.size(); i++) {
-            for (int j = 0; j < basicFilters.size(); j++) {
-                LogicalTupleFilter f = logical(FilterOperatorEnum.AND, basicFilters.get(i), basicFilters.get(j));
-                ConciseSet r = basicResults.get(i).clone();
-                r.retainAll(basicResults.get(j));
-                assertEquals(r, index.filter(f));
-            }
-        }
-    }
-
-    @Test
-    public void testLogicalOr() {
-        for (int i = 0; i < basicFilters.size(); i++) {
-            for (int j = 0; j < basicFilters.size(); j++) {
-                LogicalTupleFilter f = logical(FilterOperatorEnum.OR, basicFilters.get(i), basicFilters.get(j));
-                ConciseSet r = basicResults.get(i).clone();
-                r.addAll(basicResults.get(j));
-                assertEquals(r, index.filter(f));
-            }
-        }
-    }
-
-    @Test
-    public void testNotEvaluable() {
-        ConciseSet all = set(0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
-        
-        CompareTupleFilter notEvaluable = compare(info.colRef(1), FilterOperatorEnum.EQ, 0);
-        assertEquals(all, index.filter(notEvaluable));
-
-        LogicalTupleFilter or = logical(FilterOperatorEnum.OR, basicFilters.get(0), notEvaluable);
-        assertEquals(all, index.filter(or));
-
-        LogicalTupleFilter and = logical(FilterOperatorEnum.AND, basicFilters.get(0), notEvaluable);
-        assertEquals(basicResults.get(0), index.filter(and));
-    }
-
-    public static CompareTupleFilter compare(TblColRef col, TupleFilter.FilterOperatorEnum op, int... ids) {
-        CompareTupleFilter filter = new CompareTupleFilter(op);
-        filter.addChild(columnFilter(col));
-        for (int i : ids) {
-            filter.addChild(constFilter(i));
-        }
-        return filter;
-    }
-
-    public static LogicalTupleFilter logical(TupleFilter.FilterOperatorEnum op, TupleFilter... filters) {
-        LogicalTupleFilter filter = new LogicalTupleFilter(op);
-        for (TupleFilter f : filters)
-            filter.addChild(f);
-        return filter;
-    }
-
-    public static ColumnTupleFilter columnFilter(TblColRef col) {
-        return new ColumnTupleFilter(col);
-    }
-
-    public static ConstantTupleFilter constFilter(int id) {
-        byte[] space = new byte[10];
-        ByteBuffer buf = ByteBuffer.wrap(space);
-        StringSerializer stringSerializer = new StringSerializer();
-        stringSerializer.serialize("" + id, buf);
-        ByteArray data = new ByteArray(buf.array(), buf.arrayOffset(), buf.position());
-        return new ConstantTupleFilter(data);
-    }
-
-    public static ConciseSet set(int... ints) {
-        ConciseSet set = new ConciseSet();
-        for (int i : ints)
-            set.add(i);
-        return set;
-    }
-
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java
----------------------------------------------------------------------
diff --git a/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java b/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java
deleted file mode 100644
index 6561c6e..0000000
--- a/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java
+++ /dev/null
@@ -1,208 +0,0 @@
-package org.apache.kylin.storage.gridtable;
-
-import static org.junit.Assert.*;
-
-import java.io.IOException;
-import java.math.BigDecimal;
-import java.util.BitSet;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.kylin.metadata.model.DataType;
-import org.apache.kylin.storage.gridtable.GTInfo.Builder;
-import org.apache.kylin.storage.gridtable.memstore.GTSimpleMemStore;
-import org.junit.Test;
-
-public class GridTableTest {
-
-    @Test
-    public void testBasics() throws IOException {
-        GTInfo info = basicInfo();
-        GTSimpleMemStore store = new GTSimpleMemStore(info);
-        GridTable table = new GridTable(info, store);
-
-        GTBuilder builder = rebuild(table);
-        IGTScanner scanner = scan(table);
-        assertEquals(builder.getWrittenRowBlockCount(), scanner.getScannedRowBlockCount());
-        assertEquals(builder.getWrittenRowCount(), scanner.getScannedRowCount());
-    }
-
-    @Test
-    public void testAdvanced() throws IOException {
-        GTInfo info = advancedInfo();
-        GTSimpleMemStore store = new GTSimpleMemStore(info);
-        GridTable table = new GridTable(info, store);
-
-        GTBuilder builder = rebuild(table);
-        IGTScanner scanner = scan(table);
-        assertEquals(builder.getWrittenRowBlockCount(), scanner.getScannedRowBlockCount());
-        assertEquals(builder.getWrittenRowCount(), scanner.getScannedRowCount());
-    }
-
-    @Test
-    public void testAggregate() throws IOException {
-        GTInfo info = advancedInfo();
-        GTSimpleMemStore store = new GTSimpleMemStore(info);
-        GridTable table = new GridTable(info, store);
-
-        GTBuilder builder = rebuild(table);
-        IGTScanner scanner = scanAndAggregate(table);
-        assertEquals(builder.getWrittenRowBlockCount(), scanner.getScannedRowBlockCount());
-        assertEquals(builder.getWrittenRowCount(), scanner.getScannedRowCount());
-    }
-
-    @Test
-    public void testAppend() throws IOException {
-        GTInfo info = advancedInfo();
-        GTSimpleMemStore store = new GTSimpleMemStore(info);
-        GridTable table = new GridTable(info, store);
-
-        rebuildViaAppend(table);
-        IGTScanner scanner = scan(table);
-        assertEquals(3, scanner.getScannedRowBlockCount());
-        assertEquals(10, scanner.getScannedRowCount());
-    }
-
-    private IGTScanner scan(GridTable table) throws IOException {
-        GTScanRequest req = new GTScanRequest(table.getInfo());
-        IGTScanner scanner = table.scan(req);
-        for (GTRecord r : scanner) {
-            Object[] v = r.getValues();
-            assertTrue(((String) v[0]).startsWith("2015-"));
-            assertTrue(((String) v[2]).equals("Food"));
-            assertTrue(((LongWritable) v[3]).get() == 10);
-            assertTrue(((BigDecimal) v[4]).doubleValue() == 10.5);
-            System.out.println(r);
-        }
-        scanner.close();
-        System.out.println("Scanned Row Block Count: " + scanner.getScannedRowBlockCount());
-        System.out.println("Scanned Row Count: " + scanner.getScannedRowCount());
-        return scanner;
-    }
-
-    private IGTScanner scanAndAggregate(GridTable table) throws IOException {
-        GTScanRequest req = new GTScanRequest(table.getInfo(), null, setOf(0, 2), setOf(3, 4), new String[] { "count", "sum" }, null);
-        IGTScanner scanner = table.scan(req);
-        int i = 0;
-        for (GTRecord r : scanner) {
-            Object[] v = r.getValues();
-            switch (i) {
-            case 0:
-                assertTrue(((LongWritable) v[3]).get() == 20);
-                assertTrue(((BigDecimal) v[4]).doubleValue() == 21.0);
-                break;
-            case 1:
-                assertTrue(((LongWritable) v[3]).get() == 30);
-                assertTrue(((BigDecimal) v[4]).doubleValue() == 31.5);
-                break;
-            case 2:
-                assertTrue(((LongWritable) v[3]).get() == 40);
-                assertTrue(((BigDecimal) v[4]).doubleValue() == 42.0);
-                break;
-            case 3:
-                assertTrue(((LongWritable) v[3]).get() == 10);
-                assertTrue(((BigDecimal) v[4]).doubleValue() == 10.5);
-                break;
-            default:
-                fail();
-            }
-            i++;
-            System.out.println(r);
-        }
-        scanner.close();
-        System.out.println("Scanned Row Block Count: " + scanner.getScannedRowBlockCount());
-        System.out.println("Scanned Row Count: " + scanner.getScannedRowCount());
-        return scanner;
-    }
-
-    static GTBuilder rebuild(GridTable table) throws IOException {
-        GTRecord r = new GTRecord(table.getInfo());
-        GTBuilder builder = table.rebuild();
-
-        builder.write(r.setValues("2015-01-14", "Yang", "Food", new LongWritable(10), new BigDecimal("10.5")));
-        builder.write(r.setValues("2015-01-14", "Luke", "Food", new LongWritable(10), new BigDecimal("10.5")));
-        builder.write(r.setValues("2015-01-15", "Xu", "Food", new LongWritable(10), new BigDecimal("10.5")));
-        builder.write(r.setValues("2015-01-15", "Dong", "Food", new LongWritable(10), new BigDecimal("10.5")));
-        builder.write(r.setValues("2015-01-15", "Jason", "Food", new LongWritable(10), new BigDecimal("10.5")));
-        builder.write(r.setValues("2015-01-16", "Mahone", "Food", new LongWritable(10), new BigDecimal("10.5")));
-        builder.write(r.setValues("2015-01-16", "Shaofeng", "Food", new LongWritable(10), new BigDecimal("10.5")));
-        builder.write(r.setValues("2015-01-16", "Qianhao", "Food", new LongWritable(10), new BigDecimal("10.5")));
-        builder.write(r.setValues("2015-01-16", "George", "Food", new LongWritable(10), new BigDecimal("10.5")));
-        builder.write(r.setValues("2015-01-17", "Kejia", "Food", new LongWritable(10), new BigDecimal("10.5")));
-        builder.close();
-
-        System.out.println("Written Row Block Count: " + builder.getWrittenRowBlockCount());
-        System.out.println("Written Row Count: " + builder.getWrittenRowCount());
-        return builder;
-    }
-
-    static void rebuildViaAppend(GridTable table) throws IOException {
-        GTRecord r = new GTRecord(table.getInfo());
-        GTBuilder builder;
-
-        builder = table.append();
-        builder.write(r.setValues("2015-01-14", "Yang", "Food", new LongWritable(10), new BigDecimal("10.5")));
-        builder.write(r.setValues("2015-01-14", "Luke", "Food", new LongWritable(10), new BigDecimal("10.5")));
-        builder.write(r.setValues("2015-01-15", "Xu", "Food", new LongWritable(10), new BigDecimal("10.5")));
-        builder.write(r.setValues("2015-01-15", "Dong", "Food", new LongWritable(10), new BigDecimal("10.5")));
-        builder.close();
-        System.out.println("Written Row Block Count: " + builder.getWrittenRowBlockCount());
-        System.out.println("Written Row Count: " + builder.getWrittenRowCount());
-
-        builder = table.append();
-        builder.write(r.setValues("2015-01-15", "Jason", "Food", new LongWritable(10), new BigDecimal("10.5")));
-        builder.write(r.setValues("2015-01-16", "Mahone", "Food", new LongWritable(10), new BigDecimal("10.5")));
-        builder.write(r.setValues("2015-01-16", "Shaofeng", "Food", new LongWritable(10), new BigDecimal("10.5")));
-        builder.close();
-        System.out.println("Written Row Block Count: " + builder.getWrittenRowBlockCount());
-        System.out.println("Written Row Count: " + builder.getWrittenRowCount());
-
-        builder = table.append();
-        builder.write(r.setValues("2015-01-16", "Qianhao", "Food", new LongWritable(10), new BigDecimal("10.5")));
-        builder.write(r.setValues("2015-01-16", "George", "Food", new LongWritable(10), new BigDecimal("10.5")));
-        builder.close();
-        System.out.println("Written Row Block Count: " + builder.getWrittenRowBlockCount());
-        System.out.println("Written Row Count: " + builder.getWrittenRowCount());
-
-        builder = table.append();
-        builder.write(r.setValues("2015-01-17", "Kejia", "Food", new LongWritable(10), new BigDecimal("10.5")));
-        builder.close();
-        System.out.println("Written Row Block Count: " + builder.getWrittenRowBlockCount());
-        System.out.println("Written Row Count: " + builder.getWrittenRowCount());
-    }
-
-    static GTInfo basicInfo() {
-        Builder builder = infoBuilder();
-        GTInfo info = builder.build();
-        return info;
-    }
-
-    static GTInfo advancedInfo() {
-        Builder builder = infoBuilder();
-        builder.enableColumnBlock(new BitSet[] { setOf(0, 1, 2), setOf(3, 4) });
-        builder.enableRowBlock(4);
-        GTInfo info = builder.build();
-        return info;
-    }
-
-    private static Builder infoBuilder() {
-        Builder builder = GTInfo.builder();
-        builder.setCodeSystem(new GTSampleCodeSystem());
-        builder.setColumns( //
-                DataType.getInstance("varchar"), //
-                DataType.getInstance("varchar"), //
-                DataType.getInstance("varchar"), //
-                DataType.getInstance("bigint"), //
-                DataType.getInstance("decimal") //
-        );
-        builder.setPrimaryKey(setOf(0));
-        builder.setColumnPreferIndex(setOf(0));
-        return builder;
-    }
-
-    private static BitSet setOf(int... values) {
-        BitSet set = new BitSet();
-        for (int i : values)
-            set.set(i);
-        return set;
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/storage/src/test/java/org/apache/kylin/storage/gridtable/SimpleGridTableTest.java
----------------------------------------------------------------------
diff --git a/storage/src/test/java/org/apache/kylin/storage/gridtable/SimpleGridTableTest.java b/storage/src/test/java/org/apache/kylin/storage/gridtable/SimpleGridTableTest.java
new file mode 100644
index 0000000..c5878b3
--- /dev/null
+++ b/storage/src/test/java/org/apache/kylin/storage/gridtable/SimpleGridTableTest.java
@@ -0,0 +1,208 @@
+package org.apache.kylin.storage.gridtable;
+
+import static org.junit.Assert.*;
+
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.util.BitSet;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.kylin.metadata.model.DataType;
+import org.apache.kylin.storage.gridtable.GTInfo.Builder;
+import org.apache.kylin.storage.gridtable.memstore.GTSimpleMemStore;
+import org.junit.Test;
+
+public class SimpleGridTableTest {
+
+    @Test
+    public void testBasics() throws IOException {
+        GTInfo info = basicInfo();
+        GTSimpleMemStore store = new GTSimpleMemStore(info);
+        GridTable table = new GridTable(info, store);
+
+        GTBuilder builder = rebuild(table);
+        IGTScanner scanner = scan(table);
+        assertEquals(builder.getWrittenRowBlockCount(), scanner.getScannedRowBlockCount());
+        assertEquals(builder.getWrittenRowCount(), scanner.getScannedRowCount());
+    }
+
+    @Test
+    public void testAdvanced() throws IOException {
+        GTInfo info = advancedInfo();
+        GTSimpleMemStore store = new GTSimpleMemStore(info);
+        GridTable table = new GridTable(info, store);
+
+        GTBuilder builder = rebuild(table);
+        IGTScanner scanner = scan(table);
+        assertEquals(builder.getWrittenRowBlockCount(), scanner.getScannedRowBlockCount());
+        assertEquals(builder.getWrittenRowCount(), scanner.getScannedRowCount());
+    }
+
+    @Test
+    public void testAggregate() throws IOException {
+        GTInfo info = advancedInfo();
+        GTSimpleMemStore store = new GTSimpleMemStore(info);
+        GridTable table = new GridTable(info, store);
+
+        GTBuilder builder = rebuild(table);
+        IGTScanner scanner = scanAndAggregate(table);
+        assertEquals(builder.getWrittenRowBlockCount(), scanner.getScannedRowBlockCount());
+        assertEquals(builder.getWrittenRowCount(), scanner.getScannedRowCount());
+    }
+
+    @Test
+    public void testAppend() throws IOException {
+        GTInfo info = advancedInfo();
+        GTSimpleMemStore store = new GTSimpleMemStore(info);
+        GridTable table = new GridTable(info, store);
+
+        rebuildViaAppend(table);
+        IGTScanner scanner = scan(table);
+        assertEquals(3, scanner.getScannedRowBlockCount());
+        assertEquals(10, scanner.getScannedRowCount());
+    }
+
+    private IGTScanner scan(GridTable table) throws IOException {
+        GTScanRequest req = new GTScanRequest(table.getInfo());
+        IGTScanner scanner = table.scan(req);
+        for (GTRecord r : scanner) {
+            Object[] v = r.getValues();
+            assertTrue(((String) v[0]).startsWith("2015-"));
+            assertTrue(((String) v[2]).equals("Food"));
+            assertTrue(((LongWritable) v[3]).get() == 10);
+            assertTrue(((BigDecimal) v[4]).doubleValue() == 10.5);
+            System.out.println(r);
+        }
+        scanner.close();
+        System.out.println("Scanned Row Block Count: " + scanner.getScannedRowBlockCount());
+        System.out.println("Scanned Row Count: " + scanner.getScannedRowCount());
+        return scanner;
+    }
+
+    private IGTScanner scanAndAggregate(GridTable table) throws IOException {
+        GTScanRequest req = new GTScanRequest(table.getInfo(), null, setOf(0, 2), setOf(3, 4), new String[] { "count", "sum" }, null);
+        IGTScanner scanner = table.scan(req);
+        int i = 0;
+        for (GTRecord r : scanner) {
+            Object[] v = r.getValues();
+            switch (i) {
+            case 0:
+                assertTrue(((LongWritable) v[3]).get() == 20);
+                assertTrue(((BigDecimal) v[4]).doubleValue() == 21.0);
+                break;
+            case 1:
+                assertTrue(((LongWritable) v[3]).get() == 30);
+                assertTrue(((BigDecimal) v[4]).doubleValue() == 31.5);
+                break;
+            case 2:
+                assertTrue(((LongWritable) v[3]).get() == 40);
+                assertTrue(((BigDecimal) v[4]).doubleValue() == 42.0);
+                break;
+            case 3:
+                assertTrue(((LongWritable) v[3]).get() == 10);
+                assertTrue(((BigDecimal) v[4]).doubleValue() == 10.5);
+                break;
+            default:
+                fail();
+            }
+            i++;
+            System.out.println(r);
+        }
+        scanner.close();
+        System.out.println("Scanned Row Block Count: " + scanner.getScannedRowBlockCount());
+        System.out.println("Scanned Row Count: " + scanner.getScannedRowCount());
+        return scanner;
+    }
+
+    static GTBuilder rebuild(GridTable table) throws IOException {
+        GTRecord r = new GTRecord(table.getInfo());
+        GTBuilder builder = table.rebuild();
+
+        builder.write(r.setValues("2015-01-14", "Yang", "Food", new LongWritable(10), new BigDecimal("10.5")));
+        builder.write(r.setValues("2015-01-14", "Luke", "Food", new LongWritable(10), new BigDecimal("10.5")));
+        builder.write(r.setValues("2015-01-15", "Xu", "Food", new LongWritable(10), new BigDecimal("10.5")));
+        builder.write(r.setValues("2015-01-15", "Dong", "Food", new LongWritable(10), new BigDecimal("10.5")));
+        builder.write(r.setValues("2015-01-15", "Jason", "Food", new LongWritable(10), new BigDecimal("10.5")));
+        builder.write(r.setValues("2015-01-16", "Mahone", "Food", new LongWritable(10), new BigDecimal("10.5")));
+        builder.write(r.setValues("2015-01-16", "Shaofeng", "Food", new LongWritable(10), new BigDecimal("10.5")));
+        builder.write(r.setValues("2015-01-16", "Qianhao", "Food", new LongWritable(10), new BigDecimal("10.5")));
+        builder.write(r.setValues("2015-01-16", "George", "Food", new LongWritable(10), new BigDecimal("10.5")));
+        builder.write(r.setValues("2015-01-17", "Kejia", "Food", new LongWritable(10), new BigDecimal("10.5")));
+        builder.close();
+
+        System.out.println("Written Row Block Count: " + builder.getWrittenRowBlockCount());
+        System.out.println("Written Row Count: " + builder.getWrittenRowCount());
+        return builder;
+    }
+
+    static void rebuildViaAppend(GridTable table) throws IOException {
+        GTRecord r = new GTRecord(table.getInfo());
+        GTBuilder builder;
+
+        builder = table.append();
+        builder.write(r.setValues("2015-01-14", "Yang", "Food", new LongWritable(10), new BigDecimal("10.5")));
+        builder.write(r.setValues("2015-01-14", "Luke", "Food", new LongWritable(10), new BigDecimal("10.5")));
+        builder.write(r.setValues("2015-01-15", "Xu", "Food", new LongWritable(10), new BigDecimal("10.5")));
+        builder.write(r.setValues("2015-01-15", "Dong", "Food", new LongWritable(10), new BigDecimal("10.5")));
+        builder.close();
+        System.out.println("Written Row Block Count: " + builder.getWrittenRowBlockCount());
+        System.out.println("Written Row Count: " + builder.getWrittenRowCount());
+
+        builder = table.append();
+        builder.write(r.setValues("2015-01-15", "Jason", "Food", new LongWritable(10), new BigDecimal("10.5")));
+        builder.write(r.setValues("2015-01-16", "Mahone", "Food", new LongWritable(10), new BigDecimal("10.5")));
+        builder.write(r.setValues("2015-01-16", "Shaofeng", "Food", new LongWritable(10), new BigDecimal("10.5")));
+        builder.close();
+        System.out.println("Written Row Block Count: " + builder.getWrittenRowBlockCount());
+        System.out.println("Written Row Count: " + builder.getWrittenRowCount());
+
+        builder = table.append();
+        builder.write(r.setValues("2015-01-16", "Qianhao", "Food", new LongWritable(10), new BigDecimal("10.5")));
+        builder.write(r.setValues("2015-01-16", "George", "Food", new LongWritable(10), new BigDecimal("10.5")));
+        builder.close();
+        System.out.println("Written Row Block Count: " + builder.getWrittenRowBlockCount());
+        System.out.println("Written Row Count: " + builder.getWrittenRowCount());
+
+        builder = table.append();
+        builder.write(r.setValues("2015-01-17", "Kejia", "Food", new LongWritable(10), new BigDecimal("10.5")));
+        builder.close();
+        System.out.println("Written Row Block Count: " + builder.getWrittenRowBlockCount());
+        System.out.println("Written Row Count: " + builder.getWrittenRowCount());
+    }
+
+    static GTInfo basicInfo() {
+        Builder builder = infoBuilder();
+        GTInfo info = builder.build();
+        return info;
+    }
+
+    static GTInfo advancedInfo() {
+        Builder builder = infoBuilder();
+        builder.enableColumnBlock(new BitSet[] { setOf(0, 1, 2), setOf(3, 4) });
+        builder.enableRowBlock(4);
+        GTInfo info = builder.build();
+        return info;
+    }
+
+    private static Builder infoBuilder() {
+        Builder builder = GTInfo.builder();
+        builder.setCodeSystem(new GTSampleCodeSystem());
+        builder.setColumns( //
+                DataType.getInstance("varchar"), //
+                DataType.getInstance("varchar"), //
+                DataType.getInstance("varchar"), //
+                DataType.getInstance("bigint"), //
+                DataType.getInstance("decimal") //
+        );
+        builder.setPrimaryKey(setOf(0));
+        builder.setColumnPreferIndex(setOf(0));
+        return builder;
+    }
+
+    private static BitSet setOf(int... values) {
+        BitSet set = new BitSet();
+        for (int i : values)
+            set.set(i);
+        return set;
+    }
+}
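
For context: the metric values asserted in scanAndAggregate() are simply the per-date row counts from rebuild() multiplied by the constant 10 / 10.5 carried by every row (2 rows on 01-14, 3 on 01-15, 4 on 01-16, 1 on 01-17). The standalone sketch below reproduces that arithmetic in plain Java; it only shows where the expected numbers come from and is not the grid table aggregation code.

    import java.math.BigDecimal;
    import java.util.LinkedHashMap;
    import java.util.Map;

    public class AggregationArithmeticSketch {
        public static void main(String[] args) {
            // rows per date as written by rebuild() above
            Map<String, Integer> rowsPerDate = new LinkedHashMap<>();
            rowsPerDate.put("2015-01-14", 2);
            rowsPerDate.put("2015-01-15", 3);
            rowsPerDate.put("2015-01-16", 4);
            rowsPerDate.put("2015-01-17", 1);

            for (Map.Entry<String, Integer> e : rowsPerDate.entrySet()) {
                long metric3 = e.getValue() * 10L;                       // 20, 30, 40, 10
                BigDecimal metric4 = new BigDecimal("10.5")
                        .multiply(new BigDecimal(e.getValue()));         // 21.0, 31.5, 42.0, 10.5
                System.out.println(e.getKey() + " -> " + metric3 + ", " + metric4);
            }
        }
    }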

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d7fc2312/storage/src/test/java/org/apache/kylin/storage/gridtable/SimpleInvertedIndexTest.java
----------------------------------------------------------------------
diff --git a/storage/src/test/java/org/apache/kylin/storage/gridtable/SimpleInvertedIndexTest.java b/storage/src/test/java/org/apache/kylin/storage/gridtable/SimpleInvertedIndexTest.java
new file mode 100644
index 0000000..f96b709
--- /dev/null
+++ b/storage/src/test/java/org/apache/kylin/storage/gridtable/SimpleInvertedIndexTest.java
@@ -0,0 +1,165 @@
+package org.apache.kylin.storage.gridtable;
+
+import static org.junit.Assert.*;
+import it.uniroma3.mat.extendedset.intset.ConciseSet;
+
+import java.math.BigDecimal;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.kylin.common.util.ByteArray;
+import org.apache.kylin.metadata.filter.ColumnTupleFilter;
+import org.apache.kylin.metadata.filter.CompareTupleFilter;
+import org.apache.kylin.metadata.filter.ConstantTupleFilter;
+import org.apache.kylin.metadata.filter.LogicalTupleFilter;
+import org.apache.kylin.metadata.filter.TupleFilter;
+import org.apache.kylin.metadata.filter.TupleFilter.FilterOperatorEnum;
+import org.apache.kylin.metadata.model.TblColRef;
+import org.apache.kylin.metadata.serializer.StringSerializer;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public class SimpleInvertedIndexTest {
+
+    GTInfo info;
+    GTInvertedIndex index;
+    ArrayList<CompareTupleFilter> basicFilters = Lists.newArrayList();
+    ArrayList<ConciseSet> basicResults = Lists.newArrayList();
+
+    public SimpleInvertedIndexTest() {
+        
+        info = SimpleGridTableTest.advancedInfo();
+        TblColRef colA = info.colRef(0);
+        
+        // block i contains value "i", the last is NULL
+        index = new GTInvertedIndex(info);
+        GTRowBlock mockBlock = GTRowBlock.allocate(info);
+        GTRowBlock.Writer writer = mockBlock.getWriter();
+        GTRecord record = new GTRecord(info);
+        for (int i = 0; i < 10; i++) {
+            record.setValues(i < 9 ? "" + i : null, "", "", new LongWritable(0), new BigDecimal(0));
+            for (int j = 0; j < info.getRowBlockSize(); j++) {
+                writer.append(record);
+            }
+            writer.readyForFlush();
+            index.add(mockBlock);
+            
+            writer.clearForNext();
+        }
+        
+        basicFilters.add(compare(colA, FilterOperatorEnum.ISNULL));
+        basicResults.add(set(9));
+
+        basicFilters.add(compare(colA, FilterOperatorEnum.ISNOTNULL));
+        basicResults.add(set(0, 1, 2, 3, 4, 5, 6, 7, 8, 9));
+
+        basicFilters.add(compare(colA, FilterOperatorEnum.EQ, 0));
+        basicResults.add(set(0));
+
+        basicFilters.add(compare(colA, FilterOperatorEnum.NEQ, 0));
+        basicResults.add(set(0, 1, 2, 3, 4, 5, 6, 7, 8, 9));
+
+        basicFilters.add(compare(colA, FilterOperatorEnum.IN, 0, 5));
+        basicResults.add(set(0, 5));
+
+        basicFilters.add(compare(colA, FilterOperatorEnum.NOTIN, 0, 5));
+        basicResults.add(set(0, 1, 2, 3, 4, 5, 6, 7, 8, 9));
+
+        basicFilters.add(compare(colA, FilterOperatorEnum.LT, 3));
+        basicResults.add(set(0, 1, 2));
+
+        basicFilters.add(compare(colA, FilterOperatorEnum.LTE, 3));
+        basicResults.add(set(0, 1, 2, 3));
+
+        basicFilters.add(compare(colA, FilterOperatorEnum.GT, 3));
+        basicResults.add(set(4, 5, 6, 7, 8));
+
+        basicFilters.add(compare(colA, FilterOperatorEnum.GTE, 3));
+        basicResults.add(set(3, 4, 5, 6, 7, 8));
+    }
+
+    @Test
+    public void testBasics() {
+        for (int i = 0; i < basicFilters.size(); i++) {
+            assertEquals(basicResults.get(i), index.filter(basicFilters.get(i)));
+        }
+    }
+
+    @Test
+    public void testLogicalAnd() {
+        for (int i = 0; i < basicFilters.size(); i++) {
+            for (int j = 0; j < basicFilters.size(); j++) {
+                LogicalTupleFilter f = logical(FilterOperatorEnum.AND, basicFilters.get(i), basicFilters.get(j));
+                ConciseSet r = basicResults.get(i).clone();
+                r.retainAll(basicResults.get(j));
+                assertEquals(r, index.filter(f));
+            }
+        }
+    }
+
+    @Test
+    public void testLogicalOr() {
+        for (int i = 0; i < basicFilters.size(); i++) {
+            for (int j = 0; j < basicFilters.size(); j++) {
+                LogicalTupleFilter f = logical(FilterOperatorEnum.OR, basicFilters.get(i), basicFilters.get(j));
+                ConciseSet r = basicResults.get(i).clone();
+                r.addAll(basicResults.get(j));
+                assertEquals(r, index.filter(f));
+            }
+        }
+    }
+
+    @Test
+    public void testNotEvaluable() {
+        ConciseSet all = set(0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
+        
+        CompareTupleFilter notEvaluable = compare(info.colRef(1), FilterOperatorEnum.EQ, 0);
+        assertEquals(all, index.filter(notEvaluable));
+
+        LogicalTupleFilter or = logical(FilterOperatorEnum.OR, basicFilters.get(0), notEvaluable);
+        assertEquals(all, index.filter(or));
+
+        LogicalTupleFilter and = logical(FilterOperatorEnum.AND, basicFilters.get(0), notEvaluable);
+        assertEquals(basicResults.get(0), index.filter(and));
+    }
+
+    public static CompareTupleFilter compare(TblColRef col, TupleFilter.FilterOperatorEnum op, int... ids) {
+        CompareTupleFilter filter = new CompareTupleFilter(op);
+        filter.addChild(columnFilter(col));
+        for (int i : ids) {
+            filter.addChild(constFilter(i));
+        }
+        return filter;
+    }
+
+    public static LogicalTupleFilter logical(TupleFilter.FilterOperatorEnum op, TupleFilter... filters) {
+        LogicalTupleFilter filter = new LogicalTupleFilter(op);
+        for (TupleFilter f : filters)
+            filter.addChild(f);
+        return filter;
+    }
+
+    public static ColumnTupleFilter columnFilter(TblColRef col) {
+        return new ColumnTupleFilter(col);
+    }
+
+    public static ConstantTupleFilter constFilter(int id) {
+        byte[] space = new byte[10];
+        ByteBuffer buf = ByteBuffer.wrap(space);
+        StringSerializer stringSerializer = new StringSerializer();
+        stringSerializer.serialize("" + id, buf);
+        ByteArray data = new ByteArray(buf.array(), buf.arrayOffset(), buf.position());
+        return new ConstantTupleFilter(data);
+    }
+
+    public static ConciseSet set(int... ints) {
+        ConciseSet set = new ConciseSet();
+        for (int i : ints)
+            set.add(i);
+        return set;
+    }
+
+
+}
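
testLogicalAnd() and testLogicalOr() above check the inverted index against plain set intersection and union on ConciseSet. The standalone sketch below shows that reference computation on its own, using only the ConciseSet calls already present in the test (add, clone, retainAll, addAll); it is an illustration, not part of this patch.

    import it.uniroma3.mat.extendedset.intset.ConciseSet;

    public class ConciseSetAlgebraSketch {
        public static void main(String[] args) {
            ConciseSet a = new ConciseSet();
            for (int i : new int[] { 0, 1, 2 })
                a.add(i);
            ConciseSet b = new ConciseSet();
            for (int i : new int[] { 1, 2, 3 })
                b.add(i);

            ConciseSet and = a.clone();
            and.retainAll(b); // intersection -> {1, 2}, mirrors the expected AND result
            ConciseSet or = a.clone();
            or.addAll(b);     // union -> {0, 1, 2, 3}, mirrors the expected OR result

            System.out.println(and + " / " + or);
        }
    }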


[36/50] incubator-kylin git commit: Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict

Posted by li...@apache.org.
Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/5837af0f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/5837af0f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/5837af0f

Branch: refs/heads/streaming-localdict
Commit: 5837af0f426233150d61cb6cb40ab756ed34a8df
Parents: b979dfa d1c115d
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 15:36:01 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 15:36:01 2015 +0800

----------------------------------------------------------------------
 .../apache/kylin/common/util/FIFOIterable.java  |  20 +++
 .../apache/kylin/common/util/FIFOIterator.java  |  34 +++++
 .../org/apache/kylin/common/util/BasicTest.java |  12 +-
 .../test_kylin_cube_with_slr_desc.json          |   2 +-
 .../kylin/invertedindex/index/TableRecord.java  |   5 +-
 .../invertedindex/index/TableRecordInfo.java    |  10 +-
 .../kylin/invertedindex/model/IIDesc.java       |   1 +
 .../model/IIKeyValueCodecWithState.java         |  24 ++-
 .../apache/kylin/invertedindex/model/IIRow.java |  10 ++
 .../kylin/job/hadoop/cube/BaseCuboidMapper.java |  20 ++-
 .../cube/FactDistinctIIColumnsMapper.java       |  28 ++--
 .../kylin/job/BuildCubeWithEngineTest.java      |   1 -
 .../job/hadoop/invertedindex/II2CubeTest.java   | 146 +++++++++++++++++++
 .../invertedindex/ToyIIStreamBuilder.java       |  36 +++++
 streaming/pom.xml                               |   7 +
 .../kylin/streaming/cube/CubeStreamBuilder.java |  20 +--
 .../invertedindex/IIStreamBuilder.java          |  33 +++--
 17 files changed, 342 insertions(+), 67 deletions(-)
----------------------------------------------------------------------



[14/50] incubator-kylin git commit: Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict

Posted by li...@apache.org.
Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/7658a500
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/7658a500
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/7658a500

Branch: refs/heads/streaming-localdict
Commit: 7658a5001d0ba835beedca76b4b8687bcfd03bc8
Parents: d4a271d c3ff4f4
Author: Shao Feng, Shi <sh...@ebay.com>
Authored: Thu Mar 26 23:16:06 2015 +0800
Committer: Shao Feng, Shi <sh...@ebay.com>
Committed: Thu Mar 26 23:16:06 2015 +0800

----------------------------------------------------------------------
 .../apache/kylin/job/IIStreamBuilderTest.java   | 80 ++++++++++++++++++++
 .../apache/kylin/streaming/KafkaConsumer.java   |  5 +-
 .../kylin/streaming/StreamingBootstrap.java     | 33 ++++----
 .../apache/kylin/streaming/StreamingCLI.java    |  3 +-
 .../invertedindex/IIStreamBuilder.java          |  4 +-
 .../invertedindex/IIStreamBuilderTest.java      | 41 ----------
 6 files changed, 107 insertions(+), 59 deletions(-)
----------------------------------------------------------------------



[45/50] incubator-kylin git commit: fix

Posted by li...@apache.org.
fix


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/b5a78a60
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/b5a78a60
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/b5a78a60

Branch: refs/heads/streaming-localdict
Commit: b5a78a600cbd294ce1457bde93c1d682064b40d1
Parents: d72f2e6
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 18:18:21 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 18:18:21 2015 +0800

----------------------------------------------------------------------
 bin/kylin.sh                                    |  2 +-
 .../kylin/job/streaming/StreamingCLI.java       |  2 +
 .../apache/kylin/job/IIStreamBuilderTest.java   | 48 ++++++++++++++++----
 3 files changed, 41 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/b5a78a60/bin/kylin.sh
----------------------------------------------------------------------
diff --git a/bin/kylin.sh b/bin/kylin.sh
index 95568e1..c300915 100644
--- a/bin/kylin.sh
+++ b/bin/kylin.sh
@@ -99,7 +99,7 @@ then
     -Dorg.apache.catalina.connector.CoyoteAdapter.ALLOW_BACKSLASH=true \
     -Dkylin.hive.dependency=${hive_dependency} \
     -Dspring.profiles.active=${spring_profile} \
-    org.apache.hadoop.util.RunJar ${KYLIN_HOME}/lib/kylin-job-*.jar org.apache.kylin.job.streaming.StreamingCLI start $2 > ${tomcat_root}/logs/kylin.log 2>&1 & echo $! > ${KYLIN_HOME}/$2 &
+    org.apache.hadoop.util.RunJar ${KYLIN_HOME}/lib/kylin-job-*.jar org.apache.kylin.job.streaming.StreamingCLI start $2 > ${KYLIN_HOME}/logs/kylin.log 2>&1 & echo $! > ${KYLIN_HOME}$2 &
     echo "streaming started $2"
     exit 0
 else

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/b5a78a60/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java b/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
index 219ca41..1d6994f 100644
--- a/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
+++ b/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
@@ -59,6 +59,8 @@ public class StreamingCLI {
                 printArgsError(args);
             }
         } catch (Exception e) {
+            logger.error("error start streaming", e);
+            System.exit(-1);
         }
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/b5a78a60/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java b/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
index d42da33..bafcb61 100644
--- a/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
+++ b/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
@@ -34,23 +34,27 @@
 
 package org.apache.kylin.job;
 
+import org.apache.hadoop.util.ToolRunner;
 import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.util.AbstractKylinTestCase;
 import org.apache.kylin.common.util.ClassUtil;
 import org.apache.kylin.common.util.HBaseMetadataTestCase;
+import org.apache.kylin.job.hadoop.cube.StorageCleanupJob;
 import org.apache.kylin.job.streaming.StreamingBootstrap;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Test;
+import org.junit.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.File;
+import java.io.IOException;
 
 /**
  * Created by qianzhou on 3/6/15.
  */
 public class IIStreamBuilderTest extends HBaseMetadataTestCase {
 
+    private static final Logger logger = LoggerFactory.getLogger(IIStreamBuilderTest.class);
+
     private KylinConfig kylinConfig;
 
     @BeforeClass
@@ -59,6 +63,30 @@ public class IIStreamBuilderTest extends HBaseMetadataTestCase {
         System.setProperty("hdp.version", "2.2.0.0-2041"); // mapred-site.xml ref this
     }
 
+    @AfterClass
+    public static void afterClass() throws Exception {
+//        backup();
+    }
+
+    private static void backup() throws Exception {
+        int exitCode = cleanupOldStorage();
+        if (exitCode == 0) {
+            exportHBaseData();
+        }
+    }
+
+    private static int cleanupOldStorage() throws Exception {
+        String[] args = {"--delete", "true"};
+
+        int exitCode = ToolRunner.run(new StorageCleanupJob(), args);
+        return exitCode;
+    }
+
+    private static void exportHBaseData() throws IOException {
+        ExportHBaseData export = new ExportHBaseData();
+        export.exportTables();
+    }
+
     @Before
     public void before() throws Exception {
         HBaseMetadataTestCase.staticCreateTestMetadata(AbstractKylinTestCase.SANDBOX_TEST_DATA);
@@ -68,13 +96,13 @@ public class IIStreamBuilderTest extends HBaseMetadataTestCase {
         DeployUtil.overrideJobJarLocations();
     }
 
-    @After
-    public void after() {
-        this.cleanupTestMetadata();
-    }
-
     @Test
     public void test() throws Exception {
-        StreamingBootstrap.getInstance(kylinConfig).startStreaming("eagle", 0);
+//        final StreamingBootstrap bootstrap = StreamingBootstrap.getInstance(kylinConfig);
+//        bootstrap.start("eagle", 0);
+//        Thread.sleep(30 * 60 * 1000);
+//        logger.info("time is up, stop streaming");
+//        bootstrap.stop();
+//        Thread.sleep(5 * 1000);
     }
 }
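
cleanupOldStorage() above drives StorageCleanupJob through Hadoop's ToolRunner and hands its exit code back to the caller. The sketch below shows that Tool/ToolRunner contract with a stand-in Tool; ToolRunnerSketch is hypothetical and StorageCleanupJob's actual option handling is not reproduced here.

    import org.apache.hadoop.conf.Configured;
    import org.apache.hadoop.util.Tool;
    import org.apache.hadoop.util.ToolRunner;

    public class ToolRunnerSketch extends Configured implements Tool {

        @Override
        public int run(String[] args) throws Exception {
            // A real job (StorageCleanupJob, for instance) would parse args and do its work here.
            System.out.println("args: " + String.join(" ", args));
            return 0; // exit code, as consumed by cleanupOldStorage() above
        }

        public static void main(String[] args) throws Exception {
            // ToolRunner strips generic Hadoop options (-D, -conf, ...) before calling run().
            int exitCode = ToolRunner.run(new ToolRunnerSketch(), new String[] { "--delete", "true" });
            System.exit(exitCode);
        }
    }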


[42/50] incubator-kylin git commit: refactor

Posted by li...@apache.org.
refactor


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/a36d4166
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/a36d4166
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/a36d4166

Branch: refs/heads/streaming-localdict
Commit: a36d416602885ea546ea6a388ba2f3eb78188df7
Parents: ea96dc5
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 16:55:44 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 16:55:44 2015 +0800

----------------------------------------------------------------------
 .../kylin/job/streaming/StreamingBootstrap.java |  5 +-
 .../apache/kylin/job/BuildIIWithStreamTest.java |  4 +-
 .../job/hadoop/invertedindex/II2CubeTest.java   | 58 ++++++++------------
 3 files changed, 28 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/a36d4166/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java b/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
index 5d1673c..f6abad7 100644
--- a/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
+++ b/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
@@ -123,11 +123,10 @@ public class StreamingBootstrap {
                 getStreamQueue().put(new Stream(offset, bytes));
             }
         };
-        final IIDesc desc = ii.getDescriptor();
         kafkaConsumers.put(getKey(streaming, partitionId), consumer);
 
-        final IIStreamBuilder task = new IIStreamBuilder(consumer.getStreamQueue(), iiSegment.getStorageLocationIdentifier(), desc, partitionId);
-        task.setStreamParser(JsonStreamParser.instance);
+        final IIStreamBuilder task = new IIStreamBuilder(consumer.getStreamQueue(), iiSegment.getStorageLocationIdentifier(), iiSegment.getIIInstance(), partitionId);
+        task.setStreamParser(new JsonStreamParser(ii.getDescriptor().listAllColumns()));
 
         Executors.newSingleThreadExecutor().submit(consumer);
         Executors.newSingleThreadExecutor().submit(task).get();

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/a36d4166/job/src/test/java/org/apache/kylin/job/BuildIIWithStreamTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/BuildIIWithStreamTest.java b/job/src/test/java/org/apache/kylin/job/BuildIIWithStreamTest.java
index dae2d03..a3a7489 100644
--- a/job/src/test/java/org/apache/kylin/job/BuildIIWithStreamTest.java
+++ b/job/src/test/java/org/apache/kylin/job/BuildIIWithStreamTest.java
@@ -215,14 +215,14 @@ public class BuildIIWithStreamTest {
 
 
         ExecutorService executorService = Executors.newSingleThreadExecutor();
-        final IIStreamBuilder streamBuilder = new IIStreamBuilder(queue, segment.getStorageLocationIdentifier(), desc, 0);
+        final IIStreamBuilder streamBuilder = new IIStreamBuilder(queue, segment.getStorageLocationIdentifier(), segment.getIIInstance(), 0);
         int count = 0;
         while (reader.next()) {
             queue.put(parse(reader.getRow()));
             count++;
         }
         logger.info("total record count:" + count + " htable:" + segment.getStorageLocationIdentifier());
-        queue.put(new Stream(-1, null));
+        queue.put(Stream.EOF);
         final Future<?> future = executorService.submit(streamBuilder);
         try {
             future.get();
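
The queue.put(Stream.EOF) change above swaps an ad-hoc new Stream(-1, null) marker for a shared end-of-stream sentinel that the consumer can recognise. A generic sketch of that poison-pill pattern with a plain BlockingQueue follows; the names (EOF, record-1) are illustrative only and nothing here is a Kylin class.

    import java.util.concurrent.BlockingQueue;
    import java.util.concurrent.Callable;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.Future;
    import java.util.concurrent.LinkedBlockingQueue;

    public class PoisonPillSketch {
        private static final String EOF = "<EOF>"; // sentinel meaning "no more records"

        public static void main(String[] args) throws Exception {
            BlockingQueue<String> queue = new LinkedBlockingQueue<>();
            ExecutorService pool = Executors.newSingleThreadExecutor();

            Callable<Void> consumer = () -> {
                while (true) {
                    String item = queue.take();
                    if (EOF.equals(item)) {
                        return null;                 // stop once the sentinel arrives
                    }
                    System.out.println("consumed " + item);
                }
            };
            Future<?> done = pool.submit(consumer);

            queue.put("record-1");
            queue.put("record-2");
            queue.put(EOF);   // like queue.put(Stream.EOF) in the test above
            done.get();       // like future.get() above
            pool.shutdown();
        }
    }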

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/a36d4166/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java b/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
index 97c71f8..da1cb18 100644
--- a/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
+++ b/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
@@ -1,14 +1,9 @@
 package org.apache.kylin.job.hadoop.invertedindex;
 
-import java.io.IOException;
-import java.util.*;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.LinkedBlockingQueue;
-
-import javax.annotation.Nullable;
-
+import com.google.common.base.Function;
+import com.google.common.collect.Collections2;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.hbase.mapreduce.MutationSerialization;
@@ -24,22 +19,20 @@ import org.apache.kylin.invertedindex.IIManager;
 import org.apache.kylin.invertedindex.index.Slice;
 import org.apache.kylin.invertedindex.index.TableRecordInfo;
 import org.apache.kylin.invertedindex.index.TableRecordInfoDigest;
-import org.apache.kylin.invertedindex.model.IIDesc;
-import org.apache.kylin.invertedindex.model.IIKeyValueCodecWithState;
-import org.apache.kylin.invertedindex.model.IIRow;
-import org.apache.kylin.invertedindex.model.KeyValueCodec;
+import org.apache.kylin.invertedindex.model.*;
 import org.apache.kylin.job.constant.BatchConstants;
 import org.apache.kylin.job.hadoop.cube.FactDistinctIIColumnsMapper;
 import org.apache.kylin.streaming.Stream;
+import org.apache.kylin.streaming.StringStreamParser;
+import org.apache.kylin.streaming.invertedindex.SliceBuilder;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 
-import com.google.common.base.Function;
-import com.google.common.collect.Collections2;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
+import javax.annotation.Nullable;
+import java.io.IOException;
+import java.util.*;
 
 /**
  * Created by Hongbin Ma(Binmahone) on 3/26/15.
@@ -53,10 +46,10 @@ public class II2CubeTest extends LocalFileMetadataTestCase {
 
     List<IIRow> iiRows;
 
-    final String[] inputs = new String[] { //
-    "FP-non GTC,0,15,145970,0,28,Toys,2008-10-08 07:18:40,USER_Y,Toys & Hobbies,Models & Kits,Automotive,0,Ebay,USER_S,15,Professional-Other,2012-08-16,2012-08-11,0,2012-08-16,145970,10000329,26.8551,0", //
+    final String[] inputs = new String[]{ //
+            "FP-non GTC,0,15,145970,0,28,Toys,2008-10-08 07:18:40,USER_Y,Toys & Hobbies,Models & Kits,Automotive,0,Ebay,USER_S,15,Professional-Other,2012-08-16,2012-08-11,0,2012-08-16,145970,10000329,26.8551,0", //
             "ABIN,0,-99,43479,0,21,Photo,2012-09-11 20:26:04,USER_Y,Cameras & Photo,Film Photography,Other,0,Ebay,USER_S,-99,Not Applicable,2012-08-16,2012-08-11,0,2012-08-16,43479,10000807,26.2474,0", //
-            "ABIN,0,16,80053,0,12,Computers,2012-06-19 21:15:09,USER_Y,Computers/Tablets & Networking,MonitorProjectors & Accs,Monitors,0,Ebay,USER_S,16,Consumer-Other,2012-08-16,2012-08-11,0,2012-08-16,80053,10000261,94.2273,0" };
+            "ABIN,0,16,80053,0,12,Computers,2012-06-19 21:15:09,USER_Y,Computers/Tablets & Networking,MonitorProjectors & Accs,Monitors,0,Ebay,USER_S,16,Consumer-Other,2012-08-16,2012-08-11,0,2012-08-16,80053,10000261,94.2273,0"};
 
     @Before
     public void setUp() throws Exception {
@@ -64,22 +57,20 @@ public class II2CubeTest extends LocalFileMetadataTestCase {
         this.ii = IIManager.getInstance(getTestConfig()).getII(iiName);
         this.iiDesc = ii.getDescriptor();
 
-        Collection<?> streams = Collections2.transform(Arrays.asList(inputs), new Function<String, Stream>() {
+        List<Stream> streams = Lists.transform(Arrays.asList(inputs), new Function<String, Stream>() {
             @Nullable
             @Override
             public Stream apply(String input) {
-                return new Stream(0, input.getBytes());
+                return new Stream(System.currentTimeMillis(), input.getBytes());
             }
         });
-        LinkedBlockingQueue q = new LinkedBlockingQueue();
-        q.addAll(streams);
-        q.put(new Stream(-1, null));//a stop sign for builder
 
         iiRows = Lists.newArrayList();
-        ToyIIStreamBuilder builder = new ToyIIStreamBuilder(q, iiDesc, 0, iiRows);
-        ExecutorService executorService = Executors.newSingleThreadExecutor();
-        Future<?> future = executorService.submit(builder);
-        future.get();
+        final Slice slice = new SliceBuilder(iiDesc, (short) 0).buildSlice(streams, StringStreamParser.instance);
+        IIKeyValueCodec codec = new IIKeyValueCodec(slice.getInfo());
+        for (IIRow iiRow : codec.encodeKeyValue(slice)) {
+            iiRows.add(iiRow);
+        }
 
     }
 
@@ -131,15 +122,14 @@ public class II2CubeTest extends LocalFileMetadataTestCase {
             @Nullable
             @Override
             public Pair<ImmutableBytesWritable, Result> apply(@Nullable IIRow input) {
-                return new Pair<ImmutableBytesWritable, Result>(new ImmutableBytesWritable(new byte[] { 1 }), Result.create(input.makeCells()));
+                return new Pair<ImmutableBytesWritable, Result>(new ImmutableBytesWritable(new byte[]{1}), Result.create(input.makeCells()));
             }
         })));
 
         List<Pair<LongWritable, Text>> result = mapDriver.run();
-        Set<String> lstgNames = Sets.newHashSet("FP-non GTC","ABIN");
-        for(Pair<LongWritable, Text> pair : result)
-        {
-            Assert.assertEquals(pair.getFirst().get(),6);
+        Set<String> lstgNames = Sets.newHashSet("FP-non GTC", "ABIN");
+        for (Pair<LongWritable, Text> pair : result) {
+            Assert.assertEquals(pair.getFirst().get(), 6);
             Assert.assertTrue(lstgNames.contains(pair.getSecond().toString()));
         }
     }
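
For illustration, a minimal sketch of the encode path that the reworked setUp() above relies on, using only classes that appear in this hunk (SliceBuilder, StringStreamParser, IIKeyValueCodec); iiDesc is the test's IIDesc and the input line is truncated purely for illustration:

    List<Stream> streams = Lists.newArrayList(
            new Stream(System.currentTimeMillis(), "FP-non GTC,0,15,145970,...".getBytes()));
    Slice slice = new SliceBuilder(iiDesc, (short) 0)
            .buildSlice(streams, StringStreamParser.instance);
    IIKeyValueCodec codec = new IIKeyValueCodec(slice.getInfo());
    List<IIRow> iiRows = Lists.newArrayList(codec.encodeKeyValue(slice));

The effect of the refactor is visible here: no blocking queue, no stop-sign Stream and no background builder thread are needed to obtain the IIRows for the test.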


[31/50] incubator-kylin git commit: KYLIN-653 checking

Posted by li...@apache.org.
KYLIN-653 checking


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/fc5ab528
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/fc5ab528
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/fc5ab528

Branch: refs/heads/streaming-localdict
Commit: fc5ab528fcdb1d8fd90f861e4f937c353c09bbab
Parents: d09e00d
Author: honma <ho...@ebay.com>
Authored: Fri Mar 27 11:31:57 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Fri Mar 27 15:16:20 2015 +0800

----------------------------------------------------------------------
 .../invertedindex/model/IIKeyValueCodecWithState.java   | 12 ++++++++++--
 .../job/hadoop/cube/FactDistinctIIColumnsMapper.java    |  6 ++++--
 .../invertedindex/IIKeyValueCodecWithStateTest.java     |  4 ++++
 3 files changed, 18 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/fc5ab528/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
index 82f1020..29ffd40 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
@@ -1,14 +1,14 @@
 package org.apache.kylin.invertedindex.model;
 
-import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.LinkedList;
 
-import com.google.common.base.Preconditions;
+import org.apache.kylin.common.util.FIFOIterable;
 import org.apache.kylin.common.util.FIFOIterator;
 import org.apache.kylin.invertedindex.index.Slice;
 import org.apache.kylin.invertedindex.index.TableRecordInfoDigest;
 
+import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 
 /**
@@ -20,8 +20,16 @@ public class IIKeyValueCodecWithState extends IIKeyValueCodec {
         super(digest);
     }
 
+    /**
+     * 
+     * @param kvs kvs must be a {@link org.apache.kylin.common.util.FIFOIterable } to avoid {@link java.util.ConcurrentModificationException}.
+     * @return
+     */
     @Override
     public Iterable<Slice> decodeKeyValue(Iterable<IIRow> kvs) {
+        if (!(kvs instanceof FIFOIterable)) {
+            throw new IllegalArgumentException("kvs must be a {@link org.apache.kylin.common.util.FIFOIterable } to avoid {@link java.util.ConcurrentModificationException}.");
+        }
         return new IIRowDecoderWithState(digest, kvs.iterator());
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/fc5ab528/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
index 705e272..6a236fd 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
@@ -22,12 +22,14 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Queue;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.util.FIFOIterable;
 import org.apache.kylin.dict.Dictionary;
 import org.apache.kylin.invertedindex.IIInstance;
 import org.apache.kylin.invertedindex.IIManager;
@@ -50,7 +52,7 @@ import com.google.common.collect.Lists;
 public class FactDistinctIIColumnsMapper extends FactDistinctColumnsMapperBase<ImmutableBytesWritable, Result> {
 
     private IIJoinedFlatTableDesc intermediateTableDesc;
-    private ArrayList<IIRow> buffer = Lists.newArrayList();
+    private Queue<IIRow> buffer = Lists.newLinkedList();
     private Iterator<Slice> slices;
 
     private String iiName;
@@ -73,7 +75,7 @@ public class FactDistinctIIColumnsMapper extends FactDistinctColumnsMapperBase<I
         intermediateTableDesc = new IIJoinedFlatTableDesc(iiDesc);
         TableRecordInfo info = new TableRecordInfo(iiDesc);
         KeyValueCodec codec = new IIKeyValueCodecWithState(info.getDigest());
-        slices = codec.decodeKeyValue(buffer).iterator();
+        slices = codec.decodeKeyValue(new FIFOIterable<IIRow>(buffer)).iterator();
 
         baseCuboidCol2FlattenTableCol = new int[factDictCols.size()];
         for (int i = 0; i < factDictCols.size(); ++i) {

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/fc5ab528/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java
index 416d31a..5ade5f1 100644
--- a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java
+++ b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java
@@ -71,6 +71,10 @@ public class IIKeyValueCodecWithStateTest extends LocalFileMetadataTestCase {
         cleanupTestMetadata();
     }
 
+    /**
+     * simulate stream building into slices, and encode the slice into IIRows.
+     * Then reconstruct the IIRows to slice.
+     */
     @Test
     public void basicTest() {
         Queue<IIRow> buffer = Lists.newLinkedList();


[02/50] incubator-kylin git commit: KYLIN-630 add distinct column mapper for II storage

Posted by li...@apache.org.
KYLIN-630 add distinct column mapper for II storage


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/1b52438e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/1b52438e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/1b52438e

Branch: refs/heads/streaming-localdict
Commit: 1b52438e2eec3dd271b66b6a6352ccf1bc0278d3
Parents: 8e0695b
Author: honma <ho...@ebay.com>
Authored: Thu Mar 26 16:03:30 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Thu Mar 26 16:03:30 2015 +0800

----------------------------------------------------------------------
 .../model/IIJoinedFlatTableDesc.java            |  12 +-
 .../apache/kylin/invertedindex/model/IIRow.java |  13 ++
 .../org/apache/kylin/job/JoinedFlatTable.java   |   1 -
 .../kylin/job/hadoop/AbstractHadoopJob.java     |   2 +-
 .../kylin/job/hadoop/cube/CubeHFileMapper.java  |   2 +-
 .../kylin/job/hadoop/cube/CuboidReducer.java    |   2 +-
 .../job/hadoop/cube/FactDistinctColumnsJob.java |   2 +-
 .../hadoop/cube/FactDistinctColumnsMapper.java  | 129 -------------------
 .../cube/FactDistinctColumnsMapperBase.java     |   2 +-
 .../hadoop/cube/FactDistinctColumnsReducer.java |   2 +-
 .../cube/FactDistinctHiveColumnsMapper.java     | 129 +++++++++++++++++++
 .../cube/FactDistinctIIColumnsMapper.java       | 129 +++++++++++++++++++
 .../job/hadoop/cube/MergeCuboidMapper.java      |   2 +-
 .../kylin/job/hadoop/cube/NDCuboidMapper.java   |   2 +-
 .../job/hadoop/cube/NewBaseCuboidMapper.java    |   2 +-
 .../job/hadoop/cubev2/InMemCuboidMapper.java    |   2 +-
 .../job/hadoop/cubev2/InMemCuboidReducer.java   |   2 +-
 .../invertedindex/InvertedIndexMapper.java      |   2 +-
 .../invertedindex/InvertedIndexPartitioner.java |   2 +-
 .../invertedindex/InvertedIndexReducer.java     |   2 +-
 .../metadata/model/IJoinedFlatTableDesc.java    |   2 -
 .../metadata/model/IntermediateColumnDesc.java  |   4 +
 .../endpoint/HbaseServerKVIterator.java         |   9 +-
 23 files changed, 296 insertions(+), 160 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIJoinedFlatTableDesc.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIJoinedFlatTableDesc.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIJoinedFlatTableDesc.java
index 44114da..14934dc 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIJoinedFlatTableDesc.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIJoinedFlatTableDesc.java
@@ -19,13 +19,13 @@
 package org.apache.kylin.invertedindex.model;
 
 import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
 
-import com.google.common.collect.Lists;
+import org.apache.kylin.metadata.model.DataModelDesc;
+import org.apache.kylin.metadata.model.IJoinedFlatTableDesc;
+import org.apache.kylin.metadata.model.IntermediateColumnDesc;
+import org.apache.kylin.metadata.model.TblColRef;
 
-import org.apache.kylin.invertedindex.model.IIDesc;
-import org.apache.kylin.metadata.model.*;
+import com.google.common.collect.Lists;
 
 /**
  * Created by Hongbin Ma(Binmahone) on 12/30/14.
@@ -35,7 +35,6 @@ public class IIJoinedFlatTableDesc implements IJoinedFlatTableDesc {
     private IIDesc iiDesc;
     private String tableName;
     private List<IntermediateColumnDesc> columnList = Lists.newArrayList();
-    private Map<String, String> tableAliasMap;
 
     public IIJoinedFlatTableDesc(IIDesc iiDesc) {
         this.iiDesc = iiDesc;
@@ -57,6 +56,7 @@ public class IIJoinedFlatTableDesc implements IJoinedFlatTableDesc {
         return tableName + "_" + jobUUID.replace("-", "_");
     }
 
+    @Override
     public List<IntermediateColumnDesc> getColumnList() {
         return columnList;
     }

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIRow.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIRow.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIRow.java
index aba4fff..f3d398a 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIRow.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIRow.java
@@ -34,7 +34,9 @@
 
 package org.apache.kylin.invertedindex.model;
 
+import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.kylin.common.util.BytesUtil;
 
 /**
  * Created by qianzhou on 3/10/15.
@@ -50,6 +52,7 @@ public final class IIRow {
         this.value = value;
         this.dictionary = dictionary;
     }
+
     public IIRow() {
         this(new ImmutableBytesWritable(), new ImmutableBytesWritable(), new ImmutableBytesWritable());
     }
@@ -61,7 +64,17 @@ public final class IIRow {
     public ImmutableBytesWritable getValue() {
         return value;
     }
+
     public ImmutableBytesWritable getDictionary() {
         return dictionary;
     }
+
+    public void updateWith(Cell c) {
+        if (BytesUtil.compareBytes(IIDesc.HBASE_QUALIFIER_BYTES, 0, c.getQualifierArray(), c.getQualifierOffset(), IIDesc.HBASE_QUALIFIER_BYTES.length) == 0) {
+            this.getKey().set(c.getRowArray(), c.getRowOffset(), c.getRowLength());
+            this.getValue().set(c.getValueArray(), c.getValueOffset(), c.getValueLength());
+        } else if (BytesUtil.compareBytes(IIDesc.HBASE_DICTIONARY_BYTES, 0, c.getQualifierArray(), c.getQualifierOffset(), IIDesc.HBASE_DICTIONARY_BYTES.length) == 0) {
+            this.getDictionary().set(c.getValueArray(), c.getValueOffset(), c.getValueLength());
+        }
+    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java b/job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java
index cc3dc1b..100fbca 100644
--- a/job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java
+++ b/job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java
@@ -27,7 +27,6 @@ import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
 import javax.xml.parsers.ParserConfigurationException;
 
-import org.apache.kylin.cube.model.DimensionDesc;
 import org.w3c.dom.Document;
 import org.w3c.dom.NodeList;
 import org.xml.sax.SAXException;

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/AbstractHadoopJob.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/AbstractHadoopJob.java b/job/src/main/java/org/apache/kylin/job/hadoop/AbstractHadoopJob.java
index 038fe2f..9f73488 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/AbstractHadoopJob.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/AbstractHadoopJob.java
@@ -330,7 +330,7 @@ public abstract class AbstractHadoopJob extends Configured implements Tool {
         return input.getSplits(job).size();
     }
 
-    public static KylinConfig loadKylinPropsAndMetadata(Configuration conf) throws IOException {
+    public static KylinConfig loadKylinPropsAndMetadata() throws IOException {
         File metaDir = new File("meta");
         System.setProperty(KylinConfig.KYLIN_CONF, metaDir.getAbsolutePath());
         logger.info("The absolute path for meta dir is " + metaDir.getAbsolutePath());

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/cube/CubeHFileMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/CubeHFileMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/CubeHFileMapper.java
index 17dc24e..1236f8c 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/CubeHFileMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/CubeHFileMapper.java
@@ -62,7 +62,7 @@ public class CubeHFileMapper extends KylinMapper<Text, Text, ImmutableBytesWrita
         super.publishConfiguration(context.getConfiguration());
         cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME);
 
-        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(context.getConfiguration());
+        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
 
         CubeManager cubeMgr = CubeManager.getInstance(config);
         cubeDesc = cubeMgr.getCube(cubeName).getDescriptor();

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/cube/CuboidReducer.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/CuboidReducer.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/CuboidReducer.java
index 7181fa1..b747dff 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/CuboidReducer.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/CuboidReducer.java
@@ -64,7 +64,7 @@ public class CuboidReducer extends KylinReducer<Text, Text, Text, Text> {
         super.publishConfiguration(context.getConfiguration());
         cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase();
 
-        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(context.getConfiguration());
+        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
 
         cubeDesc = CubeManager.getInstance(config).getCube(cubeName).getDescriptor();
         measuresDescs = cubeDesc.getMeasures();

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsJob.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsJob.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsJob.java
index 094014e..17c5e9b 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsJob.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsJob.java
@@ -101,7 +101,7 @@ public class FactDistinctColumnsJob extends AbstractHadoopJob {
                 dbTableNames[1]);
         
         job.setInputFormatClass(HCatInputFormat.class);
-        job.setMapperClass(FactDistinctColumnsMapper.class);
+        job.setMapperClass(FactDistinctHiveColumnsMapper.class);
         job.setCombinerClass(FactDistinctColumnsCombiner.class);
         job.setMapOutputKeyClass(ShortWritable.class);
         job.setMapOutputValueClass(Text.class);

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapper.java
deleted file mode 100644
index 3a50249..0000000
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapper.java
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.job.hadoop.cube;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.Collection;
-import java.util.List;
-
-import org.apache.commons.lang3.StringUtils;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hive.hcatalog.data.HCatRecord;
-import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
-import org.apache.hive.hcatalog.data.schema.HCatSchema;
-import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
-import org.apache.kylin.common.hll.HyperLogLogPlusCounter;
-import org.apache.kylin.cube.cuboid.CuboidScheduler;
-import org.apache.kylin.cube.model.CubeJoinedFlatTableDesc;
-import org.apache.kylin.dict.lookup.HiveTableReader;
-import org.apache.kylin.job.constant.BatchConstants;
-
-import com.google.common.collect.Lists;
-
-/**
- * @author yangli9
- */
-public class FactDistinctColumnsMapper<KEYIN> extends FactDistinctColumnsMapperBase<KEYIN, HCatRecord> {
-
-    private HCatSchema schema = null;
-    private CubeJoinedFlatTableDesc intermediateTableDesc;
-
-    protected boolean collectStatistics = false;
-    protected CuboidScheduler cuboidScheduler = null;
-    protected List<String> rowKeyValues = null;
-    protected HyperLogLogPlusCounter hll;
-    protected int nRowKey;
-
-    @Override
-    protected void setup(Context context) throws IOException {
-        super.setup(context);
-
-        schema = HCatInputFormat.getTableSchema(context.getConfiguration());
-        intermediateTableDesc = new CubeJoinedFlatTableDesc(cubeDesc, null);
-
-
-        collectStatistics = Boolean.parseBoolean(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_ENABLED));
-        if (collectStatistics) {
-            cuboidScheduler = new CuboidScheduler(cubeDesc);
-            hll = new HyperLogLogPlusCounter(16);
-            rowKeyValues = Lists.newArrayList();
-            nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length;
-        }
-    }
-
-    @Override
-    public void map(KEYIN key, HCatRecord record, Context context) throws IOException, InterruptedException {
-        try {
-            int[] flatTableIndexes = intermediateTableDesc.getRowKeyColumnIndexes();
-            HCatFieldSchema fieldSchema;
-            for (int i : factDictCols) {
-                outputKey.set((short) i);
-                fieldSchema = schema.get(flatTableIndexes[i]);
-                Object fieldValue = record.get(fieldSchema.getName(), schema);
-                if (fieldValue == null)
-                    continue;
-                byte[] bytes = Bytes.toBytes(fieldValue.toString());
-                outputValue.set(bytes, 0, bytes.length);
-                context.write(outputKey, outputValue);
-            }
-        } catch (Exception ex) {
-            handleErrorRecord(record, ex);
-        }
-
-        if (collectStatistics) {
-            String[] row = HiveTableReader.getRowAsStringArray(record);
-            putRowKeyToHLL(row, baseCuboidId);
-        }
-    }
-
-    private void putRowKeyToHLL(String[] row, long cuboidId) {
-        rowKeyValues.clear();
-        long mask = Long.highestOneBit(baseCuboidId);
-        for (int i = 0; i < nRowKey; i++) {
-            if ((mask & cuboidId) == 1) {
-                rowKeyValues.add(row[intermediateTableDesc.getRowKeyColumnIndexes()[i]]);
-            }
-            mask = mask >> 1;
-        }
-
-        String key = StringUtils.join(rowKeyValues, ",");
-        hll.add(key);
-
-        Collection<Long> children = cuboidScheduler.getSpanningCuboid(cuboidId);
-        for (Long childId : children) {
-            putRowKeyToHLL(row, childId);
-        }
-
-    }
-
-    @Override
-    protected void cleanup(Context context) throws IOException, InterruptedException {
-        if (collectStatistics) {
-            // output hll to reducer, key is -1
-            // keyBuf = Bytes.toBytes(-1);
-            outputKey.set((short) -1);
-            ByteBuffer hllBuf = ByteBuffer.allocate(64 * 1024);
-            hll.writeRegisters(hllBuf);
-            outputValue.set(hllBuf.array());
-            context.write(outputKey, outputValue);
-        }
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
index 603277c..c0455ff 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
@@ -40,7 +40,7 @@ public class FactDistinctColumnsMapperBase<KEYIN, VALUEIN> extends KylinMapper<K
     protected void setup(Context context) throws IOException {
         Configuration conf = context.getConfiguration();
         publishConfiguration(conf);
-        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(conf);
+        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
 
         cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
         cube = CubeManager.getInstance(config).getCube(cubeName);

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java
index 383def4..2052d08 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java
@@ -61,7 +61,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<ShortWritable, Text
         super.publishConfiguration(context.getConfiguration());
 
         Configuration conf = context.getConfiguration();
-        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(conf);
+        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
         String cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
         CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
         CubeDesc cubeDesc = cube.getDescriptor();

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctHiveColumnsMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctHiveColumnsMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctHiveColumnsMapper.java
new file mode 100644
index 0000000..64ae353
--- /dev/null
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctHiveColumnsMapper.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.job.hadoop.cube;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hive.hcatalog.data.HCatRecord;
+import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
+import org.apache.hive.hcatalog.data.schema.HCatSchema;
+import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.kylin.common.hll.HyperLogLogPlusCounter;
+import org.apache.kylin.cube.cuboid.CuboidScheduler;
+import org.apache.kylin.cube.model.CubeJoinedFlatTableDesc;
+import org.apache.kylin.dict.lookup.HiveTableReader;
+import org.apache.kylin.job.constant.BatchConstants;
+
+import com.google.common.collect.Lists;
+
+/**
+ * @author yangli9
+ */
+public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMapperBase<KEYIN, HCatRecord> {
+
+    private HCatSchema schema = null;
+    private CubeJoinedFlatTableDesc intermediateTableDesc;
+
+    protected boolean collectStatistics = false;
+    protected CuboidScheduler cuboidScheduler = null;
+    protected List<String> rowKeyValues = null;
+    protected HyperLogLogPlusCounter hll;
+    protected int nRowKey;
+
+    @Override
+    protected void setup(Context context) throws IOException {
+        super.setup(context);
+
+        schema = HCatInputFormat.getTableSchema(context.getConfiguration());
+        intermediateTableDesc = new CubeJoinedFlatTableDesc(cubeDesc, null);
+
+
+        collectStatistics = Boolean.parseBoolean(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_ENABLED));
+        if (collectStatistics) {
+            cuboidScheduler = new CuboidScheduler(cubeDesc);
+            hll = new HyperLogLogPlusCounter(16);
+            rowKeyValues = Lists.newArrayList();
+            nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length;
+        }
+    }
+
+    @Override
+    public void map(KEYIN key, HCatRecord record, Context context) throws IOException, InterruptedException {
+        try {
+            int[] flatTableIndexes = intermediateTableDesc.getRowKeyColumnIndexes();
+            HCatFieldSchema fieldSchema;
+            for (int i : factDictCols) {
+                outputKey.set((short) i);
+                fieldSchema = schema.get(flatTableIndexes[i]);
+                Object fieldValue = record.get(fieldSchema.getName(), schema);
+                if (fieldValue == null)
+                    continue;
+                byte[] bytes = Bytes.toBytes(fieldValue.toString());
+                outputValue.set(bytes, 0, bytes.length);
+                context.write(outputKey, outputValue);
+            }
+        } catch (Exception ex) {
+            handleErrorRecord(record, ex);
+        }
+
+        if (collectStatistics) {
+            String[] row = HiveTableReader.getRowAsStringArray(record);
+            putRowKeyToHLL(row, baseCuboidId);
+        }
+    }
+
+    private void putRowKeyToHLL(String[] row, long cuboidId) {
+        rowKeyValues.clear();
+        long mask = Long.highestOneBit(baseCuboidId);
+        for (int i = 0; i < nRowKey; i++) {
+            if ((mask & cuboidId) == 1) {
+                rowKeyValues.add(row[intermediateTableDesc.getRowKeyColumnIndexes()[i]]);
+            }
+            mask = mask >> 1;
+        }
+
+        String key = StringUtils.join(rowKeyValues, ",");
+        hll.add(key);
+
+        Collection<Long> children = cuboidScheduler.getSpanningCuboid(cuboidId);
+        for (Long childId : children) {
+            putRowKeyToHLL(row, childId);
+        }
+
+    }
+
+    @Override
+    protected void cleanup(Context context) throws IOException, InterruptedException {
+        if (collectStatistics) {
+            // output hll to reducer, key is -1
+            // keyBuf = Bytes.toBytes(-1);
+            outputKey.set((short) -1);
+            ByteBuffer hllBuf = ByteBuffer.allocate(64 * 1024);
+            hll.writeRegisters(hllBuf);
+            outputValue.set(hllBuf.array());
+            context.write(outputKey, outputValue);
+        }
+    }
+
+}
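
For reference, a standalone sketch of the statistics path above: row key columns selected by the cuboid's bitmask are joined and fed to the HyperLogLogPlusCounter, whose registers are then serialized for the reducer. Only calls that appear in this file are used; row, nRowKey, cuboidId, baseCuboidId and rowKeyColumnIndexes are assumed to be in scope, the bit test is written here as != 0, and the fragment is assumed to run inside a method that declares throws IOException (as cleanup() does):

    HyperLogLogPlusCounter hll = new HyperLogLogPlusCounter(16);
    List<String> keyParts = Lists.newArrayList();
    long mask = Long.highestOneBit(baseCuboidId);
    for (int i = 0; i < nRowKey; i++) {
        if ((mask & cuboidId) != 0) {                    // column i participates in this cuboid
            keyParts.add(row[rowKeyColumnIndexes[i]]);
        }
        mask = mask >> 1;
    }
    hll.add(StringUtils.join(keyParts, ","));            // one estimator entry per row and cuboid

    ByteBuffer hllBuf = ByteBuffer.allocate(64 * 1024);  // registers go to the reducer under key -1
    hll.writeRegisters(hllBuf);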

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
new file mode 100644
index 0000000..75e127e
--- /dev/null
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.job.hadoop.cube;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.dict.Dictionary;
+import org.apache.kylin.invertedindex.IIInstance;
+import org.apache.kylin.invertedindex.IIManager;
+import org.apache.kylin.invertedindex.index.RawTableRecord;
+import org.apache.kylin.invertedindex.index.Slice;
+import org.apache.kylin.invertedindex.index.TableRecordInfo;
+import org.apache.kylin.invertedindex.model.*;
+import org.apache.kylin.job.constant.BatchConstants;
+import org.apache.kylin.job.hadoop.AbstractHadoopJob;
+import org.apache.kylin.metadata.model.IntermediateColumnDesc;
+import org.apache.kylin.metadata.model.TblColRef;
+
+import com.google.common.base.Predicate;
+import com.google.common.collect.Iterators;
+import com.google.common.collect.Lists;
+
+/**
+ * @author yangli9
+ */
+public class FactDistinctIIColumnsMapper extends FactDistinctColumnsMapperBase<ImmutableBytesWritable, Result> {
+
+    private IIJoinedFlatTableDesc intermediateTableDesc;
+    private ArrayList<IIRow> buffer = Lists.newArrayList();
+    private Iterable<Slice> slices;
+
+    private String iiName;
+    private IIInstance ii;
+    private IIDesc iiDesc;
+
+    private int[] baseCuboidCol2FlattenTableCol;
+
+    @Override
+    protected void setup(Context context) throws IOException {
+        super.setup(context);
+
+        Configuration conf = context.getConfiguration();
+        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
+
+        iiName = conf.get(BatchConstants.CFG_II_NAME);
+        ii = IIManager.getInstance(config).getII(iiName);
+        iiDesc = ii.getDescriptor();
+
+        intermediateTableDesc = new IIJoinedFlatTableDesc(iiDesc);
+        TableRecordInfo info = new TableRecordInfo(iiDesc);
+        KeyValueCodec codec = new IIKeyValueCodecWithState(info.getDigest());
+        slices = codec.decodeKeyValue(buffer);
+
+        baseCuboidCol2FlattenTableCol = new int[factDictCols.size()];
+        for (int i = 0; i < factDictCols.size(); ++i) {
+            int index = findTblCol(intermediateTableDesc.getColumnList(), columns.get(factDictCols.get(i)));
+            baseCuboidCol2FlattenTableCol[i] = index;
+        }
+    }
+
+    private int findTblCol(List<IntermediateColumnDesc> columns, final TblColRef col) {
+        return Iterators.indexOf(columns.iterator(), new Predicate<IntermediateColumnDesc>() {
+            @Override
+            public boolean apply(IntermediateColumnDesc input) {
+                return input.getColRef().equals(col);
+            }
+        });
+    }
+
+    @Override
+    public void map(ImmutableBytesWritable key, Result cells, Context context) throws IOException, InterruptedException {
+        IIRow iiRow = new IIRow();
+        for (Cell c : cells.rawCells()) {
+            iiRow.updateWith(c);
+        }
+        buffer.add(iiRow);
+
+        if (slices.iterator().hasNext()) {
+            byte[] vBytesBuffer = null;
+            Slice slice = slices.iterator().next();
+
+            for (RawTableRecord record : slice) {
+                for (int i = 0; i < factDictCols.size(); ++i) {
+                    int baseCuboidIndex = factDictCols.get(i);
+                    outputKey.set((short) baseCuboidIndex);
+                    int indexInRecord = baseCuboidCol2FlattenTableCol[i];
+
+                    Dictionary<?> dictionary = slice.getLocalDictionaries().get(indexInRecord);
+                    if (vBytesBuffer == null || dictionary.getSizeOfValue() > vBytesBuffer.length) {
+                        vBytesBuffer = new byte[dictionary.getSizeOfValue() * 2];
+                    }
+
+                    int vid = record.getValueID(baseCuboidIndex);
+                    if (vid == dictionary.nullId()) {
+                        continue;
+                    }
+                    int vBytesSize = dictionary.getValueBytesFromId(vid, vBytesBuffer, 0);
+
+                    outputValue.set(vBytesBuffer, 0, vBytesSize);
+                    context.write(outputKey, outputValue);
+                }
+            }
+        }
+    }
+
+}
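
A simplified sketch of the dictionary round trip performed in map() above, reusing only calls that appear there; colInRecord stands in for a single column index used for both the record and its local dictionary, whereas the mapper above additionally translates between base cuboid and flattened-table indexes:

    for (RawTableRecord record : slice) {
        Dictionary<?> dict = slice.getLocalDictionaries().get(colInRecord);
        int vid = record.getValueID(colInRecord);
        if (vid == dict.nullId()) {
            continue;                                    // null cells carry no distinct value
        }
        byte[] buf = new byte[dict.getSizeOfValue() * 2];
        int len = dict.getValueBytesFromId(vid, buf, 0);
        // buf[0..len) now holds the original column value to emit
    }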

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/cube/MergeCuboidMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/MergeCuboidMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/MergeCuboidMapper.java
index 431f2b7..417e996 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/MergeCuboidMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/MergeCuboidMapper.java
@@ -114,7 +114,7 @@ public class MergeCuboidMapper extends KylinMapper<Text, Text, Text, Text> {
         cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase();
         segmentName = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_NAME).toUpperCase();
 
-        config = AbstractHadoopJob.loadKylinPropsAndMetadata(context.getConfiguration());
+        config = AbstractHadoopJob.loadKylinPropsAndMetadata();
 
         cubeManager = CubeManager.getInstance(config);
         cube = cubeManager.getCube(cubeName);

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/cube/NDCuboidMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/NDCuboidMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/NDCuboidMapper.java
index dc65baa..e476bd7 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/NDCuboidMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/NDCuboidMapper.java
@@ -68,7 +68,7 @@ public class NDCuboidMapper extends KylinMapper<Text, Text, Text, Text> {
         cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase();
         segmentName = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_NAME).toUpperCase();
 
-        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(context.getConfiguration());
+        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
 
         CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
         CubeSegment cubeSegment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/cube/NewBaseCuboidMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/NewBaseCuboidMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/NewBaseCuboidMapper.java
index e75457e..79c334c 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/NewBaseCuboidMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/NewBaseCuboidMapper.java
@@ -125,7 +125,7 @@ public class NewBaseCuboidMapper<KEYIN> extends KylinMapper<KEYIN, Text, Text, T
         cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase();
         segmentName = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_NAME);
 
-        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(context.getConfiguration());
+        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
 
         metadataManager = MetadataManager.getInstance(config);
         cube = CubeManager.getInstance(config).getCube(cubeName);

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidMapper.java
index a58369f..5a3565a 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidMapper.java
@@ -81,7 +81,7 @@ public class InMemCuboidMapper<KEYIN> extends KylinMapper<KEYIN, HCatRecord, Tex
 
         Configuration conf = context.getConfiguration();
 
-        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(conf);
+        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
         cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
         cube = CubeManager.getInstance(config).getCube(cubeName);
         cubeDesc = cube.getDescriptor();

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidReducer.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidReducer.java b/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidReducer.java
index de2539c..48fe3a1 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidReducer.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidReducer.java
@@ -44,7 +44,7 @@ public class InMemCuboidReducer extends KylinReducer<Text, Text, Text, Text> {
         super.publishConfiguration(context.getConfiguration());
         cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase();
 
-        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(context.getConfiguration());
+        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
 
         cubeDesc = CubeManager.getInstance(config).getCube(cubeName).getDescriptor();
         measuresDescs = cubeDesc.getMeasures();

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexMapper.java
index 735a945..0344043 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexMapper.java
@@ -58,7 +58,7 @@ public class InvertedIndexMapper<KEYIN> extends KylinMapper<KEYIN, HCatRecord, L
 
         Configuration conf = context.getConfiguration();
 
-        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(conf);
+        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
         IIManager mgr = IIManager.getInstance(config);
         IIInstance ii = mgr.getII(conf.get(BatchConstants.CFG_II_NAME));
         IISegment seg = ii.getSegment(conf.get(BatchConstants.CFG_II_SEGMENT_NAME), SegmentStatusEnum.NEW);

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexPartitioner.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexPartitioner.java b/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexPartitioner.java
index 141565f..fa4dccf 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexPartitioner.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexPartitioner.java
@@ -54,7 +54,7 @@ public class InvertedIndexPartitioner extends Partitioner<LongWritable, Immutabl
     public void setConf(Configuration conf) {
         this.conf = conf;
         try {
-            KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(conf);
+            KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
             IIManager mgr = IIManager.getInstance(config);
             IIInstance ii = mgr.getII(conf.get(BatchConstants.CFG_II_NAME));
             IISegment seg = ii.getSegment(conf.get(BatchConstants.CFG_II_SEGMENT_NAME), SegmentStatusEnum.NEW);

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexReducer.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexReducer.java b/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexReducer.java
index a1c0811..9f238b0 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexReducer.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/invertedindex/InvertedIndexReducer.java
@@ -53,7 +53,7 @@ public class InvertedIndexReducer extends KylinReducer<LongWritable, ImmutableBy
         super.publishConfiguration(context.getConfiguration());
 
         Configuration conf = context.getConfiguration();
-        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(conf);
+        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
         IIManager mgr = IIManager.getInstance(config);
         IIInstance ii = mgr.getII(conf.get(BatchConstants.CFG_II_NAME));
         IISegment seg = ii.getSegment(conf.get(BatchConstants.CFG_II_SEGMENT_NAME), SegmentStatusEnum.NEW);

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/metadata/src/main/java/org/apache/kylin/metadata/model/IJoinedFlatTableDesc.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/model/IJoinedFlatTableDesc.java b/metadata/src/main/java/org/apache/kylin/metadata/model/IJoinedFlatTableDesc.java
index 917e12b..abf87b7 100644
--- a/metadata/src/main/java/org/apache/kylin/metadata/model/IJoinedFlatTableDesc.java
+++ b/metadata/src/main/java/org/apache/kylin/metadata/model/IJoinedFlatTableDesc.java
@@ -20,8 +20,6 @@ package org.apache.kylin.metadata.model;
 
 import java.util.List;
 
-import org.apache.kylin.metadata.model.DataModelDesc;
-
 /**
  * Created by Hongbin Ma(Binmahone) on 12/30/14.
  */

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/metadata/src/main/java/org/apache/kylin/metadata/model/IntermediateColumnDesc.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/model/IntermediateColumnDesc.java b/metadata/src/main/java/org/apache/kylin/metadata/model/IntermediateColumnDesc.java
index 860773c..1b931a0 100644
--- a/metadata/src/main/java/org/apache/kylin/metadata/model/IntermediateColumnDesc.java
+++ b/metadata/src/main/java/org/apache/kylin/metadata/model/IntermediateColumnDesc.java
@@ -36,6 +36,10 @@ public class IntermediateColumnDesc {
         return id;
     }
 
+    public TblColRef getColRef() {
+        return this.colRef;
+    }
+
     public String getColumnName() {
         return colRef.getName();
     }

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/1b52438e/storage/src/main/java/org/apache/kylin/storage/hbase/coprocessor/endpoint/HbaseServerKVIterator.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/hbase/coprocessor/endpoint/HbaseServerKVIterator.java b/storage/src/main/java/org/apache/kylin/storage/hbase/coprocessor/endpoint/HbaseServerKVIterator.java
index a7b67d8..5d9f633 100644
--- a/storage/src/main/java/org/apache/kylin/storage/hbase/coprocessor/endpoint/HbaseServerKVIterator.java
+++ b/storage/src/main/java/org/apache/kylin/storage/hbase/coprocessor/endpoint/HbaseServerKVIterator.java
@@ -63,7 +63,6 @@ public class HbaseServerKVIterator implements Iterable<IIRow>, Closeable {
             this.regionScanner = innerScanner;
         }
 
-
         @Override
         public boolean hasNext() {
             return hasMore;
@@ -81,12 +80,7 @@ public class HbaseServerKVIterator implements Iterable<IIRow>, Closeable {
                 throw new IllegalStateException("Hbase row contains less than 1 cell");
             }
             for (Cell c : results) {
-                if (BytesUtil.compareBytes(IIDesc.HBASE_QUALIFIER_BYTES, 0, c.getQualifierArray(), c.getQualifierOffset(), IIDesc.HBASE_QUALIFIER_BYTES.length) == 0) {
-                    row.getKey().set(c.getRowArray(), c.getRowOffset(), c.getRowLength());
-                    row.getValue().set(c.getValueArray(), c.getValueOffset(), c.getValueLength());
-                } else if (BytesUtil.compareBytes(IIDesc.HBASE_DICTIONARY_BYTES, 0, c.getQualifierArray(), c.getQualifierOffset(), IIDesc.HBASE_DICTIONARY_BYTES.length) == 0) {
-                    row.getDictionary().set(c.getValueArray(), c.getValueOffset(), c.getValueLength());
-                }
+                row.updateWith(c);
             }
             return row;
         }
@@ -97,7 +91,6 @@ public class HbaseServerKVIterator implements Iterable<IIRow>, Closeable {
         }
     }
 
-
     @Override
     public Iterator<IIRow> iterator() {
         return new IIRowIterator(innerScanner);


[34/50] incubator-kylin git commit: KYLIN-653 minor change

Posted by li...@apache.org.
KYLIN-653 minor change


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/d1c115d8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/d1c115d8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/d1c115d8

Branch: refs/heads/streaming-localdict
Commit: d1c115d8242b4fc819ab36153d8bffa89a265631
Parents: 0f8b7a4
Author: honma <ho...@ebay.com>
Authored: Fri Mar 27 15:11:06 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Fri Mar 27 15:17:00 2015 +0800

----------------------------------------------------------------------
 .../org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d1c115d8/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java b/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
index 6832dcf..62cf6e8 100644
--- a/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
+++ b/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
@@ -9,7 +9,6 @@ import java.util.concurrent.LinkedBlockingQueue;
 
 import javax.annotation.Nullable;
 
-import com.google.common.collect.Sets;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.hbase.mapreduce.MutationSerialization;
@@ -40,6 +39,7 @@ import org.junit.Test;
 import com.google.common.base.Function;
 import com.google.common.collect.Collections2;
 import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
 
 /**
  * Created by Hongbin Ma(Binmahone) on 3/26/15.


[17/50] incubator-kylin git commit: refactor

Posted by li...@apache.org.
refactor


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/2b5495ce
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/2b5495ce
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/2b5495ce

Branch: refs/heads/streaming-localdict
Commit: 2b5495ce1debe21be361e942428cfff0bd1dff36
Parents: c3ff4f4
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 10:05:20 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 10:05:20 2015 +0800

----------------------------------------------------------------------
 .../invertedindex/index/BatchSliceBuilder.java  |   2 +-
 .../kylin/job/streaming/StreamingBootstrap.java | 117 +++++++++++++++++
 .../kylin/job/streaming/StreamingCLI.java       |  71 ++++++++++
 .../apache/kylin/job/IIStreamBuilderTest.java   |   2 +-
 pom.xml                                         |   1 +
 .../apache/kylin/streaming/KafkaRequester.java  | 128 +++++++++++--------
 .../kylin/streaming/StreamingBootstrap.java     | 109 ----------------
 .../apache/kylin/streaming/StreamingCLI.java    |  71 ----------
 8 files changed, 265 insertions(+), 236 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/2b5495ce/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/BatchSliceBuilder.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/BatchSliceBuilder.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/BatchSliceBuilder.java
index 6ba328c..037dd6c 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/BatchSliceBuilder.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/BatchSliceBuilder.java
@@ -87,7 +87,7 @@ public class BatchSliceBuilder {
 
     private long increaseSliceTimestamp(long timestamp) {
         if (timestamp <= sliceTimestamp) {
-            return ++timestamp; // ensure slice timestamp increases
+            return sliceTimestamp+1; // ensure slice timestamp increases
         } else {
             return timestamp;
         }
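
Worked example of the changed guard, assuming sliceTimestamp is the timestamp of the last emitted slice:

    long sliceTimestamp = 100L;
    long incoming = 95L;
    // old code: ++incoming        -> 96, still not greater than 100
    // new code: sliceTimestamp+1  -> 101, so slice timestamps stay strictly increasing
    long next = (incoming <= sliceTimestamp) ? sliceTimestamp + 1 : incoming;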

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/2b5495ce/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java b/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
new file mode 100644
index 0000000..ddaae29
--- /dev/null
+++ b/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
@@ -0,0 +1,117 @@
+/*
+ *
+ *
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *
+ *  contributor license agreements. See the NOTICE file distributed with
+ *
+ *  this work for additional information regarding copyright ownership.
+ *
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *
+ *  (the "License"); you may not use this file except in compliance with
+ *
+ *  the License. You may obtain a copy of the License at
+ *
+ *
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *
+ *  See the License for the specific language governing permissions and
+ *
+ *  limitations under the License.
+ *
+ * /
+ */
+
+package org.apache.kylin.job.streaming;
+
+import com.google.common.base.Preconditions;
+import kafka.api.OffsetRequest;
+import kafka.cluster.Broker;
+import kafka.javaapi.PartitionMetadata;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.invertedindex.IIInstance;
+import org.apache.kylin.invertedindex.IIManager;
+import org.apache.kylin.invertedindex.IISegment;
+import org.apache.kylin.invertedindex.model.IIDesc;
+import org.apache.kylin.job.hadoop.invertedindex.IICreateHTableJob;
+import org.apache.kylin.streaming.*;
+import org.apache.kylin.streaming.invertedindex.IIStreamBuilder;
+
+import java.nio.ByteBuffer;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
+/**
+ * Created by qianzhou on 3/26/15.
+ */
+public class StreamingBootstrap {
+
+    private KylinConfig kylinConfig;
+    private StreamManager streamManager;
+    private IIManager iiManager;
+
+    public static StreamingBootstrap getInstance(KylinConfig kylinConfig) {
+        return new StreamingBootstrap(kylinConfig);
+    }
+
+    private StreamingBootstrap(KylinConfig kylinConfig) {
+        this.kylinConfig = kylinConfig;
+        this.streamManager = StreamManager.getInstance(kylinConfig);
+        this.iiManager = IIManager.getInstance(kylinConfig);
+    }
+
+    private static Broker getLeadBroker(KafkaConfig kafkaConfig, int partitionId) {
+        final PartitionMetadata partitionMetadata = KafkaRequester.getPartitionMetadata(kafkaConfig.getTopic(), partitionId, kafkaConfig.getBrokers(), kafkaConfig);
+        if (partitionMetadata != null && partitionMetadata.errorCode() == 0) {
+            return partitionMetadata.leader();
+        } else {
+            return null;
+        }
+    }
+
+    public void startStreaming(String streamingConf, int partitionId) throws Exception {
+        final KafkaConfig kafkaConfig = streamManager.getKafkaConfig(streamingConf);
+        Preconditions.checkArgument(kafkaConfig != null, "cannot find kafka config:" + streamingConf);
+        final IIInstance ii = iiManager.getII(kafkaConfig.getIiName());
+        Preconditions.checkNotNull(ii);
+        Preconditions.checkArgument(ii.getSegments().size() > 0);
+        final IISegment iiSegment = ii.getSegments().get(0);
+
+        final Broker leadBroker = getLeadBroker(kafkaConfig, partitionId);
+        Preconditions.checkState(leadBroker != null, "cannot find lead broker");
+        final long earliestOffset = KafkaRequester.getLastOffset(kafkaConfig.getTopic(), partitionId, OffsetRequest.EarliestTime(), leadBroker, kafkaConfig);
+        long streamOffset = ii.getStreamOffsets().get(partitionId);
+        if (streamOffset < earliestOffset) {
+            streamOffset = earliestOffset;
+        }
+
+
+        KafkaConsumer consumer = new KafkaConsumer(kafkaConfig.getTopic(), 0, streamOffset, kafkaConfig.getBrokers(), kafkaConfig) {
+            @Override
+            protected void consume(long offset, ByteBuffer payload) throws Exception {
+                byte[] bytes = new byte[payload.limit()];
+                payload.get(bytes);
+                getStreamQueue().put(new Stream(offset, bytes));
+            }
+        };
+        final IIDesc desc = ii.getDescriptor();
+
+        Executors.newSingleThreadExecutor().submit(consumer);
+        final IIStreamBuilder task = new IIStreamBuilder(consumer.getStreamQueue(), iiSegment.getStorageLocationIdentifier(), desc, partitionId);
+        task.setStreamParser(JsonStreamParser.instance);
+        final Future<?> future = Executors.newSingleThreadExecutor().submit(task);
+        future.get();
+    }
+}
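
Editor's note: for orientation, a hedged sketch of how this new class would be driven by a caller. The streaming config name below is a placeholder, not something defined in this commit.

    import org.apache.kylin.common.KylinConfig;
    import org.apache.kylin.job.streaming.StreamingBootstrap;

    // Hypothetical driver; "my_kafka_stream" is a made-up streaming config name.
    public class StreamingBootstrapDemo {
        public static void main(String[] args) throws Exception {
            StreamingBootstrap bootstrap = StreamingBootstrap.getInstance(KylinConfig.getInstanceFromEnv());
            // Consumes partition 0 of the configured topic; blocks until the stream builder future completes.
            bootstrap.startStreaming("my_kafka_stream", 0);
        }
    }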

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/2b5495ce/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java b/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
new file mode 100644
index 0000000..8813cb3
--- /dev/null
+++ b/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
@@ -0,0 +1,71 @@
+/*
+ *
+ *
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *
+ *  contributor license agreements. See the NOTICE file distributed with
+ *
+ *  this work for additional information regarding copyright ownership.
+ *
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *
+ *  (the "License"); you may not use this file except in compliance with
+ *
+ *  the License. You may obtain a copy of the License at
+ *
+ *
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *
+ *  See the License for the specific language governing permissions and
+ *
+ *  limitations under the License.
+ *
+ * /
+ */
+
+package org.apache.kylin.job.streaming;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.kylin.common.KylinConfig;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Created by qianzhou on 3/26/15.
+ */
+public class StreamingCLI {
+
+    private static final Logger logger = LoggerFactory.getLogger(StreamingCLI.class);
+
+    public static void main(String[] args) {
+        try {
+            if (args.length < 2) {
+                printArgsError(args);
+                return;
+            }
+            if (args[0].equals("start")) {
+                String kafkaConfName = args[1];
+                StreamingBootstrap.getInstance(KylinConfig.getInstanceFromEnv()).startStreaming(kafkaConfName, 0);
+            } else if (args.equals("stop")) {
+
+            } else {
+                printArgsError(args);
+            }
+        } catch (Exception e) {
+        }
+    }
+
+    private static void printArgsError(String[] args) {
+        logger.warn("invalid args:" + StringUtils.join(args, " "));
+    }
+
+}
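
Editor's note: two details in the CLI above are worth flagging. `args.equals("stop")` compares the whole String[] against a String, so the stop branch can never be taken (it should test `args[0]`), and the empty catch block swallows every failure. A hedged sketch of the corrected dispatch, reusing the logger and printArgsError from the class above:

    try {
        if (args.length < 2) {
            printArgsError(args);
            return;
        }
        if ("start".equals(args[0])) {
            StreamingBootstrap.getInstance(KylinConfig.getInstanceFromEnv()).startStreaming(args[1], 0);
        } else if ("stop".equals(args[0])) {
            // stop is not implemented in this commit yet
        } else {
            printArgsError(args);
        }
    } catch (Exception e) {
        logger.error("streaming command failed", e); // surface the failure instead of hiding it
    }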

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/2b5495ce/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java b/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
index 35a0fe9..d42da33 100644
--- a/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
+++ b/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
@@ -38,7 +38,7 @@ import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.util.AbstractKylinTestCase;
 import org.apache.kylin.common.util.ClassUtil;
 import org.apache.kylin.common.util.HBaseMetadataTestCase;
-import org.apache.kylin.streaming.StreamingBootstrap;
+import org.apache.kylin.job.streaming.StreamingBootstrap;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.BeforeClass;

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/2b5495ce/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 38d6220..064ea11 100644
--- a/pom.xml
+++ b/pom.xml
@@ -616,6 +616,7 @@
                                 <exclude>**/BuildCubeWithEngineTest.java</exclude>
                                 <exclude>**/BuildIIWithEngineTest.java</exclude>
                                 <exclude>**/BuildIIWithStreamTest.java</exclude>
+                                <exclude>**/IIStreamBuilderTest.java</exclude>
                                 <exclude>**/SampleCubeSetupTest.java</exclude>
                                 <exclude>**/KylinQueryTest.java</exclude>
                                 <exclude>**/Kafka*Test.java</exclude>

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/2b5495ce/streaming/src/main/java/org/apache/kylin/streaming/KafkaRequester.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/KafkaRequester.java b/streaming/src/main/java/org/apache/kylin/streaming/KafkaRequester.java
index 699c0ed..ce87047 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/KafkaRequester.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/KafkaRequester.java
@@ -59,53 +59,65 @@ public final class KafkaRequester {
     private static final Logger logger = LoggerFactory.getLogger(KafkaRequester.class);
 
     public static TopicMeta getKafkaTopicMeta(KafkaConfig kafkaConfig) {
-        SimpleConsumer consumer;
+        SimpleConsumer consumer = null;
         for (Broker broker : kafkaConfig.getBrokers()) {
-            consumer = new SimpleConsumer(broker.host(), broker.port(), kafkaConfig.getTimeout(), kafkaConfig.getBufferSize(), "topic_meta_lookup");
-            List<String> topics = Collections.singletonList(kafkaConfig.getTopic());
-            TopicMetadataRequest req = new TopicMetadataRequest(topics);
-            TopicMetadataResponse resp = consumer.send(req);
-            final List<TopicMetadata> topicMetadatas = resp.topicsMetadata();
-            if (topicMetadatas.size() != 1) {
-                break;
-            }
-            final TopicMetadata topicMetadata = topicMetadatas.get(0);
-            if (topicMetadata.errorCode() != 0) {
-                break;
-            }
-            List<Integer> partitionIds = Lists.transform(topicMetadata.partitionsMetadata(), new Function<PartitionMetadata, Integer>() {
-                @Nullable
-                @Override
-                public Integer apply(PartitionMetadata partitionMetadata) {
-                    return partitionMetadata.partitionId();
+            try {
+                consumer = new SimpleConsumer(broker.host(), broker.port(), kafkaConfig.getTimeout(), kafkaConfig.getBufferSize(), "topic_meta_lookup");
+                List<String> topics = Collections.singletonList(kafkaConfig.getTopic());
+                TopicMetadataRequest req = new TopicMetadataRequest(topics);
+                TopicMetadataResponse resp = consumer.send(req);
+                final List<TopicMetadata> topicMetadatas = resp.topicsMetadata();
+                if (topicMetadatas.size() != 1) {
+                    break;
+                }
+                final TopicMetadata topicMetadata = topicMetadatas.get(0);
+                if (topicMetadata.errorCode() != 0) {
+                    break;
                 }
-            });
-            return new TopicMeta(kafkaConfig.getTopic(), partitionIds);
+                List<Integer> partitionIds = Lists.transform(topicMetadata.partitionsMetadata(), new Function<PartitionMetadata, Integer>() {
+                    @Nullable
+                    @Override
+                    public Integer apply(PartitionMetadata partitionMetadata) {
+                        return partitionMetadata.partitionId();
+                    }
+                });
+                return new TopicMeta(kafkaConfig.getTopic(), partitionIds);
+            } finally {
+                if (consumer != null) {
+                    consumer.close();
+                }
+            }
         }
         logger.debug("cannot find topic:" + kafkaConfig.getTopic());
         return null;
     }
 
     public static PartitionMetadata getPartitionMetadata(String topic, int partitionId, List<Broker> brokers, KafkaConfig kafkaConfig) {
-        SimpleConsumer consumer;
+        SimpleConsumer consumer = null;
         for (Broker broker : brokers) {
-            consumer = new SimpleConsumer(broker.host(), broker.port(), kafkaConfig.getTimeout(), kafkaConfig.getBufferSize(), "topic_meta_lookup");
-            List<String> topics = Collections.singletonList(topic);
-            TopicMetadataRequest req = new TopicMetadataRequest(topics);
-            TopicMetadataResponse resp = consumer.send(req);
-            final List<TopicMetadata> topicMetadatas = resp.topicsMetadata();
-            if (topicMetadatas.size() != 1) {
-                logger.warn("invalid topicMetadata size:" + topicMetadatas.size());
-                break;
-            }
-            final TopicMetadata topicMetadata = topicMetadatas.get(0);
-            if (topicMetadata.errorCode() != 0) {
-                logger.warn("fetching topicMetadata with errorCode:" + topicMetadata.errorCode());
-                break;
-            }
-            for (PartitionMetadata partitionMetadata : topicMetadata.partitionsMetadata()) {
-                if (partitionMetadata.partitionId() == partitionId) {
-                    return partitionMetadata;
+            try {
+                consumer = new SimpleConsumer(broker.host(), broker.port(), kafkaConfig.getTimeout(), kafkaConfig.getBufferSize(), "topic_meta_lookup");
+                List<String> topics = Collections.singletonList(topic);
+                TopicMetadataRequest req = new TopicMetadataRequest(topics);
+                TopicMetadataResponse resp = consumer.send(req);
+                final List<TopicMetadata> topicMetadatas = resp.topicsMetadata();
+                if (topicMetadatas.size() != 1) {
+                    logger.warn("invalid topicMetadata size:" + topicMetadatas.size());
+                    break;
+                }
+                final TopicMetadata topicMetadata = topicMetadatas.get(0);
+                if (topicMetadata.errorCode() != 0) {
+                    logger.warn("fetching topicMetadata with errorCode:" + topicMetadata.errorCode());
+                    break;
+                }
+                for (PartitionMetadata partitionMetadata : topicMetadata.partitionsMetadata()) {
+                    if (partitionMetadata.partitionId() == partitionId) {
+                        return partitionMetadata;
+                    }
+                }
+            } finally {
+                if (consumer != null) {
+                    consumer.close();
                 }
             }
         }
@@ -116,30 +128,38 @@ public final class KafkaRequester {
     public static FetchResponse fetchResponse(String topic, int partitionId, long offset, Broker broker, KafkaConfig kafkaConfig) {
         final String clientName = "client_" + topic + "_" + partitionId;
         SimpleConsumer consumer = new SimpleConsumer(broker.host(), broker.port(), kafkaConfig.getTimeout(), kafkaConfig.getBufferSize(), clientName);
-        kafka.api.FetchRequest req = new FetchRequestBuilder()
-                .clientId(clientName)
-                .addFetch(topic, partitionId, offset, kafkaConfig.getMaxReadCount()) // Note: this fetchSize of 100000 might need to be increased if large batches are written to Kafka
-                .build();
-        return consumer.fetch(req);
+        try {
+            kafka.api.FetchRequest req = new FetchRequestBuilder()
+                    .clientId(clientName)
+                    .addFetch(topic, partitionId, offset, kafkaConfig.getMaxReadCount()) // Note: this fetchSize of 100000 might need to be increased if large batches are written to Kafka
+                    .build();
+            return consumer.fetch(req);
+        } finally {
+            consumer.close();
+        }
     }
 
     public static long getLastOffset(String topic, int partitionId,
                                      long whichTime, Broker broker, KafkaConfig kafkaConfig) {
         String clientName = "client_" + topic + "_" + partitionId;
         SimpleConsumer consumer = new SimpleConsumer(broker.host(), broker.port(), kafkaConfig.getTimeout(), kafkaConfig.getBufferSize(), clientName);
-        TopicAndPartition topicAndPartition = new TopicAndPartition(topic, partitionId);
-        Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo = new HashMap<TopicAndPartition, PartitionOffsetRequestInfo>();
-        requestInfo.put(topicAndPartition, new PartitionOffsetRequestInfo(whichTime, 1));
-        kafka.javaapi.OffsetRequest request = new kafka.javaapi.OffsetRequest(
-                requestInfo, kafka.api.OffsetRequest.CurrentVersion(), clientName);
-        OffsetResponse response = consumer.getOffsetsBefore(request);
+        try {
+            TopicAndPartition topicAndPartition = new TopicAndPartition(topic, partitionId);
+            Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo = new HashMap<TopicAndPartition, PartitionOffsetRequestInfo>();
+            requestInfo.put(topicAndPartition, new PartitionOffsetRequestInfo(whichTime, 1));
+            kafka.javaapi.OffsetRequest request = new kafka.javaapi.OffsetRequest(
+                    requestInfo, kafka.api.OffsetRequest.CurrentVersion(), clientName);
+            OffsetResponse response = consumer.getOffsetsBefore(request);
 
-        if (response.hasError()) {
-            System.out.println("Error fetching data Offset Data the Broker. Reason: " + response.errorCode(topic, partitionId));
-            return 0;
+            if (response.hasError()) {
+                System.out.println("Error fetching data Offset Data the Broker. Reason: " + response.errorCode(topic, partitionId));
+                return 0;
+            }
+            long[] offsets = response.offsets(topic, partitionId);
+            return offsets[0];
+        } finally {
+            consumer.close();
         }
-        long[] offsets = response.offsets(topic, partitionId);
-        return offsets[0];
     }
 
 

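Editor's note: every KafkaRequester change above follows one pattern: the SimpleConsumer is now closed in a finally block, so the broker connection is released even when the loop breaks early or the request throws. Reduced to its core, using only the constructor and close() call already visible in the diff:

    SimpleConsumer consumer = null;
    try {
        consumer = new SimpleConsumer(broker.host(), broker.port(),
                kafkaConfig.getTimeout(), kafkaConfig.getBufferSize(), "topic_meta_lookup");
        // ... send the metadata / offset / fetch request here ...
    } finally {
        if (consumer != null) {
            consumer.close(); // always release the connection to the broker
        }
    }
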
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/2b5495ce/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java b/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java
deleted file mode 100644
index bd1ab42..0000000
--- a/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- *
- *
- *  Licensed to the Apache Software Foundation (ASF) under one or more
- *
- *  contributor license agreements. See the NOTICE file distributed with
- *
- *  this work for additional information regarding copyright ownership.
- *
- *  The ASF licenses this file to You under the Apache License, Version 2.0
- *
- *  (the "License"); you may not use this file except in compliance with
- *
- *  the License. You may obtain a copy of the License at
- *
- *
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- *
- *
- *  Unless required by applicable law or agreed to in writing, software
- *
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *
- *  See the License for the specific language governing permissions and
- *
- *  limitations under the License.
- *
- * /
- */
-
-package org.apache.kylin.streaming;
-
-import com.google.common.base.Preconditions;
-import kafka.api.OffsetRequest;
-import kafka.cluster.Broker;
-import kafka.javaapi.PartitionMetadata;
-import org.apache.kylin.common.KylinConfig;
-import org.apache.kylin.invertedindex.IIInstance;
-import org.apache.kylin.invertedindex.IIManager;
-import org.apache.kylin.invertedindex.model.IIDesc;
-import org.apache.kylin.streaming.invertedindex.IIStreamBuilder;
-
-import java.nio.ByteBuffer;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-
-/**
- * Created by qianzhou on 3/26/15.
- */
-public class StreamingBootstrap {
-
-    private KylinConfig kylinConfig;
-    private StreamManager streamManager;
-    private IIManager iiManager;
-
-    public static StreamingBootstrap getInstance(KylinConfig kylinConfig) {
-        return new StreamingBootstrap(kylinConfig);
-    }
-
-    private StreamingBootstrap(KylinConfig kylinConfig) {
-        this.kylinConfig = kylinConfig;
-        this.streamManager = StreamManager.getInstance(kylinConfig);
-        this.iiManager = IIManager.getInstance(kylinConfig);
-    }
-
-    private static Broker getLeadBroker(KafkaConfig kafkaConfig, int partitionId) {
-        final PartitionMetadata partitionMetadata = KafkaRequester.getPartitionMetadata(kafkaConfig.getTopic(), partitionId, kafkaConfig.getBrokers(), kafkaConfig);
-        if (partitionMetadata != null && partitionMetadata.errorCode() == 0) {
-            return partitionMetadata.leader();
-        } else {
-            return null;
-        }
-    }
-
-    public void startStreaming(String streamingConf, int partitionId) throws Exception {
-        final KafkaConfig kafkaConfig = streamManager.getKafkaConfig(streamingConf);
-        Preconditions.checkArgument(kafkaConfig != null, "cannot find kafka config:" + streamingConf);
-        final IIInstance ii = iiManager.getII(kafkaConfig.getIiName());
-        Preconditions.checkNotNull(ii);
-
-        final Broker leadBroker = getLeadBroker(kafkaConfig, partitionId);
-        Preconditions.checkState(leadBroker != null, "cannot find lead broker");
-        final long earliestOffset = KafkaRequester.getLastOffset(kafkaConfig.getTopic(), partitionId, OffsetRequest.EarliestTime(), leadBroker, kafkaConfig);
-        long streamOffset = ii.getStreamOffsets().get(partitionId);
-        if (streamOffset < earliestOffset) {
-            streamOffset = earliestOffset;
-        }
-
-
-        KafkaConsumer consumer = new KafkaConsumer(kafkaConfig.getTopic(), 0, streamOffset, kafkaConfig.getBrokers(), kafkaConfig) {
-            @Override
-            protected void consume(long offset, ByteBuffer payload) throws Exception {
-                byte[] bytes = new byte[payload.limit()];
-                payload.get(bytes);
-                getStreamQueue().put(new Stream(offset, bytes));
-            }
-        };
-        final IIDesc desc = ii.getDescriptor();
-        Executors.newSingleThreadExecutor().submit(consumer);
-        final IIStreamBuilder task = new IIStreamBuilder(consumer.getStreamQueue(), ii.getSegments().get(0).getStorageLocationIdentifier(), desc, partitionId);
-        task.setStreamParser(JsonStreamParser.instance);
-        final Future<?> future = Executors.newSingleThreadExecutor().submit(task);
-        future.get();
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/2b5495ce/streaming/src/main/java/org/apache/kylin/streaming/StreamingCLI.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/StreamingCLI.java b/streaming/src/main/java/org/apache/kylin/streaming/StreamingCLI.java
deleted file mode 100644
index dac8ce0..0000000
--- a/streaming/src/main/java/org/apache/kylin/streaming/StreamingCLI.java
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- *
- *
- *  Licensed to the Apache Software Foundation (ASF) under one or more
- *
- *  contributor license agreements. See the NOTICE file distributed with
- *
- *  this work for additional information regarding copyright ownership.
- *
- *  The ASF licenses this file to You under the Apache License, Version 2.0
- *
- *  (the "License"); you may not use this file except in compliance with
- *
- *  the License. You may obtain a copy of the License at
- *
- *
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- *
- *
- *  Unless required by applicable law or agreed to in writing, software
- *
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *
- *  See the License for the specific language governing permissions and
- *
- *  limitations under the License.
- *
- * /
- */
-
-package org.apache.kylin.streaming;
-
-import org.apache.commons.lang3.StringUtils;
-import org.apache.kylin.common.KylinConfig;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Created by qianzhou on 3/26/15.
- */
-public class StreamingCLI {
-
-    private static final Logger logger = LoggerFactory.getLogger(StreamingCLI.class);
-
-    public static void main(String[] args) {
-        try {
-            if (args.length < 2) {
-                printArgsError(args);
-                return;
-            }
-            if (args[0].equals("start")) {
-                String kafkaConfName = args[1];
-                StreamingBootstrap.getInstance(KylinConfig.getInstanceFromEnv()).startStreaming(kafkaConfName, 0);
-            } else if (args.equals("stop")) {
-
-            } else {
-                printArgsError(args);
-            }
-        } catch (Exception e) {
-        }
-    }
-
-    private static void printArgsError(String[] args) {
-        logger.warn("invalid args:" + StringUtils.join(args, " "));
-    }
-
-}


[08/50] incubator-kylin git commit: Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict

Posted by li...@apache.org.
Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/3bf6b377
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/3bf6b377
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/3bf6b377

Branch: refs/heads/streaming-localdict
Commit: 3bf6b377fe419371a4d53ef79af2c69a7f872713
Parents: d564876 9a1c4cb
Author: Shao Feng, Shi <sh...@ebay.com>
Authored: Thu Mar 26 18:04:03 2015 +0800
Committer: Shao Feng, Shi <sh...@ebay.com>
Committed: Thu Mar 26 18:04:03 2015 +0800

----------------------------------------------------------------------
 .../kylin/common/persistence/ResourceStore.java |   2 +-
 .../localmeta/streaming/kafka_test.json         |  15 ++
 .../apache/kylin/invertedindex/IIInstance.java  |  12 ++
 .../apache/kylin/invertedindex/IIManager.java   |  17 +-
 .../invertedindex/index/BatchSliceBuilder.java  |   6 +-
 .../model/IIJoinedFlatTableDesc.java            |  12 +-
 .../invertedindex/model/IIKeyValueCodec.java    |  91 +++++----
 .../model/IIKeyValueCodecWithState.java         |  68 +++++++
 .../apache/kylin/invertedindex/model/IIRow.java |  13 ++
 .../org/apache/kylin/job/JoinedFlatTable.java   |   1 -
 .../kylin/job/hadoop/AbstractHadoopJob.java     |   2 +-
 .../kylin/job/hadoop/cube/BaseCuboidMapper.java |   2 +-
 .../kylin/job/hadoop/cube/CubeHFileMapper.java  |   2 +-
 .../kylin/job/hadoop/cube/CuboidReducer.java    |   2 +-
 .../job/hadoop/cube/FactDistinctColumnsJob.java |   2 +-
 .../hadoop/cube/FactDistinctColumnsMapper.java  | 200 -------------------
 .../cube/FactDistinctColumnsMapperBase.java     |  81 ++++++++
 .../hadoop/cube/FactDistinctColumnsReducer.java |   2 +-
 .../cube/FactDistinctHiveColumnsMapper.java     | 129 ++++++++++++
 .../cube/FactDistinctIIColumnsMapper.java       | 129 ++++++++++++
 .../job/hadoop/cube/MergeCuboidMapper.java      |   2 +-
 .../kylin/job/hadoop/cube/NDCuboidMapper.java   |   2 +-
 .../job/hadoop/cube/NewBaseCuboidMapper.java    |   2 +-
 .../job/hadoop/cubev2/InMemCuboidMapper.java    |   2 +-
 .../job/hadoop/cubev2/InMemCuboidReducer.java   |   2 +-
 .../invertedindex/InvertedIndexMapper.java      |   2 +-
 .../invertedindex/InvertedIndexPartitioner.java |   2 +-
 .../invertedindex/InvertedIndexReducer.java     |   2 +-
 .../metadata/model/IJoinedFlatTableDesc.java    |   2 -
 .../metadata/model/IntermediateColumnDesc.java  |   4 +
 .../endpoint/HbaseServerKVIterator.java         |   9 +-
 streaming/pom.xml                               |   8 +
 .../kylin/streaming/JsonStreamParser.java       |  73 +++++++
 .../org/apache/kylin/streaming/KafkaConfig.java |  19 +-
 .../apache/kylin/streaming/KafkaConsumer.java   |  17 +-
 .../apache/kylin/streaming/StreamBuilder.java   |   9 +
 .../apache/kylin/streaming/StreamManager.java   |  18 +-
 .../apache/kylin/streaming/StreamParser.java    |  47 +++++
 .../kylin/streaming/StreamingBootstrap.java     | 102 ++++++++++
 .../apache/kylin/streaming/StreamingCLI.java    |  70 +++++++
 .../kylin/streaming/StringStreamParser.java     |  55 +++++
 .../kylin/streaming/cube/CubeStreamBuilder.java |   2 +-
 .../invertedindex/IIStreamBuilder.java          |   2 +-
 .../kylin/streaming/EternalStreamProducer.java  |   3 +-
 .../apache/kylin/streaming/KafkaBaseTest.java   |  22 --
 .../apache/kylin/streaming/KafkaConfigTest.java |  50 -----
 .../kylin/streaming/KafkaConsumerTest.java      |   8 +-
 .../kylin/streaming/KafkaRequesterTest.java     |   9 +
 .../kylin/streaming/Nous/NousMessageTest.java   |   4 +-
 .../kylin/streaming/OneOffStreamProducer.java   |   2 +-
 .../kylin/streaming/StreamManagerTest.java      |  69 +++++++
 .../invertedindex/PrintOutStreamBuilder.java    |  67 +++++++
 .../kafka_streaming_test/kafka.properties       |  10 -
 53 files changed, 1068 insertions(+), 417 deletions(-)
----------------------------------------------------------------------



[13/50] incubator-kylin git commit: Collect cuboid statistics during fetching distinct columns;

Posted by li...@apache.org.
Collect cuboid statistics during fetching distinct columns;

Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/d4a271df
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/d4a271df
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/d4a271df

Branch: refs/heads/streaming-localdict
Commit: d4a271df9d9b055e44d1a6fc1e3cc3055e14c2bd
Parents: 7360f5b
Author: Shao Feng, Shi <sh...@ebay.com>
Authored: Thu Mar 26 23:15:57 2015 +0800
Committer: Shao Feng, Shi <sh...@ebay.com>
Committed: Thu Mar 26 23:15:57 2015 +0800

----------------------------------------------------------------------
 .../cube/FactDistinctColumnsMapperBase.java     |  5 +-
 .../hadoop/cube/FactDistinctColumnsReducer.java | 53 ++++++++++++++------
 .../cube/FactDistinctHiveColumnsMapper.java     | 51 +++++++++++++------
 3 files changed, 77 insertions(+), 32 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d4a271df/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
index c0455ff..9945769 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
@@ -5,6 +5,7 @@ import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.ShortWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hive.hcatalog.data.HCatRecord;
@@ -23,7 +24,7 @@ import org.apache.kylin.metadata.model.TblColRef;
 /**
  * Created by Hongbin Ma(Binmahone) on 3/26/15.
  */
-public class FactDistinctColumnsMapperBase<KEYIN, VALUEIN> extends KylinMapper<KEYIN, VALUEIN, ShortWritable, Text> {
+public class FactDistinctColumnsMapperBase<KEYIN, VALUEIN> extends KylinMapper<KEYIN, VALUEIN, LongWritable, Text> {
 
     protected String cubeName;
     protected CubeInstance cube;
@@ -32,7 +33,7 @@ public class FactDistinctColumnsMapperBase<KEYIN, VALUEIN> extends KylinMapper<K
     protected List<TblColRef> columns;
     protected ArrayList<Integer> factDictCols;
 
-    protected ShortWritable outputKey = new ShortWritable();
+    protected LongWritable outputKey = new LongWritable();
     protected Text outputValue = new Text();
     protected int errorRecordCounter =0;
 

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d4a271df/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java
index 2052d08..e1529d3 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsReducer.java
@@ -19,13 +19,14 @@
 package org.apache.kylin.job.hadoop.cube;
 
 import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.ShortWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Reducer;
 import org.apache.kylin.common.KylinConfig;
@@ -44,17 +45,20 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 
 /**
  * @author yangli9
  */
-public class FactDistinctColumnsReducer extends KylinReducer<ShortWritable, Text, NullWritable, Text> {
+public class FactDistinctColumnsReducer extends KylinReducer<LongWritable, Text, NullWritable, Text> {
 
     private List<TblColRef> columnList = new ArrayList<TblColRef>();
     private boolean collectStatistics = false;
     private String statisticsOutput = null;
     private List<Long> rowKeyCountInMappers;
-    private HyperLogLogPlusCounter totalHll;
+    private Map<Long, Long> rowKeyCountInCuboids;
+    protected Map<Long, HyperLogLogPlusCounter> cuboidHLLMap = null;
+    protected long baseCuboidId;
 
     @Override
     protected void setup(Context context) throws IOException {
@@ -66,23 +70,24 @@ public class FactDistinctColumnsReducer extends KylinReducer<ShortWritable, Text
         CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
         CubeDesc cubeDesc = cube.getDescriptor();
 
-        long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
+        baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
         Cuboid baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId);
         columnList = baseCuboid.getColumns();
         collectStatistics = Boolean.parseBoolean(conf.get(BatchConstants.CFG_STATISTICS_ENABLED));
         statisticsOutput = conf.get(BatchConstants.CFG_STATISTICS_OUTPUT);
 
         if (collectStatistics) {
-            totalHll = new HyperLogLogPlusCounter(16);
             rowKeyCountInMappers = Lists.newArrayList();
+            rowKeyCountInCuboids = Maps.newHashMap();
+            cuboidHLLMap = Maps.newHashMap();
         }
     }
 
     @Override
-    public void reduce(ShortWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
+    public void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
 
         if (key.get() >= 0) {
-            TblColRef col = columnList.get(key.get());
+            TblColRef col = columnList.get((int) key.get());
 
             HashSet<ByteArray> set = new HashSet<ByteArray>();
             for (Text textValue : values) {
@@ -105,26 +110,38 @@ public class FactDistinctColumnsReducer extends KylinReducer<ShortWritable, Text
             }
         } else {
             // for hll
+            long cuboidId = 0 - key.get();
+
             for (Text value : values) {
                 HyperLogLogPlusCounter hll = new HyperLogLogPlusCounter(16);
                 ByteArray byteArray = new ByteArray(value.getBytes());
                 hll.readRegisters(byteArray.asBuffer());
 
-                rowKeyCountInMappers.add(hll.getCountEstimate());
-                // merge the hll with total hll
-                totalHll.merge(hll);
+                if (cuboidId > baseCuboidId) {
+                    // if this is the summary info from a mapper, record the number before merge
+                    rowKeyCountInMappers.add(hll.getCountEstimate());
+                }
+
+                if (cuboidHLLMap.get(cuboidId) != null) {
+                    hll.merge(cuboidHLLMap.get(cuboidId));
+                }
+                cuboidHLLMap.put(cuboidId, hll);
             }
         }
 
     }
 
     protected void cleanup(Reducer.Context context) throws IOException, InterruptedException {
+
+        for (Long cuboidId : cuboidHLLMap.keySet()) {
+            rowKeyCountInCuboids.put(cuboidId, cuboidHLLMap.get(cuboidId).getCountEstimate());
+        }
+
         //output the hll info;
         if (collectStatistics) {
             Configuration conf = context.getConfiguration();
             FileSystem fs = FileSystem.get(conf);
-            String outputPath = conf.get(BatchConstants.CFG_STATISTICS_OUTPUT);
-            FSDataOutputStream out = fs.create(new Path(outputPath, BatchConstants.CFG_STATISTICS_CUBE_ESTIMATION));
+            FSDataOutputStream out = fs.create(new Path(statisticsOutput, BatchConstants.CFG_STATISTICS_CUBE_ESTIMATION));
 
             try {
                 long totalSum = 0;
@@ -141,13 +158,21 @@ public class FactDistinctColumnsReducer extends KylinReducer<ShortWritable, Text
                 out.write('\n');
 
 
-                msg = "The merged cube segment has " + totalHll.getCountEstimate() + " rows.";
+                long grantTotal = rowKeyCountInCuboids.get(baseCuboidId + 1);
+                msg = "The merged cube has " + grantTotal + " rows.";
                 out.write(msg.getBytes());
                 out.write('\n');
 
-                msg = "The compaction rate is " + (totalHll.getCountEstimate()) + "/" + totalSum + " = " + (totalHll.getCountEstimate() * 100.0) / totalSum + "%.";
+                msg = "The compaction rate is " + (grantTotal) + "/" + totalSum + " = " + (grantTotal * 100.0) / totalSum + "%.";
                 out.write(msg.getBytes());
                 out.write('\n');
+                out.write('\n');
+                
+                for (long i = 0; i < baseCuboidId; i++) {
+                    msg = "Cuboid " + i + " has " + rowKeyCountInCuboids.get(i) + " rows.";
+                    out.write(msg.getBytes());
+                    out.write('\n');
+                }
 
             } finally {
                 out.close();
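
Editor's note: the reducer now keeps one HyperLogLogPlusCounter per cuboid and folds incoming registers into it. Sticking to the calls visible in this patch (constructor with precision 16, add, writeRegisters, readRegisters, merge, getCountEstimate), the mapper-to-reducer round trip looks roughly like this:

    // Rough sketch of the register round trip; buffer size matches the mapper's 64 KB allocation.
    HyperLogLogPlusCounter mapperSide = new HyperLogLogPlusCounter(16);
    mapperSide.add("rowkey,values,joined,by,comma");      // one call per input row
    ByteBuffer buf = ByteBuffer.allocate(64 * 1024);
    mapperSide.writeRegisters(buf);                       // serialized into the Text value

    HyperLogLogPlusCounter reducerSide = new HyperLogLogPlusCounter(16);
    reducerSide.readRegisters(ByteBuffer.wrap(buf.array()));
    HyperLogLogPlusCounter total = new HyperLogLogPlusCounter(16);
    total.merge(reducerSide);                             // accumulate across mappers
    long estimate = total.getCountEstimate();             // distinct rowkey estimate for the cuboid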

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d4a271df/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctHiveColumnsMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctHiveColumnsMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctHiveColumnsMapper.java
index 64ae353..9e9c096 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctHiveColumnsMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctHiveColumnsMapper.java
@@ -18,11 +18,8 @@
 
 package org.apache.kylin.job.hadoop.cube;
 
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.Collection;
-import java.util.List;
-
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hive.hcatalog.data.HCatRecord;
@@ -30,12 +27,17 @@ import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
 import org.apache.hive.hcatalog.data.schema.HCatSchema;
 import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
 import org.apache.kylin.common.hll.HyperLogLogPlusCounter;
+import org.apache.kylin.cube.cuboid.Cuboid;
 import org.apache.kylin.cube.cuboid.CuboidScheduler;
 import org.apache.kylin.cube.model.CubeJoinedFlatTableDesc;
 import org.apache.kylin.dict.lookup.HiveTableReader;
 import org.apache.kylin.job.constant.BatchConstants;
 
-import com.google.common.collect.Lists;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
 
 /**
  * @author yangli9
@@ -48,7 +50,8 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap
     protected boolean collectStatistics = false;
     protected CuboidScheduler cuboidScheduler = null;
     protected List<String> rowKeyValues = null;
-    protected HyperLogLogPlusCounter hll;
+    protected Map<Long, HyperLogLogPlusCounter> cuboidHLLMap = null;
+    protected HyperLogLogPlusCounter totalHll = null;
     protected int nRowKey;
 
     @Override
@@ -58,11 +61,11 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap
         schema = HCatInputFormat.getTableSchema(context.getConfiguration());
         intermediateTableDesc = new CubeJoinedFlatTableDesc(cubeDesc, null);
 
-
         collectStatistics = Boolean.parseBoolean(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_ENABLED));
         if (collectStatistics) {
             cuboidScheduler = new CuboidScheduler(cubeDesc);
-            hll = new HyperLogLogPlusCounter(16);
+            cuboidHLLMap = Maps.newHashMap();
+            totalHll = new HyperLogLogPlusCounter(16);
             rowKeyValues = Lists.newArrayList();
             nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length;
         }
@@ -74,7 +77,7 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap
             int[] flatTableIndexes = intermediateTableDesc.getRowKeyColumnIndexes();
             HCatFieldSchema fieldSchema;
             for (int i : factDictCols) {
-                outputKey.set((short) i);
+                outputKey.set((long) i);
                 fieldSchema = schema.get(flatTableIndexes[i]);
                 Object fieldValue = record.get(fieldSchema.getName(), schema);
                 if (fieldValue == null)
@@ -103,8 +106,13 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap
             mask = mask >> 1;
         }
 
-        String key = StringUtils.join(rowKeyValues, ",");
-        hll.add(key);
+        HyperLogLogPlusCounter hll = cuboidHLLMap.get(cuboidId);
+        if (hll == null) {
+            hll = new HyperLogLogPlusCounter(16);
+            cuboidHLLMap.put(cuboidId, hll);
+        }
+
+        hll.add(StringUtils.join(rowKeyValues, ","));
 
         Collection<Long> children = cuboidScheduler.getSpanningCuboid(cuboidId);
         for (Long childId : children) {
@@ -116,11 +124,22 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap
     @Override
     protected void cleanup(Context context) throws IOException, InterruptedException {
         if (collectStatistics) {
-            // output hll to reducer, key is -1
-            // keyBuf = Bytes.toBytes(-1);
-            outputKey.set((short) -1);
+
+            // output each cuboid's hll to reducer, key is 0 - cuboidId
+            for (Long cuboidId : cuboidHLLMap.keySet()) {
+                HyperLogLogPlusCounter hll = cuboidHLLMap.get(cuboidId);
+                totalHll.merge(hll); // merge each cuboid's counter to the total hll
+                outputKey.set(0 - cuboidId);
+                ByteBuffer hllBuf = ByteBuffer.allocate(64 * 1024);
+                hll.writeRegisters(hllBuf);
+                outputValue.set(hllBuf.array());
+                context.write(outputKey, outputValue);
+            }
+
+            //output the total hll for this mapper;
+            outputKey.set(0 - baseCuboidId - 1);
             ByteBuffer hllBuf = ByteBuffer.allocate(64 * 1024);
-            hll.writeRegisters(hllBuf);
+            totalHll.writeRegisters(hllBuf);
             outputValue.set(hllBuf.array());
             context.write(outputKey, outputValue);
         }
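
Editor's note: taken together, the mapper and reducer in this commit multiplex two record types over one signed key space: a non-negative key is a rowkey-column index carrying distinct values, a negative key carries HyperLogLog registers for cuboid `-key`, and `-(baseCuboidId + 1)` is reserved for each mapper's merged total. Spelled out as illustrative helpers (not code from the patch):

    // Illustrative helpers describing the key convention used above.
    static long columnKey(int columnIndex)      { return columnIndex; }          // >= 0: distinct values of that rowkey column
    static long cuboidKey(long cuboidId)        { return 0 - cuboidId; }         // < 0: HLL registers of this cuboid
    static long mapperTotalKey(long baseCuboid) { return 0 - baseCuboid - 1; }   // one past the base cuboid: per-mapper total HLL
    static boolean isStatisticsKey(long key)    { return key < 0; }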


[21/50] incubator-kylin git commit: refactor

Posted by li...@apache.org.
refactor


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/b6b3388c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/b6b3388c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/b6b3388c

Branch: refs/heads/streaming-localdict
Commit: b6b3388ce2239fe36f60f8aad2349081813b10f7
Parents: 7088724
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 11:57:27 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 11:57:27 2015 +0800

----------------------------------------------------------------------
 .../kylin/job/streaming/StreamingBootstrap.java | 35 +++++++++++++++-----
 .../kylin/job/streaming/StreamingCLI.java       |  2 +-
 .../apache/kylin/job/BuildIIWithStreamTest.java | 26 +++++----------
 .../apache/kylin/streaming/KafkaConsumer.java   |  8 ++++-
 .../java/org/apache/kylin/streaming/Stream.java |  2 ++
 5 files changed, 45 insertions(+), 28 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/b6b3388c/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java b/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
index ddaae29..65b23c4 100644
--- a/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
+++ b/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
@@ -35,6 +35,7 @@
 package org.apache.kylin.job.streaming;
 
 import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
 import kafka.api.OffsetRequest;
 import kafka.cluster.Broker;
 import kafka.javaapi.PartitionMetadata;
@@ -50,8 +51,8 @@ import org.apache.kylin.streaming.*;
 import org.apache.kylin.streaming.invertedindex.IIStreamBuilder;
 
 import java.nio.ByteBuffer;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
+import java.util.Map;
+import java.util.concurrent.*;
 
 /**
  * Created by qianzhou on 3/26/15.
@@ -62,6 +63,8 @@ public class StreamingBootstrap {
     private StreamManager streamManager;
     private IIManager iiManager;
 
+    private Map<String, KafkaConsumer> kafkaConsumers = Maps.newConcurrentMap();
+
     public static StreamingBootstrap getInstance(KylinConfig kylinConfig) {
         return new StreamingBootstrap(kylinConfig);
     }
@@ -81,9 +84,17 @@ public class StreamingBootstrap {
         }
     }
 
-    public void startStreaming(String streamingConf, int partitionId) throws Exception {
-        final KafkaConfig kafkaConfig = streamManager.getKafkaConfig(streamingConf);
-        Preconditions.checkArgument(kafkaConfig != null, "cannot find kafka config:" + streamingConf);
+    public void stop(String streaming, int partitionId) throws Exception {
+        final KafkaConsumer consumer = kafkaConsumers.remove(getKey(streaming, partitionId));
+        if (consumer != null) {
+            consumer.stop();
+            consumer.getStreamQueue().put(Stream.EOF);
+        }
+    }
+
+    public void start(String streaming, int partitionId) throws Exception {
+        final KafkaConfig kafkaConfig = streamManager.getKafkaConfig(streaming);
+        Preconditions.checkArgument(kafkaConfig != null, "cannot find kafka config:" + streaming);
         final IIInstance ii = iiManager.getII(kafkaConfig.getIiName());
         Preconditions.checkNotNull(ii);
         Preconditions.checkArgument(ii.getSegments().size() > 0);
@@ -96,7 +107,8 @@ public class StreamingBootstrap {
         if (streamOffset < earliestOffset) {
             streamOffset = earliestOffset;
         }
-
+        String[] args = new String[]{"-iiname", kafkaConfig.getIiName(), "-htablename", iiSegment.getStorageLocationIdentifier()};
+        ToolRunner.run(new IICreateHTableJob(), args);
 
         KafkaConsumer consumer = new KafkaConsumer(kafkaConfig.getTopic(), 0, streamOffset, kafkaConfig.getBrokers(), kafkaConfig) {
             @Override
@@ -107,11 +119,16 @@ public class StreamingBootstrap {
             }
         };
         final IIDesc desc = ii.getDescriptor();
+        kafkaConsumers.put(getKey(streaming, partitionId), consumer);
 
-        Executors.newSingleThreadExecutor().submit(consumer);
         final IIStreamBuilder task = new IIStreamBuilder(consumer.getStreamQueue(), iiSegment.getStorageLocationIdentifier(), desc, partitionId);
         task.setStreamParser(JsonStreamParser.instance);
-        final Future<?> future = Executors.newSingleThreadExecutor().submit(task);
-        future.get();
+
+        Executors.newSingleThreadExecutor().submit(consumer);
+        Executors.newSingleThreadExecutor().submit(task);
+    }
+
+    private String getKey(String streaming, int partitionId) {
+        return streaming + "_" + partitionId;
     }
 }

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/b6b3388c/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java b/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
index 8813cb3..4977339 100644
--- a/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
+++ b/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
@@ -54,7 +54,7 @@ public class StreamingCLI {
             }
             if (args[0].equals("start")) {
                 String kafkaConfName = args[1];
-                StreamingBootstrap.getInstance(KylinConfig.getInstanceFromEnv()).startStreaming(kafkaConfName, 0);
+                StreamingBootstrap.getInstance(KylinConfig.getInstanceFromEnv()).start(kafkaConfName, 0);
             } else if (args.equals("stop")) {
 
             } else {

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/b6b3388c/job/src/test/java/org/apache/kylin/job/BuildIIWithStreamTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/BuildIIWithStreamTest.java b/job/src/test/java/org/apache/kylin/job/BuildIIWithStreamTest.java
index 04a53f7..dae2d03 100644
--- a/job/src/test/java/org/apache/kylin/job/BuildIIWithStreamTest.java
+++ b/job/src/test/java/org/apache/kylin/job/BuildIIWithStreamTest.java
@@ -34,7 +34,6 @@
 
 package org.apache.kylin.job;
 
-import com.google.common.collect.Lists;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.util.ToolRunner;
@@ -59,14 +58,19 @@ import org.apache.kylin.metadata.model.TblColRef;
 import org.apache.kylin.metadata.realization.RealizationStatusEnum;
 import org.apache.kylin.streaming.Stream;
 import org.apache.kylin.streaming.invertedindex.IIStreamBuilder;
-import org.junit.*;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.File;
 import java.io.IOException;
 import java.text.SimpleDateFormat;
-import java.util.*;
+import java.util.List;
+import java.util.TimeZone;
+import java.util.UUID;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
@@ -111,7 +115,7 @@ public class BuildIIWithStreamTest {
     }
 
     @AfterClass
-    public static void after() throws Exception {
+    public static void afterClass() throws Exception {
         backup();
     }
 
@@ -213,22 +217,10 @@ public class BuildIIWithStreamTest {
         ExecutorService executorService = Executors.newSingleThreadExecutor();
         final IIStreamBuilder streamBuilder = new IIStreamBuilder(queue, segment.getStorageLocationIdentifier(), desc, 0);
         int count = 0;
-        List<String[]> rawData = Lists.newArrayList();
         while (reader.next()) {
-            desc.getTimestampColumn();
-            rawData.add(reader.getRow());
+            queue.put(parse(reader.getRow()));
             count++;
         }
-        final int timestampColumn = desc.getTimestampColumn();
-        Collections.sort(rawData, new Comparator<String[]>() {
-            @Override
-            public int compare(String[] o1, String[] o2) {
-                return o1[timestampColumn].compareTo(o2[timestampColumn]);
-            }
-        });
-        for (String[] row : rawData) {
-            queue.put(parse(row));
-        }
         logger.info("total record count:" + count + " htable:" + segment.getStorageLocationIdentifier());
         queue.put(new Stream(-1, null));
         final Future<?> future = executorService.submit(streamBuilder);

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/b6b3388c/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java b/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
index 18c8403..b083dea 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
@@ -64,6 +64,8 @@ public abstract class KafkaConsumer implements Runnable {
 
     private Logger logger;
 
+    private volatile boolean stop = false;
+
     public KafkaConsumer(String topic, int partitionId, long startOffset, List<Broker> initialBrokers, KafkaConfig kafkaConfig) {
         this.topic = topic;
         this.partitionId = partitionId;
@@ -92,7 +94,7 @@ public abstract class KafkaConsumer implements Runnable {
     public void run() {
         try {
             Broker leadBroker = getLeadBroker();
-            while (true) {
+            while (!stop) {
                 if (leadBroker == null) {
                     leadBroker = getLeadBroker();
                 }
@@ -123,4 +125,8 @@ public abstract class KafkaConsumer implements Runnable {
 
     protected abstract void consume(long offset, ByteBuffer payload) throws Exception;
 
+    public void stop() {
+        this.stop = true;
+    }
+
 }

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/b6b3388c/streaming/src/main/java/org/apache/kylin/streaming/Stream.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/Stream.java b/streaming/src/main/java/org/apache/kylin/streaming/Stream.java
index 2c6a86c..d337c4c 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/Stream.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/Stream.java
@@ -42,6 +42,8 @@ public class Stream {
     private long offset;
     private byte[] rawData;
 
+    public static final Stream EOF = new Stream(-1, new byte[0]);
+
     public Stream(long offset, byte[] rawData) {
         this.offset = offset;
         this.rawData = rawData;
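
Editor's note: the shutdown path added here combines two mechanisms: the consumer's volatile stop flag ends the fetch loop after the current iteration, and the Stream.EOF poison pill pushed onto the queue tells the downstream IIStreamBuilder that no more messages will arrive. A hedged sketch of the caller side (both methods declare throws Exception; the streaming name is a placeholder):

    // Hypothetical caller; "my_kafka_stream" is a made-up streaming config name.
    StreamingBootstrap bootstrap = StreamingBootstrap.getInstance(KylinConfig.getInstanceFromEnv());
    bootstrap.start("my_kafka_stream", 0);   // registered internally under the key "my_kafka_stream_0"
    // ... later, on shutdown ...
    bootstrap.stop("my_kafka_stream", 0);    // sets the stop flag and enqueues Stream.EOF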


[15/50] incubator-kylin git commit: Use LongWritable as key type in fact distinct job.

Posted by li...@apache.org.
Use LongWritable as key type in fact distinct job.

Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/8d40a578
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/8d40a578
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/8d40a578

Branch: refs/heads/streaming-localdict
Commit: 8d40a578170da66c61503d7b42fe70c3a930dadd
Parents: 7658a50
Author: Shao Feng, Shi <sh...@ebay.com>
Authored: Thu Mar 26 23:32:56 2015 +0800
Committer: Shao Feng, Shi <sh...@ebay.com>
Committed: Thu Mar 26 23:32:56 2015 +0800

----------------------------------------------------------------------
 .../job/hadoop/cube/FactDistinctColumnsCombiner.java     |  6 +++---
 .../kylin/job/hadoop/cube/FactDistinctColumnsJob.java    |  4 ++--
 .../job/hadoop/cube/FactDistinctColumnsMapperBase.java   | 11 +++++------
 3 files changed, 10 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8d40a578/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsCombiner.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsCombiner.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsCombiner.java
index c0cdd46..59ccd5a 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsCombiner.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsCombiner.java
@@ -21,9 +21,9 @@ package org.apache.kylin.job.hadoop.cube;
 import java.io.IOException;
 import java.util.HashSet;
 
+import org.apache.hadoop.io.LongWritable;
 import org.apache.kylin.common.mr.KylinReducer;
 import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.ShortWritable;
 import org.apache.hadoop.io.Text;
 
 import org.apache.kylin.common.util.ByteArray;
@@ -31,7 +31,7 @@ import org.apache.kylin.common.util.ByteArray;
 /**
  * @author yangli9
  */
-public class FactDistinctColumnsCombiner extends KylinReducer<ShortWritable, Text, ShortWritable, Text> {
+public class FactDistinctColumnsCombiner extends KylinReducer<LongWritable, Text, LongWritable, Text> {
 
     private Text outputValue = new Text();
 
@@ -41,7 +41,7 @@ public class FactDistinctColumnsCombiner extends KylinReducer<ShortWritable, Tex
     }
 
     @Override
-    public void reduce(ShortWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
+    public void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
 
         if(key.get() >= 0) {
             HashSet<ByteArray> set = new HashSet<ByteArray>();
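
For reference, a simplified combiner with the new LongWritable-keyed signature. It only illustrates per-key de-duplication against plain Hadoop types; it is not the Kylin FactDistinctColumnsCombiner, which collects ByteArray values and treats negative keys specially.

    import java.io.IOException;
    import java.util.HashSet;
    import java.util.Set;

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    // Emits each distinct value once per LongWritable key.
    public class DistinctValuesCombiner extends Reducer<LongWritable, Text, LongWritable, Text> {

        private final Text outputValue = new Text();

        @Override
        protected void reduce(LongWritable key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            Set<String> seen = new HashSet<String>();
            for (Text value : values) {
                // copy to String because Hadoop reuses the Text instance across iterations
                if (seen.add(value.toString())) {
                    outputValue.set(value.toString());
                    context.write(key, outputValue);
                }
            }
        }
    }

The matching job wiring is the setMapOutputKeyClass(LongWritable.class) change shown below.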

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8d40a578/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsJob.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsJob.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsJob.java
index 17c5e9b..5903c7b 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsJob.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsJob.java
@@ -22,8 +22,8 @@ import java.io.IOException;
 
 import org.apache.commons.cli.Options;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.ShortWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
@@ -103,7 +103,7 @@ public class FactDistinctColumnsJob extends AbstractHadoopJob {
         job.setInputFormatClass(HCatInputFormat.class);
         job.setMapperClass(FactDistinctHiveColumnsMapper.class);
         job.setCombinerClass(FactDistinctColumnsCombiner.class);
-        job.setMapOutputKeyClass(ShortWritable.class);
+        job.setMapOutputKeyClass(LongWritable.class);
         job.setMapOutputValueClass(Text.class);
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/8d40a578/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
index 9945769..2f046ab 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctColumnsMapperBase.java
@@ -1,12 +1,7 @@
 package org.apache.kylin.job.hadoop.cube;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.ShortWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hive.hcatalog.data.HCatRecord;
 import org.apache.kylin.common.KylinConfig;
@@ -21,6 +16,10 @@ import org.apache.kylin.job.constant.BatchConstants;
 import org.apache.kylin.job.hadoop.AbstractHadoopJob;
 import org.apache.kylin.metadata.model.TblColRef;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
 /**
  * Created by Hongbin Ma(Binmahone) on 3/26/15.
  */
@@ -35,7 +34,7 @@ public class FactDistinctColumnsMapperBase<KEYIN, VALUEIN> extends KylinMapper<K
 
     protected LongWritable outputKey = new LongWritable();
     protected Text outputValue = new Text();
-    protected int errorRecordCounter =0;
+    protected int errorRecordCounter = 0;
 
     @Override
     protected void setup(Context context) throws IOException {


[33/50] incubator-kylin git commit: KYLIN-653 adding streaming build test cases

Posted by li...@apache.org.
KYLIN-653 adding streaming build test cases


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/4df05317
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/4df05317
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/4df05317

Branch: refs/heads/streaming-localdict
Commit: 4df05317e1a754d1b1e422fdf5df580b2fa3366d
Parents: bbbcae8
Author: honma <ho...@ebay.com>
Authored: Fri Mar 27 09:49:52 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Fri Mar 27 15:16:20 2015 +0800

----------------------------------------------------------------------
 .../kylin/invertedindex/index/TableRecord.java  |  5 +-
 .../invertedindex/index/TableRecordInfo.java    | 10 +--
 .../kylin/invertedindex/model/IIDesc.java       |  1 +
 .../model/IIKeyValueCodecWithState.java         |  6 +-
 .../kylin/job/hadoop/cube/BaseCuboidMapper.java | 20 +++--
 .../cube/FactDistinctIIColumnsMapper.java       |  9 +-
 .../kylin/job/BuildCubeWithEngineTest.java      |  1 -
 .../invertedindex/IIStreamBuilder.java          | 33 ++++---
 .../IIKeyValueCodecWithStateTest.java           | 91 ++++++++++++++++++++
 .../invertedindex/ToyIIStreamBuilder.java       | 35 ++++++++
 10 files changed, 177 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4df05317/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecord.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecord.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecord.java
index ce1b7e0..78cea3d 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecord.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecord.java
@@ -18,15 +18,12 @@
 
 package org.apache.kylin.invertedindex.index;
 
-import com.google.common.collect.Lists;
-import org.apache.kylin.dict.DateStrDictionary;
 import org.apache.commons.lang.ObjectUtils;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.io.LongWritable;
+import org.apache.kylin.dict.DateStrDictionary;
 import org.apache.kylin.dict.Dictionary;
 
-import java.util.List;
-
 /**
  * @author yangli9, honma
  *         <p/>

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4df05317/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecordInfo.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecordInfo.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecordInfo.java
index 3136ebb..9a08e64 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecordInfo.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecordInfo.java
@@ -18,19 +18,17 @@
 
 package org.apache.kylin.invertedindex.index;
 
-import com.google.common.collect.Maps;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
 import org.apache.kylin.dict.Dictionary;
 import org.apache.kylin.invertedindex.IISegment;
 import org.apache.kylin.invertedindex.model.IIDesc;
 import org.apache.kylin.metadata.measure.fixedlen.FixedLenMeasureCodec;
-import org.apache.kylin.metadata.model.ColumnDesc;
 import org.apache.kylin.metadata.model.DataType;
 import org.apache.kylin.metadata.model.TblColRef;
 
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-
 /**
  * @author yangli9
  *         <p/>

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4df05317/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIDesc.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIDesc.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIDesc.java
index cda3c4d..17edb86 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIDesc.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIDesc.java
@@ -319,6 +319,7 @@ public class IIDesc extends RootPersistentEntity {
         return sliceSize;
     }
 
+
     public String getSignature() {
         return signature;
     }

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4df05317/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
index a8e149a..e838283 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIKeyValueCodecWithState.java
@@ -26,6 +26,7 @@ public class IIKeyValueCodecWithState extends IIKeyValueCodec {
     protected static class IIRowDecoderWithState extends IIRowDecoder {
 
         final ArrayList<IIRow> buffer = Lists.newArrayList();
+        private Iterator<Slice> superIterator = null;
 
         private IIRowDecoderWithState(TableRecordInfoDigest digest, Iterator<IIRow> iiRowIterator) {
             super(digest, iiRowIterator);
@@ -33,7 +34,10 @@ public class IIKeyValueCodecWithState extends IIKeyValueCodec {
         }
 
         private Iterator<Slice> getSuperIterator() {
-            return super.iterator();
+            if (superIterator == null) {
+                superIterator = super.iterator();
+            }
+            return superIterator;
         }
 
         @Override
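
The change above lazily creates the parent iterator once and reuses it, so every call to getSuperIterator() advances the same cursor instead of potentially starting a new one. A generic sketch of that memoization (not the Kylin class):

    import java.util.Arrays;
    import java.util.Iterator;

    // Wraps an Iterable and hands out one cached Iterator, so repeated calls all
    // advance the same underlying cursor instead of restarting from the beginning.
    public class CachedIteratorSketch<T> {

        private final Iterable<T> source;
        private Iterator<T> cached;                        // created lazily, then reused

        public CachedIteratorSketch(Iterable<T> source) {
            this.source = source;
        }

        public Iterator<T> iterator() {
            if (cached == null) {
                cached = source.iterator();                // exactly one underlying iterator
            }
            return cached;
        }

        public static void main(String[] args) {
            CachedIteratorSketch<String> s = new CachedIteratorSketch<String>(Arrays.asList("a", "b"));
            System.out.println(s.iterator().next());       // a
            System.out.println(s.iterator().next());       // b -- same cursor as the first call
        }
    }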

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4df05317/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java
index 41b21a7..a023c0c 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java
@@ -210,20 +210,24 @@ public class BaseCuboidMapper<KEYIN> extends KylinMapper<KEYIN, Text, Text, Text
 
         try {
             bytesSplitter.split(value.getBytes(), value.getLength(), byteRowDelimiter);
-            intermediateTableDesc.sanityCheck(bytesSplitter);
+            outputKV(context);
 
-            byte[] rowKey = buildKey(bytesSplitter.getSplitBuffers());
-            outputKey.set(rowKey, 0, rowKey.length);
-
-            buildValue(bytesSplitter.getSplitBuffers());
-            outputValue.set(valueBuf.array(), 0, valueBuf.position());
-
-            context.write(outputKey, outputValue);
         } catch (Exception ex) {
             handleErrorRecord(bytesSplitter, ex);
         }
     }
 
+    private void outputKV(Context context) throws IOException, InterruptedException {
+        intermediateTableDesc.sanityCheck(bytesSplitter);
+
+        byte[] rowKey = buildKey(bytesSplitter.getSplitBuffers());
+        outputKey.set(rowKey, 0, rowKey.length);
+
+        buildValue(bytesSplitter.getSplitBuffers());
+        outputValue.set(valueBuf.array(), 0, valueBuf.position());
+        context.write(outputKey, outputValue);
+    }
+
     private void handleErrorRecord(BytesSplitter bytesSplitter, Exception ex) throws IOException {
 
         System.err.println("Insane record: " + bytesSplitter);

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4df05317/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
index 75e127e..705e272 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
@@ -20,6 +20,7 @@ package org.apache.kylin.job.hadoop.cube;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Iterator;
 import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
@@ -50,7 +51,7 @@ public class FactDistinctIIColumnsMapper extends FactDistinctColumnsMapperBase<I
 
     private IIJoinedFlatTableDesc intermediateTableDesc;
     private ArrayList<IIRow> buffer = Lists.newArrayList();
-    private Iterable<Slice> slices;
+    private Iterator<Slice> slices;
 
     private String iiName;
     private IIInstance ii;
@@ -72,7 +73,7 @@ public class FactDistinctIIColumnsMapper extends FactDistinctColumnsMapperBase<I
         intermediateTableDesc = new IIJoinedFlatTableDesc(iiDesc);
         TableRecordInfo info = new TableRecordInfo(iiDesc);
         KeyValueCodec codec = new IIKeyValueCodecWithState(info.getDigest());
-        slices = codec.decodeKeyValue(buffer);
+        slices = codec.decodeKeyValue(buffer).iterator();
 
         baseCuboidCol2FlattenTableCol = new int[factDictCols.size()];
         for (int i = 0; i < factDictCols.size(); ++i) {
@@ -98,9 +99,9 @@ public class FactDistinctIIColumnsMapper extends FactDistinctColumnsMapperBase<I
         }
         buffer.add(iiRow);
 
-        if (slices.iterator().hasNext()) {
+        if (slices.hasNext()) {
             byte[] vBytesBuffer = null;
-            Slice slice = slices.iterator().next();
+            Slice slice = slices.next();
 
             for (RawTableRecord record : slice) {
                 for (int i = 0; i < factDictCols.size(); ++i) {
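
Related fix in this mapper: the slices cursor is now obtained once in setup() and reused, because calling iterator() on a stateful Iterable for every probe can hand back a fresh cursor each time. A toy, self-contained demonstration of the difference:

    import java.util.Iterator;
    import java.util.concurrent.atomic.AtomicInteger;

    class OneCursorSketch {

        // An Iterable whose iterator() starts a new cursor counting from 0 on every call.
        static Iterable<Integer> counting(final int limit) {
            return new Iterable<Integer>() {
                @Override
                public Iterator<Integer> iterator() {
                    final AtomicInteger next = new AtomicInteger(0);
                    return new Iterator<Integer>() {
                        public boolean hasNext() { return next.get() < limit; }
                        public Integer next() { return next.getAndIncrement(); }
                        public void remove() { throw new UnsupportedOperationException(); }
                    };
                }
            };
        }

        public static void main(String[] args) {
            Iterable<Integer> source = counting(3);

            // Broken pattern: two separate iterators, position is lost between calls.
            System.out.println(source.iterator().next() + ", " + source.iterator().next()); // 0, 0

            // Fixed pattern: obtain the iterator once and keep advancing the same cursor.
            Iterator<Integer> it = source.iterator();
            System.out.println(it.next() + ", " + it.next());                               // 0, 1
        }
    }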

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4df05317/job/src/test/java/org/apache/kylin/job/BuildCubeWithEngineTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/BuildCubeWithEngineTest.java b/job/src/test/java/org/apache/kylin/job/BuildCubeWithEngineTest.java
index ce70f2c..a33dab5 100644
--- a/job/src/test/java/org/apache/kylin/job/BuildCubeWithEngineTest.java
+++ b/job/src/test/java/org/apache/kylin/job/BuildCubeWithEngineTest.java
@@ -107,7 +107,6 @@ public class BuildCubeWithEngineTest {
                 jobService.deleteJob(jobId);
             }
         }
-
     }
 
     @After

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4df05317/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java b/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java
index f9adefe..0cf3c77 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java
@@ -82,7 +82,11 @@ public class IIStreamBuilder extends StreamBuilder {
         super(queue, desc.getSliceSize());
         this.desc = desc;
         try {
-            this.hTable = HConnectionManager.createConnection(HBaseConfiguration.create()).getTable(hTableName);
+            if (hTableName != null) {
+                this.hTable = HConnectionManager.createConnection(HBaseConfiguration.create()).getTable(hTableName);
+            } else {
+                this.hTable = null;
+            }
         } catch (IOException e) {
             logger.error("cannot open htable name:" + hTableName, e);
             throw new RuntimeException("cannot open htable name:" + hTableName, e);
@@ -105,12 +109,18 @@ public class IIStreamBuilder extends StreamBuilder {
         TableRecordInfo tableRecordInfo = new TableRecordInfo(desc, dictionaryMap);
         final Slice slice = buildSlice(table, sliceBuilder, tableRecordInfo, dictionaryMap);
         logger.info("slice info, shard:" + slice.getShard() + " timestamp:" + slice.getTimestamp() + " record count:" + slice.getRecordCount());
-        loadToHBase(hTable, slice, new IIKeyValueCodec(tableRecordInfo.getDigest()));
+
+        outputSlice(slice, tableRecordInfo);
         submitOffset();
+
         stopwatch.stop();
         logger.info("stream build finished, size:" + streamsToBuild.size() + " elapsed time:" + stopwatch.elapsedTime(TimeUnit.MILLISECONDS) + TimeUnit.MILLISECONDS);
     }
 
+    protected void outputSlice(Slice slice, TableRecordInfo tableRecordInfo) throws IOException {
+        loadToHBase(hTable, slice, new IIKeyValueCodec(tableRecordInfo.getDigest()));
+    }
+
     private Map<Integer, Dictionary<?>> buildDictionary(List<List<String>> table, IIDesc desc) {
         HashMultimap<TblColRef, String> valueMap = HashMultimap.create();
         final List<TblColRef> allColumns = desc.listAllColumns();
@@ -122,15 +132,19 @@ public class IIStreamBuilder extends StreamBuilder {
                 }
             }
         }
+
         Map<Integer, Dictionary<?>> result = Maps.newHashMap();
         for (TblColRef tblColRef : valueMap.keySet()) {
-            result.put(desc.findColumn(tblColRef), DictionaryGenerator.buildDictionaryFromValueList(tblColRef.getType(), Collections2.transform(valueMap.get(tblColRef), new Function<String, byte[]>() {
-                @Nullable
-                @Override
-                public byte[] apply(String input) {
-                    return input.getBytes();
-                }
-            })));
+            result.put(desc.findColumn(tblColRef), //
+                    DictionaryGenerator.buildDictionaryFromValueList(//
+                            tblColRef.getType(), //
+                            Collections2.transform(valueMap.get(tblColRef), new Function<String, byte[]>() {
+                                @Nullable
+                                @Override
+                                public byte[] apply(String input) {
+                                    return input.getBytes();
+                                }
+                            })));
         }
         return result;
     }
@@ -178,7 +192,6 @@ public class IIStreamBuilder extends StreamBuilder {
         }
     }
 
-
     private void submitOffset() {
 
     }
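
outputSlice() is now a protected hook, which is what lets the ToyIIStreamBuilder added later in this commit capture slices in memory instead of writing to HBase. A bare-bones sketch of that template-method arrangement (names are illustrative, not Kylin's):

    import java.util.ArrayList;
    import java.util.List;

    // The base class drives the build and delegates the final "output" step to a
    // protected hook that subclasses may replace.
    abstract class SliceWriterSketch {

        public final void build(String slice) {
            // ... dictionary building, encoding, etc. would happen here ...
            output(slice);                        // the overridable step
        }

        protected void output(String slice) {
            System.out.println("write to HBase: " + slice);   // production default
        }
    }

    // Test double: captures output in memory instead of touching HBase.
    class CollectingSliceWriter extends SliceWriterSketch {

        final List<String> captured = new ArrayList<String>();

        @Override
        protected void output(String slice) {
            captured.add(slice);
        }

        public static void main(String[] args) {
            CollectingSliceWriter writer = new CollectingSliceWriter();
            writer.build("slice-0");
            System.out.println(writer.captured);  // [slice-0] -- nothing went to "HBase"
        }
    }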

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4df05317/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java
new file mode 100644
index 0000000..25e250c
--- /dev/null
+++ b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java
@@ -0,0 +1,91 @@
+package org.apache.kylin.streaming.invertedindex;
+
+import java.util.*;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.LinkedBlockingQueue;
+
+import javax.annotation.Nullable;
+
+import org.apache.kylin.common.util.LocalFileMetadataTestCase;
+import org.apache.kylin.invertedindex.IIInstance;
+import org.apache.kylin.invertedindex.IIManager;
+import org.apache.kylin.invertedindex.index.Slice;
+import org.apache.kylin.invertedindex.index.TableRecordInfo;
+import org.apache.kylin.invertedindex.index.TableRecordInfoDigest;
+import org.apache.kylin.invertedindex.model.IIDesc;
+import org.apache.kylin.invertedindex.model.IIKeyValueCodecWithState;
+import org.apache.kylin.invertedindex.model.IIRow;
+import org.apache.kylin.invertedindex.model.KeyValueCodec;
+import org.apache.kylin.streaming.Stream;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Collections2;
+import com.google.common.collect.Lists;
+
+/**
+ * Created by Hongbin Ma(Binmahone) on 3/26/15.
+ */
+public class IIKeyValueCodecWithStateTest extends LocalFileMetadataTestCase {
+
+    IIInstance ii;
+    IIDesc iiDesc;
+    List<IIRow> iiRowList = Lists.newArrayList();
+
+    final String[] inputs = new String[] { //
+    "FP-non GTC,0,15,145970,0,28,Toys,2008-10-08 07:18:40,USER_Y,Toys & Hobbies,Models & Kits,Automotive,0,Ebay,USER_S,15,Professional-Other,2012-08-16,2012-08-11,0,2012-08-16,145970,10000329,26.8551,0", //
+            "ABIN,0,-99,43479,0,21,Photo,2012-09-11 20:26:04,USER_Y,Cameras & Photo,Film Photography,Other,0,Ebay,USER_S,-99,Not Applicable,2012-08-16,2012-08-11,2012-08-16,43479,10000807,26.2474,0", //
+            "ABIN,0,16,80053,0,12,Computers,2012-06-19 21:15:09,USER_Y,Computers/Tablets & Networking,MonitorProjectors & Accs,Monitors,0,Ebay,USER_S,16,Consumer-Other,2012-08-16,2012-08-11,0,2012-08-16,80053,10000261,94.2273,0" };
+
+    @Before
+    public void setUp() throws Exception {
+        this.createTestMetadata();
+        this.ii = IIManager.getInstance(getTestConfig()).getII("test_kylin_ii_inner_join");
+        this.iiDesc = ii.getDescriptor();
+
+        Collection<?> streams = Collections2.transform(Arrays.asList(inputs), new Function<String, Stream>() {
+            @Nullable
+            @Override
+            public Stream apply(String input) {
+                return new Stream(0, input.getBytes());
+            }
+        });
+        LinkedBlockingQueue q = new LinkedBlockingQueue();
+        q.addAll(streams);
+        q.put(new Stream(-1, null));//a stop sign for builder
+
+        ToyIIStreamBuilder builder = new ToyIIStreamBuilder(q, iiDesc, 0, iiRowList);
+        ExecutorService executorService = Executors.newSingleThreadExecutor();
+        Future<?> future = executorService.submit(builder);
+        future.get();
+    }
+
+    @Test
+    public void basicTest() {
+        ArrayList<IIRow> buffer = Lists.newArrayList();
+        TableRecordInfo info = new TableRecordInfo(iiDesc);
+        TableRecordInfoDigest digest = info.getDigest();
+        int columnCount = digest.getColumnCount();
+        KeyValueCodec codec = new IIKeyValueCodecWithState(digest);
+        Iterator<Slice> slices = codec.decodeKeyValue(buffer).iterator();
+
+        Assert.assertTrue(!slices.hasNext());
+        Assert.assertEquals(iiRowList.size(), digest.getColumnCount());
+
+        for (int i = 0; i < digest.getColumnCount(); ++i) {
+            buffer.add(iiRowList.get(i));
+
+            if (i != digest.getColumnCount() - 1) {
+                Assert.assertTrue(!slices.hasNext());
+            } else {
+                Assert.assertTrue(slices.hasNext());
+            }
+        }
+
+        Slice newSlice = slices.next();
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4df05317/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/ToyIIStreamBuilder.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/ToyIIStreamBuilder.java b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/ToyIIStreamBuilder.java
new file mode 100644
index 0000000..161b6f6
--- /dev/null
+++ b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/ToyIIStreamBuilder.java
@@ -0,0 +1,35 @@
+package org.apache.kylin.streaming.invertedindex;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.BlockingQueue;
+
+import org.apache.kylin.invertedindex.index.Slice;
+import org.apache.kylin.invertedindex.index.TableRecordInfo;
+import org.apache.kylin.invertedindex.model.IIDesc;
+import org.apache.kylin.invertedindex.model.IIKeyValueCodec;
+import org.apache.kylin.invertedindex.model.IIRow;
+import org.apache.kylin.streaming.Stream;
+
+/**
+ * Created by Hongbin Ma(Binmahone) on 3/26/15.
+ *
+ * A IIStreamBuilder that can hold all the built slices in form of IIRow
+ * This is only for test use
+ */
+public class ToyIIStreamBuilder extends IIStreamBuilder {
+    private List<IIRow> result;
+
+    public ToyIIStreamBuilder(BlockingQueue<Stream> queue, IIDesc desc, int partitionId, List<IIRow> result) {
+        super(queue, null, desc, partitionId);
+        this.result = result;
+    }
+
+    protected void outputSlice(Slice slice, TableRecordInfo tableRecordInfo) throws IOException {
+        IIKeyValueCodec codec = new IIKeyValueCodec(tableRecordInfo.getDigest());
+        for (IIRow iiRow : codec.encodeKeyValue(slice)) {
+            result.add(iiRow);
+        }
+    }
+
+}


[44/50] incubator-kylin git commit: KYLIN-653 minor change

Posted by li...@apache.org.
KYLIN-653 minor change


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/cff578a7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/cff578a7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/cff578a7

Branch: refs/heads/streaming-localdict
Commit: cff578a7df8e75b07caf4f21803f0426aa94485e
Parents: a36d416
Author: honma <ho...@ebay.com>
Authored: Fri Mar 27 18:05:23 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Fri Mar 27 18:06:19 2015 +0800

----------------------------------------------------------------------
 .../main/java/org/apache/kylin/job/cube/CubingJobBuilder.java | 2 --
 .../java/org/apache/kylin/job/BuildCubeWithEngineTest.java    | 7 ++-----
 .../apache/kylin/job/hadoop/invertedindex/II2CubeTest.java    | 2 ++
 3 files changed, 4 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/cff578a7/job/src/main/java/org/apache/kylin/job/cube/CubingJobBuilder.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/cube/CubingJobBuilder.java b/job/src/main/java/org/apache/kylin/job/cube/CubingJobBuilder.java
index 278f4dd..7cde298 100644
--- a/job/src/main/java/org/apache/kylin/job/cube/CubingJobBuilder.java
+++ b/job/src/main/java/org/apache/kylin/job/cube/CubingJobBuilder.java
@@ -170,9 +170,7 @@ public final class CubingJobBuilder extends AbstractJobBuilder {
 
         final AbstractExecutable intermediateHiveTableStep = createIntermediateHiveTableStep(intermediateTableDesc, jobId);
         result.addTask(intermediateHiveTableStep);
-
         result.addTask(createFactDistinctColumnsStep(seg, intermediateHiveTableName, jobId));
-
         result.addTask(createBuildDictionaryStep(seg, factDistinctColumnsPath));
         MapReduceExecutable baseCuboidStep = null;
         if(!useImMemCubing) {

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/cff578a7/job/src/test/java/org/apache/kylin/job/BuildCubeWithEngineTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/BuildCubeWithEngineTest.java b/job/src/test/java/org/apache/kylin/job/BuildCubeWithEngineTest.java
index a33dab5..dc2f74f 100644
--- a/job/src/test/java/org/apache/kylin/job/BuildCubeWithEngineTest.java
+++ b/job/src/test/java/org/apache/kylin/job/BuildCubeWithEngineTest.java
@@ -51,16 +51,13 @@ import static org.junit.Assert.assertEquals;
 
 public class BuildCubeWithEngineTest {
 
-    private JobEngineConfig jobEngineConfig;
+    private static final Log logger = LogFactory.getLog(BuildCubeWithEngineTest.class);
 
+    private JobEngineConfig jobEngineConfig;
     private CubeManager cubeManager;
-
     private DefaultScheduler scheduler;
-
     protected ExecutableManager jobService;
 
-    private static final Log logger = LogFactory.getLog(BuildCubeWithEngineTest.class);
-
     protected void waitForJob(String jobId) {
         while (true) {
             AbstractExecutable job = jobService.getJob(jobId);

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/cff578a7/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java b/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
index da1cb18..080da1b 100644
--- a/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
+++ b/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
@@ -22,6 +22,7 @@ import org.apache.kylin.invertedindex.index.TableRecordInfoDigest;
 import org.apache.kylin.invertedindex.model.*;
 import org.apache.kylin.job.constant.BatchConstants;
 import org.apache.kylin.job.hadoop.cube.FactDistinctIIColumnsMapper;
+import org.apache.kylin.job.hadoop.cube.IIToBaseCuboidMapper;
 import org.apache.kylin.streaming.Stream;
 import org.apache.kylin.streaming.StringStreamParser;
 import org.apache.kylin.streaming.invertedindex.SliceBuilder;
@@ -133,4 +134,5 @@ public class II2CubeTest extends LocalFileMetadataTestCase {
             Assert.assertTrue(lstgNames.contains(pair.getSecond().toString()));
         }
     }
+
 }


[41/50] incubator-kylin git commit: Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict

Posted by li...@apache.org.
Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/ea96dc54
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/ea96dc54
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/ea96dc54

Branch: refs/heads/streaming-localdict
Commit: ea96dc5453553d40a31e6fce7f5d7f489514f883
Parents: 8e6afbf f3a592b
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 16:51:20 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 16:51:20 2015 +0800

----------------------------------------------------------------------
 .../hadoop/cube/FactDistinctHiveColumnsMapper.java  | 16 ++++++++--------
 .../kylin/job/hadoop/invertedindex/II2CubeTest.java |  8 ++++----
 2 files changed, 12 insertions(+), 12 deletions(-)
----------------------------------------------------------------------



[22/50] incubator-kylin git commit: Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict

Posted by li...@apache.org.
Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/3d3cee84
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/3d3cee84
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/3d3cee84

Branch: refs/heads/streaming-localdict
Commit: 3d3cee8475a335f984283d584962dd25f7f00754
Parents: b6b3388 7f73abe
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 11:58:26 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 11:58:26 2015 +0800

----------------------------------------------------------------------
 .../org/apache/kylin/common/util/ByteArray.java |  23 ++-
 .../java/org/apache/kylin/dict/Dictionary.java  |  31 ++--
 .../org/apache/kylin/dict/TrieDictionary.java   |  48 ++---
 .../apache/kylin/dict/NumberDictionaryTest.java |   2 +-
 .../hadoop/cubev2/BuildDictionaryMapper.java    | 184 +++++++++++++++++++
 .../gridtable/GTDictionaryCodeSystem.java       |   3 +-
 .../kylin/storage/gridtable/GTRecord.java       |   5 +-
 .../kafka_streaming_test/eagle.properties       |  10 +
 8 files changed, 247 insertions(+), 59 deletions(-)
----------------------------------------------------------------------



[12/50] incubator-kylin git commit: Refine GTDictionaryCodeSystem.java

Posted by li...@apache.org.
Refine GTDictionaryCodeSystem.java

Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/7360f5bd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/7360f5bd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/7360f5bd

Branch: refs/heads/streaming-localdict
Commit: 7360f5bd61f28a38e16d61f0b1388024ebe51fdd
Parents: 3bf6b37
Author: Shao Feng, Shi <sh...@ebay.com>
Authored: Thu Mar 26 21:47:51 2015 +0800
Committer: Shao Feng, Shi <sh...@ebay.com>
Committed: Thu Mar 26 21:47:51 2015 +0800

----------------------------------------------------------------------
 .../gridtable/GTDictionaryCodeSystem.java       | 68 +++++++++++---------
 1 file changed, 38 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/7360f5bd/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
index cff108a..45b5d5f 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTDictionaryCodeSystem.java
@@ -1,6 +1,5 @@
 package org.apache.kylin.storage.gridtable;
 
-import com.google.common.collect.Maps;
 import org.apache.kylin.common.util.ByteArray;
 import org.apache.kylin.common.util.BytesUtil;
 import org.apache.kylin.dict.Dictionary;
@@ -9,7 +8,6 @@ import org.apache.kylin.metadata.measure.MeasureAggregator;
 import org.apache.kylin.metadata.serializer.DataTypeSerializer;
 
 import java.nio.ByteBuffer;
-import java.util.BitSet;
 import java.util.Map;
 
 /**
@@ -17,10 +15,9 @@ import java.util.Map;
  */
 public class GTDictionaryCodeSystem implements IGTCodeSystem {
     private GTInfo info;
-    private BitSet encodedColumns = null;
     private Map<Integer, Dictionary> dictionaryMaps = null; // key: column index; value: dictionary for this column;
-    private Map<Integer, DataTypeSerializer> serializerMap = null; // column index; value: serializer for this column;
     private IFilterCodeSystem<ByteArray> filterCS;
+    private DataTypeSerializer[] serializers;
 
     public GTDictionaryCodeSystem(Map<Integer, Dictionary> dictionaryMaps) {
         this.dictionaryMaps = dictionaryMaps;
@@ -29,15 +26,13 @@ public class GTDictionaryCodeSystem implements IGTCodeSystem {
     @Override
     public void init(GTInfo info) {
         this.info = info;
-        encodedColumns = new BitSet();
-        for (Integer index : dictionaryMaps.keySet()) {
-            encodedColumns.set(index);
-        }
 
-        serializerMap = Maps.newHashMap();
+        serializers = new DataTypeSerializer[info.nColumns];
         for (int i = 0; i < info.nColumns; i++) {
-            if (!encodedColumns.get(i)) {
-                serializerMap.put(i, DataTypeSerializer.create(info.colTypes[i]));
+            if (dictionaryMaps.get(i) != null) {
+                serializers[i] = new DictionarySerializer(dictionaryMaps.get(i));
+            } else {
+                serializers[i] = DataTypeSerializer.create(info.colTypes[i]);
             }
         }
 
@@ -77,35 +72,22 @@ public class GTDictionaryCodeSystem implements IGTCodeSystem {
 
     @Override
     public int codeLength(int col, ByteBuffer buf) {
-        if (useDictionary(col))
-            return dictionaryMaps.get(col).getSizeOfId();
-        else
-            return serializerMap.get(col).peekLength(buf);
+        return serializers[col].peekLength(buf);
     }
 
     @Override
     public void encodeColumnValue(int col, Object value, ByteBuffer buf) {
-        if (useDictionary(col)) {
-            int id = dictionaryMaps.get(col).getIdFromValue(value);
-            BytesUtil.writeUnsigned(id, dictionaryMaps.get(col).getSizeOfId(), buf);
-        } else {
-            serializerMap.get(col).serialize(value, buf);
-        }
+        serializers[col].serialize(value, buf);
     }
 
     @Override
     public void encodeColumnValue(int col, Object value, int roundingFlag, ByteBuffer buf) {
-
+        throw new UnsupportedOperationException();
     }
 
     @Override
     public Object decodeColumnValue(int col, ByteBuffer buf) {
-        if (useDictionary(col)) {
-            int id = BytesUtil.readUnsigned(buf, dictionaryMaps.get(col).getSizeOfId());
-            return dictionaryMaps.get(col).getValueFromId(id);
-        } else {
-            return serializerMap.get(col).deserialize(buf);
-        }
+       return serializers[col].deserialize(buf);
     }
 
     @Override
@@ -113,7 +95,33 @@ public class GTDictionaryCodeSystem implements IGTCodeSystem {
         return MeasureAggregator.create(aggrFunction, info.colTypes[col].toString());
     }
 
-    private boolean useDictionary(int col) {
-        return encodedColumns.get(col);
+    class DictionarySerializer extends DataTypeSerializer {
+        private Dictionary dictionary;
+
+        DictionarySerializer(Dictionary dictionary) {
+            this.dictionary = dictionary;
+        }
+
+        @Override
+        public void serialize(Object value, ByteBuffer out) {
+            int id = dictionary.getIdFromValue(value);
+            BytesUtil.writeUnsigned(id, dictionary.getSizeOfId(), out);
+        }
+
+        @Override
+        public Object deserialize(ByteBuffer in) {
+            int id = BytesUtil.readUnsigned(in, dictionary.getSizeOfId());
+            return dictionary.getValueFromId(id);
+        }
+
+        @Override
+        public int peekLength(ByteBuffer in) {
+            return dictionary.getSizeOfId();
+        }
+
+        @Override
+        public Object valueOf(byte[] value) {
+            throw new UnsupportedOperationException();
+        }
     }
 }
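
The refactor above folds the dictionary/no-dictionary branches into a single serializer array: dictionary-encoded columns get a DictionarySerializer adapter, the rest keep a plain serializer, and every caller goes through the same code path. A self-contained sketch of that shape, with deliberately simplified interfaces that are not Kylin's:

    import java.nio.ByteBuffer;
    import java.util.HashMap;
    import java.util.Map;

    interface ColumnSerializer {
        void serialize(Object value, ByteBuffer out);
    }

    // Fallback: write the raw value bytes.
    class PlainSerializer implements ColumnSerializer {
        public void serialize(Object value, ByteBuffer out) {
            out.put(String.valueOf(value).getBytes());
        }
    }

    // Dictionary-backed: write the dictionary id, not the value.
    class DictSerializer implements ColumnSerializer {
        private final Map<Object, Integer> dict;
        DictSerializer(Map<Object, Integer> dict) { this.dict = dict; }
        public void serialize(Object value, ByteBuffer out) {
            out.putInt(dict.get(value));
        }
    }

    class CodeSystemSketch {

        private final ColumnSerializer[] serializers;

        CodeSystemSketch(int nColumns, Map<Integer, Map<Object, Integer>> dictByColumn) {
            serializers = new ColumnSerializer[nColumns];
            for (int i = 0; i < nColumns; i++) {
                Map<Object, Integer> dict = dictByColumn.get(i);
                serializers[i] = (dict != null) ? new DictSerializer(dict) : new PlainSerializer();
            }
        }

        void encode(int col, Object value, ByteBuffer out) {
            serializers[col].serialize(value, out);   // uniform call path for every column
        }

        public static void main(String[] args) {
            Map<Object, Integer> cityDict = new HashMap<Object, Integer>();
            cityDict.put("Beijing", 0);
            Map<Integer, Map<Object, Integer>> dicts = new HashMap<Integer, Map<Object, Integer>>();
            dicts.put(0, cityDict);                   // column 0 is dictionary encoded

            CodeSystemSketch cs = new CodeSystemSketch(2, dicts);
            ByteBuffer buf = ByteBuffer.allocate(64);
            cs.encode(0, "Beijing", buf);             // writes the id
            cs.encode(1, "42.5", buf);                // writes the raw bytes
            System.out.println("bytes written: " + buf.position());
        }
    }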


[09/50] incubator-kylin git commit: refactor

Posted by li...@apache.org.
refactor


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/21b8f0f6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/21b8f0f6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/21b8f0f6

Branch: refs/heads/streaming-localdict
Commit: 21b8f0f6f43bec5caba0d7c5bbac2f47a5aef27a
Parents: 9a1c4cb
Author: qianhao.zhou <qi...@ebay.com>
Authored: Thu Mar 26 18:04:14 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Thu Mar 26 18:04:14 2015 +0800

----------------------------------------------------------------------
 .../java/org/apache/kylin/streaming/KafkaConsumer.java  |  5 +++--
 .../org/apache/kylin/streaming/StreamingBootstrap.java  | 12 +++++-------
 .../kylin/streaming/invertedindex/IIStreamBuilder.java  |  4 ++--
 3 files changed, 10 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/21b8f0f6/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java b/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
index 910041c..18c8403 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
@@ -46,6 +46,7 @@ import java.nio.ByteBuffer;
 import java.util.List;
 import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.atomic.AtomicLong;
 
 /**
@@ -59,7 +60,7 @@ public abstract class KafkaConsumer implements Runnable {
     private KafkaConfig kafkaConfig;
     private List<Broker> replicaBrokers;
     private long offset;
-    private BlockingQueue<Stream> streamQueue;
+    private LinkedBlockingQueue<Stream> streamQueue;
 
     private Logger logger;
 
@@ -70,7 +71,7 @@ public abstract class KafkaConsumer implements Runnable {
         offset = startOffset;
         this.replicaBrokers = initialBrokers;
         logger = LoggerFactory.getLogger("KafkaConsumer_" + topic + "_" + partitionId);
-        streamQueue = new ArrayBlockingQueue<Stream>(kafkaConfig.getMaxReadCount());
+        streamQueue = new LinkedBlockingQueue<Stream>(kafkaConfig.getMaxReadCount());
     }
 
     public BlockingQueue<Stream> getStreamQueue() {

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/21b8f0f6/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java b/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java
index 4528a72..4b7c6b7 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/StreamingBootstrap.java
@@ -43,9 +43,11 @@ import org.apache.kylin.invertedindex.IIDescManager;
 import org.apache.kylin.invertedindex.IIInstance;
 import org.apache.kylin.invertedindex.IIManager;
 import org.apache.kylin.invertedindex.model.IIDesc;
+import org.apache.kylin.streaming.invertedindex.IIStreamBuilder;
 
 import java.nio.ByteBuffer;
 import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
 
 /**
  * Created by qianzhou on 3/26/15.
@@ -91,12 +93,8 @@ public class StreamingBootstrap {
             }
         };
         final IIDesc desc = ii.getDescriptor();
-        Executors.newSingleThreadExecutor().execute(consumer);
-        while (true) {
-            final Stream stream = consumer.getStreamQueue().poll();
-            if (stream != null) {
-                System.out.println("offset:" + stream.getOffset() + " content:" + new String(stream.getRawData()));
-            }
-        }
+        Executors.newSingleThreadExecutor().submit(consumer);
+        final Future<?> future = Executors.newSingleThreadExecutor().submit(new IIStreamBuilder(consumer.getStreamQueue(), ii.getSegments().get(0).getStorageLocationIdentifier(), desc, partitionId));
+        future.get();
     }
 }

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/21b8f0f6/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java b/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java
index 9724ba7..f9adefe 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/invertedindex/IIStreamBuilder.java
@@ -64,7 +64,7 @@ import javax.annotation.Nullable;
 import java.io.IOException;
 import java.util.List;
 import java.util.Map;
-import java.util.concurrent.LinkedBlockingDeque;
+import java.util.concurrent.BlockingQueue;
 import java.util.concurrent.TimeUnit;
 
 /**
@@ -78,7 +78,7 @@ public class IIStreamBuilder extends StreamBuilder {
     private final HTableInterface hTable;
     private final BatchSliceBuilder sliceBuilder;
 
-    public IIStreamBuilder(LinkedBlockingDeque<Stream> queue, String hTableName, IIDesc desc, int partitionId) {
+    public IIStreamBuilder(BlockingQueue<Stream> queue, String hTableName, IIDesc desc, int partitionId) {
         super(queue, desc.getSliceSize());
         this.desc = desc;
         try {


[35/50] incubator-kylin git commit: refactor

Posted by li...@apache.org.
refactor


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/b979dfae
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/b979dfae
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/b979dfae

Branch: refs/heads/streaming-localdict
Commit: b979dfaea999b548e089a0e41d38e164e7b46662
Parents: dee2955
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 15:35:57 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 15:35:57 2015 +0800

----------------------------------------------------------------------
 .../kylin/job/streaming/StreamingBootstrap.java    | 17 +++++++++++------
 .../apache/kylin/job/streaming/StreamingCLI.java   |  2 --
 .../org/apache/kylin/streaming/KafkaConsumer.java  | 10 ++++------
 3 files changed, 15 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/b979dfae/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java b/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
index 65b23c4..5d1673c 100644
--- a/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
+++ b/job/src/main/java/org/apache/kylin/job/streaming/StreamingBootstrap.java
@@ -66,7 +66,14 @@ public class StreamingBootstrap {
     private Map<String, KafkaConsumer> kafkaConsumers = Maps.newConcurrentMap();
 
     public static StreamingBootstrap getInstance(KylinConfig kylinConfig) {
-        return new StreamingBootstrap(kylinConfig);
+        final StreamingBootstrap bootstrap = new StreamingBootstrap(kylinConfig);
+        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+            @Override
+            public void run() {
+                bootstrap.stop();
+            }
+        }));
+        return bootstrap;
     }
 
     private StreamingBootstrap(KylinConfig kylinConfig) {
@@ -84,11 +91,9 @@ public class StreamingBootstrap {
         }
     }
 
-    public void stop(String streaming, int partitionId) throws Exception {
-        final KafkaConsumer consumer = kafkaConsumers.remove(getKey(streaming, partitionId));
-        if (consumer != null) {
+    public void stop() {
+        for (KafkaConsumer consumer : kafkaConsumers.values()) {
             consumer.stop();
-            consumer.getStreamQueue().put(Stream.EOF);
         }
     }
 
@@ -125,7 +130,7 @@ public class StreamingBootstrap {
         task.setStreamParser(JsonStreamParser.instance);
 
         Executors.newSingleThreadExecutor().submit(consumer);
-        Executors.newSingleThreadExecutor().submit(task);
+        Executors.newSingleThreadExecutor().submit(task).get();
     }
 
     private String getKey(String streaming, int partitionId) {
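
The bootstrap now registers a JVM shutdown hook that calls stop() on every running consumer. A self-contained sketch of that hook pattern (Worker below is a stand-in, not the Kylin KafkaConsumer):

    import java.util.ArrayList;
    import java.util.List;

    public class ShutdownHookSketch {

        static class Worker {
            private volatile boolean running = true;
            void stop() { running = false; }
            boolean isRunning() { return running; }
        }

        public static void main(String[] args) {
            final List<Worker> workers = new ArrayList<Worker>();
            workers.add(new Worker());
            workers.add(new Worker());

            Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
                @Override
                public void run() {
                    for (Worker w : workers) {
                        w.stop();                  // flip each worker's flag as the JVM exits
                    }
                }
            }));

            System.out.println("shutdown hook registered for " + workers.size() + " workers");
        }
    }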

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/b979dfae/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java b/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
index 4977339..219ca41 100644
--- a/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
+++ b/job/src/main/java/org/apache/kylin/job/streaming/StreamingCLI.java
@@ -55,8 +55,6 @@ public class StreamingCLI {
             if (args[0].equals("start")) {
                 String kafkaConfName = args[1];
                 StreamingBootstrap.getInstance(KylinConfig.getInstanceFromEnv()).start(kafkaConfName, 0);
-            } else if (args.equals("stop")) {
-
             } else {
                 printArgsError(args);
             }

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/b979dfae/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java b/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
index b083dea..868673d 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/KafkaConsumer.java
@@ -34,7 +34,6 @@
 
 package org.apache.kylin.streaming;
 
-import kafka.api.OffsetRequest;
 import kafka.cluster.Broker;
 import kafka.javaapi.FetchResponse;
 import kafka.javaapi.PartitionMetadata;
@@ -44,10 +43,8 @@ import org.slf4j.LoggerFactory;
 
 import java.nio.ByteBuffer;
 import java.util.List;
-import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.BlockingQueue;
 import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.atomic.AtomicLong;
 
 /**
  * Created by qianzhou on 2/15/15.
@@ -64,7 +61,7 @@ public abstract class KafkaConsumer implements Runnable {
 
     private Logger logger;
 
-    private volatile boolean stop = false;
+    private volatile boolean isRunning = true;
 
     public KafkaConsumer(String topic, int partitionId, long startOffset, List<Broker> initialBrokers, KafkaConfig kafkaConfig) {
         this.topic = topic;
@@ -94,7 +91,7 @@ public abstract class KafkaConsumer implements Runnable {
     public void run() {
         try {
             Broker leadBroker = getLeadBroker();
-            while (!stop) {
+            while (isRunning) {
                 if (leadBroker == null) {
                     leadBroker = getLeadBroker();
                 }
@@ -118,6 +115,7 @@ public abstract class KafkaConsumer implements Runnable {
                     offset++;
                 }
             }
+            getStreamQueue().put(Stream.EOF);
         } catch (Exception e) {
             logger.error("consumer has encountered an error", e);
         }
@@ -126,7 +124,7 @@ public abstract class KafkaConsumer implements Runnable {
     protected abstract void consume(long offset, ByteBuffer payload) throws Exception;
 
     public void stop() {
-        this.stop = true;
+        this.isRunning = false;
     }
 
 }


[04/50] incubator-kylin git commit: refactor

Posted by li...@apache.org.
refactor


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/56d57a2d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/56d57a2d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/56d57a2d

Branch: refs/heads/streaming-localdict
Commit: 56d57a2d5940a19c575125a9aff073235355b4c3
Parents: 9dd1512
Author: qianhao.zhou <qi...@ebay.com>
Authored: Thu Mar 26 16:21:41 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Thu Mar 26 16:21:41 2015 +0800

----------------------------------------------------------------------
 .../org/apache/kylin/streaming/JsonStreamParser.java     | 11 +++++++++--
 .../java/org/apache/kylin/streaming/KafkaConfig.java     |  3 +++
 .../java/org/apache/kylin/streaming/StreamParser.java    |  3 +--
 .../org/apache/kylin/streaming/StringStreamParser.java   |  2 +-
 .../apache/kylin/streaming/cube/CubeStreamBuilder.java   |  2 +-
 .../streaming/invertedindex/PrintOutStreamBuilder.java   |  6 +++---
 6 files changed, 18 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/56d57a2d/streaming/src/main/java/org/apache/kylin/streaming/JsonStreamParser.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/JsonStreamParser.java b/streaming/src/main/java/org/apache/kylin/streaming/JsonStreamParser.java
index cb43dc6..5c8b49d 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/JsonStreamParser.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/JsonStreamParser.java
@@ -35,6 +35,7 @@
 package org.apache.kylin.streaming;
 
 import com.google.common.collect.Lists;
+import com.google.gson.JsonElement;
 import com.google.gson.JsonObject;
 import com.google.gson.JsonParser;
 import org.apache.kylin.metadata.model.TblColRef;
@@ -42,6 +43,7 @@ import org.apache.kylin.metadata.model.TblColRef;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
+import java.util.Map;
 
 /**
  * Created by qianzhou on 3/25/15.
@@ -55,11 +57,16 @@ public final class JsonStreamParser implements StreamParser {
     private JsonStreamParser(){}
 
     @Override
-    public List<String> parse(Stream stream, Collection<TblColRef> allColumns) {
+    public List<String> parse(Stream stream, List<TblColRef> allColumns) {
         final JsonObject root = jsonParser.parse(new String(stream.getRawData())).getAsJsonObject();
         ArrayList<String> result = Lists.newArrayList();
+
         for (TblColRef column : allColumns) {
-            result.add(root.get(column.getName()).getAsString());
+            for (Map.Entry<String, JsonElement> entry : root.entrySet()) {
+                if (entry.getKey().equalsIgnoreCase(column.getName())) {
+                    result.add(entry.getValue().getAsString());
+                }
+            }
         }
         return result;
     }
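
The parser change above matches JSON field names case-insensitively by scanning the object's entries instead of calling get() with the exact column name. A small, standalone illustration of the same lookup (requires Gson on the classpath; the class name is illustrative):

    import java.util.Map;

    import com.google.gson.JsonElement;
    import com.google.gson.JsonObject;
    import com.google.gson.JsonParser;

    class CaseInsensitiveJsonLookup {

        // Returns the value of the first field whose name matches, ignoring case.
        static String get(JsonObject root, String columnName) {
            for (Map.Entry<String, JsonElement> entry : root.entrySet()) {
                if (entry.getKey().equalsIgnoreCase(columnName)) {
                    return entry.getValue().getAsString();
                }
            }
            return null;                                  // column absent from this message
        }

        public static void main(String[] args) {
            JsonObject root = new JsonParser().parse("{\"UserName\":\"kylin\"}").getAsJsonObject();
            System.out.println(get(root, "USERNAME"));    // prints "kylin" despite the case difference
        }
    }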

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/56d57a2d/streaming/src/main/java/org/apache/kylin/streaming/KafkaConfig.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/KafkaConfig.java b/streaming/src/main/java/org/apache/kylin/streaming/KafkaConfig.java
index ee5a96a..b22c7e0 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/KafkaConfig.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/KafkaConfig.java
@@ -76,6 +76,9 @@ public class KafkaConfig extends RootPersistentEntity {
     @JsonProperty("bufferSize")
     private int bufferSize;
 
+    @JsonProperty("iiDesc")
+    private String iiDesc;
+
     private int partitionId;
 
     public int getTimeout() {

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/56d57a2d/streaming/src/main/java/org/apache/kylin/streaming/StreamParser.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/StreamParser.java b/streaming/src/main/java/org/apache/kylin/streaming/StreamParser.java
index 0c59151..9b41c95 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/StreamParser.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/StreamParser.java
@@ -36,7 +36,6 @@ package org.apache.kylin.streaming;
 
 import org.apache.kylin.metadata.model.TblColRef;
 
-import java.util.Collection;
 import java.util.List;
 
 /**
@@ -44,5 +43,5 @@ import java.util.List;
  */
 public interface StreamParser {
 
-    List<String> parse(Stream stream, Collection<TblColRef> allColumns);
+    List<String> parse(Stream stream, List<TblColRef> allColumns);
 }
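
The move from Collection to List matters because the parsed values are positional and must line up with the columns; a plain Collection gives no ordering guarantee, which is presumably the motivation. A minimal hypothetical implementation of the updated interface, assuming it lives in the same package:

    package org.apache.kylin.streaming;

    import java.util.Arrays;
    import java.util.List;

    import org.apache.kylin.metadata.model.TblColRef;

    // hypothetical parser for tab-separated payloads; it relies on the
    // List's ordering to line values up with columns
    public class TsvStreamParser implements StreamParser {

        @Override
        public List<String> parse(Stream stream, List<TblColRef> allColumns) {
            return Arrays.asList(new String(stream.getRawData()).split("\t"));
        }
    }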

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/56d57a2d/streaming/src/main/java/org/apache/kylin/streaming/StringStreamParser.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/StringStreamParser.java b/streaming/src/main/java/org/apache/kylin/streaming/StringStreamParser.java
index 7611869..3c62a3a 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/StringStreamParser.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/StringStreamParser.java
@@ -49,7 +49,7 @@ public final class StringStreamParser implements StreamParser {
 
     private StringStreamParser(){}
     @Override
-    public List<String> parse(Stream stream, Collection<TblColRef> allColumns) {
+    public List<String> parse(Stream stream, List<TblColRef> allColumns) {
         return Lists.newArrayList(new String(stream.getRawData()).split(","));
     }
 }

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/56d57a2d/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java b/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
index 9429033..912c3cd 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
@@ -410,7 +410,7 @@ public class CubeStreamBuilder extends StreamBuilder {
     }
 
     private List<String> parseStream(Stream stream, CubeDesc desc) {
-        return getStreamParser().parse(stream, desc.listAllColumns());
+        return getStreamParser().parse(stream, Lists.newArrayList(desc.listAllColumns()));
     }
 
 

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/56d57a2d/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/PrintOutStreamBuilder.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/PrintOutStreamBuilder.java b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/PrintOutStreamBuilder.java
index 43aa0a5..e83bdc5 100644
--- a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/PrintOutStreamBuilder.java
+++ b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/PrintOutStreamBuilder.java
@@ -49,9 +49,9 @@ import java.util.concurrent.BlockingQueue;
  */
 public class PrintOutStreamBuilder extends StreamBuilder {
 
-    private final Collection<TblColRef> allColumns;
+    private final List<TblColRef> allColumns;
 
-    public PrintOutStreamBuilder(BlockingQueue<Stream> streamQueue, int sliceSize, Collection<TblColRef> allColumns) {
+    public PrintOutStreamBuilder(BlockingQueue<Stream> streamQueue, int sliceSize, List<TblColRef> allColumns) {
         super(streamQueue, sliceSize);
         setStreamParser(JsonStreamParser.instance);
         this.allColumns = allColumns;
@@ -61,7 +61,7 @@ public class PrintOutStreamBuilder extends StreamBuilder {
     protected void build(List<Stream> streamsToBuild) throws Exception {
         for (Stream stream : streamsToBuild) {
             final List<String> row = getStreamParser().parse(stream, allColumns);
-            System.out.println(StringUtils.join(row, ","));
+            System.out.println("offset:" + stream.getOffset() + " " + StringUtils.join(row, ","));
         }
     }
 }


[43/50] incubator-kylin git commit: add streaming shell

Posted by li...@apache.org.
add streaming shell


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/d72f2e67
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/d72f2e67
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/d72f2e67

Branch: refs/heads/streaming-localdict
Commit: d72f2e679571d6b6ba5baade8b49f6c15b9adcf8
Parents: a36d416
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 17:07:10 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 17:07:10 2015 +0800

----------------------------------------------------------------------
 bin/kylin.sh | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/d72f2e67/bin/kylin.sh
----------------------------------------------------------------------
diff --git a/bin/kylin.sh b/bin/kylin.sh
index 179fa47..95568e1 100644
--- a/bin/kylin.sh
+++ b/bin/kylin.sh
@@ -76,6 +76,32 @@ then
     fi
     rm ${KYLIN_HOME}/pid
     exit 0
+elif [ $1 == "streaming" ]
+then
+    useSandbox=`cat ${KYLIN_HOME}/conf/kylin.properties | grep 'kylin.sandbox' | awk -F '=' '{print $2}'`
+    spring_profile="default"
+    if [ "$useSandbox" = "true" ]
+        then spring_profile="sandbox"
+    fi
+
+    #retrieve $hive_dependency
+    source ${dir}/find-hive-dependency.sh
+    #retrieve $KYLIN_EXTRA_START_OPTS
+    if [ -f "${dir}/setenv.sh" ]
+        then source ${dir}/setenv.sh
+    fi
+
+    export HBASE_CLASSPATH=$hive_dependency:${HBASE_CLASSPATH}
+    export JAVA_OPTS="-Xms2048M -Xmx2048M"
+
+    hbase ${KYLIN_EXTRA_START_OPTS} \
+    -Djava.util.logging.manager=org.apache.juli.ClassLoaderLogManager \
+    -Dorg.apache.catalina.connector.CoyoteAdapter.ALLOW_BACKSLASH=true \
+    -Dkylin.hive.dependency=${hive_dependency} \
+    -Dspring.profiles.active=${spring_profile} \
+    org.apache.hadoop.util.RunJar ${KYLIN_HOME}/lib/kylin-job-*.jar org.apache.kylin.job.streaming.StreamingCLI start $2 > ${tomcat_root}/logs/kylin.log 2>&1 & echo $! > ${KYLIN_HOME}/$2 &
+    echo "streaming started $2"
+    exit 0
 else
     echo "usage: kylin.sh start or kylin.sh stop"
     exit 1


[46/50] incubator-kylin git commit: remove compilation error

Posted by li...@apache.org.
remove compilation error


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/a3ff2d9f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/a3ff2d9f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/a3ff2d9f

Branch: refs/heads/streaming-localdict
Commit: a3ff2d9ffeb65bd58aa4a81b562dcb6fa9fc5a60
Parents: cff578a
Author: honma <ho...@ebay.com>
Authored: Fri Mar 27 18:26:06 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Fri Mar 27 18:26:06 2015 +0800

----------------------------------------------------------------------
 job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/a3ff2d9f/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java b/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
index d42da33..3e352ff 100644
--- a/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
+++ b/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
@@ -75,6 +75,6 @@ public class IIStreamBuilderTest extends HBaseMetadataTestCase {
 
     @Test
     public void test() throws Exception {
-        StreamingBootstrap.getInstance(kylinConfig).startStreaming("eagle", 0);
+        //StreamingBootstrap.getInstance(kylinConfig).startStreaming("eagle", 0);
     }
 }


[05/50] incubator-kylin git commit: Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict

Posted by li...@apache.org.
Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/71324f4c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/71324f4c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/71324f4c

Branch: refs/heads/streaming-localdict
Commit: 71324f4cc5b168a2fee318b84c167d17a72c08fb
Parents: 56d57a2 c8f4c2a
Author: qianhao.zhou <qi...@ebay.com>
Authored: Thu Mar 26 16:21:47 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Thu Mar 26 16:21:47 2015 +0800

----------------------------------------------------------------------
 .../model/IIJoinedFlatTableDesc.java            |  12 +-
 .../invertedindex/model/IIKeyValueCodec.java    |  91 +++++----
 .../model/IIKeyValueCodecWithState.java         |  68 +++++++
 .../apache/kylin/invertedindex/model/IIRow.java |  13 ++
 .../org/apache/kylin/job/JoinedFlatTable.java   |   1 -
 .../kylin/job/hadoop/AbstractHadoopJob.java     |   2 +-
 .../kylin/job/hadoop/cube/BaseCuboidMapper.java |   2 +-
 .../kylin/job/hadoop/cube/CubeHFileMapper.java  |   2 +-
 .../kylin/job/hadoop/cube/CuboidReducer.java    |   2 +-
 .../job/hadoop/cube/FactDistinctColumnsJob.java |   2 +-
 .../hadoop/cube/FactDistinctColumnsMapper.java  | 200 -------------------
 .../cube/FactDistinctColumnsMapperBase.java     |  81 ++++++++
 .../hadoop/cube/FactDistinctColumnsReducer.java |   2 +-
 .../cube/FactDistinctHiveColumnsMapper.java     | 129 ++++++++++++
 .../cube/FactDistinctIIColumnsMapper.java       | 129 ++++++++++++
 .../job/hadoop/cube/MergeCuboidMapper.java      |   2 +-
 .../kylin/job/hadoop/cube/NDCuboidMapper.java   |   2 +-
 .../job/hadoop/cube/NewBaseCuboidMapper.java    |   2 +-
 .../job/hadoop/cubev2/InMemCuboidMapper.java    |   2 +-
 .../job/hadoop/cubev2/InMemCuboidReducer.java   |   2 +-
 .../invertedindex/InvertedIndexMapper.java      |   2 +-
 .../invertedindex/InvertedIndexPartitioner.java |   2 +-
 .../invertedindex/InvertedIndexReducer.java     |   2 +-
 .../metadata/model/IJoinedFlatTableDesc.java    |   2 -
 .../metadata/model/IntermediateColumnDesc.java  |   4 +
 .../endpoint/HbaseServerKVIterator.java         |   9 +-
 26 files changed, 490 insertions(+), 277 deletions(-)
----------------------------------------------------------------------



[49/50] incubator-kylin git commit: KYLIN-625, filter constants convert pass

Posted by li...@apache.org.
KYLIN-625, filter constants convert pass


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/48a79714
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/48a79714
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/48a79714

Branch: refs/heads/streaming-localdict
Commit: 48a797149b604d0f58f6b450bde2c4fc3c75937e
Parents: d7fc231
Author: Li, Yang <ya...@ebay.com>
Authored: Fri Mar 27 21:03:13 2015 +0800
Committer: Li, Yang <ya...@ebay.com>
Committed: Fri Mar 27 21:03:13 2015 +0800

----------------------------------------------------------------------
 .../apache/kylin/metadata/model/ColumnDesc.java | 10 +++++++
 .../apache/kylin/metadata/model/TableDesc.java  |  7 +++++
 .../apache/kylin/storage/gridtable/GTUtil.java  | 12 ++-------
 .../storage/gridtable/DictGridTableTest.java    | 28 ++++++++++++++++++--
 4 files changed, 45 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/48a79714/metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java b/metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java
index 95b320c..194b650 100644
--- a/metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java
+++ b/metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java
@@ -21,6 +21,7 @@ package org.apache.kylin.metadata.model;
 import com.fasterxml.jackson.annotation.JsonAutoDetect;
 import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
 import com.fasterxml.jackson.annotation.JsonProperty;
+
 import org.apache.commons.lang.StringUtils;
 
 /**
@@ -131,4 +132,13 @@ public class ColumnDesc {
         return "ColumnDesc [name=" + name + ",table=" + table.getIdentity() + "]";
     }
 
+    public static ColumnDesc mockup(TableDesc table, int oneBasedColumnIndex, String name, String datatype) {
+        ColumnDesc desc = new ColumnDesc();
+        String id = "" + oneBasedColumnIndex;
+        desc.setId(id);
+        desc.setName(name);
+        desc.setDatatype(datatype);
+        desc.init(table);
+        return desc;
+    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/48a79714/metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java b/metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java
index 6db1202..6934ae9 100644
--- a/metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java
+++ b/metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java
@@ -162,4 +162,11 @@ public class TableDesc extends RootPersistentEntity {
     public String toString() {
         return "TableDesc [database=" + getDatabase() + " name=" + name + "]";
     }
+    
+    /** create a mockup table for unit test */
+    public static TableDesc mockup(String tableName) {
+        TableDesc mockup = new TableDesc();
+        mockup.setName(tableName);
+        return mockup;
+    }
 }
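
Together with ColumnDesc.mockup above, building a column reference for a unit test collapses to a couple of lines, mirroring the usage in GTUtil and DictGridTableTest below. A minimal sketch; the table and column names here are hypothetical:

    import org.apache.kylin.metadata.model.ColumnDesc;
    import org.apache.kylin.metadata.model.TableDesc;
    import org.apache.kylin.metadata.model.TblColRef;

    public class MockupExample {
        public static void main(String[] args) {
            TableDesc table = TableDesc.mockup("EXT");                       // hypothetical table name
            ColumnDesc col = ColumnDesc.mockup(table, 1, "A", "timestamp");  // 1-based column index
            TblColRef ref = new TblColRef(col);
            System.out.println(ref);
        }
    }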

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/48a79714/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java
index 7d042eb..94e5206 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java
@@ -19,18 +19,10 @@ import com.google.common.collect.Sets;
 
 public class GTUtil {
 
-    static final TableDesc MOCKUP_TABLE = new TableDesc();
-    static {
-        MOCKUP_TABLE.setName("GT_MOCKUP_TABLE");
-    }
+    static final TableDesc MOCKUP_TABLE = TableDesc.mockup("GT_MOCKUP_TABLE");
 
     static TblColRef tblColRef(int col, String datatype) {
-        ColumnDesc desc = new ColumnDesc();
-        String id = "" + (col + 1);
-        desc.setId(id);
-        desc.setName(id);
-        desc.setDatatype(datatype);
-        desc.init(MOCKUP_TABLE);
+        ColumnDesc desc = ColumnDesc.mockup(MOCKUP_TABLE, col + 1, "" + col, datatype);
         return new TblColRef(desc);
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/48a79714/storage/src/test/java/org/apache/kylin/storage/gridtable/DictGridTableTest.java
----------------------------------------------------------------------
diff --git a/storage/src/test/java/org/apache/kylin/storage/gridtable/DictGridTableTest.java b/storage/src/test/java/org/apache/kylin/storage/gridtable/DictGridTableTest.java
index 46ec66c..a3de8b8 100644
--- a/storage/src/test/java/org/apache/kylin/storage/gridtable/DictGridTableTest.java
+++ b/storage/src/test/java/org/apache/kylin/storage/gridtable/DictGridTableTest.java
@@ -21,7 +21,9 @@ import org.apache.kylin.metadata.filter.ExtractTupleFilter;
 import org.apache.kylin.metadata.filter.LogicalTupleFilter;
 import org.apache.kylin.metadata.filter.TupleFilter;
 import org.apache.kylin.metadata.filter.TupleFilter.FilterOperatorEnum;
+import org.apache.kylin.metadata.model.ColumnDesc;
 import org.apache.kylin.metadata.model.DataType;
+import org.apache.kylin.metadata.model.TableDesc;
 import org.apache.kylin.metadata.model.TblColRef;
 import org.apache.kylin.storage.gridtable.GTInfo.Builder;
 import org.apache.kylin.storage.gridtable.memstore.GTSimpleMemStore;
@@ -37,6 +39,7 @@ public class DictGridTableTest {
         verifyFirstRow(table);
         verifyScanWithUnevaluatableFilter(table);
         verifyScanWithEvaluatableFilter(table);
+        verifyConvertFilterConstants(table);
     }
 
     private void verifyFirstRow(GridTable table) throws IOException {
@@ -51,8 +54,9 @@ public class DictGridTableTest {
         LogicalTupleFilter filter = and(fcomp, funevaluatable);
 
         GTScanRequest req = new GTScanRequest(info, null, setOf(0), setOf(3), new String[] { "sum" }, filter);
+
         // note the unEvaluatable column 1 in filter is added to group by
-        assertEquals("GTScanRequest [range=null-null, columns={0, 1, 3}, filterPushDown=AND [NULL.GT_MOCKUP_TABLE.1 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], [null]], aggrGroupBy={0, 1}, aggrMetrics={3}, aggrMetricsFuncs=[sum]]", req.toString());
+        assertEquals("GTScanRequest [range=null-null, columns={0, 1, 3}, filterPushDown=AND [NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], [null]], aggrGroupBy={0, 1}, aggrMetrics={3}, aggrMetricsFuncs=[sum]]", req.toString());
         
         doScanAndVerify(table, req, "[1421280000000, 20, null, 20, null]");
     }
@@ -65,12 +69,32 @@ public class DictGridTableTest {
         LogicalTupleFilter filter = and(fcomp1, fcomp2);
 
         GTScanRequest req = new GTScanRequest(info, null, setOf(0), setOf(3), new String[] { "sum" }, filter);
+        
         // note the evaluatable column 1 in filter is added to returned columns but not in group by
-        assertEquals("GTScanRequest [range=null-null, columns={0, 1, 3}, filterPushDown=AND [NULL.GT_MOCKUP_TABLE.1 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], NULL.GT_MOCKUP_TABLE.2 GT [\\x00]], aggrGroupBy={0}, aggrMetrics={3}, aggrMetricsFuncs=[sum]]", req.toString());
+        assertEquals("GTScanRequest [range=null-null, columns={0, 1, 3}, filterPushDown=AND [NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], NULL.GT_MOCKUP_TABLE.1 GT [\\x00]], aggrGroupBy={0}, aggrMetrics={3}, aggrMetricsFuncs=[sum]]", req.toString());
         
         doScanAndVerify(table, req, "[1421280000000, 30, null, 30, null]", "[1421366400000, 20, null, 40, null]");
     }
 
+    private void verifyConvertFilterConstants(GridTable table) {
+        GTInfo info = table.getInfo();
+        
+        TableDesc extTable = TableDesc.mockup("ext");
+        TblColRef extColA = new TblColRef(ColumnDesc.mockup(extTable, 1, "A", "timestamp"));
+        TblColRef extColB = new TblColRef(ColumnDesc.mockup(extTable, 2, "B", "integer"));
+
+        CompareTupleFilter fcomp1 = compare(extColA, FilterOperatorEnum.GT, "2015-01-14");
+        CompareTupleFilter fcomp2 = compare(extColB, FilterOperatorEnum.EQ, "10");
+        LogicalTupleFilter filter = and(fcomp1, fcomp2);
+        
+        Map<TblColRef, Integer> colMapping = Maps.newHashMap();
+        colMapping.put(extColA, 0);
+        colMapping.put(extColB, 1);
+        
+        TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
+        assertEquals("AND [NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], NULL.GT_MOCKUP_TABLE.1 EQ [\\x00]]", newFilter.toString());
+    }
+
     private void doScanAndVerify(GridTable table, GTScanRequest req, String... verifyRows) throws IOException {
         System.out.println(req);
         IGTScanner scanner = table.scan(req);


[26/50] incubator-kylin git commit: Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict

Posted by li...@apache.org.
Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/b2010404
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/b2010404
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/b2010404

Branch: refs/heads/streaming-localdict
Commit: b20104040c96fd76d419f2773aa3d00f997350ad
Parents: 3d3cee8 71bbd0c
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 13:57:35 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 13:57:35 2015 +0800

----------------------------------------------------------------------
 .../apache/kylin/dict/DateStrDictionary.java    |  12 +-
 .../job/hadoop/cubev2/InMemCuboidMapper.java    |   2 +-
 .../gridtable/GTDictionaryCodeSystem.java       |  16 +-
 .../kylin/storage/gridtable/GTScanRange.java    |  61 +++
 .../storage/gridtable/GTScanRangePlanner.java   | 474 +++++++++++++++++++
 .../kylin/storage/gridtable/GTScanRequest.java  |  22 +-
 .../apache/kylin/storage/gridtable/GTUtil.java  |  26 +-
 .../kylin/storage/gridtable/IGTCodeSystem.java  |  25 +-
 .../kylin/storage/gridtable/GridTableTest.java  |   2 +-
 9 files changed, 603 insertions(+), 37 deletions(-)
----------------------------------------------------------------------



[48/50] incubator-kylin git commit: fix

Posted by li...@apache.org.
fix


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/c043b858
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/c043b858
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/c043b858

Branch: refs/heads/streaming-localdict
Commit: c043b8588673ef282759b0144d2448dddd13145e
Parents: b5a78a6 a3ff2d9
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 18:27:03 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 18:27:03 2015 +0800

----------------------------------------------------------------------
 .../org/apache/kylin/job/cube/CubingJobBuilder.java     |  2 --
 .../org/apache/kylin/job/BuildCubeWithEngineTest.java   |  7 ++-----
 .../java/org/apache/kylin/job/IIStreamBuilderTest.java  | 12 ++++++------
 .../kylin/job/hadoop/invertedindex/II2CubeTest.java     |  2 ++
 4 files changed, 10 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/c043b858/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
----------------------------------------------------------------------
diff --cc job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
index bafcb61,3e352ff..d15d1e5
--- a/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
+++ b/job/src/test/java/org/apache/kylin/job/IIStreamBuilderTest.java
@@@ -96,13 -68,13 +96,13 @@@ public class IIStreamBuilderTest extend
          DeployUtil.overrideJobJarLocations();
      }
  
 -    @After
 -    public void after() {
 -        this.cleanupTestMetadata();
 -    }
 -
      @Test
      public void test() throws Exception {
- //        final StreamingBootstrap bootstrap = StreamingBootstrap.getInstance(kylinConfig);
- //        bootstrap.start("eagle", 0);
- //        Thread.sleep(30 * 60 * 1000);
- //        logger.info("time is up, stop streaming");
- //        bootstrap.stop();
- //        Thread.sleep(5 * 1000);
 -        //StreamingBootstrap.getInstance(kylinConfig).startStreaming("eagle", 0);
++        final StreamingBootstrap bootstrap = StreamingBootstrap.getInstance(kylinConfig);
++        bootstrap.start("eagle", 0);
++        Thread.sleep(30 * 60 * 1000);
++        logger.info("time is up, stop streaming");
++        bootstrap.stop();
++        Thread.sleep(5 * 1000);
      }
  }


[18/50] incubator-kylin git commit: Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict

Posted by li...@apache.org.
Merge branch 'streaming-localdict' of https://github.com/KylinOLAP/Kylin into streaming-localdict


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/70887247
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/70887247
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/70887247

Branch: refs/heads/streaming-localdict
Commit: 70887247acfa9b2bcf3241f8beb4fa97e14a1607
Parents: 2b5495c 0edf400
Author: qianhao.zhou <qi...@ebay.com>
Authored: Fri Mar 27 10:05:44 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Fri Mar 27 10:05:44 2015 +0800

----------------------------------------------------------------------
 .../cube/FactDistinctColumnsCombiner.java       |  6 +-
 .../job/hadoop/cube/FactDistinctColumnsJob.java |  4 +-
 .../cube/FactDistinctColumnsMapperBase.java     | 16 ++---
 .../hadoop/cube/FactDistinctColumnsReducer.java | 61 +++++++++++++-----
 .../cube/FactDistinctHiveColumnsMapper.java     | 51 ++++++++++-----
 .../gridtable/GTDictionaryCodeSystem.java       | 68 +++++++++++---------
 6 files changed, 130 insertions(+), 76 deletions(-)
----------------------------------------------------------------------



[29/50] incubator-kylin git commit: add serializer for Date/Time/Timestamp

Posted by li...@apache.org.
add serializer for Date/Time/Timestamp


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/24accccc
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/24accccc
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/24accccc

Branch: refs/heads/streaming-localdict
Commit: 24accccc59009dd305cd70fb96cfe3160ad8ffa1
Parents: bbbcae8
Author: Li, Yang <ya...@ebay.com>
Authored: Fri Mar 27 14:37:49 2015 +0800
Committer: Li, Yang <ya...@ebay.com>
Committed: Fri Mar 27 14:37:49 2015 +0800

----------------------------------------------------------------------
 .../apache/kylin/dict/DateStrDictionary.java    |  73 +--------
 .../kylin/invertedindex/index/TableRecord.java  |   7 +-
 .../metadata/serializer/DataTypeSerializer.java |   4 +-
 .../metadata/serializer/DateTimeSerializer.java |  39 +++++
 .../metadata/tool/HiveSourceTableLoader.java    | 155 -------------------
 .../apache/kylin/metadata/util/DateFormat.java  |  76 +++++++++
 .../metadata/util/HiveSourceTableLoader.java    | 155 +++++++++++++++++++
 .../tool/HiveSourceTableLoaderTest.java         |   2 +-
 .../apache/kylin/rest/service/CubeService.java  |   2 +-
 .../kylin/storage/hbase/HBaseKeyRange.java      |  12 +-
 .../org/apache/kylin/storage/tuple/Tuple.java   |   6 +-
 .../kylin/storage/gridtable/GridTableTest.java  |   8 +-
 12 files changed, 292 insertions(+), 247 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/24accccc/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
----------------------------------------------------------------------
diff --git a/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java b/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
index 95f67ff..4523e67 100644
--- a/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
+++ b/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
@@ -18,17 +18,14 @@
 
 package org.apache.kylin.dict;
 
+import static org.apache.kylin.metadata.util.DateFormat.*;
+
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.io.UnsupportedEncodingException;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
 import java.util.Date;
-import java.util.Map;
-import java.util.TimeZone;
-import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.commons.lang.StringUtils;
 
@@ -44,74 +41,8 @@ import org.apache.commons.lang.StringUtils;
  */
 public class DateStrDictionary extends Dictionary<String> {
 
-    static final String DEFAULT_DATE_PATTERN = "yyyy-MM-dd";
-    static final String DEFAULT_DATETIME_PATTERN_WITHOUT_MILLISECONDS = "yyyy-MM-dd HH:mm:ss";
-    static final String DEFAULT_DATETIME_PATTERN_WITH_MILLISECONDS = "yyyy-MM-dd HH:mm:ss.SSS";
-
     static final int ID_9999_12_31 = 3652426; // assume 0 based
 
-    static final private Map<String, ThreadLocal<SimpleDateFormat>> threadLocalMap = new ConcurrentHashMap<String, ThreadLocal<SimpleDateFormat>>();
-
-    static SimpleDateFormat getDateFormat(String datePattern) {
-        ThreadLocal<SimpleDateFormat> formatThreadLocal = threadLocalMap.get(datePattern);
-        if (formatThreadLocal == null) {
-            threadLocalMap.put(datePattern, formatThreadLocal = new ThreadLocal<SimpleDateFormat>());
-        }
-        SimpleDateFormat format = formatThreadLocal.get();
-        if (format == null) {
-            format = new SimpleDateFormat(datePattern);
-            format.setTimeZone(TimeZone.getTimeZone("GMT")); // NOTE: this must be GMT to calculate epoch date correctly
-            formatThreadLocal.set(format);
-        }
-        return format;
-    }
-
-    public static String dateToString(Date date) {
-        return dateToString(date, DEFAULT_DATETIME_PATTERN_WITHOUT_MILLISECONDS);
-    }
-
-    public static String dateToString(Date date, String pattern) {
-        return getDateFormat(pattern).format(date);
-    }
-
-    public static Date stringToDate(String str) {
-        return stringToDate(str, DEFAULT_DATE_PATTERN);
-    }
-
-    public static Date stringToDate(String str, String pattern) {
-        Date date = null;
-        try {
-            date = getDateFormat(pattern).parse(str);
-        } catch (ParseException e) {
-            throw new IllegalArgumentException("'" + str + "' is not a valid date of pattern '" + pattern + "'", e);
-        }
-        return date;
-    }
-
-    public static long stringToMillis(String str) {
-        if (isAllDigits(str)) {
-            return Long.parseLong(str);
-        } else if (str.length() == 10) {
-            return stringToDate(str, DEFAULT_DATE_PATTERN).getTime();
-        } else if (str.length() == 19) {
-            return stringToDate(str, DEFAULT_DATETIME_PATTERN_WITHOUT_MILLISECONDS).getTime();
-        } else if (str.length() == 23) {
-            return stringToDate(str, DEFAULT_DATETIME_PATTERN_WITH_MILLISECONDS).getTime();
-        } else {
-            throw new IllegalArgumentException("there is no valid date pattern for:" + str);
-        }
-    }
-    
-    private static boolean isAllDigits(String str) {
-        for (int i = 0, n = str.length(); i < n; i++) {
-            if (Character.isDigit(str.charAt(i)) == false)
-                return false;
-        }
-        return true;
-    }
-
-    // ============================================================================
-
     private String pattern;
     private int baseId;
     private int maxId;

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/24accccc/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecord.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecord.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecord.java
index ce1b7e0..15869f9 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecord.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/TableRecord.java
@@ -18,14 +18,11 @@
 
 package org.apache.kylin.invertedindex.index;
 
-import com.google.common.collect.Lists;
-import org.apache.kylin.dict.DateStrDictionary;
 import org.apache.commons.lang.ObjectUtils;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.kylin.dict.Dictionary;
-
-import java.util.List;
+import org.apache.kylin.metadata.util.DateFormat;
 
 /**
  * @author yangli9, honma
@@ -67,7 +64,7 @@ public class TableRecord implements Cloneable {
 
     public long getTimestamp() {
         String str = getValueString(info.getTimestampColumn());
-        return DateStrDictionary.stringToMillis(str);
+        return DateFormat.stringToMillis(str);
     }
 
     public int length(int col) {

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/24accccc/metadata/src/main/java/org/apache/kylin/metadata/serializer/DataTypeSerializer.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/serializer/DataTypeSerializer.java b/metadata/src/main/java/org/apache/kylin/metadata/serializer/DataTypeSerializer.java
index 094c2f1..63d4ddd 100644
--- a/metadata/src/main/java/org/apache/kylin/metadata/serializer/DataTypeSerializer.java
+++ b/metadata/src/main/java/org/apache/kylin/metadata/serializer/DataTypeSerializer.java
@@ -42,7 +42,9 @@ abstract public class DataTypeSerializer<T> implements BytesSerializer<T> {
         implementations.put("integer", LongSerializer.class);
         implementations.put("int", LongSerializer.class);
         implementations.put("smallint", LongSerializer.class);
-        implementations.put("date", StringSerializer.class);
+        implementations.put("date", DateTimeSerializer.class);
+        implementations.put("datetime", DateTimeSerializer.class);
+        implementations.put("timestamp", DateTimeSerializer.class);
     }
 
     public static DataTypeSerializer<?> create(String dataType) {

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/24accccc/metadata/src/main/java/org/apache/kylin/metadata/serializer/DateTimeSerializer.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/serializer/DateTimeSerializer.java b/metadata/src/main/java/org/apache/kylin/metadata/serializer/DateTimeSerializer.java
new file mode 100644
index 0000000..465c158
--- /dev/null
+++ b/metadata/src/main/java/org/apache/kylin/metadata/serializer/DateTimeSerializer.java
@@ -0,0 +1,39 @@
+package org.apache.kylin.metadata.serializer;
+
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.kylin.metadata.util.DateFormat;
+
+public class DateTimeSerializer extends DataTypeSerializer<LongWritable> {
+
+    // avoid mass object creation
+    LongWritable current = new LongWritable();
+
+    @Override
+    public void serialize(LongWritable value, ByteBuffer out) {
+        out.putLong(value.get());
+    }
+
+    @Override
+    public LongWritable deserialize(ByteBuffer in) {
+        current.set(in.getLong());
+        return current;
+    }
+
+    @Override
+    public int peekLength(ByteBuffer in) {
+        return 8;
+    }
+
+    @Override
+    public LongWritable valueOf(byte[] value) {
+        if (value == null)
+            current.set(0L);
+        else
+            current.set(DateFormat.stringToMillis(Bytes.toString(value)));
+        return current;
+    }
+
+}
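
A minimal round-trip sketch of the new serializer, assuming the Hadoop and HBase classes used above are on the classpath; the timestamp literal is arbitrary:

    import java.nio.ByteBuffer;

    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.kylin.metadata.serializer.DateTimeSerializer;

    public class DateTimeSerializerRoundTrip {
        public static void main(String[] args) {
            DateTimeSerializer ser = new DateTimeSerializer();

            // valueOf() parses the string into epoch millis (GMT); it reuses a
            // shared LongWritable, so copy the primitive out before going on
            LongWritable parsed = ser.valueOf(Bytes.toBytes("2015-03-27 14:37:49"));
            long expected = parsed.get();

            // the value is stored as a fixed 8-byte long (peekLength() is always 8)
            ByteBuffer buf = ByteBuffer.allocate(8);
            ser.serialize(parsed, buf);
            buf.flip();

            long restored = ser.deserialize(buf).get();
            System.out.println(expected + " == " + restored);
        }
    }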

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/24accccc/metadata/src/main/java/org/apache/kylin/metadata/tool/HiveSourceTableLoader.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/tool/HiveSourceTableLoader.java b/metadata/src/main/java/org/apache/kylin/metadata/tool/HiveSourceTableLoader.java
deleted file mode 100644
index 5297188..0000000
--- a/metadata/src/main/java/org/apache/kylin/metadata/tool/HiveSourceTableLoader.java
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.metadata.tool;
-
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
-import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.metastore.api.Table;
-import org.apache.kylin.common.KylinConfig;
-import org.apache.kylin.common.util.HadoopUtil;
-import org.apache.kylin.common.util.HiveClient;
-import org.apache.kylin.metadata.MetadataConstants;
-import org.apache.kylin.metadata.MetadataManager;
-import org.apache.kylin.metadata.model.ColumnDesc;
-import org.apache.kylin.metadata.model.TableDesc;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.util.*;
-
-/**
- * Management class to sync hive table metadata with command See main method for
- * how to use the class
- *
- * @author jianliu
- */
-public class HiveSourceTableLoader {
-
-    @SuppressWarnings("unused")
-    private static final Logger logger = LoggerFactory.getLogger(HiveSourceTableLoader.class);
-
-    public static final String OUTPUT_SURFIX = "json";
-    public static final String TABLE_FOLDER_NAME = "table";
-    public static final String TABLE_EXD_FOLDER_NAME = "table_exd";
-
-    public static Set<String> reloadHiveTables(String[] hiveTables, KylinConfig config) throws IOException {
-
-        Map<String, Set<String>> db2tables = Maps.newHashMap();
-        for (String table : hiveTables) {
-            String[] parts = HadoopUtil.parseHiveTableName(table);
-            Set<String> set = db2tables.get(parts[0]);
-            if (set == null) {
-                set = Sets.newHashSet();
-                db2tables.put(parts[0], set);
-            }
-            set.add(parts[1]);
-        }
-
-        // extract from hive
-        Set<String> loadedTables = Sets.newHashSet();
-        for (String database : db2tables.keySet()) {
-            List<String> loaded = extractHiveTables(database, db2tables.get(database), config);
-            loadedTables.addAll(loaded);
-        }
-
-        return loadedTables;
-    }
-
-    private static List<String> extractHiveTables(String database, Set<String> tables, KylinConfig config) throws IOException {
-
-        List<String> loadedTables = Lists.newArrayList();
-        MetadataManager metaMgr = MetadataManager.getInstance(KylinConfig.getInstanceFromEnv());
-        for (String tableName : tables) {
-            Table table = null;
-            HiveClient hiveClient = new HiveClient();
-            List<FieldSchema> partitionFields = null;
-            List<FieldSchema> fields = null;
-            try {
-                table = hiveClient.getHiveTable(database, tableName);
-                partitionFields = table.getPartitionKeys();
-                fields = hiveClient.getHiveTableFields(database, tableName);
-            } catch (Exception e) {
-                e.printStackTrace();
-                throw new IOException(e);
-            }
-
-            if (fields != null && partitionFields != null && partitionFields.size() > 0) {
-                fields.addAll(partitionFields);
-            }
-
-            long tableSize = hiveClient.getFileSizeForTable(table);
-            long tableFileNum = hiveClient.getFileNumberForTable(table);
-            TableDesc tableDesc = metaMgr.getTableDesc(database + "." + tableName);
-            if (tableDesc == null) {
-                tableDesc = new TableDesc();
-                tableDesc.setDatabase(database.toUpperCase());
-                tableDesc.setName(tableName.toUpperCase());
-                tableDesc.setUuid(UUID.randomUUID().toString());
-                tableDesc.setLastModified(0);
-            }
-
-            int columnNumber = fields.size();
-            List<ColumnDesc> columns = new ArrayList<ColumnDesc>(columnNumber);
-            for (int i = 0; i < columnNumber; i++) {
-                FieldSchema field = fields.get(i);
-                ColumnDesc cdesc = new ColumnDesc();
-                cdesc.setName(field.getName().toUpperCase());
-                cdesc.setDatatype(field.getType());
-                cdesc.setId(String.valueOf(i + 1));
-                columns.add(cdesc);
-            }
-            tableDesc.setColumns(columns.toArray(new ColumnDesc[columnNumber]));
-
-            StringBuffer partitionColumnString = new StringBuffer();
-            for (int i = 0, n = partitionFields.size(); i < n; i++) {
-                if (i > 0)
-                    partitionColumnString.append(", ");
-                partitionColumnString.append(partitionFields.get(i).getName().toUpperCase());
-            }
-
-            Map<String, String> map = metaMgr.getTableDescExd(tableDesc.getIdentity());
-
-            if (map == null) {
-                map = Maps.newHashMap();
-            }
-            map.put(MetadataConstants.TABLE_EXD_TABLENAME, table.getTableName());
-            map.put(MetadataConstants.TABLE_EXD_LOCATION, table.getSd().getLocation());
-            map.put(MetadataConstants.TABLE_EXD_IF, table.getSd().getInputFormat());
-            map.put(MetadataConstants.TABLE_EXD_OF, table.getSd().getOutputFormat());
-            map.put(MetadataConstants.TABLE_EXD_OWNER, table.getOwner());
-            map.put(MetadataConstants.TABLE_EXD_LAT, String.valueOf(table.getLastAccessTime()));
-            map.put(MetadataConstants.TABLE_EXD_PC, partitionColumnString.toString());
-            map.put(MetadataConstants.TABLE_EXD_TFS, String.valueOf(tableSize));
-            map.put(MetadataConstants.TABLE_EXD_TNF, String.valueOf(tableFileNum));
-            map.put(MetadataConstants.TABLE_EXD_PARTITIONED, Boolean.valueOf(partitionFields != null && partitionFields.size() > 0).toString());
-
-            metaMgr.saveSourceTable(tableDesc);
-            metaMgr.saveTableExd(tableDesc.getIdentity(), map);
-            loadedTables.add(tableDesc.getIdentity());
-        }
-
-
-        return loadedTables;
-    }
-
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/24accccc/metadata/src/main/java/org/apache/kylin/metadata/util/DateFormat.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/util/DateFormat.java b/metadata/src/main/java/org/apache/kylin/metadata/util/DateFormat.java
new file mode 100644
index 0000000..c0967e3
--- /dev/null
+++ b/metadata/src/main/java/org/apache/kylin/metadata/util/DateFormat.java
@@ -0,0 +1,76 @@
+package org.apache.kylin.metadata.util;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Map;
+import java.util.TimeZone;
+import java.util.concurrent.ConcurrentHashMap;
+
+public class DateFormat {
+
+    public static final String DEFAULT_DATE_PATTERN = "yyyy-MM-dd";
+    public static final String DEFAULT_DATETIME_PATTERN_WITHOUT_MILLISECONDS = "yyyy-MM-dd HH:mm:ss";
+    public static final String DEFAULT_DATETIME_PATTERN_WITH_MILLISECONDS = "yyyy-MM-dd HH:mm:ss.SSS";
+
+    static final private Map<String, ThreadLocal<SimpleDateFormat>> threadLocalMap = new ConcurrentHashMap<String, ThreadLocal<SimpleDateFormat>>();
+
+    static SimpleDateFormat getDateFormat(String datePattern) {
+        ThreadLocal<SimpleDateFormat> formatThreadLocal = threadLocalMap.get(datePattern);
+        if (formatThreadLocal == null) {
+            threadLocalMap.put(datePattern, formatThreadLocal = new ThreadLocal<SimpleDateFormat>());
+        }
+        SimpleDateFormat format = formatThreadLocal.get();
+        if (format == null) {
+            format = new SimpleDateFormat(datePattern);
+            format.setTimeZone(TimeZone.getTimeZone("GMT")); // NOTE: this must be GMT to calculate epoch date correctly
+            formatThreadLocal.set(format);
+        }
+        return format;
+    }
+
+    public static String dateToString(Date date) {
+        return dateToString(date, DEFAULT_DATETIME_PATTERN_WITHOUT_MILLISECONDS);
+    }
+
+    public static String dateToString(Date date, String pattern) {
+        return getDateFormat(pattern).format(date);
+    }
+
+    public static Date stringToDate(String str) {
+        return stringToDate(str, DEFAULT_DATE_PATTERN);
+    }
+
+    public static Date stringToDate(String str, String pattern) {
+        Date date = null;
+        try {
+            date = getDateFormat(pattern).parse(str);
+        } catch (ParseException e) {
+            throw new IllegalArgumentException("'" + str + "' is not a valid date of pattern '" + pattern + "'", e);
+        }
+        return date;
+    }
+
+    public static long stringToMillis(String str) {
+        if (isAllDigits(str)) {
+            return Long.parseLong(str);
+        } else if (str.length() == 10) {
+            return stringToDate(str, DEFAULT_DATE_PATTERN).getTime();
+        } else if (str.length() == 19) {
+            return stringToDate(str, DEFAULT_DATETIME_PATTERN_WITHOUT_MILLISECONDS).getTime();
+        } else if (str.length() == 23) {
+            return stringToDate(str, DEFAULT_DATETIME_PATTERN_WITH_MILLISECONDS).getTime();
+        } else {
+            throw new IllegalArgumentException("there is no valid date pattern for:" + str);
+        }
+    }
+    
+    private static boolean isAllDigits(String str) {
+        for (int i = 0, n = str.length(); i < n; i++) {
+            if (Character.isDigit(str.charAt(i)) == false)
+                return false;
+        }
+        return true;
+    }
+
+}
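
The extracted helper keeps the same accepted layouts as before, dispatching on string length; a quick usage sketch with arbitrary timestamps:

    import org.apache.kylin.metadata.util.DateFormat;

    public class DateFormatExample {
        public static void main(String[] args) {
            // length 10: date only
            long d1 = DateFormat.stringToMillis("2015-03-27");
            // length 19: date and time without milliseconds
            long d2 = DateFormat.stringToMillis("2015-03-27 14:37:49");
            // length 23: date and time with milliseconds
            long d3 = DateFormat.stringToMillis("2015-03-27 14:37:49.123");
            // digits-only strings are taken as epoch millis directly
            long d4 = DateFormat.stringToMillis("1421280000000");
            System.out.println(d1 + " " + d2 + " " + d3 + " " + d4);
        }
    }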

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/24accccc/metadata/src/main/java/org/apache/kylin/metadata/util/HiveSourceTableLoader.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/util/HiveSourceTableLoader.java b/metadata/src/main/java/org/apache/kylin/metadata/util/HiveSourceTableLoader.java
new file mode 100644
index 0000000..fe5c2b3
--- /dev/null
+++ b/metadata/src/main/java/org/apache/kylin/metadata/util/HiveSourceTableLoader.java
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.metadata.util;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.util.HadoopUtil;
+import org.apache.kylin.common.util.HiveClient;
+import org.apache.kylin.metadata.MetadataConstants;
+import org.apache.kylin.metadata.MetadataManager;
+import org.apache.kylin.metadata.model.ColumnDesc;
+import org.apache.kylin.metadata.model.TableDesc;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.*;
+
+/**
+ * Management class to sync hive table metadata with command See main method for
+ * how to use the class
+ *
+ * @author jianliu
+ */
+public class HiveSourceTableLoader {
+
+    @SuppressWarnings("unused")
+    private static final Logger logger = LoggerFactory.getLogger(HiveSourceTableLoader.class);
+
+    public static final String OUTPUT_SURFIX = "json";
+    public static final String TABLE_FOLDER_NAME = "table";
+    public static final String TABLE_EXD_FOLDER_NAME = "table_exd";
+
+    public static Set<String> reloadHiveTables(String[] hiveTables, KylinConfig config) throws IOException {
+
+        Map<String, Set<String>> db2tables = Maps.newHashMap();
+        for (String table : hiveTables) {
+            String[] parts = HadoopUtil.parseHiveTableName(table);
+            Set<String> set = db2tables.get(parts[0]);
+            if (set == null) {
+                set = Sets.newHashSet();
+                db2tables.put(parts[0], set);
+            }
+            set.add(parts[1]);
+        }
+
+        // extract from hive
+        Set<String> loadedTables = Sets.newHashSet();
+        for (String database : db2tables.keySet()) {
+            List<String> loaded = extractHiveTables(database, db2tables.get(database), config);
+            loadedTables.addAll(loaded);
+        }
+
+        return loadedTables;
+    }
+
+    private static List<String> extractHiveTables(String database, Set<String> tables, KylinConfig config) throws IOException {
+
+        List<String> loadedTables = Lists.newArrayList();
+        MetadataManager metaMgr = MetadataManager.getInstance(KylinConfig.getInstanceFromEnv());
+        for (String tableName : tables) {
+            Table table = null;
+            HiveClient hiveClient = new HiveClient();
+            List<FieldSchema> partitionFields = null;
+            List<FieldSchema> fields = null;
+            try {
+                table = hiveClient.getHiveTable(database, tableName);
+                partitionFields = table.getPartitionKeys();
+                fields = hiveClient.getHiveTableFields(database, tableName);
+            } catch (Exception e) {
+                e.printStackTrace();
+                throw new IOException(e);
+            }
+
+            if (fields != null && partitionFields != null && partitionFields.size() > 0) {
+                fields.addAll(partitionFields);
+            }
+
+            long tableSize = hiveClient.getFileSizeForTable(table);
+            long tableFileNum = hiveClient.getFileNumberForTable(table);
+            TableDesc tableDesc = metaMgr.getTableDesc(database + "." + tableName);
+            if (tableDesc == null) {
+                tableDesc = new TableDesc();
+                tableDesc.setDatabase(database.toUpperCase());
+                tableDesc.setName(tableName.toUpperCase());
+                tableDesc.setUuid(UUID.randomUUID().toString());
+                tableDesc.setLastModified(0);
+            }
+
+            int columnNumber = fields.size();
+            List<ColumnDesc> columns = new ArrayList<ColumnDesc>(columnNumber);
+            for (int i = 0; i < columnNumber; i++) {
+                FieldSchema field = fields.get(i);
+                ColumnDesc cdesc = new ColumnDesc();
+                cdesc.setName(field.getName().toUpperCase());
+                cdesc.setDatatype(field.getType());
+                cdesc.setId(String.valueOf(i + 1));
+                columns.add(cdesc);
+            }
+            tableDesc.setColumns(columns.toArray(new ColumnDesc[columnNumber]));
+
+            StringBuffer partitionColumnString = new StringBuffer();
+            for (int i = 0, n = partitionFields.size(); i < n; i++) {
+                if (i > 0)
+                    partitionColumnString.append(", ");
+                partitionColumnString.append(partitionFields.get(i).getName().toUpperCase());
+            }
+
+            Map<String, String> map = metaMgr.getTableDescExd(tableDesc.getIdentity());
+
+            if (map == null) {
+                map = Maps.newHashMap();
+            }
+            map.put(MetadataConstants.TABLE_EXD_TABLENAME, table.getTableName());
+            map.put(MetadataConstants.TABLE_EXD_LOCATION, table.getSd().getLocation());
+            map.put(MetadataConstants.TABLE_EXD_IF, table.getSd().getInputFormat());
+            map.put(MetadataConstants.TABLE_EXD_OF, table.getSd().getOutputFormat());
+            map.put(MetadataConstants.TABLE_EXD_OWNER, table.getOwner());
+            map.put(MetadataConstants.TABLE_EXD_LAT, String.valueOf(table.getLastAccessTime()));
+            map.put(MetadataConstants.TABLE_EXD_PC, partitionColumnString.toString());
+            map.put(MetadataConstants.TABLE_EXD_TFS, String.valueOf(tableSize));
+            map.put(MetadataConstants.TABLE_EXD_TNF, String.valueOf(tableFileNum));
+            map.put(MetadataConstants.TABLE_EXD_PARTITIONED, Boolean.valueOf(partitionFields != null && partitionFields.size() > 0).toString());
+
+            metaMgr.saveSourceTable(tableDesc);
+            metaMgr.saveTableExd(tableDesc.getIdentity(), map);
+            loadedTables.add(tableDesc.getIdentity());
+        }
+
+
+        return loadedTables;
+    }
+
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/24accccc/metadata/src/test/java/org/apache/kylin/metadata/tool/HiveSourceTableLoaderTest.java
----------------------------------------------------------------------
diff --git a/metadata/src/test/java/org/apache/kylin/metadata/tool/HiveSourceTableLoaderTest.java b/metadata/src/test/java/org/apache/kylin/metadata/tool/HiveSourceTableLoaderTest.java
index cd773ba..1f48b77 100644
--- a/metadata/src/test/java/org/apache/kylin/metadata/tool/HiveSourceTableLoaderTest.java
+++ b/metadata/src/test/java/org/apache/kylin/metadata/tool/HiveSourceTableLoaderTest.java
@@ -26,9 +26,9 @@ import java.util.Set;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
-
 import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.util.HBaseMetadataTestCase;
+import org.apache.kylin.metadata.util.HiveSourceTableLoader;
 
 public class HiveSourceTableLoaderTest extends HBaseMetadataTestCase {
 

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/24accccc/server/src/main/java/org/apache/kylin/rest/service/CubeService.java
----------------------------------------------------------------------
diff --git a/server/src/main/java/org/apache/kylin/rest/service/CubeService.java b/server/src/main/java/org/apache/kylin/rest/service/CubeService.java
index de97a7b..d786b1e 100644
--- a/server/src/main/java/org/apache/kylin/rest/service/CubeService.java
+++ b/server/src/main/java/org/apache/kylin/rest/service/CubeService.java
@@ -44,7 +44,7 @@ import org.apache.kylin.metadata.project.ProjectManager;
 import org.apache.kylin.metadata.project.RealizationEntry;
 import org.apache.kylin.metadata.realization.RealizationStatusEnum;
 import org.apache.kylin.metadata.realization.RealizationType;
-import org.apache.kylin.metadata.tool.HiveSourceTableLoader;
+import org.apache.kylin.metadata.util.HiveSourceTableLoader;
 import org.apache.kylin.rest.constant.Constant;
 import org.apache.kylin.rest.controller.QueryController;
 import org.apache.kylin.rest.exception.InternalErrorException;

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/24accccc/storage/src/main/java/org/apache/kylin/storage/hbase/HBaseKeyRange.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/hbase/HBaseKeyRange.java b/storage/src/main/java/org/apache/kylin/storage/hbase/HBaseKeyRange.java
index 1c81eac..e766317 100644
--- a/storage/src/main/java/org/apache/kylin/storage/hbase/HBaseKeyRange.java
+++ b/storage/src/main/java/org/apache/kylin/storage/hbase/HBaseKeyRange.java
@@ -26,9 +26,6 @@ import java.util.Set;
 
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Pair;
-
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
 import org.apache.kylin.common.util.BytesUtil;
 import org.apache.kylin.cube.CubeSegment;
 import org.apache.kylin.cube.cuboid.Cuboid;
@@ -37,8 +34,11 @@ import org.apache.kylin.cube.kv.FuzzyKeyEncoder;
 import org.apache.kylin.cube.kv.FuzzyMaskEncoder;
 import org.apache.kylin.cube.kv.RowConstants;
 import org.apache.kylin.cube.model.CubeDesc;
-import org.apache.kylin.dict.DateStrDictionary;
 import org.apache.kylin.metadata.model.TblColRef;
+import org.apache.kylin.metadata.util.DateFormat;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
 
 /**
  * 
@@ -135,10 +135,10 @@ public class HBaseKeyRange implements Comparable<HBaseKeyRange> {
 
     private void initPartitionRange(ColumnValueRange dimRange) {
         if (null != dimRange.getBeginValue()) {
-            this.partitionColumnStartDate = DateStrDictionary.stringToDate(dimRange.getBeginValue()).getTime();
+            this.partitionColumnStartDate = DateFormat.stringToDate(dimRange.getBeginValue()).getTime();
         }
         if (null != dimRange.getEndValue()) {
-            this.partitionColumnEndDate = DateStrDictionary.stringToDate(dimRange.getEndValue()).getTime();
+            this.partitionColumnEndDate = DateFormat.stringToDate(dimRange.getEndValue()).getTime();
         }
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/24accccc/storage/src/main/java/org/apache/kylin/storage/tuple/Tuple.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/tuple/Tuple.java b/storage/src/main/java/org/apache/kylin/storage/tuple/Tuple.java
index dd904d4..2d18597 100644
--- a/storage/src/main/java/org/apache/kylin/storage/tuple/Tuple.java
+++ b/storage/src/main/java/org/apache/kylin/storage/tuple/Tuple.java
@@ -26,10 +26,10 @@ import org.apache.kylin.common.util.Array;
 import org.apache.kylin.cube.CubeManager;
 import org.apache.kylin.cube.CubeSegment;
 import org.apache.kylin.cube.model.CubeDesc.DeriveInfo;
-import org.apache.kylin.dict.DateStrDictionary;
 import org.apache.kylin.dict.lookup.LookupStringTable;
 import org.apache.kylin.metadata.model.TblColRef;
 import org.apache.kylin.metadata.tuple.ITuple;
+import org.apache.kylin.metadata.util.DateFormat;
 
 /**
  * @author xjiang
@@ -133,7 +133,7 @@ public class Tuple implements ITuple {
         // TODO use data type enum instead of string comparison
         if ("date".equals(dataType)) {
             // convert epoch time
-            Date dateValue = DateStrDictionary.stringToDate(strValue); // NOTE: forces GMT timezone
+            Date dateValue = DateFormat.stringToDate(strValue); // NOTE: forces GMT timezone
             long millis = dateValue.getTime();
             long days = millis / (1000 * 3600 * 24);
             return Integer.valueOf((int) days); // Optiq expects Integer instead of Long. by honma
@@ -150,7 +150,7 @@ public class Tuple implements ITuple {
         } else if ("decimal".equals(dataType)) {
             return new BigDecimal(strValue);
         } else if ("timestamp".equals(dataType)) {
-            return Long.valueOf(DateStrDictionary.stringToMillis(strValue));
+            return Long.valueOf(DateFormat.stringToMillis(strValue));
         } else {
             return strValue;
         }
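
For reference, the "date" branch above turns a GMT-parsed date into a count of whole days since the epoch, because Optiq expects an Integer day number rather than epoch millis. A minimal, self-contained sketch of that arithmetic (assuming the input string is parsed in GMT, as the NOTE in the code states; class and variable names here are illustrative):

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.TimeZone;

public class EpochDaysSketch {
    public static void main(String[] args) throws Exception {
        SimpleDateFormat fmt = new SimpleDateFormat("yyyy-MM-dd");
        fmt.setTimeZone(TimeZone.getTimeZone("GMT"));      // GMT, so the division lands on a day boundary
        Date d = fmt.parse("2012-08-16");
        long millis = d.getTime();                         // millis since 1970-01-01T00:00:00 GMT
        int days = (int) (millis / (1000L * 3600 * 24));   // whole days since epoch
        System.out.println(days);                          // prints 15568
    }
}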

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/24accccc/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java
----------------------------------------------------------------------
diff --git a/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java b/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java
index 1a69138..6561c6e 100644
--- a/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java
+++ b/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java
@@ -114,7 +114,7 @@ public class GridTableTest {
         return scanner;
     }
 
-    private GTBuilder rebuild(GridTable table) throws IOException {
+    static GTBuilder rebuild(GridTable table) throws IOException {
         GTRecord r = new GTRecord(table.getInfo());
         GTBuilder builder = table.rebuild();
 
@@ -135,7 +135,7 @@ public class GridTableTest {
         return builder;
     }
 
-    private void rebuildViaAppend(GridTable table) throws IOException {
+    static void rebuildViaAppend(GridTable table) throws IOException {
         GTRecord r = new GTRecord(table.getInfo());
         GTBuilder builder;
 
@@ -170,13 +170,13 @@ public class GridTableTest {
         System.out.println("Written Row Count: " + builder.getWrittenRowCount());
     }
 
-    public static GTInfo basicInfo() {
+    static GTInfo basicInfo() {
         Builder builder = infoBuilder();
         GTInfo info = builder.build();
         return info;
     }
 
-    public static GTInfo advancedInfo() {
+    static GTInfo advancedInfo() {
         Builder builder = infoBuilder();
         builder.enableColumnBlock(new BitSet[] { setOf(0, 1, 2), setOf(3, 4) });
         builder.enableRowBlock(4);


[32/50] incubator-kylin git commit: KYLIN-653 fact distinct mapper for II test passed

Posted by li...@apache.org.
KYLIN-653 fact distinct mapper for II test passed


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/0f8b7a46
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/0f8b7a46
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/0f8b7a46

Branch: refs/heads/streaming-localdict
Commit: 0f8b7a4689cde3e4844132efba8665cf0362bf60
Parents: fc5ab52
Author: honma <ho...@ebay.com>
Authored: Fri Mar 27 14:52:46 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Fri Mar 27 15:16:20 2015 +0800

----------------------------------------------------------------------
 .../org/apache/kylin/common/util/BasicTest.java |   2 -
 .../test_kylin_cube_with_slr_desc.json          |   2 +-
 .../apache/kylin/invertedindex/model/IIRow.java |  10 ++
 .../cube/FactDistinctIIColumnsMapper.java       |  15 +-
 .../job/hadoop/invertedindex/II2CubeTest.java   | 146 +++++++++++++++++++
 .../invertedindex/ToyIIStreamBuilder.java       |  36 +++++
 streaming/pom.xml                               |   7 +
 .../kylin/streaming/cube/CubeStreamBuilder.java |  20 +--
 .../IIKeyValueCodecWithStateTest.java           | 103 -------------
 .../invertedindex/ToyIIStreamBuilder.java       |  35 -----
 10 files changed, 211 insertions(+), 165 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/0f8b7a46/common/src/test/java/org/apache/kylin/common/util/BasicTest.java
----------------------------------------------------------------------
diff --git a/common/src/test/java/org/apache/kylin/common/util/BasicTest.java b/common/src/test/java/org/apache/kylin/common/util/BasicTest.java
index 0b92bf9..068ebbf 100644
--- a/common/src/test/java/org/apache/kylin/common/util/BasicTest.java
+++ b/common/src/test/java/org/apache/kylin/common/util/BasicTest.java
@@ -23,9 +23,7 @@ import java.nio.ByteBuffer;
 import java.text.DateFormat;
 import java.text.SimpleDateFormat;
 import java.util.*;
-import java.util.concurrent.*;
 
-import com.google.common.collect.Lists;
 import org.apache.commons.configuration.ConfigurationException;
 import org.junit.Ignore;
 import org.junit.Test;

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/0f8b7a46/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_with_slr_desc.json
----------------------------------------------------------------------
diff --git a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_with_slr_desc.json b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_with_slr_desc.json
index c4d55f4..5a1049c 100644
--- a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_with_slr_desc.json
+++ b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_with_slr_desc.json
@@ -135,7 +135,7 @@
     }, {
       "column" : "lstg_format_name",
       "length" : 12,
-      "dictionary" : null,
+      "dictionary" : "true",
       "mandatory" : false
     }, {
       "column" : "lstg_site_id",

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/0f8b7a46/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIRow.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIRow.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIRow.java
index f3d398a..273d1e6 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIRow.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/model/IIRow.java
@@ -34,10 +34,14 @@
 
 package org.apache.kylin.invertedindex.model;
 
+import com.google.common.collect.Lists;
 import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.kylin.common.util.BytesUtil;
 
+import java.util.List;
+
 /**
  * Created by qianzhou on 3/10/15.
  */
@@ -77,4 +81,10 @@ public final class IIRow {
             this.getDictionary().set(c.getValueArray(), c.getValueOffset(), c.getValueLength());
         }
     }
+
+    public List<Cell> makeCells() {
+        Cell a = new KeyValue(this.getKey().copyBytes(), IIDesc.HBASE_FAMILY_BYTES, IIDesc.HBASE_QUALIFIER_BYTES, this.getValue().copyBytes());
+        Cell b = new KeyValue(this.getKey().copyBytes(), IIDesc.HBASE_FAMILY_BYTES, IIDesc.HBASE_DICTIONARY_BYTES, this.getDictionary().copyBytes());
+        return Lists.newArrayList(a, b);
+    }
 }
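
The new makeCells() helper packages an encoded IIRow into two HBase Cells sharing the same row key: one carrying the compressed column values and one carrying the local dictionary, both under the II column family. A minimal sketch of how it can be wrapped into a Result for test input, mirroring what II2CubeTest does later in this commit (class and method names are illustrative):

import java.util.List;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.client.Result;
import org.apache.kylin.invertedindex.model.IIRow;

public class IIRowResults {
    // Wrap an encoded IIRow into the Result an HBase-reading mapper would receive.
    static Result toResult(IIRow row) {
        List<Cell> cells = row.makeCells();   // value cell + dictionary cell, same row key
        return Result.create(cells);
    }
}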

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/0f8b7a46/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
index 6a236fd..75709f6 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/FactDistinctIIColumnsMapper.java
@@ -51,14 +51,9 @@ import com.google.common.collect.Lists;
  */
 public class FactDistinctIIColumnsMapper extends FactDistinctColumnsMapperBase<ImmutableBytesWritable, Result> {
 
-    private IIJoinedFlatTableDesc intermediateTableDesc;
     private Queue<IIRow> buffer = Lists.newLinkedList();
     private Iterator<Slice> slices;
 
-    private String iiName;
-    private IIInstance ii;
-    private IIDesc iiDesc;
-
     private int[] baseCuboidCol2FlattenTableCol;
 
     @Override
@@ -68,11 +63,11 @@ public class FactDistinctIIColumnsMapper extends FactDistinctColumnsMapperBase<I
         Configuration conf = context.getConfiguration();
         KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
 
-        iiName = conf.get(BatchConstants.CFG_II_NAME);
-        ii = IIManager.getInstance(config).getII(iiName);
-        iiDesc = ii.getDescriptor();
+        String iiName = conf.get(BatchConstants.CFG_II_NAME);
+        IIInstance ii = IIManager.getInstance(config).getII(iiName);
+        IIDesc iiDesc = ii.getDescriptor();
 
-        intermediateTableDesc = new IIJoinedFlatTableDesc(iiDesc);
+        IIJoinedFlatTableDesc intermediateTableDesc = new IIJoinedFlatTableDesc(iiDesc);
         TableRecordInfo info = new TableRecordInfo(iiDesc);
         KeyValueCodec codec = new IIKeyValueCodecWithState(info.getDigest());
         slices = codec.decodeKeyValue(new FIFOIterable<IIRow>(buffer)).iterator();
@@ -116,7 +111,7 @@ public class FactDistinctIIColumnsMapper extends FactDistinctColumnsMapperBase<I
                         vBytesBuffer = new byte[dictionary.getSizeOfValue() * 2];
                     }
 
-                    int vid = record.getValueID(baseCuboidIndex);
+                    int vid = record.getValueID(indexInRecord);
                     if (vid == dictionary.nullId()) {
                         continue;
                     }
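
Beyond the bug fix (reading the value id by indexInRecord rather than the base-cuboid index), the mapper's setup wires up a small buffering pattern: each incoming IIRow is queued, and the stateful codec yields a Slice only once rows for every column of that slice have arrived. A rough sketch of that pattern under the same setup (iiDesc obtained as in the mapper; class and method names are illustrative):

import java.util.Iterator;
import java.util.Queue;

import com.google.common.collect.Lists;
import org.apache.kylin.common.util.FIFOIterable;
import org.apache.kylin.invertedindex.index.Slice;
import org.apache.kylin.invertedindex.index.TableRecordInfo;
import org.apache.kylin.invertedindex.model.IIDesc;
import org.apache.kylin.invertedindex.model.IIKeyValueCodecWithState;
import org.apache.kylin.invertedindex.model.IIRow;
import org.apache.kylin.invertedindex.model.KeyValueCodec;

public class SliceBufferSketch {
    private final Queue<IIRow> buffer = Lists.newLinkedList();
    private final Iterator<Slice> slices;

    SliceBufferSketch(IIDesc iiDesc) {
        TableRecordInfo info = new TableRecordInfo(iiDesc);
        KeyValueCodec codec = new IIKeyValueCodecWithState(info.getDigest());
        // Lazy decoding: the iterator pulls from the buffer as rows are added.
        slices = codec.decodeKeyValue(new FIFOIterable<IIRow>(buffer)).iterator();
    }

    // Called once per incoming record, e.g. from map().
    void onRow(IIRow row) {
        buffer.add(row);
        while (slices.hasNext()) {
            Slice slice = slices.next();   // a complete slice has been reconstructed
            // ... iterate the slice's records and emit distinct column values, as the mapper does
        }
    }
}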

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/0f8b7a46/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java b/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
new file mode 100644
index 0000000..6832dcf
--- /dev/null
+++ b/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/II2CubeTest.java
@@ -0,0 +1,146 @@
+package org.apache.kylin.job.hadoop.invertedindex;
+
+import java.io.IOException;
+import java.util.*;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.LinkedBlockingQueue;
+
+import javax.annotation.Nullable;
+
+import com.google.common.collect.Sets;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.MutationSerialization;
+import org.apache.hadoop.hbase.mapreduce.ResultSerialization;
+import org.apache.hadoop.io.ShortWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mrunit.mapreduce.MapDriver;
+import org.apache.hadoop.mrunit.types.Pair;
+import org.apache.kylin.common.util.FIFOIterable;
+import org.apache.kylin.common.util.LocalFileMetadataTestCase;
+import org.apache.kylin.invertedindex.IIInstance;
+import org.apache.kylin.invertedindex.IIManager;
+import org.apache.kylin.invertedindex.index.Slice;
+import org.apache.kylin.invertedindex.index.TableRecordInfo;
+import org.apache.kylin.invertedindex.index.TableRecordInfoDigest;
+import org.apache.kylin.invertedindex.model.IIDesc;
+import org.apache.kylin.invertedindex.model.IIKeyValueCodecWithState;
+import org.apache.kylin.invertedindex.model.IIRow;
+import org.apache.kylin.invertedindex.model.KeyValueCodec;
+import org.apache.kylin.job.constant.BatchConstants;
+import org.apache.kylin.job.hadoop.cube.FactDistinctIIColumnsMapper;
+import org.apache.kylin.streaming.Stream;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Collections2;
+import com.google.common.collect.Lists;
+
+/**
+ * Created by Hongbin Ma(Binmahone) on 3/26/15.
+ */
+public class II2CubeTest extends LocalFileMetadataTestCase {
+
+    String iiName = "test_kylin_ii_inner_join";
+    IIInstance ii;
+    IIDesc iiDesc;
+    String cubeName = "test_kylin_cube_with_slr_empty";
+
+    List<IIRow> iiRows;
+
+    final String[] inputs = new String[] { //
+    "FP-non GTC,0,15,145970,0,28,Toys,2008-10-08 07:18:40,USER_Y,Toys & Hobbies,Models & Kits,Automotive,0,Ebay,USER_S,15,Professional-Other,2012-08-16,2012-08-11,0,2012-08-16,145970,10000329,26.8551,0", //
+            "ABIN,0,-99,43479,0,21,Photo,2012-09-11 20:26:04,USER_Y,Cameras & Photo,Film Photography,Other,0,Ebay,USER_S,-99,Not Applicable,2012-08-16,2012-08-11,0,2012-08-16,43479,10000807,26.2474,0", //
+            "ABIN,0,16,80053,0,12,Computers,2012-06-19 21:15:09,USER_Y,Computers/Tablets & Networking,MonitorProjectors & Accs,Monitors,0,Ebay,USER_S,16,Consumer-Other,2012-08-16,2012-08-11,0,2012-08-16,80053,10000261,94.2273,0" };
+
+    @Before
+    public void setUp() throws Exception {
+        this.createTestMetadata();
+        this.ii = IIManager.getInstance(getTestConfig()).getII(iiName);
+        this.iiDesc = ii.getDescriptor();
+
+        Collection<?> streams = Collections2.transform(Arrays.asList(inputs), new Function<String, Stream>() {
+            @Nullable
+            @Override
+            public Stream apply(String input) {
+                return new Stream(0, input.getBytes());
+            }
+        });
+        LinkedBlockingQueue q = new LinkedBlockingQueue();
+        q.addAll(streams);
+        q.put(new Stream(-1, null));//a stop sign for builder
+
+        iiRows = Lists.newArrayList();
+        ToyIIStreamBuilder builder = new ToyIIStreamBuilder(q, iiDesc, 0, iiRows);
+        ExecutorService executorService = Executors.newSingleThreadExecutor();
+        Future<?> future = executorService.submit(builder);
+        future.get();
+
+    }
+
+    @After
+    public void after() throws Exception {
+        cleanupTestMetadata();
+    }
+
+    /**
+     * Simulate stream building into slices, encode each slice into IIRows,
+     * then reconstruct the slice from those IIRows.

+     */
+    @Test
+    public void basicTest() {
+        Queue<IIRow> buffer = Lists.newLinkedList();
+        FIFOIterable bufferIterable = new FIFOIterable(buffer);
+        TableRecordInfo info = new TableRecordInfo(iiDesc);
+        TableRecordInfoDigest digest = info.getDigest();
+        KeyValueCodec codec = new IIKeyValueCodecWithState(digest);
+        Iterator<Slice> slices = codec.decodeKeyValue(bufferIterable).iterator();
+
+        Assert.assertTrue(!slices.hasNext());
+        Assert.assertEquals(iiRows.size(), digest.getColumnCount());
+
+        for (int i = 0; i < digest.getColumnCount(); ++i) {
+            buffer.add(iiRows.get(i));
+
+            if (i != digest.getColumnCount() - 1) {
+                Assert.assertTrue(!slices.hasNext());
+            } else {
+                Assert.assertTrue(slices.hasNext());
+            }
+        }
+
+        Slice newSlice = slices.next();
+        Assert.assertEquals(newSlice.getLocalDictionaries().get(0).getSize(), 2);
+    }
+
+    @Test
+    public void factDistinctIIColumnsMapperTest() throws IOException {
+        MapDriver<ImmutableBytesWritable, Result, ShortWritable, Text> mapDriver;
+        FactDistinctIIColumnsMapper mapper = new FactDistinctIIColumnsMapper();
+        mapDriver = MapDriver.newMapDriver(mapper);
+
+        mapDriver.getConfiguration().set(BatchConstants.CFG_II_NAME, iiName);
+        mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
+        mapDriver.getConfiguration().setStrings("io.serializations", mapDriver.getConfiguration().get("io.serializations"), MutationSerialization.class.getName(), ResultSerialization.class.getName());
+        mapDriver.addAll(Lists.newArrayList(Collections2.transform(iiRows, new Function<IIRow, Pair<ImmutableBytesWritable, Result>>() {
+            @Nullable
+            @Override
+            public Pair<ImmutableBytesWritable, Result> apply(@Nullable IIRow input) {
+                return new Pair<ImmutableBytesWritable, Result>(new ImmutableBytesWritable(new byte[] { 1 }), Result.create(input.makeCells()));
+            }
+        })));
+
+        List<Pair<ShortWritable, Text>> result = mapDriver.run();
+        Set<String> lstgNames = Sets.newHashSet("FP-non GTC", "ABIN");
+        for (Pair<ShortWritable, Text> pair : result) {
+            Assert.assertEquals(pair.getFirst().get(), 6);
+            Assert.assertTrue(lstgNames.contains(pair.getSecond().toString()));
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/0f8b7a46/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/ToyIIStreamBuilder.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/ToyIIStreamBuilder.java b/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/ToyIIStreamBuilder.java
new file mode 100644
index 0000000..3e2a892
--- /dev/null
+++ b/job/src/test/java/org/apache/kylin/job/hadoop/invertedindex/ToyIIStreamBuilder.java
@@ -0,0 +1,36 @@
+package org.apache.kylin.job.hadoop.invertedindex;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.BlockingQueue;
+
+import org.apache.kylin.invertedindex.index.Slice;
+import org.apache.kylin.invertedindex.index.TableRecordInfo;
+import org.apache.kylin.invertedindex.model.IIDesc;
+import org.apache.kylin.invertedindex.model.IIKeyValueCodec;
+import org.apache.kylin.invertedindex.model.IIRow;
+import org.apache.kylin.streaming.Stream;
+import org.apache.kylin.streaming.invertedindex.IIStreamBuilder;
+
+/**
+ * Created by Hongbin Ma(Binmahone) on 3/26/15.
+ *
+ * An IIStreamBuilder that collects all the built slices in the form of IIRows.
+ * It is intended for test use only.
+ */
+public class ToyIIStreamBuilder extends IIStreamBuilder {
+    private List<IIRow> result;
+
+    public ToyIIStreamBuilder(BlockingQueue<Stream> queue, IIDesc desc, int partitionId, List<IIRow> result) {
+        super(queue, null, desc, partitionId);
+        this.result = result;
+    }
+
+    protected void outputSlice(Slice slice, TableRecordInfo tableRecordInfo) throws IOException {
+        IIKeyValueCodec codec = new IIKeyValueCodec(tableRecordInfo.getDigest());
+        for (IIRow iiRow : codec.encodeKeyValue(slice)) {
+            result.add(iiRow);
+        }
+    }
+
+}
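
A condensed usage sketch, following the setUp() of II2CubeTest above: raw rows are queued as Streams, a Stream with offset -1 and a null payload serves as the stop sign mentioned in the test, and the builder runs on a single-thread executor (class and method names here are illustrative):

import java.util.List;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;

import com.google.common.collect.Lists;
import org.apache.kylin.invertedindex.model.IIDesc;
import org.apache.kylin.invertedindex.model.IIRow;
import org.apache.kylin.streaming.Stream;

public class ToyBuilderUsage {
    static List<IIRow> buildRows(IIDesc iiDesc, String[] inputs) throws Exception {
        BlockingQueue<Stream> q = new LinkedBlockingQueue<Stream>();
        for (String line : inputs) {
            q.put(new Stream(0, line.getBytes()));
        }
        q.put(new Stream(-1, null));                                  // stop sign for the builder

        List<IIRow> rows = Lists.newArrayList();
        ToyIIStreamBuilder builder = new ToyIIStreamBuilder(q, iiDesc, 0, rows);
        Executors.newSingleThreadExecutor().submit(builder).get();    // block until the build finishes
        return rows;                                                  // one IIRow per column of each built slice
    }
}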

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/0f8b7a46/streaming/pom.xml
----------------------------------------------------------------------
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 650c9ac..0c084d5 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -16,6 +16,13 @@
 
 
         <dependency>
+            <groupId>org.apache.mrunit</groupId>
+            <artifactId>mrunit</artifactId>
+            <classifier>hadoop2</classifier>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
             <groupId>org.apache.kylin</groupId>
             <artifactId>kylin-invertedindex</artifactId>
             <version>${project.parent.version}</version>

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/0f8b7a46/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
----------------------------------------------------------------------
diff --git a/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java b/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
index 9554797..5c2efdc 100644
--- a/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
+++ b/streaming/src/main/java/org/apache/kylin/streaming/cube/CubeStreamBuilder.java
@@ -156,7 +156,6 @@ public class CubeStreamBuilder extends StreamBuilder {
         logger.info("Totally " + generatedCuboids.size() + " cuboids be calculated, takes " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
     }
 
-
     private void calculateCuboid(GridTable parentCuboid, long parentCuboidId, long cuboidId, Map<Long, GridTable> result) throws IOException {
 
         GridTable thisCuboid;
@@ -220,7 +219,6 @@ public class CubeStreamBuilder extends StreamBuilder {
         return gridTable;
     }
 
-
     private GridTable aggregateCuboid(GridTable parentCuboid, long parentCuboidId, long cuboidId) throws IOException {
         //logger.info("Calculating cuboid " + cuboidId + " from parent " + parentCuboidId);
         Pair<BitSet, BitSet> columnBitSets = getDimensionAndMetricColumBitSet(parentCuboidId);
@@ -281,14 +279,12 @@ public class CubeStreamBuilder extends StreamBuilder {
     }
 
     private Pair<BitSet, BitSet> getDimensionAndMetricColumBitSet(long cuboidId) {
-        BitSet bitSet = BitSet.valueOf(new long[]{cuboidId});
+        BitSet bitSet = BitSet.valueOf(new long[] { cuboidId });
         BitSet dimension = new BitSet();
         dimension.set(0, bitSet.cardinality());
         BitSet metrics = new BitSet();
         metrics.set(bitSet.cardinality(), bitSet.cardinality() + this.measureNumber);
-        return new Pair<BitSet, BitSet>(
-                dimension, metrics
-        );
+        return new Pair<BitSet, BitSet>(dimension, metrics);
     }
 
     private Object[] buildKey(List<String> row, DataTypeSerializer[] serializers) {
@@ -302,7 +298,6 @@ public class CubeStreamBuilder extends StreamBuilder {
         return key;
     }
 
-
     private Object[] buildValue(List<String> row) {
 
         Object[] values = new Object[desc.getMeasures().size()];
@@ -340,11 +335,10 @@ public class CubeStreamBuilder extends StreamBuilder {
         return values;
     }
 
-
     private GTInfo newGTInfo(long cuboidID) {
         Pair<BitSet, BitSet> dimensionMetricsBitSet = getDimensionAndMetricColumBitSet(cuboidID);
         GTInfo.Builder builder = infoBuilder(cuboidID);
-        builder.enableColumnBlock(new BitSet[]{dimensionMetricsBitSet.getFirst(), dimensionMetricsBitSet.getSecond()});
+        builder.enableColumnBlock(new BitSet[] { dimensionMetricsBitSet.getFirst(), dimensionMetricsBitSet.getSecond() });
         builder.setPrimaryKey(dimensionMetricsBitSet.getFirst());
         GTInfo info = builder.build();
         return info;
@@ -374,7 +368,6 @@ public class CubeStreamBuilder extends StreamBuilder {
         return builder;
     }
 
-
     private void buildDictionary(List<List<String>> table, CubeDesc desc, Map<TblColRef, Dictionary> dictionaryMap) {
         SetMultimap<TblColRef, String> valueMap = HashMultimap.create();
 
@@ -399,9 +392,9 @@ public class CubeStreamBuilder extends StreamBuilder {
                     }));
 
                     logger.info("Building dictionary for " + col);
-//                    DictionaryInfo dictInfo = new DictionaryInfo(col.getTable(), col.getName(), 0, col.getDatatype(), null, "");
-//                    dictInfo.setDictionaryObject(dict);
-//                    dictInfo.setDictionaryClass(dict.getClass().getName());
+                    //                    DictionaryInfo dictInfo = new DictionaryInfo(col.getTable(), col.getName(), 0, col.getDatatype(), null, "");
+                    //                    dictInfo.setDictionaryObject(dict);
+                    //                    dictInfo.setDictionaryClass(dict.getClass().getName());
                     dictionaryMap.put(col, dict);
                 }
             }
@@ -413,5 +406,4 @@ public class CubeStreamBuilder extends StreamBuilder {
         return getStreamParser().parse(stream, Lists.newArrayList(desc.listAllColumns()));
     }
 
-
 }
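
The reformatted getDimensionAndMetricColumBitSet above splits a cuboid id into two column masks over the grid table: the k set bits of the id map to dimension columns [0, k), and the measures follow at [k, k + measureNumber). A small worked sketch with an illustrative cuboid id and measure count:

import java.util.BitSet;

public class CuboidBitSetSketch {
    public static void main(String[] args) {
        long cuboidId = 0b10110L;                             // 3 of the dimension bits are set
        int measureNumber = 2;                                // illustrative value

        BitSet bits = BitSet.valueOf(new long[] { cuboidId });
        int k = bits.cardinality();                           // 3 dimensions in this cuboid

        BitSet dimensions = new BitSet();
        dimensions.set(0, k);                                 // columns {0, 1, 2}

        BitSet metrics = new BitSet();
        metrics.set(k, k + measureNumber);                    // columns {3, 4}

        System.out.println(dimensions + " " + metrics);       // prints {0, 1, 2} {3, 4}
    }
}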

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/0f8b7a46/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java
deleted file mode 100644
index 5ade5f1..0000000
--- a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/IIKeyValueCodecWithStateTest.java
+++ /dev/null
@@ -1,103 +0,0 @@
-package org.apache.kylin.streaming.invertedindex;
-
-import java.util.*;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.LinkedBlockingQueue;
-
-import javax.annotation.Nullable;
-
-import org.apache.kylin.common.util.FIFOIterable;
-import org.apache.kylin.common.util.LocalFileMetadataTestCase;
-import org.apache.kylin.invertedindex.IIInstance;
-import org.apache.kylin.invertedindex.IIManager;
-import org.apache.kylin.invertedindex.index.Slice;
-import org.apache.kylin.invertedindex.index.TableRecordInfo;
-import org.apache.kylin.invertedindex.index.TableRecordInfoDigest;
-import org.apache.kylin.invertedindex.model.IIDesc;
-import org.apache.kylin.invertedindex.model.IIKeyValueCodecWithState;
-import org.apache.kylin.invertedindex.model.IIRow;
-import org.apache.kylin.invertedindex.model.KeyValueCodec;
-import org.apache.kylin.streaming.Stream;
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-
-import com.google.common.base.Function;
-import com.google.common.collect.Collections2;
-import com.google.common.collect.Lists;
-
-/**
- * Created by Hongbin Ma(Binmahone) on 3/26/15.
- */
-public class IIKeyValueCodecWithStateTest extends LocalFileMetadataTestCase {
-
-    IIInstance ii;
-    IIDesc iiDesc;
-    List<IIRow> iiRowList = Lists.newArrayList();
-
-    final String[] inputs = new String[] { //
-    "FP-non GTC,0,15,145970,0,28,Toys,2008-10-08 07:18:40,USER_Y,Toys & Hobbies,Models & Kits,Automotive,0,Ebay,USER_S,15,Professional-Other,2012-08-16,2012-08-11,0,2012-08-16,145970,10000329,26.8551,0", //
-            "ABIN,0,-99,43479,0,21,Photo,2012-09-11 20:26:04,USER_Y,Cameras & Photo,Film Photography,Other,0,Ebay,USER_S,-99,Not Applicable,2012-08-16,2012-08-11,0,2012-08-16,43479,10000807,26.2474,0", //
-            "ABIN,0,16,80053,0,12,Computers,2012-06-19 21:15:09,USER_Y,Computers/Tablets & Networking,MonitorProjectors & Accs,Monitors,0,Ebay,USER_S,16,Consumer-Other,2012-08-16,2012-08-11,0,2012-08-16,80053,10000261,94.2273,0" };
-
-    @Before
-    public void setUp() throws Exception {
-        this.createTestMetadata();
-        this.ii = IIManager.getInstance(getTestConfig()).getII("test_kylin_ii_inner_join");
-        this.iiDesc = ii.getDescriptor();
-
-        Collection<?> streams = Collections2.transform(Arrays.asList(inputs), new Function<String, Stream>() {
-            @Nullable
-            @Override
-            public Stream apply(String input) {
-                return new Stream(0, input.getBytes());
-            }
-        });
-        LinkedBlockingQueue q = new LinkedBlockingQueue();
-        q.addAll(streams);
-        q.put(new Stream(-1, null));//a stop sign for builder
-
-        ToyIIStreamBuilder builder = new ToyIIStreamBuilder(q, iiDesc, 0, iiRowList);
-        ExecutorService executorService = Executors.newSingleThreadExecutor();
-        Future<?> future = executorService.submit(builder);
-        future.get();
-    }
-
-    @After
-    public void after() throws Exception {
-        cleanupTestMetadata();
-    }
-
-    /**
-     * simulate stream building into slices, and encode the slice into IIRows.
-     * Then reconstruct the IIRows to slice.
-     */
-    @Test
-    public void basicTest() {
-        Queue<IIRow> buffer = Lists.newLinkedList();
-        FIFOIterable bufferIterable = new FIFOIterable(buffer);
-        TableRecordInfo info = new TableRecordInfo(iiDesc);
-        TableRecordInfoDigest digest = info.getDigest();
-        KeyValueCodec codec = new IIKeyValueCodecWithState(digest);
-        Iterator<Slice> slices = codec.decodeKeyValue(bufferIterable).iterator();
-
-        Assert.assertTrue(!slices.hasNext());
-        Assert.assertEquals(iiRowList.size(), digest.getColumnCount());
-
-        for (int i = 0; i < digest.getColumnCount(); ++i) {
-            buffer.add(iiRowList.get(i));
-
-            if (i != digest.getColumnCount() - 1) {
-                Assert.assertTrue(!slices.hasNext());
-            } else {
-                Assert.assertTrue(slices.hasNext());
-            }
-        }
-
-        Slice newSlice = slices.next();
-        Assert.assertEquals(newSlice.getLocalDictionaries().get(0).getSize(), 2);
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/0f8b7a46/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/ToyIIStreamBuilder.java
----------------------------------------------------------------------
diff --git a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/ToyIIStreamBuilder.java b/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/ToyIIStreamBuilder.java
deleted file mode 100644
index 161b6f6..0000000
--- a/streaming/src/test/java/org/apache/kylin/streaming/invertedindex/ToyIIStreamBuilder.java
+++ /dev/null
@@ -1,35 +0,0 @@
-package org.apache.kylin.streaming.invertedindex;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.concurrent.BlockingQueue;
-
-import org.apache.kylin.invertedindex.index.Slice;
-import org.apache.kylin.invertedindex.index.TableRecordInfo;
-import org.apache.kylin.invertedindex.model.IIDesc;
-import org.apache.kylin.invertedindex.model.IIKeyValueCodec;
-import org.apache.kylin.invertedindex.model.IIRow;
-import org.apache.kylin.streaming.Stream;
-
-/**
- * Created by Hongbin Ma(Binmahone) on 3/26/15.
- *
- * A IIStreamBuilder that can hold all the built slices in form of IIRow
- * This is only for test use
- */
-public class ToyIIStreamBuilder extends IIStreamBuilder {
-    private List<IIRow> result;
-
-    public ToyIIStreamBuilder(BlockingQueue<Stream> queue, IIDesc desc, int partitionId, List<IIRow> result) {
-        super(queue, null, desc, partitionId);
-        this.result = result;
-    }
-
-    protected void outputSlice(Slice slice, TableRecordInfo tableRecordInfo) throws IOException {
-        IIKeyValueCodec codec = new IIKeyValueCodec(tableRecordInfo.getDigest());
-        for (IIRow iiRow : codec.encodeKeyValue(slice)) {
-            result.add(iiRow);
-        }
-    }
-
-}


[03/50] incubator-kylin git commit: KYLIN-653 quick fix compile

Posted by li...@apache.org.
KYLIN-653 quick fix compile


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/c8f4c2a5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/c8f4c2a5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/c8f4c2a5

Branch: refs/heads/streaming-localdict
Commit: c8f4c2a513ac51621046541a02a92fea9d41c7af
Parents: 1b52438
Author: honma <ho...@ebay.com>
Authored: Thu Mar 26 16:09:33 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Thu Mar 26 16:09:33 2015 +0800

----------------------------------------------------------------------
 .../java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/c8f4c2a5/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java
index 0a163e2..41b21a7 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java
@@ -96,7 +96,7 @@ public class BaseCuboidMapper<KEYIN> extends KylinMapper<KEYIN, Text, Text, Text
 
         byteRowDelimiter = Bytes.toBytes(intermediateTableRowDelimiter)[0];
 
-        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(context.getConfiguration());
+        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
 
         cube = CubeManager.getInstance(config).getCube(cubeName);
         cubeDesc = cube.getDescriptor();


[37/50] incubator-kylin git commit: KYLIN-653 add ii2basecuboid mapper

Posted by li...@apache.org.
KYLIN-653 add ii2basecuboid mapper


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/929b986d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/929b986d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/929b986d

Branch: refs/heads/streaming-localdict
Commit: 929b986d6d7396204d443aa6e420dd745a217611
Parents: d1c115d
Author: honma <ho...@ebay.com>
Authored: Fri Mar 27 15:56:10 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Fri Mar 27 15:56:10 2015 +0800

----------------------------------------------------------------------
 .../invertedindex/index/RawTableRecord.java     |   2 +
 .../kylin/job/hadoop/cube/BaseCuboidJob.java    |   2 +-
 .../kylin/job/hadoop/cube/BaseCuboidMapper.java | 246 -------------------
 .../job/hadoop/cube/BaseCuboidMapperBase.java   | 205 ++++++++++++++++
 .../job/hadoop/cube/HiveToBaseCuboidMapper.java |  49 ++++
 .../job/hadoop/cube/IIToBaseCuboidMapper.java   | 109 ++++++++
 .../kylin/job/hadoop/cubev2/InMemCuboidJob.java |   5 -
 .../cube/BaseCuboidMapperPerformanceTest.java   |  65 -----
 .../job/hadoop/cube/BaseCuboidMapperTest.java   | 145 -----------
 .../HiveToBaseCuboidMapperPerformanceTest.java  |  65 +++++
 .../hadoop/cube/HiveToBaseCuboidMapperTest.java | 145 +++++++++++
 11 files changed, 576 insertions(+), 462 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/929b986d/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/RawTableRecord.java
----------------------------------------------------------------------
diff --git a/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/RawTableRecord.java b/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/RawTableRecord.java
index 895fd4f..ccfc5b1 100644
--- a/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/RawTableRecord.java
+++ b/invertedindex/src/main/java/org/apache/kylin/invertedindex/index/RawTableRecord.java
@@ -18,6 +18,7 @@
 
 package org.apache.kylin.invertedindex.index;
 
+import com.google.common.base.Preconditions;
 import org.apache.kylin.common.util.BytesUtil;
 import org.apache.kylin.dict.Dictionary;
 import org.apache.kylin.metadata.measure.fixedlen.FixedLenMeasureCodec;
@@ -100,6 +101,7 @@ public class RawTableRecord implements Cloneable {
         bytes.set(buf, digest.offset(col), digest.length(col));
     }
 
+
     @Override
     public Object clone() {
         return new RawTableRecord(this);

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/929b986d/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidJob.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidJob.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidJob.java
index 5f7802a..06046c5 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidJob.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidJob.java
@@ -27,7 +27,7 @@ import org.apache.hadoop.util.ToolRunner;
 
 public class BaseCuboidJob extends CuboidJob {
     public BaseCuboidJob() {
-        this.setMapperClass(BaseCuboidMapper.class);
+        this.setMapperClass(HiveToBaseCuboidMapper.class);
     }
 
     public static void main(String[] args) throws Exception {

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/929b986d/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java
deleted file mode 100644
index a023c0c..0000000
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapper.java
+++ /dev/null
@@ -1,246 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.job.hadoop.cube;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.Text;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.google.common.collect.Lists;
-import org.apache.kylin.common.KylinConfig;
-import org.apache.kylin.common.mr.KylinMapper;
-import org.apache.kylin.cube.CubeInstance;
-import org.apache.kylin.cube.CubeManager;
-import org.apache.kylin.cube.CubeSegment;
-import org.apache.kylin.common.util.BytesSplitter;
-import org.apache.kylin.common.util.SplittedBytes;
-import org.apache.kylin.cube.cuboid.Cuboid;
-import org.apache.kylin.cube.kv.AbstractRowKeyEncoder;
-import org.apache.kylin.cube.kv.RowConstants;
-import org.apache.kylin.metadata.measure.MeasureCodec;
-import org.apache.kylin.cube.model.CubeDesc;
-import org.apache.kylin.metadata.model.MeasureDesc;
-import org.apache.kylin.job.constant.BatchConstants;
-import org.apache.kylin.job.hadoop.AbstractHadoopJob;
-import org.apache.kylin.cube.model.CubeJoinedFlatTableDesc;
-import org.apache.kylin.metadata.model.FunctionDesc;
-import org.apache.kylin.metadata.model.ParameterDesc;
-import org.apache.kylin.metadata.model.SegmentStatusEnum;
-
-/**
- * @author George Song (ysong1)
- */
-public class BaseCuboidMapper<KEYIN> extends KylinMapper<KEYIN, Text, Text, Text> {
-
-    private static final Logger logger = LoggerFactory.getLogger(BaseCuboidMapper.class);
-
-    public static final byte[] HIVE_NULL = Bytes.toBytes("\\N");
-    public static final byte[] ONE = Bytes.toBytes("1");
-
-    private String cubeName;
-    private String segmentName;
-    private Cuboid baseCuboid;
-    private CubeInstance cube;
-    private CubeDesc cubeDesc;
-    private CubeSegment cubeSegment;
-    private List<byte[]> nullBytes;
-
-    private CubeJoinedFlatTableDesc intermediateTableDesc;
-    private String intermediateTableRowDelimiter;
-    private byte byteRowDelimiter;
-
-    private int counter;
-    private int errorRecordCounter;
-    private Text outputKey = new Text();
-    private Text outputValue = new Text();
-    private Object[] measures;
-    private byte[][] keyBytesBuf;
-    private ByteBuffer valueBuf = ByteBuffer.allocate(RowConstants.ROWVALUE_BUFFER_SIZE);
-
-    private BytesSplitter bytesSplitter;
-    private AbstractRowKeyEncoder rowKeyEncoder;
-    private MeasureCodec measureCodec;
-
-    @Override
-    protected void setup(Context context) throws IOException {
-        super.publishConfiguration(context.getConfiguration());
-
-        cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase();
-        segmentName = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_NAME);
-        intermediateTableRowDelimiter = context.getConfiguration().get(BatchConstants.CFG_CUBE_INTERMEDIATE_TABLE_ROW_DELIMITER, Character.toString(BatchConstants.INTERMEDIATE_TABLE_ROW_DELIMITER));
-        if (Bytes.toBytes(intermediateTableRowDelimiter).length > 1) {
-            throw new RuntimeException("Expected delimiter byte length is 1, but got " + Bytes.toBytes(intermediateTableRowDelimiter).length);
-        }
-
-        byteRowDelimiter = Bytes.toBytes(intermediateTableRowDelimiter)[0];
-
-        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
-
-        cube = CubeManager.getInstance(config).getCube(cubeName);
-        cubeDesc = cube.getDescriptor();
-        cubeSegment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);
-
-        long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
-        baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId);
-
-        intermediateTableDesc = new CubeJoinedFlatTableDesc(cube.getDescriptor(), cubeSegment);
-
-        bytesSplitter = new BytesSplitter(200, 4096);
-        rowKeyEncoder = AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid);
-
-        measureCodec = new MeasureCodec(cubeDesc.getMeasures());
-        measures = new Object[cubeDesc.getMeasures().size()];
-
-        int colCount = cubeDesc.getRowkey().getRowKeyColumns().length;
-        keyBytesBuf = new byte[colCount][];
-
-        initNullBytes();
-    }
-
-    private void initNullBytes() {
-        nullBytes = Lists.newArrayList();
-        nullBytes.add(HIVE_NULL);
-        String[] nullStrings = cubeDesc.getNullStrings();
-        if (nullStrings != null) {
-            for (String s : nullStrings) {
-                nullBytes.add(Bytes.toBytes(s));
-            }
-        }
-    }
-
-    private boolean isNull(byte[] v) {
-        for (byte[] nullByte : nullBytes) {
-            if (Bytes.equals(v, nullByte))
-                return true;
-        }
-        return false;
-    }
-
-    private byte[] buildKey(SplittedBytes[] splitBuffers) {
-        int[] rowKeyColumnIndexes = intermediateTableDesc.getRowKeyColumnIndexes();
-        for (int i = 0; i < baseCuboid.getColumns().size(); i++) {
-            int index = rowKeyColumnIndexes[i];
-            keyBytesBuf[i] = Arrays.copyOf(splitBuffers[index].value, splitBuffers[index].length);
-            if (isNull(keyBytesBuf[i])) {
-                keyBytesBuf[i] = null;
-            }
-        }
-        return rowKeyEncoder.encode(keyBytesBuf);
-    }
-
-    private void buildValue(SplittedBytes[] splitBuffers) {
-
-        for (int i = 0; i < measures.length; i++) {
-            byte[] valueBytes = getValueBytes(splitBuffers, i);
-            measures[i] = measureCodec.getSerializer(i).valueOf(valueBytes);
-        }
-
-        valueBuf.clear();
-        measureCodec.encode(measures, valueBuf);
-    }
-
-    private byte[] getValueBytes(SplittedBytes[] splitBuffers, int measureIdx) {
-        MeasureDesc desc = cubeDesc.getMeasures().get(measureIdx);
-        FunctionDesc func = desc.getFunction();
-        ParameterDesc paramDesc = func.getParameter();
-        int[] flatTableIdx = intermediateTableDesc.getMeasureColumnIndexes()[measureIdx];
-
-        byte[] result = null;
-
-        // constant
-        if (flatTableIdx == null) {
-            result = Bytes.toBytes(paramDesc.getValue());
-        }
-        // column values
-        else {
-            // for multiple columns, their values are joined
-            for (int i = 0; i < flatTableIdx.length; i++) {
-                SplittedBytes split = splitBuffers[flatTableIdx[i]];
-                if (result == null) {
-                    result = Arrays.copyOf(split.value, split.length);
-                } else {
-                    byte[] newResult = new byte[result.length + split.length];
-                    System.arraycopy(result, 0, newResult, 0, result.length);
-                    System.arraycopy(split.value, 0, newResult, result.length, split.length);
-                    result = newResult;
-                }
-            }
-        }
-
-        if (func.isCount() || func.isHolisticCountDistinct()) {
-            // note for holistic count distinct, this value will be ignored
-            result = ONE;
-        }
-
-        if (isNull(result)) {
-            result = null;
-        }
-
-        return result;
-    }
-
-    @Override
-    public void map(KEYIN key, Text value, Context context) throws IOException, InterruptedException {
-        counter++;
-        if (counter % BatchConstants.COUNTER_MAX == 0) {
-            logger.info("Handled " + counter + " records!");
-        }
-
-        try {
-            bytesSplitter.split(value.getBytes(), value.getLength(), byteRowDelimiter);
-            outputKV(context);
-
-        } catch (Exception ex) {
-            handleErrorRecord(bytesSplitter, ex);
-        }
-    }
-
-    private void outputKV(Context context) throws IOException, InterruptedException {
-        intermediateTableDesc.sanityCheck(bytesSplitter);
-
-        byte[] rowKey = buildKey(bytesSplitter.getSplitBuffers());
-        outputKey.set(rowKey, 0, rowKey.length);
-
-        buildValue(bytesSplitter.getSplitBuffers());
-        outputValue.set(valueBuf.array(), 0, valueBuf.position());
-        context.write(outputKey, outputValue);
-    }
-
-    private void handleErrorRecord(BytesSplitter bytesSplitter, Exception ex) throws IOException {
-
-        System.err.println("Insane record: " + bytesSplitter);
-        ex.printStackTrace(System.err);
-
-        errorRecordCounter++;
-        if (errorRecordCounter > BatchConstants.ERROR_RECORD_THRESHOLD) {
-            if (ex instanceof IOException)
-                throw (IOException) ex;
-            else if (ex instanceof RuntimeException)
-                throw (RuntimeException) ex;
-            else
-                throw new RuntimeException("", ex);
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/929b986d/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapperBase.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapperBase.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapperBase.java
new file mode 100644
index 0000000..e2972dc
--- /dev/null
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapperBase.java
@@ -0,0 +1,205 @@
+package org.apache.kylin.job.hadoop.cube;
+
+import com.google.common.collect.Lists;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.Text;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.mr.KylinMapper;
+import org.apache.kylin.common.util.BytesSplitter;
+import org.apache.kylin.common.util.SplittedBytes;
+import org.apache.kylin.cube.CubeInstance;
+import org.apache.kylin.cube.CubeManager;
+import org.apache.kylin.cube.CubeSegment;
+import org.apache.kylin.cube.cuboid.Cuboid;
+import org.apache.kylin.cube.kv.AbstractRowKeyEncoder;
+import org.apache.kylin.cube.kv.RowConstants;
+import org.apache.kylin.cube.model.CubeDesc;
+import org.apache.kylin.cube.model.CubeJoinedFlatTableDesc;
+import org.apache.kylin.job.constant.BatchConstants;
+import org.apache.kylin.job.hadoop.AbstractHadoopJob;
+import org.apache.kylin.metadata.measure.MeasureCodec;
+import org.apache.kylin.metadata.model.FunctionDesc;
+import org.apache.kylin.metadata.model.MeasureDesc;
+import org.apache.kylin.metadata.model.ParameterDesc;
+import org.apache.kylin.metadata.model.SegmentStatusEnum;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Created by Hongbin Ma(Binmahone) on 3/27/15.
+ */
+public class BaseCuboidMapperBase<KEYIN, VALUEIN> extends KylinMapper<KEYIN, VALUEIN, Text, Text> {
+    protected static final Logger logger = LoggerFactory.getLogger(BaseCuboidMapperBase.class);
+    public static final byte[] HIVE_NULL = Bytes.toBytes("\\N");
+    public static final byte[] ONE = Bytes.toBytes("1");
+    protected String cubeName;
+    protected String segmentName;
+    protected Cuboid baseCuboid;
+    protected CubeInstance cube;
+    protected CubeDesc cubeDesc;
+    protected CubeSegment cubeSegment;
+    protected List<byte[]> nullBytes;
+    protected CubeJoinedFlatTableDesc intermediateTableDesc;
+    protected String intermediateTableRowDelimiter;
+    protected byte byteRowDelimiter;
+    protected int counter;
+    protected Object[] measures;
+    protected byte[][] keyBytesBuf;
+    protected BytesSplitter bytesSplitter;
+    protected AbstractRowKeyEncoder rowKeyEncoder;
+    protected MeasureCodec measureCodec;
+    private int errorRecordCounter;
+    private Text outputKey = new Text();
+    private Text outputValue = new Text();
+    private ByteBuffer valueBuf = ByteBuffer.allocate(RowConstants.ROWVALUE_BUFFER_SIZE);
+
+    @Override
+    protected void setup(Context context) throws IOException {
+        super.publishConfiguration(context.getConfiguration());
+
+        cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase();
+        segmentName = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_NAME);
+        intermediateTableRowDelimiter = context.getConfiguration().get(BatchConstants.CFG_CUBE_INTERMEDIATE_TABLE_ROW_DELIMITER, Character.toString(BatchConstants.INTERMEDIATE_TABLE_ROW_DELIMITER));
+        if (Bytes.toBytes(intermediateTableRowDelimiter).length > 1) {
+            throw new RuntimeException("Expected delimiter byte length is 1, but got " + Bytes.toBytes(intermediateTableRowDelimiter).length);
+        }
+
+        byteRowDelimiter = Bytes.toBytes(intermediateTableRowDelimiter)[0];
+
+        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
+
+        cube = CubeManager.getInstance(config).getCube(cubeName);
+        cubeDesc = cube.getDescriptor();
+        cubeSegment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);
+
+        long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
+        baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId);
+
+        intermediateTableDesc = new CubeJoinedFlatTableDesc(cube.getDescriptor(), cubeSegment);
+
+        bytesSplitter = new BytesSplitter(200, 4096);
+        rowKeyEncoder = AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid);
+
+        measureCodec = new MeasureCodec(cubeDesc.getMeasures());
+        measures = new Object[cubeDesc.getMeasures().size()];
+
+        int colCount = cubeDesc.getRowkey().getRowKeyColumns().length;
+        keyBytesBuf = new byte[colCount][];
+
+        initNullBytes();
+    }
+
+    private void initNullBytes() {
+        nullBytes = Lists.newArrayList();
+        nullBytes.add(HIVE_NULL);
+        String[] nullStrings = cubeDesc.getNullStrings();
+        if (nullStrings != null) {
+            for (String s : nullStrings) {
+                nullBytes.add(Bytes.toBytes(s));
+            }
+        }
+    }
+
+    private boolean isNull(byte[] v) {
+        for (byte[] nullByte : nullBytes) {
+            if (Bytes.equals(v, nullByte))
+                return true;
+        }
+        return false;
+    }
+
+    private byte[] buildKey(SplittedBytes[] splitBuffers) {
+        int[] rowKeyColumnIndexes = intermediateTableDesc.getRowKeyColumnIndexes();
+        for (int i = 0; i < baseCuboid.getColumns().size(); i++) {
+            int index = rowKeyColumnIndexes[i];
+            keyBytesBuf[i] = Arrays.copyOf(splitBuffers[index].value, splitBuffers[index].length);
+            if (isNull(keyBytesBuf[i])) {
+                keyBytesBuf[i] = null;
+            }
+        }
+        return rowKeyEncoder.encode(keyBytesBuf);
+    }
+
+    private void buildValue(SplittedBytes[] splitBuffers) {
+
+        for (int i = 0; i < measures.length; i++) {
+            byte[] valueBytes = getValueBytes(splitBuffers, i);
+            measures[i] = measureCodec.getSerializer(i).valueOf(valueBytes);
+        }
+
+        valueBuf.clear();
+        measureCodec.encode(measures, valueBuf);
+    }
+
+    private byte[] getValueBytes(SplittedBytes[] splitBuffers, int measureIdx) {
+        MeasureDesc desc = cubeDesc.getMeasures().get(measureIdx);
+        FunctionDesc func = desc.getFunction();
+        ParameterDesc paramDesc = func.getParameter();
+        int[] flatTableIdx = intermediateTableDesc.getMeasureColumnIndexes()[measureIdx];
+
+        byte[] result = null;
+
+        // constant
+        if (flatTableIdx == null) {
+            result = Bytes.toBytes(paramDesc.getValue());
+        }
+        // column values
+        else {
+            // for multiple columns, their values are joined
+            for (int i = 0; i < flatTableIdx.length; i++) {
+                SplittedBytes split = splitBuffers[flatTableIdx[i]];
+                if (result == null) {
+                    result = Arrays.copyOf(split.value, split.length);
+                } else {
+                    byte[] newResult = new byte[result.length + split.length];
+                    System.arraycopy(result, 0, newResult, 0, result.length);
+                    System.arraycopy(split.value, 0, newResult, result.length, split.length);
+                    result = newResult;
+                }
+            }
+        }
+
+        if (func.isCount() || func.isHolisticCountDistinct()) {
+            // note for holistic count distinct, this value will be ignored
+            result = ONE;
+        }
+
+        if (isNull(result)) {
+            result = null;
+        }
+
+        return result;
+    }
+
+    protected void outputKV(Context context) throws IOException, InterruptedException {
+        intermediateTableDesc.sanityCheck(bytesSplitter);
+
+        byte[] rowKey = buildKey(bytesSplitter.getSplitBuffers());
+        outputKey.set(rowKey, 0, rowKey.length);
+
+        buildValue(bytesSplitter.getSplitBuffers());
+        outputValue.set(valueBuf.array(), 0, valueBuf.position());
+        context.write(outputKey, outputValue);
+    }
+
+    protected void handleErrorRecord(BytesSplitter bytesSplitter, Exception ex) throws IOException {
+
+        System.err.println("Insane record: " + bytesSplitter);
+        ex.printStackTrace(System.err);
+
+        errorRecordCounter++;
+        if (errorRecordCounter > BatchConstants.ERROR_RECORD_THRESHOLD) {
+            if (ex instanceof IOException)
+                throw (IOException) ex;
+            else if (ex instanceof RuntimeException)
+                throw (RuntimeException) ex;
+            else
+                throw new RuntimeException("Error record count exceeds threshold " + BatchConstants.ERROR_RECORD_THRESHOLD, ex);
+        }
+    }
+}

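For readers skimming the mapper above: the null handling in initNullBytes()/isNull()/buildKey() means that Hive's \N marker, plus any null strings configured on the cube, become a real null before the row key is encoded. A small standalone sketch of that check follows; the configured null string "NULL" and the sample cell value are assumptions for illustration, not taken from the commit.

    // Minimal sketch, not part of the commit: mirrors initNullBytes()/isNull() above.
    List<byte[]> nullBytes = Lists.newArrayList();
    nullBytes.add(Bytes.toBytes("\\N"));      // stands in for HIVE_NULL (assumed to be Hive's literal \N)
    nullBytes.add(Bytes.toBytes("NULL"));     // hypothetical entry from cubeDesc.getNullStrings()
    byte[] cell = Bytes.toBytes("NULL");      // made-up column value
    for (byte[] candidate : nullBytes) {
        if (Bytes.equals(cell, candidate)) {
            cell = null;                      // buildKey() hands null to the row key encoder
            break;
        }
    }
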
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/929b986d/job/src/main/java/org/apache/kylin/job/hadoop/cube/HiveToBaseCuboidMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/HiveToBaseCuboidMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/HiveToBaseCuboidMapper.java
new file mode 100644
index 0000000..599dde8
--- /dev/null
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/HiveToBaseCuboidMapper.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.job.hadoop.cube;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.Text;
+import org.apache.kylin.job.constant.BatchConstants;
+
+/**
+ * @author George Song (ysong1)
+ */
+public class HiveToBaseCuboidMapper<KEYIN> extends BaseCuboidMapperBase<KEYIN, Text> {
+
+    @Override
+    public void map(KEYIN key, Text value, Context context) throws IOException, InterruptedException {
+        counter++;
+        if (counter % BatchConstants.COUNTER_MAX == 0) {
+            logger.info("Handled " + counter + " records!");
+        }
+
+        try {
+            // split the input row into the shared bytesSplitter
+            bytesSplitter.split(value.getBytes(), value.getLength(), byteRowDelimiter);
+            // build and emit the cuboid key/value from the split columns
+            outputKV(context);
+
+        } catch (Exception ex) {
+            handleErrorRecord(bytesSplitter, ex);
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/929b986d/job/src/main/java/org/apache/kylin/job/hadoop/cube/IIToBaseCuboidMapper.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cube/IIToBaseCuboidMapper.java b/job/src/main/java/org/apache/kylin/job/hadoop/cube/IIToBaseCuboidMapper.java
new file mode 100644
index 0000000..68886c0
--- /dev/null
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cube/IIToBaseCuboidMapper.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.job.hadoop.cube;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Queue;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.util.FIFOIterable;
+import org.apache.kylin.common.util.SplittedBytes;
+import org.apache.kylin.dict.Dictionary;
+import org.apache.kylin.invertedindex.IIInstance;
+import org.apache.kylin.invertedindex.IIManager;
+import org.apache.kylin.invertedindex.index.RawTableRecord;
+import org.apache.kylin.invertedindex.index.Slice;
+import org.apache.kylin.invertedindex.index.TableRecordInfo;
+import org.apache.kylin.invertedindex.index.TableRecordInfoDigest;
+import org.apache.kylin.invertedindex.model.*;
+import org.apache.kylin.job.constant.BatchConstants;
+import org.apache.kylin.job.hadoop.AbstractHadoopJob;
+
+/**
+ * @author honma
+ */
+public class IIToBaseCuboidMapper extends BaseCuboidMapperBase<ImmutableBytesWritable, Result> {
+    private Queue<IIRow> buffer = Lists.newLinkedList();
+    private Iterator<Slice> slices;
+
+    @Override
+    protected void setup(Context context) throws IOException {
+        super.setup(context);
+
+        Configuration conf = context.getConfiguration();
+        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
+
+        String iiName = conf.get(BatchConstants.CFG_II_NAME);
+        IIInstance ii = IIManager.getInstance(config).getII(iiName);
+        IIDesc iiDesc = ii.getDescriptor();
+
+        TableRecordInfo info = new TableRecordInfo(iiDesc);
+        KeyValueCodec codec = new IIKeyValueCodecWithState(info.getDigest());
+        slices = codec.decodeKeyValue(new FIFOIterable<IIRow>(buffer)).iterator();
+    }
+
+    @Override
+    public void map(ImmutableBytesWritable key, Result cells, Context context) throws IOException, InterruptedException {
+        try {
+            IIRow iiRow = new IIRow();
+            for (Cell c : cells.rawCells()) {
+                iiRow.updateWith(c);
+            }
+            buffer.add(iiRow);
+
+            if (slices.hasNext()) {
+                Slice slice = slices.next();
+                TableRecordInfoDigest localDigest = slice.getInfo();
+                for (RawTableRecord record : slice) {
+
+                    counter++;
+                    if (counter % BatchConstants.COUNTER_MAX == 0) {
+                        logger.info("Handled " + counter + " records!");
+                    }
+
+                    for (int indexInRecord = 0; indexInRecord < localDigest.getColumnCount(); ++indexInRecord) {
+                        SplittedBytes columnBuffer = bytesSplitter.getSplitBuffer(indexInRecord);
+                        if (localDigest.isMetrics(indexInRecord)) {
+                            String v = record.getValueMetric(indexInRecord);
+                            byte[] metricBytes = v.getBytes();
+                            System.arraycopy(metricBytes, 0, columnBuffer.value, 0, metricBytes.length);
+                            columnBuffer.length = metricBytes.length;
+                        } else {
+                            Dictionary<?> dictionary = slice.getLocalDictionaries().get(indexInRecord);
+                            Preconditions.checkArgument(columnBuffer.value.length > dictionary.getSizeOfValue(), "Split buffer too small to hold the dictionary-decoded value");
+                            int vid = record.getValueID(indexInRecord);
+                            columnBuffer.length = dictionary.getValueBytesFromId(vid, columnBuffer.value, 0);
+                        }
+                    }
+
+                    outputKV(context);
+                }
+            }
+        } catch (Exception ex) {
+            handleErrorRecord(bytesSplitter, ex);
+        }
+    }
+}

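Since map() above interleaves buffering and decoding, the pattern may be easier to see in isolation: each HBase Result contributes one IIRow to a FIFO queue, and the stateful codec's lazy Slice iterator yields a Slice only once enough rows have accumulated. A stripped-down sketch, assuming a codec built as in setup() and an "incoming" stream of IIRows standing in for the mapper's input:

    // Sketch only; "codec" is the IIKeyValueCodecWithState from setup(), "incoming" is hypothetical.
    Queue<IIRow> buffer = Lists.newLinkedList();
    Iterator<Slice> slices = codec.decodeKeyValue(new FIFOIterable<IIRow>(buffer)).iterator();
    for (IIRow row : incoming) {
        buffer.add(row);                       // one IIRow per HBase Result
        if (slices.hasNext()) {                // a Slice becomes available once its rows are buffered
            Slice slice = slices.next();
            for (RawTableRecord record : slice) {
                // restore each column into the shared bytesSplitter, then reuse outputKV()
            }
        }
    }
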
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/929b986d/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidJob.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidJob.java b/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidJob.java
index f83e9d7..7a7c62e 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidJob.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCuboidJob.java
@@ -23,10 +23,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
 import org.apache.hadoop.util.ToolRunner;
@@ -40,8 +37,6 @@ import org.apache.kylin.cube.model.CubeDesc;
 import org.apache.kylin.job.constant.BatchConstants;
 import org.apache.kylin.job.exception.JobException;
 import org.apache.kylin.job.hadoop.AbstractHadoopJob;
-import org.apache.kylin.job.hadoop.cube.BaseCuboidMapper;
-import org.apache.kylin.job.hadoop.cube.CuboidJob;
 import org.apache.kylin.job.hadoop.cube.CuboidReducer;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/929b986d/job/src/test/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapperPerformanceTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapperPerformanceTest.java b/job/src/test/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapperPerformanceTest.java
deleted file mode 100644
index 7826e86..0000000
--- a/job/src/test/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapperPerformanceTest.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.job.hadoop.cube;
-
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.SequenceFile.Reader;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.Mapper.Context;
-import org.apache.hadoop.util.ReflectionUtils;
-import org.junit.Ignore;
-import org.junit.Test;
-
-/**
- * @author yangli9
- * 
- */
-@SuppressWarnings({ "rawtypes", "unchecked" })
-public class BaseCuboidMapperPerformanceTest {
-
-    String metadataUrl = "hbase:yadesk00:2181:/hbase-unsecure";
-    String cubeName = "test_kylin_cube_with_slr";
-    Path srcPath = new Path("/download/test_kylin_cube_with_slr_intermediate_table_64mb.seq");
-
-    @Ignore("convenient trial tool for dev")
-    @Test
-    public void test() throws IOException, InterruptedException {
-        Configuration hconf = new Configuration();
-        BaseCuboidMapper mapper = new BaseCuboidMapper();
-        Context context = MockupMapContext.create(hconf, metadataUrl, cubeName, null);
-
-        mapper.setup(context);
-
-        Reader reader = new Reader(hconf, SequenceFile.Reader.file(srcPath));
-        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), hconf);
-        Text value = new Text();
-
-        while (reader.next(key, value)) {
-            mapper.map(key, value, context);
-        }
-
-        reader.close();
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/929b986d/job/src/test/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapperTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapperTest.java b/job/src/test/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapperTest.java
deleted file mode 100644
index c3632b7..0000000
--- a/job/src/test/java/org/apache/kylin/job/hadoop/cube/BaseCuboidMapperTest.java
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.job.hadoop.cube;
-
-import static org.junit.Assert.*;
-
-import java.io.File;
-import java.math.BigDecimal;
-import java.util.List;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mrunit.mapreduce.MapDriver;
-import org.apache.hadoop.mrunit.types.Pair;
-import org.apache.kylin.job.constant.BatchConstants;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
-
-import org.apache.kylin.common.util.LocalFileMetadataTestCase;
-import org.apache.kylin.cube.CubeInstance;
-import org.apache.kylin.cube.CubeManager;
-import org.apache.kylin.cube.kv.RowKeyDecoder;
-import org.apache.kylin.metadata.measure.MeasureCodec;
-import org.apache.kylin.metadata.model.MeasureDesc;
-
-/**
- * @author George Song (ysong1)
- * 
- */
-public class BaseCuboidMapperTest extends LocalFileMetadataTestCase {
-
-    MapDriver<Text, Text, Text, Text> mapDriver;
-    String localTempDir = System.getProperty("java.io.tmpdir") + File.separator;
-
-    @Before
-    public void setUp() throws Exception {
-        createTestMetadata();
-
-        // hack for distributed cache
-        FileUtils.deleteDirectory(new File("../job/meta"));
-        FileUtils.copyDirectory(new File(getTestConfig().getMetadataUrl()), new File("../job/meta"));
-
-        BaseCuboidMapper<Text> mapper = new BaseCuboidMapper<Text>();
-        mapDriver = MapDriver.newMapDriver(mapper);
-    }
-
-    @After
-    public void after() throws Exception {
-        cleanupTestMetadata();
-        FileUtils.deleteDirectory(new File("../job/meta"));
-    }
-
-    @Test
-    public void testMapperWithHeader() throws Exception {
-        String cubeName = "test_kylin_cube_with_slr_1_new_segment";
-        String segmentName = "20130331080000_20131212080000";
-        mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
-        mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
-        // mapDriver.getConfiguration().set(BatchConstants.CFG_METADATA_URL,
-        // metadata);
-        mapDriver.withInput(new Text("key"), new Text("2012-12-15118480Health & BeautyFragrancesWomenAuction15123456789132.33"));
-        List<Pair<Text, Text>> result = mapDriver.run();
-
-        CubeManager cubeMgr = CubeManager.getInstance(getTestConfig());
-        CubeInstance cube = cubeMgr.getCube(cubeName);
-
-        assertEquals(1, result.size());
-        Text rowkey = result.get(0).getFirst();
-        byte[] key = rowkey.getBytes();
-        byte[] header = Bytes.head(key, 26);
-        byte[] sellerId = Bytes.tail(header, 18);
-        byte[] cuboidId = Bytes.head(header, 8);
-        byte[] restKey = Bytes.tail(key, rowkey.getLength() - 26);
-
-        RowKeyDecoder decoder = new RowKeyDecoder(cube.getFirstSegment());
-        decoder.decode(key);
-        assertEquals("[123456789, 2012-12-15, 11848, Health & Beauty, Fragrances, Women, Auction, 0, 15]", decoder.getValues().toString());
-
-        assertTrue(Bytes.toString(sellerId).startsWith("123456789"));
-        assertEquals(511, Bytes.toLong(cuboidId));
-        assertEquals(22, restKey.length);
-
-        verifyMeasures(cube.getDescriptor().getMeasures(), result.get(0).getSecond(), "132.33", "132.33", "132.33");
-    }
-
-    private void verifyMeasures(List<MeasureDesc> measures, Text valueBytes, String m1, String m2, String m3) {
-        MeasureCodec codec = new MeasureCodec(measures);
-        Object[] values = new Object[measures.size()];
-        codec.decode(valueBytes, values);
-        assertTrue(new BigDecimal(m1).equals(values[0]));
-        assertTrue(new BigDecimal(m2).equals(values[1]));
-        assertTrue(new BigDecimal(m3).equals(values[2]));
-    }
-
-    @Test
-    public void testMapperWithNull() throws Exception {
-        String cubeName = "test_kylin_cube_with_slr_1_new_segment";
-        String segmentName = "20130331080000_20131212080000";
-        mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
-        mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
-        // mapDriver.getConfiguration().set(BatchConstants.CFG_METADATA_URL,
-        // metadata);
-        mapDriver.withInput(new Text("key"), new Text("2012-12-15118480Health & BeautyFragrances\\NAuction15123456789\\N"));
-        List<Pair<Text, Text>> result = mapDriver.run();
-
-        CubeManager cubeMgr = CubeManager.getInstance(getTestConfig());
-        CubeInstance cube = cubeMgr.getCube(cubeName);
-
-        assertEquals(1, result.size());
-        Text rowkey = result.get(0).getFirst();
-        byte[] key = rowkey.getBytes();
-        byte[] header = Bytes.head(key, 26);
-        byte[] sellerId = Bytes.tail(header, 18);
-        byte[] cuboidId = Bytes.head(header, 8);
-        byte[] restKey = Bytes.tail(key, rowkey.getLength() - 26);
-
-        RowKeyDecoder decoder = new RowKeyDecoder(cube.getFirstSegment());
-        decoder.decode(key);
-        assertEquals("[123456789, 2012-12-15, 11848, Health & Beauty, Fragrances, null, Auction, 0, 15]", decoder.getValues().toString());
-
-        assertTrue(Bytes.toString(sellerId).startsWith("123456789"));
-        assertEquals(511, Bytes.toLong(cuboidId));
-        assertEquals(22, restKey.length);
-
-        verifyMeasures(cube.getDescriptor().getMeasures(), result.get(0).getSecond(), "0", "0", "0");
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/929b986d/job/src/test/java/org/apache/kylin/job/hadoop/cube/HiveToBaseCuboidMapperPerformanceTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/hadoop/cube/HiveToBaseCuboidMapperPerformanceTest.java b/job/src/test/java/org/apache/kylin/job/hadoop/cube/HiveToBaseCuboidMapperPerformanceTest.java
new file mode 100644
index 0000000..cf9cfe0
--- /dev/null
+++ b/job/src/test/java/org/apache/kylin/job/hadoop/cube/HiveToBaseCuboidMapperPerformanceTest.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.job.hadoop.cube;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.SequenceFile.Reader;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.Mapper.Context;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * @author yangli9
+ * 
+ */
+@SuppressWarnings({ "rawtypes", "unchecked" })
+public class HiveToBaseCuboidMapperPerformanceTest {
+
+    String metadataUrl = "hbase:yadesk00:2181:/hbase-unsecure";
+    String cubeName = "test_kylin_cube_with_slr";
+    Path srcPath = new Path("/download/test_kylin_cube_with_slr_intermediate_table_64mb.seq");
+
+    @Ignore("convenient trial tool for dev")
+    @Test
+    public void test() throws IOException, InterruptedException {
+        Configuration hconf = new Configuration();
+        HiveToBaseCuboidMapper mapper = new HiveToBaseCuboidMapper();
+        Context context = MockupMapContext.create(hconf, metadataUrl, cubeName, null);
+
+        mapper.setup(context);
+
+        Reader reader = new Reader(hconf, SequenceFile.Reader.file(srcPath));
+        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), hconf);
+        Text value = new Text();
+
+        while (reader.next(key, value)) {
+            mapper.map(key, value, context);
+        }
+
+        reader.close();
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/929b986d/job/src/test/java/org/apache/kylin/job/hadoop/cube/HiveToBaseCuboidMapperTest.java
----------------------------------------------------------------------
diff --git a/job/src/test/java/org/apache/kylin/job/hadoop/cube/HiveToBaseCuboidMapperTest.java b/job/src/test/java/org/apache/kylin/job/hadoop/cube/HiveToBaseCuboidMapperTest.java
new file mode 100644
index 0000000..f906fcb
--- /dev/null
+++ b/job/src/test/java/org/apache/kylin/job/hadoop/cube/HiveToBaseCuboidMapperTest.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.job.hadoop.cube;
+
+import static org.junit.Assert.*;
+
+import java.io.File;
+import java.math.BigDecimal;
+import java.util.List;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mrunit.mapreduce.MapDriver;
+import org.apache.hadoop.mrunit.types.Pair;
+import org.apache.kylin.job.constant.BatchConstants;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import org.apache.kylin.common.util.LocalFileMetadataTestCase;
+import org.apache.kylin.cube.CubeInstance;
+import org.apache.kylin.cube.CubeManager;
+import org.apache.kylin.cube.kv.RowKeyDecoder;
+import org.apache.kylin.metadata.measure.MeasureCodec;
+import org.apache.kylin.metadata.model.MeasureDesc;
+
+/**
+ * @author George Song (ysong1)
+ * 
+ */
+public class HiveToBaseCuboidMapperTest extends LocalFileMetadataTestCase {
+
+    MapDriver<Text, Text, Text, Text> mapDriver;
+    String localTempDir = System.getProperty("java.io.tmpdir") + File.separator;
+
+    @Before
+    public void setUp() throws Exception {
+        createTestMetadata();
+
+        // hack for distributed cache
+        FileUtils.deleteDirectory(new File("../job/meta"));
+        FileUtils.copyDirectory(new File(getTestConfig().getMetadataUrl()), new File("../job/meta"));
+
+        HiveToBaseCuboidMapper<Text> mapper = new HiveToBaseCuboidMapper<Text>();
+        mapDriver = MapDriver.newMapDriver(mapper);
+    }
+
+    @After
+    public void after() throws Exception {
+        cleanupTestMetadata();
+        FileUtils.deleteDirectory(new File("../job/meta"));
+    }
+
+    @Test
+    public void testMapperWithHeader() throws Exception {
+        String cubeName = "test_kylin_cube_with_slr_1_new_segment";
+        String segmentName = "20130331080000_20131212080000";
+        mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
+        mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
+        // mapDriver.getConfiguration().set(BatchConstants.CFG_METADATA_URL,
+        // metadata);
+        mapDriver.withInput(new Text("key"), new Text("2012-12-15118480Health & BeautyFragrancesWomenAuction15123456789132.33"));
+        List<Pair<Text, Text>> result = mapDriver.run();
+
+        CubeManager cubeMgr = CubeManager.getInstance(getTestConfig());
+        CubeInstance cube = cubeMgr.getCube(cubeName);
+
+        assertEquals(1, result.size());
+        Text rowkey = result.get(0).getFirst();
+        byte[] key = rowkey.getBytes();
+        byte[] header = Bytes.head(key, 26);
+        byte[] sellerId = Bytes.tail(header, 18);
+        byte[] cuboidId = Bytes.head(header, 8);
+        byte[] restKey = Bytes.tail(key, rowkey.getLength() - 26);
+
+        RowKeyDecoder decoder = new RowKeyDecoder(cube.getFirstSegment());
+        decoder.decode(key);
+        assertEquals("[123456789, 2012-12-15, 11848, Health & Beauty, Fragrances, Women, Auction, 0, 15]", decoder.getValues().toString());
+
+        assertTrue(Bytes.toString(sellerId).startsWith("123456789"));
+        assertEquals(511, Bytes.toLong(cuboidId));
+        assertEquals(22, restKey.length);
+
+        verifyMeasures(cube.getDescriptor().getMeasures(), result.get(0).getSecond(), "132.33", "132.33", "132.33");
+    }
+
+    private void verifyMeasures(List<MeasureDesc> measures, Text valueBytes, String m1, String m2, String m3) {
+        MeasureCodec codec = new MeasureCodec(measures);
+        Object[] values = new Object[measures.size()];
+        codec.decode(valueBytes, values);
+        assertTrue(new BigDecimal(m1).equals(values[0]));
+        assertTrue(new BigDecimal(m2).equals(values[1]));
+        assertTrue(new BigDecimal(m3).equals(values[2]));
+    }
+
+    @Test
+    public void testMapperWithNull() throws Exception {
+        String cubeName = "test_kylin_cube_with_slr_1_new_segment";
+        String segmentName = "20130331080000_20131212080000";
+        mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
+        mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
+        // mapDriver.getConfiguration().set(BatchConstants.CFG_METADATA_URL,
+        // metadata);
+        mapDriver.withInput(new Text("key"), new Text("2012-12-15118480Health & BeautyFragrances\\NAuction15123456789\\N"));
+        List<Pair<Text, Text>> result = mapDriver.run();
+
+        CubeManager cubeMgr = CubeManager.getInstance(getTestConfig());
+        CubeInstance cube = cubeMgr.getCube(cubeName);
+
+        assertEquals(1, result.size());
+        Text rowkey = result.get(0).getFirst();
+        byte[] key = rowkey.getBytes();
+        byte[] header = Bytes.head(key, 26);
+        byte[] sellerId = Bytes.tail(header, 18);
+        byte[] cuboidId = Bytes.head(header, 8);
+        byte[] restKey = Bytes.tail(key, rowkey.getLength() - 26);
+
+        RowKeyDecoder decoder = new RowKeyDecoder(cube.getFirstSegment());
+        decoder.decode(key);
+        assertEquals("[123456789, 2012-12-15, 11848, Health & Beauty, Fragrances, null, Auction, 0, 15]", decoder.getValues().toString());
+
+        assertTrue(Bytes.toString(sellerId).startsWith("123456789"));
+        assertEquals(511, Bytes.toLong(cuboidId));
+        assertEquals(22, restKey.length);
+
+        verifyMeasures(cube.getDescriptor().getMeasures(), result.get(0).getSecond(), "0", "0", "0");
+    }
+}


[25/50] incubator-kylin git commit: accept bigint as II record timestamp

Posted by li...@apache.org.
accept bigint as II record timestamp


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/71bbd0c6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/71bbd0c6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/71bbd0c6

Branch: refs/heads/streaming-localdict
Commit: 71bbd0c6ceabdb53fdf485da35b60508578f9bd2
Parents: d136933
Author: Li, Yang <ya...@ebay.com>
Authored: Fri Mar 27 13:55:39 2015 +0800
Committer: Li, Yang <ya...@ebay.com>
Committed: Fri Mar 27 13:55:39 2015 +0800

----------------------------------------------------------------------
 .../java/org/apache/kylin/dict/DateStrDictionary.java   | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/71bbd0c6/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
----------------------------------------------------------------------
diff --git a/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java b/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
index 7cace15..95f67ff 100644
--- a/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
+++ b/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
@@ -89,7 +89,9 @@ public class DateStrDictionary extends Dictionary<String> {
     }
 
     public static long stringToMillis(String str) {
-        if (str.length() == 10) {
+        if (isAllDigits(str)) {
+            return Long.parseLong(str);
+        } else if (str.length() == 10) {
             return stringToDate(str, DEFAULT_DATE_PATTERN).getTime();
         } else if (str.length() == 19) {
             return stringToDate(str, DEFAULT_DATETIME_PATTERN_WITHOUT_MILLISECONDS).getTime();
@@ -99,6 +101,14 @@ public class DateStrDictionary extends Dictionary<String> {
             throw new IllegalArgumentException("there is no valid date pattern for:" + str);
         }
     }
+    
+    private static boolean isAllDigits(String str) {
+        for (int i = 0, n = str.length(); i < n; i++) {
+            if (!Character.isDigit(str.charAt(i)))
+                return false;
+        }
+        return true;
+    }
 
     // ============================================================================
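
To make the change concrete, a hedged illustration of the new stringToMillis() behaviour (the sample values are assumptions, not taken from the commit): an all-digit string is now taken directly as epoch milliseconds, while the existing 10- and 19-character date patterns behave as before.

    // Illustrative only; the inputs below are assumptions.
    long a = DateStrDictionary.stringToMillis("1427435739000");       // all digits -> Long.parseLong (bigint timestamp)
    long b = DateStrDictionary.stringToMillis("2015-03-27");          // 10 chars  -> DEFAULT_DATE_PATTERN
    long c = DateStrDictionary.stringToMillis("2015-03-27 13:55:39"); // 19 chars  -> datetime pattern without millis

Note that the digit check runs first, so a purely numeric date string such as "20150327" would also be interpreted as milliseconds rather than as a date.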