You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by ma...@apache.org on 2016/11/19 14:21:15 UTC

[05/12] kylin git commit: KYLIN-2199 refined KYLIN-2191 due to KYLIN-2198

KYLIN-2199 refined KYLIN-2191 due to KYLIN-2198


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/ac4e9ec5
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/ac4e9ec5
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/ac4e9ec5

Branch: refs/heads/yang21-cdh5.7
Commit: ac4e9ec563b6e14e387c5803d441c8dbcbc06f87
Parents: e1acc41
Author: Hongbin Ma <ma...@apache.org>
Authored: Wed Nov 16 14:48:08 2016 +0800
Committer: Hongbin Ma <ma...@apache.org>
Committed: Thu Nov 17 11:14:24 2016 +0800

----------------------------------------------------------------------
 .../gridtable/DimEncodingPreserveOrderTest.java |   4 +-
 .../apache/kylin/dimension/IntegerDimEnc.java   |   9 +-
 .../apache/kylin/dimension/IntegerDimEncV2.java | 228 +++++++++++++++++++
 .../apache/kylin/dimension/IntDimEncTest.java   |   2 +-
 .../kylin/dimension/IntegerDimEncTest.java      |  18 +-
 5 files changed, 248 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/ac4e9ec5/core-cube/src/test/java/org/apache/kylin/gridtable/DimEncodingPreserveOrderTest.java
----------------------------------------------------------------------
diff --git a/core-cube/src/test/java/org/apache/kylin/gridtable/DimEncodingPreserveOrderTest.java b/core-cube/src/test/java/org/apache/kylin/gridtable/DimEncodingPreserveOrderTest.java
index 87d37be..d572e56 100644
--- a/core-cube/src/test/java/org/apache/kylin/gridtable/DimEncodingPreserveOrderTest.java
+++ b/core-cube/src/test/java/org/apache/kylin/gridtable/DimEncodingPreserveOrderTest.java
@@ -27,7 +27,7 @@ import org.apache.kylin.common.util.ByteArray;
 import org.apache.kylin.common.util.Bytes;
 import org.apache.kylin.dimension.DimensionEncoding;
 import org.apache.kylin.dimension.FixedLenHexDimEnc;
-import org.apache.kylin.dimension.IntegerDimEnc;
+import org.apache.kylin.dimension.IntegerDimEncV2;
 import org.apache.kylin.dimension.OneMoreByteVLongDimEnc;
 import org.junit.BeforeClass;
 import org.junit.Test;
@@ -99,7 +99,7 @@ public class DimEncodingPreserveOrderTest {
     @Test
     public void testVLongDimEncPreserveOrder() {
         for (int i = 1; i <= successValue.size(); i++) {
-            IntegerDimEnc enc = new IntegerDimEnc(i);
+            IntegerDimEncV2 enc = new IntegerDimEncV2(i);
             List<ByteArray> encodedValues = Lists.newArrayList();
             for (long value : successValue.get(i - 1)) {
                 encodedValues.add(encode(enc, value));

http://git-wip-us.apache.org/repos/asf/kylin/blob/ac4e9ec5/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEnc.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEnc.java b/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEnc.java
index e55a0a8..983af9a 100644
--- a/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEnc.java
+++ b/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEnc.java
@@ -31,8 +31,13 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /**
- * replacement for IntegerDimEnc, the diff is VLongDimEnc supports negative values
+ * replacement for IntDimEnc, the diff is IntegerDimEnc supports negative values
+ * for IntegerDimEnc(N), the supported range is (-2^(8*N-1),2^(8*N-1))
+ *
+ * -2^(8*N-1) is not supported because the slot is reserved for null values.
+ * -2^(8*N-1) will be encoded with warn, and its output will be null
  */
+@Deprecated//due to a fatal bug (KYLIN-2191)
 public class IntegerDimEnc extends DimensionEncoding {
     private static final long serialVersionUID = 1L;
 
@@ -127,7 +132,7 @@ public class IntegerDimEnc extends DimensionEncoding {
 
         //only take useful bytes
         integer = integer & MASK[fixedLen];
-        boolean positive = (integer & ((0x80L) << ((fixedLen - 1) << 3))) == 0;
+        boolean positive = (integer & ((0x80) << ((fixedLen - 1) << 3))) == 0;
         if (!positive) {
             integer |= (~MASK[fixedLen]);
         }

http://git-wip-us.apache.org/repos/asf/kylin/blob/ac4e9ec5/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEncV2.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEncV2.java b/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEncV2.java
new file mode 100644
index 0000000..1a54664
--- /dev/null
+++ b/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEncV2.java
@@ -0,0 +1,228 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *  
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *  
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.dimension;
+
+import java.io.IOException;
+import java.io.ObjectInput;
+import java.io.ObjectOutput;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+
+import org.apache.kylin.common.util.Bytes;
+import org.apache.kylin.common.util.BytesUtil;
+import org.apache.kylin.metadata.datatype.DataTypeSerializer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * replacement for IntDimEnc, the diff is IntegerDimEnc supports negative values
+ * for IntegerDimEnc(N), the supported range is (-2^(8*N-1),2^(8*N-1))
+ * 
+ * -2^(8*N-1) is not supported because the slot is reserved for null values.
+ * -2^(8*N-1) will be encoded with warn, and its output will be null
+ */
+public class IntegerDimEncV2 extends DimensionEncoding {
+    private static final long serialVersionUID = 1L;
+
+    private static Logger logger = LoggerFactory.getLogger(IntegerDimEncV2.class);
+
+    private static final long[] CAP = { 0, 0x7fL, 0x7fffL, 0x7fffffL, 0x7fffffffL, 0x7fffffffffL, 0x7fffffffffffL, 0x7fffffffffffffL, 0x7fffffffffffffffL };
+    private static final long[] MASK = { 0, 0xffL, 0xffffL, 0xffffffL, 0xffffffffL, 0xffffffffffL, 0xffffffffffffL, 0xffffffffffffffL, 0xffffffffffffffffL };
+    private static final long[] TAIL = { 0, 0x80L, 0x8000L, 0x800000L, 0x80000000L, 0x8000000000L, 0x800000000000L, 0x80000000000000L, 0x8000000000000000L };
+    static {
+        for (int i = 1; i < TAIL.length; ++i) {
+            long head = ~MASK[i];
+            TAIL[i] = head | TAIL[i];
+        }
+    }
+
+    public static final String ENCODING_NAME = "integer";
+
+    public static class Factory extends DimensionEncodingFactory {
+        @Override
+        public String getSupportedEncodingName() {
+            return ENCODING_NAME;
+        }
+
+        @Override
+        protected int getCurrentVersion() {
+            return 2;
+        }
+
+        @Override
+        public DimensionEncoding createDimensionEncoding(String encodingName, String[] args) {
+            return new IntegerDimEncV2(Integer.parseInt(args[0]));
+        }
+    };
+
+    // ============================================================================
+
+    private int fixedLen;
+
+    transient private int avoidVerbose = 0;
+    transient private int avoidVerbose2 = 0;
+
+    //no-arg constructor is required for Externalizable
+    public IntegerDimEncV2() {
+    }
+
+    public IntegerDimEncV2(int len) {
+        if (len <= 0 || len >= CAP.length)
+            throw new IllegalArgumentException();
+
+        this.fixedLen = len;
+    }
+
+    @Override
+    public int getLengthOfEncoding() {
+        return fixedLen;
+    }
+
+    @Override
+    public void encode(byte[] value, int valueLen, byte[] output, int outputOffset) {
+        if (value == null) {
+            Arrays.fill(output, outputOffset, outputOffset + fixedLen, NULL);
+            return;
+        }
+
+        encode(Bytes.toString(value, 0, valueLen), output, outputOffset);
+    }
+
+    void encode(String valueStr, byte[] output, int outputOffset) {
+        if (valueStr == null) {
+            Arrays.fill(output, outputOffset, outputOffset + fixedLen, NULL);
+            return;
+        }
+
+        long integer = Long.parseLong(valueStr);
+        if (integer > CAP[fixedLen] || integer < TAIL[fixedLen]) {
+            if (avoidVerbose++ % 10000 == 0) {
+                logger.warn("Expect at most " + fixedLen + " bytes, but got " + valueStr + ", will truncate, hit times:" + avoidVerbose);
+            }
+        }
+
+        if (integer == TAIL[fixedLen]) {
+            if (avoidVerbose2++ % 10000 == 0) {
+                logger.warn("Value " + valueStr + " does not fit into " + fixedLen + " bytes ");
+            }
+        }
+
+        BytesUtil.writeLong(integer + CAP[fixedLen], output, outputOffset, fixedLen);//apply an offset to preserve binary order, overflow is okay
+    }
+
+    @Override
+    public String decode(byte[] bytes, int offset, int len) {
+        if (isNull(bytes, offset, len)) {
+            return null;
+        }
+
+        long integer = BytesUtil.readLong(bytes, offset, len) - CAP[fixedLen];
+
+        //only take useful bytes
+        integer = integer & MASK[fixedLen];
+        boolean positive = (integer & ((0x80L) << ((fixedLen - 1) << 3))) == 0;
+        if (!positive) {
+            integer |= (~MASK[fixedLen]);
+        }
+
+        return String.valueOf(integer);
+    }
+
+    @Override
+    public DataTypeSerializer<Object> asDataTypeSerializer() {
+        return new IntegerSerializer();
+    }
+
+    public class IntegerSerializer extends DataTypeSerializer<Object> {
+        // be thread-safe and avoid repeated obj creation
+        private ThreadLocal<byte[]> current = new ThreadLocal<byte[]>();
+
+        private byte[] currentBuf() {
+            byte[] buf = current.get();
+            if (buf == null) {
+                buf = new byte[fixedLen];
+                current.set(buf);
+            }
+            return buf;
+        }
+
+        @Override
+        public void serialize(Object value, ByteBuffer out) {
+            byte[] buf = currentBuf();
+            String valueStr = value == null ? null : value.toString();
+            encode(valueStr, buf, 0);
+            out.put(buf);
+        }
+
+        @Override
+        public Object deserialize(ByteBuffer in) {
+            byte[] buf = currentBuf();
+            in.get(buf);
+            return decode(buf, 0, buf.length);
+        }
+
+        @Override
+        public int peekLength(ByteBuffer in) {
+            return fixedLen;
+        }
+
+        @Override
+        public int maxLength() {
+            return fixedLen;
+        }
+
+        @Override
+        public int getStorageBytesEstimate() {
+            return fixedLen;
+        }
+
+        @Override
+        public Object valueOf(String str) {
+            return str;
+        }
+    }
+
+    @Override
+    public void writeExternal(ObjectOutput out) throws IOException {
+        out.writeShort(fixedLen);
+    }
+
+    @Override
+    public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
+        fixedLen = in.readShort();
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o)
+            return true;
+        if (o == null || getClass() != o.getClass())
+            return false;
+
+        IntegerDimEncV2 that = (IntegerDimEncV2) o;
+
+        return fixedLen == that.fixedLen;
+
+    }
+
+    @Override
+    public int hashCode() {
+        return fixedLen;
+    }
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/ac4e9ec5/core-metadata/src/test/java/org/apache/kylin/dimension/IntDimEncTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/dimension/IntDimEncTest.java b/core-metadata/src/test/java/org/apache/kylin/dimension/IntDimEncTest.java
index 280a242..d228dd5 100644
--- a/core-metadata/src/test/java/org/apache/kylin/dimension/IntDimEncTest.java
+++ b/core-metadata/src/test/java/org/apache/kylin/dimension/IntDimEncTest.java
@@ -26,7 +26,7 @@ import org.junit.Assert;
 import org.junit.Test;
 
 /**
- * Deprecated. use VLongDimEnc instead
+ * Deprecated. use integer encoding instead
  * @deprecated
  */
 public class IntDimEncTest {

http://git-wip-us.apache.org/repos/asf/kylin/blob/ac4e9ec5/core-metadata/src/test/java/org/apache/kylin/dimension/IntegerDimEncTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/dimension/IntegerDimEncTest.java b/core-metadata/src/test/java/org/apache/kylin/dimension/IntegerDimEncTest.java
index a9b2511..9924053 100644
--- a/core-metadata/src/test/java/org/apache/kylin/dimension/IntegerDimEncTest.java
+++ b/core-metadata/src/test/java/org/apache/kylin/dimension/IntegerDimEncTest.java
@@ -76,24 +76,24 @@ public class IntegerDimEncTest {
     @Test
     public void testConstructor() {
         try {
-            new IntegerDimEnc(0);
+            new IntegerDimEncV2(0);
             Assert.fail();
         } catch (IllegalArgumentException e) {
             // expect
         }
         try {
-            new IntegerDimEnc(9);
+            new IntegerDimEncV2(9);
             Assert.fail();
         } catch (IllegalArgumentException e) {
             // expect
         }
-        new IntegerDimEnc(8);
+        new IntegerDimEncV2(8);
     }
 
     @Test
     public void testNull() {
         for (int i = 1; i < 9; i++) {
-            IntegerDimEnc enc = new IntegerDimEnc(i);
+            IntegerDimEncV2 enc = new IntegerDimEncV2(i);
 
             byte[] buf = new byte[enc.getLengthOfEncoding()];
             enc.encode(null, 0, buf, 0);
@@ -113,7 +113,7 @@ public class IntegerDimEncTest {
     @Test
     public void testEncodeDecode() {
         for (int i = 1; i <= successValue.size(); i++) {
-            IntegerDimEnc enc = new IntegerDimEnc(i);
+            IntegerDimEncV2 enc = new IntegerDimEncV2(i);
             for (long value : successValue.get(i - 1)) {
                 testEncodeDecode(enc, value);
             }
@@ -129,7 +129,7 @@ public class IntegerDimEncTest {
         }
     }
 
-    private void testEncodeDecode(IntegerDimEnc enc, long value) {
+    private void testEncodeDecode(IntegerDimEncV2 enc, long value) {
         String valueStr = "" + value;
         byte[] buf = new byte[enc.getLengthOfEncoding()];
         byte[] bytes = Bytes.toBytes(valueStr);
@@ -141,7 +141,9 @@ public class IntegerDimEncTest {
     @Test
     public void testSerDes() {
         for (int i = 1; i <= successValue.size(); i++) {
-            IntegerDimEnc enc = new IntegerDimEnc(i);
+            IntegerDimEncV2 enc = new IntegerDimEncV2(i);
+
+            testSerDes(enc, 127);
             for (long value : successValue.get(i - 1)) {
                 testSerDes(enc, value);
             }
@@ -156,7 +158,7 @@ public class IntegerDimEncTest {
         }
     }
 
-    private void testSerDes(IntegerDimEnc enc, long value) {
+    private void testSerDes(IntegerDimEncV2 enc, long value) {
         DataTypeSerializer<Object> ser = enc.asDataTypeSerializer();
         byte[] buf = new byte[enc.getLengthOfEncoding()];
         String valueStr = "" + value;