You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2017/01/17 03:30:32 UTC
[15/20] kylin git commit: KYLIN-2396 percentile pre-aggregation
support
KYLIN-2396 percentile pre-aggregation support
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/95d44121
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/95d44121
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/95d44121
Branch: refs/heads/master-hbase1.x
Commit: 95d4412194fb6fc08c99600d162826d9350a7a80
Parents: 4625b05
Author: lidongsjtu <li...@apache.org>
Authored: Mon Jan 16 11:12:36 2017 +0800
Committer: lidongsjtu <li...@apache.org>
Committed: Mon Jan 16 17:25:28 2017 +0800
----------------------------------------------------------------------
.../org/apache/kylin/common/util/MathUtil.java | 32 ++++++
core-metadata/pom.xml | 4 +
.../kylin/measure/MeasureTypeFactory.java | 4 +-
.../measure/percentile/PercentileAggFunc.java | 44 ++++++++
.../percentile/PercentileAggregator.java | 64 ++++++++++++
.../measure/percentile/PercentileContUdf.java | 37 +++++++
.../measure/percentile/PercentileCounter.java | 97 ++++++++++++++++++
.../percentile/PercentileMeasureType.java | 102 +++++++++++++++++++
.../percentile/PercentileSerializer.java | 71 +++++++++++++
.../percentile/PercentileAggregatorTest.java | 55 ++++++++++
.../percentile/PercentileCounterTest.java | 79 ++++++++++++++
.../percentile/PercentileSerializerTest.java | 68 +++++++++++++
.../kylin/measure/percentile/TDigestTest.java | 57 +++++++++++
.../localmeta/cube_desc/ci_inner_join_cube.json | 12 ++-
.../localmeta/cube_desc/ci_left_join_cube.json | 12 ++-
.../apache/kylin/query/ITKylinQueryTest.java | 18 ++--
.../resources/query/sql_percentile/query01.sql | 2 +
pom.xml | 6 ++
storage-hbase/pom.xml | 1 +
19 files changed, 755 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/95d44121/core-common/src/main/java/org/apache/kylin/common/util/MathUtil.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/util/MathUtil.java b/core-common/src/main/java/org/apache/kylin/common/util/MathUtil.java
new file mode 100644
index 0000000..ae674c6
--- /dev/null
+++ b/core-common/src/main/java/org/apache/kylin/common/util/MathUtil.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.common.util;
+
+import java.util.List;
+
+public class MathUtil {
+ /** Returns the median of a non-empty list sorted in ascending order; null or empty input is rejected. */
+ public static double findMedianInSortedList(List<Double> m) {
+ if (m == null || m.isEmpty())
+ throw new IllegalArgumentException("Cannot compute the median of a null or empty list");
+ int middle = m.size() / 2;
+ // Odd size: the single middle element. Even size: the mean of the two middle elements.
+ return m.size() % 2 == 1 ? m.get(middle) : (m.get(middle - 1) + m.get(middle)) / 2.0;
+ }
+}
http://git-wip-us.apache.org/repos/asf/kylin/blob/95d44121/core-metadata/pom.xml
----------------------------------------------------------------------
diff --git a/core-metadata/pom.xml b/core-metadata/pom.xml
index b3fe885..87c4438 100644
--- a/core-metadata/pom.xml
+++ b/core-metadata/pom.xml
@@ -47,6 +47,10 @@
<groupId>org.roaringbitmap</groupId>
<artifactId>RoaringBitmap</artifactId>
</dependency>
+ <dependency>
+ <groupId>com.tdunning</groupId>
+ <artifactId>t-digest</artifactId>
+ </dependency>
<!-- Env & Test -->
<dependency>
http://git-wip-us.apache.org/repos/asf/kylin/blob/95d44121/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java b/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java
index 8e58858..5d0e007 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java
@@ -28,6 +28,7 @@ import org.apache.kylin.measure.bitmap.BitmapMeasureType;
import org.apache.kylin.measure.dim.DimCountDistinctMeasureType;
import org.apache.kylin.measure.extendedcolumn.ExtendedColumnMeasureType;
import org.apache.kylin.measure.hllc.HLLCMeasureType;
+import org.apache.kylin.measure.percentile.PercentileMeasureType;
import org.apache.kylin.measure.raw.RawMeasureType;
import org.apache.kylin.measure.topn.TopNMeasureType;
import org.apache.kylin.metadata.datatype.DataType;
@@ -109,6 +110,7 @@ abstract public class MeasureTypeFactory<T> {
factoryInsts.add(new TopNMeasureType.Factory());
factoryInsts.add(new RawMeasureType.Factory());
factoryInsts.add(new ExtendedColumnMeasureType.Factory());
+ factoryInsts.add(new PercentileMeasureType.Factory());
logger.info("Checking custom measure types from kylin config");
@@ -143,7 +145,7 @@ abstract public class MeasureTypeFactory<T> {
List<MeasureTypeFactory<?>> list = factories.get(funcName);
if (list == null)
list = Lists.newArrayListWithCapacity(2);
- factories.put(funcName, list);
+ factories.put(funcName, list);
list.add(factory);
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/95d44121/core-metadata/src/main/java/org/apache/kylin/measure/percentile/PercentileAggFunc.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/percentile/PercentileAggFunc.java b/core-metadata/src/main/java/org/apache/kylin/measure/percentile/PercentileAggFunc.java
new file mode 100644
index 0000000..ad02019
--- /dev/null
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/percentile/PercentileAggFunc.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.measure.percentile;
+
+public class PercentileAggFunc {
+ public static PercentileCounter init() {
+ return null; // null stands for "nothing aggregated yet"; add, merge and result all accept it
+ }
+ // Merges the counter v into the accumulator, creating the accumulator on first use with quantile ratio r (a Number).
+ public static PercentileCounter add(PercentileCounter counter, Object v, Object r) {
+ PercentileCounter c = (PercentileCounter) v;
+ Number n = (Number) r;
+ if (counter == null)
+ counter = new PercentileCounter(c.compression, n.doubleValue());
+ counter.merge(c);
+ return counter;
+ }
+ // Combines two accumulators. Either side may still be the null produced by init(), so guard before merging.
+ public static PercentileCounter merge(PercentileCounter counter0, PercentileCounter counter1) {
+ if (counter0 == null || counter1 == null)
+ return counter0 == null ? counter1 : counter0;
+ counter0.merge(counter1);
+ return counter0;
+ }
+ public static double result(PercentileCounter counter) {
+ return counter == null ? 0.0 : counter.getResultEstimate(); // an accumulator that saw no rows yields 0
+ }
+}
http://git-wip-us.apache.org/repos/asf/kylin/blob/95d44121/core-metadata/src/main/java/org/apache/kylin/measure/percentile/PercentileAggregator.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/percentile/PercentileAggregator.java b/core-metadata/src/main/java/org/apache/kylin/measure/percentile/PercentileAggregator.java
new file mode 100644
index 0000000..d6b93eb
--- /dev/null
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/percentile/PercentileAggregator.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.measure.percentile;
+
+import org.apache.kylin.measure.MeasureAggregator;
+
+public class PercentileAggregator extends MeasureAggregator<PercentileCounter> {
+ final double compression;
+ PercentileCounter sum; // running aggregate; null until the first value arrives
+ // The compression is only recorded here; counters are created by copying incoming values.
+ public PercentileAggregator(double compression) {
+ this.compression = compression;
+ }
+ // Drops the running aggregate.
+ @Override
+ public void reset() {
+ this.sum = null;
+ }
+ // The first value is copied into a new counter; every later value is merged into that copy.
+ @Override
+ public void aggregate(PercentileCounter value) {
+ if (sum != null)
+ sum.merge(value);
+ else
+ sum = new PercentileCounter(value);
+ }
+ // Returns a new counter built as a copy of value1 with value2 merged in; the running aggregate is not touched.
+ @Override
+ public PercentileCounter aggregate(PercentileCounter value1, PercentileCounter value2) {
+ PercentileCounter combined = new PercentileCounter(value1);
+ combined.merge(value2);
+ return combined;
+ }
+ // Returns the running aggregate (null if nothing was aggregated since construction or the last reset).
+ @Override
+ public PercentileCounter getState() {
+ return this.sum;
+ }
+
+ @Override
+ public int getMemBytesEstimate() {
+ // Fixed 10 KB upper bound.
+ // Measured on random doubles: 20 t-digests of 5000000 values each, merged into a single t-digest,
+ // took 10309 bytes before compress and 8906 bytes after compress.
+ final int upperBoundBytes = 10 * 1024;
+ return upperBoundBytes;
+ }
+}
http://git-wip-us.apache.org/repos/asf/kylin/blob/95d44121/core-metadata/src/main/java/org/apache/kylin/measure/percentile/PercentileContUdf.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/percentile/PercentileContUdf.java b/core-metadata/src/main/java/org/apache/kylin/measure/percentile/PercentileContUdf.java
new file mode 100644
index 0000000..4ef6b75
--- /dev/null
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/percentile/PercentileContUdf.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.measure.percentile;
+
+public class PercentileContUdf { // stub with the init/add/merge/result method shape; every method returns 0
+ public static double init() {
+ return 0d;
+ }
+ // Placeholder: the accumulator and both arguments are ignored.
+ public static double add(double accumulator, double v, double r) {
+ return 0d;
+ }
+ // Placeholder: both accumulators are ignored.
+ public static double merge(double accumulator0, double accumulator1) {
+ return 0d;
+ }
+ // Placeholder: the accumulator is ignored. NOTE(review): takes a long while the others use double - confirm intended.
+ public static double result(long accumulator) {
+ return 0d;
+ }
+}
http://git-wip-us.apache.org/repos/asf/kylin/blob/95d44121/core-metadata/src/main/java/org/apache/kylin/measure/percentile/PercentileCounter.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/percentile/PercentileCounter.java b/core-metadata/src/main/java/org/apache/kylin/measure/percentile/PercentileCounter.java
new file mode 100644
index 0000000..bf505cf
--- /dev/null
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/percentile/PercentileCounter.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.measure.percentile;
+
+import java.io.Serializable;
+import java.nio.ByteBuffer;
+
+import com.tdunning.math.stats.AVLTreeDigest;
+import com.tdunning.math.stats.TDigest;
+
+public class PercentileCounter implements Serializable {
+ private static final double INVALID_QUANTILE_RATIO = -1; // stands in when no quantile ratio is supplied
+
+ double compression; // passed to TDigest.createAvlTreeDigest()
+ double quantileRatio; // passed to TDigest.quantile() by getResultEstimate()
+ TDigest registers;
+
+ // Creates an empty counter that carries INVALID_QUANTILE_RATIO.
+ public PercentileCounter(double compression) {
+ this(compression, INVALID_QUANTILE_RATIO);
+ }
+ // Copy constructor: takes the other counter's compression and ratio, then merges its data in.
+ public PercentileCounter(PercentileCounter another) {
+ this(another.compression, another.quantileRatio);
+ merge(another);
+ }
+ // Creates an empty counter with the given compression and quantile ratio.
+ public PercentileCounter(double compression, double quantileRatio) {
+ this.quantileRatio = quantileRatio;
+ this.compression = compression;
+ this.registers = TDigest.createAvlTreeDigest(compression);
+ }
+ // Records one value.
+ public void add(double v) {
+ this.registers.add(v);
+ }
+ // Adds the other counter's digest to this one; equal compression is only checked by an assert.
+ public void merge(PercentileCounter counter) {
+ assert counter.compression == this.compression;
+ this.registers.add(counter.registers);
+ }
+ // Returns the digest's estimate at quantileRatio.
+ public double getResultEstimate() {
+ final double estimate = registers.quantile(quantileRatio);
+ return estimate;
+ }
+ // Compresses the digest, then serializes it into the buffer with asSmallBytes().
+ public void writeRegisters(ByteBuffer out) {
+ registers.compress();
+ registers.asSmallBytes(out);
+ }
+
+ // Replaces the digest with one decoded from the buffer and adopts its compression; quantileRatio is kept.
+ public void readRegisters(ByteBuffer in) {
+ final TDigest decoded = AVLTreeDigest.fromBytes(in);
+ this.registers = decoded;
+ this.compression = decoded.compression();
+ }
+
+ public int getBytesEstimate() {
+ return maxLength();
+ }
+
+ public int maxLength() {
+ return 10 * 1024; // 10KB for max length
+ }
+
+ // Returns how far decoding one digest advances the buffer, then puts the position back where it started.
+ public int peekLength(ByteBuffer in) {
+ final int start = in.position();
+ AVLTreeDigest.fromBytes(in); // decoded only to find out how far the position advances
+ final int consumed = in.position() - start;
+ in.position(start);
+ return consumed;
+ }
+
+ // Discards all recorded data by installing a fresh digest with the current compression.
+ public void clear() {
+ this.registers = TDigest.createAvlTreeDigest(this.compression);
+ }
+}
http://git-wip-us.apache.org/repos/asf/kylin/blob/95d44121/core-metadata/src/main/java/org/apache/kylin/measure/percentile/PercentileMeasureType.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/percentile/PercentileMeasureType.java b/core-metadata/src/main/java/org/apache/kylin/measure/percentile/PercentileMeasureType.java
new file mode 100644
index 0000000..45ebe89
--- /dev/null
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/percentile/PercentileMeasureType.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.measure.percentile;
+
+import java.util.Map;
+
+import org.apache.kylin.common.util.Dictionary;
+import org.apache.kylin.measure.MeasureAggregator;
+import org.apache.kylin.measure.MeasureIngester;
+import org.apache.kylin.measure.MeasureType;
+import org.apache.kylin.measure.MeasureTypeFactory;
+import org.apache.kylin.metadata.datatype.DataType;
+import org.apache.kylin.metadata.datatype.DataTypeSerializer;
+import org.apache.kylin.metadata.model.MeasureDesc;
+import org.apache.kylin.metadata.model.TblColRef;
+
+import com.google.common.collect.ImmutableMap;
+
+public class PercentileMeasureType extends MeasureType<PercentileCounter> {
+ public static final String FUNC_PERCENTILE = "PERCENTILE";
+ public static final String DATATYPE_PERCENTILE = "percentile";
+ static final Map<String, Class<?>> UDAF_MAP = ImmutableMap.<String, Class<?>> of(PercentileMeasureType.FUNC_PERCENTILE, PercentileAggFunc.class);
+
+ // compression ratio saved in DataType.precision
+ private final DataType dataType;
+ // funcName is accepted to match the factory call but is not stored or used.
+ public PercentileMeasureType(String funcName, DataType dataType) {
+ this.dataType = dataType;
+ }
+ // Factory that ties the PERCENTILE function name and "percentile" data type to this measure type and its serializer.
+ public static class Factory extends MeasureTypeFactory<PercentileCounter> {
+ @Override
+ public String getAggrFunctionName() {
+ return FUNC_PERCENTILE;
+ }
+
+ @Override
+ public String getAggrDataTypeName() {
+ return DATATYPE_PERCENTILE;
+ }
+
+ @Override
+ public Class<? extends DataTypeSerializer<PercentileCounter>> getAggrDataTypeSerializer() {
+ return PercentileSerializer.class;
+ }
+
+ @Override
+ public MeasureType<PercentileCounter> createMeasureType(String funcName, DataType dataType) {
+ return new PercentileMeasureType(funcName, dataType);
+ }
+ }
+
+ @Override
+ public MeasureIngester<PercentileCounter> newIngester() {
+ return new MeasureIngester<PercentileCounter>() {
+ // one counter per ingester: the same instance is cleared, refilled and returned on every call
+ PercentileCounter current = new PercentileCounter(dataType.getPrecision());
+
+ @Override
+ public PercentileCounter valueOf(String[] values, MeasureDesc measureDesc, Map<TblColRef, Dictionary<String>> dictionaryMap) {
+ current.clear();
+ for (int i = 0; i < values.length; i++) {
+ if (values[i] == null)
+ continue; // null inputs contribute nothing
+ current.add(Double.parseDouble(values[i]));
+ }
+ return current;
+ }
+ };
+ }
+
+ @Override
+ public MeasureAggregator<PercentileCounter> newAggregator() {
+ return new PercentileAggregator(dataType.getPrecision());
+ }
+
+ @Override
+ public boolean needRewrite() {
+ return true;
+ }
+
+ @Override
+ public Map<String, Class<?>> getRewriteCalciteAggrFunctions() {
+ return UDAF_MAP;
+ }
+}
http://git-wip-us.apache.org/repos/asf/kylin/blob/95d44121/core-metadata/src/main/java/org/apache/kylin/measure/percentile/PercentileSerializer.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/percentile/PercentileSerializer.java b/core-metadata/src/main/java/org/apache/kylin/measure/percentile/PercentileSerializer.java
new file mode 100644
index 0000000..a0a2a77
--- /dev/null
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/percentile/PercentileSerializer.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.measure.percentile;
+
+import java.nio.ByteBuffer;
+
+import org.apache.kylin.metadata.datatype.DataType;
+import org.apache.kylin.metadata.datatype.DataTypeSerializer;
+
+public class PercentileSerializer extends DataTypeSerializer<PercentileCounter> {
+ // one scratch counter per thread: keeps the serializer thread-safe and avoids repeated object creation
+ private ThreadLocal<PercentileCounter> current = new ThreadLocal<>();
+
+ private double compression;
+
+ public PercentileSerializer(DataType type) {
+ this.compression = type.getPrecision();
+ }
+ // Returns the calling thread's counter, creating it on first use.
+ private PercentileCounter current() {
+ PercentileCounter cached = current.get();
+ if (cached != null)
+ return cached;
+ PercentileCounter fresh = new PercentileCounter(compression);
+ current.set(fresh);
+ return fresh;
+ }
+
+ @Override
+ public void serialize(PercentileCounter value, ByteBuffer out) {
+ value.writeRegisters(out);
+ }
+
+ @Override
+ public PercentileCounter deserialize(ByteBuffer in) {
+ // Decodes into the calling thread's shared counter, so each call overwrites the object returned by the previous one.
+ PercentileCounter target = current();
+ target.readRegisters(in);
+ return target;
+ }
+
+ @Override
+ public int peekLength(ByteBuffer in) {
+ return current().peekLength(in);
+ }
+
+ @Override
+ public int maxLength() {
+ return current().maxLength();
+ }
+
+ @Override
+ public int getStorageBytesEstimate() {
+ return current().getBytesEstimate();
+ }
+}
http://git-wip-us.apache.org/repos/asf/kylin/blob/95d44121/core-metadata/src/test/java/org/apache/kylin/measure/percentile/PercentileAggregatorTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/percentile/PercentileAggregatorTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/percentile/PercentileAggregatorTest.java
new file mode 100644
index 0000000..9d59d46
--- /dev/null
+++ b/core-metadata/src/test/java/org/apache/kylin/measure/percentile/PercentileAggregatorTest.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.measure.percentile;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.kylin.common.util.MathUtil;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public class PercentileAggregatorTest {
+ // Aggregating 10000 single-value counters must estimate the median within 0.001 of the exact one.
+ @Test
+ public void testAggregate() {
+ final double compression = 100;
+ final int datasize = 10000;
+ Random random = new Random();
+ PercentileAggregator aggregator = new PercentileAggregator(compression);
+ List<Double> dataset = Lists.newArrayListWithCapacity(datasize);
+ int remaining = datasize;
+ while (remaining-- > 0) {
+ double sample = random.nextDouble();
+ // each sample travels in its own counter, so the estimate is produced purely by aggregating counters
+ PercentileCounter single = new PercentileCounter(compression, 0.5);
+ single.add(sample);
+ aggregator.aggregate(single);
+ dataset.add(sample);
+ }
+ Collections.sort(dataset);
+ double exactMedian = MathUtil.findMedianInSortedList(dataset);
+ double estimate = aggregator.getState().getResultEstimate();
+ assertEquals(exactMedian, estimate, 0.001);
+ }
+}
http://git-wip-us.apache.org/repos/asf/kylin/blob/95d44121/core-metadata/src/test/java/org/apache/kylin/measure/percentile/PercentileCounterTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/percentile/PercentileCounterTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/percentile/PercentileCounterTest.java
new file mode 100644
index 0000000..abaa409
--- /dev/null
+++ b/core-metadata/src/test/java/org/apache/kylin/measure/percentile/PercentileCounterTest.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.measure.percentile;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.kylin.common.util.MathUtil;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+import com.tdunning.math.stats.TDigest;
+
+public class PercentileCounterTest {
+ // The estimated median of 10000 random doubles must be within 0.001 of the exact median.
+ @Test
+ public void testBasic() {
+ int times = 1;
+ int compression = 100;
+ for (int t = 0; t < times; t++) {
+ PercentileCounter counter = new PercentileCounter(compression, 0.5);
+ Random random = new Random();
+ int dataSize = 10000;
+ List<Double> dataset = Lists.newArrayListWithCapacity(dataSize);
+ for (int i = 0; i < dataSize; i++) {
+ double d = random.nextDouble();
+ counter.add(d);
+ dataset.add(d);
+ }
+ Collections.sort(dataset);
+
+ double actualResult = counter.getResultEstimate();
+ double expectedResult = MathUtil.findMedianInSortedList(dataset);
+ assertEquals(expectedResult, actualResult, 0.001);
+ }
+ }
+
+ // A counter must return exactly what a bare TDigest with the same compression returns for the same inputs.
+ @Test
+ public void testTDigest() {
+ double compression = 100;
+ double quantile = 0.5;
+
+ PercentileCounter counter = new PercentileCounter(compression, quantile);
+ TDigest tDigest = TDigest.createAvlTreeDigest(compression);
+
+ Random random = new Random();
+ int dataSize = 10000;
+ // no sorted copy of the inputs is kept here: both sides of the comparison are digest estimates
+ for (int i = 0; i < dataSize; i++) {
+ double d = random.nextDouble();
+ counter.add(d);
+ tDigest.add(d);
+ }
+ double actualResult = counter.getResultEstimate();
+ double expectedResult = tDigest.quantile(quantile);
+
+ assertEquals(expectedResult, actualResult, 0);
+ }
+}
http://git-wip-us.apache.org/repos/asf/kylin/blob/95d44121/core-metadata/src/test/java/org/apache/kylin/measure/percentile/PercentileSerializerTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/percentile/PercentileSerializerTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/percentile/PercentileSerializerTest.java
new file mode 100644
index 0000000..07b4495
--- /dev/null
+++ b/core-metadata/src/test/java/org/apache/kylin/measure/percentile/PercentileSerializerTest.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.measure.percentile;
+
+import static org.junit.Assert.assertEquals;
+
+import java.nio.ByteBuffer;
+import java.util.Random;
+
+import org.apache.kylin.common.util.LocalFileMetadataTestCase;
+import org.apache.kylin.metadata.datatype.DataType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Created by dongli on 5/21/16.
+ */
+public class PercentileSerializerTest extends LocalFileMetadataTestCase {
+
+ @Before
+ public void setup() throws Exception {
+ createTestMetadata();
+ }
+
+ @After
+ public void after() throws Exception {
+ cleanAfterClass();
+ }
+
+ // Round trip: serialize a counter, deserialize it, merge it into a fresh counter and compare the medians.
+ @Test
+ public void testBasic() {
+ PercentileCounter original = new PercentileCounter(100, 0.5);
+ Random random = new Random();
+ int remaining = 1000;
+ while (remaining-- > 0)
+ original.add(random.nextDouble());
+ double markResult = original.getResultEstimate();
+
+ PercentileSerializer serializer = new PercentileSerializer(DataType.getType("percentile(100)"));
+ ByteBuffer buffer = ByteBuffer.allocateDirect(serializer.getStorageBytesEstimate());
+ serializer.serialize(original, buffer);
+
+ buffer.flip();
+ // the serializer's counter is built without a quantile ratio, so merge it into a counter that has one
+ PercentileCounter restored = new PercentileCounter(100, 0.5);
+ restored.merge(serializer.deserialize(buffer));
+
+ assertEquals(markResult, restored.getResultEstimate(), 0.01);
+ }
+}
http://git-wip-us.apache.org/repos/asf/kylin/blob/95d44121/core-metadata/src/test/java/org/apache/kylin/measure/percentile/TDigestTest.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/percentile/TDigestTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/percentile/TDigestTest.java
new file mode 100644
index 0000000..1adb604
--- /dev/null
+++ b/core-metadata/src/test/java/org/apache/kylin/measure/percentile/TDigestTest.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.measure.percentile;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.kylin.common.util.MathUtil;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+import com.tdunning.math.stats.TDigest;
+
+/** Compares t-digest's median estimate with the exact median of the same samples; disabled via {@code @Ignore}. */
+@Ignore
+public class TDigestTest {
+    @Test
+    public void testBasic() {
+        final int rounds = 1;
+        final int compression = 100;
+        final int sampleCount = 10000;
+        for (int round = 0; round < rounds; round++) {
+            Random generator = new Random(round); // seeded by the round index so failures are reproducible
+            TDigest digest = TDigest.createAvlTreeDigest(compression);
+            List<Double> samples = Lists.newArrayListWithCapacity(sampleCount);
+            for (int n = 0; n < sampleCount; n++) {
+                double sample = generator.nextDouble();
+                digest.add(sample);
+                samples.add(sample);
+            }
+            Collections.sort(samples); // findMedianInSortedList is handed an already sorted list
+            double exactMedian = MathUtil.findMedianInSortedList(samples);
+            double estimatedMedian = digest.quantile(0.5);
+            assertEquals(exactMedian, estimatedMedian, 0.01);
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/kylin/blob/95d44121/examples/test_case_data/localmeta/cube_desc/ci_inner_join_cube.json
----------------------------------------------------------------------
diff --git a/examples/test_case_data/localmeta/cube_desc/ci_inner_join_cube.json b/examples/test_case_data/localmeta/cube_desc/ci_inner_join_cube.json
index 1ebd7f3..0fda3b3 100644
--- a/examples/test_case_data/localmeta/cube_desc/ci_inner_join_cube.json
+++ b/examples/test_case_data/localmeta/cube_desc/ci_inner_join_cube.json
@@ -267,6 +267,16 @@
},
"returntype" : "raw"
}
+ }, {
+ "name" : "GVM_PERCENTILE",
+ "function" : {
+ "expression" : "PERCENTILE",
+ "parameter" : {
+ "type" : "column",
+ "value" : "TEST_KYLIN_FACT.PRICE"
+ },
+ "returntype" : "percentile(100)"
+ }
} ],
"dictionaries": [ {
"column": "TEST_KYLIN_FACT.TEST_COUNT_DISTINCT_BITMAP",
@@ -358,7 +368,7 @@
"name" : "f3",
"columns" : [ {
"qualifier" : "m",
- "measure_refs" : [ "TEST_EXTENDED_COLUMN", "TRANS_ID_RAW", "PRICE_RAW", "CAL_DT_RAW", "BUYER_CONTACT", "SELLER_CONTACT" ]
+ "measure_refs" : [ "TEST_EXTENDED_COLUMN", "TRANS_ID_RAW", "PRICE_RAW", "CAL_DT_RAW", "BUYER_CONTACT", "SELLER_CONTACT", "GVM_PERCENTILE" ]
} ]
} ]
},
http://git-wip-us.apache.org/repos/asf/kylin/blob/95d44121/examples/test_case_data/localmeta/cube_desc/ci_left_join_cube.json
----------------------------------------------------------------------
diff --git a/examples/test_case_data/localmeta/cube_desc/ci_left_join_cube.json b/examples/test_case_data/localmeta/cube_desc/ci_left_join_cube.json
index 4048b6e..51139ae 100644
--- a/examples/test_case_data/localmeta/cube_desc/ci_left_join_cube.json
+++ b/examples/test_case_data/localmeta/cube_desc/ci_left_join_cube.json
@@ -267,6 +267,16 @@
},
"returntype" : "raw"
}
+ }, {
+ "name" : "GVM_PERCENTILE",
+ "function" : {
+ "expression" : "PERCENTILE",
+ "parameter" : {
+ "type" : "column",
+ "value" : "TEST_KYLIN_FACT.PRICE"
+ },
+ "returntype" : "percentile(100)"
+ }
} ],
"dictionaries": [ {
"column": "TEST_KYLIN_FACT.TEST_COUNT_DISTINCT_BITMAP",
@@ -358,7 +368,7 @@
"name" : "f3",
"columns" : [ {
"qualifier" : "m",
- "measure_refs" : [ "TEST_EXTENDED_COLUMN", "TRANS_ID_RAW", "PRICE_RAW", "CAL_DT_RAW", "BUYER_CONTACT", "SELLER_CONTACT" ]
+ "measure_refs" : [ "TEST_EXTENDED_COLUMN", "TRANS_ID_RAW", "PRICE_RAW", "CAL_DT_RAW", "BUYER_CONTACT", "SELLER_CONTACT", "GVM_PERCENTILE" ]
} ]
} ]
},
http://git-wip-us.apache.org/repos/asf/kylin/blob/95d44121/kylin-it/src/test/java/org/apache/kylin/query/ITKylinQueryTest.java
----------------------------------------------------------------------
diff --git a/kylin-it/src/test/java/org/apache/kylin/query/ITKylinQueryTest.java b/kylin-it/src/test/java/org/apache/kylin/query/ITKylinQueryTest.java
index 82d0873..02134d4 100644
--- a/kylin-it/src/test/java/org/apache/kylin/query/ITKylinQueryTest.java
+++ b/kylin-it/src/test/java/org/apache/kylin/query/ITKylinQueryTest.java
@@ -192,17 +192,17 @@ public class ITKylinQueryTest extends KylinTestBase {
public void testSnowflakeQuery() throws Exception {
execAndCompQuery(getQueryFolderPrefix() + "src/test/resources/query/sql_snowflake", null, true);
}
-
+
@Test
public void testDateTimeQuery() throws Exception {
execAndCompQuery(getQueryFolderPrefix() + "src/test/resources/query/sql_datetime", null, true);
}
-
+
@Test
public void testExtendedColumnQuery() throws Exception {
execAndCompQuery(getQueryFolderPrefix() + "src/test/resources/query/sql_extended_column", null, true);
}
-
+
@Test
public void testLikeQuery() throws Exception {
execAndCompQuery(getQueryFolderPrefix() + "src/test/resources/query/sql_like", null, true);
@@ -270,8 +270,7 @@ public class ITKylinQueryTest extends KylinTestBase {
this.batchExecuteQuery(getQueryFolderPrefix() + "src/test/resources/query/sql_intersect_count");
}
}
-
-
+
@Test
public void testMultiModelQuery() throws Exception {
if ("left".equalsIgnoreCase(joinType)) {
@@ -280,7 +279,7 @@ public class ITKylinQueryTest extends KylinTestBase {
joinType = "left";
}
}
-
+
@Test
public void testDimDistinctCountQuery() throws Exception {
execAndCompQuery(getQueryFolderPrefix() + "src/test/resources/query/sql_distinct_dim", null, true);
@@ -392,10 +391,15 @@ public class ITKylinQueryTest extends KylinTestBase {
// compare the result
Assert.assertEquals(expectVersion, queriedVersion);
}
-
+
@Test
public void testSelectStarColumnCount() throws Exception {
execAndCompColumnCount("select * from test_kylin_fact limit 10", 11);
execAndCompColumnCount("select * from test_kylin_fact", 11);
}
+
+    @Test
+    public void testPercentileQuery() throws Exception {
+        batchExecuteQuery(getQueryFolderPrefix() + "src/test/resources/query/sql_percentile"); // prefixed like the sibling tests
+    }
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/95d44121/kylin-it/src/test/resources/query/sql_percentile/query01.sql
----------------------------------------------------------------------
diff --git a/kylin-it/src/test/resources/query/sql_percentile/query01.sql b/kylin-it/src/test/resources/query/sql_percentile/query01.sql
new file mode 100644
index 0000000..4f6d573
--- /dev/null
+++ b/kylin-it/src/test/resources/query/sql_percentile/query01.sql
@@ -0,0 +1,2 @@
+select seller_id, percentile(price, 0.5) from test_kylin_fact
+group by seller_id
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/kylin/blob/95d44121/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index e391103..e8ccb6c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -102,6 +102,7 @@
<supercsv.version>2.4.0</supercsv.version>
<cors.version>2.5</cors.version>
<tomcat.version>8.5.9</tomcat.version>
+ <t-digest.version>3.1</t-digest.version>
<!-- REST Service -->
<spring.framework.version>3.2.17.RELEASE</spring.framework.version>
@@ -658,6 +659,11 @@
<version>${roaring.version}</version>
</dependency>
<dependency>
+ <groupId>com.tdunning</groupId>
+ <artifactId>t-digest</artifactId>
+ <version>${t-digest.version}</version>
+ </dependency>
+ <dependency>
<groupId>cglib</groupId>
<artifactId>cglib</artifactId>
<version>${cglib.version}</version>
http://git-wip-us.apache.org/repos/asf/kylin/blob/95d44121/storage-hbase/pom.xml
----------------------------------------------------------------------
diff --git a/storage-hbase/pom.xml b/storage-hbase/pom.xml
index eb2c104..3aea531 100644
--- a/storage-hbase/pom.xml
+++ b/storage-hbase/pom.xml
@@ -121,6 +121,7 @@
<include>org.apache.kylin:kylin-core-cube</include>
<include>com.ning:compress-lzf</include>
<include>org.roaringbitmap:RoaringBitmap</include>
+ <include>com.tdunning:t-digest</include>
<!-- below for inverted index only -->
<include>com.n3twork.druid:extendedset</include>
<include>org.apache.commons:commons-lang3</include>