You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/03/07 11:55:57 UTC
hive git commit: HIVE-17163: Microbenchmark for vector op processing
(Prasanth Jayachandran, reviewed by Matt McCline)
Repository: hive
Updated Branches:
refs/heads/master 7c22d74c8 -> 0cfd4fead
HIVE-17163: Microbenchmark for vector op processing (Prasanth Jayachandran, reviewed by Matt McCline)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0cfd4fea
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0cfd4fea
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0cfd4fea
Branch: refs/heads/master
Commit: 0cfd4feadd1b7dba206025cc20c5b8235231e8c5
Parents: 7c22d74
Author: Prasanth Jayachandran <pr...@apache.org>
Authored: Wed Mar 7 05:55:44 2018 -0600
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Wed Mar 7 05:55:44 2018 -0600
----------------------------------------------------------------------
itests/hive-jmh/pom.xml | 20 +-
.../vectorization/AbstractExpression.java | 6 +-
.../vectorization/BlackholeOperator.java | 44 ++++
.../vectorization/ColumnVectorGenUtil.java | 256 +++++++++++++++++++
.../vectorization/VectorizedLikeBench.java | 4 +-
.../vectorization/mapjoin/AbstractMapJoin.java | 30 +--
.../mapjoin/MapJoinMultiKeyBench.java | 94 +------
.../mapjoin/MapJoinOneLongKeyBench.java | 94 +------
.../mapjoin/MapJoinOneStringKeyBench.java | 94 +------
.../operators/AbstractOperatorBench.java | 34 +++
.../operators/VectorGroupByOperatorBench.java | 236 +++++++++++++++++
.../operators/VectorSelectOperatorBench.java | 166 ++++++++++++
.../src/main/resources/log4j2.properties | 68 +++++
.../util/batchgen/VectorBatchGenerateUtil.java | 10 -
14 files changed, 843 insertions(+), 313 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/0cfd4fea/itests/hive-jmh/pom.xml
----------------------------------------------------------------------
diff --git a/itests/hive-jmh/pom.xml b/itests/hive-jmh/pom.xml
index 0ff584c..94abbbf 100644
--- a/itests/hive-jmh/pom.xml
+++ b/itests/hive-jmh/pom.xml
@@ -32,7 +32,7 @@
<properties>
<hive.path.to.root>../..</hive.path.to.root>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
- <jmh.version>1.4.1</jmh.version>
+ <jmh.version>1.19</jmh.version>
<uberjar.name>benchmarks</uberjar.name>
</properties>
@@ -54,6 +54,16 @@
<version>${project.version}</version>
</dependency>
<dependency>
+ <groupId>org.apache.tez</groupId>
+ <artifactId>tez-api</artifactId>
+ <version>${tez.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.tez</groupId>
+ <artifactId>tez-runtime-internals</artifactId>
+ <version>${tez.version}</version>
+ </dependency>
+ <dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>${project.version}</version>
@@ -91,6 +101,7 @@
<configuration>
<finalName>${uberjar.name}</finalName>
<transformers>
+ <transformer implementation="com.github.edwgiz.mavenShadePlugin.log4j2CacheTransformer.PluginsCacheFileTransformer"/>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>org.openjdk.jmh.Main</mainClass>
</transformer>
@@ -112,6 +123,13 @@
</configuration>
</execution>
</executions>
+ <dependencies>
+ <dependency>
+ <groupId>com.github.edwgiz</groupId>
+ <artifactId>maven-shade-plugin.log4j2-cachefile-transformer</artifactId>
+ <version>2.1</version>
+ </dependency>
+ </dependencies>
</plugin>
</plugins>
</build>
http://git-wip-us.apache.org/repos/asf/hive/blob/0cfd4fea/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/AbstractExpression.java
----------------------------------------------------------------------
diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/AbstractExpression.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/AbstractExpression.java
index 4883a94..681f173 100644
--- a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/AbstractExpression.java
+++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/AbstractExpression.java
@@ -13,6 +13,9 @@
*/
package org.apache.hive.benchmark.vectorization;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
@@ -30,9 +33,6 @@ import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
-import java.util.Random;
-import java.util.concurrent.TimeUnit;
-
@BenchmarkMode(Mode.AverageTime)
@Fork(1)
@State(Scope.Thread)
http://git-wip-us.apache.org/repos/asf/hive/blob/0cfd4fea/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/BlackholeOperator.java
----------------------------------------------------------------------
diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/BlackholeOperator.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/BlackholeOperator.java
new file mode 100644
index 0000000..d86042a
--- /dev/null
+++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/BlackholeOperator.java
@@ -0,0 +1,44 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.benchmark.vectorization;
+
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.openjdk.jmh.infra.Blackhole;
+
+public class BlackholeOperator extends Operator {
+ private Blackhole bh;
+
+ public BlackholeOperator(CompilationOpContext cContext, Blackhole bh) {
+ super(cContext);
+ this.bh = bh;
+ }
+
+ @Override
+ public void process(final Object row, final int tag) throws HiveException {
+ bh.consume(row);
+ }
+
+ @Override
+ public String getName() {
+ return "Blackhole Operator";
+ }
+
+ @Override
+ public OperatorType getType() {
+ return OperatorType.FILESINK;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/0cfd4fea/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/ColumnVectorGenUtil.java
----------------------------------------------------------------------
diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/ColumnVectorGenUtil.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/ColumnVectorGenUtil.java
new file mode 100644
index 0000000..d80b6d4
--- /dev/null
+++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/ColumnVectorGenUtil.java
@@ -0,0 +1,256 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.benchmark.vectorization;
+
+import java.sql.Timestamp;
+import java.util.Random;
+
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.RandomTypeUtil;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+public class ColumnVectorGenUtil {
+
+ private static final long LONG_VECTOR_NULL_VALUE = 1;
+ private static final double DOUBLE_VECTOR_NULL_VALUE = Double.NaN;
+
+ public static VectorizedRowBatch getVectorizedRowBatch(int size, int numCol, int seed) {
+ VectorizedRowBatch vrg = new VectorizedRowBatch(numCol, size);
+ for (int j = 0; j < numCol; j++) {
+ LongColumnVector lcv = new LongColumnVector(size);
+ for (int i = 0; i < size; i++) {
+ lcv.vector[i] = (i + 1) * seed * (j + 1);
+ }
+ vrg.cols[j] = lcv;
+ }
+ vrg.size = size;
+ return vrg;
+ }
+
+ public static ColumnVector generateColumnVector(TypeInfo typeInfo, boolean nulls, boolean repeating, int size,
+ Random rand) {
+ if (typeInfo.getCategory().equals(ObjectInspector.Category.PRIMITIVE)) {
+ switch (((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) {
+ case BOOLEAN:
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ case DATE:
+ return generateLongColumnVector(nulls, repeating, size, rand);
+ case FLOAT:
+ case DOUBLE:
+ return generateDoubleColumnVector(nulls, repeating, size, rand);
+ case DECIMAL:
+ return generateDecimalColumnVector(((DecimalTypeInfo) typeInfo), nulls, repeating, size, rand);
+ case CHAR:
+ case VARCHAR:
+ case STRING:
+ case BINARY:
+ return generateBytesColumnVector(nulls, repeating, size, rand);
+ case TIMESTAMP:
+ return generateTimestampColumnVector(nulls, repeating, size, rand);
+ // TODO: add interval and complex types
+ }
+ }
+ throw new RuntimeException("Unsupported type info category: " + typeInfo.getCategory());
+ }
+
+ public static BytesColumnVector generateBytesColumnVector(
+ boolean nulls, boolean repeating, int size, Random rand) {
+ BytesColumnVector bcv = new BytesColumnVector(size);
+ bcv.initBuffer(10);
+ bcv.noNulls = !nulls;
+ bcv.isRepeating = repeating;
+
+ byte[] repeatingValue = new byte[10];
+ rand.nextBytes(repeatingValue);
+
+ int nullFrequency = generateNullFrequency(rand);
+
+ for (int i = 0; i < size; i++) {
+ if (nulls && (repeating || i % nullFrequency == 0)) {
+ bcv.isNull[i] = true;
+ bcv.setVal(0, new byte[]{0});
+ } else {
+ bcv.isNull[i] = false;
+ if (repeating) {
+ bcv.setVal(i, repeatingValue, 0, repeatingValue.length);
+ } else {
+ String val = String.valueOf("value_" + i);
+ bcv.setVal(i, val.getBytes(), 0, val.length());
+ }
+ }
+ }
+ return bcv;
+ }
+
+ public static LongColumnVector generateLongColumnVector(
+ boolean nulls, boolean repeating, int size, Random rand) {
+ LongColumnVector lcv = new LongColumnVector(size);
+
+ lcv.noNulls = !nulls;
+ lcv.isRepeating = repeating;
+
+ long repeatingValue;
+ do {
+ repeatingValue = rand.nextLong();
+ } while (repeatingValue == 0);
+
+ int nullFrequency = generateNullFrequency(rand);
+
+ for (int i = 0; i < size; i++) {
+ if (nulls && (repeating || i % nullFrequency == 0)) {
+ lcv.isNull[i] = true;
+ lcv.vector[i] = LONG_VECTOR_NULL_VALUE;
+
+ } else {
+ lcv.isNull[i] = false;
+ lcv.vector[i] = repeating ? repeatingValue : rand.nextLong();
+ if (lcv.vector[i] == 0) {
+ i--;
+ }
+ }
+ }
+ return lcv;
+ }
+
+ private static ColumnVector generateTimestampColumnVector(final boolean nulls,
+ final boolean repeating, final int size, final Random rand) {
+ Timestamp[] timestamps = new Timestamp[size];
+ for (int i = 0; i < size; i++) {
+ timestamps[i] = new Timestamp(rand.nextInt());
+ }
+ return generateTimestampColumnVector(nulls, repeating, size, rand, timestamps);
+ }
+
+ public static TimestampColumnVector generateTimestampColumnVector(
+ boolean nulls, boolean repeating, int size, Random rand, Timestamp[] timestampValues) {
+ TimestampColumnVector tcv = new TimestampColumnVector(size);
+
+ tcv.noNulls = !nulls;
+ tcv.isRepeating = repeating;
+
+ Timestamp repeatingTimestamp = RandomTypeUtil.getRandTimestamp(rand);
+
+ int nullFrequency = generateNullFrequency(rand);
+
+ for (int i = 0; i < size; i++) {
+ if (nulls && (repeating || i % nullFrequency == 0)) {
+ tcv.isNull[i] = true;
+ tcv.setNullValue(i);
+ timestampValues[i] = null;
+ } else {
+ tcv.isNull[i] = false;
+ if (!repeating) {
+ Timestamp randomTimestamp = RandomTypeUtil.getRandTimestamp(rand);
+ tcv.set(i, randomTimestamp);
+ timestampValues[i] = randomTimestamp;
+ } else {
+ tcv.set(i, repeatingTimestamp);
+ timestampValues[i] = repeatingTimestamp;
+ }
+ }
+ }
+ return tcv;
+ }
+
+ public static DoubleColumnVector generateDoubleColumnVector(boolean nulls,
+ boolean repeating, int size, Random rand) {
+ DoubleColumnVector dcv = new DoubleColumnVector(size);
+
+ dcv.noNulls = !nulls;
+ dcv.isRepeating = repeating;
+
+ double repeatingValue;
+ do {
+ repeatingValue = rand.nextDouble();
+ } while (repeatingValue == 0);
+
+ int nullFrequency = generateNullFrequency(rand);
+
+ for (int i = 0; i < size; i++) {
+ if (nulls && (repeating || i % nullFrequency == 0)) {
+ dcv.isNull[i] = true;
+ dcv.vector[i] = DOUBLE_VECTOR_NULL_VALUE;
+
+ } else {
+ dcv.isNull[i] = false;
+ dcv.vector[i] = repeating ? repeatingValue : rand.nextDouble();
+
+ if (dcv.vector[i] == 0) {
+ i--;
+ }
+ }
+ }
+ return dcv;
+ }
+
+ public static DecimalColumnVector generateDecimalColumnVector(DecimalTypeInfo typeInfo, boolean nulls,
+ boolean repeating, int size, Random rand) {
+ DecimalColumnVector dcv =
+ new DecimalColumnVector(size, typeInfo.precision(), typeInfo.scale());
+
+ dcv.noNulls = !nulls;
+ dcv.isRepeating = repeating;
+
+ HiveDecimalWritable repeatingValue = new HiveDecimalWritable();
+ do {
+ repeatingValue.set(HiveDecimal.create(((Double) rand.nextDouble()).toString())
+ .setScale((short) typeInfo.scale(), HiveDecimal.ROUND_HALF_UP));
+ } while (repeatingValue.getHiveDecimal().doubleValue() == 0);
+
+ int nullFrequency = generateNullFrequency(rand);
+
+ for (int i = 0; i < size; i++) {
+ if (nulls && (repeating || i % nullFrequency == 0)) {
+ dcv.isNull[i] = true;
+ dcv.vector[i] = null;
+
+ } else {
+ dcv.isNull[i] = false;
+ if (repeating) {
+ dcv.vector[i].set(repeatingValue);
+ } else {
+ dcv.vector[i].set(HiveDecimal.create(((Double) rand.nextDouble()).toString())
+ .setScale((short) typeInfo.scale(), HiveDecimal.ROUND_HALF_UP));
+ }
+
+ if (dcv.vector[i].getHiveDecimal().doubleValue() == 0) {
+ i--;
+ }
+ }
+ }
+ return dcv;
+ }
+
+ private static int generateNullFrequency(Random rand) {
+ return 60 + rand.nextInt(20);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/0cfd4fea/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizedLikeBench.java
----------------------------------------------------------------------
diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizedLikeBench.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizedLikeBench.java
index c2105bd..8b1b045 100644
--- a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizedLikeBench.java
+++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizedLikeBench.java
@@ -17,6 +17,8 @@
*/
package org.apache.hive.benchmark.vectorization;
+import java.nio.charset.StandardCharsets;
+
import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColLikeStringScalar;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
@@ -25,8 +27,6 @@ import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
-import java.nio.charset.StandardCharsets;
-
/**
* This test measures the performance for vectorization.
* <p/>
http://git-wip-us.apache.org/repos/asf/hive/blob/0cfd4fea/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java
----------------------------------------------------------------------
diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java
index 50e2fa8..af446db 100644
--- a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java
+++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java
@@ -13,31 +13,26 @@
*/
package org.apache.hive.benchmark.vectorization.mapjoin;
+import java.util.concurrent.TimeUnit;
+
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.tez.ObjectCache;
import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountCollectorTestOperator;
import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountVectorCollectorTestOperator;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerateStream;
-import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerateUtil;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestData;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription;
-import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters;
+import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerateUtil;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.tez.runtime.common.objectregistry.ObjectRegistryImpl;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
@@ -45,15 +40,12 @@ import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
-import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
-import java.util.Random;
-import java.util.concurrent.TimeUnit;
-
-// UNDONE: For now, just run once cold.
-@BenchmarkMode(Mode.SingleShotTime)
+@BenchmarkMode(Mode.AverageTime)
+@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
+@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
@Fork(1)
@State(Scope.Thread)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@@ -72,7 +64,6 @@ public abstract class AbstractMapJoin {
protected VectorizedRowBatch[] bigTableBatches;
@Benchmark
- // @Warmup(iterations = 0, time = 1, timeUnit = TimeUnit.MILLISECONDS)
@Measurement(iterations = 1, time = 1, timeUnit = TimeUnit.MILLISECONDS)
public void bench() throws Exception {
if (!isVectorOutput) {
@@ -103,6 +94,9 @@ public abstract class AbstractMapJoin {
// Prepare data. Good for ANY implementation variation.
testData = new MapJoinTestData(rowCount, testDesc, seed, seed * 10);
+
+ ObjectRegistryImpl objectRegistry = new ObjectRegistryImpl();
+ ObjectCache.setupObjectRegistry(objectRegistry);
operator = setupBenchmarkImplementation(
mapJoinImplementation, testDesc, testData);
http://git-wip-us.apache.org/repos/asf/hive/blob/0cfd4fea/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBench.java
----------------------------------------------------------------------
diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBench.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBench.java
index 39005eb..ca76e6c 100644
--- a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBench.java
+++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBench.java
@@ -18,107 +18,15 @@
package org.apache.hive.benchmark.vectorization.mapjoin;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import org.apache.commons.lang.ArrayUtils;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.CompilationOpContext;
-import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer;
-import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext;
-import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
-import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorBatchDebug;
-import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping;
-import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping;
-import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow;
-import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator;
-import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
-import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator;
-import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType;
-import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.ColAndCol;
-import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation;
-import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestData;
-import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription;
-import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastMultiKeyHashMap;
-import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer;
-import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VerifyFastRow;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
-import org.apache.hadoop.hive.ql.plan.JoinDesc;
-import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
-import org.apache.hadoop.hive.ql.plan.OperatorDesc;
-import org.apache.hadoop.hive.ql.plan.PlanUtils;
-import org.apache.hadoop.hive.ql.plan.TableDesc;
-import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc;
-import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType;
-import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
-import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind;
import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation;
-import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo;
-import org.apache.hadoop.hive.ql.plan.api.OperatorType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
-import org.apache.hadoop.hive.serde2.AbstractSerDe;
-import org.apache.hadoop.hive.serde2.ByteStream.Output;
-import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.SerDeUtils;
-import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
-import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.Writable;
-import org.apache.hive.benchmark.vectorization.VectorizedArithmeticBench;
-import org.apache.hive.common.util.HashCodeUtil;
-import org.apache.hive.common.util.ReflectionUtil;
-import org.junit.Test;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Random;
-import java.util.SortedMap;
-import java.util.TreeMap;
-
import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
-import org.openjdk.jmh.annotations.Setup;
-import org.openjdk.jmh.profile.StackProfiler;
/*
* Simple one long key map join benchmarks.
http://git-wip-us.apache.org/repos/asf/hive/blob/0cfd4fea/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBench.java
----------------------------------------------------------------------
diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBench.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBench.java
index a2461e0..e13db96 100644
--- a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBench.java
+++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBench.java
@@ -18,107 +18,15 @@
package org.apache.hive.benchmark.vectorization.mapjoin;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import org.apache.commons.lang.ArrayUtils;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.CompilationOpContext;
-import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer;
-import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext;
-import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
-import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorBatchDebug;
-import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping;
-import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping;
-import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow;
-import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator;
-import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
-import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator;
-import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType;
-import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.ColAndCol;
-import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation;
-import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestData;
-import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription;
-import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastMultiKeyHashMap;
-import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer;
-import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VerifyFastRow;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
-import org.apache.hadoop.hive.ql.plan.JoinDesc;
-import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
-import org.apache.hadoop.hive.ql.plan.OperatorDesc;
-import org.apache.hadoop.hive.ql.plan.PlanUtils;
-import org.apache.hadoop.hive.ql.plan.TableDesc;
-import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc;
-import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType;
-import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
-import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind;
import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation;
-import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo;
-import org.apache.hadoop.hive.ql.plan.api.OperatorType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
-import org.apache.hadoop.hive.serde2.AbstractSerDe;
-import org.apache.hadoop.hive.serde2.ByteStream.Output;
-import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.SerDeUtils;
-import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
-import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.Writable;
-import org.apache.hive.benchmark.vectorization.VectorizedArithmeticBench;
-import org.apache.hive.common.util.HashCodeUtil;
-import org.apache.hive.common.util.ReflectionUtil;
-import org.junit.Test;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Random;
-import java.util.SortedMap;
-import java.util.TreeMap;
-
import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
-import org.openjdk.jmh.annotations.Setup;
-import org.openjdk.jmh.profile.StackProfiler;
/*
* Simple one long key map join benchmarks.
http://git-wip-us.apache.org/repos/asf/hive/blob/0cfd4fea/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBench.java
----------------------------------------------------------------------
diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBench.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBench.java
index f6f5f5f..6a78a9f 100644
--- a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBench.java
+++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBench.java
@@ -18,107 +18,15 @@
package org.apache.hive.benchmark.vectorization.mapjoin;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import org.apache.commons.lang.ArrayUtils;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.CompilationOpContext;
-import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer;
-import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext;
-import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
-import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorBatchDebug;
-import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping;
-import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping;
-import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow;
-import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator;
-import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
-import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator;
-import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType;
-import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.ColAndCol;
-import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation;
-import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestData;
-import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription;
-import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastMultiKeyHashMap;
-import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer;
-import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VerifyFastRow;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
-import org.apache.hadoop.hive.ql.plan.JoinDesc;
-import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
-import org.apache.hadoop.hive.ql.plan.OperatorDesc;
-import org.apache.hadoop.hive.ql.plan.PlanUtils;
-import org.apache.hadoop.hive.ql.plan.TableDesc;
-import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc;
-import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType;
-import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
-import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind;
import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation;
-import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo;
-import org.apache.hadoop.hive.ql.plan.api.OperatorType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
-import org.apache.hadoop.hive.serde2.AbstractSerDe;
-import org.apache.hadoop.hive.serde2.ByteStream.Output;
-import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.SerDeUtils;
-import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
-import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.Writable;
-import org.apache.hive.benchmark.vectorization.VectorizedArithmeticBench;
-import org.apache.hive.common.util.HashCodeUtil;
-import org.apache.hive.common.util.ReflectionUtil;
-import org.junit.Test;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Random;
-import java.util.SortedMap;
-import java.util.TreeMap;
-
import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
-import org.openjdk.jmh.annotations.Setup;
-import org.openjdk.jmh.profile.StackProfiler;
/*
 * Simple one string key map join benchmarks.
http://git-wip-us.apache.org/repos/asf/hive/blob/0cfd4fea/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/AbstractOperatorBench.java
----------------------------------------------------------------------
diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/AbstractOperatorBench.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/AbstractOperatorBench.java
new file mode 100644
index 0000000..f17eb3e
--- /dev/null
+++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/AbstractOperatorBench.java
@@ -0,0 +1,34 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.benchmark.vectorization.operators;
+
+import java.util.concurrent.TimeUnit;
+
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Warmup;
+
+/**
+ * Base class for vectorized-operator JMH benchmarks. Subclasses inherit the
+ * shared measurement configuration declared here: 5 warmup and 5 measurement
+ * iterations of 1 second each, a single JVM fork, and throughput mode
+ * reported in operations per second.
+ */
+@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
+@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
+@Fork(1)
+@BenchmarkMode(Mode.Throughput)
+@OutputTimeUnit(TimeUnit.SECONDS)
+public class AbstractOperatorBench {
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/0cfd4fea/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/VectorGroupByOperatorBench.java
----------------------------------------------------------------------
diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/VectorGroupByOperatorBench.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/VectorGroupByOperatorBench.java
new file mode 100644
index 0000000..1f87f8d
--- /dev/null
+++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/VectorGroupByOperatorBench.java
@@ -0,0 +1,236 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.benchmark.vectorization.operators;
+
+import java.util.ArrayList;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorFactory;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBloomFilter;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hive.benchmark.vectorization.ColumnVectorGenUtil;
+import org.apache.orc.TypeDescription;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.profile.LinuxPerfAsmProfiler;
+import org.openjdk.jmh.profile.LinuxPerfNormProfiler;
+import org.openjdk.jmh.profile.LinuxPerfProfiler;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+import com.google.common.collect.ImmutableList;
+
+/**
+ * JMH benchmark for {@link VectorGroupByOperator}. Every combination of the
+ * {@code @Param} fields below (null-ness and repeating-ness of the generated
+ * input vector, UDAF evaluator mode, group-by processing mode, aggregation
+ * function, and column data type) is one measured configuration; combinations
+ * the operator does not support are skipped in {@link #setup()}.
+ */
+@State(Scope.Benchmark)
+public class VectorGroupByOperatorBench extends AbstractOperatorBench {
+
+  @Param({
+    "true",
+    "false"
+  })
+  private boolean hasNulls;
+
+  @Param({
+    "true",
+    "false"
+  })
+  private boolean isRepeating;
+
+  @Param({
+    "PARTIAL1",
+    "PARTIAL2",
+    "FINAL",
+    "COMPLETE"
+  })
+  private GenericUDAFEvaluator.Mode evalMode;
+
+  @Param({
+    "GLOBAL",
+    "HASH"
+  })
+  private VectorGroupByDesc.ProcessingMode processMode;
+
+  @Param({
+    "count",
+    "min",
+    "max",
+    "sum",
+    "avg",
+    "variance",
+    "var_pop",
+    "var_samp",
+    "stddev",
+    "stddev_pop",
+    "stddev_samp",
+    "bloom_filter"
+  })
+  private String aggregation;
+
+  @Param({
+    "bigint",
+    "double",
+    "string",
+    "decimal(7,2)", // to use this via command line arg "decimal(7_2)"
+    "decimal(38,18)", // to use this via command line arg "decimal(38_18)"
+    "timestamp"
+  })
+  private String dataType;
+
+  // Fixed seed so every fork measures identical generated column data.
+  private Random rand = new Random(1234);
+  private VectorGroupByOperator vgo;
+  private VectorizedRowBatch vrb;
+  private int size = VectorizedRowBatch.DEFAULT_SIZE;
+
+  /**
+   * Builds a one-column batch of generated data plus a vectorized group-by
+   * operator configured from the benchmark parameters. Exits the forked JVM
+   * when the parameter combination is unsupported.
+   */
+  @Setup
+  public void setup() {
+    try {
+      // JMH param values cannot contain commas, so decimal types are passed
+      // as e.g. "decimal(7_2)" on the command line and normalized here.
+      dataType = dataType.replaceAll("_", ",");
+      TypeInfo typeInfo = TypeInfoFactory.getPrimitiveTypeInfo(dataType);
+      ColumnVector cv = ColumnVectorGenUtil.generateColumnVector(typeInfo, hasNulls, isRepeating, size, rand);
+      TypeDescription typeDescription = TypeDescription.fromString(dataType);
+      vrb = typeDescription.createRowBatch(size);
+      vrb.size = size;
+      vrb.cols[0] = cv;
+      VectorizationContext ctx = new VectorizationContext("name", ImmutableList.of("A"));
+      GroupByDesc desc = buildGroupByDescType(aggregation, evalMode, "A", typeInfo, processMode);
+      Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(new CompilationOpContext(), desc);
+      VectorGroupByDesc vectorGroupByDesc = new VectorGroupByDesc();
+      // Use the benchmarked processing mode instead of hard-coding HASH;
+      // otherwise the GLOBAL value of the processMode param never reaches
+      // the vectorized operator actually being measured.
+      vectorGroupByDesc.setProcessingMode(processMode);
+      vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorGroupByDesc);
+      vgo.initialize(new Configuration(), null);
+    } catch (Exception e) {
+      // likely unsupported combination of params
+      // https://bugs.openjdk.java.net/browse/CODETOOLS-7901296 is not available yet to skip benchmark cleanly
+      System.out.println("Skipping.. Exception: " + e.getMessage());
+      System.exit(0);
+    }
+  }
+
+  /**
+   * Builds a {@link GroupByDesc} with a single aggregation over {@code column}
+   * and no grouping keys, carrying the requested vector processing mode.
+   */
+  private GroupByDesc buildGroupByDescType(
+    String aggregate,
+    GenericUDAFEvaluator.Mode mode,
+    String column,
+    TypeInfo dataType,
+    final VectorGroupByDesc.ProcessingMode processMode) throws SemanticException {
+
+    AggregationDesc agg = buildAggregationDesc(aggregate, mode, column, dataType);
+    ArrayList<AggregationDesc> aggs = new ArrayList<AggregationDesc>();
+    aggs.add(agg);
+
+    ArrayList<String> outputColumnNames = new ArrayList<String>();
+    outputColumnNames.add("_col0");
+
+    GroupByDesc desc = new GroupByDesc();
+    desc.setVectorDesc(new VectorGroupByDesc());
+
+    desc.setOutputColumnNames(outputColumnNames);
+    desc.setAggregators(aggs);
+    ((VectorGroupByDesc) desc.getVectorDesc()).setProcessingMode(processMode);
+
+    return desc;
+  }
+
+  /**
+   * Builds an {@link AggregationDesc} for {@code aggregate(column)} in the
+   * given evaluator mode, resolving the evaluator via the function registry.
+   */
+  private AggregationDesc buildAggregationDesc(
+    String aggregate,
+    GenericUDAFEvaluator.Mode mode,
+    String column,
+    TypeInfo typeInfo) throws SemanticException {
+
+    ExprNodeDesc inputColumn = new ExprNodeColumnDesc(typeInfo, column, "table", false);
+
+    ArrayList<ExprNodeDesc> params = new ArrayList<ExprNodeDesc>();
+    params.add(inputColumn);
+
+    AggregationDesc agg = new AggregationDesc();
+    ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
+    GenericUDAFEvaluator genericUDAFEvaluator = FunctionRegistry.getGenericUDAFEvaluator(aggregate,
+      ImmutableList.of(oi).asList(), false, false);
+    agg.setGenericUDAFEvaluator(genericUDAFEvaluator);
+    if (aggregate.equals("bloom_filter")) {
+      // bloom_filter requires an expected-entries hint before it can run.
+      GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator udafBloomFilterEvaluator =
+        (GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator) agg.getGenericUDAFEvaluator();
+      udafBloomFilterEvaluator.setHintEntries(10000);
+    }
+    agg.setGenericUDAFName(aggregate);
+    agg.setMode(mode);
+    agg.setParameters(params);
+
+    return agg;
+  }
+
+  @TearDown
+  public void tearDown() throws HiveException {
+    // abort=false: flush/clean up aggregation buffers normally.
+    vgo.close(false);
+  }
+
+  /**
+   * Measures one process() call over the prepared batch using the configured
+   * aggregation — any of the {@code aggregation} param values, not only
+   * count, despite the historical method name (kept for JMH include-pattern
+   * compatibility).
+   */
+  @Benchmark
+  public void testAggCount() throws HiveException {
+    vgo.process(vrb, 0);
+  }
+
+  /*
+   * ============================== HOW TO RUN THIS TEST: ====================================
+   *
+   * You can run this test:
+   *
+   * a) Via the command line:
+   *    $ mvn clean install
+   *    $ java -jar target/benchmarks.jar VectorGroupByOperatorBench -prof perf -f 1 (Linux)
+   *    $ java -jar target/benchmarks.jar VectorGroupByOperatorBench -prof perfnorm -f 3 (Linux)
+   *    $ java -jar target/benchmarks.jar VectorGroupByOperatorBench -prof perfasm -f 1 (Linux)
+   *    $ java -jar target/benchmarks.jar VectorGroupByOperatorBench -prof gc -f 1 (allocation counting via gc)
+   *    $ java -jar target/benchmarks.jar VectorGroupByOperatorBench -p hasNulls=true -p isRepeating=false -p aggregation=bloom_filter -p processMode=HASH -p evalMode=PARTIAL1
+   *    $ java -agentlib:jdwp=transport=dt_socket,address=127.0.0.1:6006,suspend=y,server=y -jar target/benchmarks.jar VectorGroupByOperatorBench
+   */
+
+  public static void main(String[] args) throws RunnerException {
+    Options opt = new OptionsBuilder()
+      .include(VectorGroupByOperatorBench.class.getSimpleName())
+      .addProfiler(LinuxPerfProfiler.class)
+      .addProfiler(LinuxPerfNormProfiler.class)
+      .addProfiler(LinuxPerfAsmProfiler.class)
+      .build();
+    new Runner(opt).run();
+  }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/0cfd4fea/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/VectorSelectOperatorBench.java
----------------------------------------------------------------------
diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/VectorSelectOperatorBench.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/VectorSelectOperatorBench.java
new file mode 100644
index 0000000..54b200b
--- /dev/null
+++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/VectorSelectOperatorBench.java
@@ -0,0 +1,166 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.benchmark.vectorization.operators;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.util.VectorizedRowGroupGenUtil;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.SelectDesc;
+import org.apache.hadoop.hive.ql.plan.VectorSelectDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPPlus;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hive.benchmark.vectorization.BlackholeOperator;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.infra.Blackhole;
+import org.openjdk.jmh.profile.LinuxPerfAsmProfiler;
+import org.openjdk.jmh.profile.LinuxPerfNormProfiler;
+import org.openjdk.jmh.profile.LinuxPerfProfiler;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * JMH benchmark for {@link VectorSelectOperator}: measures a vectorized
+ * SELECT of {@code a + b AS _col0, c AS _col1} over a generated batch of
+ * longs, in three variants — select-star pass-through, forward with no
+ * children, and forward through Blackhole children to exercise vectorForward.
+ */
+@State(Scope.Benchmark)
+public class VectorSelectOperatorBench extends AbstractOperatorBench {
+
+  private SelectDesc selDesc;
+  private VectorSelectOperator vso;
+  private VectorizedRowBatch vrg;
+  // Children wired to JMH's Blackhole so forwarded batches cannot be
+  // dead-code-eliminated by the JIT.
+  private List<Operator<?>> child;
+  private List<Operator<?>> EMPTY_CHILD = new ArrayList<>();
+
+  /**
+   * Builds the vectorized select operator over columns (a, b, c): projects
+   * the expression a + b plus the plain column c, compiles them to vector
+   * expressions, and prepares one default-sized input batch.
+   */
+  @Setup
+  public void setup(Blackhole bh) throws HiveException {
+    HiveConf hconf = new HiveConf();
+    List<String> columns = new ArrayList<String>();
+    columns.add("a");
+    columns.add("b");
+    columns.add("c");
+    VectorizationContext vc = new VectorizationContext("name", columns);
+
+    selDesc = new SelectDesc(false);
+    List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
+    ExprNodeColumnDesc colDesc1 = new ExprNodeColumnDesc(Long.class, "a", "table", false);
+    ExprNodeColumnDesc colDesc2 = new ExprNodeColumnDesc(Long.class, "b", "table", false);
+    ExprNodeColumnDesc colDesc3 = new ExprNodeColumnDesc(Long.class, "c", "table", false);
+    // a + b as a generic-UDF expression tree, typed as long.
+    ExprNodeGenericFuncDesc plusDesc = new ExprNodeGenericFuncDesc();
+    GenericUDF gudf = new GenericUDFOPPlus();
+
+    plusDesc.setGenericUDF(gudf);
+    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
+    children.add(colDesc1);
+    children.add(colDesc2);
+    plusDesc.setChildren(children);
+    plusDesc.setTypeInfo(TypeInfoFactory.longTypeInfo);
+
+    colList.add(plusDesc);
+    colList.add(colDesc3);
+    selDesc.setColList(colList);
+
+    List<String> outputColNames = new ArrayList<String>();
+    outputColNames.add("_col0");
+    outputColNames.add("_col1");
+    selDesc.setOutputColumnNames(outputColNames);
+
+    // Compile each select expression to its vectorized form.
+    VectorSelectDesc vectorSelectDesc = new VectorSelectDesc();
+    selDesc.setVectorDesc(vectorSelectDesc);
+    List<ExprNodeDesc> selectColList = selDesc.getColList();
+    VectorExpression[] vectorSelectExprs = new VectorExpression[selectColList.size()];
+    for (int i = 0; i < selectColList.size(); i++) {
+      ExprNodeDesc expr = selectColList.get(i);
+      VectorExpression ve = vc.getVectorExpression(expr);
+      vectorSelectExprs[i] = ve;
+    }
+    vectorSelectDesc.setSelectExpressions(vectorSelectExprs);
+    // Project batch columns 3 (scratch column holding a + b) and 2 (c).
+    vectorSelectDesc.setProjectedOutputColumns(new int[]{3, 2});
+
+    CompilationOpContext opContext = new CompilationOpContext();
+    vso = new VectorSelectOperator(opContext, selDesc, vc, vectorSelectDesc);
+    // to trigger vectorForward
+    child = new ArrayList<>();
+    child.add(new BlackholeOperator(opContext, bh));
+    child.add(new BlackholeOperator(opContext, bh));
+    vso.initialize(hconf, null);
+    // One batch: DEFAULT_SIZE rows, 4 long columns, seed 17.
+    vrg = VectorizedRowGroupGenUtil.getVectorizedRowBatch(
+        VectorizedRowBatch.DEFAULT_SIZE, 4, 17);
+  }
+
+  @TearDown
+  public void tearDown() throws HiveException {
+    // abort=false: close the operator tree normally.
+    vso.close(false);
+  }
+
+  // SELECT * short-circuit: batch is forwarded without evaluating expressions.
+  @Benchmark
+  public void testSelectStar() throws HiveException {
+    selDesc.setSelStarNoCompute(true);
+    vso.process(vrg, 0);
+  }
+
+  // Expression evaluation with no children: measures baseForward only.
+  @Benchmark
+  public void testVectorSelectBaseForward() throws HiveException {
+    selDesc.setSelStarNoCompute(false);
+    vso.setChildOperators(EMPTY_CHILD);
+    vso.process(vrg, 0);
+  }
+
+  // Expression evaluation forwarding into Blackhole children (vectorForward).
+  @Benchmark
+  public void testVectorSelectVectorForward() throws HiveException {
+    selDesc.setSelStarNoCompute(false);
+    vso.setChildOperators(child);
+    vso.process(vrg, 0);
+  }
+
+  /*
+   * ============================== HOW TO RUN THIS TEST: ====================================
+   *
+   * You can run this test:
+   *
+   * a) Via the command line:
+   *    $ mvn clean install
+   *    $ java -jar target/benchmarks.jar VectorSelectOperatorBench -prof perf -f 1 (Linux)
+   *    $ java -jar target/benchmarks.jar VectorSelectOperatorBench -prof perfnorm -f 3 (Linux)
+   *    $ java -jar target/benchmarks.jar VectorSelectOperatorBench -prof perfasm -f 1 (Linux)
+   *    $ java -jar target/benchmarks.jar VectorSelectOperatorBench -prof gc -f 1 (allocation counting via gc)
+   */
+
+  public static void main(String[] args) throws RunnerException {
+    Options opt = new OptionsBuilder()
+      .include(VectorSelectOperatorBench.class.getSimpleName())
+      .addProfiler(LinuxPerfProfiler.class)
+      .addProfiler(LinuxPerfNormProfiler.class)
+      .addProfiler(LinuxPerfAsmProfiler.class)
+      .build();
+    new Runner(opt).run();
+  }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/0cfd4fea/itests/hive-jmh/src/main/resources/log4j2.properties
----------------------------------------------------------------------
diff --git a/itests/hive-jmh/src/main/resources/log4j2.properties b/itests/hive-jmh/src/main/resources/log4j2.properties
new file mode 100644
index 0000000..3357240
--- /dev/null
+++ b/itests/hive-jmh/src/main/resources/log4j2.properties
@@ -0,0 +1,68 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+status = FATAL
+name = HiveJMH
+
+# list of properties
+property.hive-jmh.log.level = ERROR
+property.hive-jmh.root.logger = DRFA
+property.hive-jmh.log.dir = target/tmp/log
+property.hive-jmh.log.file = hive-jmh.log
+
+# list of all appenders
+appenders = console, DRFA
+
+# console appender
+appender.console.type = Console
+appender.console.name = console
+appender.console.target = SYSTEM_ERR
+appender.console.layout.type = PatternLayout
+appender.console.layout.pattern = %d{ISO8601} %5p [%t] %c{2}: %m%n
+
+# daily rolling file appender
+appender.DRFA.type = RollingFile
+appender.DRFA.name = DRFA
+appender.DRFA.fileName = ${sys:hive-jmh.log.dir}/${sys:hive-jmh.log.file}
+appender.DRFA.filePattern = ${sys:hive-jmh.log.dir}/${sys:hive-jmh.log.file}.%d{yyyy-MM-dd}
+appender.DRFA.layout.type = PatternLayout
+appender.DRFA.layout.pattern = %d{ISO8601} %5p [%t] %c{2}: %m%n
+appender.DRFA.policies.type = Policies
+appender.DRFA.policies.time.type = TimeBasedTriggeringPolicy
+appender.DRFA.policies.time.interval = 1
+appender.DRFA.policies.time.modulate = true
+appender.DRFA.strategy.type = DefaultRolloverStrategy
+appender.DRFA.strategy.max = 30
+
+# list of all loggers
+loggers = SparkIMain, SparkILoop, Jetty, AbstractLifeCycle
+
+# NOTE(review): the SparkIMain/SparkILoop entries below appear to be leftovers
+# from the Spark/Zeppelin log4j2 template this file was copied from; their
+# logger names match no hive-jmh classes, so they are effectively no-ops.
+logger.SparkIMain.name = org.apache.hive-jmh.repl.SparkIMain$exprTyper
+logger.SparkIMain.level = INFO
+
+logger.SparkILoop.name = org.apache.hive-jmh.repl.SparkILoop$SparkILoopInterpreter
+logger.SparkILoop.level = INFO
+
+logger.Jetty.name = org.eclipse.jetty
+logger.Jetty.level = WARN
+
+logger.AbstractLifeCycle.name = org.eclipse.jetty.util.component.AbstractLifeCycle
+logger.AbstractLifeCycle.level = ERROR
+
+# root logger
+rootLogger.level = ${sys:hive-jmh.log.level}
+rootLogger.appenderRefs = root
+rootLogger.appenderRef.root.ref = ${sys:hive-jmh.root.logger}
http://git-wip-us.apache.org/repos/asf/hive/blob/0cfd4fea/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerateUtil.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerateUtil.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerateUtil.java
index c6ae866..41a20af 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerateUtil.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerateUtil.java
@@ -18,24 +18,14 @@
package org.apache.hadoop.hive.ql.exec.vector.util.batchgen;
-import java.util.Arrays;
-import java.util.Random;
-
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import com.google.common.base.Preconditions;
-
public class VectorBatchGenerateUtil {
public static Object[][] generateRowObjectArray(TypeInfo[] typeInfos,