You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2016/07/27 23:55:44 UTC
hive git commit: HIVE-14336: Make usage of VectorUDFAdaptor
configurable (Matt McCline, reviewed by Sergey Shelukhin)
Repository: hive
Updated Branches:
refs/heads/master 9629c6ef1 -> 4941c17d7
HIVE-14336: Make usage of VectorUDFAdaptor configurable (Matt McCline, reviewed by Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4941c17d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4941c17d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4941c17d
Branch: refs/heads/master
Commit: 4941c17d7fa2e0a17fcf91ff9e034cd73db74c95
Parents: 9629c6e
Author: Matt McCline <mm...@hortonworks.com>
Authored: Wed Jul 27 16:48:27 2016 -0700
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Wed Jul 27 16:48:27 2016 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 7 +
.../ql/exec/vector/VectorGroupByOperator.java | 3 +-
.../exec/vector/VectorMapJoinBaseOperator.java | 3 +-
.../exec/vector/VectorSMBMapJoinOperator.java | 3 +-
.../ql/exec/vector/VectorizationContext.java | 159 +++-
.../hive/ql/optimizer/physical/Vectorizer.java | 13 +-
.../clientpositive/vector_adaptor_usage_mode.q | 177 ++++
.../vector_adaptor_usage_mode.q.out | 933 +++++++++++++++++++
8 files changed, 1283 insertions(+), 15 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/4941c17d/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index e92466f..aa7647b 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2592,6 +2592,13 @@ public class HiveConf extends Configuration {
HIVE_VECTORIZATION_USE_ROW_DESERIALIZE("hive.vectorized.use.row.serde.deserialize", false,
"This flag should be set to true to enable vectorizing using row deserialize.\n" +
"The default value is false."),
+ HIVE_VECTOR_ADAPTOR_USAGE_MODE("hive.vectorized.adaptor.usage.mode", "all", new StringSet("none", "chosen", "all"),
+ "Specifies the extent to which the VectorUDFAdaptor will be used for UDFs that do not have a cooresponding vectorized class.\n" +
+ "0. none : disable any usage of VectorUDFAdaptor\n" +
+ "1. chosen : use VectorUDFAdaptor for a small set of UDFs that were choosen for good performance\n" +
+ "2. all : use VectorUDFAdaptor for all UDFs"
+ ),
+
HIVE_TYPE_CHECK_ON_INSERT("hive.typecheck.on.insert", true, "This property has been extended to control "
+ "whether to check, convert, and normalize partition value to conform to its column type in "
+ "partition operations including but not limited to insert, such as alter, describe etc."),
http://git-wip-us.apache.org/repos/asf/hive/blob/4941c17d/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
index 6e53526..2605203 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
@@ -767,7 +767,8 @@ public class VectorGroupByOperator extends Operator<GroupByDesc> implements
isVectorOutput = desc.getVectorDesc().isVectorOutput();
- vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames());
+ vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames(),
+ /* vContextEnvironment */ vContext);
}
/** Kryo ctor. */
http://git-wip-us.apache.org/repos/asf/hive/blob/4941c17d/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java
index 902a183..bcde25f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java
@@ -88,7 +88,8 @@ public class VectorMapJoinBaseOperator extends MapJoinOperator implements Vector
noOuterJoin = desc.isNoOuterJoin();
// We are making a new output vectorized row batch.
- vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames());
+ vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames(),
+ /* vContextEnvironment */ vContext);
}
@Override
http://git-wip-us.apache.org/repos/asf/hive/blob/4941c17d/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
index 59153c8..80b0a14 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
@@ -126,7 +126,8 @@ public class VectorSMBMapJoinOperator extends SMBMapJoinOperator implements Vect
bigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable));
// We are making a new output vectorized row batch.
- vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames());
+ vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames(),
+ /* vContextEnvironment */ vContext);
}
@Override
http://git-wip-us.apache.org/repos/asf/hive/blob/4941c17d/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index c0b9a4c..fca844a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -41,6 +41,7 @@ import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
import org.apache.hadoop.hive.ql.exec.FunctionInfo;
@@ -138,6 +139,8 @@ import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.StringUtils;
import org.apache.hive.common.util.DateUtils;
+import com.google.common.annotations.VisibleForTesting;
+
/**
* Context class for vectorization execution.
@@ -165,9 +168,32 @@ public class VectorizationContext {
// private final Map<String, Integer> columnMap;
private int firstOutputColumnIndex;
+ private enum HiveVectorAdaptorUsageMode {
+ NONE,
+ CHOSEN,
+ ALL;
+
+ public static HiveVectorAdaptorUsageMode getHiveConfValue(HiveConf hiveConf) {
+ String string = HiveConf.getVar(hiveConf,
+ HiveConf.ConfVars.HIVE_VECTOR_ADAPTOR_USAGE_MODE);
+ return valueOf(string.toUpperCase());
+ }
+ }
+
+ private HiveVectorAdaptorUsageMode hiveVectorAdaptorUsageMode;
+
+ private void setHiveConfVars(HiveConf hiveConf) {
+ hiveVectorAdaptorUsageMode = HiveVectorAdaptorUsageMode.getHiveConfValue(hiveConf);
+ }
+
+ private void copyHiveConfVars(VectorizationContext vContextEnvironment) {
+ hiveVectorAdaptorUsageMode = vContextEnvironment.hiveVectorAdaptorUsageMode;
+ }
+
// Convenient constructor for initial batch creation takes
// a list of columns names and maps them to 0..n-1 indices.
- public VectorizationContext(String contextName, List<String> initialColumnNames) {
+ public VectorizationContext(String contextName, List<String> initialColumnNames,
+ HiveConf hiveConf) {
this.contextName = contextName;
level = 0;
this.initialColumnNames = initialColumnNames;
@@ -183,11 +209,26 @@ public class VectorizationContext {
this.ocm = new OutputColumnManager(firstOutputColumnIndex);
this.firstOutputColumnIndex = firstOutputColumnIndex;
vMap = new VectorExpressionDescriptor();
+
+ if (hiveConf != null) {
+ setHiveConfVars(hiveConf);
+ }
+ }
+
+ public VectorizationContext(String contextName, List<String> initialColumnNames,
+ VectorizationContext vContextEnvironment) {
+ this(contextName, initialColumnNames, (HiveConf) null);
+ copyHiveConfVars(vContextEnvironment);
+ }
+
+ @VisibleForTesting
+ public VectorizationContext(String contextName, List<String> initialColumnNames) {
+ this(contextName, initialColumnNames, (HiveConf) null);
}
// Constructor to use with the individual addInitialColumn method
// followed by a call to finishedAddingInitialColumns.
- public VectorizationContext(String contextName) {
+ public VectorizationContext(String contextName, HiveConf hiveConf) {
this.contextName = contextName;
level = 0;
initialColumnNames = new ArrayList<String>();
@@ -197,6 +238,16 @@ public class VectorizationContext {
this.ocm = new OutputColumnManager(0);
this.firstOutputColumnIndex = 0;
vMap = new VectorExpressionDescriptor();
+
+ if (hiveConf != null) {
+ setHiveConfVars(hiveConf);
+ }
+
+ }
+
+ @VisibleForTesting
+ public VectorizationContext(String contextName) {
+ this(contextName, (HiveConf) null);
}
// Constructor useful making a projection vectorization context.
@@ -213,6 +264,8 @@ public class VectorizationContext {
this.ocm = vContext.ocm;
this.firstOutputColumnIndex = vContext.firstOutputColumnIndex;
vMap = new VectorExpressionDescriptor();
+
+ copyHiveConfVars(vContext);
}
// Add an initial column to a vectorization context when
@@ -491,10 +544,46 @@ public class VectorizationContext {
ve = getGenericUdfVectorExpression(expr.getGenericUDF(),
childExpressions, mode, exprDesc.getTypeInfo());
if (ve == null) {
- /*
- * Ok, no vectorized class available. No problem -- try to use the VectorUDFAdaptor.
- */
- ve = getCustomUDFExpression(expr, mode);
+ // Ok, no vectorized class available. No problem -- try to use the VectorUDFAdaptor
+ // when configured.
+ //
+ // NOTE: We assume that if hiveVectorAdaptorUsageMode has not been set, it is because we are
+ // executing a test that didn't create a HiveConf, etc. No usage of VectorUDFAdaptor in
+ // that case.
+ if (hiveVectorAdaptorUsageMode != null) {
+ switch (hiveVectorAdaptorUsageMode) {
+ case NONE:
+ // No VectorUDFAdaptor usage.
+ throw new HiveException(
+ "Could not vectorize expression (mode = " + mode.name() + "): " + exprDesc.toString()
+ + " because hive.vectorized.adaptor.usage.mode=none");
+ case CHOSEN:
+ if (isNonVectorizedPathUDF(expr, mode)) {
+ ve = getCustomUDFExpression(expr, mode);
+ } else {
+ throw new HiveException(
+ "Could not vectorize expression (mode = " + mode.name() + "): " + exprDesc.toString()
+ + " because hive.vectorized.adaptor.usage.mode=chosen "
+ + " and the UDF wasn't one of the chosen ones");
+ }
+ break;
+ case ALL:
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("We will try to use the VectorUDFAdaptor for " + exprDesc.toString()
+ + " because hive.vectorized.adaptor.usage.mode=all");
+ }
+ ve = getCustomUDFExpression(expr, mode);
+ break;
+ default:
+ throw new RuntimeException("Unknown hive vector adaptor usage mode " +
+ hiveVectorAdaptorUsageMode.name());
+ }
+ if (ve == null) {
+ throw new HiveException(
+ "Unable vectorize expression (mode = " + mode.name() + "): " + exprDesc.toString()
+ + " even for the VectorUDFAdaptor");
+ }
+ }
}
}
} else if (exprDesc instanceof ExprNodeConstantDesc) {
@@ -778,6 +867,64 @@ public class VectorizationContext {
return genericUdf;
}
+ /* Return true if this is one of a small set of functions for which
+ * it is significantly easier to use the old code path in vectorized
+ * mode instead of implementing a new, optimized VectorExpression.
+ *
+ * Depending on performance requirements and frequency of use, these
+ * may be implemented in the future with an optimized VectorExpression.
+ */
+ public static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr,
+ VectorExpressionDescriptor.Mode mode) {
+ GenericUDF gudf = expr.getGenericUDF();
+ if (gudf instanceof GenericUDFBridge) {
+ GenericUDFBridge bridge = (GenericUDFBridge) gudf;
+ Class<? extends UDF> udfClass = bridge.getUdfClass();
+ if (udfClass.equals(UDFHex.class)
+ || udfClass.equals(UDFRegExpExtract.class)
+ || udfClass.equals(UDFRegExpReplace.class)
+ || udfClass.equals(UDFConv.class)
+ || udfClass.equals(UDFFromUnixTime.class) && isIntFamily(arg0Type(expr))
+ || isCastToIntFamily(udfClass) && isStringFamily(arg0Type(expr))
+ || isCastToFloatFamily(udfClass) && isStringFamily(arg0Type(expr))
+ || udfClass.equals(UDFToString.class) &&
+ (arg0Type(expr).equals("timestamp")
+ || arg0Type(expr).equals("double")
+ || arg0Type(expr).equals("float"))) {
+ return true;
+ }
+ } else if ((gudf instanceof GenericUDFTimestamp && isStringFamily(arg0Type(expr)))
+
+ /* GenericUDFCase and GenericUDFWhen are implemented with the UDF Adaptor because
+ * of their complexity and generality. In the future, variations of these
+ * can be optimized to run faster for the vectorized code path. For example,
+ * CASE col WHEN 1 then "one" WHEN 2 THEN "two" ELSE "other" END
+ * is an example of a GenericUDFCase that has all constant arguments
+ * except for the first argument. This is probably a common case and a
+ * good candidate for a fast, special-purpose VectorExpression. Then
+ * the UDF Adaptor code path could be used as a catch-all for
+ * non-optimized general cases.
+ */
+ || gudf instanceof GenericUDFCase
+ || gudf instanceof GenericUDFWhen) {
+ return true;
+ } else if (gudf instanceof GenericUDFToChar &&
+ (arg0Type(expr).equals("timestamp")
+ || arg0Type(expr).equals("double")
+ || arg0Type(expr).equals("float"))) {
+ return true;
+ } else if (gudf instanceof GenericUDFToVarchar &&
+ (arg0Type(expr).equals("timestamp")
+ || arg0Type(expr).equals("double")
+ || arg0Type(expr).equals("float"))) {
+ return true;
+ } else if (gudf instanceof GenericUDFBetween && (mode == VectorExpressionDescriptor.Mode.PROJECTION)) {
+ // between has 4 args here, but can be vectorized like this
+ return true;
+ }
+ return false;
+ }
+
public static boolean isCastToIntFamily(Class<? extends UDF> udfClass) {
return udfClass.equals(UDFToByte.class)
|| udfClass.equals(UDFToShort.class)
http://git-wip-us.apache.org/repos/asf/hive/blob/4941c17d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 9802afc..b760988 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -1293,7 +1293,7 @@ public class Vectorizer implements PhysicalPlanResolver {
if (op.getParentOperators().size() == 0) {
LOG.info("ReduceWorkVectorizationNodeProcessor process reduceColumnNames " + vectorTaskColumnInfo.allColumnNames.toString());
- vContext = new VectorizationContext("__Reduce_Shuffle__", vectorTaskColumnInfo.allColumnNames);
+ vContext = new VectorizationContext("__Reduce_Shuffle__", vectorTaskColumnInfo.allColumnNames, hiveConf);
taskVectorizationContext = vContext;
saveRootVectorOp = true;
@@ -1332,8 +1332,8 @@ public class Vectorizer implements PhysicalPlanResolver {
}
private static class ValidatorVectorizationContext extends VectorizationContext {
- private ValidatorVectorizationContext() {
- super("No Name");
+ private ValidatorVectorizationContext(HiveConf hiveConf) {
+ super("No Name", hiveConf);
}
@Override
@@ -1851,7 +1851,7 @@ public class Vectorizer implements PhysicalPlanResolver {
return false;
}
try {
- VectorizationContext vc = new ValidatorVectorizationContext();
+ VectorizationContext vc = new ValidatorVectorizationContext(hiveConf);
if (vc.getVectorExpression(desc, mode) == null) {
// TODO: this cannot happen - VectorizationContext throws in such cases.
LOG.info("getVectorExpression returned null");
@@ -1905,7 +1905,7 @@ public class Vectorizer implements PhysicalPlanResolver {
}
// See if we can vectorize the aggregation.
- VectorizationContext vc = new ValidatorVectorizationContext();
+ VectorizationContext vc = new ValidatorVectorizationContext(hiveConf);
VectorAggregateExpression vectorAggrExpr;
try {
vectorAggrExpr = vc.getAggregatorExpression(aggDesc);
@@ -1944,7 +1944,8 @@ public class Vectorizer implements PhysicalPlanResolver {
private VectorizationContext getVectorizationContext(String contextName,
VectorTaskColumnInfo vectorTaskColumnInfo) {
- VectorizationContext vContext = new VectorizationContext(contextName, vectorTaskColumnInfo.allColumnNames);
+ VectorizationContext vContext =
+ new VectorizationContext(contextName, vectorTaskColumnInfo.allColumnNames, hiveConf);
return vContext;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/4941c17d/ql/src/test/queries/clientpositive/vector_adaptor_usage_mode.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_adaptor_usage_mode.q b/ql/src/test/queries/clientpositive/vector_adaptor_usage_mode.q
new file mode 100644
index 0000000..cef4e4c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_adaptor_usage_mode.q
@@ -0,0 +1,177 @@
+SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+SET hive.auto.convert.join=true;
+
+-- SORT_QUERY_RESULTS
+
+drop table varchar_udf_1;
+
+create table varchar_udf_1 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)) STORED AS ORC;
+insert overwrite table varchar_udf_1
+ select key, value, key, value from src where key = '238' limit 1;
+
+DROP TABLE IF EXISTS DECIMAL_UDF_txt;
+DROP TABLE IF EXISTS DECIMAL_UDF;
+
+CREATE TABLE DECIMAL_UDF_txt (key decimal(20,10), value int)
+ROW FORMAT DELIMITED
+ FIELDS TERMINATED BY ' '
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/kv7.txt' INTO TABLE DECIMAL_UDF_txt;
+
+CREATE TABLE DECIMAL_UDF (key decimal(20,10), value int)
+STORED AS ORC;
+
+INSERT OVERWRITE TABLE DECIMAL_UDF SELECT * FROM DECIMAL_UDF_txt;
+
+drop table if exists count_case_groupby;
+
+create table count_case_groupby (key string, bool boolean) STORED AS orc;
+insert into table count_case_groupby values ('key1', true),('key2', false),('key3', NULL),('key4', false),('key5',NULL);
+
+set hive.vectorized.adaptor.usage.mode=none;
+
+explain
+select
+ c2 regexp 'val',
+ c4 regexp 'val',
+ (c2 regexp 'val') = (c4 regexp 'val')
+from varchar_udf_1 limit 1;
+
+select
+ c2 regexp 'val',
+ c4 regexp 'val',
+ (c2 regexp 'val') = (c4 regexp 'val')
+from varchar_udf_1 limit 1;
+
+explain
+select
+ regexp_extract(c2, 'val_([0-9]+)', 1),
+ regexp_extract(c4, 'val_([0-9]+)', 1),
+ regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)
+from varchar_udf_1 limit 1;
+
+select
+ regexp_extract(c2, 'val_([0-9]+)', 1),
+ regexp_extract(c4, 'val_([0-9]+)', 1),
+ regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)
+from varchar_udf_1 limit 1;
+
+explain
+select
+ regexp_replace(c2, 'val', 'replaced'),
+ regexp_replace(c4, 'val', 'replaced'),
+ regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')
+from varchar_udf_1 limit 1;
+
+select
+ regexp_replace(c2, 'val', 'replaced'),
+ regexp_replace(c4, 'val', 'replaced'),
+ regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')
+from varchar_udf_1 limit 1;
+
+
+set hive.vectorized.adaptor.usage.mode=chosen;
+
+explain
+select
+ c2 regexp 'val',
+ c4 regexp 'val',
+ (c2 regexp 'val') = (c4 regexp 'val')
+from varchar_udf_1 limit 1;
+
+select
+ c2 regexp 'val',
+ c4 regexp 'val',
+ (c2 regexp 'val') = (c4 regexp 'val')
+from varchar_udf_1 limit 1;
+
+explain
+select
+ regexp_extract(c2, 'val_([0-9]+)', 1),
+ regexp_extract(c4, 'val_([0-9]+)', 1),
+ regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)
+from varchar_udf_1 limit 1;
+
+select
+ regexp_extract(c2, 'val_([0-9]+)', 1),
+ regexp_extract(c4, 'val_([0-9]+)', 1),
+ regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)
+from varchar_udf_1 limit 1;
+
+explain
+select
+ regexp_replace(c2, 'val', 'replaced'),
+ regexp_replace(c4, 'val', 'replaced'),
+ regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')
+from varchar_udf_1 limit 1;
+
+select
+ regexp_replace(c2, 'val', 'replaced'),
+ regexp_replace(c4, 'val', 'replaced'),
+ regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')
+from varchar_udf_1 limit 1;
+
+
+set hive.vectorized.adaptor.usage.mode=none;
+
+EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF;
+
+SELECT POWER(key, 2) FROM DECIMAL_UDF;
+
+EXPLAIN
+SELECT
+ exp(key), ln(key),
+ log(key), log(key, key), log(key, value), log(value, key),
+ log10(key), sqrt(key)
+FROM DECIMAL_UDF WHERE key = 10;
+
+SELECT
+ exp(key), ln(key),
+ log(key), log(key, key), log(key, value), log(value, key),
+ log10(key), sqrt(key)
+FROM DECIMAL_UDF WHERE key = 10;
+
+set hive.vectorized.adaptor.usage.mode=chosen;
+
+EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF;
+
+SELECT POWER(key, 2) FROM DECIMAL_UDF;
+
+EXPLAIN
+SELECT
+ exp(key), ln(key),
+ log(key), log(key, key), log(key, value), log(value, key),
+ log10(key), sqrt(key)
+FROM DECIMAL_UDF WHERE key = 10;
+
+SELECT
+ exp(key), ln(key),
+ log(key), log(key, key), log(key, value), log(value, key),
+ log10(key), sqrt(key)
+FROM DECIMAL_UDF WHERE key = 10;
+
+
+set hive.vectorized.adaptor.usage.mode=none;
+
+explain
+SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key;
+
+SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key;
+
+set hive.vectorized.adaptor.usage.mode=chosen;
+
+explain
+SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key;
+
+SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key;
+
+
+drop table varchar_udf_1;
+
+DROP TABLE DECIMAL_UDF_txt;
+DROP TABLE DECIMAL_UDF;
+
+drop table count_case_groupby;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/4941c17d/ql/src/test/results/clientpositive/vector_adaptor_usage_mode.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_adaptor_usage_mode.q.out b/ql/src/test/results/clientpositive/vector_adaptor_usage_mode.q.out
new file mode 100644
index 0000000..bacb3bb
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_adaptor_usage_mode.q.out
@@ -0,0 +1,933 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+drop table varchar_udf_1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+drop table varchar_udf_1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table varchar_udf_1 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@varchar_udf_1
+POSTHOOK: query: create table varchar_udf_1 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@varchar_udf_1
+PREHOOK: query: insert overwrite table varchar_udf_1
+ select key, value, key, value from src where key = '238' limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@varchar_udf_1
+POSTHOOK: query: insert overwrite table varchar_udf_1
+ select key, value, key, value from src where key = '238' limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@varchar_udf_1
+POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DROP TABLE IF EXISTS DECIMAL_UDF_txt
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS DECIMAL_UDF_txt
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE IF EXISTS DECIMAL_UDF
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS DECIMAL_UDF
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE DECIMAL_UDF_txt (key decimal(20,10), value int)
+ROW FORMAT DELIMITED
+ FIELDS TERMINATED BY ' '
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@DECIMAL_UDF_txt
+POSTHOOK: query: CREATE TABLE DECIMAL_UDF_txt (key decimal(20,10), value int)
+ROW FORMAT DELIMITED
+ FIELDS TERMINATED BY ' '
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@DECIMAL_UDF_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv7.txt' INTO TABLE DECIMAL_UDF_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@decimal_udf_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv7.txt' INTO TABLE DECIMAL_UDF_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@decimal_udf_txt
+PREHOOK: query: CREATE TABLE DECIMAL_UDF (key decimal(20,10), value int)
+STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@DECIMAL_UDF
+POSTHOOK: query: CREATE TABLE DECIMAL_UDF (key decimal(20,10), value int)
+STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@DECIMAL_UDF
+PREHOOK: query: INSERT OVERWRITE TABLE DECIMAL_UDF SELECT * FROM DECIMAL_UDF_txt
+PREHOOK: type: QUERY
+PREHOOK: Input: default@decimal_udf_txt
+PREHOOK: Output: default@decimal_udf
+POSTHOOK: query: INSERT OVERWRITE TABLE DECIMAL_UDF SELECT * FROM DECIMAL_UDF_txt
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@decimal_udf_txt
+POSTHOOK: Output: default@decimal_udf
+POSTHOOK: Lineage: decimal_udf.key SIMPLE [(decimal_udf_txt)decimal_udf_txt.FieldSchema(name:key, type:decimal(20,10), comment:null), ]
+POSTHOOK: Lineage: decimal_udf.value SIMPLE [(decimal_udf_txt)decimal_udf_txt.FieldSchema(name:value, type:int, comment:null), ]
+PREHOOK: query: drop table if exists count_case_groupby
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists count_case_groupby
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table count_case_groupby (key string, bool boolean) STORED AS orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@count_case_groupby
+POSTHOOK: query: create table count_case_groupby (key string, bool boolean) STORED AS orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@count_case_groupby
+PREHOOK: query: insert into table count_case_groupby values ('key1', true),('key2', false),('key3', NULL),('key4', false),('key5',NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@count_case_groupby
+POSTHOOK: query: insert into table count_case_groupby values ('key1', true),('key2', false),('key3', NULL),('key4', false),('key5',NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@count_case_groupby
+POSTHOOK: Lineage: count_case_groupby.bool EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: count_case_groupby.key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: explain
+select
+ c2 regexp 'val',
+ c4 regexp 'val',
+ (c2 regexp 'val') = (c4 regexp 'val')
+from varchar_udf_1 limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ c2 regexp 'val',
+ c4 regexp 'val',
+ (c2 regexp 'val') = (c4 regexp 'val')
+from varchar_udf_1 limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: varchar_udf_1
+ Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c2 regexp 'val' (type: boolean), c4 regexp 'val' (type: boolean), (c2 regexp 'val' = c4 regexp 'val') (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select
+ c2 regexp 'val',
+ c4 regexp 'val',
+ (c2 regexp 'val') = (c4 regexp 'val')
+from varchar_udf_1 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_udf_1
+#### A masked pattern was here ####
+POSTHOOK: query: select
+ c2 regexp 'val',
+ c4 regexp 'val',
+ (c2 regexp 'val') = (c4 regexp 'val')
+from varchar_udf_1 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_udf_1
+#### A masked pattern was here ####
+true true true
+PREHOOK: query: explain
+select
+ regexp_extract(c2, 'val_([0-9]+)', 1),
+ regexp_extract(c4, 'val_([0-9]+)', 1),
+ regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)
+from varchar_udf_1 limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ regexp_extract(c2, 'val_([0-9]+)', 1),
+ regexp_extract(c4, 'val_([0-9]+)', 1),
+ regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)
+from varchar_udf_1 limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: varchar_udf_1
+ Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: regexp_extract(c2, 'val_([0-9]+)', 1) (type: string), regexp_extract(c4, 'val_([0-9]+)', 1) (type: string), (regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)) (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select
+ regexp_extract(c2, 'val_([0-9]+)', 1),
+ regexp_extract(c4, 'val_([0-9]+)', 1),
+ regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)
+from varchar_udf_1 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_udf_1
+#### A masked pattern was here ####
+POSTHOOK: query: select
+ regexp_extract(c2, 'val_([0-9]+)', 1),
+ regexp_extract(c4, 'val_([0-9]+)', 1),
+ regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)
+from varchar_udf_1 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_udf_1
+#### A masked pattern was here ####
+238 238 true
+PREHOOK: query: explain
+select
+ regexp_replace(c2, 'val', 'replaced'),
+ regexp_replace(c4, 'val', 'replaced'),
+ regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')
+from varchar_udf_1 limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ regexp_replace(c2, 'val', 'replaced'),
+ regexp_replace(c4, 'val', 'replaced'),
+ regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')
+from varchar_udf_1 limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: varchar_udf_1
+ Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: regexp_replace(c2, 'val', 'replaced') (type: string), regexp_replace(c4, 'val', 'replaced') (type: string), (regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')) (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select
+ regexp_replace(c2, 'val', 'replaced'),
+ regexp_replace(c4, 'val', 'replaced'),
+ regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')
+from varchar_udf_1 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_udf_1
+#### A masked pattern was here ####
+POSTHOOK: query: select
+ regexp_replace(c2, 'val', 'replaced'),
+ regexp_replace(c4, 'val', 'replaced'),
+ regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')
+from varchar_udf_1 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_udf_1
+#### A masked pattern was here ####
+replaced_238 replaced_238 true
+PREHOOK: query: explain
+select
+ c2 regexp 'val',
+ c4 regexp 'val',
+ (c2 regexp 'val') = (c4 regexp 'val')
+from varchar_udf_1 limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ c2 regexp 'val',
+ c4 regexp 'val',
+ (c2 regexp 'val') = (c4 regexp 'val')
+from varchar_udf_1 limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: varchar_udf_1
+ Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c2 regexp 'val' (type: boolean), c4 regexp 'val' (type: boolean), (c2 regexp 'val' = c4 regexp 'val') (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select
+ c2 regexp 'val',
+ c4 regexp 'val',
+ (c2 regexp 'val') = (c4 regexp 'val')
+from varchar_udf_1 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_udf_1
+#### A masked pattern was here ####
+POSTHOOK: query: select
+ c2 regexp 'val',
+ c4 regexp 'val',
+ (c2 regexp 'val') = (c4 regexp 'val')
+from varchar_udf_1 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_udf_1
+#### A masked pattern was here ####
+true true true
+PREHOOK: query: explain
+select
+ regexp_extract(c2, 'val_([0-9]+)', 1),
+ regexp_extract(c4, 'val_([0-9]+)', 1),
+ regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)
+from varchar_udf_1 limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ regexp_extract(c2, 'val_([0-9]+)', 1),
+ regexp_extract(c4, 'val_([0-9]+)', 1),
+ regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)
+from varchar_udf_1 limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: varchar_udf_1
+ Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: regexp_extract(c2, 'val_([0-9]+)', 1) (type: string), regexp_extract(c4, 'val_([0-9]+)', 1) (type: string), (regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)) (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select
+ regexp_extract(c2, 'val_([0-9]+)', 1),
+ regexp_extract(c4, 'val_([0-9]+)', 1),
+ regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)
+from varchar_udf_1 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_udf_1
+#### A masked pattern was here ####
+POSTHOOK: query: select
+ regexp_extract(c2, 'val_([0-9]+)', 1),
+ regexp_extract(c4, 'val_([0-9]+)', 1),
+ regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)
+from varchar_udf_1 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_udf_1
+#### A masked pattern was here ####
+238 238 true
+PREHOOK: query: explain
+select
+ regexp_replace(c2, 'val', 'replaced'),
+ regexp_replace(c4, 'val', 'replaced'),
+ regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')
+from varchar_udf_1 limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ regexp_replace(c2, 'val', 'replaced'),
+ regexp_replace(c4, 'val', 'replaced'),
+ regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')
+from varchar_udf_1 limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: varchar_udf_1
+ Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: regexp_replace(c2, 'val', 'replaced') (type: string), regexp_replace(c4, 'val', 'replaced') (type: string), (regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')) (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select
+ regexp_replace(c2, 'val', 'replaced'),
+ regexp_replace(c4, 'val', 'replaced'),
+ regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')
+from varchar_udf_1 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_udf_1
+#### A masked pattern was here ####
+POSTHOOK: query: select
+ regexp_replace(c2, 'val', 'replaced'),
+ regexp_replace(c4, 'val', 'replaced'),
+ regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')
+from varchar_udf_1 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_udf_1
+#### A masked pattern was here ####
+replaced_238 replaced_238 true
+PREHOOK: query: EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: decimal_udf
+ Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: power(key, 2) (type: double)
+ outputColumnNames: _col0
+ Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT POWER(key, 2) FROM DECIMAL_UDF
+PREHOOK: type: QUERY
+PREHOOK: Input: default@decimal_udf
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT POWER(key, 2) FROM DECIMAL_UDF
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@decimal_udf
+#### A masked pattern was here ####
+0.0
+0.0
+0.0
+0.010000000000000002
+0.04000000000000001
+0.09
+0.09
+0.10890000000000001
+0.10890000000000001
+0.11088900000000002
+0.11088900000000002
+1.0
+1.0
+1.0
+1.0E-4
+1.2544000000000002
+1.2544000000000002
+1.2544000000000002
+1.2588840000000003
+1.2588840000000003
+1.52415787532388352E18
+1.52415787532388352E18
+1.936E7
+100.0
+10000.0
+15376.0
+15675.04
+1576255.1401
+4.0
+4.0
+4.0E-4
+400.0
+40000.0
+9.8596
+9.8596
+9.8596
+9.8596
+NULL
+PREHOOK: query: EXPLAIN
+SELECT
+ exp(key), ln(key),
+ log(key), log(key, key), log(key, value), log(value, key),
+ log10(key), sqrt(key)
+FROM DECIMAL_UDF WHERE key = 10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT
+ exp(key), ln(key),
+ log(key), log(key, key), log(key, value), log(value, key),
+ log10(key), sqrt(key)
+FROM DECIMAL_UDF WHERE key = 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: decimal_udf
+ Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key = 10) (type: boolean)
+ Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 22026.465794806718 (type: double), 2.302585092994046 (type: double), 2.302585092994046 (type: double), 1.0 (type: double), log(10, value) (type: double), log(value, 10) (type: double), 1.0 (type: double), 3.1622776601683795 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT
+ exp(key), ln(key),
+ log(key), log(key, key), log(key, value), log(value, key),
+ log10(key), sqrt(key)
+FROM DECIMAL_UDF WHERE key = 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@decimal_udf
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ exp(key), ln(key),
+ log(key), log(key, key), log(key, value), log(value, key),
+ log10(key), sqrt(key)
+FROM DECIMAL_UDF WHERE key = 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@decimal_udf
+#### A masked pattern was here ####
+22026.465794806718 2.302585092994046 2.302585092994046 1.0 1.0 1.0 1.0 3.1622776601683795
+PREHOOK: query: EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: decimal_udf
+ Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: power(key, 2) (type: double)
+ outputColumnNames: _col0
+ Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT POWER(key, 2) FROM DECIMAL_UDF
+PREHOOK: type: QUERY
+PREHOOK: Input: default@decimal_udf
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT POWER(key, 2) FROM DECIMAL_UDF
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@decimal_udf
+#### A masked pattern was here ####
+0.0
+0.0
+0.0
+0.010000000000000002
+0.04000000000000001
+0.09
+0.09
+0.10890000000000001
+0.10890000000000001
+0.11088900000000002
+0.11088900000000002
+1.0
+1.0
+1.0
+1.0E-4
+1.2544000000000002
+1.2544000000000002
+1.2544000000000002
+1.2588840000000003
+1.2588840000000003
+1.52415787532388352E18
+1.52415787532388352E18
+1.936E7
+100.0
+10000.0
+15376.0
+15675.04
+1576255.1401
+4.0
+4.0
+4.0E-4
+400.0
+40000.0
+9.8596
+9.8596
+9.8596
+9.8596
+NULL
+PREHOOK: query: EXPLAIN
+SELECT
+ exp(key), ln(key),
+ log(key), log(key, key), log(key, value), log(value, key),
+ log10(key), sqrt(key)
+FROM DECIMAL_UDF WHERE key = 10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT
+ exp(key), ln(key),
+ log(key), log(key, key), log(key, value), log(value, key),
+ log10(key), sqrt(key)
+FROM DECIMAL_UDF WHERE key = 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: decimal_udf
+ Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key = 10) (type: boolean)
+ Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 22026.465794806718 (type: double), 2.302585092994046 (type: double), 2.302585092994046 (type: double), 1.0 (type: double), log(10, value) (type: double), log(value, 10) (type: double), 1.0 (type: double), 3.1622776601683795 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT
+ exp(key), ln(key),
+ log(key), log(key, key), log(key, value), log(value, key),
+ log10(key), sqrt(key)
+FROM DECIMAL_UDF WHERE key = 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@decimal_udf
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ exp(key), ln(key),
+ log(key), log(key, key), log(key, value), log(value, key),
+ log10(key), sqrt(key)
+FROM DECIMAL_UDF WHERE key = 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@decimal_udf
+#### A masked pattern was here ####
+22026.465794806718 2.302585092994046 2.302585092994046 1.0 1.0 1.0 1.0 3.1622776601683795
+PREHOOK: query: explain
+SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: count_case_groupby
+ Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), CASE WHEN (bool) THEN (1) WHEN ((not bool)) THEN (0) ELSE (null) END (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@count_case_groupby
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@count_case_groupby
+#### A masked pattern was here ####
+key1 1
+key2 1
+key3 0
+key4 1
+key5 0
+PREHOOK: query: explain
+SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: count_case_groupby
+ Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), CASE WHEN (bool) THEN (1) WHEN ((not bool)) THEN (0) ELSE (null) END (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@count_case_groupby
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, COUNT(CASE WHEN bool THEN 1 WHEN NOT bool THEN 0 ELSE NULL END) AS cnt_bool0_ok FROM count_case_groupby GROUP BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@count_case_groupby
+#### A masked pattern was here ####
+key1 1
+key2 1
+key3 0
+key4 1
+key5 0
+PREHOOK: query: drop table varchar_udf_1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@varchar_udf_1
+PREHOOK: Output: default@varchar_udf_1
+POSTHOOK: query: drop table varchar_udf_1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@varchar_udf_1
+POSTHOOK: Output: default@varchar_udf_1
+PREHOOK: query: DROP TABLE DECIMAL_UDF_txt
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@decimal_udf_txt
+PREHOOK: Output: default@decimal_udf_txt
+POSTHOOK: query: DROP TABLE DECIMAL_UDF_txt
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@decimal_udf_txt
+POSTHOOK: Output: default@decimal_udf_txt
+PREHOOK: query: DROP TABLE DECIMAL_UDF
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@decimal_udf
+PREHOOK: Output: default@decimal_udf
+POSTHOOK: query: DROP TABLE DECIMAL_UDF
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@decimal_udf
+POSTHOOK: Output: default@decimal_udf
+PREHOOK: query: drop table count_case_groupby
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@count_case_groupby
+PREHOOK: Output: default@count_case_groupby
+POSTHOOK: query: drop table count_case_groupby
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@count_case_groupby
+POSTHOOK: Output: default@count_case_groupby