You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/01/26 19:13:23 UTC
[34/34] hive git commit: HIVE-18517: Vectorization: Fix
VectorMapOperator to accept VRBs and check vectorized flag correctly to
support LLAP Caching (Matt McCline, review by Gopal Vijayaraghavan)
HIVE-18517: Vectorization: Fix VectorMapOperator to accept VRBs and check vectorized flag correctly to support LLAP Caching (Matt McCline, review by Gopal Vijayaraghavan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/255cf4ab
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/255cf4ab
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/255cf4ab
Branch: refs/heads/master
Commit: 255cf4ab22295996262ac39af703c2747b98dbf3
Parents: 8de3334
Author: Matt McCline <mm...@hortonworks.com>
Authored: Fri Jan 26 13:12:40 2018 -0600
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Fri Jan 26 13:12:40 2018 -0600
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 29 +
.../hive/llap/io/api/impl/LlapInputFormat.java | 2 +-
.../apache/hadoop/hive/ql/exec/Utilities.java | 14 +-
.../hive/ql/exec/vector/VectorMapOperator.java | 112 +-
.../hadoop/hive/ql/io/HiveInputFormat.java | 2 +-
.../hadoop/hive/ql/io/NullRowsInputFormat.java | 2 +-
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 4 +-
.../ql/io/parquet/MapredParquetInputFormat.java | 2 +-
.../hive/ql/optimizer/physical/Vectorizer.java | 74 +-
.../org/apache/hadoop/hive/ql/plan/MapWork.java | 19 +-
.../hive/ql/plan/VectorPartitionDesc.java | 9 +-
.../schema_evol_orc_acid_part_llap_io.q | 320 ++++
.../schema_evol_orc_acid_part_update_llap_io.q | 161 ++
.../schema_evol_orc_acid_table_llap_io.q | 318 +++
.../schema_evol_orc_acid_table_update_llap_io.q | 113 ++
.../schema_evol_orc_acidvec_part_llap_io.q | 359 ++++
...chema_evol_orc_acidvec_part_update_llap_io.q | 119 ++
.../schema_evol_orc_acidvec_table_llap_io.q | 357 ++++
...hema_evol_orc_acidvec_table_update_llap_io.q | 113 ++
...a_evol_orc_nonvec_part_all_complex_llap_io.q | 164 ++
...evol_orc_nonvec_part_all_primitive_llap_io.q | 207 ++
.../schema_evol_orc_nonvec_part_llap_io.q | 338 ++++
.../schema_evol_orc_nonvec_table_llap_io.q | 325 ++++
...hema_evol_orc_vec_part_all_complex_llap_io.q | 164 ++
...ma_evol_orc_vec_part_all_primitive_llap_io.q | 207 ++
.../schema_evol_orc_vec_part_llap_io.q | 339 ++++
.../schema_evol_orc_vec_table_llap_io.q | 324 ++++
..._evol_text_nonvec_part_all_complex_llap_io.q | 164 ++
...vol_text_nonvec_part_all_primitive_llap_io.q | 222 +++
.../schema_evol_text_nonvec_part_llap_io.q | 338 ++++
.../schema_evol_text_nonvec_table_llap_io.q | 324 ++++
...ema_evol_text_vec_part_all_complex_llap_io.q | 174 ++
...a_evol_text_vec_part_all_primitive_llap_io.q | 223 +++
.../schema_evol_text_vec_part_llap_io.q | 374 ++++
.../schema_evol_text_vec_table_llap_io.q | 360 ++++
..._evol_text_vecrow_part_all_complex_llap_io.q | 175 ++
...vol_text_vecrow_part_all_primitive_llap_io.q | 223 +++
.../schema_evol_text_vecrow_part_llap_io.q | 374 ++++
.../schema_evol_text_vecrow_table_llap_io.q | 363 ++++
.../queries/clientpositive/vector_llap_text_1.q | 45 +
.../schema_evol_orc_acid_part_llap_io.q.out | 1080 +++++++++++
...hema_evol_orc_acid_part_update_llap_io.q.out | 1681 ++++++++++++++++
.../schema_evol_orc_acid_table_llap_io.q.out | 1077 +++++++++++
...ema_evol_orc_acid_table_update_llap_io.q.out | 449 +++++
.../schema_evol_orc_acidvec_part_llap_io.q.out | 1811 ++++++++++++++++++
...a_evol_orc_acidvec_part_update_llap_io.q.out | 537 ++++++
.../schema_evol_orc_acidvec_table_llap_io.q.out | 1794 +++++++++++++++++
..._evol_orc_acidvec_table_update_llap_io.q.out | 449 +++++
...ol_orc_nonvec_part_all_complex_llap_io.q.out | 687 +++++++
..._orc_nonvec_part_all_primitive_llap_io.q.out | 1042 ++++++++++
.../schema_evol_orc_nonvec_part_llap_io.q.out | 1474 ++++++++++++++
.../schema_evol_orc_nonvec_table_llap_io.q.out | 1291 +++++++++++++
..._evol_orc_vec_part_all_complex_llap_io.q.out | 765 ++++++++
...vol_orc_vec_part_all_primitive_llap_io.q.out | 1172 ++++++++++++
.../llap/schema_evol_orc_vec_part_llap_io.q.out | 1708 +++++++++++++++++
.../schema_evol_orc_vec_table_llap_io.q.out | 1416 ++++++++++++++
...l_text_nonvec_part_all_complex_llap_io.q.out | 687 +++++++
...text_nonvec_part_all_primitive_llap_io.q.out | 644 +++++++
.../schema_evol_text_nonvec_part_llap_io.q.out | 1474 ++++++++++++++
.../schema_evol_text_nonvec_table_llap_io.q.out | 1291 +++++++++++++
...evol_text_vec_part_all_complex_llap_io.q.out | 388 ++++
...ol_text_vec_part_all_primitive_llap_io.q.out | 779 ++++++++
.../schema_evol_text_vec_part_llap_io.q.out | 1204 ++++++++++++
.../schema_evol_text_vec_table_llap_io.q.out | 1185 ++++++++++++
...l_text_vecrow_part_all_complex_llap_io.q.out | 385 ++++
...text_vecrow_part_all_primitive_llap_io.q.out | 774 ++++++++
.../schema_evol_text_vecrow_part_llap_io.q.out | 1195 ++++++++++++
.../schema_evol_text_vecrow_table_llap_io.q.out | 1176 ++++++++++++
.../llap/vector_llap_text_1.q.out | 351 ++++
.../clientpositive/parquet_no_row_serde.q.out | 2 +-
.../vectorization_input_format_excludes.q.out | 24 +-
71 files changed, 37466 insertions(+), 82 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/255cf4ab/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 1362079..80e6aee 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -598,34 +598,62 @@ minillaplocal.query.files=\
quotedid_smb.q,\
resourceplan.q,\
sample10.q,\
+ schema_evol_orc_acid_part_llap_io.q,\
schema_evol_orc_acid_part.q,\
+ schema_evol_orc_acid_part_update_llap_io.q,\
schema_evol_orc_acid_part_update.q,\
+ schema_evol_orc_acid_table_llap_io.q,\
schema_evol_orc_acid_table.q,\
+ schema_evol_orc_acid_table_update_llap_io.q,\
schema_evol_orc_acid_table_update.q,\
+ schema_evol_orc_acidvec_part_llap_io.q,\
schema_evol_orc_acidvec_part.q,\
+ schema_evol_orc_acidvec_part_update_llap_io.q,\
schema_evol_orc_acidvec_part_update.q,\
+ schema_evol_orc_acidvec_table_llap_io.q,\
schema_evol_orc_acidvec_table.q,\
+ schema_evol_orc_acidvec_table_update_llap_io.q,\
schema_evol_orc_acidvec_table_update.q,\
+ schema_evol_orc_nonvec_part_llap_io.q,\
schema_evol_orc_nonvec_part.q,\
+ schema_evol_orc_nonvec_part_all_complex_llap_io.q,\
schema_evol_orc_nonvec_part_all_complex.q,\
+ schema_evol_orc_nonvec_part_all_primitive_llap_io.q,\
schema_evol_orc_nonvec_part_all_primitive.q,\
+ schema_evol_orc_nonvec_table_llap_io.q,\
schema_evol_orc_nonvec_table.q,\
+ schema_evol_orc_vec_part_llap_io.q,\
schema_evol_orc_vec_part.q,\
+ schema_evol_orc_vec_part_all_complex_llap_io.q,\
schema_evol_orc_vec_part_all_complex.q,\
+ schema_evol_orc_vec_part_all_primitive_llap_io.q,\
schema_evol_orc_vec_part_all_primitive.q,\
+ schema_evol_orc_vec_table_llap_io.q,\
schema_evol_orc_vec_table.q,\
schema_evol_stats.q,\
+ schema_evol_text_nonvec_part_llap_io.q,\
schema_evol_text_nonvec_part.q,\
+ schema_evol_text_nonvec_part_all_complex_llap_io.q,\
schema_evol_text_nonvec_part_all_complex.q,\
+ schema_evol_text_nonvec_part_all_primitive_llap_io.q,\
schema_evol_text_nonvec_part_all_primitive.q,\
+ schema_evol_text_nonvec_table_llap_io.q,\
schema_evol_text_nonvec_table.q,\
+ schema_evol_text_vec_part_llap_io.q,\
schema_evol_text_vec_part.q,\
+ schema_evol_text_vec_part_all_complex_llap_io.q,\
schema_evol_text_vec_part_all_complex.q,\
+ schema_evol_text_vec_part_all_primitive_llap_io.q,\
schema_evol_text_vec_part_all_primitive.q,\
+ schema_evol_text_vec_table_llap_io.q,\
schema_evol_text_vec_table.q,\
+ schema_evol_text_vecrow_part_llap_io.q,\
schema_evol_text_vecrow_part.q,\
+ schema_evol_text_vecrow_part_all_complex_llap_io.q,\
schema_evol_text_vecrow_part_all_complex.q,\
+ schema_evol_text_vecrow_part_all_primitive_llap_io.q,\
schema_evol_text_vecrow_part_all_primitive.q,\
+ schema_evol_text_vecrow_table_llap_io.q,\
schema_evol_text_vecrow_table.q,\
selectDistinctStar.q,\
semijoin.q,\
@@ -698,6 +726,7 @@ minillaplocal.query.files=\
vector_join30.q,\
vector_join_filters.q,\
vector_leftsemi_mapjoin.q,\
+ vector_llap_text_1.q,\
vector_mapjoin_reduce.q,\
vector_number_compare_projection.q,\
vector_partitioned_date_time.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/255cf4ab/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
index 4336220..bb319f0 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
@@ -110,7 +110,7 @@ public class LlapInputFormat implements InputFormat<NullWritable, VectorizedRowB
}
// For non-vectorized operator case, wrap the reader if possible.
RecordReader<NullWritable, VectorizedRowBatch> result = rr;
- if (!Utilities.getUseVectorizedInputFileFormat(job)) {
+ if (!Utilities.getIsVectorized(job)) {
result = wrapLlapReader(includedCols, rr, split);
if (result == null) {
// Cannot wrap a reader for non-vectorized pipeline.
http://git-wip-us.apache.org/repos/asf/hive/blob/255cf4ab/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 2e1fd37..d7b3e4b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -3684,12 +3684,12 @@ public final class Utilities {
/**
* Returns true if a plan is both configured for vectorized execution
- * and the node is vectorized and the Input File Format is marked VectorizedInputFileFormat.
+ * and the node is vectorized.
*
* The plan may be configured for vectorization
* but vectorization disallowed eg. for FetchOperator execution.
*/
- public static boolean getUseVectorizedInputFileFormat(Configuration conf) {
+ public static boolean getIsVectorized(Configuration conf) {
if (conf.get(VECTOR_MODE) != null) {
// this code path is necessary, because with HS2 and client
// side split generation we end up not finding the map work.
@@ -3697,13 +3697,12 @@ public final class Utilities {
// generation is multi-threaded - HS2 plan cache uses thread
// locals).
return
- conf.getBoolean(VECTOR_MODE, false) &&
- conf.getBoolean(USE_VECTORIZED_INPUT_FILE_FORMAT, false);
+ conf.getBoolean(VECTOR_MODE, false);
} else {
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED) &&
Utilities.getPlanPath(conf) != null) {
MapWork mapWork = Utilities.getMapWork(conf);
- return (mapWork.getVectorMode() && mapWork.getUseVectorizedInputFileFormat());
+ return mapWork.getVectorMode();
} else {
return false;
}
@@ -3711,10 +3710,9 @@ public final class Utilities {
}
- public static boolean getUseVectorizedInputFileFormat(Configuration conf, MapWork mapWork) {
+ public static boolean getIsVectorized(Configuration conf, MapWork mapWork) {
return HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED) &&
- mapWork.getVectorMode() &&
- mapWork.getUseVectorizedInputFileFormat();
+ mapWork.getVectorMode();
}
/**
http://git-wip-us.apache.org/repos/asf/hive/blob/255cf4ab/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
index cd12a0b..6f1346d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
@@ -620,6 +620,25 @@ public class VectorMapOperator extends AbstractMapOperator {
}
/*
+ * Flush a partially full deserializerBatch.
+ * @return Return true if the operator tree is not done yet.
+ */
+ private boolean flushDeserializerBatch() throws HiveException {
+ if (deserializerBatch.size > 0) {
+
+ batchCounter++;
+ oneRootOperator.process(deserializerBatch, 0);
+ deserializerBatch.reset();
+ if (oneRootOperator.getDone()) {
+ setDone(true);
+ return false;
+ }
+
+ }
+ return true;
+ }
+
+ /*
* Setup the context for reading from the next partition file.
*/
private void setupPartitionContextVars(String nominalPath) throws HiveException {
@@ -672,20 +691,14 @@ public class VectorMapOperator extends AbstractMapOperator {
currentReadType == VectorMapOperatorReadType.VECTOR_DESERIALIZE ||
currentReadType == VectorMapOperatorReadType.ROW_DESERIALIZE);
- if (deserializerBatch.size > 0) {
-
- /*
- * Clear out any rows in the batch from previous partition since we are going to change
- * the repeating partition column values.
- */
- batchCounter++;
- oneRootOperator.process(deserializerBatch, 0);
- deserializerBatch.reset();
- if (oneRootOperator.getDone()) {
- setDone(true);
- return;
- }
+ /*
+ * Clear out any rows in the batch from previous partition since we are going to change
+ * the repeating partition column values.
+ */
+ if (!flushDeserializerBatch()) {
+ // Operator tree is now done.
+ return;
}
/*
@@ -773,6 +786,38 @@ public class VectorMapOperator extends AbstractMapOperator {
return null;
}
+ /*
+ * Deliver a vector batch to the operator tree.
+ *
+ * The Vectorized Input File Format reader has already set the partition column
+ * values, reset and filled in the batch, etc.
+ *
+ * We pass the VectorizedRowBatch through here.
+ *
+ * @return Return true if the operator tree is not done yet.
+ */
+ private boolean deliverVectorizedRowBatch(Writable value) throws HiveException {
+
+ batchCounter++;
+ if (value != null) {
+ VectorizedRowBatch batch = (VectorizedRowBatch) value;
+ numRows += batch.size;
+ if (hasRowIdentifier) {
+ if (batchContext.getRecordIdColumnVector() == null) {
+ setRowIdentiferToNull(batch);
+ } else {
+ batch.cols[rowIdentifierColumnNum] = batchContext.getRecordIdColumnVector();
+ }
+ }
+ }
+ oneRootOperator.process(value, 0);
+ if (oneRootOperator.getDone()) {
+ setDone(true);
+ return false;
+ }
+ return true;
+ }
+
@Override
public void process(Writable value) throws HiveException {
@@ -798,30 +843,33 @@ public class VectorMapOperator extends AbstractMapOperator {
try {
if (currentReadType == VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT) {
+ if (!deliverVectorizedRowBatch(value)) {
+
+ // Operator tree is now done.
+ return;
+ }
+
+ } else if (value instanceof VectorizedRowBatch) {
+
/*
- * The Vectorized Input File Format reader has already set the partition column
- * values, reset and filled in the batch, etc.
- *
- * We pass the VectorizedRowBatch through here.
+ * This case can happen with LLAP. If it is able to deserialize and cache data from the
+ * input format, it will deliver that cached data to us as VRBs.
*/
- batchCounter++;
- if (value != null) {
- VectorizedRowBatch batch = (VectorizedRowBatch) value;
- numRows += batch.size;
- if (hasRowIdentifier) {
- if (batchContext.getRecordIdColumnVector() == null) {
- setRowIdentiferToNull(batch);
- } else {
- batch.cols[rowIdentifierColumnNum] = batchContext.getRecordIdColumnVector();
- }
- }
- }
- oneRootOperator.process(value, 0);
- if (oneRootOperator.getDone()) {
- setDone(true);
+
+ /*
+ * Clear out any rows we may have processed in row-mode for the current partition..
+ */
+ if (!flushDeserializerBatch()) {
+
+ // Operator tree is now done.
return;
}
+ if (!deliverVectorizedRowBatch(value)) {
+
+ // Operator tree is now done.
+ return;
+ }
} else {
/*
http://git-wip-us.apache.org/repos/asf/hive/blob/255cf4ab/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
index 2687d33..7d3ff36 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
@@ -222,7 +222,7 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
String ifName = inputFormat.getClass().getCanonicalName();
boolean isSupported = inputFormat instanceof LlapWrappableInputFormatInterface;
boolean isCacheOnly = inputFormat instanceof LlapCacheOnlyInputFormatInterface;
- boolean isVectorized = Utilities.getUseVectorizedInputFileFormat(conf);
+ boolean isVectorized = Utilities.getIsVectorized(conf);
if (!isVectorized) {
// Pretend it's vectorized if the non-vector wrapped is enabled.
isVectorized = HiveConf.getBoolVar(conf, ConfVars.LLAP_IO_NONVECTOR_WRAPPER_ENABLED)
http://git-wip-us.apache.org/repos/asf/hive/blob/255cf4ab/ql/src/java/org/apache/hadoop/hive/ql/io/NullRowsInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/NullRowsInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/NullRowsInputFormat.java
index ce5dae0..6a372a3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/NullRowsInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/NullRowsInputFormat.java
@@ -71,7 +71,7 @@ public class NullRowsInputFormat implements InputFormat<NullWritable, NullWritab
private boolean addPartitionCols = true;
public NullRowsRecordReader(Configuration conf, InputSplit split) throws IOException {
- boolean isVectorMode = Utilities.getUseVectorizedInputFileFormat(conf);
+ boolean isVectorMode = Utilities.getIsVectorized(conf);
if (LOG.isDebugEnabled()) {
LOG.debug(getClass().getSimpleName() + " in "
+ (isVectorMode ? "" : "non-") + "vector mode");
http://git-wip-us.apache.org/repos/asf/hive/blob/255cf4ab/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index e92bcb4..ff2cc04 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -506,7 +506,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
List<FileStatus> files
) throws IOException {
- if (Utilities.getUseVectorizedInputFileFormat(conf)) {
+ if (Utilities.getIsVectorized(conf)) {
return new VectorizedOrcInputFormat().validateInput(fs, conf, files);
}
@@ -1890,7 +1890,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
getRecordReader(InputSplit inputSplit, JobConf conf,
Reporter reporter) throws IOException {
//CombineHiveInputFormat may produce FileSplit that is not OrcSplit
- boolean vectorMode = Utilities.getUseVectorizedInputFileFormat(conf);
+ boolean vectorMode = Utilities.getIsVectorized(conf);
boolean isAcidRead = isAcidRead(conf, inputSplit);
if (!isAcidRead) {
if (vectorMode) {
http://git-wip-us.apache.org/repos/asf/hive/blob/255cf4ab/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java
index ab8c0ca..10e6a18 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java
@@ -74,7 +74,7 @@ public class MapredParquetInputFormat extends FileInputFormat<NullWritable, Arra
final org.apache.hadoop.mapred.Reporter reporter
) throws IOException {
try {
- if (Utilities.getUseVectorizedInputFileFormat(job)) {
+ if (Utilities.getIsVectorized(job)) {
if (LOG.isDebugEnabled()) {
LOG.debug("Using vectorized record reader");
}
http://git-wip-us.apache.org/repos/asf/hive/blob/255cf4ab/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 215aaad..99618c6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -35,6 +35,7 @@ import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
import java.util.Stack;
+import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Pattern;
@@ -1111,6 +1112,25 @@ public class Vectorizer implements PhysicalPlanResolver {
}
/*
+ * Add a vector partition descriptor to partition descriptor, removing duplicate object.
+ *
+ * If the same vector partition descriptor has already been allocated, share that object.
+ */
+ private void addVectorPartitionDesc(PartitionDesc pd, VectorPartitionDesc vpd,
+ Map<VectorPartitionDesc, VectorPartitionDesc> vectorPartitionDescMap) {
+
+ VectorPartitionDesc existingEle = vectorPartitionDescMap.get(vpd);
+ if (existingEle != null) {
+
+ // Use the object we already have.
+ vpd = existingEle;
+ } else {
+ vectorPartitionDescMap.put(vpd, vpd);
+ }
+ pd.setVectorPartitionDesc(vpd);
+ }
+
+ /*
* There are 3 modes of reading for vectorization:
*
* 1) One for the Vectorized Input File Format which returns VectorizedRowBatch as the row.
@@ -1127,6 +1147,7 @@ public class Vectorizer implements PhysicalPlanResolver {
private boolean verifyAndSetVectorPartDesc(
PartitionDesc pd, boolean isAcidTable,
Set<String> inputFileFormatClassNameSet,
+ Map<VectorPartitionDesc, VectorPartitionDesc> vectorPartitionDescMap,
Set<String> enabledConditionsMetSet, ArrayList<String> enabledConditionsNotMetList,
Set<Support> newSupportSet) {
@@ -1154,9 +1175,11 @@ public class Vectorizer implements PhysicalPlanResolver {
addVectorizedInputFileFormatSupport(
newSupportSet, isInputFileFormatVectorized, inputFileFormatClass);
- pd.setVectorPartitionDesc(
+ addVectorPartitionDesc(
+ pd,
VectorPartitionDesc.createVectorizedInputFileFormat(
- inputFileFormatClassName, Utilities.isInputFileFormatSelfDescribing(pd)));
+ inputFileFormatClassName, Utilities.isInputFileFormatSelfDescribing(pd)),
+ vectorPartitionDescMap);
enabledConditionsMetSet.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT.varname);
return true;
@@ -1173,9 +1196,11 @@ public class Vectorizer implements PhysicalPlanResolver {
addVectorizedInputFileFormatSupport(
newSupportSet, isInputFileFormatVectorized, inputFileFormatClass);
- pd.setVectorPartitionDesc(
+ addVectorPartitionDesc(
+ pd,
VectorPartitionDesc.createVectorizedInputFileFormat(
- inputFileFormatClassName, Utilities.isInputFileFormatSelfDescribing(pd)));
+ inputFileFormatClassName, Utilities.isInputFileFormatSelfDescribing(pd)),
+ vectorPartitionDescMap);
enabledConditionsMetSet.add(
HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT.varname);
@@ -1239,18 +1264,22 @@ public class Vectorizer implements PhysicalPlanResolver {
// Add the support for read variations in Vectorized Text.
newSupportSet.addAll(vectorDeserializeTextSupportSet);
- pd.setVectorPartitionDesc(
+ addVectorPartitionDesc(
+ pd,
VectorPartitionDesc.createVectorDeserialize(
- inputFileFormatClassName, VectorDeserializeType.LAZY_SIMPLE));
+ inputFileFormatClassName, VectorDeserializeType.LAZY_SIMPLE),
+ vectorPartitionDescMap);
enabledConditionsMetSet.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTOR_DESERIALIZE.varname);
return true;
}
} else if (isSequenceFormat) {
- pd.setVectorPartitionDesc(
+ addVectorPartitionDesc(
+ pd,
VectorPartitionDesc.createVectorDeserialize(
- inputFileFormatClassName, VectorDeserializeType.LAZY_BINARY));
+ inputFileFormatClassName, VectorDeserializeType.LAZY_BINARY),
+ vectorPartitionDescMap);
enabledConditionsMetSet.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTOR_DESERIALIZE.varname);
return true;
@@ -1262,15 +1291,27 @@ public class Vectorizer implements PhysicalPlanResolver {
// inspect-able Object[] row to a VectorizedRowBatch in the VectorMapOperator.
if (useRowDeserialize) {
- if (!isInputFormatExcluded(inputFileFormatClassName, rowDeserializeInputFormatExcludes)) {
- pd.setVectorPartitionDesc(
+ boolean isRowDeserializeExcluded =
+ isInputFormatExcluded(inputFileFormatClassName, rowDeserializeInputFormatExcludes);
+ if (!isRowDeserializeExcluded && !isInputFileFormatVectorized) {
+ addVectorPartitionDesc(
+ pd,
VectorPartitionDesc.createRowDeserialize(
inputFileFormatClassName,
Utilities.isInputFileFormatSelfDescribing(pd),
- deserializerClassName));
-
+ deserializerClassName),
+ vectorPartitionDescMap);
+
enabledConditionsMetSet.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_ROW_DESERIALIZE.varname);
return true;
+ } else if (isInputFileFormatVectorized) {
+
+ /*
+ * Vectorizer does not vectorize in row deserialize mode if the input format has
+ * VectorizedInputFormat so input formats will be clear if the isVectorized flag
+ * is on, they are doing VRB work.
+ */
+ enabledConditionsNotMetList.add("Row deserialization of vectorized input format not supported");
} else {
enabledConditionsNotMetList.add(ConfVars.HIVE_VECTORIZATION_USE_ROW_DESERIALIZE.varname
+ " IS true AND " + ConfVars.HIVE_VECTORIZATION_ROW_DESERIALIZE_INPUTFORMAT_EXCLUDES.varname
@@ -1369,6 +1410,8 @@ public class Vectorizer implements PhysicalPlanResolver {
// Remember the input file formats we validated and why.
Set<String> inputFileFormatClassNameSet = new HashSet<String>();
+ Map<VectorPartitionDesc, VectorPartitionDesc> vectorPartitionDescMap =
+ new LinkedHashMap<VectorPartitionDesc, VectorPartitionDesc>();
Set<String> enabledConditionsMetSet = new HashSet<String>();
ArrayList<String> enabledConditionsNotMetList = new ArrayList<String>();
Set<Support> inputFormatSupportSet = new TreeSet<Support>();
@@ -1395,11 +1438,15 @@ public class Vectorizer implements PhysicalPlanResolver {
if (!verifyAndSetVectorPartDesc(
partDesc, isAcidTable,
inputFileFormatClassNameSet,
+ vectorPartitionDescMap,
enabledConditionsMetSet, enabledConditionsNotMetList,
newSupportSet)) {
// Always set these so EXPLAIN can see.
mapWork.setVectorizationInputFileFormatClassNameSet(inputFileFormatClassNameSet);
+ ArrayList<VectorPartitionDesc> vectorPartitionDescList = new ArrayList<VectorPartitionDesc>();
+ vectorPartitionDescList.addAll(vectorPartitionDescMap.keySet());
+ mapWork.setVectorPartitionDescList(vectorPartitionDescList);
mapWork.setVectorizationEnabledConditionsMet(new ArrayList(enabledConditionsMetSet));
mapWork.setVectorizationEnabledConditionsNotMet(enabledConditionsNotMetList);
@@ -1517,6 +1564,9 @@ public class Vectorizer implements PhysicalPlanResolver {
// Always set these so EXPLAIN can see.
mapWork.setVectorizationInputFileFormatClassNameSet(inputFileFormatClassNameSet);
+ ArrayList<VectorPartitionDesc> vectorPartitionDescList = new ArrayList<VectorPartitionDesc>();
+ vectorPartitionDescList.addAll(vectorPartitionDescMap.keySet());
+ mapWork.setVectorPartitionDescList(vectorPartitionDescList);
mapWork.setVectorizationEnabledConditionsMet(new ArrayList(enabledConditionsMetSet));
mapWork.setVectorizationEnabledConditionsNotMet(enabledConditionsNotMetList);
http://git-wip-us.apache.org/repos/asf/hive/blob/255cf4ab/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
index 92946f5..f2b2fc5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
@@ -149,6 +149,7 @@ public class MapWork extends BaseWork {
private VectorizerReason notEnabledInputFileFormatReason;
private Set<String> vectorizationInputFileFormatClassNameSet;
+ private List<VectorPartitionDesc> vectorPartitionDescList;
private List<String> vectorizationEnabledConditionsMet;
private List<String> vectorizationEnabledConditionsNotMet;
@@ -268,7 +269,7 @@ public class MapWork extends BaseWork {
boolean canWrapAny = false, doCheckIfs = false;
if (isLlapOn) {
// We can wrap inputs if the execution is vectorized, or if we use a wrapper.
- canWrapAny = Utilities.getUseVectorizedInputFileFormat(conf, this);
+ canWrapAny = Utilities.getIsVectorized(conf, this);
// ExecDriver has no plan path, so we cannot derive VRB stuff for the wrapper.
if (!canWrapAny && !isExecDriver) {
canWrapAny = HiveConf.getBoolVar(conf, ConfVars.LLAP_IO_NONVECTOR_WRAPPER_ENABLED);
@@ -792,6 +793,14 @@ public class MapWork extends BaseWork {
return vectorizationInputFileFormatClassNameSet;
}
+ public void setVectorPartitionDescList(List<VectorPartitionDesc> vectorPartitionDescList) {
+ this.vectorPartitionDescList = vectorPartitionDescList;
+ }
+
+ public List<VectorPartitionDesc> getVectorPartitionDescList() {
+ return vectorPartitionDescList;
+ }
+
public void setVectorizationEnabledConditionsMet(ArrayList<String> vectorizationEnabledConditionsMet) {
this.vectorizationEnabledConditionsMet = VectorizationCondition.addBooleans(vectorizationEnabledConditionsMet, true);
}
@@ -822,6 +831,14 @@ public class MapWork extends BaseWork {
return mapWork.getVectorizationInputFileFormatClassNameSet();
}
+ /*
+ // Too many Q out file changes for the moment...
+ @Explain(vectorization = Vectorization.DETAIL, displayName = "vectorPartitionDescs", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+ public String vectorPartitionDescs() {
+ return mapWork.getVectorPartitionDescList().toString();
+ }
+ */
+
@Explain(vectorization = Vectorization.SUMMARY, displayName = "inputFormatFeatureSupport", explainLevels = { Level.DEFAULT, Level.EXTENDED })
public String getInputFormatSupport() {
Set<Support> inputFormatSupportSet = mapWork.getInputFormatSupportSet();
http://git-wip-us.apache.org/repos/asf/hive/blob/255cf4ab/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionDesc.java
index 787d0c2..2c8904d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionDesc.java
@@ -227,24 +227,25 @@ public class VectorPartitionDesc {
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
- sb.append("vector map operator read type ");
+ sb.append("(");
sb.append(vectorMapOperatorReadType.name());
- sb.append(", input file format class name ");
+ sb.append(", ");
sb.append(inputFileFormatClassName);
switch (vectorMapOperatorReadType) {
case VECTORIZED_INPUT_FILE_FORMAT:
break;
case VECTOR_DESERIALIZE:
- sb.append(", deserialize type ");
+ sb.append(", ");
sb.append(vectorDeserializeType.name());
break;
case ROW_DESERIALIZE:
- sb.append(", deserializer class name ");
+ sb.append(", ");
sb.append(rowDeserializerClassName);
break;
default:
throw new RuntimeException("Unexpected vector map operator read type " + vectorMapOperatorReadType.name());
}
+ sb.append(")");
return sb.toString();
}
http://git-wip-us.apache.org/repos/asf/hive/blob/255cf4ab/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part_llap_io.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part_llap_io.q b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part_llap_io.q
new file mode 100644
index 0000000..a526f57
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part_llap_io.q
@@ -0,0 +1,320 @@
+set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+set hive.mapred.mode=nonstrict;
+set hive.cli.print.header=true;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.exec.schema.evolution=false;
+SET hive.vectorized.use.vectorized.input.format=true;
+SET hive.vectorized.use.vector.serde.deserialize=false;
+SET hive.vectorized.use.row.serde.deserialize=false;
+SET hive.vectorized.execution.enabled=false;
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.metastore.disallow.incompatible.col.type.changes=true;
+set hive.default.fileformat=orc;
+set hive.llap.io.enabled=true;
+set hive.llap.io.encode.enabled=true;
+
+-- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: ORC, ACID Non-Vectorized, MapWork, Partitioned
+-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID.
+-- Also, we don't do regular EXPLAINs on ACID files because the transaction id causes Q file statistics differences...
+-- Instead just one explain vectorization only detail
+--
+
+CREATE TABLE schema_evolution_data(insert_num int, boolean1 boolean, tinyint1 tinyint, smallint1 smallint, int1 int, bigint1 bigint, decimal1 decimal(38,18), float1 float, double1 double, string1 string, string2 string, date1 date, timestamp1 timestamp, boolean_str string, tinyint_str string, smallint_str string, int_str string, bigint_str string, decimal_str string, float_str string, double_str string, date_str string, timestamp_str string, filler string)
+row format delimited fields terminated by '|' stored as textfile;
+load data local inpath '../../data/files/schema_evolution/schema_evolution_data.txt' overwrite into table schema_evolution_data;
+
+------------------------------------------------------------------------------------------
+-- SECTION: ALTER TABLE ADD COLUMNS
+--
+--
+-- SUBSECTION: ALTER TABLE ADD COLUMNS: INT PERMUTE SELECT
+--
+--
+CREATE TABLE part_add_int_permute_select(insert_num int, a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table part_add_int_permute_select partition(part=1) VALUES (1, 1111, 'new');
+
+-- Table-Non-Cascade ADD COLUMNS ...
+alter table part_add_int_permute_select add columns(c int);
+
+insert into table part_add_int_permute_select partition(part=1) VALUES (2, 2222, 'new', 3333);
+
+-- SELECT permutation columns to make sure NULL defaulting works right
+select insert_num,part,a,b from part_add_int_permute_select;
+select insert_num,part,a,b,c from part_add_int_permute_select;
+select insert_num,part,c from part_add_int_permute_select;
+
+drop table part_add_int_permute_select;
+
+
+-- SUBSECTION: ALTER TABLE ADD COLUMNS: INT, STRING, PERMUTE SELECT
+--
+--
+CREATE TABLE part_add_int_string_permute_select(insert_num int, a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table part_add_int_string_permute_select partition(part=1) VALUES (1, 1111, 'new');
+
+-- Table-Non-Cascade ADD COLUMNS ...
+alter table part_add_int_string_permute_select add columns(c int, d string);
+
+insert into table part_add_int_string_permute_select partition(part=1) VALUES (2, 2222, 'new', 3333, '4444');
+
+explain vectorization only detail
+select insert_num,part,a,b,c,d from part_add_int_string_permute_select;
+
+-- SELECT permutation columns to make sure NULL defaulting works right
+select insert_num,part,a,b from part_add_int_string_permute_select;
+select insert_num,part,a,b,c from part_add_int_string_permute_select;
+select insert_num,part,a,b,c,d from part_add_int_string_permute_select;
+select insert_num,part,a,c,d from part_add_int_string_permute_select;
+select insert_num,part,a,d from part_add_int_string_permute_select;
+select insert_num,part,c from part_add_int_string_permute_select;
+select insert_num,part,d from part_add_int_string_permute_select;
+
+drop table part_add_int_string_permute_select;
+
+
+
+------------------------------------------------------------------------------------------
+-- SECTION: ALTER TABLE CHANGE COLUMNS for STRING_GROUP -> DOUBLE
+--
+--
+--
+-- SUBSECTION: ALTER TABLE CHANGE COLUMNS for STRING_GROUP -> DOUBLE: (STRING, CHAR, VARCHAR)
+--
+CREATE TABLE part_change_string_group_double(insert_num int, c1 STRING, c2 CHAR(50), c3 VARCHAR(50), b STRING) PARTITIONED BY(part INT) clustered by (c1) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table part_change_string_group_double partition(part=1) SELECT insert_num, double_str, double_str, double_str, 'original' FROM schema_evolution_data;
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
+alter table part_change_string_group_double replace columns (insert_num int, c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, b STRING);
+
+insert into table part_change_string_group_double partition(part=1) SELECT insert_num, double1, double1, double1, 'new' FROM schema_evolution_data WHERE insert_num = 111;
+
+select insert_num,part,c1,c2,c3,b from part_change_string_group_double;
+
+drop table part_change_string_group_double;
+
+------------------------------------------------------------------------------------------
+-- SECTION: ALTER TABLE CHANGE COLUMNS for DATE_GROUP -> STRING_GROUP
+--
+--
+--
+-- SUBSECTION: ALTER TABLE CHANGE COLUMNS for DATE_GROUP -> STRING_GROUP: DATE,TIMESTAMP, (STRING, CHAR, CHAR trunc, VARCHAR, VARCHAR trunc)
+--
+CREATE TABLE part_change_date_group_string_group_date_timestamp(insert_num int, c1 DATE, c2 DATE, c3 DATE, c4 DATE, c5 DATE, c6 TIMESTAMP, c7 TIMESTAMP, c8 TIMESTAMP, c9 TIMESTAMP, c10 TIMESTAMP, b STRING) PARTITIONED BY(part INT) clustered by (c1) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table part_change_date_group_string_group_date_timestamp partition(part=1) SELECT insert_num, date1, date1, date1, date1, date1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, 'original' FROM schema_evolution_data;
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
+alter table part_change_date_group_string_group_date_timestamp replace columns(insert_num int, c1 STRING, c2 CHAR(50), c3 CHAR(15), c4 VARCHAR(50), c5 VARCHAR(15), c6 STRING, c7 CHAR(50), c8 CHAR(15), c9 VARCHAR(50), c10 VARCHAR(15), b STRING);
+
+insert into table part_change_date_group_string_group_date_timestamp partition(part=1) VALUES (111, 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new');
+
+select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp;
+
+drop table part_change_date_group_string_group_date_timestamp;
+
+
+
+
+------------------------------------------------------------------------------------------
+-- SECTION: ALTER TABLE CHANGE COLUMNS for NUMERIC_GROUP -> STRING_GROUP
+--
+--
+--
+-- SUBSECTION: ALTER TABLE CHANGE COLUMNS for NUMERIC_GROUP -> STRING_GROUP:
+-- (TINYINT, SMALLINT, INT, BIGINT), STRING and
+-- (TINYINT, SMALLINT, INT, BIGINT), CHAR and CHAR trunc and
+-- (TINYINT, SMALLINT, INT, BIGINT), VARCHAR and VARCHAR trunc
+--
+--
+CREATE TABLE part_change_numeric_group_string_group_multi_ints_string_group(insert_num int,
+ c1 tinyint, c2 smallint, c3 int, c4 bigint,
+ c5 tinyint, c6 smallint, c7 int, c8 bigint, c9 tinyint, c10 smallint, c11 int, c12 bigint,
+ c13 tinyint, c14 smallint, c15 int, c16 bigint, c17 tinyint, c18 smallint, c19 int, c20 bigint,
+ b STRING) PARTITIONED BY(part INT);
+
+insert into table part_change_numeric_group_string_group_multi_ints_string_group partition(part=1) SELECT insert_num,
+ tinyint1, smallint1, int1, bigint1,
+ tinyint1, smallint1, int1, bigint1, tinyint1, smallint1, int1, bigint1,
+ tinyint1, smallint1, int1, bigint1, tinyint1, smallint1, int1, bigint1,
+ 'original' FROM schema_evolution_data;
+
+select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group;
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
+alter table part_change_numeric_group_string_group_multi_ints_string_group replace columns (insert_num int,
+ c1 STRING, c2 STRING, c3 STRING, c4 STRING,
+ c5 CHAR(50), c6 CHAR(50), c7 CHAR(50), c8 CHAR(50), c9 CHAR(5), c10 CHAR(5), c11 CHAR(5), c12 CHAR(5),
+ c13 VARCHAR(50), c14 VARCHAR(50), c15 VARCHAR(50), c16 VARCHAR(50), c17 VARCHAR(5), c18 VARCHAR(5), c19 VARCHAR(5), c20 VARCHAR(5),
+ b STRING) ;
+
+insert into table part_change_numeric_group_string_group_multi_ints_string_group partition(part=1) VALUES (111,
+ 'filler', 'filler', 'filler', 'filler',
+ 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler',
+ 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler',
+ 'new');
+
+select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group;
+
+drop table part_change_numeric_group_string_group_multi_ints_string_group;
+
+
+
+--
+-- SUBSECTION: ALTER TABLE CHANGE COLUMNS for NUMERIC_GROUP -> STRING_GROUP:
+-- (DECIMAL, FLOAT, DOUBLE), STRING and
+-- (DECIMAL, FLOAT, DOUBLE), CHAR and CHAR trunc and
+-- (DECIMAL, FLOAT, DOUBLE), VARCHAR and VARCHAR trunc
+--
+--
+CREATE TABLE part_change_numeric_group_string_group_floating_string_group(insert_num int,
+ c1 decimal(38,18), c2 float, c3 double,
+ c4 decimal(38,18), c5 float, c6 double, c7 decimal(38,18), c8 float, c9 double,
+ c10 decimal(38,18), c11 float, c12 double, c13 decimal(38,18), c14 float, c15 double,
+ b STRING) PARTITIONED BY(part INT);
+
+insert into table part_change_numeric_group_string_group_floating_string_group partition(part=1) SELECT insert_num,
+ decimal1, float1, double1,
+ decimal1, float1, double1, decimal1, float1, double1,
+ decimal1, float1, double1, decimal1, float1, double1,
+ 'original' FROM schema_evolution_data;
+
+select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group;
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
+alter table part_change_numeric_group_string_group_floating_string_group replace columns (insert_num int,
+ c1 STRING, c2 STRING, c3 STRING,
+ c4 CHAR(50), c5 CHAR(50), c6 CHAR(50), c7 CHAR(7), c8 CHAR(7), c9 CHAR(7),
+ c10 VARCHAR(50), c11 VARCHAR(50), c12 VARCHAR(50), c13 VARCHAR(7), c14 VARCHAR(7), c15 VARCHAR(7),
+ b STRING);
+
+insert into table part_change_numeric_group_string_group_floating_string_group partition(part=1) VALUES (111,
+ 'filler', 'filler', 'filler',
+ 'filler', 'filler', 'filler', 'filler', 'filler', 'filler',
+ 'filler', 'filler', 'filler', 'filler', 'filler', 'filler',
+ 'new');
+
+select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group;
+
+drop table part_change_numeric_group_string_group_floating_string_group;
+
+
+
+------------------------------------------------------------------------------------------
+-- SECTION: ALTER TABLE CHANGE COLUMNS for STRING_GROUP -> STRING_GROUP
+--
+--
+--
+-- SUBSECTION: ALTER TABLE CHANGE COLUMNS for STRING_GROUP -> STRING_GROUP: STRING, (CHAR, CHAR trunc, VARCHAR, VARCHAR trunc) and
+-- CHAR, (VARCHAR, VARCHAR trunc, STRING) and VARCHAR, (CHAR, CHAR trunc, STRING)
+--
+CREATE TABLE part_change_string_group_string_group_string(insert_num int,
+ c1 string, c2 string, c3 string, c4 string,
+ c5 CHAR(50), c6 CHAR(50), c7 CHAR(50),
+ c8 VARCHAR(50), c9 VARCHAR(50), c10 VARCHAR(50), b STRING) PARTITIONED BY(part INT) clustered by (c1) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table part_change_string_group_string_group_string partition(part=1) SELECT insert_num,
+ string2, string2, string2, string2,
+ string2, string2, string2,
+ string2, string2, string2,
+ 'original' FROM schema_evolution_data;
+
+select insert_num,part,c1,c2,c3,c4,b from part_change_string_group_string_group_string;
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
+alter table part_change_string_group_string_group_string replace columns (insert_num int,
+ c1 CHAR(50), c2 CHAR(9), c3 VARCHAR(50), c4 CHAR(9),
+ c5 VARCHAR(50), c6 VARCHAR(9), c7 STRING,
+ c8 CHAR(50), c9 CHAR(9), c10 STRING, b STRING) ;
+
+insert into table part_change_string_group_string_group_string partition(part=1) VALUES (111,
+ 'filler', 'filler', 'filler', 'filler',
+ 'filler', 'filler', 'filler',
+ 'filler', 'filler', 'filler',
+ 'new');
+
+select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string;
+
+drop table part_change_string_group_string_group_string;
+
+
+------------------------------------------------------------------------------------------
+-- SECTION: ALTER TABLE CHANGE COLUMNS for "lower" type to "higher" NUMERIC_GROUP
+--
+--
+--
+-- SUBSECTION: ALTER TABLE CHANGE COLUMNS for "lower" type to "higher" NUMERIC_GROUP:
+-- TINYINT, (SMALLINT, INT, BIGINT, DECIMAL, FLOAT, DOUBLE) and
+-- SMALLINT, (INT, BIGINT, DECIMAL, FLOAT, DOUBLE) and
+-- INT, (BIGINT, DECIMAL, FLOAT, DOUBLE) and
+-- BIGINT, (DECIMAL, FLOAT, DOUBLE)
+--
+CREATE TABLE part_change_lower_to_higher_numeric_group_tinyint_to_bigint(insert_num int,
+ c1 tinyint, c2 tinyint, c3 tinyint, c4 tinyint, c5 tinyint, c6 tinyint,
+ c7 smallint, c8 smallint, c9 smallint, c10 smallint, c11 smallint,
+ c12 int, c13 int, c14 int, c15 int,
+ c16 bigint, c17 bigint, c18 bigint,
+ b STRING) PARTITIONED BY(part INT) clustered by (c1) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table part_change_lower_to_higher_numeric_group_tinyint_to_bigint partition(part=1) SELECT insert_num,
+ tinyint1, tinyint1, tinyint1, tinyint1, tinyint1, tinyint1,
+ smallint1, smallint1, smallint1, smallint1, smallint1,
+ int1, int1, int1, int1,
+ bigint1, bigint1, bigint1,
+ 'original' FROM schema_evolution_data;
+
+select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint;
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
+alter table part_change_lower_to_higher_numeric_group_tinyint_to_bigint replace columns (insert_num int,
+ c1 SMALLINT, c2 INT, c3 BIGINT, c4 decimal(38,18), c5 FLOAT, c6 DOUBLE,
+ c7 INT, c8 BIGINT, c9 decimal(38,18), c10 FLOAT, c11 DOUBLE,
+ c12 BIGINT, c13 decimal(38,18), c14 FLOAT, c15 DOUBLE,
+ c16 decimal(38,18), c17 FLOAT, c18 DOUBLE,
+ b STRING) ;
+
+insert into table part_change_lower_to_higher_numeric_group_tinyint_to_bigint partition(part=1) VALUES (111,
+ 7000, 80000, 90000000, 1234.5678, 9876.543, 789.321,
+ 80000, 90000000, 1234.5678, 9876.543, 789.321,
+ 90000000, 1234.5678, 9876.543, 789.321,
+ 1234.5678, 9876.543, 789.321,
+ 'new');
+
+select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint;
+
+drop table part_change_lower_to_higher_numeric_group_tinyint_to_bigint;
+
+
+
+--
+-- SUBSECTION: ALTER TABLE CHANGE COLUMNS for "lower" type to "higher" NUMERIC_GROUP:
+-- DECIMAL, (FLOAT, DOUBLE) and
+-- FLOAT, (DOUBLE)
+--
+CREATE TABLE part_change_lower_to_higher_numeric_group_decimal_to_float(insert_num int,
+ c1 decimal(38,18), c2 decimal(38,18),
+ c3 float,
+ b STRING) PARTITIONED BY(part INT) clustered by (c1) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table part_change_lower_to_higher_numeric_group_decimal_to_float partition(part=1) SELECT insert_num,
+ decimal1, decimal1,
+ float1,
+ 'original' FROM schema_evolution_data;
+
+select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float;
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
+alter table part_change_lower_to_higher_numeric_group_decimal_to_float replace columns (insert_num int, c1 float, c2 double, c3 DOUBLE, b STRING) ;
+
+insert into table part_change_lower_to_higher_numeric_group_decimal_to_float partition(part=1) VALUES (111, 1234.5678, 9876.543, 1234.5678, 'new');
+
+select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float;
+
+drop table part_change_lower_to_higher_numeric_group_decimal_to_float;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/255cf4ab/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part_update_llap_io.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part_update_llap_io.q b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part_update_llap_io.q
new file mode 100644
index 0000000..1866fc8
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part_update_llap_io.q
@@ -0,0 +1,161 @@
+set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+set hive.mapred.mode=nonstrict;
+set hive.cli.print.header=true;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.exec.schema.evolution=false;
+SET hive.vectorized.use.vectorized.input.format=true;
+SET hive.vectorized.use.vector.serde.deserialize=false;
+SET hive.vectorized.use.row.serde.deserialize=false;
+SET hive.vectorized.execution.enabled=false;
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.metastore.disallow.incompatible.col.type.changes=true;
+set hive.default.fileformat=orc;
+set hive.llap.io.enabled=true;
+set hive.llap.io.encode.enabled=true;
+
+-- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: ORC, ACID Non-Vectorized, MapWork, Partitioned
+-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID.
+-- Also, we don't do EXPLAINs on ACID files because the transaction id causes Q file statistics differences...
+--
+
+CREATE TABLE schema_evolution_data(insert_num int, boolean1 boolean, tinyint1 tinyint, smallint1 smallint, int1 int, bigint1 bigint, decimal1 decimal(38,18), float1 float, double1 double, string1 string, string2 string, date1 date, timestamp1 timestamp, boolean_str string, tinyint_str string, smallint_str string, int_str string, bigint_str string, decimal_str string, float_str string, double_str string, date_str string, timestamp_str string, filler string)
+row format delimited fields terminated by '|' stored as textfile;
+load data local inpath '../../data/files/schema_evolution/schema_evolution_data.txt' overwrite into table schema_evolution_data;
+
+CREATE TABLE schema_evolution_data_2(insert_num int, boolean1 boolean, tinyint1 tinyint, smallint1 smallint, int1 int, bigint1 bigint, decimal1 decimal(38,18), float1 float, double1 double, string1 string, string2 string, date1 date, timestamp1 timestamp, boolean_str string, tinyint_str string, smallint_str string, int_str string, bigint_str string, decimal_str string, float_str string, double_str string, date_str string, timestamp_str string, filler string)
+row format delimited fields terminated by '|' stored as textfile;
+load data local inpath '../../data/files/schema_evolution/schema_evolution_data_2.txt' overwrite into table schema_evolution_data_2;
+
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... UPDATE New Columns
+---
+CREATE TABLE partitioned_update_1(insert_num int, a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table partitioned_update_1 partition(part=1) SELECT insert_num, int1, 'original' FROM schema_evolution_data;
+
+-- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned_update_1 add columns(c int, d string);
+
+insert into table partitioned_update_1 partition(part=2) SELECT insert_num, int1, 'new', int1, string1 FROM schema_evolution_data_2 WHERE insert_num <=110;
+
+insert into table partitioned_update_1 partition(part=1) SELECT insert_num, int1, 'new', int1, string1 FROM schema_evolution_data_2 WHERE insert_num > 110;
+
+select insert_num,part,a,b,c,d from partitioned_update_1;
+
+-- UPDATE New Columns
+update partitioned_update_1 set c=99;
+
+select insert_num,part,a,b,c,d from partitioned_update_1;
+
+alter table partitioned_update_1 partition(part=1) compact 'major';
+alter table partitioned_update_1 partition(part=2) compact 'major';
+
+select insert_num,part,a,b,c,d from partitioned_update_1;
+
+DROP TABLE partitioned_update_1;
+
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where old column
+---
+CREATE TABLE partitioned_delete_1(insert_num int, a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table partitioned_delete_1 partition(part=1) SELECT insert_num, int1, 'original' FROM schema_evolution_data;
+
+-- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned_delete_1 add columns(c int, d string);
+
+insert into table partitioned_delete_1 partition(part=2) SELECT insert_num, int1, 'new', int1, string1 FROM schema_evolution_data_2 WHERE insert_num <=110;
+
+insert into table partitioned_delete_1 partition(part=1) SELECT insert_num, int1, 'new', int1, string1 FROM schema_evolution_data_2 WHERE insert_num > 110;
+
+select part,a,b,c,d from partitioned_delete_1;
+
+-- DELETE where old column
+delete from partitioned_delete_1 where insert_num = 102 or insert_num = 104 or insert_num = 106;
+
+select insert_num,part,a,b,c,d from partitioned_delete_1;
+
+alter table partitioned_delete_1 partition(part=1) compact 'major';
+alter table partitioned_delete_1 partition(part=2) compact 'major';
+
+select insert_num,part,a,b,c,d from partitioned_delete_1;
+
+DROP TABLE partitioned_delete_1;
+
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where new column
+---
+CREATE TABLE partitioned_delete_2(insert_num int, a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table partitioned_delete_2 partition(part=1) SELECT insert_num, int1, 'original' FROM schema_evolution_data;
+
+-- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned_delete_2 add columns(c int, d string);
+
+insert into table partitioned_delete_2 partition(part=2) SELECT insert_num, int1, 'new', int1, string1 FROM schema_evolution_data_2 WHERE insert_num <=110;
+
+insert into table partitioned_delete_2 partition(part=1) SELECT insert_num, int1, 'new', int1, string1 FROM schema_evolution_data_2 WHERE insert_num > 110;
+
+select insert_num,part,a,b,c,d from partitioned_delete_2;
+
+-- DELETE where new column
+delete from partitioned_delete_2 where insert_num = 108 or insert_num > 113;
+
+select insert_num,part,a,b,c,d from partitioned_delete_2;
+
+alter table partitioned_delete_2 partition(part=1) compact 'major';
+alter table partitioned_delete_2 partition(part=2) compact 'major';
+
+select insert_num,part,a,b,c,d from partitioned_delete_2;
+
+DROP TABLE partitioned_delete_2;
+
+--following tests is moved from system tests
+drop table if exists missing_ddl_2;
+create table missing_ddl_2(name string, age int);
+insert overwrite table missing_ddl_2 select value, key from srcbucket;
+alter table missing_ddl_2 add columns (gps double);
+
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.optimize.sort.dynamic.partition=true;
+
+DROP TABLE IF EXISTS all100kjson_textfile_orc;
+CREATE TABLE all100kjson_textfile_orc (
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ s string,
+ bo boolean,
+ ts timestamp)
+ PARTITIONED BY (t tinyint)
+ ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
+ WITH SERDEPROPERTIES ('timestamp.formats'='yyyy-MM-dd\'T\'HH:mm:ss')
+ STORED AS TEXTFILE;
+
+INSERT INTO TABLE all100kjson_textfile_orc PARTITION (t) SELECT csmallint, cint, cbigint, cfloat, cdouble, cstring1, cboolean1, ctimestamp1, ctinyint FROM alltypesorc WHERE ctinyint > 0;
+
+ALTER TABLE all100kjson_textfile_orc
+ SET FILEFORMAT
+ INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
+ OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
+ SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde';
+
+INSERT INTO TABLE all100kjson_textfile_orc PARTITION (t) SELECT csmallint, cint, cbigint, cfloat, cdouble, cstring1, cboolean1, ctimestamp1, ctinyint FROM alltypesorc WHERE ctinyint < 1 and ctinyint > -50 ;
+
+-- HIVE-11977: Hive should handle an external avro table with zero length files present
+DROP TABLE IF EXISTS emptyavro;
+CREATE TABLE emptyavro (i int)
+ PARTITIONED BY (s string)
+ STORED AS AVRO;
+load data local inpath '../../data/files/empty1.txt' into table emptyavro PARTITION (s='something');
+SELECT COUNT(*) from emptyavro;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/255cf4ab/ql/src/test/queries/clientpositive/schema_evol_orc_acid_table_llap_io.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_acid_table_llap_io.q b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_table_llap_io.q
new file mode 100644
index 0000000..66e6da4
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_table_llap_io.q
@@ -0,0 +1,318 @@
+set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+set hive.cli.print.header=true;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.exec.schema.evolution=false;
+SET hive.vectorized.use.vectorized.input.format=true;
+SET hive.vectorized.use.vector.serde.deserialize=false;
+SET hive.vectorized.use.row.serde.deserialize=false;
+SET hive.vectorized.execution.enabled=false;
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.metastore.disallow.incompatible.col.type.changes=true;
+set hive.default.fileformat=orc;
+set hive.llap.io.enabled=true;
+set hive.llap.io.encode.enabled=true;
+
+-- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: ORC, ACID Non-Vectorized, MapWork, Table
+-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID.
+-- Also, we don't do regular EXPLAINs on ACID files because the transaction id causes Q file statistics differences...
+-- Instead just one explain vectorization only detail
+--
+
+CREATE TABLE schema_evolution_data(insert_num int, boolean1 boolean, tinyint1 tinyint, smallint1 smallint, int1 int, bigint1 bigint, decimal1 decimal(38,18), float1 float, double1 double, string1 string, string2 string, date1 date, timestamp1 timestamp, boolean_str string, tinyint_str string, smallint_str string, int_str string, bigint_str string, decimal_str string, float_str string, double_str string, date_str string, timestamp_str string, filler string)
+row format delimited fields terminated by '|' stored as textfile;
+load data local inpath '../../data/files/schema_evolution/schema_evolution_data.txt' overwrite into table schema_evolution_data;
+
+------------------------------------------------------------------------------------------
+-- SECTION: ALTER TABLE ADD COLUMNS
+--
+--
+-- SUBSECTION: ALTER TABLE ADD COLUMNS: INT PERMUTE SELECT
+--
+--
+CREATE TABLE table_add_int_permute_select(insert_num int, a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table table_add_int_permute_select SELECT insert_num, int1, 'original' FROM schema_evolution_data;
+
+-- Table-Non-Cascade ADD COLUMNS ...
+alter table table_add_int_permute_select add columns(c int);
+
+insert into table table_add_int_permute_select VALUES (111, 80000, 'new', 80000);
+
+explain vectorization only detail
+select insert_num,a,b,c from table_add_int_permute_select;
+
+-- SELECT permutation columns to make sure NULL defaulting works right
+select insert_num,a,b from table_add_int_permute_select;
+select insert_num,a,b,c from table_add_int_permute_select;
+select insert_num,c from table_add_int_permute_select;
+
+drop table table_add_int_permute_select;
+
+
+-- SUBSECTION: ALTER TABLE ADD COLUMNS: INT, STRING, PERMUTE SELECT
+--
+--
+CREATE TABLE table_add_int_string_permute_select(insert_num int, a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table table_add_int_string_permute_select SELECT insert_num, int1, 'original' FROM schema_evolution_data;
+
+-- Table-Non-Cascade ADD COLUMNS ...
+alter table table_add_int_string_permute_select add columns(c int, d string);
+
+insert into table table_add_int_string_permute_select VALUES (111, 80000, 'new', 80000, 'filler');
+
+-- SELECT permutation columns to make sure NULL defaulting works right
+select insert_num,a,b from table_add_int_string_permute_select;
+select insert_num,a,b,c from table_add_int_string_permute_select;
+select insert_num,a,b,c,d from table_add_int_string_permute_select;
+select insert_num,a,c,d from table_add_int_string_permute_select;
+select insert_num,a,d from table_add_int_string_permute_select;
+select insert_num,c from table_add_int_string_permute_select;
+select insert_num,d from table_add_int_string_permute_select;
+
+drop table table_add_int_string_permute_select;
+
+
+
+------------------------------------------------------------------------------------------
+-- SECTION: ALTER TABLE CHANGE COLUMNS for STRING_GROUP -> DOUBLE
+--
+--
+--
+-- SUBSECTION: ALTER TABLE CHANGE COLUMNS for STRING_GROUP -> DOUBLE: (STRING, CHAR, VARCHAR)
+--
+CREATE TABLE table_change_string_group_double(insert_num int, c1 STRING, c2 CHAR(50), c3 VARCHAR(50), b STRING) clustered by (c1) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table table_change_string_group_double SELECT insert_num, double_str, double_str, double_str, 'original' FROM schema_evolution_data;
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
+alter table table_change_string_group_double replace columns (insert_num int, c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, b STRING);
+
+insert into table table_change_string_group_double VALUES (111, 789.321, 789.321, 789.321, 'new');
+
+select insert_num,c1,c2,c3,b from table_change_string_group_double;
+
+drop table table_change_string_group_double;
+
+------------------------------------------------------------------------------------------
+-- SECTION: ALTER TABLE CHANGE COLUMNS for DATE_GROUP -> STRING_GROUP
+--
+--
+--
+-- SUBSECTION: ALTER TABLE CHANGE COLUMNS for DATE_GROUP -> STRING_GROUP: DATE,TIMESTAMP, (STRING, CHAR, CHAR trunc, VARCHAR, VARCHAR trunc)
+--
+CREATE TABLE table_change_date_group_string_group_date_group(insert_num int, c1 DATE, c2 DATE, c3 DATE, c4 DATE, c5 DATE, c6 TIMESTAMP, c7 TIMESTAMP, c8 TIMESTAMP, c9 TIMESTAMP, c10 TIMESTAMP, b STRING) clustered by (c1) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table table_change_date_group_string_group_date_group SELECT insert_num, date1, date1, date1, date1, date1, timestamp1, timestamp1, timestamp1, timestamp1, timestamp1, 'original' FROM schema_evolution_data;
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
+alter table table_change_date_group_string_group_date_group replace columns(insert_num int, c1 STRING, c2 CHAR(50), c3 CHAR(15), c4 VARCHAR(50), c5 VARCHAR(15), c6 STRING, c7 CHAR(50), c8 CHAR(15), c9 VARCHAR(50), c10 VARCHAR(15), b STRING);
+
+insert into table table_change_date_group_string_group_date_group VALUES (111, 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new');
+
+select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from table_change_date_group_string_group_date_group;
+
+drop table table_change_date_group_string_group_date_group;
+
+
+
+------------------------------------------------------------------------------------------
+-- SECTION: ALTER TABLE CHANGE COLUMNS for NUMERIC_GROUP -> STRING_GROUP
+--
+--
+--
+-- SUBSECTION: ALTER TABLE CHANGE COLUMNS for NUMERIC_GROUP -> STRING_GROUP:
+-- (TINYINT, SMALLINT, INT, BIGINT), STRING and
+-- (TINYINT, SMALLINT, INT, BIGINT), CHAR and CHAR trunc and
+-- (TINYINT, SMALLINT, INT, BIGINT), VARCHAR and VARCHAR trunc
+--
+--
+CREATE TABLE table_change_numeric_group_string_group_multi_ints_string_group(insert_num int,
+ c1 tinyint, c2 smallint, c3 int, c4 bigint,
+ c5 tinyint, c6 smallint, c7 int, c8 bigint, c9 tinyint, c10 smallint, c11 int, c12 bigint,
+ c13 tinyint, c14 smallint, c15 int, c16 bigint, c17 tinyint, c18 smallint, c19 int, c20 bigint,
+ b STRING) clustered by (c1) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table table_change_numeric_group_string_group_multi_ints_string_group SELECT insert_num,
+ tinyint1, smallint1, int1, bigint1,
+ tinyint1, smallint1, int1, bigint1, tinyint1, smallint1, int1, bigint1,
+ tinyint1, smallint1, int1, bigint1, tinyint1, smallint1, int1, bigint1,
+ 'original' FROM schema_evolution_data;
+
+select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group;
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
+alter table table_change_numeric_group_string_group_multi_ints_string_group replace columns (insert_num int,
+ c1 STRING, c2 STRING, c3 STRING, c4 STRING,
+ c5 CHAR(50), c6 CHAR(50), c7 CHAR(50), c8 CHAR(50), c9 CHAR(5), c10 CHAR(5), c11 CHAR(5), c12 CHAR(5),
+ c13 VARCHAR(50), c14 VARCHAR(50), c15 VARCHAR(50), c16 VARCHAR(50), c17 VARCHAR(5), c18 VARCHAR(5), c19 VARCHAR(5), c20 VARCHAR(5),
+ b STRING) ;
+
+insert into table table_change_numeric_group_string_group_multi_ints_string_group VALUES (111,
+ 'filler', 'filler', 'filler', 'filler',
+ 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler',
+ 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'filler',
+ 'new');
+
+select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group;
+
+drop table table_change_numeric_group_string_group_multi_ints_string_group;
+
+
+
+--
+-- SUBSECTION: ALTER TABLE CHANGE COLUMNS for NUMERIC_GROUP -> STRING_GROUP:
+-- (DECIMAL, FLOAT, DOUBLE), STRING and
+-- (DECIMAL, FLOAT, DOUBLE), CHAR and CHAR trunc and
+-- (DECIMAL, FLOAT, DOUBLE), VARCHAR and VARCHAR trunc
+--
+--
+CREATE TABLE table_change_numeric_group_string_group_floating_string_group(insert_num int,
+ c1 decimal(38,18), c2 float, c3 double,
+ c4 decimal(38,18), c5 float, c6 double, c7 decimal(38,18), c8 float, c9 double,
+ c10 decimal(38,18), c11 float, c12 double, c13 decimal(38,18), c14 float, c15 double,
+ b STRING) clustered by (c1) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table table_change_numeric_group_string_group_floating_string_group SELECT insert_num,
+ decimal1, float1, double1,
+ decimal1, float1, double1, decimal1, float1, double1,
+ decimal1, float1, double1, decimal1, float1, double1,
+ 'original' FROM schema_evolution_data;
+
+select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group;
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
+alter table table_change_numeric_group_string_group_floating_string_group replace columns (insert_num int,
+ c1 STRING, c2 STRING, c3 STRING,
+ c4 CHAR(50), c5 CHAR(50), c6 CHAR(50), c7 CHAR(7), c8 CHAR(7), c9 CHAR(7),
+ c10 VARCHAR(50), c11 VARCHAR(50), c12 VARCHAR(50), c13 VARCHAR(7), c14 VARCHAR(7), c15 VARCHAR(7),
+ b STRING);
+
+insert into table table_change_numeric_group_string_group_floating_string_group VALUES (111,
+ 'filler', 'filler', 'filler',
+ 'filler', 'filler', 'filler', 'filler', 'filler', 'filler',
+ 'filler', 'filler', 'filler', 'filler', 'filler', 'filler',
+ 'new');
+
+select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group;
+
+drop table table_change_numeric_group_string_group_floating_string_group;
+
+
+------------------------------------------------------------------------------------------
+-- SECTION: ALTER TABLE CHANGE COLUMNS for STRING_GROUP -> STRING_GROUP
+--
+--
+--
+-- SUBSECTION: ALTER TABLE CHANGE COLUMNS for STRING_GROUP -> STRING_GROUP: STRING, (CHAR, CHAR trunc, VARCHAR, VARCHAR trunc) and
+-- CHAR, (VARCHAR, VARCHAR trunc, STRING) and VARCHAR, (CHAR, CHAR trunc, STRING)
+--
+CREATE TABLE table_change_string_group_string_group_string(insert_num int,
+ c1 string, c2 string, c3 string, c4 string,
+ c5 CHAR(50), c6 CHAR(50), c7 CHAR(50),
+ c8 VARCHAR(50), c9 VARCHAR(50), c10 VARCHAR(50), b STRING) clustered by (c1) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table table_change_string_group_string_group_string SELECT insert_num,
+ string2, string2, string2, string2,
+ string2, string2, string2,
+ string2, string2, string2,
+ 'original' FROM schema_evolution_data;
+
+select insert_num,c1,c2,c3,c4,b from table_change_string_group_string_group_string;
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
+alter table table_change_string_group_string_group_string replace columns (insert_num int,
+ c1 CHAR(50), c2 CHAR(9), c3 VARCHAR(50), c4 CHAR(9),
+ c5 VARCHAR(50), c6 VARCHAR(9), c7 STRING,
+ c8 CHAR(50), c9 CHAR(9), c10 STRING, b STRING) ;
+
+insert into table table_change_string_group_string_group_string VALUES (111,
+ 'filler', 'filler', 'filler', 'filler',
+ 'filler', 'filler', 'filler',
+ 'filler', 'filler', 'filler',
+ 'new');
+
+select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from table_change_string_group_string_group_string;
+
+drop table table_change_string_group_string_group_string;
+
+
+
+------------------------------------------------------------------------------------------
+-- SECTION: ALTER TABLE CHANGE COLUMNS for "lower" type to "higher" NUMERIC_GROUP
+--
+--
+--
+-- SUBSECTION: ALTER TABLE CHANGE COLUMNS for "lower" type to "higher" NUMERIC_GROUP:
+-- TINYINT, (SMALLINT, INT, BIGINT, DECIMAL, FLOAT, DOUBLE) and
+-- SMALLINT, (INT, BIGINT, DECIMAL, FLOAT, DOUBLE) and
+-- INT, (BIGINT, DECIMAL, FLOAT, DOUBLE) and
+-- BIGINT, (DECIMAL, FLOAT, DOUBLE)
+--
+CREATE TABLE table_change_lower_to_higher_numeric_group_tinyint_to_bigint(insert_num int,
+ c1 tinyint, c2 tinyint, c3 tinyint, c4 tinyint, c5 tinyint, c6 tinyint,
+ c7 smallint, c8 smallint, c9 smallint, c10 smallint, c11 smallint,
+ c12 int, c13 int, c14 int, c15 int,
+ c16 bigint, c17 bigint, c18 bigint,
+ b STRING) clustered by (c1) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table table_change_lower_to_higher_numeric_group_tinyint_to_bigint SELECT insert_num,
+ tinyint1, tinyint1, tinyint1, tinyint1, tinyint1, tinyint1,
+ smallint1, smallint1, smallint1, smallint1, smallint1,
+ int1, int1, int1, int1,
+ bigint1, bigint1, bigint1,
+ 'original' FROM schema_evolution_data;
+
+select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from table_change_lower_to_higher_numeric_group_tinyint_to_bigint;
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
+alter table table_change_lower_to_higher_numeric_group_tinyint_to_bigint replace columns (insert_num int,
+ c1 SMALLINT, c2 INT, c3 BIGINT, c4 decimal(38,18), c5 FLOAT, c6 DOUBLE,
+ c7 INT, c8 BIGINT, c9 decimal(38,18), c10 FLOAT, c11 DOUBLE,
+ c12 BIGINT, c13 decimal(38,18), c14 FLOAT, c15 DOUBLE,
+ c16 decimal(38,18), c17 FLOAT, c18 DOUBLE,
+ b STRING) ;
+
+insert into table table_change_lower_to_higher_numeric_group_tinyint_to_bigint VALUES (111,
+ 7000, 80000, 90000000, 1234.5678, 9876.543, 789.321,
+ 80000, 90000000, 1234.5678, 9876.543, 789.321,
+ 90000000, 1234.5678, 9876.543, 789.321,
+ 1234.5678, 9876.543, 789.321,
+ 'new');
+
+select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from table_change_lower_to_higher_numeric_group_tinyint_to_bigint;
+
+drop table table_change_lower_to_higher_numeric_group_tinyint_to_bigint;
+
+
+
+--
+-- SUBSECTION: ALTER TABLE CHANGE COLUMNS for "lower" type to "higher" NUMERIC_GROUP:
+-- DECIMAL, (FLOAT, DOUBLE) and
+-- FLOAT, (DOUBLE)
+--
+CREATE TABLE table_change_lower_to_higher_numeric_group_decimal_to_float(insert_num int,
+ c1 decimal(38,18), c2 decimal(38,18),
+ c3 float,
+ b STRING) clustered by (c1) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table table_change_lower_to_higher_numeric_group_decimal_to_float SELECT insert_num,
+ decimal1, decimal1,
+ float1,
+ 'original' FROM schema_evolution_data;
+
+select insert_num,c1,c2,c3,b from table_change_lower_to_higher_numeric_group_decimal_to_float;
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
+alter table table_change_lower_to_higher_numeric_group_decimal_to_float replace columns (insert_num int, c1 float, c2 double, c3 DOUBLE, b STRING) ;
+
+insert into table table_change_lower_to_higher_numeric_group_decimal_to_float VALUES (111, 1234.5678, 9876.543, 1234.5678, 'new');
+
+select insert_num,c1,c2,c3,b from table_change_lower_to_higher_numeric_group_decimal_to_float;
+
+drop table table_change_lower_to_higher_numeric_group_decimal_to_float;
http://git-wip-us.apache.org/repos/asf/hive/blob/255cf4ab/ql/src/test/queries/clientpositive/schema_evol_orc_acid_table_update_llap_io.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_acid_table_update_llap_io.q b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_table_update_llap_io.q
new file mode 100644
index 0000000..16df15a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_table_update_llap_io.q
@@ -0,0 +1,113 @@
+set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+set hive.cli.print.header=true;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.exec.schema.evolution=false;
+SET hive.vectorized.use.vectorized.input.format=true;
+SET hive.vectorized.use.vector.serde.deserialize=false;
+SET hive.vectorized.use.row.serde.deserialize=false;
+SET hive.vectorized.execution.enabled=false;
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.metastore.disallow.incompatible.col.type.changes=true;
+set hive.default.fileformat=orc;
+set hive.llap.io.enabled=true;
+set hive.llap.io.encode.enabled=true;
+
+-- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: ORC, ACID Non-Vectorized, MapWork, Table
+-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID.
+-- Also, we don't do EXPLAINs on ACID files because the transaction id causes Q file statistics differences...
+--
+
+CREATE TABLE schema_evolution_data(insert_num int, boolean1 boolean, tinyint1 tinyint, smallint1 smallint, int1 int, bigint1 bigint, decimal1 decimal(38,18), float1 float, double1 double, string1 string, string2 string, date1 date, timestamp1 timestamp, boolean_str string, tinyint_str string, smallint_str string, int_str string, bigint_str string, decimal_str string, float_str string, double_str string, date_str string, timestamp_str string, filler string)
+row format delimited fields terminated by '|' stored as textfile;
+load data local inpath '../../data/files/schema_evolution/schema_evolution_data.txt' overwrite into table schema_evolution_data;
+
+CREATE TABLE schema_evolution_data_2(insert_num int, boolean1 boolean, tinyint1 tinyint, smallint1 smallint, int1 int, bigint1 bigint, decimal1 decimal(38,18), float1 float, double1 double, string1 string, string2 string, date1 date, timestamp1 timestamp, boolean_str string, tinyint_str string, smallint_str string, int_str string, bigint_str string, decimal_str string, float_str string, double_str string, date_str string, timestamp_str string, filler string)
+row format delimited fields terminated by '|' stored as textfile;
+load data local inpath '../../data/files/schema_evolution/schema_evolution_data_2.txt' overwrite into table schema_evolution_data_2;
+
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... UPDATE New Columns
+---
+CREATE TABLE table5(insert_num int, a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table table5 SELECT insert_num, int1, 'original' FROM schema_evolution_data;
+
+-- Table-Non-Cascade ADD COLUMNS ...
+alter table table5 add columns(c int, d string);
+
+insert into table table5 SELECT insert_num, int1, 'new', int1, string1 FROM schema_evolution_data_2;
+
+select a,b,c,d from table5;
+
+-- UPDATE New Columns
+update table5 set c=99;
+
+select a,b,c,d from table5;
+
+alter table table5 compact 'major';
+
+select a,b,c,d from table5;
+
+DROP TABLE table5;
+
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where old column
+---
+CREATE TABLE table6(insert_num int, a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table table6 SELECT insert_num, int1, 'original' FROM schema_evolution_data;
+
+-- Table-Non-Cascade ADD COLUMNS ...
+alter table table6 add columns(c int, d string);
+
+insert into table table6 SELECT insert_num, int1, 'new', int1, string1 FROM schema_evolution_data_2 WHERE insert_num <= 110;
+
+insert into table table6 SELECT insert_num, int1, 'new', int1, string1 FROM schema_evolution_data_2 WHERE insert_num > 110;
+
+select a,b,c,d from table6;
+
+-- DELETE where old column
+delete from table6 where insert_num = 102 or insert_num = 104 or insert_num = 106;
+
+select a,b,c,d from table6;
+
+alter table table6 compact 'major';
+
+select a,b,c,d from table6;
+
+DROP TABLE table6;
+
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where new column
+---
+CREATE TABLE table7(insert_num int, a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table table7 SELECT insert_num, int1, 'original' FROM schema_evolution_data;
+
+-- Table-Non-Cascade ADD COLUMNS ...
+alter table table7 add columns(c int, d string);
+
+insert into table table7 SELECT insert_num, int1, 'new', int1, string1 FROM schema_evolution_data_2 WHERE insert_num <= 110;
+
+insert into table table7 SELECT insert_num, int1, 'new', int1, string1 FROM schema_evolution_data_2 WHERE insert_num > 110;
+
+select a,b,c,d from table7;
+
+-- DELETE where new column
+delete from table7 where insert_num = 107 or insert_num >= 110;
+
+select a,b,c,d from table7;
+
+alter table table7 compact 'major';
+
+select a,b,c,d from table7;
+
+DROP TABLE table7;