You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/03/30 09:56:59 UTC
[1/2] hive git commit: HIVE-19043: Vectorization:
LazySimpleDeserializeRead fewer fields handling is broken for Complex Types
(Matt McCline, reviewed by Teddy Choi)
Repository: hive
Updated Branches:
refs/heads/master 470a2f998 -> eea736134
http://git-wip-us.apache.org/repos/asf/hive/blob/eea73613/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
index 5e25c47..953604c 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
@@ -181,7 +181,9 @@ public class BytesColumnVector extends ColumnVector {
if ((nextFree + length) > buffer.length) {
increaseBufferSpace(length);
}
- System.arraycopy(sourceBuf, start, buffer, nextFree, length);
+ if (length > 0) {
+ System.arraycopy(sourceBuf, start, buffer, nextFree, length);
+ }
vector[elementNum] = buffer;
this.start[elementNum] = nextFree;
this.length[elementNum] = length;
[2/2] hive git commit: HIVE-19043: Vectorization:
LazySimpleDeserializeRead fewer fields handling is broken for Complex Types
(Matt McCline, reviewed by Teddy Choi)
Posted by mm...@apache.org.
HIVE-19043: Vectorization: LazySimpleDeserializeRead fewer fields handling is broken for Complex Types (Matt McCline, reviewed by Teddy Choi)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/eea73613
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/eea73613
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/eea73613
Branch: refs/heads/master
Commit: eea736134bc5c96179489ee760427f61d37ad0c4
Parents: 470a2f9
Author: Matt McCline <mm...@hortonworks.com>
Authored: Fri Mar 30 04:56:49 2018 -0500
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Fri Mar 30 04:56:49 2018 -0500
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 +
.../resources/testconfiguration.properties.orig | 1685 ++++++++++++++++++
.../hive/ql/exec/vector/VectorExtractRow.java | 76 +-
.../expressions/VectorUDFStructField.java | 92 +-
.../clientpositive/create_struct_table.q | 29 +-
.../clientpositive/vector_create_struct_table.q | 45 +
.../clientpositive/create_struct_table.q.out | 130 +-
.../llap/vector_create_struct_table.q.out | 336 ++++
.../lazy/fast/LazySimpleDeserializeRead.java | 24 +-
.../hive/ql/exec/vector/BytesColumnVector.java | 4 +-
10 files changed, 2338 insertions(+), 84 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/eea73613/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index a42ae80..e494f8e 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -732,6 +732,7 @@ minillaplocal.query.files=\
vector_char_varchar_1.q,\
vector_complex_all.q,\
vector_complex_join.q,\
+ vector_create_struct_table.q,\
vector_decimal_2.q,\
vector_decimal_udf.q,\
vector_groupby_cube1.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/eea73613/itests/src/test/resources/testconfiguration.properties.orig
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties.orig b/itests/src/test/resources/testconfiguration.properties.orig
new file mode 100644
index 0000000..a42ae80
--- /dev/null
+++ b/itests/src/test/resources/testconfiguration.properties.orig
@@ -0,0 +1,1685 @@
+# Note: the *.shared groups also run on TestCliDriver
+
+# NOTE: files should be listed in alphabetical order
+minimr.query.files=infer_bucket_sort_map_operators.q,\
+ infer_bucket_sort_dyn_part.q,\
+ infer_bucket_sort_merge.q,\
+ infer_bucket_sort_reducers_power_two.q,\
+ infer_bucket_sort_num_buckets.q,\
+ root_dir_external_table.q,\
+ parallel_orderby.q,\
+ bucket_num_reducers.q,\
+ udf_using.q,\
+ index_bitmap3.q,\
+ index_bitmap_auto.q,\
+ scriptfile1.q,\
+ bucket_num_reducers2.q,\
+ bucket_num_reducers_acid.q,\
+ bucket_num_reducers_acid2.q,\
+ scriptfile1_win.q
+
+# These tests are disabled for minimr
+# ql_rewrite_gbtoidx.q,\
+# ql_rewrite_gbtoidx_cbo_1.q,\
+# ql_rewrite_gbtoidx_cbo_2.q,\
+# smb_mapjoin_8.q,\
+
+
+# Tests that are not enabled for CLI Driver
+disabled.query.files=ql_rewrite_gbtoidx.q,\
+ ql_rewrite_gbtoidx_cbo_1.q,\
+ cbo_rp_subq_in.q,\
+ cbo_rp_subq_not_in.q,\
+ cbo_rp_subq_exists.q,\
+ orc_llap.q,\
+ ql_rewrite_gbtoidx_cbo_2.q,\
+ rcfile_merge1.q,\
+ stats_filemetadata.q,\
+ cbo_rp_insert.q,\
+ cbo_rp_lineage2.q
+
+# NOTE: Add tests to minitez only if they are very
+# specific to tez and cannot be added to minillap.
+minitez.query.files.shared=delete_orig_table.q,\
+ orc_merge12.q,\
+ orc_vectorization_ppd.q,\
+ update_orig_table.q,\
+ vector_join_part_col_char.q,\
+ vector_non_string_partition.q,\
+ vectorization_div0.q,\
+ vectorization_limit.q
+
+# NOTE: Add tests to minitez only if they are very
+# specific to tez and cannot be added to minillap.
+minitez.query.files=acid_vectorization_original_tez.q,\
+ explainuser_3.q,\
+ explainanalyze_1.q,\
+ explainanalyze_3.q,\
+ explainanalyze_4.q,\
+ explainanalyze_5.q,\
+ hybridgrace_hashjoin_1.q,\
+ hybridgrace_hashjoin_2.q,\
+ multi_count_distinct.q,\
+ tez-tag.q,\
+ tez_union_with_udf.q,\
+ vectorization_div0.q
+
+
+minillap.shared.query.files=insert_into1.q,\
+ insert_into2.q,\
+ llapdecider.q,\
+ mapreduce1.q,\
+ mapreduce2.q,\
+ mm_all.q,\
+ mm_cttas.q,\
+ orc_merge1.q,\
+ orc_merge10.q,\
+ orc_merge2.q,\
+ orc_merge3.q,\
+ orc_merge4.q,\
+ orc_merge_diff_fs.q,\
+ parallel_colstats.q,\
+ parquet_types_vectorization.q,\
+ parquet_complex_types_vectorization.q,\
+ parquet_map_type_vectorization.q,\
+ parquet_struct_type_vectorization.q,\
+ orc_struct_type_vectorization.q,\
+ union_type_chk.q,\
+ cte_2.q,\
+ cte_4.q,\
+ llap_nullscan.q,\
+ dynamic_partition_pruning_2.q,\
+ tez_union_dynamic_partition.q
+
+minillaplocal.shared.query.files=alter_merge_2_orc.q,\
+ alter_merge_orc.q,\
+ alter_merge_stats_orc.q,\
+ authorization_view_8.q,\
+ auto_join0.q,\
+ auto_join1.q,\
+ auto_join21.q,\
+ auto_join29.q,\
+ auto_join30.q,\
+ auto_join_filters.q,\
+ auto_join_nulls.q,\
+ auto_sortmerge_join_1.q,\
+ auto_sortmerge_join_10.q,\
+ auto_sortmerge_join_11.q,\
+ auto_sortmerge_join_12.q,\
+ auto_sortmerge_join_13.q,\
+ auto_sortmerge_join_14.q,\
+ auto_sortmerge_join_15.q,\
+ auto_sortmerge_join_16.q,\
+ auto_sortmerge_join_2.q,\
+ auto_sortmerge_join_3.q,\
+ auto_sortmerge_join_4.q,\
+ auto_sortmerge_join_5.q,\
+ auto_sortmerge_join_6.q,\
+ auto_sortmerge_join_7.q,\
+ auto_sortmerge_join_8.q,\
+ auto_sortmerge_join_9.q,\
+ autoColumnStats_1.q,\
+ autoColumnStats_10.q,\
+ autoColumnStats_2.q,\
+ bucket2.q,\
+ bucket3.q,\
+ bucket4.q,\
+ bucket_map_join_tez1.q,\
+ bucket_map_join_tez2.q,\
+ cbo_gby.q,\
+ cbo_gby_empty.q,\
+ cbo_join.q,\
+ cbo_limit.q,\
+ cbo_semijoin.q,\
+ cbo_simple_select.q,\
+ cbo_stats.q,\
+ cbo_subq_exists.q,\
+ cbo_subq_in.q,\
+ cbo_subq_not_in.q,\
+ cbo_udf_udaf.q,\
+ cbo_union.q,\
+ cbo_views.q,\
+ cbo_windowing.q,\
+ column_names_with_leading_and_trailing_spaces.q,\
+ constprog_dpp.q,\
+ constprog_semijoin.q,\
+ correlationoptimizer1.q,\
+ count.q,\
+ count_dist_rewrite.q,\
+ create_merge_compressed.q,\
+ cross_join.q,\
+ cross_prod_1.q,\
+ cross_prod_3.q,\
+ cross_prod_4.q,\
+ cross_product_check_1.q,\
+ cross_product_check_2.q,\
+ ctas.q,\
+ cte_1.q,\
+ cte_3.q,\
+ cte_5.q,\
+ cte_mat_1.q,\
+ cte_mat_2.q,\
+ cte_mat_3.q,\
+ cte_mat_4.q,\
+ cte_mat_5.q,\
+ custom_input_output_format.q,\
+ deleteAnalyze.q,\
+ delete_all_non_partitioned.q,\
+ delete_all_partitioned.q,\
+ delete_tmp_table.q,\
+ delete_where_no_match.q,\
+ delete_where_non_partitioned.q,\
+ delete_where_partitioned.q,\
+ delete_whole_partition.q,\
+ disable_merge_for_bucketing.q,\
+ dynamic_partition_pruning.q,\
+ dynamic_semijoin_reduction.q,\
+ dynpart_sort_opt_vectorization.q,\
+ dynpart_sort_optimization.q,\
+ dynpart_sort_optimization2.q,\
+ empty_join.q,\
+ enforce_order.q,\
+ filter_join_breaktask.q,\
+ filter_join_breaktask2.q,\
+ filter_union.q,\
+ groupby1.q,\
+ groupby2.q,\
+ groupby3.q,\
+ groupby_rollup_empty.q,\
+ having.q,\
+ identity_project_remove_skip.q,\
+ insert1.q,\
+ insert_orig_table.q,\
+ insert_update_delete.q,\
+ insert_values_dynamic_partitioned.q,\
+ insert_values_non_partitioned.q,\
+ insert_values_partitioned.q,\
+ insert_values_tmp_table.q,\
+ join0.q,\
+ join1.q,\
+ join_emit_interval.q,\
+ join46.q,\
+ join_nullsafe.q,\
+ kill_query.q,\
+ leftsemijoin.q,\
+ limit_pushdown.q,\
+ llap_acid.q,\
+ llap_acid_fast.q,\
+ load_dyn_part1.q,\
+ load_dyn_part2.q,\
+ load_dyn_part3.q,\
+ lvj_mapjoin.q,\
+ materialized_view_create_rewrite.q,\
+ materialized_view_create_rewrite_3.q,\
+ materialized_view_describe.q,\
+ materialized_view_rewrite_ssb.q,\
+ materialized_view_create.q,\
+ materialized_view_create_rewrite_2.q,\
+ materialized_view_create_rewrite_multi_db.q,\
+ materialized_view_drop.q,\
+ materialized_view_rewrite_ssb_2.q,\
+ mapjoin2.q,\
+ mapjoin3.q,\
+ mapjoin_decimal.q,\
+ mapjoin_mapjoin.q,\
+ mapjoin46.q,\
+ merge1.q,\
+ merge2.q,\
+ mergejoin.q,\
+ metadata_only_queries.q,\
+ metadata_only_queries_with_filters.q,\
+ metadataonly1.q,\
+ mm_conversions.q,\
+ mrr.q,\
+ nonmr_fetch_threshold.q,\
+ optimize_nullscan.q,\
+ orc_analyze.q,\
+ orc_merge11.q,\
+ orc_merge5.q,\
+ orc_merge6.q,\
+ orc_merge7.q,\
+ orc_merge8.q,\
+ orc_merge9.q,\
+ orc_merge_incompat1.q,\
+ orc_merge_incompat2.q,\
+ orc_merge_incompat3.q,\
+ orc_merge_incompat_schema.q,\
+ orc_merge_incompat_writer_version.q,\
+ orc_ppd_schema_evol_1a.q,\
+ orc_ppd_schema_evol_1b.q,\
+ orc_ppd_schema_evol_2a.q,\
+ orc_ppd_schema_evol_2b.q,\
+ parallel.q,\
+ partition_pruning.q,\
+ ptf.q,\
+ ptf_matchpath.q,\
+ ptf_streaming.q,\
+ results_cache_1.q,\
+ results_cache_empty_result.q,\
+ sample1.q,\
+ selectDistinctStar.q,\
+ select_dummy_source.q,\
+ skewjoin.q,\
+ stats_noscan_1.q,\
+ stats_only_null.q,\
+ subquery_exists.q,\
+ subquery_in.q,\
+ temp_table.q,\
+ tez_bmj_schema_evolution.q,\
+ tez_dml.q,\
+ tez_dynpart_hashjoin_1.q,\
+ tez_dynpart_hashjoin_2.q,\
+ tez_fsstat.q,\
+ tez_insert_overwrite_local_directory_1.q,\
+ tez_join.q,\
+ tez_join_hash.q,\
+ tez_join_result_complex.q,\
+ tez_join_tests.q,\
+ tez_joins_explain.q,\
+ tez_multi_union.q,\
+ tez_schema_evolution.q,\
+ tez_self_join.q,\
+ tez_smb_1.q,\
+ tez_smb_main.q,\
+ tez_union.q,\
+ tez_union2.q,\
+ tez_union_decimal.q,\
+ tez_union_group_by.q,\
+ tez_union_multiinsert.q,\
+ tez_vector_dynpart_hashjoin_1.q,\
+ tez_vector_dynpart_hashjoin_2.q,\
+ union2.q,\
+ union3.q,\
+ union4.q,\
+ union5.q,\
+ union6.q,\
+ union7.q,\
+ union8.q,\
+ union9.q,\
+ union_stats.q,\
+ update_access_time_non_current_db.q, \
+ update_after_multiple_inserts.q,\
+ update_all_non_partitioned.q,\
+ update_all_partitioned.q,\
+ update_all_types.q,\
+ update_tmp_table.q,\
+ update_two_cols.q,\
+ update_where_no_match.q,\
+ update_where_non_partitioned.q,\
+ update_where_partitioned.q,\
+ vector_acid3.q,\
+ vector_aggregate_9.q,\
+ vector_aggregate_without_gby.q,\
+ vector_auto_smb_mapjoin_14.q,\
+ vector_between_columns.q,\
+ vector_between_in.q,\
+ vector_binary_join_groupby.q,\
+ vector_bround.q,\
+ vector_bucket.q,\
+ vector_case_when_1.q,\
+ vector_case_when_2.q,\
+ vector_cast_constant.q,\
+ vector_char_2.q,\
+ vector_char_4.q,\
+ vector_char_cast.q,\
+ vector_char_mapjoin1.q,\
+ vector_char_simple.q,\
+ vector_coalesce.q,\
+ vector_coalesce_2.q,\
+ vector_coalesce_3.q,\
+ vector_coalesce_4.q,\
+ vector_complex_all.q,\
+ vector_count.q,\
+ vector_count_distinct.q,\
+ vector_data_types.q,\
+ vector_date_1.q,\
+ vector_decimal_1.q,\
+ vector_decimal_10_0.q,\
+ vector_decimal_2.q,\
+ vector_decimal_3.q,\
+ vector_decimal_4.q,\
+ vector_decimal_5.q,\
+ vector_decimal_6.q,\
+ vector_decimal_aggregate.q,\
+ vector_decimal_cast.q,\
+ vector_decimal_expressions.q,\
+ vector_decimal_mapjoin.q,\
+ vector_decimal_math_funcs.q,\
+ vector_decimal_precision.q,\
+ vector_decimal_round.q,\
+ vector_decimal_round_2.q,\
+ vector_decimal_trailing.q,\
+ vector_decimal_udf.q,\
+ vector_decimal_udf2.q,\
+ vector_distinct_2.q,\
+ vectorized_distinct_gby.q,\
+ vector_elt.q,\
+ vector_groupby4.q,\
+ vector_groupby6.q,\
+ vector_groupby_3.q,\
+ vector_groupby_mapjoin.q,\
+ vector_groupby_reduce.q,\
+ vector_grouping_sets.q,\
+ vector_if_expr.q,\
+ vector_include_no_sel.q,\
+ vector_inner_join.q,\
+ vector_interval_1.q,\
+ vector_interval_2.q,\
+ vector_interval_arithmetic.q,\
+ vector_interval_mapjoin.q,\
+ vector_join30.q,\
+ vector_join_filters.q,\
+ vector_join_nulls.q,\
+ vector_left_outer_join.q,\
+ vector_left_outer_join2.q,\
+ vector_leftsemi_mapjoin.q,\
+ vector_mr_diff_schema_alias.q,\
+ vector_multi_insert.q,\
+ vector_null_projection.q,\
+ vector_nullsafe_join.q,\
+ vector_nvl.q,\
+ vector_orderby_5.q,\
+ vector_outer_join0.q,\
+ vector_outer_join1.q,\
+ vector_outer_join2.q,\
+ vector_outer_join3.q,\
+ vector_outer_join4.q,\
+ vector_outer_join5.q,\
+ vector_outer_join6.q,\
+ vector_partition_diff_num_cols.q,\
+ vector_partitioned_date_time.q,\
+ vector_reduce1.q,\
+ vector_reduce2.q,\
+ vector_reduce3.q,\
+ vector_reduce_groupby_decimal.q,\
+ vector_reduce_groupby_duplicate_cols.q,\
+ vector_row__id.q,\
+ vector_string_concat.q,\
+ vector_struct_in.q,\
+ vector_udf_character_length.q,\
+ vector_udf_octet_length.q,\
+ vector_varchar_4.q,\
+ vector_varchar_mapjoin1.q,\
+ vector_varchar_simple.q,\
+ vector_when_case_null.q,\
+ vectorization_0.q,\
+ vectorization_1.q,\
+ vectorization_10.q,\
+ vectorization_11.q,\
+ vectorization_12.q,\
+ vectorization_13.q,\
+ vectorization_14.q,\
+ vectorization_15.q,\
+ vectorization_16.q,\
+ vectorization_17.q,\
+ vectorization_2.q,\
+ vectorization_3.q,\
+ vectorization_4.q,\
+ vectorization_5.q,\
+ vectorization_6.q,\
+ vectorization_7.q,\
+ vectorization_8.q,\
+ vectorization_9.q,\
+ vectorization_decimal_date.q,\
+ vectorization_limit.q,\
+ vectorization_nested_udf.q,\
+ vectorization_not.q,\
+ vectorization_part.q,\
+ vectorization_part_project.q,\
+ vectorization_part_varchar.q,\
+ vectorization_pushdown.q,\
+ vectorization_short_regress.q,\
+ vectorized_bucketmapjoin1.q,\
+ vectorized_case.q,\
+ vectorized_casts.q,\
+ vectorized_context.q,\
+ vectorized_date_funcs.q,\
+ vectorized_dynamic_partition_pruning.q,\
+ vectorized_insert_into_bucketed_table.q,\
+ vectorized_mapjoin.q,\
+ vectorized_mapjoin3.q,\
+ vectorized_math_funcs.q,\
+ vectorized_nested_mapjoin.q,\
+ vectorized_parquet.q,\
+ vectorized_parquet_types.q,\
+ vectorized_ptf.q,\
+ vectorized_rcfile_columnar.q,\
+ vectorized_shufflejoin.q,\
+ vectorized_string_funcs.q,\
+ vectorized_timestamp.q,\
+ vectorized_timestamp_funcs.q,\
+ vectorized_timestamp_ints_casts.q
+
+minillap.query.files=acid_bucket_pruning.q,\
+ bucket5.q,\
+ bucket6.q,\
+ dynamic_semijoin_user_level.q,\
+ except_distinct.q,\
+ explainuser_2.q,\
+ empty_dir_in_table.q,\
+ intersect_all.q,\
+ intersect_distinct.q,\
+ intersect_merge.q,\
+ llap_smb.q,\
+ llap_udf.q,\
+ llapdecider.q,\
+ reduce_deduplicate.q,\
+ reduce_deduplicate_distinct.q, \
+ remote_script.q,\
+ tez_aggr_part_stats.q,\
+ tez_union_view.q,\
+ file_with_header_footer.q,\
+ external_table_with_space_in_location_path.q,\
+ import_exported_table.q,\
+ orc_llap_counters.q,\
+ orc_llap_counters1.q,\
+ load_hdfs_file_with_space_in_the_name.q,\
+ orc_ppd_basic.q,\
+ schemeAuthority.q,\
+ schemeAuthority2.q,\
+ temp_table_external.q,\
+ table_nonprintable.q,\
+ llap_nullscan.q,\
+ rcfile_merge2.q,\
+ rcfile_merge3.q,\
+ rcfile_merge4.q,\
+ rcfile_createas1.q,\
+ orc_ppd_schema_evol_3a.q,\
+ global_limit.q,\
+ dynamic_partition_pruning_2.q,\
+ tez_union_dynamic_partition.q,\
+ tez_union_dynamic_partition_2.q,\
+ unionDistinct_1.q,\
+ load_fs2.q,\
+ llap_stats.q,\
+ multi_count_distinct_null.q
+
+minillaplocal.query.files=\
+ dp_counter_non_mm.q,\
+ dp_counter_mm.q,\
+ acid_no_buckets.q, \
+ acid_globallimit.q,\
+ acid_vectorization_missing_cols.q,\
+ acid_vectorization_original.q,\
+ alter_merge_stats_orc.q,\
+ authorization_view_8.q,\
+ auto_join30.q,\
+ auto_join_filters.q,\
+ auto_join_nulls.q,\
+ auto_sortmerge_join_16.q,\
+ auto_sortmerge_join_6.q,\
+ auto_sortmerge_join_8.q,\
+ auto_sortmerge_join_9.q,\
+ bucket4.q,\
+ bucket_groupby.q,\
+ bucket_many.q,\
+ bucket_map_join_tez1.q,\
+ bucket_map_join_tez2.q,\
+ bucket_map_join_tez_empty.q,\
+ bucketizedhiveinputformat.q,\
+ bucketmapjoin6.q,\
+ bucketmapjoin7.q,\
+ bucketpruning1.q,\
+ retry_failure.q,\
+ retry_failure_stat_changes.q,\
+ retry_failure_oom.q,\
+ bucketsortoptimize_insert_2.q,\
+ check_constraint.q,\
+ cbo_gby.q,\
+ cbo_join.q,\
+ cbo_limit.q,\
+ cbo_rp_gby.q,\
+ cbo_rp_join.q,\
+ cbo_rp_semijoin.q,\
+ cbo_rp_unionDistinct_2.q,\
+ cbo_rp_windowing_2.q,\
+ cbo_subq_not_in.q,\
+ column_table_stats.q,\
+ column_table_stats_orc.q,\
+ constprog_dpp.q,\
+ current_date_timestamp.q,\
+ correlationoptimizer1.q,\
+ correlationoptimizer2.q,\
+ correlationoptimizer4.q,\
+ correlationoptimizer6.q,\
+ default_constraint.q,\
+ disable_merge_for_bucketing.q,\
+ cross_prod_1.q,\
+ cross_prod_3.q,\
+ cross_prod_4.q,\
+ dynamic_partition_pruning.q,\
+ dynamic_semijoin_reduction.q,\
+ dynamic_semijoin_reduction_2.q,\
+ dynamic_semijoin_reduction_3.q,\
+ dynamic_semijoin_reduction_sw.q,\
+ dynpart_sort_opt_vectorization.q,\
+ dynpart_sort_optimization.q,\
+ dynpart_sort_optimization_acid.q,\
+ enforce_constraint_notnull.q,\
+ escape1.q,\
+ escape2.q,\
+ exchgpartition2lel.q,\
+ explainanalyze_2.q,\
+ explainuser_1.q,\
+ explainuser_4.q,\
+ groupby2.q,\
+ groupby_groupingset_bug.q,\
+ hybridgrace_hashjoin_1.q,\
+ hybridgrace_hashjoin_2.q,\
+ is_distinct_from.q,\
+ infer_bucket_sort_bucketed_table.q,\
+ input16_cc.q,\
+ insert_after_drop_partition.q,\
+ insert_dir_distcp.q,\
+ insert_into_with_schema.q,\
+ insert_values_orig_table.q,\
+ insert_values_orig_table_use_metadata.q,\
+ insert1_overwrite_partitions.q,\
+ jdbc_handler.q,\
+ join1.q,\
+ join_acid_non_acid.q,\
+ join_filters.q,\
+ join_max_hashtable.q,\
+ join_nulls.q,\
+ join_nullsafe.q,\
+ join_is_not_distinct_from.q,\
+ join_reordering_no_stats.q,\
+ leftsemijoin_mr.q,\
+ limit_join_transpose.q,\
+ lineage2.q,\
+ lineage3.q,\
+ list_bucket_dml_10.q,\
+ llap_acid2.q,\
+ llap_partitioned.q,\
+ llap_vector_nohybridgrace.q,\
+ load_data_acid_rename.q,\
+ load_dyn_part5.q,\
+ lvj_mapjoin.q,\
+ materialized_view_create_rewrite_dummy.q,\
+ materialized_view_create_rewrite_rebuild_dummy.q,\
+ mapjoin_decimal.q,\
+ mapjoin_hint.q,\
+ mapjoin_emit_interval.q,\
+ mergejoin_3way.q,\
+ mm_exim.q,\
+ mrr.q,\
+ multiMapJoin1.q,\
+ multiMapJoin2.q,\
+ non_native_window_udf.q,\
+ optimize_join_ptp.q,\
+ orc_analyze.q,\
+ orc_llap_nonvector.q,\
+ orc_ppd_date.q,\
+ tez_input_counters.q,\
+ orc_ppd_decimal.q,\
+ orc_ppd_timestamp.q,\
+ order_null.q,\
+ partition_multilevels.q,\
+ partition_shared_scan.q,\
+ partition_pruning.q,\
+ ptf.q,\
+ ptf_streaming.q,\
+ quotedid_smb.q,\
+ resourceplan.q,\
+ sample10.q,\
+ schema_evol_orc_acid_part_llap_io.q,\
+ schema_evol_orc_acid_part.q,\
+ schema_evol_orc_acid_part_update_llap_io.q,\
+ schema_evol_orc_acid_part_update.q,\
+ schema_evol_orc_acid_table_llap_io.q,\
+ schema_evol_orc_acid_table.q,\
+ schema_evol_orc_acid_table_update_llap_io.q,\
+ schema_evol_orc_acid_table_update.q,\
+ schema_evol_orc_acidvec_part_llap_io.q,\
+ schema_evol_orc_acidvec_part.q,\
+ schema_evol_orc_acidvec_part_update_llap_io.q,\
+ schema_evol_orc_acidvec_part_update.q,\
+ schema_evol_orc_acidvec_table_llap_io.q,\
+ schema_evol_orc_acidvec_table.q,\
+ schema_evol_orc_acidvec_table_update_llap_io.q,\
+ schema_evol_orc_acidvec_table_update.q,\
+ schema_evol_orc_nonvec_part_llap_io.q,\
+ schema_evol_orc_nonvec_part.q,\
+ schema_evol_orc_nonvec_part_all_complex_llap_io.q,\
+ schema_evol_orc_nonvec_part_all_complex.q,\
+ schema_evol_orc_nonvec_part_all_primitive_llap_io.q,\
+ schema_evol_orc_nonvec_part_all_primitive.q,\
+ schema_evol_orc_nonvec_table_llap_io.q,\
+ schema_evol_orc_nonvec_table.q,\
+ schema_evol_orc_vec_part_llap_io.q,\
+ schema_evol_orc_vec_part.q,\
+ schema_evol_orc_vec_part_all_complex_llap_io.q,\
+ schema_evol_orc_vec_part_all_complex.q,\
+ schema_evol_orc_vec_part_all_primitive_llap_io.q,\
+ schema_evol_orc_vec_part_all_primitive.q,\
+ schema_evol_orc_vec_table_llap_io.q,\
+ schema_evol_orc_vec_table.q,\
+ schema_evol_stats.q,\
+ schema_evol_text_nonvec_part_llap_io.q,\
+ schema_evol_text_nonvec_part.q,\
+ schema_evol_text_nonvec_part_all_complex_llap_io.q,\
+ schema_evol_text_nonvec_part_all_complex.q,\
+ schema_evol_text_nonvec_part_all_primitive_llap_io.q,\
+ schema_evol_text_nonvec_part_all_primitive.q,\
+ schema_evol_text_nonvec_table_llap_io.q,\
+ schema_evol_text_nonvec_table.q,\
+ schema_evol_text_vec_part_llap_io.q,\
+ schema_evol_text_vec_part.q,\
+ schema_evol_text_vec_part_all_complex_llap_io.q,\
+ schema_evol_text_vec_part_all_complex.q,\
+ schema_evol_text_vec_part_all_primitive_llap_io.q,\
+ schema_evol_text_vec_part_all_primitive.q,\
+ schema_evol_text_vec_table_llap_io.q,\
+ schema_evol_text_vec_table.q,\
+ schema_evol_text_vecrow_part_llap_io.q,\
+ schema_evol_text_vecrow_part.q,\
+ schema_evol_text_vecrow_part_all_complex_llap_io.q,\
+ schema_evol_text_vecrow_part_all_complex.q,\
+ schema_evol_text_vecrow_part_all_primitive_llap_io.q,\
+ schema_evol_text_vecrow_part_all_primitive.q,\
+ schema_evol_text_vecrow_table_llap_io.q,\
+ schema_evol_text_vecrow_table.q,\
+ selectDistinctStar.q,\
+ semijoin.q,\
+ semijoin6.q,\
+ semijoin7.q,\
+ semijoin_hint.q,\
+ sharedworkext.q,\
+ smb_cache.q,\
+ special_character_in_tabnames_1.q,\
+ sqlmerge.q,\
+ stats_based_fetch_decision.q,\
+ subquery_in_having.q,\
+ subquery_notin.q,\
+ subquery_nested_subquery.q, \
+ subquery_select.q, \
+ subquery_shared_alias.q, \
+ subquery_null_agg.q,\
+ sysdb.q,\
+ table_access_keys_stats.q,\
+ tez_bmj_schema_evolution.q,\
+ tez_dml.q,\
+ tez_dynpart_hashjoin_1.q,\
+ tez_dynpart_hashjoin_2.q,\
+ tez_dynpart_hashjoin_3.q,\
+ tez_fsstat.q,\
+ tez_insert_overwrite_local_directory_1.q,\
+ tez_join.q,\
+ tez_join_result_complex.q,\
+ tez_join_tests.q,\
+ tez_joins_explain.q,\
+ tez_multi_union.q,\
+ tez_nway_join.q,\
+ tez_schema_evolution.q,\
+ tez_self_join.q,\
+ tez_smb_1.q,\
+ tez_smb_empty.q,\
+ tez_smb_main.q,\
+ tez_union.q,\
+ tez_union2.q,\
+ tez_union_decimal.q,\
+ tez_union_group_by.q,\
+ tez_union_multiinsert.q,\
+ tez_vector_dynpart_hashjoin_1.q,\
+ tez_vector_dynpart_hashjoin_2.q,\
+ uber_reduce.q,\
+ udaf_collect_set_2.q,\
+ udaf_all_keyword.q,\
+ union_fast_stats.q,\
+ union_remove_26.q,\
+ union_top_level.q,\
+ vector_auto_smb_mapjoin_14.q,\
+ vector_char_varchar_1.q,\
+ vector_complex_all.q,\
+ vector_complex_join.q,\
+ vector_decimal_2.q,\
+ vector_decimal_udf.q,\
+ vector_groupby_cube1.q,\
+ vector_groupby_grouping_id1.q,\
+ vector_groupby_grouping_id2.q,\
+ vector_groupby_grouping_id3.q,\
+ vector_groupby_grouping_sets1.q,\
+ vector_groupby_grouping_sets2.q,\
+ vector_groupby_grouping_sets3.q,\
+ vector_groupby_grouping_sets3_dec.q,\
+ vector_groupby_grouping_sets4.q,\
+ vector_groupby_grouping_sets5.q,\
+ vector_groupby_grouping_sets6.q,\
+ vector_groupby_grouping_sets_grouping.q,\
+ vector_groupby_grouping_sets_limit.q,\
+ vector_groupby_grouping_window.q,\
+ vector_groupby_rollup1.q,\
+ vector_groupby_sort_11.q,\
+ vector_groupby_sort_8.q,\
+ vector_if_expr_2.q,\
+ vector_join30.q,\
+ vector_join_filters.q,\
+ vector_leftsemi_mapjoin.q,\
+ vector_like_2.q,\
+ vector_llap_text_1.q,\
+ vector_mapjoin_reduce.q,\
+ vector_number_compare_projection.q,\
+ vector_order_null.q,\
+ vector_outer_reference_windowed.q,\
+ vector_partitioned_date_time.q,\
+ vector_ptf_1.q,\
+ vector_ptf_part_simple.q,\
+ vector_reuse_scratchcols.q,\
+ vector_string_decimal.q,\
+ vector_udf_adaptor_1.q,\
+ vector_udf_inline.q,\
+ vector_udf_string_to_boolean.q,\
+ vector_udf1.q,\
+ vector_udf2.q,\
+ vector_windowing.q,\
+ vector_windowing_expressions.q,\
+ vector_windowing_gby.q,\
+ vector_windowing_gby2.q,\
+ vector_windowing_multipartitioning.q,\
+ vector_windowing_navfn.q,\
+ vector_windowing_order_null.q,\
+ vector_windowing_range_multiorder.q,\
+ vector_windowing_rank.q,\
+ vector_windowing_streaming.q,\
+ vector_windowing_windowspec.q,\
+ vector_windowing_windowspec4.q,\
+ vectorization_div0.q,\
+ vectorization_input_format_excludes.q,\
+ vectorized_insert_into_bucketed_table.q,\
+ vectorization_short_regress.q,\
+ vectorized_dynamic_partition_pruning.q,\
+ vectorized_dynamic_semijoin_reduction.q,\
+ vectorized_dynamic_semijoin_reduction2.q,\
+ vectorized_ptf.q,\
+ windowing.q,\
+ windowing_gby.q,\
+ unionDistinct_2.q,\
+ auto_smb_mapjoin_14.q,\
+ subquery_views.q,\
+ vector_nullsafe_join.q,\
+ smb_mapjoin_18.q,\
+ varchar_udf1.q,\
+ vectorized_parquet.q,\
+ bucketmapjoin2.q,\
+ orc_ppd_varchar.q,\
+ multi_insert.q,\
+ cbo_rp_limit.q,\
+ vector_interval_2.q,\
+ cbo_semijoin.q,\
+ parquet_predicate_pushdown.q,\
+ vector_outer_join5.q,\
+ smb_mapjoin_6.q,\
+ multi_column_in.q,\
+ orc_predicate_pushdown.q,\
+ columnStatsUpdateForStatsOptimizer_1.q,\
+ reduce_deduplicate_extended.q,\
+ limit_pushdown3.q,\
+ offset_limit.q,\
+ vector_join_nulls.q,\
+ correlationoptimizer3.q,\
+ vectorization_0.q,\
+ columnstats_part_coltype.q,\
+ drop_partition_with_stats.q,\
+ dynpart_sort_optimization2.q,\
+ multi_column_in_single.q,\
+ join32_lessSize.q,\
+ alter_table_invalidate_column_stats.q,\
+ bucketmapjoin1.q,\
+ ppr_pushdown.q,\
+ smb_mapjoin_14.q,\
+ vector_between_in.q,\
+ offset_limit_ppd_optimizer.q,\
+ cluster.q,\
+ subquery_corr.q,\
+ subquery_in.q,\
+ subquery_multi.q,\
+ subquery_scalar.q,\
+ stats11.q,\
+ orc_create.q,\
+ orc_split_elimination.q,\
+ order_null.q,\
+ skewjoinopt15.q,\
+ authorization_2.q,\
+ cbo_subq_in.q,\
+ alter_merge_orc.q,\
+ bucketsortoptimize_insert_6.q,\
+ bucketmapjoin4.q,\
+ orc_merge7.q,\
+ column_access_stats.q,\
+ smb_mapjoin_5.q,\
+ vector_adaptor_usage_mode.q,\
+ optimize_nullscan.q,\
+ parquet_types.q,\
+ groupby_grouping_id2.q,\
+ constprog_semijoin.q,\
+ ppd_union_view.q,\
+ smb_mapjoin_19.q,\
+ cbo_rp_views.q,\
+ bucketsortoptimize_insert_7.q,\
+ smb_mapjoin_15.q,\
+ vectorized_nested_mapjoin.q,\
+ skiphf_aggr.q,\
+ multi_insert_lateral_view.q,\
+ smb_mapjoin_4.q,\
+ cbo_udf_udaf.q,\
+ bucketmapjoin3.q,\
+ metadataonly1.q,\
+ lateral_view.q,\
+ extrapolate_part_stats_partial_ndv.q,\
+ cbo_views.q,\
+ limit_pushdown.q,\
+ cbo_rp_udf_udaf.q,\
+ count.q,\
+ vector_inner_join.q,\
+ temp_table.q,\
+ vector_partition_diff_num_cols.q,\
+ vector_count_distinct.q,\
+ cbo_rp_udf_udaf_stats_opt.q,\
+ database.q,\
+ smb_mapjoin_17.q,\
+ groupby_resolution.q,\
+ windowing_windowspec2.q,\
+ unionDistinct_3.q,\
+ vectorized_join46.q,\
+ vectorized_multi_output_select.q,\
+ partialdhj.q
+
+encrypted.query.files=encryption_join_unencrypted_tbl.q,\
+ encryption_insert_partition_static.q,\
+ encryption_insert_partition_dynamic.q,\
+ encryption_join_with_different_encryption_keys.q,\
+ encryption_select_read_only_encrypted_tbl.q,\
+ encryption_select_read_only_unencrypted_tbl.q,\
+ encryption_load_data_to_encrypted_tables.q, \
+ encryption_unencrypted_nonhdfs_external_tables.q, \
+ encryption_move_tbl.q, \
+ encryption_drop_table.q, \
+ encryption_insert_values.q, \
+ encryption_drop_view.q, \
+ encryption_drop_partition.q, \
+ encryption_with_trash.q, \
+ encryption_ctas.q, \
+ encryption_auto_purge_tables.q, \
+ encryption_drop_table_in_encrypted_db.q
+
+beeline.positive.include=create_merge_compressed.q,\
+ colstats_all_nulls.q,\
+ drop_with_concurrency.q,\
+ escape_comments.q,\
+ explain_outputs.q,\
+ insert_overwrite_local_directory_1.q,\
+ mapjoin2.q,\
+ materialized_view_create_rewrite.q,\
+ smb_mapjoin_1.q,\
+ smb_mapjoin_10.q,\
+ smb_mapjoin_11.q,\
+ smb_mapjoin_12.q,\
+ smb_mapjoin_13.q,\
+ smb_mapjoin_16.q,\
+ smb_mapjoin_2.q,\
+ smb_mapjoin_3.q,\
+ smb_mapjoin_7.q,\
+ select_dummy_source.q,\
+ udf_unix_timestamp.q
+
+minimr.query.negative.files=cluster_tasklog_retrieval.q,\
+ file_with_header_footer_negative.q,\
+ local_mapred_error_cache.q,\
+ mapreduce_stack_trace.q,\
+ mapreduce_stack_trace_turnoff.q,\
+ minimr_broken_pipe.q,\
+ table_nonprintable_negative.q,\
+ udf_local_resource.q,\
+ ct_noperm_loc.q,\
+ ctas_noperm_loc.q
+
+# tests are sorted using: perl -pe 's@\\\s*\n@ @g' testconfiguration.properties \
+# | awk -F= '/spark.query.files/{print $2}' | perl -pe 's@.q *, *@\n@g' \
+# | egrep -v '^ *$' | sort -V | uniq | perl -pe 's@\n@.q, \\\n@g' | perl -pe 's@^@ @g'
+spark.query.files=add_part_multiple.q, \
+ alter_merge_orc.q, \
+ alter_merge_stats_orc.q, \
+ annotate_stats_join.q, \
+ parquet_vectorization_0.q ,\
+ parquet_vectorization_10.q ,\
+ parquet_vectorization_11.q ,\
+ parquet_vectorization_12.q ,\
+ parquet_vectorization_13.q ,\
+ parquet_vectorization_14.q ,\
+ parquet_vectorization_15.q ,\
+ parquet_vectorization_16.q ,\
+ parquet_vectorization_17.q ,\
+ parquet_vectorization_1.q ,\
+ parquet_vectorization_2.q ,\
+ parquet_vectorization_3.q ,\
+ parquet_vectorization_4.q ,\
+ parquet_vectorization_5.q ,\
+ parquet_vectorization_6.q ,\
+ parquet_vectorization_7.q ,\
+ parquet_vectorization_8.q ,\
+ parquet_vectorization_9.q ,\
+ parquet_vectorization_decimal_date.q ,\
+ parquet_vectorization_div0.q ,\
+ parquet_vectorization_limit.q ,\
+ parquet_vectorization_nested_udf.q ,\
+ parquet_vectorization_not.q ,\
+ parquet_vectorization_offset_limit.q ,\
+ parquet_vectorization_part_project.q ,\
+ parquet_vectorization_part.q ,\
+ parquet_vectorization_part_varchar.q ,\
+ parquet_vectorization_pushdown.q ,\
+ auto_join0.q, \
+ auto_join1.q, \
+ auto_join10.q, \
+ auto_join11.q, \
+ auto_join12.q, \
+ auto_join13.q, \
+ auto_join14.q, \
+ auto_join15.q, \
+ auto_join16.q, \
+ auto_join17.q, \
+ auto_join18.q, \
+ auto_join18_multi_distinct.q, \
+ auto_join19.q, \
+ auto_join2.q, \
+ auto_join20.q, \
+ auto_join21.q, \
+ auto_join22.q, \
+ auto_join23.q, \
+ auto_join24.q, \
+ auto_join26.q, \
+ auto_join27.q, \
+ auto_join28.q, \
+ auto_join29.q, \
+ auto_join3.q, \
+ auto_join30.q, \
+ auto_join31.q, \
+ auto_join32.q, \
+ auto_join4.q, \
+ auto_join5.q, \
+ auto_join6.q, \
+ auto_join7.q, \
+ auto_join8.q, \
+ auto_join9.q, \
+ auto_join_filters.q, \
+ auto_join_nulls.q, \
+ auto_join_reordering_values.q, \
+ auto_join_stats.q, \
+ auto_join_stats2.q, \
+ auto_join_without_localtask.q, \
+ auto_smb_mapjoin_14.q, \
+ auto_sortmerge_join_1.q, \
+ auto_sortmerge_join_12.q, \
+ auto_sortmerge_join_13.q, \
+ auto_sortmerge_join_14.q, \
+ auto_sortmerge_join_15.q, \
+ auto_sortmerge_join_16.q, \
+ auto_sortmerge_join_2.q, \
+ auto_sortmerge_join_3.q, \
+ auto_sortmerge_join_4.q, \
+ auto_sortmerge_join_5.q, \
+ auto_sortmerge_join_6.q, \
+ auto_sortmerge_join_7.q, \
+ auto_sortmerge_join_8.q, \
+ auto_sortmerge_join_9.q, \
+ avro_compression_enabled_native.q, \
+ avro_decimal_native.q, \
+ avro_joins.q, \
+ avro_joins_native.q, \
+ bucket2.q, \
+ bucket3.q, \
+ bucket4.q, \
+ bucket_map_join_1.q, \
+ bucket_map_join_2.q, \
+ bucket_map_join_spark1.q, \
+ bucket_map_join_spark2.q, \
+ bucket_map_join_spark3.q, \
+ bucket_map_join_spark4.q, \
+ bucket_map_join_tez1.q, \
+ bucket_map_join_tez2.q, \
+ bucketmapjoin1.q, \
+ bucketmapjoin10.q, \
+ bucketmapjoin11.q, \
+ bucketmapjoin12.q, \
+ bucketmapjoin13.q, \
+ bucketmapjoin2.q, \
+ bucketmapjoin3.q, \
+ bucketmapjoin4.q, \
+ bucketmapjoin5.q, \
+ bucketmapjoin7.q, \
+ bucketmapjoin8.q, \
+ bucketmapjoin9.q, \
+ bucketmapjoin_negative.q, \
+ bucketmapjoin_negative2.q, \
+ bucketmapjoin_negative3.q, \
+ bucketsortoptimize_insert_2.q, \
+ bucketsortoptimize_insert_4.q, \
+ bucketsortoptimize_insert_6.q, \
+ bucketsortoptimize_insert_7.q, \
+ bucketsortoptimize_insert_8.q, \
+ cbo_gby.q, \
+ cbo_gby_empty.q, \
+ cbo_limit.q, \
+ cbo_semijoin.q, \
+ cbo_simple_select.q, \
+ cbo_stats.q, \
+ cbo_subq_in.q, \
+ cbo_subq_not_in.q, \
+ cbo_udf_udaf.q, \
+ cbo_union.q, \
+ column_access_stats.q, \
+ count.q, \
+ create_merge_compressed.q, \
+ cross_join.q, \
+ cross_product_check_1.q, \
+ cross_product_check_2.q, \
+ ctas.q, \
+ custom_input_output_format.q, \
+ date_join1.q, \
+ date_udf.q, \
+ decimal_1_1.q, \
+ decimal_join.q, \
+ disable_merge_for_bucketing.q, \
+ enforce_order.q, \
+ escape_clusterby1.q, \
+ escape_distributeby1.q, \
+ escape_orderby1.q, \
+ escape_sortby1.q, \
+ explaindenpendencydiffengs.q, \
+ filter_join_breaktask.q, \
+ filter_join_breaktask2.q, \
+ groupby1.q, \
+ groupby10.q, \
+ groupby11.q, \
+ groupby1_map.q, \
+ groupby1_map_nomap.q, \
+ groupby1_map_skew.q, \
+ groupby1_noskew.q, \
+ groupby2.q, \
+ groupby2_map.q, \
+ groupby2_map_multi_distinct.q, \
+ groupby2_map_skew.q, \
+ groupby2_noskew.q, \
+ groupby2_noskew_multi_distinct.q, \
+ groupby3.q, \
+ groupby3_map.q, \
+ groupby3_map_multi_distinct.q, \
+ groupby3_map_skew.q, \
+ groupby3_noskew.q, \
+ groupby3_noskew_multi_distinct.q, \
+ groupby4.q, \
+ groupby4_map.q, \
+ groupby4_map_skew.q, \
+ groupby4_noskew.q, \
+ groupby5.q, \
+ groupby5_map.q, \
+ groupby5_map_skew.q, \
+ groupby5_noskew.q, \
+ groupby6.q, \
+ groupby6_map.q, \
+ groupby6_map_skew.q, \
+ groupby6_noskew.q, \
+ groupby7.q, \
+ groupby7_map.q, \
+ groupby7_map_multi_single_reducer.q, \
+ groupby7_map_skew.q, \
+ groupby7_noskew.q, \
+ groupby7_noskew_multi_single_reducer.q, \
+ groupby8.q, \
+ groupby8_map.q, \
+ groupby8_map_skew.q, \
+ groupby8_noskew.q, \
+ groupby9.q, \
+ groupby_bigdata.q, \
+ groupby_complex_types.q, \
+ groupby_complex_types_multi_single_reducer.q, \
+ groupby_cube1.q, \
+ groupby_grouping_id2.q, \
+ groupby_map_ppr.q, \
+ groupby_map_ppr_multi_distinct.q, \
+ groupby_multi_insert_common_distinct.q, \
+ groupby_multi_single_reducer.q, \
+ groupby_multi_single_reducer2.q, \
+ groupby_multi_single_reducer3.q, \
+ groupby_position.q, \
+ groupby_ppr.q, \
+ groupby_ppr_multi_distinct.q, \
+ groupby_resolution.q, \
+ groupby_rollup1.q, \
+ groupby_sort_1_23.q, \
+ groupby_sort_skew_1.q, \
+ groupby_sort_skew_1_23.q, \
+ qroupby_limit_extrastep.q, \
+ having.q, \
+ identity_project_remove_skip.q, \
+ index_auto_self_join.q, \
+ innerjoin.q, \
+ input12.q, \
+ input13.q, \
+ input14.q, \
+ input17.q, \
+ input18.q, \
+ input1_limit.q, \
+ input_part2.q, \
+ insert1.q, \
+ insert_into1.q, \
+ insert_into2.q, \
+ insert_into3.q, \
+ join0.q, \
+ join1.q, \
+ join10.q, \
+ join11.q, \
+ join12.q, \
+ join13.q, \
+ join14.q, \
+ join15.q, \
+ join16.q, \
+ join17.q, \
+ join18.q, \
+ join18_multi_distinct.q, \
+ join19.q, \
+ join2.q, \
+ join20.q, \
+ join21.q, \
+ join22.q, \
+ join23.q, \
+ join24.q, \
+ join25.q, \
+ join26.q, \
+ join27.q, \
+ join28.q, \
+ join29.q, \
+ join3.q, \
+ join30.q, \
+ join31.q, \
+ join32.q, \
+ join32_lessSize.q, \
+ join33.q, \
+ join34.q, \
+ join35.q, \
+ join36.q, \
+ join37.q, \
+ join38.q, \
+ join39.q, \
+ join4.q, \
+ join40.q, \
+ join41.q, \
+ join5.q, \
+ join6.q, \
+ join7.q, \
+ join8.q, \
+ join9.q, \
+ join_1to1.q, \
+ join_alt_syntax.q, \
+ join_array.q, \
+ join_casesensitive.q, \
+ join_cond_pushdown_1.q, \
+ join_cond_pushdown_2.q, \
+ join_cond_pushdown_3.q, \
+ join_cond_pushdown_4.q, \
+ join_cond_pushdown_unqual1.q, \
+ join_cond_pushdown_unqual2.q, \
+ join_cond_pushdown_unqual3.q, \
+ join_cond_pushdown_unqual4.q, \
+ join_empty.q, \
+ join_filters_overlap.q, \
+ join_hive_626.q, \
+ join_literals.q, \
+ join_map_ppr.q, \
+ join_merge_multi_expressions.q, \
+ join_merging.q, \
+ join_nullsafe.q, \
+ join_rc.q, \
+ join_reorder.q, \
+ join_reorder2.q, \
+ join_reorder3.q, \
+ join_reorder4.q, \
+ join_star.q, \
+ join_thrift.q, \
+ join_vc.q, \
+ join_view.q, \
+ lateral_view_explode2.q, \
+ lateral_view_multi_lateralviews.q, \
+ leftsemijoin.q, \
+ leftsemijoin_mr.q, \
+ limit_pushdown.q, \
+ limit_pushdown2.q, \
+ list_bucket_dml_2.q, \
+ load_dyn_part1.q, \
+ load_dyn_part10.q, \
+ load_dyn_part11.q, \
+ load_dyn_part12.q, \
+ load_dyn_part13.q, \
+ load_dyn_part14.q, \
+ load_dyn_part15.q, \
+ load_dyn_part2.q, \
+ load_dyn_part3.q, \
+ load_dyn_part4.q, \
+ load_dyn_part5.q, \
+ load_dyn_part6.q, \
+ load_dyn_part7.q, \
+ load_dyn_part8.q, \
+ load_dyn_part9.q, \
+ louter_join_ppr.q, \
+ mapjoin1.q, \
+ mapjoin_addjar.q, \
+ mapjoin_decimal.q, \
+ mapjoin_distinct.q, \
+ mapjoin_filter_on_outerjoin.q, \
+ mapjoin_mapjoin.q, \
+ mapjoin_memcheck.q, \
+ mapjoin_subquery.q, \
+ mapjoin_subquery2.q, \
+ mapjoin_test_outer.q, \
+ mapreduce1.q, \
+ mapreduce2.q, \
+ merge1.q, \
+ merge2.q, \
+ mergejoins.q, \
+ mergejoins_mixed.q, \
+ metadata_only_queries.q, \
+ metadata_only_queries_with_filters.q, \
+ multi_insert.q, \
+ multi_insert_gby.q, \
+ multi_insert_gby2.q, \
+ multi_insert_gby3.q, \
+ multi_insert_lateral_view.q, \
+ multi_insert_mixed.q, \
+ multi_insert_move_tasks_share_dependencies.q, \
+ multi_insert_with_join.q, \
+ multi_join_union.q, \
+ multi_join_union_src.q, \
+ multigroupby_singlemr.q, \
+ nullgroup.q, \
+ nullgroup2.q, \
+ nullgroup4.q, \
+ nullgroup4_multi_distinct.q, \
+ optimize_nullscan.q, \
+ order.q, \
+ order2.q, \
+ outer_join_ppr.q, \
+ parallel.q, \
+ parallel_join0.q, \
+ parallel_join1.q, \
+ parquet_join.q, \
+ pcr.q, \
+ ppd_gby_join.q, \
+ ppd_join.q, \
+ ppd_join2.q, \
+ ppd_join3.q, \
+ ppd_join4.q, \
+ ppd_join5.q, \
+ ppd_join_filter.q, \
+ ppd_multi_insert.q, \
+ ppd_outer_join1.q, \
+ ppd_outer_join2.q, \
+ ppd_outer_join3.q, \
+ ppd_outer_join4.q, \
+ ppd_outer_join5.q, \
+ ppd_transform.q, \
+ ptf.q, \
+ ptf_decimal.q, \
+ ptf_general_queries.q, \
+ ptf_matchpath.q, \
+ ptf_rcfile.q, \
+ ptf_register_tblfn.q, \
+ ptf_seqfile.q, \
+ ptf_streaming.q, \
+ rcfile_bigdata.q, \
+ reduce_deduplicate_exclude_join.q, \
+ router_join_ppr.q, \
+ runtime_skewjoin_mapjoin_spark.q, \
+ sample1.q, \
+ sample10.q, \
+ sample2.q, \
+ sample3.q, \
+ sample4.q, \
+ sample5.q, \
+ sample6.q, \
+ sample7.q, \
+ sample8.q, \
+ sample9.q, \
+ script_env_var1.q, \
+ script_env_var2.q, \
+ script_pipe.q, \
+ scriptfile1.q, \
+ semijoin.q, \
+ skewjoin.q, \
+ skewjoin_noskew.q, \
+ skewjoin_union_remove_1.q, \
+ skewjoin_union_remove_2.q, \
+ skewjoinopt1.q, \
+ skewjoinopt10.q, \
+ skewjoinopt11.q, \
+ skewjoinopt12.q, \
+ skewjoinopt13.q, \
+ skewjoinopt14.q, \
+ skewjoinopt15.q, \
+ skewjoinopt16.q, \
+ skewjoinopt17.q, \
+ skewjoinopt18.q, \
+ skewjoinopt19.q, \
+ skewjoinopt2.q, \
+ skewjoinopt20.q, \
+ skewjoinopt3.q, \
+ skewjoinopt4.q, \
+ skewjoinopt5.q, \
+ skewjoinopt6.q, \
+ skewjoinopt7.q, \
+ skewjoinopt8.q, \
+ skewjoinopt9.q, \
+ smb_mapjoin_1.q, \
+ smb_mapjoin_10.q, \
+ smb_mapjoin_11.q, \
+ smb_mapjoin_12.q, \
+ smb_mapjoin_13.q, \
+ smb_mapjoin_14.q, \
+ smb_mapjoin_15.q, \
+ smb_mapjoin_16.q, \
+ smb_mapjoin_17.q, \
+ smb_mapjoin_18.q, \
+ smb_mapjoin_19.q, \
+ smb_mapjoin_2.q, \
+ smb_mapjoin_20.q, \
+ smb_mapjoin_21.q, \
+ smb_mapjoin_22.q, \
+ smb_mapjoin_25.q, \
+ smb_mapjoin_3.q, \
+ smb_mapjoin_4.q, \
+ smb_mapjoin_5.q, \
+ smb_mapjoin_6.q, \
+ smb_mapjoin_7.q, \
+ smb_mapjoin_8.q, \
+ smb_mapjoin_9.q, \
+ sort.q, \
+ stats0.q, \
+ stats1.q, \
+ stats10.q, \
+ stats12.q, \
+ stats13.q, \
+ stats14.q, \
+ stats15.q, \
+ stats16.q, \
+ stats18.q, \
+ stats2.q, \
+ stats3.q, \
+ stats5.q, \
+ stats6.q, \
+ stats7.q, \
+ stats8.q, \
+ stats9.q, \
+ stats_noscan_1.q, \
+ stats_noscan_2.q, \
+ stats_only_null.q, \
+ stats_partscan_1_23.q, \
+ statsfs.q, \
+ subquery_exists.q, \
+ subquery_in.q, \
+ subquery_multi.q,\
+ subquery_multiinsert.q, \
+ subquery_nested_subquery.q, \
+ subquery_notin.q,\
+ subquery_null_agg.q,\
+ subquery_scalar.q,\
+ subquery_select.q, \
+ subquery_shared_alias.q, \
+ subquery_views.q,\
+ table_access_keys_stats.q, \
+ temp_table.q, \
+ temp_table_gb1.q, \
+ temp_table_join1.q, \
+ tez_join_tests.q, \
+ tez_joins_explain.q, \
+ timestamp_1.q, \
+ timestamp_2.q, \
+ timestamp_3.q, \
+ timestamp_comparison.q, \
+ timestamp_lazy.q, \
+ timestamp_null.q, \
+ timestamp_udf.q, \
+ transform1.q, \
+ transform2.q, \
+ transform_ppr1.q, \
+ transform_ppr2.q, \
+ udaf_collect_set.q, \
+ udf_example_add.q, \
+ udf_in_file.q, \
+ udf_max.q, \
+ udf_min.q, \
+ udf_percentile.q, \
+ union.q, \
+ union10.q, \
+ union11.q, \
+ union12.q, \
+ union13.q, \
+ union14.q, \
+ union15.q, \
+ union16.q, \
+ union17.q, \
+ union18.q, \
+ union19.q, \
+ union2.q, \
+ union20.q, \
+ union21.q, \
+ union22.q, \
+ union23.q, \
+ union24.q, \
+ union25.q, \
+ union26.q, \
+ union27.q, \
+ union28.q, \
+ union29.q, \
+ union3.q, \
+ union30.q, \
+ union31.q, \
+ union32.q, \
+ union33.q, \
+ union34.q, \
+ union4.q, \
+ union5.q, \
+ union6.q, \
+ union7.q, \
+ union8.q, \
+ union9.q, \
+ union_date.q, \
+ union_date_trim.q, \
+ union_lateralview.q, \
+ union_null.q, \
+ union_ppr.q, \
+ union_remove_1.q, \
+ union_remove_10.q, \
+ union_remove_11.q, \
+ union_remove_12.q, \
+ union_remove_13.q, \
+ union_remove_14.q, \
+ union_remove_15.q, \
+ union_remove_16.q, \
+ union_remove_17.q, \
+ union_remove_18.q, \
+ union_remove_19.q, \
+ union_remove_2.q, \
+ union_remove_20.q, \
+ union_remove_21.q, \
+ union_remove_22.q, \
+ union_remove_23.q, \
+ union_remove_24.q, \
+ union_remove_25.q, \
+ union_remove_3.q, \
+ union_remove_4.q, \
+ union_remove_5.q, \
+ union_remove_6.q, \
+ union_remove_6_subq.q, \
+ union_remove_7.q, \
+ union_remove_8.q, \
+ union_remove_9.q, \
+ union_script.q, \
+ union_top_level.q, \
+ union_view.q, \
+ uniquejoin.q, \
+ varchar_join1.q, \
+ vector_between_in.q, \
+ vector_cast_constant.q, \
+ vector_char_4.q, \
+ vector_count_distinct.q, \
+ vector_data_types.q, \
+ vector_decimal_aggregate.q, \
+ vector_decimal_mapjoin.q, \
+ vector_distinct_2.q, \
+ vector_elt.q, \
+ vector_groupby_3.q, \
+ vector_left_outer_join.q, \
+ vector_mapjoin_reduce.q, \
+ vector_orderby_5.q, \
+ vector_string_concat.q, \
+ vector_varchar_4.q, \
+ vectorization_0.q, \
+ vectorization_1.q, \
+ vectorization_10.q, \
+ vectorization_11.q, \
+ vectorization_12.q, \
+ vectorization_13.q, \
+ vectorization_14.q, \
+ vectorization_15.q, \
+ vectorization_16.q, \
+ vectorization_17.q, \
+ vectorization_2.q, \
+ vectorization_3.q, \
+ vectorization_4.q, \
+ vectorization_5.q, \
+ vectorization_6.q, \
+ vectorization_9.q, \
+ vectorization_decimal_date.q, \
+ vectorization_div0.q, \
+ vectorization_input_format_excludes.q, \
+ vectorization_nested_udf.q, \
+ vectorization_not.q, \
+ vectorization_part.q, \
+ vectorization_part_project.q, \
+ vectorization_parquet_projection.q, \
+ vectorization_pushdown.q, \
+ vectorization_short_regress.q, \
+ vectorized_case.q, \
+ vectorized_mapjoin.q, \
+ vectorized_math_funcs.q, \
+ vectorized_nested_mapjoin.q, \
+ vectorized_ptf.q, \
+ vectorized_rcfile_columnar.q, \
+ vectorized_shufflejoin.q, \
+ vectorized_string_funcs.q, \
+ vectorized_timestamp_funcs.q, \
+ windowing.q
+
+# Unlike "spark.query.files" above, these tests only run
+# under Spark engine and only use TestSparkCliDriver.
+spark.only.query.files=spark_union_merge.q,\
+ spark_combine_equivalent_work_2.q
+
+# Unlike "miniSparkOnYarn.query.files" below, these tests only run
+# under Spark engine and only use TestMiniSparkOnYarnCliDriver.
+miniSparkOnYarn.only.query.files=spark_combine_equivalent_work.q,\
+ spark_dynamic_partition_pruning.q,\
+ spark_dynamic_partition_pruning_2.q,\
+ spark_dynamic_partition_pruning_3.q,\
+ spark_dynamic_partition_pruning_4.q,\
+ spark_dynamic_partition_pruning_5.q,\
+ spark_dynamic_partition_pruning_6.q,\
+ spark_dynamic_partition_pruning_mapjoin_only.q,\
+ spark_constprog_dpp.q,\
+ spark_dynamic_partition_pruning_recursive_mapjoin.q,\
+ dynamic_rdd_cache.q, \
+ spark_multi_insert_parallel_orderby.q,\
+ spark_explainuser_1.q,\
+ spark_vectorized_dynamic_partition_pruning.q,\
+ spark_use_ts_stats_for_mapjoin.q,\
+ spark_use_op_stats.q,\
+ spark_explain_groupbyshuffle.q,\
+ spark_opt_shuffle_serde.q
+
+miniSparkOnYarn.query.files=auto_sortmerge_join_16.q,\
+ bucket4.q,\
+ bucket5.q,\
+ bucket6.q,\
+ bucketizedhiveinputformat.q,\
+ bucketmapjoin6.q,\
+ bucketmapjoin7.q,\
+ constprog_partitioner.q,\
+ constprog_semijoin.q,\
+ disable_merge_for_bucketing.q,\
+ empty_dir_in_table.q,\
+ external_table_with_space_in_location_path.q,\
+ file_with_header_footer.q,\
+ gen_udf_example_add10.q,\
+ import_exported_table.q,\
+ index_bitmap3.q,\
+ index_bitmap_auto.q,\
+ infer_bucket_sort_bucketed_table.q,\
+ infer_bucket_sort_map_operators.q,\
+ infer_bucket_sort_merge.q,\
+ infer_bucket_sort_num_buckets.q,\
+ infer_bucket_sort_reducers_power_two.q,\
+ input16_cc.q,\
+ insert_overwrite_directory2.q,\
+ leftsemijoin_mr.q,\
+ list_bucket_dml_10.q,\
+ load_fs2.q,\
+ load_hdfs_file_with_space_in_the_name.q,\
+ orc_merge1.q,\
+ orc_merge2.q,\
+ orc_merge3.q,\
+ orc_merge4.q,\
+ orc_merge5.q,\
+ orc_merge6.q,\
+ orc_merge7.q,\
+ orc_merge8.q,\
+ orc_merge9.q,\
+ orc_merge_diff_fs.q,\
+ orc_merge_incompat1.q,\
+ orc_merge_incompat2.q,\
+ parallel_orderby.q,\
+ quotedid_smb.q,\
+ reduce_deduplicate.q,\
+ remote_script.q,\
+ root_dir_external_table.q,\
+ schemeAuthority.q,\
+ schemeAuthority2.q,\
+ scriptfile1.q,\
+ scriptfile1_win.q,\
+ temp_table_external.q,\
+ truncate_column_buckets.q,\
+ uber_reduce.q,\
+ vector_inner_join.q,\
+ vector_outer_join0.q,\
+ vector_outer_join1.q,\
+ vector_outer_join2.q,\
+ vector_outer_join3.q,\
+ vector_outer_join4.q,\
+ vector_outer_join5.q
+
+# These tests are removed from miniSparkOnYarn.query.files
+# ql_rewrite_gbtoidx.q,\
+# ql_rewrite_gbtoidx_cbo_1.q,\
+# smb_mapjoin_8.q,\
+
+localSpark.only.query.files=spark_local_queries.q
+
+spark.query.negative.files=groupby2_map_skew_multi_distinct.q,\
+ groupby2_multi_distinct.q,\
+ groupby3_map_skew_multi_distinct.q,\
+ groupby3_multi_distinct.q,\
+ groupby_grouping_sets7.q,\
+ spark_job_max_tasks.q,\
+ spark_stage_max_tasks.q
+
+spark.perf.disabled.query.files=query14.q,\
+ query64.q
+
+druid.query.files=druidmini_test1.q,\
+ druidmini_test_insert.q,\
+ druidmini_mv.q,\
+ druid_timestamptz.q,\
+ druidmini_dynamic_partition.q,\
+ druidmini_expressions.q,\
+ druidmini_extractTime.q,\
+ druidmini_test_alter.q,\
+ druidmini_floorTime.q
+
http://git-wip-us.apache.org/repos/asf/hive/blob/eea73613/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java
index f429308..d0961b3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java
@@ -90,6 +90,9 @@ public class VectorExtractRow {
TypeInfo[] typeInfos;
ObjectInspector[] objectInspectors;
+ private static final byte[] EMPTY_BYTES = new byte[0];
+ private static final String EMPTY_STRING = "";
+
/*
* Allocate the various arrays.
*/
@@ -257,18 +260,15 @@ public class VectorExtractRow {
final int start = bytesColVector.start[adjustedIndex];
final int length = bytesColVector.length[adjustedIndex];
- if (bytesColVector.isRepeating) {
- if (!bytesColVector.isNull[0] && bytes == null) {
+ BytesWritable bytesWritable = (BytesWritable) primitiveWritable;
+ if (bytes == null || length == 0) {
+ if (length > 0) {
nullBytesReadError(primitiveCategory, batchIndex);
}
+ bytesWritable.set(EMPTY_BYTES, 0, 0);
} else {
- if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && bytes == null) {
- nullBytesReadError(primitiveCategory, batchIndex);
- }
+ bytesWritable.set(bytes, start, length);
}
-
- BytesWritable bytesWritable = (BytesWritable) primitiveWritable;
- bytesWritable.set(bytes, start, length);
return primitiveWritable;
}
case STRING:
@@ -279,18 +279,16 @@ public class VectorExtractRow {
final int start = bytesColVector.start[adjustedIndex];
final int length = bytesColVector.length[adjustedIndex];
- if (bytesColVector.isRepeating) {
- if (!bytesColVector.isNull[0] && bytes == null) {
+ if (bytes == null || length == 0) {
+ if (length > 0) {
nullBytesReadError(primitiveCategory, batchIndex);
}
+ ((Text) primitiveWritable).set(EMPTY_BYTES, 0, 0);
} else {
- if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && bytes == null) {
- nullBytesReadError(primitiveCategory, batchIndex);
- }
- }
- // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String.
- ((Text) primitiveWritable).set(bytes, start, length);
+ // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String.
+ ((Text) primitiveWritable).set(bytes, start, length);
+ }
return primitiveWritable;
}
case VARCHAR:
@@ -301,21 +299,23 @@ public class VectorExtractRow {
final int start = bytesColVector.start[adjustedIndex];
final int length = bytesColVector.length[adjustedIndex];
- if (bytesColVector.isRepeating) {
- if (!bytesColVector.isNull[0] && bytes == null) {
+ final HiveVarcharWritable hiveVarcharWritable = (HiveVarcharWritable) primitiveWritable;
+ if (bytes == null || length == 0) {
+ if (length > 0) {
nullBytesReadError(primitiveCategory, batchIndex);
}
+ hiveVarcharWritable.set(EMPTY_STRING, -1);
} else {
- if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && bytes == null) {
- nullBytesReadError(primitiveCategory, batchIndex);
+ final int adjustedLength =
+ StringExpr.truncate(
+ bytes, start, length, ((VarcharTypeInfo) primitiveTypeInfo).getLength());
+ if (adjustedLength == 0) {
+ hiveVarcharWritable.set(EMPTY_STRING, -1);
+ } else {
+ hiveVarcharWritable.set(
+ new String(bytes, start, adjustedLength, Charsets.UTF_8), -1);
}
}
-
- final int adjustedLength = StringExpr.truncate(bytes, start, length,
- ((VarcharTypeInfo) primitiveTypeInfo).getLength());
-
- final HiveVarcharWritable hiveVarcharWritable = (HiveVarcharWritable) primitiveWritable;
- hiveVarcharWritable.set(new String(bytes, start, adjustedLength, Charsets.UTF_8), -1);
return primitiveWritable;
}
case CHAR:
@@ -326,22 +326,24 @@ public class VectorExtractRow {
final int start = bytesColVector.start[adjustedIndex];
final int length = bytesColVector.length[adjustedIndex];
- if (bytesColVector.isRepeating) {
- if (!bytesColVector.isNull[0] && bytes == null) {
+ final HiveCharWritable hiveCharWritable = (HiveCharWritable) primitiveWritable;
+ final int maxLength = ((CharTypeInfo) primitiveTypeInfo).getLength();
+ if (bytes == null || length == 0) {
+ if (length > 0) {
nullBytesReadError(primitiveCategory, batchIndex);
}
+ hiveCharWritable.set(EMPTY_STRING, maxLength);
} else {
- if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && bytes == null) {
- nullBytesReadError(primitiveCategory, batchIndex);
+ final int adjustedLength = StringExpr.rightTrimAndTruncate(bytes, start, length,
+ ((CharTypeInfo) primitiveTypeInfo).getLength());
+
+ if (adjustedLength == 0) {
+ hiveCharWritable.set(EMPTY_STRING, maxLength);
+ } else {
+ hiveCharWritable.set(
+ new String(bytes, start, adjustedLength, Charsets.UTF_8), maxLength);
}
}
-
- final int adjustedLength = StringExpr.rightTrimAndTruncate(bytes, start, length,
- ((CharTypeInfo) primitiveTypeInfo).getLength());
-
- final HiveCharWritable hiveCharWritable = (HiveCharWritable) primitiveWritable;
- hiveCharWritable.set(new String(bytes, start, adjustedLength, Charsets.UTF_8),
- ((CharTypeInfo) primitiveTypeInfo).getLength());
return primitiveWritable;
}
case DECIMAL:
http://git-wip-us.apache.org/repos/asf/hive/blob/eea73613/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFStructField.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFStructField.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFStructField.java
index 0507fa5..b40126a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFStructField.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFStructField.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -44,36 +46,96 @@ public class VectorUDFStructField extends VectorExpression {
@Override
public void evaluate(VectorizedRowBatch batch) {
+
+ // return immediately if batch is empty
+ final int n = batch.size;
+ if (n == 0) {
+ return;
+ }
+
if (childExpressions != null) {
super.evaluateChildren(batch);
}
ColumnVector outV = batch.cols[outputColumnNum];
+ int[] sel = batch.selected;
StructColumnVector structColumnVector = (StructColumnVector) batch.cols[structColumnNum];
ColumnVector fieldColumnVector = structColumnVector.fields[fieldIndex];
- outV.noNulls = true;
+ boolean[] inputIsNull = structColumnVector.isNull;
+ boolean[] outputIsNull = outV.isNull;
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outV.isRepeating = false;
+
if (structColumnVector.isRepeating) {
- if (structColumnVector.isNull[0]) {
- outV.isNull[0] = true;
- outV.noNulls = false;
- } else {
+ if (structColumnVector.noNulls || !structColumnVector.isNull[0]) {
+ outputIsNull[0] = false;
outV.setElement(0, 0, fieldColumnVector);
- outV.isNull[0] = false;
+ } else {
+ outputIsNull[0] = true;
+ outV.noNulls = false;
}
outV.isRepeating = true;
- } else {
- for (int i = 0; i < batch.size; i++) {
- int j = (batch.selectedInUse) ? batch.selected[i] : i;
- if (structColumnVector.isNull[j]) {
- outV.isNull[j] = true;
- outV.noNulls = false;
+ return;
+ }
+ if (structColumnVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outV.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputIsNull[i] = false;
+ outV.setElement(i, i, fieldColumnVector);
+ }
} else {
- outV.setElement(j, j, fieldColumnVector);
- outV.isNull[j] = false;
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outV.setElement(i, i, fieldColumnVector);
+ }
+ }
+ } else {
+ if (!outV.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outV.noNulls = true;
+ }
+ for(int i = 0; i != n; i++) {
+ outV.setElement(i, i, fieldColumnVector);
+ }
+ }
+ } else /* there are NULLs in the structColumnVector */ {
+
+ /*
+ * Do careful maintenance of the outV.noNulls flag.
+ */
+
+ if (batch.selectedInUse) {
+ for(int j=0; j != n; j++) {
+ int i = sel[j];
+ if (!inputIsNull[i]) {
+ outputIsNull[i] = false;
+ outV.setElement(i, i, fieldColumnVector);
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ } else {
+ for(int i = 0; i != n; i++) {
+ if (!inputIsNull[i]) {
+ outputIsNull[i] = false;
+ outV.setElement(i, i, fieldColumnVector);
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
}
}
- outV.isRepeating = false;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/eea73613/ql/src/test/queries/clientpositive/create_struct_table.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/create_struct_table.q b/ql/src/test/queries/clientpositive/create_struct_table.q
index 1e5d151..fafe52c 100644
--- a/ql/src/test/queries/clientpositive/create_struct_table.q
+++ b/ql/src/test/queries/clientpositive/create_struct_table.q
@@ -1,12 +1,35 @@
+SET hive.vectorized.execution.enabled=false;
-create table abc(strct struct<a:int, b:string, c:string>)
+-- The kv1 input file has only 2 data fields per row, so when the 3-field struct is
+-- deserialized, the premature end of the row puts a NULL in field #3 (strct.c).
+create table string_fields(strct struct<a:int, b:string, c:string>)
row format delimited
fields terminated by '\t'
collection items terminated by '\001';
load data local inpath '../../data/files/kv1.txt'
-overwrite into table abc;
+overwrite into table string_fields;
-SELECT strct, strct.a, strct.b FROM abc LIMIT 10;
+SELECT strct, strct.a, strct.b, strct.c FROM string_fields LIMIT 10;
+create table char_fields(strct struct<a:int, b:char(10), c:char(10)>)
+row format delimited
+ fields terminated by '\t'
+ collection items terminated by '\001';
+
+load data local inpath '../../data/files/kv1.txt'
+overwrite into table char_fields;
+
+SELECT strct, strct.a, strct.b, strct.c FROM char_fields LIMIT 10;
+
+
+create table varchar_fields(strct struct<a:int, b:varchar(5), c:varchar(5)>)
+row format delimited
+ fields terminated by '\t'
+ collection items terminated by '\001';
+
+load data local inpath '../../data/files/kv1.txt'
+overwrite into table varchar_fields;
+
+SELECT strct, strct.a, strct.b, strct.c FROM varchar_fields LIMIT 10;
http://git-wip-us.apache.org/repos/asf/hive/blob/eea73613/ql/src/test/queries/clientpositive/vector_create_struct_table.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_create_struct_table.q b/ql/src/test/queries/clientpositive/vector_create_struct_table.q
new file mode 100644
index 0000000..db26cb2
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_create_struct_table.q
@@ -0,0 +1,45 @@
+SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
+-- The kv1 input file has only 2 data fields per row, so when the 3-field struct is
+-- deserialized, the premature end of the row puts a NULL in field #3 (strct.c).
+create table string_fields(strct struct<a:int, b:string, c:string>)
+row format delimited
+ fields terminated by '\t'
+ collection items terminated by '\001';
+
+load data local inpath '../../data/files/kv1.txt'
+overwrite into table string_fields;
+
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT strct, strct.a, strct.b, strct.c FROM string_fields LIMIT 10;
+
+SELECT strct, strct.a, strct.b, strct.c FROM string_fields LIMIT 10;
+
+
+create table char_fields(strct struct<a:int, b:char(10), c:char(10)>)
+row format delimited
+ fields terminated by '\t'
+ collection items terminated by '\001';
+
+load data local inpath '../../data/files/kv1.txt'
+overwrite into table char_fields;
+
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT strct, strct.a, strct.b, strct.c FROM char_fields LIMIT 10;
+
+SELECT strct, strct.a, strct.b, strct.c FROM char_fields LIMIT 10;
+
+
+create table varchar_fields(strct struct<a:int, b:varchar(5), c:varchar(5)>)
+row format delimited
+ fields terminated by '\t'
+ collection items terminated by '\001';
+
+load data local inpath '../../data/files/kv1.txt'
+overwrite into table varchar_fields;
+
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT strct, strct.a, strct.b, strct.c FROM varchar_fields LIMIT 10;
+
+SELECT strct, strct.a, strct.b, strct.c FROM varchar_fields LIMIT 10;
http://git-wip-us.apache.org/repos/asf/hive/blob/eea73613/ql/src/test/results/clientpositive/create_struct_table.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/create_struct_table.q.out b/ql/src/test/results/clientpositive/create_struct_table.q.out
index f4c7829..f2fd893 100644
--- a/ql/src/test/results/clientpositive/create_struct_table.q.out
+++ b/ql/src/test/results/clientpositive/create_struct_table.q.out
@@ -1,42 +1,126 @@
-PREHOOK: query: create table abc(strct struct<a:int, b:string, c:string>)
+PREHOOK: query: create table string_fields(strct struct<a:int, b:string, c:string>)
row format delimited
fields terminated by '\t'
collection items terminated by '\001'
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
-PREHOOK: Output: default@abc
-POSTHOOK: query: create table abc(strct struct<a:int, b:string, c:string>)
+PREHOOK: Output: default@string_fields
+POSTHOOK: query: create table string_fields(strct struct<a:int, b:string, c:string>)
row format delimited
fields terminated by '\t'
collection items terminated by '\001'
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
-POSTHOOK: Output: default@abc
+POSTHOOK: Output: default@string_fields
PREHOOK: query: load data local inpath '../../data/files/kv1.txt'
-overwrite into table abc
+overwrite into table string_fields
PREHOOK: type: LOAD
#### A masked pattern was here ####
-PREHOOK: Output: default@abc
+PREHOOK: Output: default@string_fields
POSTHOOK: query: load data local inpath '../../data/files/kv1.txt'
-overwrite into table abc
+overwrite into table string_fields
POSTHOOK: type: LOAD
#### A masked pattern was here ####
-POSTHOOK: Output: default@abc
-PREHOOK: query: SELECT strct, strct.a, strct.b FROM abc LIMIT 10
+POSTHOOK: Output: default@string_fields
+PREHOOK: query: SELECT strct, strct.a, strct.b, strct.c FROM string_fields LIMIT 10
PREHOOK: type: QUERY
-PREHOOK: Input: default@abc
+PREHOOK: Input: default@string_fields
#### A masked pattern was here ####
-POSTHOOK: query: SELECT strct, strct.a, strct.b FROM abc LIMIT 10
+POSTHOOK: query: SELECT strct, strct.a, strct.b, strct.c FROM string_fields LIMIT 10
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@abc
-#### A masked pattern was here ####
-{"a":238,"b":"val_238","c":null} 238 val_238
-{"a":86,"b":"val_86","c":null} 86 val_86
-{"a":311,"b":"val_311","c":null} 311 val_311
-{"a":27,"b":"val_27","c":null} 27 val_27
-{"a":165,"b":"val_165","c":null} 165 val_165
-{"a":409,"b":"val_409","c":null} 409 val_409
-{"a":255,"b":"val_255","c":null} 255 val_255
-{"a":278,"b":"val_278","c":null} 278 val_278
-{"a":98,"b":"val_98","c":null} 98 val_98
-{"a":484,"b":"val_484","c":null} 484 val_484
+POSTHOOK: Input: default@string_fields
+#### A masked pattern was here ####
+{"a":238,"b":"val_238","c":null} 238 val_238 NULL
+{"a":86,"b":"val_86","c":null} 86 val_86 NULL
+{"a":311,"b":"val_311","c":null} 311 val_311 NULL
+{"a":27,"b":"val_27","c":null} 27 val_27 NULL
+{"a":165,"b":"val_165","c":null} 165 val_165 NULL
+{"a":409,"b":"val_409","c":null} 409 val_409 NULL
+{"a":255,"b":"val_255","c":null} 255 val_255 NULL
+{"a":278,"b":"val_278","c":null} 278 val_278 NULL
+{"a":98,"b":"val_98","c":null} 98 val_98 NULL
+{"a":484,"b":"val_484","c":null} 484 val_484 NULL
+PREHOOK: query: create table char_fields(strct struct<a:int, b:char(10), c:char(10)>)
+row format delimited
+ fields terminated by '\t'
+ collection items terminated by '\001'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@char_fields
+POSTHOOK: query: create table char_fields(strct struct<a:int, b:char(10), c:char(10)>)
+row format delimited
+ fields terminated by '\t'
+ collection items terminated by '\001'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@char_fields
+PREHOOK: query: load data local inpath '../../data/files/kv1.txt'
+overwrite into table char_fields
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@char_fields
+POSTHOOK: query: load data local inpath '../../data/files/kv1.txt'
+overwrite into table char_fields
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@char_fields
+PREHOOK: query: SELECT strct, strct.a, strct.b, strct.c FROM char_fields LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_fields
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT strct, strct.a, strct.b, strct.c FROM char_fields LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_fields
+#### A masked pattern was here ####
+{"a":238,"b":"val_238 ","c":null} 238 val_238 NULL
+{"a":86,"b":"val_86 ","c":null} 86 val_86 NULL
+{"a":311,"b":"val_311 ","c":null} 311 val_311 NULL
+{"a":27,"b":"val_27 ","c":null} 27 val_27 NULL
+{"a":165,"b":"val_165 ","c":null} 165 val_165 NULL
+{"a":409,"b":"val_409 ","c":null} 409 val_409 NULL
+{"a":255,"b":"val_255 ","c":null} 255 val_255 NULL
+{"a":278,"b":"val_278 ","c":null} 278 val_278 NULL
+{"a":98,"b":"val_98 ","c":null} 98 val_98 NULL
+{"a":484,"b":"val_484 ","c":null} 484 val_484 NULL
+PREHOOK: query: create table varchar_fields(strct struct<a:int, b:varchar(5), c:varchar(5)>)
+row format delimited
+ fields terminated by '\t'
+ collection items terminated by '\001'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@varchar_fields
+POSTHOOK: query: create table varchar_fields(strct struct<a:int, b:varchar(5), c:varchar(5)>)
+row format delimited
+ fields terminated by '\t'
+ collection items terminated by '\001'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@varchar_fields
+PREHOOK: query: load data local inpath '../../data/files/kv1.txt'
+overwrite into table varchar_fields
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@varchar_fields
+POSTHOOK: query: load data local inpath '../../data/files/kv1.txt'
+overwrite into table varchar_fields
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@varchar_fields
+PREHOOK: query: SELECT strct, strct.a, strct.b, strct.c FROM varchar_fields LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_fields
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT strct, strct.a, strct.b, strct.c FROM varchar_fields LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_fields
+#### A masked pattern was here ####
+{"a":238,"b":"val_2","c":null} 238 val_2 NULL
+{"a":86,"b":"val_8","c":null} 86 val_8 NULL
+{"a":311,"b":"val_3","c":null} 311 val_3 NULL
+{"a":27,"b":"val_2","c":null} 27 val_2 NULL
+{"a":165,"b":"val_1","c":null} 165 val_1 NULL
+{"a":409,"b":"val_4","c":null} 409 val_4 NULL
+{"a":255,"b":"val_2","c":null} 255 val_2 NULL
+{"a":278,"b":"val_2","c":null} 278 val_2 NULL
+{"a":98,"b":"val_9","c":null} 98 val_9 NULL
+{"a":484,"b":"val_4","c":null} 484 val_4 NULL
http://git-wip-us.apache.org/repos/asf/hive/blob/eea73613/ql/src/test/results/clientpositive/llap/vector_create_struct_table.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_create_struct_table.q.out b/ql/src/test/results/clientpositive/llap/vector_create_struct_table.q.out
new file mode 100644
index 0000000..14d1803
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_create_struct_table.q.out
@@ -0,0 +1,336 @@
+PREHOOK: query: create table string_fields(strct struct<a:int, b:string, c:string>)
+row format delimited
+ fields terminated by '\t'
+ collection items terminated by '\001'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@string_fields
+POSTHOOK: query: create table string_fields(strct struct<a:int, b:string, c:string>)
+row format delimited
+ fields terminated by '\t'
+ collection items terminated by '\001'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@string_fields
+PREHOOK: query: load data local inpath '../../data/files/kv1.txt'
+overwrite into table string_fields
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@string_fields
+POSTHOOK: query: load data local inpath '../../data/files/kv1.txt'
+overwrite into table string_fields
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@string_fields
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT strct, strct.a, strct.b, strct.c FROM string_fields LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT strct, strct.a, strct.b, strct.c FROM string_fields LIMIT 10
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: string_fields
+ Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: strct (type: struct<a:int,b:string,c:string>), strct.a (type: int), strct.b (type: string), strct.c (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 2, 3, 4]
+ selectExpressions: VectorUDFStructField(col 0:struct<a:int,b:string,c:string>, col 0:int) -> 2:int, VectorUDFStructField(col 0:struct<a:int,b:string,c:string>, col 1:int) -> 3:string, VectorUDFStructField(col 0:struct<a:int,b:string,c:string>, col 2:int) -> 4:string
+ Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
+ Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT strct, strct.a, strct.b, strct.c FROM string_fields LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@string_fields
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT strct, strct.a, strct.b, strct.c FROM string_fields LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@string_fields
+#### A masked pattern was here ####
+{"a":238,"b":"val_238","c":null} 238 val_238 NULL
+{"a":86,"b":"val_86","c":null} 86 val_86 NULL
+{"a":311,"b":"val_311","c":null} 311 val_311 NULL
+{"a":27,"b":"val_27","c":null} 27 val_27 NULL
+{"a":165,"b":"val_165","c":null} 165 val_165 NULL
+{"a":409,"b":"val_409","c":null} 409 val_409 NULL
+{"a":255,"b":"val_255","c":null} 255 val_255 NULL
+{"a":278,"b":"val_278","c":null} 278 val_278 NULL
+{"a":98,"b":"val_98","c":null} 98 val_98 NULL
+{"a":484,"b":"val_484","c":null} 484 val_484 NULL
+PREHOOK: query: create table char_fields(strct struct<a:int, b:char(10), c:char(10)>)
+row format delimited
+ fields terminated by '\t'
+ collection items terminated by '\001'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@char_fields
+POSTHOOK: query: create table char_fields(strct struct<a:int, b:char(10), c:char(10)>)
+row format delimited
+ fields terminated by '\t'
+ collection items terminated by '\001'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@char_fields
+PREHOOK: query: load data local inpath '../../data/files/kv1.txt'
+overwrite into table char_fields
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@char_fields
+POSTHOOK: query: load data local inpath '../../data/files/kv1.txt'
+overwrite into table char_fields
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@char_fields
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT strct, strct.a, strct.b, strct.c FROM char_fields LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT strct, strct.a, strct.b, strct.c FROM char_fields LIMIT 10
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: char_fields
+ Statistics: Num rows: 1 Data size: 248 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: strct (type: struct<a:int,b:char(10),c:char(10)>), strct.a (type: int), strct.b (type: char(10)), strct.c (type: char(10))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 2, 3, 4]
+ selectExpressions: VectorUDFStructField(col 0:struct<a:int,b:char(10),c:char(10)>, col 0:int) -> 2:int, VectorUDFStructField(col 0:struct<a:int,b:char(10),c:char(10)>, col 1:int) -> 3:char(10), VectorUDFStructField(col 0:struct<a:int,b:char(10),c:char(10)>, col 2:int) -> 4:char(10)
+ Statistics: Num rows: 1 Data size: 248 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
+ Statistics: Num rows: 1 Data size: 248 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 248 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT strct, strct.a, strct.b, strct.c FROM char_fields LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_fields
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT strct, strct.a, strct.b, strct.c FROM char_fields LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_fields
+#### A masked pattern was here ####
+{"a":238,"b":"val_238 ","c":null} 238 val_238 NULL
+{"a":86,"b":"val_86 ","c":null} 86 val_86 NULL
+{"a":311,"b":"val_311 ","c":null} 311 val_311 NULL
+{"a":27,"b":"val_27 ","c":null} 27 val_27 NULL
+{"a":165,"b":"val_165 ","c":null} 165 val_165 NULL
+{"a":409,"b":"val_409 ","c":null} 409 val_409 NULL
+{"a":255,"b":"val_255 ","c":null} 255 val_255 NULL
+{"a":278,"b":"val_278 ","c":null} 278 val_278 NULL
+{"a":98,"b":"val_98 ","c":null} 98 val_98 NULL
+{"a":484,"b":"val_484 ","c":null} 484 val_484 NULL
+PREHOOK: query: create table varchar_fields(strct struct<a:int, b:varchar(5), c:varchar(5)>)
+row format delimited
+ fields terminated by '\t'
+ collection items terminated by '\001'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@varchar_fields
+POSTHOOK: query: create table varchar_fields(strct struct<a:int, b:varchar(5), c:varchar(5)>)
+row format delimited
+ fields terminated by '\t'
+ collection items terminated by '\001'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@varchar_fields
+PREHOOK: query: load data local inpath '../../data/files/kv1.txt'
+overwrite into table varchar_fields
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@varchar_fields
+POSTHOOK: query: load data local inpath '../../data/files/kv1.txt'
+overwrite into table varchar_fields
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@varchar_fields
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT strct, strct.a, strct.b, strct.c FROM varchar_fields LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT strct, strct.a, strct.b, strct.c FROM varchar_fields LIMIT 10
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: varchar_fields
+ Statistics: Num rows: 1 Data size: 238 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: strct (type: struct<a:int,b:varchar(5),c:varchar(5)>), strct.a (type: int), strct.b (type: varchar(5)), strct.c (type: varchar(5))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 2, 3, 4]
+ selectExpressions: VectorUDFStructField(col 0:struct<a:int,b:varchar(5),c:varchar(5)>, col 0:int) -> 2:int, VectorUDFStructField(col 0:struct<a:int,b:varchar(5),c:varchar(5)>, col 1:int) -> 3:varchar(5), VectorUDFStructField(col 0:struct<a:int,b:varchar(5),c:varchar(5)>, col 2:int) -> 4:varchar(5)
+ Statistics: Num rows: 1 Data size: 238 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
+ Statistics: Num rows: 1 Data size: 238 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 238 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT strct, strct.a, strct.b, strct.c FROM varchar_fields LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_fields
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT strct, strct.a, strct.b, strct.c FROM varchar_fields LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_fields
+#### A masked pattern was here ####
+{"a":238,"b":"val_2","c":null} 238 val_2 NULL
+{"a":86,"b":"val_8","c":null} 86 val_8 NULL
+{"a":311,"b":"val_3","c":null} 311 val_3 NULL
+{"a":27,"b":"val_2","c":null} 27 val_2 NULL
+{"a":165,"b":"val_1","c":null} 165 val_1 NULL
+{"a":409,"b":"val_4","c":null} 409 val_4 NULL
+{"a":255,"b":"val_2","c":null} 255 val_2 NULL
+{"a":278,"b":"val_2","c":null} 278 val_2 NULL
+{"a":98,"b":"val_9","c":null} 98 val_9 NULL
+{"a":484,"b":"val_4","c":null} 484 val_4 NULL
http://git-wip-us.apache.org/repos/asf/hive/blob/eea73613/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
index fe0ee48..dd88da8 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
@@ -497,6 +497,12 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
private int parseComplexField(int start, int end, int level) {
+ if (start == end + 1) {
+
+ // Data ended prematurely. Return start - 1 so the caller's "fieldEnd + 1" leaves the field position unchanged.
+ return start - 1;
+ }
+
final byte separator = separators[level];
int fieldByteEnd = start;
@@ -996,7 +1002,9 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
final ListComplexTypeHelper listHelper = (ListComplexTypeHelper) complexTypeHelper;
final int fieldPosition = listHelper.fieldPosition;
final int complexFieldEnd = listHelper.complexFieldEnd;
- Preconditions.checkState(fieldPosition <= complexFieldEnd);
+
+ // When the data ends prematurely, fieldPosition will be 1 more than complexFieldEnd.
+ Preconditions.checkState(fieldPosition <= complexFieldEnd + 1);
final int fieldEnd = parseComplexField(fieldPosition, complexFieldEnd, currentLevel);
listHelper.fieldPosition = fieldEnd + 1; // Move past separator.
@@ -1011,7 +1019,9 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
final MapComplexTypeHelper mapHelper = (MapComplexTypeHelper) complexTypeHelper;
final int fieldPosition = mapHelper.fieldPosition;
final int complexFieldEnd = mapHelper.complexFieldEnd;
- Preconditions.checkState(fieldPosition <= complexFieldEnd);
+
+ // When the data ends prematurely, fieldPosition will be 1 more than complexFieldEnd.
+ Preconditions.checkState(fieldPosition <= complexFieldEnd + 1);
currentFieldStart = fieldPosition;
@@ -1057,7 +1067,9 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
final StructComplexTypeHelper structHelper = (StructComplexTypeHelper) complexTypeHelper;
final int fieldPosition = structHelper.fieldPosition;
final int complexFieldEnd = structHelper.complexFieldEnd;
- Preconditions.checkState(fieldPosition <= complexFieldEnd);
+
+ // When the data ends prematurely, fieldPosition will be 1 more than complexFieldEnd.
+ Preconditions.checkState(fieldPosition <= complexFieldEnd + 1);
currentFieldStart = fieldPosition;
@@ -1069,7 +1081,7 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
// Parse until field separator (currentLevel).
fieldEnd = parseComplexField(fieldPosition, complexFieldEnd, currentLevel);
- structHelper.fieldPosition = fieldEnd + 1; // Move past key separator.
+ structHelper.fieldPosition = fieldEnd + 1; // Move past parent field separator.
currentFieldLength = fieldEnd - fieldPosition;
@@ -1101,7 +1113,9 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
final UnionComplexTypeHelper unionHelper = (UnionComplexTypeHelper) complexTypeHelper;
final int fieldPosition = unionHelper.fieldPosition;
final int complexFieldEnd = unionHelper.complexFieldEnd;
- Preconditions.checkState(fieldPosition <= complexFieldEnd);
+
+ // When the data ends prematurely, fieldPosition will be 1 more than complexFieldEnd.
+ Preconditions.checkState(fieldPosition <= complexFieldEnd + 1);
currentFieldStart = fieldPosition;