You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/10/30 13:31:52 UTC
svn commit: r1635477 [1/17] - in /hive/branches/spark:
common/src/java/org/apache/hadoop/hive/common/ data/conf/spark/
ql/src/java/org/apache/hadoop/hive/ql/exec/spark/
ql/src/java/org/apache/hadoop/hive/ql/exec/spark/counter/
ql/src/java/org/apache/ha...
Author: xuefu
Date: Thu Oct 30 12:31:47 2014
New Revision: 1635477
URL: http://svn.apache.org/r1635477
Log:
HIVE-8539: Enable collect table statistics based on SparkCounter[Spark Branch] (Chengxiang via Xuefu)
Modified:
hive/branches/spark/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
hive/branches/spark/data/conf/spark/hive-site.xml
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkClient.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkRecordHandler.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/counter/SparkCounters.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSession.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/SparkJobStatus.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/SimpleSparkJobStatus.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsPublisher.java
hive/branches/spark/ql/src/test/results/clientpositive/spark/add_part_multiple.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/alter_merge_orc.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join14.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join17.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join19.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join2.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join24.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join25.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join26.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join3.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join4.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join5.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join6.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join7.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join8.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join9.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucket2.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucket3.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucket4.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin6.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/column_access_stats.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/create_merge_compressed.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/ctas.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/custom_input_output_format.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/date_udf.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/enforce_order.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby10.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby11.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby2.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby3.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby3_map.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby4.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_map.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_map_multi_single_reducer.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_map.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby9.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_complex_types.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_cube1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer2.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_position.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/infer_bucket_sort_convert_join.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/innerjoin.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/input12.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/input13.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/input14.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/input17.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/input18.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/input1_limit.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/input_part2.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/insert1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/insert_into1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/insert_into2.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/insert_into3.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join14.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join17.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join2.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join24.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join25.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join26.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join27.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join28.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join29.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join3.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join30.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join31.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join32.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join33.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join34.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join35.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join36.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join37.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join38.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join39.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join4.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join41.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join5.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join6.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join7.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join8.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join9.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join_map_ppr.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join_nullsafe.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join_rc.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part10.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part11.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part12.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part15.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part2.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part3.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part4.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part5.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part6.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part7.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part9.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/mapjoin_decimal.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/mapjoin_hook.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/mapjoin_memcheck.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/mapreduce1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/mapreduce2.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/merge1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/merge2.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert_gby.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_join_union.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/parallel.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/parallel_join1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/parquet_join.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/pcr.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/ppd_join4.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/ppd_join5.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/ppd_multi_insert.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/sample1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/sample10.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/sample2.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/sample4.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/sample5.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/sample6.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/sample7.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/scriptfile1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/semijoin.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoin.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt10.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt15.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt18.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin9.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_14.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_16.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_18.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_19.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_20.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_21.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_22.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_8.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_2.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_3.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_4.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_5.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_6.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_7.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_8.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/table_access_keys_stats.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/temp_table.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/temp_table_join1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/timestamp_1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/timestamp_2.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/timestamp_3.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/timestamp_lazy.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/timestamp_udf.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/transform1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union10.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union18.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union19.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union25.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union28.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union29.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union3.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union30.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union33.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union4.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union6.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/vector_char_4.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/vector_data_types.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/vector_string_concat.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/vector_varchar_4.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/vectorization_decimal_date.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/vectorization_part.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/vectorization_part_project.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/vectorized_bucketmapjoin1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/vectorized_rcfile_columnar.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/windowing.q.out
Modified: hive/branches/spark/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java (original)
+++ hive/branches/spark/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java Thu Oct 30 12:31:47 2014
@@ -55,6 +55,8 @@ public class StatsSetupConst {
public String getAggregator(Configuration conf) {
if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
return "org.apache.hadoop.hive.ql.stats.CounterStatsAggregatorTez";
+ } else if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
+ return "org.apache.hadoop.hive.ql.stats.CounterStatsAggregatorSpark";
}
return "org.apache.hadoop.hive.ql.stats.CounterStatsAggregator"; }
},
Modified: hive/branches/spark/data/conf/spark/hive-site.xml
URL: http://svn.apache.org/viewvc/hive/branches/spark/data/conf/spark/hive-site.xml?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/data/conf/spark/hive-site.xml (original)
+++ hive/branches/spark/data/conf/spark/hive-site.xml Thu Oct 30 12:31:47 2014
@@ -185,7 +185,7 @@
<property>
<name>hive.stats.dbclass</name>
- <value>counter</value>
+ <value>fs</value>
<description>The default storatge that stores temporary hive statistics. Currently, jdbc, hbase and counter type is supported</description>
</property>
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkClient.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkClient.java?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkClient.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkClient.java Thu Oct 30 12:31:47 2014
@@ -25,14 +25,13 @@ import org.apache.commons.logging.LogFac
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.DriverContext;
import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.exec.spark.counter.SparkCounter;
-import org.apache.hadoop.hive.ql.exec.spark.counter.SparkCounterGroup;
import org.apache.hadoop.hive.ql.exec.spark.counter.SparkCounters;
-import org.apache.hadoop.hive.ql.exec.spark.status.SparkJobMonitor;
+import org.apache.hadoop.hive.ql.exec.spark.status.SparkJobRef;
import org.apache.hadoop.hive.ql.exec.spark.status.impl.JobStateListener;
import org.apache.hadoop.hive.ql.exec.spark.status.impl.SimpleSparkJobStatus;
import org.apache.hadoop.hive.ql.io.HiveKey;
@@ -151,7 +150,7 @@ public class SparkClient implements Seri
return sparkConf;
}
- public int execute(DriverContext driverContext, SparkWork sparkWork) {
+ public SparkJobRef execute(DriverContext driverContext, SparkWork sparkWork) throws Exception {
Context ctx = driverContext.getCtx();
HiveConf hiveConf = (HiveConf) ctx.getConf();
refreshLocalResources(sparkWork, hiveConf);
@@ -159,49 +158,35 @@ public class SparkClient implements Seri
// Create temporary scratch dir
Path emptyScratchDir;
- try {
- emptyScratchDir = ctx.getMRTmpPath();
- FileSystem fs = emptyScratchDir.getFileSystem(jobConf);
- fs.mkdirs(emptyScratchDir);
- } catch (IOException e) {
- LOG.error("Error launching map-reduce job", e);
- return 5;
- }
+ emptyScratchDir = ctx.getMRTmpPath();
+ FileSystem fs = emptyScratchDir.getFileSystem(jobConf);
+ fs.mkdirs(emptyScratchDir);
SparkCounters sparkCounters = new SparkCounters(sc, hiveConf);
+ List<String> prefixes = sparkWork.getRequiredCounterPrefix();
+ // Register Spark counters before submitting the Spark job.
+ if (prefixes != null) {
+ for (String prefix : prefixes) {
+ sparkCounters.createCounter(prefix, StatsSetupConst.ROW_COUNT);
+ sparkCounters.createCounter(prefix, StatsSetupConst.RAW_DATA_SIZE);
+ }
+ }
SparkReporter sparkReporter = new SparkReporter(sparkCounters);
// Generate Spark plan
SparkPlanGenerator gen =
new SparkPlanGenerator(sc, ctx, jobConf, emptyScratchDir, sparkReporter);
- SparkPlan plan;
- try {
- plan = gen.generate(sparkWork);
- } catch (Exception e) {
- LOG.error("Error generating Spark Plan", e);
- return 2;
- }
+ SparkPlan plan = gen.generate(sparkWork);
// Execute generated plan.
- try {
- JavaPairRDD<HiveKey, BytesWritable> finalRDD = plan.generateGraph();
- // We use Spark RDD async action to submit job as it's the only way to get jobId now.
- JavaFutureAction<Void> future = finalRDD.foreachAsync(HiveVoidFunction.getInstance());
- // An action may trigger multi jobs in Spark, we only monitor the latest job here
- // until we found that Hive does trigger multi jobs.
- List<Integer> jobIds = future.jobIds();
- // jobIds size is always bigger than or equal with 1.
- int jobId = jobIds.get(jobIds.size() - 1);
- SimpleSparkJobStatus sparkJobStatus = new SimpleSparkJobStatus(
- jobId, jobStateListener, jobProgressListener);
- SparkJobMonitor monitor = new SparkJobMonitor(sparkJobStatus);
- monitor.startMonitor();
- } catch (Exception e) {
- LOG.error("Error executing Spark Plan", e);
- return 1;
- }
-
- return 0;
+ JavaPairRDD<HiveKey, BytesWritable> finalRDD = plan.generateGraph();
+ // We use Spark RDD async action to submit job as it's the only way to get jobId now.
+ JavaFutureAction<Void> future = finalRDD.foreachAsync(HiveVoidFunction.getInstance());
+ // As we always use a foreach action to submit the RDD graph, it would only trigger one job.
+ int jobId = future.jobIds().get(0);
+ SimpleSparkJobStatus sparkJobStatus =
+ new SimpleSparkJobStatus(jobId, jobStateListener, jobProgressListener, sparkCounters);
+ return new SparkJobRef(jobId, sparkJobStatus);
}
/**
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java Thu Oct 30 12:31:47 2014
@@ -105,6 +105,7 @@ public class SparkMapRecordHandler exten
execContext.setLocalWork(localWork);
MapredContext.init(true, new JobConf(jc));
+ MapredContext.get().setReporter(reporter);
mo.setExecContext(execContext);
mo.initializeLocalWork(jc);
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkRecordHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkRecordHandler.java?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkRecordHandler.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkRecordHandler.java Thu Oct 30 12:31:47 2014
@@ -48,6 +48,7 @@ public abstract class SparkRecordHandler
public void init(JobConf job, OutputCollector output, Reporter reporter) {
jc = job;
MapredContext.init(false, new JobConf(jc));
+ MapredContext.get().setReporter(reporter);
oc = output;
rp = reporter;
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java Thu Oct 30 12:31:47 2014
@@ -30,9 +30,12 @@ import org.apache.hadoop.hive.ql.QueryPl
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.spark.counter.SparkCounters;
import org.apache.hadoop.hive.ql.exec.spark.session.SparkSession;
import org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionManager;
import org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionManagerImpl;
+import org.apache.hadoop.hive.ql.exec.spark.status.SparkJobMonitor;
+import org.apache.hadoop.hive.ql.exec.spark.status.SparkJobRef;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.BaseWork;
import org.apache.hadoop.hive.ql.plan.MapWork;
@@ -47,6 +50,7 @@ public class SparkTask extends Task<Spar
private static final long serialVersionUID = 1L;
private transient JobConf job;
private transient ContentSummary inputSummary;
+ private SparkCounters sparkCounters;
@Override
public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext driverContext) {
@@ -64,8 +68,8 @@ public class SparkTask extends Task<Spar
printConfigInfo();
sparkSessionManager = SparkSessionManagerImpl.getInstance();
sparkSession = SessionState.get().getSparkSession();
-
- // Spark configurations are updated close the existing session
+
+ // If Spark configurations have been updated, close the existing session
if(conf.getSparkConfigUpdated()){
sparkSessionManager.closeSession(sparkSession);
sparkSession = null;
@@ -73,12 +77,21 @@ public class SparkTask extends Task<Spar
}
sparkSession = sparkSessionManager.getSession(sparkSession, conf, true);
SessionState.get().setSparkSession(sparkSession);
- rc = sparkSession.submit(driverContext, getWork());
+ SparkWork sparkWork = getWork();
+ String statsImpl = HiveConf.getVar(conf, HiveConf.ConfVars.HIVESTATSDBCLASS);
+ if (statsImpl.equalsIgnoreCase("counter")) {
+ sparkWork.setRequiredCounterPrefix(SparkUtilities.getRequiredCounterPrefix(this, db));
+ }
+ SparkJobRef jobRef = sparkSession.submit(driverContext, sparkWork);
+ sparkCounters = jobRef.getSparkJobStatus().getCounter();
+ SparkJobMonitor monitor = new SparkJobMonitor(jobRef.getSparkJobStatus());
+ monitor.startMonitor();
+ console.printInfo(sparkCounters.toString());
+ rc = 0;
} catch (Exception e) {
LOG.error("Failed to execute spark task.", e);
return 1;
- }
- finally {
+ } finally {
if (sparkSession != null && sparkSessionManager != null) {
rc = close(rc);
try {
@@ -154,6 +167,10 @@ public class SparkTask extends Task<Spar
return result;
}
+ public SparkCounters getSparkCounters() {
+ return sparkCounters;
+ }
+
/**
* Set the number of reducers for the spark work.
*/
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java Thu Oct 30 12:31:47 2014
@@ -17,7 +17,35 @@
*/
package org.apache.hadoop.hive.ql.exec.spark;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.MoveTask;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.StatsTask;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.HiveKey;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
+import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
+import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
+import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
+import org.apache.hadoop.hive.ql.plan.MoveWork;
+import org.apache.hadoop.hive.ql.plan.StatsWork;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.io.BytesWritable;
/**
@@ -42,4 +70,108 @@ public class SparkUtilities {
return copy;
}
+ public static List<String> getRequiredCounterPrefix(SparkTask sparkTask, Hive db)
+ throws HiveException, MetaException {
+
+ List<String> prefixs = new LinkedList<String>();
+ List<BaseWork> works = sparkTask.getWork().getAllWork();
+ for (BaseWork baseWork : works) {
+ Set<Operator<?>> operators = baseWork.getAllOperators();
+ for (Operator<?> operator : operators) {
+ if (operator instanceof TableScanOperator) {
+ TableScanOperator tableScanOperator = (TableScanOperator) operator;
+ TableScanDesc tableScanDesc = tableScanOperator.getConf();
+
+ if (tableScanDesc.isGatherStats()) {
+ List<Task<? extends Serializable>> childTasks = getChildTasks(sparkTask);
+ for (Task<? extends Serializable> task : childTasks) {
+ if (task instanceof StatsTask) {
+ StatsTask statsTask = (StatsTask) task;
+ StatsWork statsWork = statsTask.getWork();
+ // ANALYZE command
+ BaseSemanticAnalyzer.tableSpec tblSpec = statsWork.getTableSpecs();
+ Table table = tblSpec.tableHandle;
+ if (!table.isPartitioned()) {
+ prefixs.add(tableScanDesc.getStatsAggPrefix()); // non-partitioned
+ } else {
+ for (Partition partition : tblSpec.partitions) {
+ String aggrPrefix = getAggregationPrefix(
+ table, partition.getSpec(), tableScanDesc.getMaxStatsKeyPrefixLength());
+ prefixs.add(aggrPrefix);
+ }
+ }
+ }
+ }
+ }
+ } else if (operator instanceof FileSinkOperator) {
+ FileSinkOperator fileSinkOperator = (FileSinkOperator) operator;
+ FileSinkDesc fileSinkDesc = fileSinkOperator.getConf();
+
+ if (fileSinkDesc.isGatherStats()) {
+ List<Task<? extends Serializable>> childTasks = getChildTasks(sparkTask);
+ for (Task<? extends Serializable> task : childTasks) {
+ if (task instanceof MoveTask) {
+ MoveTask moveTask = (MoveTask) task;
+ MoveWork moveWork = moveTask.getWork();
+
+ // INSERT OVERWRITE command
+ LoadTableDesc tbd = moveWork.getLoadTableWork();
+ Table table = db.getTable(tbd.getTable().getTableName());
+ if (!table.isPartitioned()) {
+ prefixs.add(
+ getAggregationPrefix(table, null, fileSinkDesc.getMaxStatsKeyPrefixLength()));
+ } else {
+ DynamicPartitionCtx dpCtx = tbd.getDPCtx();
+ if (dpCtx == null || dpCtx.getNumDPCols() == 0) {
+ // static partition
+ Map<String, String> partitionSpec = tbd.getPartitionSpec();
+ if (partitionSpec != null && !partitionSpec.isEmpty()) {
+ String aggrPrefix = getAggregationPrefix(
+ table, partitionSpec, fileSinkDesc.getMaxStatsKeyPrefixLength());
+ prefixs.add(aggrPrefix);
+ }
+ } else {
+ // dynamic partition
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ return prefixs;
+ }
+
+ private static String getAggregationPrefix(Table table, Map<String, String> partitionSpec, int maxKeyLength)
+ throws MetaException {
+ StringBuilder prefix = new StringBuilder();
+ // prefix is of the form dbName.tblName
+ prefix.append(table.getDbName()).append('.').append(table.getTableName());
+ if (partitionSpec != null) {
+ return Utilities.join(prefix.toString(), Warehouse.makePartPath(partitionSpec));
+ }
+ return Utilities.getHashedStatsPrefix(prefix.toString(), maxKeyLength);
+ }
+
+ private static List<Task<? extends Serializable>> getChildTasks(
+ Task<? extends Serializable> rootTask) {
+
+ List<Task<? extends Serializable>> tasks = new ArrayList<Task<? extends Serializable>>();
+ fillChildTasks(tasks, rootTask);
+ return tasks;
+ }
+
+ private static void fillChildTasks(
+ List<Task<? extends Serializable>> tasks,
+ Task<? extends Serializable> rootTask) {
+
+ List<Task<? extends Serializable>> childTasks = rootTask.getChildTasks();
+ tasks.add(rootTask);
+ if (childTasks != null) {
+ for (Task<? extends Serializable> task : childTasks) {
+ fillChildTasks(tasks, task);
+ }
+ }
+ }
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/counter/SparkCounters.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/counter/SparkCounters.java?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/counter/SparkCounters.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/counter/SparkCounters.java Thu Oct 30 12:31:47 2014
@@ -21,6 +21,8 @@ import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
@@ -44,6 +46,7 @@ import org.apache.spark.api.java.JavaSpa
* 3. Hive could only get Counter value at driver side.
*/
public class SparkCounters implements Serializable {
+ private static final Log LOG = LogFactory.getLog(SparkCounters.class);
private Map<String, SparkCounterGroup> sparkCounterGroups;
@@ -90,20 +93,22 @@ public class SparkCounters implements Se
public void increment(String groupName, String counterName, long value) {
SparkCounter counter = getGroup(groupName).getCounter(counterName);
if (counter == null) {
- throw new RuntimeException(
+ LOG.error(
String.format("counter[%s, %s] has not initialized before.", groupName, counterName));
+ } else {
+ counter.increment(value);
}
- counter.increment(value);
}
public long getValue(String groupName, String counterName) {
SparkCounter counter = getGroup(groupName).getCounter(counterName);
if (counter == null) {
- throw new RuntimeException(
+ LOG.error(
String.format("counter[%s, %s] has not initialized before.", groupName, counterName));
+ return 0;
+ } else {
+ return counter.getValue();
}
-
- return counter.getValue();
}
public SparkCounter getCounter(String groupName, String counterName) {
@@ -127,4 +132,29 @@ public class SparkCounters implements Se
public Map<String, SparkCounterGroup> getSparkCounterGroups() {
return sparkCounterGroups;
}
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ Map<String, SparkCounterGroup> groups = getSparkCounterGroups();
+ if (groups != null) {
+ for(Map.Entry<String, SparkCounterGroup> groupEntry : groups.entrySet()) {
+ String groupName = groupEntry.getKey();
+ SparkCounterGroup group = groupEntry.getValue();
+ sb.append(groupName).append("\n");
+ Map<String, SparkCounter> counters = group.getSparkCounters();
+ for (Map.Entry<String, SparkCounter> counterEntry : counters.entrySet()) {
+ String counterName = counterEntry.getKey();
+ SparkCounter counter = counterEntry.getValue();
+ sb.append("\t")
+ .append(counterName)
+ .append(": ")
+ .append(counter.getValue())
+ .append("\n");
+ }
+ }
+ }
+
+ return sb.toString();
+ }
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSession.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSession.java?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSession.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSession.java Thu Oct 30 12:31:47 2014
@@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.s
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.DriverContext;
+import org.apache.hadoop.hive.ql.exec.spark.status.SparkJobRef;
import org.apache.hadoop.hive.ql.plan.SparkWork;
public interface SparkSession {
@@ -32,7 +33,7 @@ public interface SparkSession {
* @param driverContext
* @param sparkWork
*/
- public int submit(DriverContext driverContext, SparkWork sparkWork);
+ public SparkJobRef submit(DriverContext driverContext, SparkWork sparkWork) throws Exception;
/**
* Is the session open and ready to submit jobs?
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java Thu Oct 30 12:31:47 2014
@@ -21,6 +21,7 @@ import com.google.common.base.Preconditi
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.DriverContext;
import org.apache.hadoop.hive.ql.exec.spark.SparkClient;
+import org.apache.hadoop.hive.ql.exec.spark.status.SparkJobRef;
import org.apache.hadoop.hive.ql.plan.SparkWork;
import java.util.UUID;
@@ -46,7 +47,7 @@ public class SparkSessionImpl implements
}
@Override
- public int submit(DriverContext driverContext, SparkWork sparkWork) {
+ public SparkJobRef submit(DriverContext driverContext, SparkWork sparkWork) throws Exception {
Preconditions.checkState(isOpen, "Session is not open. Can't submit jobs.");
sparkClient = SparkClient.getInstance(driverContext.getCtx().getConf());
return sparkClient.execute(driverContext, sparkWork);
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/SparkJobStatus.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/SparkJobStatus.java?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/SparkJobStatus.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/SparkJobStatus.java Thu Oct 30 12:31:47 2014
@@ -17,6 +17,8 @@
*/
package org.apache.hadoop.hive.ql.exec.spark.status;
+import org.apache.hadoop.hive.ql.exec.spark.counter.SparkCounters;
+
import java.util.Map;
/**
@@ -32,4 +34,6 @@ public interface SparkJobStatus {
public Map<String, SparkStageProgress> getSparkStageProgress();
+ public SparkCounters getCounter();
+
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/SimpleSparkJobStatus.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/SimpleSparkJobStatus.java?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/SimpleSparkJobStatus.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/SimpleSparkJobStatus.java Thu Oct 30 12:31:47 2014
@@ -22,6 +22,7 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Map;
+import org.apache.hadoop.hive.ql.exec.spark.counter.SparkCounters;
import org.apache.hadoop.hive.ql.exec.spark.status.SparkJobState;
import org.apache.hadoop.hive.ql.exec.spark.status.SparkJobStatus;
import org.apache.hadoop.hive.ql.exec.spark.status.SparkStageProgress;
@@ -40,15 +41,18 @@ public class SimpleSparkJobStatus implem
private int jobId;
private JobStateListener jobStateListener;
private JobProgressListener jobProgressListener;
+ private SparkCounters sparkCounters;
public SimpleSparkJobStatus(
int jobId,
JobStateListener stateListener,
- JobProgressListener progressListener) {
+ JobProgressListener progressListener,
+ SparkCounters sparkCounters) {
this.jobId = jobId;
this.jobStateListener = stateListener;
this.jobProgressListener = progressListener;
+ this.sparkCounters = sparkCounters;
}
@Override
@@ -111,6 +115,11 @@ public class SimpleSparkJobStatus implem
return stageProgresses;
}
+ @Override
+ public SparkCounters getCounter() {
+ return sparkCounters;
+ }
+
private List<StageInfo> getStageInfo(int stageId) {
List<StageInfo> stageInfos = new LinkedList<StageInfo>();
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java Thu Oct 30 12:31:47 2014
@@ -54,10 +54,13 @@ public class SparkWork extends AbstractO
protected final Map<Pair<BaseWork, BaseWork>, SparkEdgeProperty> edgeProperties =
new HashMap<Pair<BaseWork, BaseWork>, SparkEdgeProperty>();
+ private List<String> requiredCounterPrefix;
+
public SparkWork(String name) {
this.name = name + ":" + (++counter);
}
+
@Explain(displayName = "DagName")
public String getName() {
return name;
@@ -173,6 +176,14 @@ public class SparkWork extends AbstractO
return new HashSet<BaseWork>(leaves);
}
+ public void setRequiredCounterPrefix(List<String> requiredCounterPrefix) {
+ this.requiredCounterPrefix = requiredCounterPrefix;
+ }
+
+ public List<String> getRequiredCounterPrefix() {
+ return requiredCounterPrefix;
+ }
+
/**
* getParents returns all the nodes with edges leading into work
*/
@@ -300,5 +311,5 @@ public class SparkWork extends AbstractO
}
return result;
}
-
+
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsPublisher.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsPublisher.java?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsPublisher.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsPublisher.java Thu Oct 30 12:31:47 2014
@@ -52,8 +52,9 @@ public class CounterStatsPublisher imple
for (Map.Entry<String, String> entry : stats.entrySet()) {
try {
reporter.incrCounter(fileID, entry.getKey(), Long.valueOf(entry.getValue()));
- } catch (NumberFormatException e) {
- LOG.error("Invalid counter value " + entry.getValue() + " for " + entry.getKey());
+ } catch (Exception e) {
+ LOG.error("Failed to increment counter value " + entry.getValue() + " for " + entry.getKey());
+ return false;
}
}
return true;
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/add_part_multiple.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/add_part_multiple.q.out?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/add_part_multiple.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/add_part_multiple.q.out Thu Oct 30 12:31:47 2014
@@ -68,10 +68,6 @@ PREHOOK: Output: default@add_part_test@d
PREHOOK: Output: default@add_part_test@ds=2010-02-01
PREHOOK: Output: default@add_part_test@ds=2010-03-01
PREHOOK: Output: default@add_part_test@ds=2010-04-01
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
POSTHOOK: query: from src TABLESAMPLE (1 ROWS)
insert into table add_part_test PARTITION (ds='2010-01-01') select 100,100
insert into table add_part_test PARTITION (ds='2010-02-01') select 200,200
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/alter_merge_orc.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/alter_merge_orc.q.out?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/alter_merge_orc.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/alter_merge_orc.q.out Thu Oct 30 12:31:47 2014
@@ -10,7 +10,6 @@ PREHOOK: query: insert overwrite table s
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@src_orc_merge_test
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
POSTHOOK: query: insert overwrite table src_orc_merge_test select * from src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
@@ -21,7 +20,6 @@ PREHOOK: query: insert into table src_or
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@src_orc_merge_test
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
POSTHOOK: query: insert into table src_orc_merge_test select * from src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
@@ -32,7 +30,6 @@ PREHOOK: query: insert into table src_or
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@src_orc_merge_test
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
POSTHOOK: query: insert into table src_orc_merge_test select * from src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
@@ -136,7 +133,6 @@ PREHOOK: query: insert overwrite table s
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@src_orc_merge_test_part@ds=2011
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
POSTHOOK: query: insert overwrite table src_orc_merge_test_part partition (ds='2011') select * from src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
@@ -147,7 +143,6 @@ PREHOOK: query: insert into table src_or
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@src_orc_merge_test_part@ds=2011
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
POSTHOOK: query: insert into table src_orc_merge_test_part partition (ds='2011') select * from src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
@@ -158,7 +153,6 @@ PREHOOK: query: insert into table src_or
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@src_orc_merge_test_part@ds=2011
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
POSTHOOK: query: insert into table src_orc_merge_test_part partition (ds='2011') select * from src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out Thu Oct 30 12:31:47 2014
@@ -10,7 +10,6 @@ PREHOOK: query: insert overwrite table s
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@src_orc_merge_test_stat
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
POSTHOOK: query: insert overwrite table src_orc_merge_test_stat select * from src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
@@ -21,7 +20,6 @@ PREHOOK: query: insert into table src_or
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@src_orc_merge_test_stat
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
POSTHOOK: query: insert into table src_orc_merge_test_stat select * from src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
@@ -32,7 +30,6 @@ PREHOOK: query: insert into table src_or
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@src_orc_merge_test_stat
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
POSTHOOK: query: insert into table src_orc_merge_test_stat select * from src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
@@ -181,7 +178,6 @@ PREHOOK: query: insert overwrite table s
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@src_orc_merge_test_part_stat@ds=2011
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
POSTHOOK: query: insert overwrite table src_orc_merge_test_part_stat partition (ds='2011') select * from src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
@@ -192,7 +188,6 @@ PREHOOK: query: insert into table src_or
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@src_orc_merge_test_part_stat@ds=2011
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
POSTHOOK: query: insert into table src_orc_merge_test_part_stat partition (ds='2011') select * from src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
@@ -203,7 +198,6 @@ PREHOOK: query: insert into table src_or
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@src_orc_merge_test_part_stat@ds=2011
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
POSTHOOK: query: insert into table src_orc_merge_test_part_stat partition (ds='2011') select * from src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
@@ -253,6 +247,8 @@ Protect Mode: None
Partition Parameters:
COLUMN_STATS_ACCURATE true
numFiles 3
+ numRows 1500
+ rawDataSize 141000
totalSize 7488
#### A masked pattern was here ####