Posted to commits@hive.apache.org by xu...@apache.org on 2014/10/30 13:31:52 UTC

svn commit: r1635477 [1/17] - in /hive/branches/spark: common/src/java/org/apache/hadoop/hive/common/ data/conf/spark/ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/counter/ ql/src/java/org/apache/ha...

Author: xuefu
Date: Thu Oct 30 12:31:47 2014
New Revision: 1635477

URL: http://svn.apache.org/r1635477
Log:
HIVE-8539: Enable collect table statistics based on SparkCounter[Spark Branch] (Chengxiang via Xuefu)

Modified:
    hive/branches/spark/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
    hive/branches/spark/data/conf/spark/hive-site.xml
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkClient.java
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkRecordHandler.java
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/counter/SparkCounters.java
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSession.java
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/SparkJobStatus.java
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/SimpleSparkJobStatus.java
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsPublisher.java
    hive/branches/spark/ql/src/test/results/clientpositive/spark/add_part_multiple.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/alter_merge_orc.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join1.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join14.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join17.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join19.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join2.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join24.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join25.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join26.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join3.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join4.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join5.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join6.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join7.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join8.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join9.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/bucket2.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/bucket3.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/bucket4.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin6.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/column_access_stats.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/create_merge_compressed.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/ctas.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/custom_input_output_format.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/date_udf.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/enforce_order.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby1.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby10.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby11.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby2.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby3.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby3_map.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby4.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_map.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_map_multi_single_reducer.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_map.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby9.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_complex_types.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_cube1.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer2.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_position.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/infer_bucket_sort_convert_join.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/innerjoin.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/input12.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/input13.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/input14.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/input17.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/input18.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/input1_limit.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/input_part2.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/insert1.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/insert_into1.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/insert_into2.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/insert_into3.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join1.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join14.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join17.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join2.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join24.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join25.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join26.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join27.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join28.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join29.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join3.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join30.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join31.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join32.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join33.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join34.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join35.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join36.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join37.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join38.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join39.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join4.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join41.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join5.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join6.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join7.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join8.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join9.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join_map_ppr.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join_nullsafe.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join_rc.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part10.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part11.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part12.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part15.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part2.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part3.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part4.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part5.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part6.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part7.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part9.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/mapjoin_decimal.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/mapjoin_hook.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/mapjoin_memcheck.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/mapreduce1.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/mapreduce2.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/merge1.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/merge2.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert_gby.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_join_union.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/parallel.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/parallel_join1.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/parquet_join.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/pcr.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/ppd_join4.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/ppd_join5.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/ppd_multi_insert.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/sample1.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/sample10.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/sample2.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/sample4.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/sample5.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/sample6.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/sample7.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/scriptfile1.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/semijoin.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoin.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt10.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt15.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt18.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin9.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_14.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_16.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_18.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_19.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_20.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_21.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_22.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_8.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_1.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_2.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_3.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_4.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_5.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_6.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_7.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_8.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/table_access_keys_stats.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/temp_table.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/temp_table_join1.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/timestamp_1.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/timestamp_2.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/timestamp_3.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/timestamp_lazy.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/timestamp_udf.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/transform1.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union10.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union18.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union19.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union25.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union28.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union29.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union3.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union30.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union33.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union4.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/union6.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/vector_char_4.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/vector_data_types.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/vector_string_concat.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/vector_varchar_4.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/vectorization_decimal_date.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/vectorization_part.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/vectorization_part_project.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/vectorized_bucketmapjoin1.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/vectorized_rcfile_columnar.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/windowing.q.out

Modified: hive/branches/spark/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java (original)
+++ hive/branches/spark/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java Thu Oct 30 12:31:47 2014
@@ -55,6 +55,8 @@ public class StatsSetupConst {
       public String getAggregator(Configuration conf) {
         if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
           return "org.apache.hadoop.hive.ql.stats.CounterStatsAggregatorTez";
+        } else if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
+          return "org.apache.hadoop.hive.ql.stats.CounterStatsAggregatorSpark";
         }
         return "org.apache.hadoop.hive.ql.stats.CounterStatsAggregator"; }
     },

Modified: hive/branches/spark/data/conf/spark/hive-site.xml
URL: http://svn.apache.org/viewvc/hive/branches/spark/data/conf/spark/hive-site.xml?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/data/conf/spark/hive-site.xml (original)
+++ hive/branches/spark/data/conf/spark/hive-site.xml Thu Oct 30 12:31:47 2014
@@ -185,7 +185,7 @@
 
 <property>
   <name>hive.stats.dbclass</name>
-  <value>counter</value>
+  <value>fs</value>
   <description>The default storatge that stores temporary hive statistics. Currently, jdbc, hbase and counter type is supported</description>
 </property>
 

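The hunk above switches the Spark test configuration's default stats store from "counter" to "fs", so counter-based collection now has to be requested explicitly. Below is a minimal sketch of that switch, assuming a fresh HiveConf and using only the property and ConfVars constant that appear elsewhere in this commit; the surrounding class name is hypothetical.

import org.apache.hadoop.hive.conf.HiveConf;

public class CounterStatsConfigSketch {
  public static boolean counterStatsEnabled() {
    HiveConf conf = new HiveConf();
    // Re-enable the counter-based stats path that this commit wires up for Spark.
    HiveConf.setVar(conf, HiveConf.ConfVars.HIVESTATSDBCLASS, "counter");
    // SparkTask performs the same case-insensitive check before attaching the
    // required counter prefixes to the SparkWork it submits.
    String statsImpl = HiveConf.getVar(conf, HiveConf.ConfVars.HIVESTATSDBCLASS);
    return statsImpl.equalsIgnoreCase("counter");
  }
}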
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkClient.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkClient.java?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkClient.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkClient.java Thu Oct 30 12:31:47 2014
@@ -25,14 +25,13 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.DriverContext;
 import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.exec.spark.counter.SparkCounter;
-import org.apache.hadoop.hive.ql.exec.spark.counter.SparkCounterGroup;
 import org.apache.hadoop.hive.ql.exec.spark.counter.SparkCounters;
-import org.apache.hadoop.hive.ql.exec.spark.status.SparkJobMonitor;
+import org.apache.hadoop.hive.ql.exec.spark.status.SparkJobRef;
 import org.apache.hadoop.hive.ql.exec.spark.status.impl.JobStateListener;
 import org.apache.hadoop.hive.ql.exec.spark.status.impl.SimpleSparkJobStatus;
 import org.apache.hadoop.hive.ql.io.HiveKey;
@@ -151,7 +150,7 @@ public class SparkClient implements Seri
     return sparkConf;
   }
 
-  public int execute(DriverContext driverContext, SparkWork sparkWork) {
+  public SparkJobRef execute(DriverContext driverContext, SparkWork sparkWork) throws Exception {
     Context ctx = driverContext.getCtx();
     HiveConf hiveConf = (HiveConf) ctx.getConf();
     refreshLocalResources(sparkWork, hiveConf);
@@ -159,49 +158,35 @@ public class SparkClient implements Seri
 
     // Create temporary scratch dir
     Path emptyScratchDir;
-    try {
-      emptyScratchDir = ctx.getMRTmpPath();
-      FileSystem fs = emptyScratchDir.getFileSystem(jobConf);
-      fs.mkdirs(emptyScratchDir);
-    } catch (IOException e) {
-      LOG.error("Error launching map-reduce job", e);
-      return 5;
-    }
+    emptyScratchDir = ctx.getMRTmpPath();
+    FileSystem fs = emptyScratchDir.getFileSystem(jobConf);
+    fs.mkdirs(emptyScratchDir);
 
     SparkCounters sparkCounters = new SparkCounters(sc, hiveConf);
+    List<String> prefixes = sparkWork.getRequiredCounterPrefix();
+    // register spark counters before submit spark job.
+    if (prefixes != null) {
+      for (String prefix : prefixes) {
+        sparkCounters.createCounter(prefix, StatsSetupConst.ROW_COUNT);
+        sparkCounters.createCounter(prefix, StatsSetupConst.RAW_DATA_SIZE);
+      }
+    }
     SparkReporter sparkReporter = new SparkReporter(sparkCounters);
 
     // Generate Spark plan
     SparkPlanGenerator gen =
       new SparkPlanGenerator(sc, ctx, jobConf, emptyScratchDir, sparkReporter);
-    SparkPlan plan;
-    try {
-      plan = gen.generate(sparkWork);
-    } catch (Exception e) {
-      LOG.error("Error generating Spark Plan", e);
-      return 2;
-    }
+    SparkPlan plan = gen.generate(sparkWork);
 
     // Execute generated plan.
-    try {
-      JavaPairRDD<HiveKey, BytesWritable> finalRDD = plan.generateGraph();
-      // We use Spark RDD async action to submit job as it's the only way to get jobId now.
-      JavaFutureAction<Void> future = finalRDD.foreachAsync(HiveVoidFunction.getInstance());
-      // An action may trigger multi jobs in Spark, we only monitor the latest job here
-      // until we found that Hive does trigger multi jobs.
-      List<Integer> jobIds = future.jobIds();
-      // jobIds size is always bigger than or equal with 1.
-      int jobId = jobIds.get(jobIds.size() - 1);
-      SimpleSparkJobStatus sparkJobStatus = new SimpleSparkJobStatus(
-       jobId, jobStateListener, jobProgressListener);
-      SparkJobMonitor monitor = new SparkJobMonitor(sparkJobStatus);
-      monitor.startMonitor();
-    } catch (Exception e) {
-      LOG.error("Error executing Spark Plan", e);
-      return 1;
-    }
-
-    return 0;
+    JavaPairRDD<HiveKey, BytesWritable> finalRDD = plan.generateGraph();
+    // We use Spark RDD async action to submit job as it's the only way to get jobId now.
+    JavaFutureAction<Void> future = finalRDD.foreachAsync(HiveVoidFunction.getInstance());
+    // As we always use foreach action to submit RDD graph, it would only trigger on job.
+    int jobId = future.jobIds().get(0);
+    SimpleSparkJobStatus sparkJobStatus =
+      new SimpleSparkJobStatus(jobId, jobStateListener, jobProgressListener, sparkCounters);
+    return new SparkJobRef(jobId, sparkJobStatus);
   }
 
   /**

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java Thu Oct 30 12:31:47 2014
@@ -105,6 +105,7 @@ public class SparkMapRecordHandler exten
       execContext.setLocalWork(localWork);
 
       MapredContext.init(true, new JobConf(jc));
+      MapredContext.get().setReporter(reporter);
 
       mo.setExecContext(execContext);
       mo.initializeLocalWork(jc);

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkRecordHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkRecordHandler.java?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkRecordHandler.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkRecordHandler.java Thu Oct 30 12:31:47 2014
@@ -48,6 +48,7 @@ public abstract class SparkRecordHandler
   public void init(JobConf job, OutputCollector output, Reporter reporter) {
     jc = job;
     MapredContext.init(false, new JobConf(jc));
+    MapredContext.get().setReporter(reporter);
 
     oc = output;
     rp = reporter;

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java Thu Oct 30 12:31:47 2014
@@ -30,9 +30,12 @@ import org.apache.hadoop.hive.ql.QueryPl
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.spark.counter.SparkCounters;
 import org.apache.hadoop.hive.ql.exec.spark.session.SparkSession;
 import org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionManager;
 import org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionManagerImpl;
+import org.apache.hadoop.hive.ql.exec.spark.status.SparkJobMonitor;
+import org.apache.hadoop.hive.ql.exec.spark.status.SparkJobRef;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.BaseWork;
 import org.apache.hadoop.hive.ql.plan.MapWork;
@@ -47,6 +50,7 @@ public class SparkTask extends Task<Spar
   private static final long serialVersionUID = 1L;
   private transient JobConf job;
   private transient ContentSummary inputSummary;
+  private SparkCounters sparkCounters;
 
   @Override
   public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext driverContext) {
@@ -64,8 +68,8 @@ public class SparkTask extends Task<Spar
       printConfigInfo();
       sparkSessionManager = SparkSessionManagerImpl.getInstance();
       sparkSession = SessionState.get().getSparkSession();
-      
-      // Spark configurations are updated close the existing session 
+
+      // Spark configurations are updated close the existing session
       if(conf.getSparkConfigUpdated()){
         sparkSessionManager.closeSession(sparkSession);
         sparkSession =  null;
@@ -73,12 +77,21 @@ public class SparkTask extends Task<Spar
       }
       sparkSession = sparkSessionManager.getSession(sparkSession, conf, true);
       SessionState.get().setSparkSession(sparkSession);
-      rc = sparkSession.submit(driverContext, getWork());
+      SparkWork sparkWork = getWork();
+      String statsImpl = HiveConf.getVar(conf, HiveConf.ConfVars.HIVESTATSDBCLASS);
+      if (statsImpl.equalsIgnoreCase("counter")) {
+        sparkWork.setRequiredCounterPrefix(SparkUtilities.getRequiredCounterPrefix(this, db));
+      }
+      SparkJobRef jobRef = sparkSession.submit(driverContext, sparkWork);
+      sparkCounters = jobRef.getSparkJobStatus().getCounter();
+      SparkJobMonitor monitor = new SparkJobMonitor(jobRef.getSparkJobStatus());
+      monitor.startMonitor();
+      console.printInfo(sparkCounters.toString());
+      rc = 0;
     } catch (Exception e) {
       LOG.error("Failed to execute spark task.", e);
       return 1;
-    }
-    finally {
+    } finally {
       if (sparkSession != null && sparkSessionManager != null) {
         rc = close(rc);
         try {
@@ -154,6 +167,10 @@ public class SparkTask extends Task<Spar
     return result;
   }
 
+  public SparkCounters getSparkCounters() {
+    return sparkCounters;
+  }
+
   /**
    * Set the number of reducers for the spark work.
    */

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java Thu Oct 30 12:31:47 2014
@@ -17,7 +17,35 @@
  */
 package org.apache.hadoop.hive.ql.exec.spark;
 
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.MoveTask;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.StatsTask;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.io.HiveKey;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
+import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
+import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
+import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
+import org.apache.hadoop.hive.ql.plan.MoveWork;
+import org.apache.hadoop.hive.ql.plan.StatsWork;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.io.BytesWritable;
 
 /**
@@ -42,4 +70,108 @@ public class SparkUtilities {
     return copy;
   }
 
+  public static List<String> getRequiredCounterPrefix(SparkTask sparkTask, Hive db)
+    throws HiveException, MetaException {
+
+    List<String> prefixs = new LinkedList<String>();
+    List<BaseWork> works = sparkTask.getWork().getAllWork();
+    for (BaseWork baseWork : works) {
+      Set<Operator<?>> operators = baseWork.getAllOperators();
+      for (Operator<?> operator : operators) {
+        if (operator instanceof TableScanOperator) {
+          TableScanOperator tableScanOperator = (TableScanOperator) operator;
+          TableScanDesc tableScanDesc = tableScanOperator.getConf();
+
+          if (tableScanDesc.isGatherStats()) {
+            List<Task<? extends Serializable>> childTasks = getChildTasks(sparkTask);
+            for (Task<? extends Serializable> task : childTasks) {
+              if (task instanceof StatsTask) {
+                StatsTask statsTask = (StatsTask) task;
+                StatsWork statsWork = statsTask.getWork();
+                // ANALYZE command
+                BaseSemanticAnalyzer.tableSpec tblSpec = statsWork.getTableSpecs();
+                Table table = tblSpec.tableHandle;
+                if (!table.isPartitioned()) {
+                  prefixs.add(tableScanDesc.getStatsAggPrefix()); // non-partitioned
+                } else {
+                  for (Partition partition : tblSpec.partitions) {
+                    String aggrPrefix = getAggregationPrefix(
+                      table, partition.getSpec(), tableScanDesc.getMaxStatsKeyPrefixLength());
+                    prefixs.add(aggrPrefix);
+                  }
+                }
+              }
+            }
+          }
+        } else if (operator instanceof FileSinkOperator) {
+          FileSinkOperator fileSinkOperator = (FileSinkOperator) operator;
+          FileSinkDesc fileSinkDesc = fileSinkOperator.getConf();
+
+          if (fileSinkDesc.isGatherStats()) {
+            List<Task<? extends Serializable>> childTasks = getChildTasks(sparkTask);
+            for (Task<? extends Serializable> task : childTasks) {
+              if (task instanceof MoveTask) {
+                MoveTask moveTask = (MoveTask) task;
+                MoveWork moveWork = moveTask.getWork();
+
+                // INSERT OVERWRITE command
+                LoadTableDesc tbd = moveWork.getLoadTableWork();
+                Table table = db.getTable(tbd.getTable().getTableName());
+                if (!table.isPartitioned()) {
+                  prefixs.add(
+                    getAggregationPrefix(table, null, fileSinkDesc.getMaxStatsKeyPrefixLength()));
+                } else {
+                  DynamicPartitionCtx dpCtx = tbd.getDPCtx();
+                  if (dpCtx == null || dpCtx.getNumDPCols() == 0) {
+                    // static partition
+                    Map<String, String> partitionSpec = tbd.getPartitionSpec();
+                    if (partitionSpec != null && !partitionSpec.isEmpty()) {
+                      String aggrPrefix = getAggregationPrefix(
+                        table, partitionSpec, fileSinkDesc.getMaxStatsKeyPrefixLength());
+                      prefixs.add(aggrPrefix);
+                    }
+                  } else {
+                    // dynamic partition
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+    return prefixs;
+  }
+
+  private static String getAggregationPrefix(Table table, Map<String, String> partitionSpec, int maxKeyLength)
+    throws MetaException {
+    StringBuilder prefix = new StringBuilder();
+    // prefix is of the form dbName.tblName
+    prefix.append(table.getDbName()).append('.').append(table.getTableName());
+    if (partitionSpec != null) {
+      return Utilities.join(prefix.toString(), Warehouse.makePartPath(partitionSpec));
+    }
+    return Utilities.getHashedStatsPrefix(prefix.toString(), maxKeyLength);
+  }
+
+  private static List<Task<? extends Serializable>> getChildTasks(
+    Task<? extends Serializable> rootTask) {
+
+    List<Task<? extends Serializable>> tasks = new ArrayList<Task<? extends Serializable>>();
+    fillChildTasks(tasks, rootTask);
+    return tasks;
+  }
+
+  private static void fillChildTasks(
+    List<Task<? extends Serializable>> tasks,
+    Task<? extends Serializable> rootTask) {
+
+    List<Task<? extends Serializable>> childTasks = rootTask.getChildTasks();
+    tasks.add(rootTask);
+    if (childTasks != null) {
+      for (Task<? extends Serializable> task : childTasks) {
+        fillChildTasks(tasks, task);
+      }
+    }
+  }
 }

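For reference, a small sketch of how the aggregation prefixes returned by getRequiredCounterPrefix are composed, using only the helpers the new code above already calls (Utilities.join, Warehouse.makePartPath). The database, table, and partition values are hypothetical.

import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.ql.exec.Utilities;

public class AggregationPrefixSketch {
  public static String partitionedPrefix() throws MetaException {
    // The prefix starts as dbName.tblName, as in getAggregationPrefix above.
    String tablePrefix = "default.src_part";
    Map<String, String> partSpec = new LinkedHashMap<String, String>();
    partSpec.put("ds", "2011");
    // Partitioned tables append the partition path; unpartitioned tables instead
    // go through Utilities.getHashedStatsPrefix with the configured max key length.
    return Utilities.join(tablePrefix, Warehouse.makePartPath(partSpec));
  }
}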
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/counter/SparkCounters.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/counter/SparkCounters.java?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/counter/SparkCounters.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/counter/SparkCounters.java Thu Oct 30 12:31:47 2014
@@ -21,6 +21,8 @@ import java.io.Serializable;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
@@ -44,6 +46,7 @@ import org.apache.spark.api.java.JavaSpa
  * 3. Hive could only get Counter value at driver side.
  */
 public class SparkCounters implements Serializable {
+  private static final Log LOG = LogFactory.getLog(SparkCounters.class);
 
   private Map<String, SparkCounterGroup> sparkCounterGroups;
 
@@ -90,20 +93,22 @@ public class SparkCounters implements Se
   public void increment(String groupName, String counterName, long value) {
     SparkCounter counter = getGroup(groupName).getCounter(counterName);
     if (counter == null) {
-      throw new RuntimeException(
+      LOG.error(
         String.format("counter[%s, %s] has not initialized before.", groupName, counterName));
+    } else {
+      counter.increment(value);
     }
-    counter.increment(value);
   }
 
   public long getValue(String groupName, String counterName) {
     SparkCounter counter = getGroup(groupName).getCounter(counterName);
     if (counter == null) {
-      throw new RuntimeException(
+      LOG.error(
         String.format("counter[%s, %s] has not initialized before.", groupName, counterName));
+      return 0;
+    } else {
+      return counter.getValue();
     }
-
-    return counter.getValue();
   }
 
   public SparkCounter getCounter(String groupName, String counterName) {
@@ -127,4 +132,29 @@ public class SparkCounters implements Se
   public Map<String, SparkCounterGroup> getSparkCounterGroups() {
     return sparkCounterGroups;
   }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    Map<String, SparkCounterGroup> groups = getSparkCounterGroups();
+    if (groups != null) {
+      for(Map.Entry<String, SparkCounterGroup> groupEntry : groups.entrySet()) {
+        String groupName = groupEntry.getKey();
+        SparkCounterGroup group = groupEntry.getValue();
+        sb.append(groupName).append("\n");
+        Map<String, SparkCounter> counters = group.getSparkCounters();
+        for (Map.Entry<String, SparkCounter> counterEntry : counters.entrySet()) {
+          String counterName = counterEntry.getKey();
+          SparkCounter counter = counterEntry.getValue();
+          sb.append("\t")
+            .append(counterName)
+            .append(": ")
+            .append(counter.getValue())
+            .append("\n");
+        }
+      }
+    }
+
+    return sb.toString();
+  }
 }

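A minimal usage sketch of the SparkCounters API exercised by this commit. The group name "default.src" and the increment value are hypothetical; in the real flow SparkClient registers the counters before job submission and executors increment them through SparkReporter.

import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.spark.counter.SparkCounters;
import org.apache.spark.api.java.JavaSparkContext;

public class SparkCountersSketch {
  public static long rowCount(JavaSparkContext sc, HiveConf hiveConf) {
    SparkCounters counters = new SparkCounters(sc, hiveConf);
    // Counters must exist before the Spark job is submitted, as SparkClient does
    // for every required prefix on the SparkWork.
    counters.createCounter("default.src", StatsSetupConst.ROW_COUNT);
    counters.createCounter("default.src", StatsSetupConst.RAW_DATA_SIZE);
    // With this commit, incrementing a counter that was never registered is
    // logged as an error instead of throwing.
    counters.increment("default.src", StatsSetupConst.ROW_COUNT, 500L);
    // Values are read back on the driver side; toString() dumps all groups.
    System.out.println(counters);
    return counters.getValue("default.src", StatsSetupConst.ROW_COUNT);
  }
}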
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSession.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSession.java?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSession.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSession.java Thu Oct 30 12:31:47 2014
@@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.s
 
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.DriverContext;
+import org.apache.hadoop.hive.ql.exec.spark.status.SparkJobRef;
 import org.apache.hadoop.hive.ql.plan.SparkWork;
 
 public interface SparkSession {
@@ -32,7 +33,7 @@ public interface SparkSession {
    * @param driverContext
    * @param sparkWork
    */
-  public int submit(DriverContext driverContext, SparkWork sparkWork);
+  public SparkJobRef submit(DriverContext driverContext, SparkWork sparkWork) throws Exception;
 
   /**
    * Is the session open and ready to submit jobs?

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java Thu Oct 30 12:31:47 2014
@@ -21,6 +21,7 @@ import com.google.common.base.Preconditi
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.DriverContext;
 import org.apache.hadoop.hive.ql.exec.spark.SparkClient;
+import org.apache.hadoop.hive.ql.exec.spark.status.SparkJobRef;
 import org.apache.hadoop.hive.ql.plan.SparkWork;
 
 import java.util.UUID;
@@ -46,7 +47,7 @@ public class SparkSessionImpl implements
   }
 
   @Override
-  public int submit(DriverContext driverContext, SparkWork sparkWork) {
+  public SparkJobRef submit(DriverContext driverContext, SparkWork sparkWork) throws Exception {
     Preconditions.checkState(isOpen, "Session is not open. Can't submit jobs.");
     sparkClient = SparkClient.getInstance(driverContext.getCtx().getConf());
     return sparkClient.execute(driverContext, sparkWork);

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/SparkJobStatus.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/SparkJobStatus.java?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/SparkJobStatus.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/SparkJobStatus.java Thu Oct 30 12:31:47 2014
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hive.ql.exec.spark.status;
 
+import org.apache.hadoop.hive.ql.exec.spark.counter.SparkCounters;
+
 import java.util.Map;
 
 /**
@@ -32,4 +34,6 @@ public interface SparkJobStatus {
 
   public Map<String, SparkStageProgress> getSparkStageProgress();
 
+  public SparkCounters getCounter();
+
 }

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/SimpleSparkJobStatus.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/SimpleSparkJobStatus.java?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/SimpleSparkJobStatus.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/SimpleSparkJobStatus.java Thu Oct 30 12:31:47 2014
@@ -22,6 +22,7 @@ import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.hadoop.hive.ql.exec.spark.counter.SparkCounters;
 import org.apache.hadoop.hive.ql.exec.spark.status.SparkJobState;
 import org.apache.hadoop.hive.ql.exec.spark.status.SparkJobStatus;
 import org.apache.hadoop.hive.ql.exec.spark.status.SparkStageProgress;
@@ -40,15 +41,18 @@ public class SimpleSparkJobStatus implem
   private int jobId;
   private JobStateListener jobStateListener;
   private JobProgressListener jobProgressListener;
+  private SparkCounters sparkCounters;
 
   public SimpleSparkJobStatus(
     int jobId,
     JobStateListener stateListener,
-    JobProgressListener progressListener) {
+    JobProgressListener progressListener,
+    SparkCounters sparkCounters) {
 
     this.jobId = jobId;
     this.jobStateListener = stateListener;
     this.jobProgressListener = progressListener;
+    this.sparkCounters = sparkCounters;
   }
 
   @Override
@@ -111,6 +115,11 @@ public class SimpleSparkJobStatus implem
     return stageProgresses;
   }
 
+  @Override
+  public SparkCounters getCounter() {
+    return sparkCounters;
+  }
+
   private List<StageInfo> getStageInfo(int stageId) {
     List<StageInfo> stageInfos = new LinkedList<StageInfo>();
 

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java Thu Oct 30 12:31:47 2014
@@ -54,10 +54,13 @@ public class SparkWork extends AbstractO
   protected final Map<Pair<BaseWork, BaseWork>, SparkEdgeProperty> edgeProperties =
       new HashMap<Pair<BaseWork, BaseWork>, SparkEdgeProperty>();
 
+  private List<String> requiredCounterPrefix;
+
   public SparkWork(String name) {
     this.name = name + ":" + (++counter);
   }
 
+
   @Explain(displayName = "DagName")
   public String getName() {
     return name;
@@ -173,6 +176,14 @@ public class SparkWork extends AbstractO
     return new HashSet<BaseWork>(leaves);
   }
 
+  public void setRequiredCounterPrefix(List<String> requiredCounterPrefix) {
+    this.requiredCounterPrefix = requiredCounterPrefix;
+  }
+
+  public List<String> getRequiredCounterPrefix() {
+    return requiredCounterPrefix;
+  }
+
   /**
    * getParents returns all the nodes with edges leading into work
    */
@@ -300,5 +311,5 @@ public class SparkWork extends AbstractO
     }
     return result;
   }
-  
+
 }

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsPublisher.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsPublisher.java?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsPublisher.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsPublisher.java Thu Oct 30 12:31:47 2014
@@ -52,8 +52,9 @@ public class CounterStatsPublisher imple
     for (Map.Entry<String, String> entry : stats.entrySet()) {
       try {
         reporter.incrCounter(fileID, entry.getKey(), Long.valueOf(entry.getValue()));
-      } catch (NumberFormatException e) {
-        LOG.error("Invalid counter value " + entry.getValue() + " for " + entry.getKey());
+      } catch (Exception e) {
+        LOG.error("Failed to increment counter value " + entry.getValue() + " for " + entry.getKey());
+        return false;
       }
     }
     return true;

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/add_part_multiple.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/add_part_multiple.q.out?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/add_part_multiple.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/add_part_multiple.q.out Thu Oct 30 12:31:47 2014
@@ -68,10 +68,6 @@ PREHOOK: Output: default@add_part_test@d
 PREHOOK: Output: default@add_part_test@ds=2010-02-01
 PREHOOK: Output: default@add_part_test@ds=2010-03-01
 PREHOOK: Output: default@add_part_test@ds=2010-04-01
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
 POSTHOOK: query: from src TABLESAMPLE (1 ROWS)
 insert into table add_part_test PARTITION (ds='2010-01-01') select 100,100
 insert into table add_part_test PARTITION (ds='2010-02-01') select 200,200

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/alter_merge_orc.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/alter_merge_orc.q.out?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/alter_merge_orc.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/alter_merge_orc.q.out Thu Oct 30 12:31:47 2014
@@ -10,7 +10,6 @@ PREHOOK: query: insert overwrite table s
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Output: default@src_orc_merge_test
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
 POSTHOOK: query: insert overwrite table src_orc_merge_test select * from src
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
@@ -21,7 +20,6 @@ PREHOOK: query: insert into table src_or
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Output: default@src_orc_merge_test
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
 POSTHOOK: query: insert into table src_orc_merge_test select * from src
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
@@ -32,7 +30,6 @@ PREHOOK: query: insert into table src_or
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Output: default@src_orc_merge_test
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
 POSTHOOK: query: insert into table src_orc_merge_test select * from src
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
@@ -136,7 +133,6 @@ PREHOOK: query: insert overwrite table s
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Output: default@src_orc_merge_test_part@ds=2011
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
 POSTHOOK: query: insert overwrite table src_orc_merge_test_part partition (ds='2011') select * from src
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
@@ -147,7 +143,6 @@ PREHOOK: query: insert into table src_or
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Output: default@src_orc_merge_test_part@ds=2011
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
 POSTHOOK: query: insert into table src_orc_merge_test_part partition (ds='2011') select * from src
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
@@ -158,7 +153,6 @@ PREHOOK: query: insert into table src_or
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Output: default@src_orc_merge_test_part@ds=2011
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
 POSTHOOK: query: insert into table src_orc_merge_test_part partition (ds='2011') select * from src
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out?rev=1635477&r1=1635476&r2=1635477&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out Thu Oct 30 12:31:47 2014
@@ -10,7 +10,6 @@ PREHOOK: query: insert overwrite table s
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Output: default@src_orc_merge_test_stat
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
 POSTHOOK: query: insert overwrite table src_orc_merge_test_stat select * from src
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
@@ -21,7 +20,6 @@ PREHOOK: query: insert into table src_or
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Output: default@src_orc_merge_test_stat
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
 POSTHOOK: query: insert into table src_orc_merge_test_stat select * from src
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
@@ -32,7 +30,6 @@ PREHOOK: query: insert into table src_or
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Output: default@src_orc_merge_test_stat
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
 POSTHOOK: query: insert into table src_orc_merge_test_stat select * from src
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
@@ -181,7 +178,6 @@ PREHOOK: query: insert overwrite table s
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Output: default@src_orc_merge_test_part_stat@ds=2011
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
 POSTHOOK: query: insert overwrite table src_orc_merge_test_part_stat partition (ds='2011') select * from src
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
@@ -192,7 +188,6 @@ PREHOOK: query: insert into table src_or
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Output: default@src_orc_merge_test_part_stat@ds=2011
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
 POSTHOOK: query: insert into table src_orc_merge_test_part_stat partition (ds='2011') select * from src
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
@@ -203,7 +198,6 @@ PREHOOK: query: insert into table src_or
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Output: default@src_orc_merge_test_part_stat@ds=2011
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
 POSTHOOK: query: insert into table src_orc_merge_test_part_stat partition (ds='2011') select * from src
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
@@ -253,6 +247,8 @@ Protect Mode:       	None               
 Partition Parameters:	 	 
 	COLUMN_STATS_ACCURATE	true                
 	numFiles            	3                   
+	numRows             	1500                
+	rawDataSize         	141000              
 	totalSize           	7488                
 #### A masked pattern was here ####