You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Ritika Maheshwari (Jira)" <ji...@apache.org> on 2023/02/06 00:22:00 UTC
[jira] [Commented] (SPARK-37581) sql hang at planning stage
[ https://issues.apache.org/jira/browse/SPARK-37581?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17684361#comment-17684361 ]
Ritika Maheshwari commented on SPARK-37581:
-------------------------------------------
Just executed this on 3.3.0 and it takes 4.14 seconds. I am also including the plan. So it does not seem like an issue in 3.3.0.
> left join
> (select dt,count(distinct kb_code) as h_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t8
> on calendar.dt = t8.dt
>
> left join
> (select dt,count(distinct kb_code) as i_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t9
> on calendar.dt = t9.dt
>
> left join
> (select dt,count(distinct kb_code) as j_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t10
> on calendar.dt = t10.dt
>
> left join
> (select dt,count(distinct kb_code) as k_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t11
> on calendar.dt = t11.dt;
Time taken: 0.609 seconds
23/02/05 16:10:47 WARN HiveMetaStore: Location: file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_c specified for non-external table:test_c
{color:#FF0000}*Time taken: 4.14 seconds*{color}
spark-sql> select * from test.test_c
> ;
1 1 2 1 1 1 1 1 1 1 1 1 1 1
Time taken: 0.296 seconds, Fetched 1 row(s)
spark-sql>
{color:#FF0000}*The plan for the query is* {color}
CommandResult Execute OptimizedCreateHiveTableAsSelectCommand [Database: test, TableName: test_c, InsertIntoHadoopFsRelationCommand]
+- OptimizedCreateHiveTableAsSelectCommand [Database: test, TableName: test_c, InsertIntoHadoopFsRelationCommand]
+- Project [day#11, week#12, weekday#13, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L, g_kbs#6L, h_kbs#7L, i_kbs#8L, j_kbs#9L, k_kbs#10L]
+- Join LeftOuter, (dt#14 = dt#366)
:- Join LeftOuter, (dt#14 = dt#334)
: :- Join LeftOuter, (dt#14 = dt#302)
: : :- Join LeftOuter, (dt#14 = dt#270)
: : : :- Join LeftOuter, (dt#14 = dt#238)
: : : : :- Join LeftOuter, (dt#14 = dt#206)
: : : : : :- Join LeftOuter, (dt#14 = dt#174)
: : : : : : :- Join LeftOuter, (dt#14 = dt#142)
: : : : : : : :- Join LeftOuter, (dt#14 = dt#110)
: : : : : : : : :- Join LeftOuter, (dt#14 = dt#78)
: : : : : : : : : :- Join LeftOuter, (dt#14 = dt#46)
: : : : : : : : : : :- SubqueryAlias calendar
: : : : : : : : : : : +- Project [day#11, week#12, weekday#13, dt#14]
: : : : : : : : : : : +- Filter (dt#14 = 20211126)
: : : : : : : : : : : +- SubqueryAlias spark_catalog.test.test_a
: : : : : : : : : : : +- Relation test.test_a[day#11,week#12,weekday#13,dt#14] orc
: : : : : : : : : : +- SubqueryAlias t1
: : : : : : : : : : +- Aggregate [dt#46], [dt#46, count(distinct kb_code#40) AS a_kbs#0L]
: : : : : : : : : : +- Filter (dt#46 = 20211126)
: : : : : : : : : : +- SubqueryAlias spark_catalog.test.test_b
: : : : : : : : : : +- Relation test.test_b[session_id#15,device_id#16,brand#17,model#18,wx_version#19,os#20,net_work_type#21,app_id#22,app_name#23,col_z#24,page_url#25,page_title#26,olabel#27,otitle#28,source#29,send_dt#30,recv_dt#31,request_time#32,write_time#33,client_ip#34,col_a#35,dt_hour#36,product#37,channelfrom#38,... 8 more fields] orc
: : : : : : : : : +- SubqueryAlias t2
: : : : : : : : : +- Aggregate [dt#78], [dt#78, count(distinct kb_code#72) AS b_kbs#1L]
: : : : : : : : : +- Filter (dt#78 = 20211126)
: : : : : : : : : +- SubqueryAlias spark_catalog.test.test_b
: : : : : : : : : +- Relation test.test_b[session_id#47,device_id#48,brand#49,model#50,wx_version#51,os#52,net_work_type#53,app_id#54,app_name#55,col_z#56,page_url#57,page_title#58,olabel#59,otitle#60,source#61,send_dt#62,recv_dt#63,request_time#64,write_time#65,client_ip#66,col_a#67,dt_hour#68,product#69,channelfrom#70,... 8 more fields] orc
: : : : : : : : +- SubqueryAlias t3
: : : : : : : : +- Aggregate [dt#110], [dt#110, count(distinct kb_code#104) AS c_kbs#2L]
: : : : : : : : +- Filter (dt#110 = 20211126)
: : : : : : : : +- SubqueryAlias spark_catalog.test.test_b
: : : : : : : : +- Relation test.test_b[session_id#79,device_id#80,brand#81,model#82,wx_version#83,os#84,net_work_type#85,app_id#86,app_name#87,col_z#88,page_url#89,page_title#90,olabel#91,otitle#92,source#93,send_dt#94,recv_dt#95,request_time#96,write_time#97,client_ip#98,col_a#99,dt_hour#100,product#101,channelfrom#102,... 8 more fields] orc
: : : : : : : +- SubqueryAlias t4
: : : : : : : +- Aggregate [dt#142], [dt#142, count(distinct kb_code#136) AS d_kbs#3L]
: : : : : : : +- Filter (dt#142 = 20211126)
: : : : : : : +- SubqueryAlias spark_catalog.test.test_b
: : : : : : : +- Relation test.test_b[session_id#111,device_id#112,brand#113,model#114,wx_version#115,os#116,net_work_type#117,app_id#118,app_name#119,col_z#120,page_url#121,page_title#122,olabel#123,otitle#124,source#125,send_dt#126,recv_dt#127,request_time#128,write_time#129,client_ip#130,col_a#131,dt_hour#132,product#133,channelfrom#134,... 8 more fields] orc
: : : : : : +- SubqueryAlias t5
: : : : : : +- Aggregate [dt#174], [dt#174, count(distinct kb_code#168) AS e_kbs#4L]
: : : : : : +- Filter (dt#174 = 20211126)
: : : : : : +- SubqueryAlias spark_catalog.test.test_b
: : : : : : +- Relation test.test_b[session_id#143,device_id#144,brand#145,model#146,wx_version#147,os#148,net_work_type#149,app_id#150,app_name#151,col_z#152,page_url#153,page_title#154,olabel#155,otitle#156,source#157,send_dt#158,recv_dt#159,request_time#160,write_time#161,client_ip#162,col_a#163,dt_hour#164,product#165,channelfrom#166,... 8 more fields] orc
: : : : : +- SubqueryAlias t6
: : : : : +- Aggregate [dt#206], [dt#206, count(distinct kb_code#200) AS f_kbs#5L]
: : : : : +- Filter (dt#206 = 20211126)
: : : : : +- SubqueryAlias spark_catalog.test.test_b
: : : : : +- Relation test.test_b[session_id#175,device_id#176,brand#177,model#178,wx_version#179,os#180,net_work_type#181,app_id#182,app_name#183,col_z#184,page_url#185,page_title#186,olabel#187,otitle#188,source#189,send_dt#190,recv_dt#191,request_time#192,write_time#193,client_ip#194,col_a#195,dt_hour#196,product#197,channelfrom#198,... 8 more fields] orc
: : : : +- SubqueryAlias t7
: : : : +- Aggregate [dt#238], [dt#238, count(distinct kb_code#232) AS g_kbs#6L]
: : : : +- Filter (dt#238 = 20211126)
: : : : +- SubqueryAlias spark_catalog.test.test_b
: : : : +- Relation test.test_b[session_id#207,device_id#208,brand#209,model#210,wx_version#211,os#212,net_work_type#213,app_id#214,app_name#215,col_z#216,page_url#217,page_title#218,olabel#219,otitle#220,source#221,send_dt#222,recv_dt#223,request_time#224,write_time#225,client_ip#226,col_a#227,dt_hour#228,product#229,channelfrom#230,... 8 more fields] orc
: : : +- SubqueryAlias t8
: : : +- Aggregate [dt#270], [dt#270, count(distinct kb_code#264) AS h_kbs#7L]
: : : +- Filter (dt#270 = 20211126)
: : : +- SubqueryAlias spark_catalog.test.test_b
: : : +- Relation test.test_b[session_id#239,device_id#240,brand#241,model#242,wx_version#243,os#244,net_work_type#245,app_id#246,app_name#247,col_z#248,page_url#249,page_title#250,olabel#251,otitle#252,source#253,send_dt#254,recv_dt#255,request_time#256,write_time#257,client_ip#258,col_a#259,dt_hour#260,product#261,channelfrom#262,... 8 more fields] orc
: : +- SubqueryAlias t9
: : +- Aggregate [dt#302], [dt#302, count(distinct kb_code#296) AS i_kbs#8L]
: : +- Filter (dt#302 = 20211126)
: : +- SubqueryAlias spark_catalog.test.test_b
: : +- Relation test.test_b[session_id#271,device_id#272,brand#273,model#274,wx_version#275,os#276,net_work_type#277,app_id#278,app_name#279,col_z#280,page_url#281,page_title#282,olabel#283,otitle#284,source#285,send_dt#286,recv_dt#287,request_time#288,write_time#289,client_ip#290,col_a#291,dt_hour#292,product#293,channelfrom#294,... 8 more fields] orc
: +- SubqueryAlias t10
: +- Aggregate [dt#334], [dt#334, count(distinct kb_code#328) AS j_kbs#9L]
: +- Filter (dt#334 = 20211126)
: +- SubqueryAlias spark_catalog.test.test_b
: +- Relation test.test_b[session_id#303,device_id#304,brand#305,model#306,wx_version#307,os#308,net_work_type#309,app_id#310,app_name#311,col_z#312,page_url#313,page_title#314,olabel#315,otitle#316,source#317,send_dt#318,recv_dt#319,request_time#320,write_time#321,client_ip#322,col_a#323,dt_hour#324,product#325,channelfrom#326,... 8 more fields] orc
+- SubqueryAlias t11
+- Aggregate [dt#366], [dt#366, count(distinct kb_code#360) AS k_kbs#10L]
+- Filter (dt#366 = 20211126)
+- SubqueryAlias spark_catalog.test.test_b
+- Relation test.test_b[session_id#335,device_id#336,brand#337,model#338,wx_version#339,os#340,net_work_type#341,app_id#342,app_name#343,col_z#344,page_url#345,page_title#346,olabel#347,otitle#348,source#349,send_dt#350,recv_dt#351,request_time#352,write_time#353,client_ip#354,col_a#355,dt_hour#356,product#357,channelfrom#358,... 8 more fields] orc
== Analyzed Logical Plan ==
CommandResult Execute OptimizedCreateHiveTableAsSelectCommand [Database: test, TableName: test_c, InsertIntoHadoopFsRelationCommand]
+- OptimizedCreateHiveTableAsSelectCommand [Database: test, TableName: test_c, InsertIntoHadoopFsRelationCommand]
+- Project [day#11, week#12, weekday#13, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L, g_kbs#6L, h_kbs#7L, i_kbs#8L, j_kbs#9L, k_kbs#10L]
+- Join LeftOuter, (dt#14 = dt#366)
:- Join LeftOuter, (dt#14 = dt#334)
: :- Join LeftOuter, (dt#14 = dt#302)
: : :- Join LeftOuter, (dt#14 = dt#270)
: : : :- Join LeftOuter, (dt#14 = dt#238)
: : : : :- Join LeftOuter, (dt#14 = dt#206)
: : : : : :- Join LeftOuter, (dt#14 = dt#174)
: : : : : : :- Join LeftOuter, (dt#14 = dt#142)
: : : : : : : :- Join LeftOuter, (dt#14 = dt#110)
: : : : : : : : :- Join LeftOuter, (dt#14 = dt#78)
: : : : : : : : : :- Join LeftOuter, (dt#14 = dt#46)
: : : : : : : : : : :- SubqueryAlias calendar
: : : : : : : : : : : +- Project [day#11, week#12, weekday#13, dt#14]
: : : : : : : : : : : +- Filter (dt#14 = 20211126)
: : : : : : : : : : : +- SubqueryAlias spark_catalog.test.test_a
: : : : : : : : : : : +- Relation test.test_a[day#11,week#12,weekday#13,dt#14] orc
: : : : : : : : : : +- SubqueryAlias t1
: : : : : : : : : : +- Aggregate [dt#46], [dt#46, count(distinct kb_code#40) AS a_kbs#0L]
: : : : : : : : : : +- Filter (dt#46 = 20211126)
: : : : : : : : : : +- SubqueryAlias spark_catalog.test.test_b
: : : : : : : : : : +- Relation test.test_b[session_id#15,device_id#16,brand#17,model#18,wx_version#19,os#20,net_work_type#21,app_id#22,app_name#23,col_z#24,page_url#25,page_title#26,olabel#27,otitle#28,source#29,send_dt#30,recv_dt#31,request_time#32,write_time#33,client_ip#34,col_a#35,dt_hour#36,product#37,channelfrom#38,... 8 more fields] orc
: : : : : : : : : +- SubqueryAlias t2
: : : : : : : : : +- Aggregate [dt#78], [dt#78, count(distinct kb_code#72) AS b_kbs#1L]
: : : : : : : : : +- Filter (dt#78 = 20211126)
: : : : : : : : : +- SubqueryAlias spark_catalog.test.test_b
: : : : : : : : : +- Relation test.test_b[session_id#47,device_id#48,brand#49,model#50,wx_version#51,os#52,net_work_type#53,app_id#54,app_name#55,col_z#56,page_url#57,page_title#58,olabel#59,otitle#60,source#61,send_dt#62,recv_dt#63,request_time#64,write_time#65,client_ip#66,col_a#67,dt_hour#68,product#69,channelfrom#70,... 8 more fields] orc
: : : : : : : : +- SubqueryAlias t3
: : : : : : : : +- Aggregate [dt#110], [dt#110, count(distinct kb_code#104) AS c_kbs#2L]
: : : : : : : : +- Filter (dt#110 = 20211126)
: : : : : : : : +- SubqueryAlias spark_catalog.test.test_b
: : : : : : : : +- Relation test.test_b[session_id#79,device_id#80,brand#81,model#82,wx_version#83,os#84,net_work_type#85,app_id#86,app_name#87,col_z#88,page_url#89,page_title#90,olabel#91,otitle#92,source#93,send_dt#94,recv_dt#95,request_time#96,write_time#97,client_ip#98,col_a#99,dt_hour#100,product#101,channelfrom#102,... 8 more fields] orc
: : : : : : : +- SubqueryAlias t4
: : : : : : : +- Aggregate [dt#142], [dt#142, count(distinct kb_code#136) AS d_kbs#3L]
: : : : : : : +- Filter (dt#142 = 20211126)
: : : : : : : +- SubqueryAlias spark_catalog.test.test_b
: : : : : : : +- Relation test.test_b[session_id#111,device_id#112,brand#113,model#114,wx_version#115,os#116,net_work_type#117,app_id#118,app_name#119,col_z#120,page_url#121,page_title#122,olabel#123,otitle#124,source#125,send_dt#126,recv_dt#127,request_time#128,write_time#129,client_ip#130,col_a#131,dt_hour#132,product#133,channelfrom#134,... 8 more fields] orc
: : : : : : +- SubqueryAlias t5
: : : : : : +- Aggregate [dt#174], [dt#174, count(distinct kb_code#168) AS e_kbs#4L]
: : : : : : +- Filter (dt#174 = 20211126)
: : : : : : +- SubqueryAlias spark_catalog.test.test_b
: : : : : : +- Relation test.test_b[session_id#143,device_id#144,brand#145,model#146,wx_version#147,os#148,net_work_type#149,app_id#150,app_name#151,col_z#152,page_url#153,page_title#154,olabel#155,otitle#156,source#157,send_dt#158,recv_dt#159,request_time#160,write_time#161,client_ip#162,col_a#163,dt_hour#164,product#165,channelfrom#166,... 8 more fields] orc
: : : : : +- SubqueryAlias t6
: : : : : +- Aggregate [dt#206], [dt#206, count(distinct kb_code#200) AS f_kbs#5L]
: : : : : +- Filter (dt#206 = 20211126)
: : : : : +- SubqueryAlias spark_catalog.test.test_b
: : : : : +- Relation test.test_b[session_id#175,device_id#176,brand#177,model#178,wx_version#179,os#180,net_work_type#181,app_id#182,app_name#183,col_z#184,page_url#185,page_title#186,olabel#187,otitle#188,source#189,send_dt#190,recv_dt#191,request_time#192,write_time#193,client_ip#194,col_a#195,dt_hour#196,product#197,channelfrom#198,... 8 more fields] orc
: : : : +- SubqueryAlias t7
: : : : +- Aggregate [dt#238], [dt#238, count(distinct kb_code#232) AS g_kbs#6L]
: : : : +- Filter (dt#238 = 20211126)
: : : : +- SubqueryAlias spark_catalog.test.test_b
: : : : +- Relation test.test_b[session_id#207,device_id#208,brand#209,model#210,wx_version#211,os#212,net_work_type#213,app_id#214,app_name#215,col_z#216,page_url#217,page_title#218,olabel#219,otitle#220,source#221,send_dt#222,recv_dt#223,request_time#224,write_time#225,client_ip#226,col_a#227,dt_hour#228,product#229,channelfrom#230,... 8 more fields] orc
: : : +- SubqueryAlias t8
: : : +- Aggregate [dt#270], [dt#270, count(distinct kb_code#264) AS h_kbs#7L]
: : : +- Filter (dt#270 = 20211126)
: : : +- SubqueryAlias spark_catalog.test.test_b
: : : +- Relation test.test_b[session_id#239,device_id#240,brand#241,model#242,wx_version#243,os#244,net_work_type#245,app_id#246,app_name#247,col_z#248,page_url#249,page_title#250,olabel#251,otitle#252,source#253,send_dt#254,recv_dt#255,request_time#256,write_time#257,client_ip#258,col_a#259,dt_hour#260,product#261,channelfrom#262,... 8 more fields] orc
: : +- SubqueryAlias t9
: : +- Aggregate [dt#302], [dt#302, count(distinct kb_code#296) AS i_kbs#8L]
: : +- Filter (dt#302 = 20211126)
: : +- SubqueryAlias spark_catalog.test.test_b
: : +- Relation test.test_b[session_id#271,device_id#272,brand#273,model#274,wx_version#275,os#276,net_work_type#277,app_id#278,app_name#279,col_z#280,page_url#281,page_title#282,olabel#283,otitle#284,source#285,send_dt#286,recv_dt#287,request_time#288,write_time#289,client_ip#290,col_a#291,dt_hour#292,product#293,channelfrom#294,... 8 more fields] orc
: +- SubqueryAlias t10
: +- Aggregate [dt#334], [dt#334, count(distinct kb_code#328) AS j_kbs#9L]
: +- Filter (dt#334 = 20211126)
: +- SubqueryAlias spark_catalog.test.test_b
: +- Relation test.test_b[session_id#303,device_id#304,brand#305,model#306,wx_version#307,os#308,net_work_type#309,app_id#310,app_name#311,col_z#312,page_url#313,page_title#314,olabel#315,otitle#316,source#317,send_dt#318,recv_dt#319,request_time#320,write_time#321,client_ip#322,col_a#323,dt_hour#324,product#325,channelfrom#326,... 8 more fields] orc
+- SubqueryAlias t11
+- Aggregate [dt#366], [dt#366, count(distinct kb_code#360) AS k_kbs#10L]
+- Filter (dt#366 = 20211126)
+- SubqueryAlias spark_catalog.test.test_b
+- Relation test.test_b[session_id#335,device_id#336,brand#337,model#338,wx_version#339,os#340,net_work_type#341,app_id#342,app_name#343,col_z#344,page_url#345,page_title#346,olabel#347,otitle#348,source#349,send_dt#350,recv_dt#351,request_time#352,write_time#353,client_ip#354,col_a#355,dt_hour#356,product#357,channelfrom#358,... 8 more fields] orc
== Optimized Logical Plan ==
CommandResult Execute OptimizedCreateHiveTableAsSelectCommand [Database: test, TableName: test_c, InsertIntoHadoopFsRelationCommand]
+- OptimizedCreateHiveTableAsSelectCommand [Database: test, TableName: test_c, InsertIntoHadoopFsRelationCommand]
+- Project [day#11, week#12, weekday#13, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L, g_kbs#6L, h_kbs#7L, i_kbs#8L, j_kbs#9L, k_kbs#10L]
+- Join LeftOuter, (dt#14 = dt#366)
:- Join LeftOuter, (dt#14 = dt#334)
: :- Join LeftOuter, (dt#14 = dt#302)
: : :- Join LeftOuter, (dt#14 = dt#270)
: : : :- Join LeftOuter, (dt#14 = dt#238)
: : : : :- Join LeftOuter, (dt#14 = dt#206)
: : : : : :- Join LeftOuter, (dt#14 = dt#174)
: : : : : : :- Join LeftOuter, (dt#14 = dt#142)
: : : : : : : :- Join LeftOuter, (dt#14 = dt#110)
: : : : : : : : :- Join LeftOuter, (dt#14 = dt#78)
: : : : : : : : : :- Join LeftOuter, (dt#14 = dt#46)
: : : : : : : : : : :- SubqueryAlias calendar
: : : : : : : : : : : +- Project [day#11, week#12, weekday#13, dt#14]
: : : : : : : : : : : +- Filter (dt#14 = 20211126)
: : : : : : : : : : : +- SubqueryAlias spark_catalog.test.test_a
: : : : : : : : : : : +- Relation test.test_a[day#11,week#12,weekday#13,dt#14] orc
: : : : : : : : : : +- SubqueryAlias t1
: : : : : : : : : : +- Aggregate [dt#46], [dt#46, count(distinct kb_code#40) AS a_kbs#0L]
: : : : : : : : : : +- Filter (dt#46 = 20211126)
: : : : : : : : : : +- SubqueryAlias spark_catalog.test.test_b
: : : : : : : : : : +- Relation test.test_b[session_id#15,device_id#16,brand#17,model#18,wx_version#19,os#20,net_work_type#21,app_id#22,app_name#23,col_z#24,page_url#25,page_title#26,olabel#27,otitle#28,source#29,send_dt#30,recv_dt#31,request_time#32,write_time#33,client_ip#34,col_a#35,dt_hour#36,product#37,channelfrom#38,... 8 more fields] orc
: : : : : : : : : +- SubqueryAlias t2
: : : : : : : : : +- Aggregate [dt#78], [dt#78, count(distinct kb_code#72) AS b_kbs#1L]
: : : : : : : : : +- Filter (dt#78 = 20211126)
: : : : : : : : : +- SubqueryAlias spark_catalog.test.test_b
: : : : : : : : : +- Relation test.test_b[session_id#47,device_id#48,brand#49,model#50,wx_version#51,os#52,net_work_type#53,app_id#54,app_name#55,col_z#56,page_url#57,page_title#58,olabel#59,otitle#60,source#61,send_dt#62,recv_dt#63,request_time#64,write_time#65,client_ip#66,col_a#67,dt_hour#68,product#69,channelfrom#70,... 8 more fields] orc
: : : : : : : : +- SubqueryAlias t3
: : : : : : : : +- Aggregate [dt#110], [dt#110, count(distinct kb_code#104) AS c_kbs#2L]
: : : : : : : : +- Filter (dt#110 = 20211126)
: : : : : : : : +- SubqueryAlias spark_catalog.test.test_b
: : : : : : : : +- Relation test.test_b[session_id#79,device_id#80,brand#81,model#82,wx_version#83,os#84,net_work_type#85,app_id#86,app_name#87,col_z#88,page_url#89,page_title#90,olabel#91,otitle#92,source#93,send_dt#94,recv_dt#95,request_time#96,write_time#97,client_ip#98,col_a#99,dt_hour#100,product#101,channelfrom#102,... 8 more fields] orc
: : : : : : : +- SubqueryAlias t4
: : : : : : : +- Aggregate [dt#142], [dt#142, count(distinct kb_code#136) AS d_kbs#3L]
: : : : : : : +- Filter (dt#142 = 20211126)
: : : : : : : +- SubqueryAlias spark_catalog.test.test_b
: : : : : : : +- Relation test.test_b[session_id#111,device_id#112,brand#113,model#114,wx_version#115,os#116,net_work_type#117,app_id#118,app_name#119,col_z#120,page_url#121,page_title#122,olabel#123,otitle#124,source#125,send_dt#126,recv_dt#127,request_time#128,write_time#129,client_ip#130,col_a#131,dt_hour#132,product#133,channelfrom#134,... 8 more fields] orc
: : : : : : +- SubqueryAlias t5
: : : : : : +- Aggregate [dt#174], [dt#174, count(distinct kb_code#168) AS e_kbs#4L]
: : : : : : +- Filter (dt#174 = 20211126)
: : : : : : +- SubqueryAlias spark_catalog.test.test_b
: : : : : : +- Relation test.test_b[session_id#143,device_id#144,brand#145,model#146,wx_version#147,os#148,net_work_type#149,app_id#150,app_name#151,col_z#152,page_url#153,page_title#154,olabel#155,otitle#156,source#157,send_dt#158,recv_dt#159,request_time#160,write_time#161,client_ip#162,col_a#163,dt_hour#164,product#165,channelfrom#166,... 8 more fields] orc
: : : : : +- SubqueryAlias t6
: : : : : +- Aggregate [dt#206], [dt#206, count(distinct kb_code#200) AS f_kbs#5L]
: : : : : +- Filter (dt#206 = 20211126)
: : : : : +- SubqueryAlias spark_catalog.test.test_b
: : : : : +- Relation test.test_b[session_id#175,device_id#176,brand#177,model#178,wx_version#179,os#180,net_work_type#181,app_id#182,app_name#183,col_z#184,page_url#185,page_title#186,olabel#187,otitle#188,source#189,send_dt#190,recv_dt#191,request_time#192,write_time#193,client_ip#194,col_a#195,dt_hour#196,product#197,channelfrom#198,... 8 more fields] orc
: : : : +- SubqueryAlias t7
: : : : +- Aggregate [dt#238], [dt#238, count(distinct kb_code#232) AS g_kbs#6L]
: : : : +- Filter (dt#238 = 20211126)
: : : : +- SubqueryAlias spark_catalog.test.test_b
: : : : +- Relation test.test_b[session_id#207,device_id#208,brand#209,model#210,wx_version#211,os#212,net_work_type#213,app_id#214,app_name#215,col_z#216,page_url#217,page_title#218,olabel#219,otitle#220,source#221,send_dt#222,recv_dt#223,request_time#224,write_time#225,client_ip#226,col_a#227,dt_hour#228,product#229,channelfrom#230,... 8 more fields] orc
: : : +- SubqueryAlias t8
: : : +- Aggregate [dt#270], [dt#270, count(distinct kb_code#264) AS h_kbs#7L]
: : : +- Filter (dt#270 = 20211126)
: : : +- SubqueryAlias spark_catalog.test.test_b
: : : +- Relation test.test_b[session_id#239,device_id#240,brand#241,model#242,wx_version#243,os#244,net_work_type#245,app_id#246,app_name#247,col_z#248,page_url#249,page_title#250,olabel#251,otitle#252,source#253,send_dt#254,recv_dt#255,request_time#256,write_time#257,client_ip#258,col_a#259,dt_hour#260,product#261,channelfrom#262,... 8 more fields] orc
: : +- SubqueryAlias t9
: : +- Aggregate [dt#302], [dt#302, count(distinct kb_code#296) AS i_kbs#8L]
: : +- Filter (dt#302 = 20211126)
: : +- SubqueryAlias spark_catalog.test.test_b
: : +- Relation test.test_b[session_id#271,device_id#272,brand#273,model#274,wx_version#275,os#276,net_work_type#277,app_id#278,app_name#279,col_z#280,page_url#281,page_title#282,olabel#283,otitle#284,source#285,send_dt#286,recv_dt#287,request_time#288,write_time#289,client_ip#290,col_a#291,dt_hour#292,product#293,channelfrom#294,... 8 more fields] orc
: +- SubqueryAlias t10
: +- Aggregate [dt#334], [dt#334, count(distinct kb_code#328) AS j_kbs#9L]
: +- Filter (dt#334 = 20211126)
: +- SubqueryAlias spark_catalog.test.test_b
: +- Relation test.test_b[session_id#303,device_id#304,brand#305,model#306,wx_version#307,os#308,net_work_type#309,app_id#310,app_name#311,col_z#312,page_url#313,page_title#314,olabel#315,otitle#316,source#317,send_dt#318,recv_dt#319,request_time#320,write_time#321,client_ip#322,col_a#323,dt_hour#324,product#325,channelfrom#326,... 8 more fields] orc
+- SubqueryAlias t11
+- Aggregate [dt#366], [dt#366, count(distinct kb_code#360) AS k_kbs#10L]
+- Filter (dt#366 = 20211126)
+- SubqueryAlias spark_catalog.test.test_b
+- Relation test.test_b[session_id#335,device_id#336,brand#337,model#338,wx_version#339,os#340,net_work_type#341,app_id#342,app_name#343,col_z#344,page_url#345,page_title#346,olabel#347,otitle#348,source#349,send_dt#350,recv_dt#351,request_time#352,write_time#353,client_ip#354,col_a#355,dt_hour#356,product#357,channelfrom#358,... 8 more fields] orc
== Physical Plan ==
CommandResult <empty>
+- Execute OptimizedCreateHiveTableAsSelectCommand [Database: test, TableName: test_c, InsertIntoHadoopFsRelationCommand]
+- AdaptiveSparkPlan isFinalPlan=true
+- == Final Plan ==
*(34) Project [day#11, week#12, weekday#13, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L, g_kbs#6L, h_kbs#7L, i_kbs#8L, j_kbs#9L, k_kbs#10L]
+- *(34) BroadcastHashJoin [dt#14], [dt#366], LeftOuter, BuildRight, false
:- *(34) Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L, g_kbs#6L, h_kbs#7L, i_kbs#8L, j_kbs#9L]
: +- *(34) BroadcastHashJoin [dt#14], [dt#334], LeftOuter, BuildRight, false
: :- *(34) Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L, g_kbs#6L, h_kbs#7L, i_kbs#8L]
: : +- *(34) BroadcastHashJoin [dt#14], [dt#302], LeftOuter, BuildRight, false
: : :- *(34) Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L, g_kbs#6L, h_kbs#7L]
: : : +- *(34) BroadcastHashJoin [dt#14], [dt#270], LeftOuter, BuildRight, false
: : : :- *(34) Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L, g_kbs#6L]
: : : : +- *(34) BroadcastHashJoin [dt#14], [dt#238], LeftOuter, BuildRight, false
: : : : :- *(34) Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L]
: : : : : +- *(34) BroadcastHashJoin [dt#14], [dt#206], LeftOuter, BuildRight, false
: : : : : :- *(34) Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L]
: : : : : : +- *(34) BroadcastHashJoin [dt#14], [dt#174], LeftOuter, BuildRight, false
: : : : : : :- *(34) Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L]
: : : : : : : +- *(34) BroadcastHashJoin [dt#14], [dt#142], LeftOuter, BuildRight, false
: : : : : : : :- *(34) Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L]
: : : : : : : : +- *(34) BroadcastHashJoin [dt#14], [dt#110], LeftOuter, BuildRight, false
: : : : : : : : :- *(34) Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L]
: : : : : : : : : +- *(34) BroadcastHashJoin [dt#14], [dt#78], LeftOuter, BuildRight, false
: : : : : : : : : :- *(34) Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L]
: : : : : : : : : : +- *(34) BroadcastHashJoin [dt#14], [dt#46], LeftOuter, BuildRight, false
: : : : : : : : : : :- *(34) ColumnarToRow
: : : : : : : : : : : +- FileScan orc test.test_a[day#11,week#12,weekday#13,dt#14] Batched: true, DataFilters: [], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_a..., PartitionFilters: [isnotnull(dt#14), (dt#14 = 20211126)], PushedFilters: [], ReadSchema: struct<day:string,week:int,weekday:int>
: : : : : : : : : : +- BroadcastQueryStage 42
: : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#1643]
: : : : : : : : : : +- *(23) HashAggregate(keys=[dt#46], functions=[count(distinct kb_code#40)], output=[dt#46, a_kbs#0L])
: : : : : : : : : : +- AQEShuffleRead coalesced
: : : : : : : : : : +- ShuffleQueryStage 21
: : : : : : : : : : +- Exchange hashpartitioning(dt#46, 200), ENSURE_REQUIREMENTS, [id=#983]
: : : : : : : : : : +- *(12) HashAggregate(keys=[dt#46], functions=[partial_count(distinct kb_code#40)], output=[dt#46, count#427L])
: : : : : : : : : : +- *(12) HashAggregate(keys=[dt#46, kb_code#40], functions=[], output=[dt#46, kb_code#40])
: : : : : : : : : : +- AQEShuffleRead coalesced
: : : : : : : : : : +- ShuffleQueryStage 0
: : : : : : : : : : +- Exchange hashpartitioning(dt#46, kb_code#40, 200), ENSURE_REQUIREMENTS, [id=#427]
: : : : : : : : : : +- *(1) HashAggregate(keys=[dt#46, kb_code#40], functions=[], output=[dt#46, kb_code#40])
: : : : : : : : : : +- *(1) ColumnarToRow
: : : : : : : : : : +- FileScan orc test.test_b[kb_code#40,dt#46] Batched: true, DataFilters: [], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_b..., PartitionFilters: [isnotnull(dt#46), (dt#46 = 20211126)], PushedFilters: [], ReadSchema: struct<kb_code:string>
: : : : : : : : : +- BroadcastQueryStage 44
: : : : : : : : : +- ReusedExchange [dt#78, b_kbs#1L], BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#1643]
: : : : : : : : +- BroadcastQueryStage 46
: : : : : : : : +- ReusedExchange [dt#110, c_kbs#2L], BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#1643]
: : : : : : : +- BroadcastQueryStage 48
: : : : : : : +- ReusedExchange [dt#142, d_kbs#3L], BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#1643]
: : : : : : +- BroadcastQueryStage 50
: : : : : : +- ReusedExchange [dt#174, e_kbs#4L], BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#1643]
: : : : : +- BroadcastQueryStage 52
: : : : : +- ReusedExchange [dt#206, f_kbs#5L], BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#1643]
: : : : +- BroadcastQueryStage 54
: : : : +- ReusedExchange [dt#238, g_kbs#6L], BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#1643]
: : : +- BroadcastQueryStage 56
: : : +- ReusedExchange [dt#270, h_kbs#7L], BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#1643]
: : +- BroadcastQueryStage 58
: : +- ReusedExchange [dt#302, i_kbs#8L], BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#1643]
: +- BroadcastQueryStage 60
: +- ReusedExchange [dt#334, j_kbs#9L], BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#1643]
+- BroadcastQueryStage 62
+- ReusedExchange [dt#366, k_kbs#10L], BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#1643]
+- == Initial Plan ==
Project [day#11, week#12, weekday#13, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L, g_kbs#6L, h_kbs#7L, i_kbs#8L, j_kbs#9L, k_kbs#10L]
+- BroadcastHashJoin [dt#14], [dt#366], LeftOuter, BuildRight, false
:- Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L, g_kbs#6L, h_kbs#7L, i_kbs#8L, j_kbs#9L]
: +- BroadcastHashJoin [dt#14], [dt#334], LeftOuter, BuildRight, false
: :- Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L, g_kbs#6L, h_kbs#7L, i_kbs#8L]
: : +- BroadcastHashJoin [dt#14], [dt#302], LeftOuter, BuildRight, false
: : :- Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L, g_kbs#6L, h_kbs#7L]
: : : +- BroadcastHashJoin [dt#14], [dt#270], LeftOuter, BuildRight, false
: : : :- Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L, g_kbs#6L]
: : : : +- BroadcastHashJoin [dt#14], [dt#238], LeftOuter, BuildRight, false
: : : : :- Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L, f_kbs#5L]
: : : : : +- BroadcastHashJoin [dt#14], [dt#206], LeftOuter, BuildRight, false
: : : : : :- Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L, e_kbs#4L]
: : : : : : +- BroadcastHashJoin [dt#14], [dt#174], LeftOuter, BuildRight, false
: : : : : : :- Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L, d_kbs#3L]
: : : : : : : +- BroadcastHashJoin [dt#14], [dt#142], LeftOuter, BuildRight, false
: : : : : : : :- Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L, c_kbs#2L]
: : : : : : : : +- BroadcastHashJoin [dt#14], [dt#110], LeftOuter, BuildRight, false
: : : : : : : : :- Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L, b_kbs#1L]
: : : : : : : : : +- BroadcastHashJoin [dt#14], [dt#78], LeftOuter, BuildRight, false
: : : : : : : : : :- Project [day#11, week#12, weekday#13, dt#14, a_kbs#0L]
: : : : : : : : : : +- BroadcastHashJoin [dt#14], [dt#46], LeftOuter, BuildRight, false
: : : : : : : : : : :- FileScan orc test.test_a[day#11,week#12,weekday#13,dt#14] Batched: true, DataFilters: [], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_a..., PartitionFilters: [isnotnull(dt#14), (dt#14 = 20211126)], PushedFilters: [], ReadSchema: struct<day:string,week:int,weekday:int>
: : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#302]
: : : : : : : : : : +- HashAggregate(keys=[dt#46], functions=[count(distinct kb_code#40)], output=[dt#46, a_kbs#0L])
: : : : : : : : : : +- Exchange hashpartitioning(dt#46, 200), ENSURE_REQUIREMENTS, [id=#299]
: : : : : : : : : : +- HashAggregate(keys=[dt#46], functions=[partial_count(distinct kb_code#40)], output=[dt#46, count#427L])
: : : : : : : : : : +- HashAggregate(keys=[dt#46, kb_code#40], functions=[], output=[dt#46, kb_code#40])
: : : : : : : : : : +- Exchange hashpartitioning(dt#46, kb_code#40, 200), ENSURE_REQUIREMENTS, [id=#295]
: : : : : : : : : : +- HashAggregate(keys=[dt#46, kb_code#40], functions=[], output=[dt#46, kb_code#40])
: : : : : : : : : : +- FileScan orc test.test_b[kb_code#40,dt#46] Batched: true, DataFilters: [], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_b..., PartitionFilters: [isnotnull(dt#46), (dt#46 = 20211126)], PushedFilters: [], ReadSchema: struct<kb_code:string>
: : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#312]
: : : : : : : : : +- HashAggregate(keys=[dt#78], functions=[count(distinct kb_code#72)], output=[dt#78, b_kbs#1L])
: : : : : : : : : +- Exchange hashpartitioning(dt#78, 200), ENSURE_REQUIREMENTS, [id=#309]
: : : : : : : : : +- HashAggregate(keys=[dt#78], functions=[partial_count(distinct kb_code#72)], output=[dt#78, count#431L])
: : : : : : : : : +- HashAggregate(keys=[dt#78, kb_code#72], functions=[], output=[dt#78, kb_code#72])
: : : : : : : : : +- Exchange hashpartitioning(dt#78, kb_code#72, 200), ENSURE_REQUIREMENTS, [id=#305]
: : : : : : : : : +- HashAggregate(keys=[dt#78, kb_code#72], functions=[], output=[dt#78, kb_code#72])
: : : : : : : : : +- FileScan orc test.test_b[kb_code#72,dt#78] Batched: true, DataFilters: [], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_b..., PartitionFilters: [isnotnull(dt#78), (dt#78 = 20211126)], PushedFilters: [], ReadSchema: struct<kb_code:string>
: : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#322]
: : : : : : : : +- HashAggregate(keys=[dt#110], functions=[count(distinct kb_code#104)], output=[dt#110, c_kbs#2L])
: : : : : : : : +- Exchange hashpartitioning(dt#110, 200), ENSURE_REQUIREMENTS, [id=#319]
: : : : : : : : +- HashAggregate(keys=[dt#110], functions=[partial_count(distinct kb_code#104)], output=[dt#110, count#435L])
: : : : : : : : +- HashAggregate(keys=[dt#110, kb_code#104], functions=[], output=[dt#110, kb_code#104])
: : : : : : : : +- Exchange hashpartitioning(dt#110, kb_code#104, 200), ENSURE_REQUIREMENTS, [id=#315]
: : : : : : : : +- HashAggregate(keys=[dt#110, kb_code#104], functions=[], output=[dt#110, kb_code#104])
: : : : : : : : +- FileScan orc test.test_b[kb_code#104,dt#110] Batched: true, DataFilters: [], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_b..., PartitionFilters: [isnotnull(dt#110), (dt#110 = 20211126)], PushedFilters: [], ReadSchema: struct<kb_code:string>
: : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#332]
: : : : : : : +- HashAggregate(keys=[dt#142], functions=[count(distinct kb_code#136)], output=[dt#142, d_kbs#3L])
: : : : : : : +- Exchange hashpartitioning(dt#142, 200), ENSURE_REQUIREMENTS, [id=#329]
: : : : : : : +- HashAggregate(keys=[dt#142], functions=[partial_count(distinct kb_code#136)], output=[dt#142, count#439L])
: : : : : : : +- HashAggregate(keys=[dt#142, kb_code#136], functions=[], output=[dt#142, kb_code#136])
: : : : : : : +- Exchange hashpartitioning(dt#142, kb_code#136, 200), ENSURE_REQUIREMENTS, [id=#325]
: : : : : : : +- HashAggregate(keys=[dt#142, kb_code#136], functions=[], output=[dt#142, kb_code#136])
: : : : : : : +- FileScan orc test.test_b[kb_code#136,dt#142] Batched: true, DataFilters: [], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_b..., PartitionFilters: [isnotnull(dt#142), (dt#142 = 20211126)], PushedFilters: [], ReadSchema: struct<kb_code:string>
: : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#342]
: : : : : : +- HashAggregate(keys=[dt#174], functions=[count(distinct kb_code#168)], output=[dt#174, e_kbs#4L])
: : : : : : +- Exchange hashpartitioning(dt#174, 200), ENSURE_REQUIREMENTS, [id=#339]
: : : : : : +- HashAggregate(keys=[dt#174], functions=[partial_count(distinct kb_code#168)], output=[dt#174, count#443L])
: : : : : : +- HashAggregate(keys=[dt#174, kb_code#168], functions=[], output=[dt#174, kb_code#168])
: : : : : : +- Exchange hashpartitioning(dt#174, kb_code#168, 200), ENSURE_REQUIREMENTS, [id=#335]
: : : : : : +- HashAggregate(keys=[dt#174, kb_code#168], functions=[], output=[dt#174, kb_code#168])
: : : : : : +- FileScan orc test.test_b[kb_code#168,dt#174] Batched: true, DataFilters: [], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_b..., PartitionFilters: [isnotnull(dt#174), (dt#174 = 20211126)], PushedFilters: [], ReadSchema: struct<kb_code:string>
: : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#352]
: : : : : +- HashAggregate(keys=[dt#206], functions=[count(distinct kb_code#200)], output=[dt#206, f_kbs#5L])
: : : : : +- Exchange hashpartitioning(dt#206, 200), ENSURE_REQUIREMENTS, [id=#349]
: : : : : +- HashAggregate(keys=[dt#206], functions=[partial_count(distinct kb_code#200)], output=[dt#206, count#447L])
: : : : : +- HashAggregate(keys=[dt#206, kb_code#200], functions=[], output=[dt#206, kb_code#200])
: : : : : +- Exchange hashpartitioning(dt#206, kb_code#200, 200), ENSURE_REQUIREMENTS, [id=#345]
: : : : : +- HashAggregate(keys=[dt#206, kb_code#200], functions=[], output=[dt#206, kb_code#200])
: : : : : +- FileScan orc test.test_b[kb_code#200,dt#206] Batched: true, DataFilters: [], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_b..., PartitionFilters: [isnotnull(dt#206), (dt#206 = 20211126)], PushedFilters: [], ReadSchema: struct<kb_code:string>
: : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#362]
: : : : +- HashAggregate(keys=[dt#238], functions=[count(distinct kb_code#232)], output=[dt#238, g_kbs#6L])
: : : : +- Exchange hashpartitioning(dt#238, 200), ENSURE_REQUIREMENTS, [id=#359]
: : : : +- HashAggregate(keys=[dt#238], functions=[partial_count(distinct kb_code#232)], output=[dt#238, count#451L])
: : : : +- HashAggregate(keys=[dt#238, kb_code#232], functions=[], output=[dt#238, kb_code#232])
: : : : +- Exchange hashpartitioning(dt#238, kb_code#232, 200), ENSURE_REQUIREMENTS, [id=#355]
: : : : +- HashAggregate(keys=[dt#238, kb_code#232], functions=[], output=[dt#238, kb_code#232])
: : : : +- FileScan orc test.test_b[kb_code#232,dt#238] Batched: true, DataFilters: [], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_b..., PartitionFilters: [isnotnull(dt#238), (dt#238 = 20211126)], PushedFilters: [], ReadSchema: struct<kb_code:string>
: : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#372]
: : : +- HashAggregate(keys=[dt#270], functions=[count(distinct kb_code#264)], output=[dt#270, h_kbs#7L])
: : : +- Exchange hashpartitioning(dt#270, 200), ENSURE_REQUIREMENTS, [id=#369]
: : : +- HashAggregate(keys=[dt#270], functions=[partial_count(distinct kb_code#264)], output=[dt#270, count#455L])
: : : +- HashAggregate(keys=[dt#270, kb_code#264], functions=[], output=[dt#270, kb_code#264])
: : : +- Exchange hashpartitioning(dt#270, kb_code#264, 200), ENSURE_REQUIREMENTS, [id=#365]
: : : +- HashAggregate(keys=[dt#270, kb_code#264], functions=[], output=[dt#270, kb_code#264])
: : : +- FileScan orc test.test_b[kb_code#264,dt#270] Batched: true, DataFilters: [], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_b..., PartitionFilters: [isnotnull(dt#270), (dt#270 = 20211126)], PushedFilters: [], ReadSchema: struct<kb_code:string>
: : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#382]
: : +- HashAggregate(keys=[dt#302], functions=[count(distinct kb_code#296)], output=[dt#302, i_kbs#8L])
: : +- Exchange hashpartitioning(dt#302, 200), ENSURE_REQUIREMENTS, [id=#379]
: : +- HashAggregate(keys=[dt#302], functions=[partial_count(distinct kb_code#296)], output=[dt#302, count#459L])
: : +- HashAggregate(keys=[dt#302, kb_code#296], functions=[], output=[dt#302, kb_code#296])
: : +- Exchange hashpartitioning(dt#302, kb_code#296, 200), ENSURE_REQUIREMENTS, [id=#375]
: : +- HashAggregate(keys=[dt#302, kb_code#296], functions=[], output=[dt#302, kb_code#296])
: : +- FileScan orc test.test_b[kb_code#296,dt#302] Batched: true, DataFilters: [], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_b..., PartitionFilters: [isnotnull(dt#302), (dt#302 = 20211126)], PushedFilters: [], ReadSchema: struct<kb_code:string>
: +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#392]
: +- HashAggregate(keys=[dt#334], functions=[count(distinct kb_code#328)], output=[dt#334, j_kbs#9L])
: +- Exchange hashpartitioning(dt#334, 200), ENSURE_REQUIREMENTS, [id=#389]
: +- HashAggregate(keys=[dt#334], functions=[partial_count(distinct kb_code#328)], output=[dt#334, count#463L])
: +- HashAggregate(keys=[dt#334, kb_code#328], functions=[], output=[dt#334, kb_code#328])
: +- Exchange hashpartitioning(dt#334, kb_code#328, 200), ENSURE_REQUIREMENTS, [id=#385]
: +- HashAggregate(keys=[dt#334, kb_code#328], functions=[], output=[dt#334, kb_code#328])
: +- FileScan orc test.test_b[kb_code#328,dt#334] Batched: true, DataFilters: [], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_b..., PartitionFilters: [isnotnull(dt#334), (dt#334 = 20211126)], PushedFilters: [], ReadSchema: struct<kb_code:string>
+- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#402]
+- HashAggregate(keys=[dt#366], functions=[count(distinct kb_code#360)], output=[dt#366, k_kbs#10L])
+- Exchange hashpartitioning(dt#366, 200), ENSURE_REQUIREMENTS, [id=#399]
+- HashAggregate(keys=[dt#366], functions=[partial_count(distinct kb_code#360)], output=[dt#366, count#467L])
+- HashAggregate(keys=[dt#366, kb_code#360], functions=[], output=[dt#366, kb_code#360])
+- Exchange hashpartitioning(dt#366, kb_code#360, 200), ENSURE_REQUIREMENTS, [id=#395]
+- HashAggregate(keys=[dt#366, kb_code#360], functions=[], output=[dt#366, kb_code#360])
+- FileScan orc test.test_b[kb_code#360,dt#366] Batched: true, DataFilters: [], Format: ORC, Location: InMemoryFileIndex(1 paths)[file:/Users/ritka/Documents/spark-3.3.0/spark-warehouse/test.db/test_b..., PartitionFilters: [isnotnull(dt#366), (dt#366 = 20211126)], PushedFilters: [], ReadSchema: struct<kb_code:string>
> sql hang at planning stage
> --------------------------
>
> Key: SPARK-37581
> URL: https://issues.apache.org/jira/browse/SPARK-37581
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 3.1.1, 3.2.0
> Reporter: ocean
> Priority: Major
>
> when executing a SQL query, the query hangs at the planning stage.
> when DPP (dynamic partition pruning) is disabled, the query finishes very quickly.
> we can reproduce this problem through the example below:
> create table test.test_a (
> day string,
> week int,
> weekday int)
> partitioned by (
> dt varchar(8))
> stored as orc;
> insert into test.test_a partition (dt=20211126) values('1',1,2);
> create table test.test_b (
> session_id string,
> device_id string,
> brand string,
> model string,
> wx_version string,
> os string,
> net_work_type string,
> app_id string,
> app_name string,
> col_z string,
> page_url string,
> page_title string,
> olabel string,
> otitle string,
> source string,
> send_dt string,
> recv_dt string,
> request_time string,
> write_time string,
> client_ip string,
> col_a string,
> dt_hour varchar(12),
> product string,
> channelfrom string,
> customer_um string,
> kb_code string,
> col_b string,
> rectype string,
> errcode string,
> col_c string,
> pageid_merge string)
> partitioned by (
> dt varchar(8))
> stored as orc;
> insert into test.test_b partition(dt=20211126)
> values('2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2');
>
>
> drop table if exists test.test_c;create table if not exists test.test_c stored as ORCFILE as
> select calendar.day,calendar.week,calendar.weekday, a_kbs,
> b_kbs, c_kbs,d_kbs,e_kbs,f_kbs,g_kbs,h_kbs,i_kbs,j_kbs,k_kbs
> from (select * from test.test_a where dt = '20211126') calendar
> left join
> (select dt,count(distinct kb_code) as a_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t1
> on calendar.dt = t1.dt
> left join
> (select dt,count(distinct kb_code) as b_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t2
> on calendar.dt = t2.dt
> left join
> (select dt,count(distinct kb_code) as c_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t3
> on calendar.dt = t3.dt
> left join
> (select dt,count(distinct kb_code) as d_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t4
> on calendar.dt = t4.dt
> left join
> (select dt,count(distinct kb_code) as e_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t5
> on calendar.dt = t5.dt
> left join
> (select dt,count(distinct kb_code) as f_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t6
> on calendar.dt = t6.dt
> left join
> (select dt,count(distinct kb_code) as g_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t7
> on calendar.dt = t7.dt
> left join
> (select dt,count(distinct kb_code) as h_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t8
> on calendar.dt = t8.dt
> left join
> (select dt,count(distinct kb_code) as i_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t9
> on calendar.dt = t9.dt
> left join
> (select dt,count(distinct kb_code) as j_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t10
> on calendar.dt = t10.dt
> left join
> (select dt,count(distinct kb_code) as k_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t11
> on calendar.dt = t11.dt
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org