You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@hive.apache.org by "Gopal V (JIRA)" <ji...@apache.org> on 2018/04/30 06:09:00 UTC
[jira] [Created] (HIVE-19360) CBO: Add an "optimizedSQL" to
QueryPlan object
Gopal V created HIVE-19360:
------------------------------
Summary: CBO: Add an "optimizedSQL" to QueryPlan object
Key: HIVE-19360
URL: https://issues.apache.org/jira/browse/HIVE-19360
Project: Hive
Issue Type: Improvement
Components: CBO, Diagnosability
Affects Versions: 3.1.0
Reporter: Gopal V
Calcite RelNodes can be converted back into SQL (as the new JDBC storage handler does), which allows Hive to print out the post CBO plan as a SQL query instead of having to guess the join orders from the subsequent Tez plan.
The query generated might not be always valid SQL at this point, but is a world ahead of DAG plans in readability.
Eg. tpc-ds Query4 CTEs gets expanded to
{code}
SELECT t16.$f3 customer_preferred_cust_flag
FROM
(SELECT t0.c_customer_id $f0,
SUM((t2.ws_ext_list_price - t2.ws_ext_wholesale_cost - t2.ws_ext_discount_amt + t2.ws_ext_sales_price) / CAST(2 AS DECIMAL(10, 0))) $f8
FROM
(SELECT c_customer_sk,
c_customer_id,
c_first_name,
c_last_name,
c_preferred_cust_flag,
c_birth_country,
c_login,
c_email_address
FROM default.customer
WHERE c_customer_sk IS NOT NULL
AND c_customer_id IS NOT NULL) t0
INNER JOIN (
(SELECT ws_sold_date_sk,
ws_bill_customer_sk,
ws_ext_discount_amt,
ws_ext_sales_price,
ws_ext_wholesale_cost,
ws_ext_list_price
FROM default.web_sales
WHERE ws_bill_customer_sk IS NOT NULL
AND ws_sold_date_sk IS NOT NULL) t2
INNER JOIN
(SELECT d_date_sk,
CAST(2002 AS INTEGER) d_year
FROM default.date_dim
WHERE d_year = 2002
AND d_date_sk IS NOT NULL) t4 ON t2.ws_sold_date_sk = t4.d_date_sk) ON t0.c_customer_sk = t2.ws_bill_customer_sk
GROUP BY t0.c_customer_id,
t0.c_first_name,
t0.c_last_name,
t0.c_preferred_cust_flag,
t0.c_birth_country,
t0.c_login,
t0.c_email_address) t7
INNER JOIN (
(SELECT t9.c_customer_id $f0,
t9.c_preferred_cust_flag $f3,
SUM((t11.ss_ext_list_price - t11.ss_ext_wholesale_cost - t11.ss_ext_discount_amt + t11.ss_ext_sales_price) / CAST(2 AS DECIMAL(10, 0))) $f8
FROM
(SELECT c_customer_sk,
c_customer_id,
c_first_name,
c_last_name,
c_preferred_cust_flag,
c_birth_country,
c_login,
c_email_address
FROM default.customer
WHERE c_customer_sk IS NOT NULL
AND c_customer_id IS NOT NULL) t9
INNER JOIN (
(SELECT ss_sold_date_sk,
ss_customer_sk,
ss_ext_discount_amt,
ss_ext_sales_price,
ss_ext_wholesale_cost,
ss_ext_list_price
FROM default.store_sales
WHERE ss_customer_sk IS NOT NULL
AND ss_sold_date_sk IS NOT NULL) t11
INNER JOIN
(SELECT d_date_sk,
CAST(2002 AS INTEGER) d_year
FROM default.date_dim
WHERE d_year = 2002
AND d_date_sk IS NOT NULL) t13 ON t11.ss_sold_date_sk = t13.d_date_sk) ON t9.c_customer_sk = t11.ss_customer_sk
GROUP BY t9.c_customer_id,
t9.c_first_name,
t9.c_last_name,
t9.c_preferred_cust_flag,
t9.c_birth_country,
t9.c_login,
t9.c_email_address) t16
INNER JOIN (
(SELECT t18.c_customer_id $f0,
SUM((t20.cs_ext_list_price - t20.cs_ext_wholesale_cost - t20.cs_ext_discount_amt + t20.cs_ext_sales_price) / CAST(2 AS DECIMAL(10, 0))) $f8
FROM
(SELECT c_customer_sk,
c_customer_id,
c_first_name,
c_last_name,
c_preferred_cust_flag,
c_birth_country,
c_login,
c_email_address
FROM default.customer
WHERE c_customer_sk IS NOT NULL
AND c_customer_id IS NOT NULL) t18
INNER JOIN (
(SELECT cs_sold_date_sk,
cs_bill_customer_sk,
cs_ext_discount_amt,
cs_ext_sales_price,
cs_ext_wholesale_cost,
cs_ext_list_price
FROM default.catalog_sales
WHERE cs_bill_customer_sk IS NOT NULL
AND cs_sold_date_sk IS NOT NULL) t20
INNER JOIN
(SELECT d_date_sk,
CAST(2002 AS INTEGER) d_year
FROM default.date_dim
WHERE d_year = 2002
AND d_date_sk IS NOT NULL) t22 ON t20.cs_sold_date_sk = t22.d_date_sk) ON t18.c_customer_sk = t20.cs_bill_customer_sk
GROUP BY t18.c_customer_id,
t18.c_first_name,
t18.c_last_name,
t18.c_preferred_cust_flag,
t18.c_birth_country,
t18.c_login,
t18.c_email_address) t25
INNER JOIN
(SELECT t27.c_customer_id $f0,
SUM((t29.ss_ext_list_price - t29.ss_ext_wholesale_cost - t29.ss_ext_discount_amt + t29.ss_ext_sales_price) / CAST(2 AS DECIMAL(10, 0))) $f8
FROM
(SELECT c_customer_sk,
c_customer_id,
c_first_name,
c_last_name,
c_preferred_cust_flag,
c_birth_country,
c_login,
c_email_address
FROM default.customer
WHERE c_customer_sk IS NOT NULL
AND c_customer_id IS NOT NULL) t27
INNER JOIN (
(SELECT ss_sold_date_sk,
ss_customer_sk,
ss_ext_discount_amt,
ss_ext_sales_price,
ss_ext_wholesale_cost,
ss_ext_list_price
FROM default.store_sales
WHERE ss_customer_sk IS NOT NULL
AND ss_sold_date_sk IS NOT NULL) t29
INNER JOIN
(SELECT d_date_sk,
CAST(2001 AS INTEGER) d_year
FROM default.date_dim
WHERE d_year = 2001
AND d_date_sk IS NOT NULL) t31 ON t29.ss_sold_date_sk = t31.d_date_sk) ON t27.c_customer_sk = t29.ss_customer_sk
GROUP BY t27.c_customer_id,
t27.c_first_name,
t27.c_last_name,
t27.c_preferred_cust_flag,
t27.c_birth_country,
t27.c_login,
t27.c_email_address
HAVING SUM((t29.ss_ext_list_price - t29.ss_ext_wholesale_cost - t29.ss_ext_discount_amt + t29.ss_ext_sales_price) / CAST(2 AS DECIMAL(10, 0))) > 0) t35 ON t25.$f0 = t35.$f0) ON t16.$f0 = t35.$f0
INNER JOIN
(SELECT t37.c_customer_id $f0,
SUM((t39.cs_ext_list_price - t39.cs_ext_wholesale_cost - t39.cs_ext_discount_amt + t39.cs_ext_sales_price) / CAST(2 AS DECIMAL(10, 0))) $f8
FROM
(SELECT c_customer_sk,
c_customer_id,
c_first_name,
c_last_name,
c_preferred_cust_flag,
c_birth_country,
c_login,
c_email_address
FROM default.customer
WHERE c_customer_sk IS NOT NULL
AND c_customer_id IS NOT NULL) t37
INNER JOIN (
(SELECT cs_sold_date_sk,
cs_bill_customer_sk,
cs_ext_discount_amt,
cs_ext_sales_price,
cs_ext_wholesale_cost,
cs_ext_list_price
FROM default.catalog_sales
WHERE cs_bill_customer_sk IS NOT NULL
AND cs_sold_date_sk IS NOT NULL) t39
INNER JOIN
(SELECT d_date_sk,
CAST(2001 AS INTEGER) d_year
FROM default.date_dim
WHERE d_year = 2001
AND d_date_sk IS NOT NULL) t41 ON t39.cs_sold_date_sk = t41.d_date_sk) ON t37.c_customer_sk = t39.cs_bill_customer_sk
GROUP BY t37.c_customer_id,
t37.c_first_name,
t37.c_last_name,
t37.c_preferred_cust_flag,
t37.c_birth_country,
t37.c_login,
t37.c_email_address
HAVING SUM((t39.cs_ext_list_price - t39.cs_ext_wholesale_cost - t39.cs_ext_discount_amt + t39.cs_ext_sales_price) / CAST(2 AS DECIMAL(10, 0))) > 0) t45 ON t25.$f8 / t45.$f8 > t16.$f8 / t35.$f8
AND t35.$f0 = t45.$f0) ON t7.$f0 = t35.$f0
INNER JOIN
(SELECT t47.c_customer_id $f0,
SUM((t49.ws_ext_list_price - t49.ws_ext_wholesale_cost - t49.ws_ext_discount_amt + t49.ws_ext_sales_price) / CAST(2 AS DECIMAL(10, 0))) $f8
FROM
(SELECT c_customer_sk,
c_customer_id,
c_first_name,
c_last_name,
c_preferred_cust_flag,
c_birth_country,
c_login,
c_email_address
FROM default.customer
WHERE c_customer_sk IS NOT NULL
AND c_customer_id IS NOT NULL) t47
INNER JOIN (
(SELECT ws_sold_date_sk,
ws_bill_customer_sk,
ws_ext_discount_amt,
ws_ext_sales_price,
ws_ext_wholesale_cost,
ws_ext_list_price
FROM default.web_sales
WHERE ws_bill_customer_sk IS NOT NULL
AND ws_sold_date_sk IS NOT NULL) t49
INNER JOIN
(SELECT d_date_sk,
CAST(2001 AS INTEGER) d_year
FROM default.date_dim
WHERE d_year = 2001
AND d_date_sk IS NOT NULL) t51 ON t49.ws_sold_date_sk = t51.d_date_sk) ON t47.c_customer_sk = t49.ws_bill_customer_sk
GROUP BY t47.c_customer_id,
t47.c_first_name,
t47.c_last_name,
t47.c_preferred_cust_flag,
t47.c_birth_country,
t47.c_login,
t47.c_email_address
HAVING SUM((t49.ws_ext_list_price - t49.ws_ext_wholesale_cost - t49.ws_ext_discount_amt + t49.ws_ext_sales_price) / CAST(2 AS DECIMAL(10, 0))) > 0) t55 ON t25.$f8 / t45.$f8 > t7.$f8 / t55.$f8
AND t35.$f0 = t55.$f0
ORDER BY t16.$f3 IS NULL DESC, t16.$f3
LIMIT 100
{code}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)