You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@hawq.apache.org by "Ruilong Huo (JIRA)" <ji...@apache.org> on 2016/01/04 03:38:39 UTC
[jira] [Created] (HAWQ-300) Out of memory during loading large
volume data on large cluster with YARN mode
Ruilong Huo created HAWQ-300:
--------------------------------
Summary: Out of memory during loading large volume data on large cluster with YARN mode
Key: HAWQ-300
URL: https://issues.apache.org/jira/browse/HAWQ-300
Project: Apache HAWQ
Issue Type: Bug
Components: Resource Manager
Reporter: Ruilong Huo
Assignee: Lei Chang
It errors out with out of memory during loading large volume data on large cluster with YARN mode.
{noformat}
INSERT INTO retail_demo.Orders (
Order_ID
, Customer_ID
, Store_ID
, Order_Datetime
, Ship_Completion_Datetime
, Return_Datetime
, Refund_Datetime
, Payment_Method_Code
, Total_Tax_Amount
, Total_Paid_Amount
, Total_Item_Quantity
, Total_Discount_Amount
, Coupon_Code
, Coupon_Amount
, Order_Canceled_Flag
, Has_Returned_Items_Flag
, Has_Refunded_Items_Flag
, Fraud_Code
, Fraud_Resolution_Code
, Billing_Address_Line1
, Billing_Address_Line2
, Billing_Address_Line3
, Billing_Address_City
, Billing_Address_State
, Billing_Address_Postal_Code
, Billing_Address_Country
, Billing_Phone_Number
, Customer_Name
, Customer_Email_Address
, Ordering_Session_ID
, Website_URL
)
SELECT Order_ID AS Order_ID
, MAX(Customer_ID) AS Customer_ID
, MAX(Store_ID) AS Store_ID
, MIN(Order_Datetime) AS Order_Datetime
, MAX(Ship_Datetime) AS Ship_Completion_Datetime
, MIN(item_Return_Datetime) AS Return_Datetime
, MIN(item_Refund_Datetime) AS Refund_Datetime
, MAX(Payment_Method_Code) AS Payment_Method_Code
, SUM(Tax_Amount) AS Total_Tax_Amount
, SUM(Item_Quantity * Item_Price) AS Total_Paid_Amount
, SUM(Item_Quantity) AS Total_Item_Quantity
, SUM(Discount_Amount) AS Total_Discount_Amount
, MAX(Coupon_Code) AS Coupon_Code
, MAX(Coupon_Amount) AS Coupon_Amount
, CASE WHEN SUM(CASE WHEN item_Return_Datetime IS NOT NULL OR item_Refund_Datetime IS NOT NULL THEN 1 ELSE 0 END) = COUNT(*) THEN 'Y' ELSE 'N' END AS Order_Canceled_Flag
, CASE WHEN MAX(item_Return_Datetime) IS NOT NULL THEN 'Y' ELSE 'N' END AS Has_Returned_Items_Flag
, CASE WHEN MAX(item_Refund_Datetime) IS NOT NULL THEN 'Y' ELSE 'N' END AS Has_Refunded_Items_Flag
, CASE SUBSTRING(order_id FOR 2 FROM 2)
WHEN 96 THEN 'Stolen Card'
WHEN 97 THEN 'Compromised Account'
WHEN 98 THEN 'Gift Certificate Abuse'
WHEN 99 THEN 'Researching'
ELSE NULL
END AS Fraud_Code
, CASE WHEN SUBSTRING(order_id FOR 2 FROM 2) > 95
THEN CASE SUBSTRING(order_id FOR 1 FROM 6)
WHEN 5 THEN 'Resolved - Account Canceled'
WHEN 6 THEN 'Resolved - No Fraud'
WHEN 7 THEN 'Resolved - Order Canceled'
WHEN 8 THEN 'Resolved - Authorities Notified'
ELSE 'In Progress'
END
ELSE NULL
END AS Fraud_Resolution_Code
, MAX(ship_Address_Line1) AS Billing_Address_Line1 -- Need to change this to a window function for first ship address as the billing address
, MAX(ship_Address_Line2) AS Billing_Address_Line2
, MAX(ship_Address_Line3) AS Billing_Address_Line3
, MAX(ship_Address_City) AS Billing_Address_City
, MAX(ship_Address_State) AS Billing_Address_State
, MAX(ship_Address_Postal_Code) AS Billing_Address_Postal_Code
, MAX(ship_Address_Country) AS Billing_Address_Country
, MAX(ship_Phone_Number) AS Billing_Phone_Number
, MAX(ship_Customer_Name) AS Customer_Name
, MAX(ship_Customer_Email_Address) AS Customer_Email_Address
, MAX(Ordering_Session_ID) AS Ordering_Session_ID
, MAX(Website_URL) AS Website_URL
FROM retail_demo.order_lineitems
GROUP BY order_id
;
psql:/home/gpadmin/lsp/load/retaildw/gen_facts.sql:100: ERROR: Canceling query because of high VMEM usage. Used: 18436MB, available 2044MB, red zone: 18432MB (runaway_cleaner.c:152) (seg5312 test38.ic:40000 pid=416630) (dispatcher.c:1701)
{noformat}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)