You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@hawq.apache.org by "Ruilong Huo (JIRA)" <ji...@apache.org> on 2016/01/04 03:38:39 UTC

[jira] [Created] (HAWQ-300) Out of memory during loading large volume data on large cluster with YARN mode

Ruilong Huo created HAWQ-300:
--------------------------------

             Summary: Out of memory during loading large volume data on large cluster with YARN mode
                 Key: HAWQ-300
                 URL: https://issues.apache.org/jira/browse/HAWQ-300
             Project: Apache HAWQ
          Issue Type: Bug
          Components: Resource Manager
            Reporter: Ruilong Huo
            Assignee: Lei Chang


It errors out with out of memory during loading large volume data on large cluster with YARN mode.
{noformat}
INSERT INTO retail_demo.Orders (
  Order_ID                      
, Customer_ID                  
, Store_ID                     
, Order_Datetime               
, Ship_Completion_Datetime     
, Return_Datetime              
, Refund_Datetime              
, Payment_Method_Code          
, Total_Tax_Amount             
, Total_Paid_Amount             
, Total_Item_Quantity          
, Total_Discount_Amount        
, Coupon_Code                  
, Coupon_Amount                
, Order_Canceled_Flag          
, Has_Returned_Items_Flag       
, Has_Refunded_Items_Flag      
, Fraud_Code                   
, Fraud_Resolution_Code        
, Billing_Address_Line1        
, Billing_Address_Line2        
, Billing_Address_Line3        
, Billing_Address_City         
, Billing_Address_State        
, Billing_Address_Postal_Code  
, Billing_Address_Country      
, Billing_Phone_Number         
, Customer_Name                
, Customer_Email_Address       
, Ordering_Session_ID          
, Website_URL                  
)
SELECT Order_ID AS Order_ID
, MAX(Customer_ID) AS Customer_ID
, MAX(Store_ID) AS Store_ID
, MIN(Order_Datetime) AS Order_Datetime
, MAX(Ship_Datetime) AS Ship_Completion_Datetime
, MIN(item_Return_Datetime) AS Return_Datetime
, MIN(item_Refund_Datetime) AS Refund_Datetime
, MAX(Payment_Method_Code) AS Payment_Method_Code
, SUM(Tax_Amount) AS Total_Tax_Amount
, SUM(Item_Quantity * Item_Price) AS Total_Paid_Amount
, SUM(Item_Quantity) AS Total_Item_Quantity
, SUM(Discount_Amount) AS Total_Discount_Amount
, MAX(Coupon_Code) AS Coupon_Code
, MAX(Coupon_Amount) AS Coupon_Amount
, CASE WHEN SUM(CASE WHEN item_Return_Datetime IS NOT NULL OR item_Refund_Datetime IS NOT NULL THEN 1 ELSE 0 END) = COUNT(*) THEN 'Y' ELSE 'N' END AS Order_Canceled_Flag
, CASE WHEN MAX(item_Return_Datetime) IS NOT NULL THEN 'Y' ELSE 'N' END AS Has_Returned_Items_Flag
, CASE WHEN MAX(item_Refund_Datetime) IS NOT NULL THEN 'Y' ELSE 'N' END AS Has_Refunded_Items_Flag
, CASE SUBSTRING(order_id FOR 2 FROM 2)
       WHEN 96 THEN 'Stolen Card'
       WHEN 97 THEN 'Compromised Account'
       WHEN 98 THEN 'Gift Certificate Abuse'
       WHEN 99 THEN 'Researching'
       ELSE NULL
  END AS Fraud_Code
, CASE WHEN SUBSTRING(order_id FOR 2 FROM 2) > 95
       THEN CASE SUBSTRING(order_id FOR 1 FROM 6)
                 WHEN 5 THEN 'Resolved - Account Canceled'
                 WHEN 6 THEN 'Resolved - No Fraud'
                 WHEN 7 THEN 'Resolved - Order Canceled'
                 WHEN 8 THEN 'Resolved - Authorities Notified'
                 ELSE 'In Progress'
            END
       ELSE NULL
  END AS Fraud_Resolution_Code
, MAX(ship_Address_Line1) AS Billing_Address_Line1 -- Need to change this to a window function for first ship address as the billing address
, MAX(ship_Address_Line2) AS Billing_Address_Line2
, MAX(ship_Address_Line3) AS Billing_Address_Line3
, MAX(ship_Address_City) AS Billing_Address_City
, MAX(ship_Address_State) AS Billing_Address_State
, MAX(ship_Address_Postal_Code) AS Billing_Address_Postal_Code
, MAX(ship_Address_Country) AS Billing_Address_Country
, MAX(ship_Phone_Number) AS Billing_Phone_Number
, MAX(ship_Customer_Name) AS Customer_Name
, MAX(ship_Customer_Email_Address) AS Customer_Email_Address
, MAX(Ordering_Session_ID) AS Ordering_Session_ID
, MAX(Website_URL) AS Website_URL
FROM retail_demo.order_lineitems
GROUP BY order_id
;
psql:/home/gpadmin/lsp/load/retaildw/gen_facts.sql:100: ERROR:  Canceling query because of high VMEM usage. Used: 18436MB, available 2044MB, red zone: 18432MB (runaway_cleaner.c:152)  (seg5312 test38.ic:40000 pid=416630) (dispatcher.c:1701)
{noformat}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)