You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Weizhong (JIRA)" <ji...@apache.org> on 2016/03/11 03:28:06 UTC

[jira] [Updated] (SPARK-13800) Hive conf will be modified on multi-beeline connect to thriftserver

     [ https://issues.apache.org/jira/browse/SPARK-13800?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Weizhong updated SPARK-13800:
-----------------------------
    Description: 
{color:red}Note: we connect to the Hive MetaStore service, as hive.metastore.uris is set in hive-site.xml.{color}

1. Start the ThriftServer (TS).
2. Connect beeline session 1 to the TS, then run:
{code:sql}
create database if not exists hive_bin_partitioned_orc_3;
use hive_bin_partitioned_orc_3;

set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions=10000;
set spark.sql.autoBroadcastJoinThreshold=-1;

drop table if exists store_returns;
create table store_returns(
    sr_returned_date_sk       int    ,
    sr_return_time_sk         int    ,
    sr_item_sk                int    ,
    sr_customer_sk            int    ,
    sr_cdemo_sk               int    ,
    sr_hdemo_sk               int    ,
    sr_addr_sk                int    ,
    sr_store_sk               int    ,
    sr_reason_sk              int    ,
    sr_ticket_number          int    ,
    sr_return_quantity        int    ,
    sr_return_amt             float  ,
    sr_return_tax             float  ,
    sr_return_amt_inc_tax     float  ,
    sr_fee                    float  ,
    sr_return_ship_cost       float  ,
    sr_refunded_cash          float  ,
    sr_reversed_charge        float  ,
    sr_store_credit           float  ,
    sr_net_loss               float
)
partitioned by (sr_returned_date string)
stored as orc;

insert overwrite table store_returns partition (sr_returned_date) 
select
  sr.sr_returned_date_sk,
  sr.sr_return_time_sk,
  sr.sr_item_sk,
  sr.sr_customer_sk,
  sr.sr_cdemo_sk,
  sr.sr_hdemo_sk,
  sr.sr_addr_sk,
  sr.sr_store_sk,
  sr.sr_reason_sk,
  sr.sr_ticket_number,
  sr.sr_return_quantity,
  sr.sr_return_amt,
  sr.sr_return_tax,
  sr.sr_return_amt_inc_tax,
  sr.sr_fee,
  sr.sr_return_ship_cost,
  sr.sr_refunded_cash,
  sr.sr_reversed_charge,
  sr.sr_store_credit,
  sr.sr_net_loss,
  dd.d_date as sr_returned_date 
from tpcds_text_3.store_returns sr
join tpcds_text_3.date_dim dd
on (sr.sr_returned_date_sk = dd.d_date_sk);
{code}
3. Connect beeline session 2 to the TS, then run:
{code:sql}
show tables;
{code}

*The INSERT ... SELECT fails because hive.exec.max.dynamic.partitions has been reset to its default value (1000).*

{noformat}
Caused by: java.lang.reflect.InvocationTargetException
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:497)
	at org.apache.spark.sql.hive.client.Shim_v1_2.loadDynamicPartitions(HiveShim.scala:602)
	at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$loadDynamicPartitions$1.apply$mcV$sp(ClientWrapper.scala:895)
	at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$loadDynamicPartitions$1.apply(ClientWrapper.scala:895)
	at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$loadDynamicPartitions$1.apply(ClientWrapper.scala:895)
	at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$withHiveState$1.apply(ClientWrapper.scala:322)
	at org.apache.spark.sql.hive.client.ClientWrapper.liftedTree1$1(ClientWrapper.scala:269)
	at org.apache.spark.sql.hive.client.ClientWrapper.retryLocked(ClientWrapper.scala:268)
	at org.apache.spark.sql.hive.client.ClientWrapper.withHiveState(ClientWrapper.scala:311)
	at org.apache.spark.sql.hive.client.ClientWrapper.loadDynamicPartitions(ClientWrapper.scala:894)
	at org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anon$1.run(InsertIntoHiveTable.scala:228)
	at org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anon$1.run(InsertIntoHiveTable.scala:226)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.security.auth.Subject.doAs(Subject.java:422)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1711)
	... 25 more
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Number of dynamic partitions created is 1823, which is more than 1000. To solve this try to set hive.exec.max.dynamic.partitions to at least 1823.
	at org.apache.hadoop.hive.ql.metadata.Hive.loadDynamicPartitions(Hive.java:1584)
	... 43 more
{noformat}

  was:
1. Start ThriftServer
2. beeline 1 connect to TS, then run
{code:sql}
create database if not exists hive_bin_partitioned_orc_3;
use hive_bin_partitioned_orc_3;

set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions=10000;
set spark.sql.autoBroadcastJoinThreshold=-1;

drop table if exists store_returns;
create table store_returns(
    sr_returned_date_sk       int    ,
    sr_return_time_sk         int    ,
    sr_item_sk                int    ,
    sr_customer_sk            int    ,
    sr_cdemo_sk               int    ,
    sr_hdemo_sk               int    ,
    sr_addr_sk                int    ,
    sr_store_sk               int    ,
    sr_reason_sk              int    ,
    sr_ticket_number          int    ,
    sr_return_quantity        int    ,
    sr_return_amt             float  ,
    sr_return_tax             float  ,
    sr_return_amt_inc_tax     float  ,
    sr_fee                    float  ,
    sr_return_ship_cost       float  ,
    sr_refunded_cash          float  ,
    sr_reversed_charge        float  ,
    sr_store_credit           float  ,
    sr_net_loss               float
)
partitioned by (sr_returned_date string)
stored as orc;

insert overwrite table store_returns partition (sr_returned_date) 
select
  sr.sr_returned_date_sk,
  sr.sr_return_time_sk,
  sr.sr_item_sk,
  sr.sr_customer_sk,
  sr.sr_cdemo_sk,
  sr.sr_hdemo_sk,
  sr.sr_addr_sk,
  sr.sr_store_sk,
  sr.sr_reason_sk,
  sr.sr_ticket_number,
  sr.sr_return_quantity,
  sr.sr_return_amt,
  sr.sr_return_tax,
  sr.sr_return_amt_inc_tax,
  sr.sr_fee,
  sr.sr_return_ship_cost,
  sr.sr_refunded_cash,
  sr.sr_reversed_charge,
  sr.sr_store_credit,
  sr.sr_net_loss,
  dd.d_date as sr_returned_date 
from tpcds_text_3.store_returns sr
join tpcds_text_3.date_dim dd
on (sr.sr_returned_date_sk = dd.d_date_sk);
{code}
3. beeline 2 connect to TS, then run
{code:sql}
show tables;
{code}

*INSERT ... SELECT failed as hive.exec.max.dynamic.partitions have been modified to default value(1000).*

{noformat}
Caused by: java.lang.reflect.InvocationTargetException
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:497)
	at org.apache.spark.sql.hive.client.Shim_v1_2.loadDynamicPartitions(HiveShim.scala:602)
	at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$loadDynamicPartitions$1.apply$mcV$sp(ClientWrapper.scala:895)
	at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$loadDynamicPartitions$1.apply(ClientWrapper.scala:895)
	at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$loadDynamicPartitions$1.apply(ClientWrapper.scala:895)
	at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$withHiveState$1.apply(ClientWrapper.scala:322)
	at org.apache.spark.sql.hive.client.ClientWrapper.liftedTree1$1(ClientWrapper.scala:269)
	at org.apache.spark.sql.hive.client.ClientWrapper.retryLocked(ClientWrapper.scala:268)
	at org.apache.spark.sql.hive.client.ClientWrapper.withHiveState(ClientWrapper.scala:311)
	at org.apache.spark.sql.hive.client.ClientWrapper.loadDynamicPartitions(ClientWrapper.scala:894)
	at org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anon$1.run(InsertIntoHiveTable.scala:228)
	at org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anon$1.run(InsertIntoHiveTable.scala:226)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.security.auth.Subject.doAs(Subject.java:422)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1711)
	... 25 more
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Number of dynamic partitions created is 1823, which is more than 1000. To solve this try to set hive.exec.max.dynamic.partitions to at least 1823.
	at org.apache.hadoop.hive.ql.metadata.Hive.loadDynamicPartitions(Hive.java:1584)
	... 43 more
{noformat}


> Hive conf will be modified on multi-beeline connect to thriftserver
> -------------------------------------------------------------------
>
>                 Key: SPARK-13800
>                 URL: https://issues.apache.org/jira/browse/SPARK-13800
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 1.5.1
>            Reporter: Weizhong
>            Priority: Minor
>
> {color:red}Note: we connect to the Hive MetaStore service, as hive.metastore.uris is set in hive-site.xml.{color}
> 1. Start the ThriftServer (TS).
> 2. Connect beeline session 1 to the TS, then run:
> {code:sql}
> create database if not exists hive_bin_partitioned_orc_3;
> use hive_bin_partitioned_orc_3;
> set hive.exec.dynamic.partition.mode=nonstrict;
> set hive.exec.max.dynamic.partitions=10000;
> set spark.sql.autoBroadcastJoinThreshold=-1;
> drop table if exists store_returns;
> create table store_returns(
>     sr_returned_date_sk       int    ,
>     sr_return_time_sk         int    ,
>     sr_item_sk                int    ,
>     sr_customer_sk            int    ,
>     sr_cdemo_sk               int    ,
>     sr_hdemo_sk               int    ,
>     sr_addr_sk                int    ,
>     sr_store_sk               int    ,
>     sr_reason_sk              int    ,
>     sr_ticket_number          int    ,
>     sr_return_quantity        int    ,
>     sr_return_amt             float  ,
>     sr_return_tax             float  ,
>     sr_return_amt_inc_tax     float  ,
>     sr_fee                    float  ,
>     sr_return_ship_cost       float  ,
>     sr_refunded_cash          float  ,
>     sr_reversed_charge        float  ,
>     sr_store_credit           float  ,
>     sr_net_loss               float
> )
> partitioned by (sr_returned_date string)
> stored as orc;
> insert overwrite table store_returns partition (sr_returned_date) 
> select
>   sr.sr_returned_date_sk,
>   sr.sr_return_time_sk,
>   sr.sr_item_sk,
>   sr.sr_customer_sk,
>   sr.sr_cdemo_sk,
>   sr.sr_hdemo_sk,
>   sr.sr_addr_sk,
>   sr.sr_store_sk,
>   sr.sr_reason_sk,
>   sr.sr_ticket_number,
>   sr.sr_return_quantity,
>   sr.sr_return_amt,
>   sr.sr_return_tax,
>   sr.sr_return_amt_inc_tax,
>   sr.sr_fee,
>   sr.sr_return_ship_cost,
>   sr.sr_refunded_cash,
>   sr.sr_reversed_charge,
>   sr.sr_store_credit,
>   sr.sr_net_loss,
>   dd.d_date as sr_returned_date 
> from tpcds_text_3.store_returns sr
> join tpcds_text_3.date_dim dd
> on (sr.sr_returned_date_sk = dd.d_date_sk);
> {code}
> 3. Connect beeline session 2 to the TS, then run:
> {code:sql}
> show tables;
> {code}
> *The INSERT ... SELECT fails because hive.exec.max.dynamic.partitions has been reset to its default value (1000).*
> {noformat}
> Caused by: java.lang.reflect.InvocationTargetException
> 	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> 	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> 	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> 	at java.lang.reflect.Method.invoke(Method.java:497)
> 	at org.apache.spark.sql.hive.client.Shim_v1_2.loadDynamicPartitions(HiveShim.scala:602)
> 	at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$loadDynamicPartitions$1.apply$mcV$sp(ClientWrapper.scala:895)
> 	at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$loadDynamicPartitions$1.apply(ClientWrapper.scala:895)
> 	at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$loadDynamicPartitions$1.apply(ClientWrapper.scala:895)
> 	at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$withHiveState$1.apply(ClientWrapper.scala:322)
> 	at org.apache.spark.sql.hive.client.ClientWrapper.liftedTree1$1(ClientWrapper.scala:269)
> 	at org.apache.spark.sql.hive.client.ClientWrapper.retryLocked(ClientWrapper.scala:268)
> 	at org.apache.spark.sql.hive.client.ClientWrapper.withHiveState(ClientWrapper.scala:311)
> 	at org.apache.spark.sql.hive.client.ClientWrapper.loadDynamicPartitions(ClientWrapper.scala:894)
> 	at org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anon$1.run(InsertIntoHiveTable.scala:228)
> 	at org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anon$1.run(InsertIntoHiveTable.scala:226)
> 	at java.security.AccessController.doPrivileged(Native Method)
> 	at javax.security.auth.Subject.doAs(Subject.java:422)
> 	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1711)
> 	... 25 more
> Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Number of dynamic partitions created is 1823, which is more than 1000. To solve this try to set hive.exec.max.dynamic.partitions to at least 1823.
> 	at org.apache.hadoop.hive.ql.metadata.Hive.loadDynamicPartitions(Hive.java:1584)
> 	... 43 more
> {noformat}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org