Posted to issues@spark.apache.org by "angerszhu (Jira)" <ji...@apache.org> on 2019/09/09 11:47:00 UTC

[jira] [Updated] (SPARK-29022) SparkSQLCLI should be able to use a SerDe class from a jar added via 'ADD JAR'

     [ https://issues.apache.org/jira/browse/SPARK-29022?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

angerszhu updated SPARK-29022:
------------------------------
    Description: 
The Spark SQL CLI cannot use classes from jars added with the SQL 'ADD JAR' command:
{code:java}
spark-sql> add jar /root/.m2/repository/org/apache/hive/hcatalog/hive-hcatalog-core/2.3.6/hive-hcatalog-core-2.3.6.jar;
ADD JAR /root/.m2/repository/org/apache/hive/hcatalog/hive-hcatalog-core/2.3.6/hive-hcatalog-core-2.3.6.jar
spark-sql> CREATE TABLE addJar(key string) ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe';
spark-sql> select * from addJar;
19/09/07 03:06:54 ERROR SparkSQLDriver: Failed in [select * from addJar]
java.lang.RuntimeException: java.lang.ClassNotFoundException: org.apache.hive.hcatalog.data.JsonSerDe
	at org.apache.hadoop.hive.ql.plan.TableDesc.getDeserializerClass(TableDesc.java:79)
	at org.apache.spark.sql.hive.execution.HiveTableScanExec.addColumnMetadataToConf(HiveTableScanExec.scala:123)
	at org.apache.spark.sql.hive.execution.HiveTableScanExec.hadoopConf$lzycompute(HiveTableScanExec.scala:101)
	at org.apache.spark.sql.hive.execution.HiveTableScanExec.hadoopConf(HiveTableScanExec.scala:98)
	at org.apache.spark.sql.hive.execution.HiveTableScanExec.hadoopReader$lzycompute(HiveTableScanExec.scala:110)
	at org.apache.spark.sql.hive.execution.HiveTableScanExec.hadoopReader(HiveTableScanExec.scala:105)
	at org.apache.spark.sql.hive.execution.HiveTableScanExec.$anonfun$doExecute$1(HiveTableScanExec.scala:188)
	at org.apache.spark.util.Utils$.withDummyCallSite(Utils.scala:2488)
	at org.apache.spark.sql.hive.execution.HiveTableScanExec.doExecute(HiveTableScanExec.scala:188)
	at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:189)
	at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:227)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:224)
	at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:185)
	at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:329)
	at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:378)
	at org.apache.spark.sql.execution.SparkPlan.executeCollectPublic(SparkPlan.scala:408)
	at org.apache.spark.sql.execution.HiveResult$.hiveResultString(HiveResult.scala:52)
	at org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.$anonfun$run$1(SparkSQLDriver.scala:65)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$4(SQLExecution.scala:100)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:87)
	at org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:65)
	at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:367)
	at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:403)
	at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:272)
	at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.base/java.lang.reflect.Method.invoke(Method.java:566)
	at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
	at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:920)
	at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:179)
	at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:202)
	at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:89)
	at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:999)
	at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1008)
	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.ClassNotFoundException: org.apache.hive.hcatalog.data.JsonSerDe
	at java.base/java.net.URLClassLoader.findClass(URLClassLoader.java:471)
	at java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:588)
	at java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:521)
	at java.base/java.lang.Class.forName0(Native Method)
	at java.base/java.lang.Class.forName(Class.java:398)
	at org.apache.hadoop.hive.ql.plan.TableDesc.getDeserializerClass(TableDesc.java:76)
	... 38 more
{code}
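The failure mode is a classloader mismatch: 'ADD JAR' makes the jar visible through the session's URLClassLoader, while Hive's TableDesc.getDeserializerClass resolves the SerDe via Class.forName against a loader that never saw the jar. The sketch below is an illustration only, not Spark's or Hive's actual code; it assumes the hcatalog jar exists at the path from the session above.
{code:scala}
import java.net.{URL, URLClassLoader}

// Illustrative only: NOT Spark's or Hive's actual code. It just reproduces
// the classloader mismatch behind the ClassNotFoundException above.
object AddJarClassLoaderSketch {
  def main(args: Array[String]): Unit = {
    // Assumption: the jar sits at the path used in the reported session.
    val jarUrl = new URL("file:///root/.m2/repository/org/apache/hive/hcatalog/" +
      "hive-hcatalog-core/2.3.6/hive-hcatalog-core-2.3.6.jar")
    val serde = "org.apache.hive.hcatalog.data.JsonSerDe"

    // 'ADD JAR' makes the jar visible through a session-level URLClassLoader:
    val sessionLoader = new URLClassLoader(Array(jarUrl), getClass.getClassLoader)
    println(Class.forName(serde, false, sessionLoader).getName) // succeeds

    // Hive's TableDesc.getDeserializerClass, however, resolves the class with
    // a loader that never saw the added jar; the equivalent call fails with
    // java.lang.ClassNotFoundException, matching the stack trace above:
    try {
      Class.forName(serde, false, getClass.getClassLoader)
    } catch {
      case e: ClassNotFoundException => println(s"lookup failed: ${e.getMessage}")
    }

    // Pointing the thread-context loader (which Hive's JavaUtils.getClassLoader
    // consults first) at the session loader makes the same lookup succeed:
    Thread.currentThread().setContextClassLoader(sessionLoader)
    println(Class.forName(serde, false,
      Thread.currentThread().getContextClassLoader).getName)
  }
}
{code}
Until 'ADD JAR' reaches the loader Hive consults here, one commonly suggested workaround is to put the jar on the classpath at startup, e.g. spark-sql --jars /root/.m2/repository/org/apache/hive/hcatalog/hive-hcatalog-core/2.3.6/hive-hcatalog-core-2.3.6.jar, so every loader involved sees it from the beginning.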

> SparkSQLCLI should be able to use a SerDe class from a jar added via 'ADD JAR'
> ------------------------------------------------------------------------------
>
>                 Key: SPARK-29022
>                 URL: https://issues.apache.org/jira/browse/SPARK-29022
>             Project: Spark
>          Issue Type: Improvement
>          Components: SQL
>    Affects Versions: 3.0.0
>            Reporter: angerszhu
>            Priority: Major
>
> The Spark SQL CLI cannot use classes from jars added with the SQL 'ADD JAR' command (see the full session and stack trace above).



--
This message was sent by Atlassian Jira
(v8.3.2#803003)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org