You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@hive.apache.org by Loïc Chanel <lo...@telecomnancy.net> on 2017/08/07 16:26:36 UTC

Cannot validate serde: org.apache.hive.hcatalog.data.JsonSerDe

Hi,

As I tried to run some queries using the JSON SerDe from the Spark SQL client, I
encountered this error:

17/08/07 18:20:40 ERROR SparkSQLDriver: Failed in [create external table
client_project.test_ext(DocVersion string, DriverID string) row format
serde 'org.apache.hive.hcatalog.data.JsonSerDe' WITH SERDEPROPERTIES
("ignore.malformed.json" = "true") location
'/etl/client/ct/aftermarket/processing/proj/envt=M5']
org.apache.spark.sql.execution.QueryExecutionException: FAILED: Execution
Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Cannot
validate serde: org.apache.hive.hcatalog.data.JsonSerDe
        at
org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$runHive$1.apply(ClientWrapper.scala:455)
        at
org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$runHive$1.apply(ClientWrapper.scala:440)
        at
org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$withHiveState$1.apply(ClientWrapper.scala:278)
        at
org.apache.spark.sql.hive.client.ClientWrapper.retryLocked(ClientWrapper.scala:233)
        at
org.apache.spark.sql.hive.client.ClientWrapper.withHiveState(ClientWrapper.scala:270)
        at
org.apache.spark.sql.hive.client.ClientWrapper.runHive(ClientWrapper.scala:440)
        at
org.apache.spark.sql.hive.client.ClientWrapper.runSqlHive(ClientWrapper.scala:430)
        at
org.apache.spark.sql.hive.HiveContext.runSqlHive(HiveContext.scala:561)
        at
org.apache.spark.sql.hive.execution.HiveNativeCommand.run(HiveNativeCommand.scala:33)
        at
org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:57)
        at
org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:57)
        at
org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:69)
        at
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:140)
        at
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:138)
        at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
        at
org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:138)
        at
org.apache.spark.sql.SQLContext$QueryExecution.toRdd$lzycompute(SQLContext.scala:933)
        at
org.apache.spark.sql.SQLContext$QueryExecution.toRdd(SQLContext.scala:933)
        at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:144)
        at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:129)
        at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:51)
        at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:725)
        at
org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:62)
        at
org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:308)
        at
org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:376)
        at
org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:226)
        at
org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:497)
        at
org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:685)
        at
org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180)
        at
org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205)
        at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:120)
        at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
org.apache.spark.sql.execution.QueryExecutionException: FAILED: Execution
Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Cannot
validate serde: org.apache.hive.hcatalog.data.JsonSerDe
        at
org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$runHive$1.apply(ClientWrapper.scala:455)
        at
org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$runHive$1.apply(ClientWrapper.scala:440)
        at
org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$withHiveState$1.apply(ClientWrapper.scala:278)
        at
org.apache.spark.sql.hive.client.ClientWrapper.retryLocked(ClientWrapper.scala:233)
        at
org.apache.spark.sql.hive.client.ClientWrapper.withHiveState(ClientWrapper.scala:270)
        at
org.apache.spark.sql.hive.client.ClientWrapper.runHive(ClientWrapper.scala:440)
        at
org.apache.spark.sql.hive.client.ClientWrapper.runSqlHive(ClientWrapper.scala:430)
        at
org.apache.spark.sql.hive.HiveContext.runSqlHive(HiveContext.scala:561)
        at
org.apache.spark.sql.hive.execution.HiveNativeCommand.run(HiveNativeCommand.scala:33)
        at
org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:57)
        at
org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:57)
        at
org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:69)
        at
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:140)
        at
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:138)
        at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
        at
org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:138)
        at
org.apache.spark.sql.SQLContext$QueryExecution.toRdd$lzycompute(SQLContext.scala:933)
        at
org.apache.spark.sql.SQLContext$QueryExecution.toRdd(SQLContext.scala:933)
        at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:144)
        at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:129)
        at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:51)
        at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:725)
        at
org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:62)
        at
org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:308)
        at
org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:376)
        at
org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:226)
        at
org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:497)
        at
org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:685)
        at
org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180)
        at
org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205)
        at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:120)
        at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)

I double-checked, my classpath is clean, I have both
of /usr/hdp/current/hive-webhcat/share/hcatalog/hive-hcatalog-core.jar
and /usr/hdp/2.3.4.0-3485/hive/lib/hive-serde.jar lib in it.

Does anyone know this problem? Any input on where it could come from?

Thanks in advance for your help!
Regards,


Loïc

Loïc CHANEL
System Big Data engineer
MS&T - Worldline Analytics Platform - Worldline (Villeurbanne, France)

Re: Cannot validate serde: org.apache.hive.hcatalog.data.JsonSerDe

Posted by Loïc Chanel <lo...@telecomnancy.net>.
Any further input on this one?
Has nobody encountered it?

Regards,


Loïc

Loïc CHANEL
System Big Data engineer
MS&T - Worldline Analytics Platform - Worldline (Villeurbanne, France)

2017-08-07 18:26 GMT+02:00 Loïc Chanel <lo...@telecomnancy.net>:

> Hi,
>
> As I tried to run some queries with JSON SerDe from Spark SQL client, I
> encountered that error :
>
> 17/08/07 18:20:40 ERROR SparkSQLDriver: Failed in [create external table
> client_project.test_ext(DocVersion string, DriverID string) row format
> serde 'org.apache.hive.hcatalog.data.JsonSerDe' WITH SERDEPROPERTIES
> ("ignore.malformed.json" = "true") location '/etl/client/ct/aftermarket/
> processing/proj/envt=M5']
> org.apache.spark.sql.execution.QueryExecutionException: FAILED: Execution
> Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Cannot
> validate serde: org.apache.hive.hcatalog.data.JsonSerDe
>         at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$
> runHive$1.apply(ClientWrapper.scala:455)
>         at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$
> runHive$1.apply(ClientWrapper.scala:440)
>         at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$
> withHiveState$1.apply(ClientWrapper.scala:278)
>         at org.apache.spark.sql.hive.client.ClientWrapper.
> retryLocked(ClientWrapper.scala:233)
>         at org.apache.spark.sql.hive.client.ClientWrapper.
> withHiveState(ClientWrapper.scala:270)
>         at org.apache.spark.sql.hive.client.ClientWrapper.runHive(
> ClientWrapper.scala:440)
>         at org.apache.spark.sql.hive.client.ClientWrapper.
> runSqlHive(ClientWrapper.scala:430)
>         at org.apache.spark.sql.hive.HiveContext.runSqlHive(
> HiveContext.scala:561)
>         at org.apache.spark.sql.hive.execution.HiveNativeCommand.
> run(HiveNativeCommand.scala:33)
>         at org.apache.spark.sql.execution.ExecutedCommand.
> sideEffectResult$lzycompute(commands.scala:57)
>         at org.apache.spark.sql.execution.ExecutedCommand.
> sideEffectResult(commands.scala:57)
>         at org.apache.spark.sql.execution.ExecutedCommand.
> doExecute(commands.scala:69)
>         at org.apache.spark.sql.execution.SparkPlan$$anonfun$
> execute$5.apply(SparkPlan.scala:140)
>         at org.apache.spark.sql.execution.SparkPlan$$anonfun$
> execute$5.apply(SparkPlan.scala:138)
>         at org.apache.spark.rdd.RDDOperationScope$.withScope(
> RDDOperationScope.scala:147)
>         at org.apache.spark.sql.execution.SparkPlan.execute(
> SparkPlan.scala:138)
>         at org.apache.spark.sql.SQLContext$QueryExecution.
> toRdd$lzycompute(SQLContext.scala:933)
>         at org.apache.spark.sql.SQLContext$QueryExecution.
> toRdd(SQLContext.scala:933)
>         at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:144)
>         at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:129)
>         at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:51)
>         at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:725)
>         at org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.
> run(SparkSQLDriver.scala:62)
>         at org.apache.spark.sql.hive.thriftserver.
> SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:308)
>         at org.apache.hadoop.hive.cli.CliDriver.processLine(
> CliDriver.java:376)
>         at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(
> SparkSQLCLIDriver.scala:226)
>         at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(
> SparkSQLCLIDriver.scala)
>         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>         at sun.reflect.NativeMethodAccessorImpl.invoke(
> NativeMethodAccessorImpl.java:62)
>         at sun.reflect.DelegatingMethodAccessorImpl.invoke(
> DelegatingMethodAccessorImpl.java:43)
>         at java.lang.reflect.Method.invoke(Method.java:497)
>         at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$
> deploy$SparkSubmit$$runMain(SparkSubmit.scala:685)
>         at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(
> SparkSubmit.scala:180)
>         at org.apache.spark.deploy.SparkSubmit$.submit(
> SparkSubmit.scala:205)
>         at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.
> scala:120)
>         at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
> org.apache.spark.sql.execution.QueryExecutionException: FAILED: Execution
> Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Cannot
> validate serde: org.apache.hive.hcatalog.data.JsonSerDe
>         at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$
> runHive$1.apply(ClientWrapper.scala:455)
>         at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$
> runHive$1.apply(ClientWrapper.scala:440)
>         at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$
> withHiveState$1.apply(ClientWrapper.scala:278)
>         at org.apache.spark.sql.hive.client.ClientWrapper.
> retryLocked(ClientWrapper.scala:233)
>         at org.apache.spark.sql.hive.client.ClientWrapper.
> withHiveState(ClientWrapper.scala:270)
>         at org.apache.spark.sql.hive.client.ClientWrapper.runHive(
> ClientWrapper.scala:440)
>         at org.apache.spark.sql.hive.client.ClientWrapper.
> runSqlHive(ClientWrapper.scala:430)
>         at org.apache.spark.sql.hive.HiveContext.runSqlHive(
> HiveContext.scala:561)
>         at org.apache.spark.sql.hive.execution.HiveNativeCommand.
> run(HiveNativeCommand.scala:33)
>         at org.apache.spark.sql.execution.ExecutedCommand.
> sideEffectResult$lzycompute(commands.scala:57)
>         at org.apache.spark.sql.execution.ExecutedCommand.
> sideEffectResult(commands.scala:57)
>         at org.apache.spark.sql.execution.ExecutedCommand.
> doExecute(commands.scala:69)
>         at org.apache.spark.sql.execution.SparkPlan$$anonfun$
> execute$5.apply(SparkPlan.scala:140)
>         at org.apache.spark.sql.execution.SparkPlan$$anonfun$
> execute$5.apply(SparkPlan.scala:138)
>         at org.apache.spark.rdd.RDDOperationScope$.withScope(
> RDDOperationScope.scala:147)
>         at org.apache.spark.sql.execution.SparkPlan.execute(
> SparkPlan.scala:138)
>         at org.apache.spark.sql.SQLContext$QueryExecution.
> toRdd$lzycompute(SQLContext.scala:933)
>         at org.apache.spark.sql.SQLContext$QueryExecution.
> toRdd(SQLContext.scala:933)
>         at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:144)
>         at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:129)
>         at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:51)
>         at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:725)
>         at org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.
> run(SparkSQLDriver.scala:62)
>         at org.apache.spark.sql.hive.thriftserver.
> SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:308)
>         at org.apache.hadoop.hive.cli.CliDriver.processLine(
> CliDriver.java:376)
>         at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(
> SparkSQLCLIDriver.scala:226)
>         at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(
> SparkSQLCLIDriver.scala)
>         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>         at sun.reflect.NativeMethodAccessorImpl.invoke(
> NativeMethodAccessorImpl.java:62)
>         at sun.reflect.DelegatingMethodAccessorImpl.invoke(
> DelegatingMethodAccessorImpl.java:43)
>         at java.lang.reflect.Method.invoke(Method.java:497)
>         at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$
> deploy$SparkSubmit$$runMain(SparkSubmit.scala:685)
>         at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(
> SparkSubmit.scala:180)
>         at org.apache.spark.deploy.SparkSubmit$.submit(
> SparkSubmit.scala:205)
>         at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.
> scala:120)
>         at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
>
> I double-checked, my classpath is clean, I have both
> of /usr/hdp/current/hive-webhcat/share/hcatalog/hive-hcatalog-core.jar
> and /usr/hdp/2.3.4.0-3485/hive/lib/hive-serde.jar lib in it.
>
> Does someone know that problem ? Any inputs about where it could come from
> ?
>
> Thanks in advance for your help !
> Regards,
>
>
> Loïc
>
> Loïc CHANEL
> System Big Data engineer
> MS&T - Worldline Analytics Platform - Worldline (Villeurbanne, France)
>