You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by GitBox <gi...@apache.org> on 2019/10/12 23:56:34 UTC
[GitHub] [incubator-hudi] gfn9cho opened a new issue #954:
org.apache.hudi.org.apache.hadoop_hive.metastore.api.NoSuchObjectException:
table not found
gfn9cho opened a new issue #954: org.apache.hudi.org.apache.hadoop_hive.metastore.api.NoSuchObjectException: <hivedb.tableName> table not found
URL: https://github.com/apache/incubator-hudi/issues/954
I am using hudi-spark-bundle-0.5.1-SNAPSHOT.jar on EMR and am getting the exception below during Hive sync.
We are using the AWS Glue Data Catalog as the Hive metastore.
The Hive table is created: I can see it in Hive, but it has no data in it.
org.apache.hudi.hive.HoodieHiveSyncException: Failed to sync partitions for table <tableName>
at org.apache.hudi.hive.HiveSyncTool.syncPartitions(HiveSyncTool.java:172)
at org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:107)
at org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:67)
at org.apache.hudi.HoodieSparkSqlWriter$.syncHive(HoodieSparkSqlWriter.scala:235)
at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:169)
at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:91)
at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:86)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80)
at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:668)
at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:668)
at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:668)
at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:276)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:270)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:228)
... 69 elided
Caused by: org.apache.hudi.org.apache.hadoop_hive.metastore.api.NoSuchObjectException: <hiveDB>.<tableName> table not found
at org.apache.hudi.org.apache.hadoop_hive.metastore.api.ThriftHiveMetastore$get_partitions_result$get_partitions_resultStandardScheme.read(ThriftHiveMetastore.java)
at org.apache.hudi.org.apache.hadoop_hive.metastore.api.ThriftHiveMetastore$get_partitions_result$get_partitions_resultStandardScheme.read(ThriftHiveMetastore.java)
at org.apache.hudi.org.apache.hadoop_hive.metastore.api.ThriftHiveMetastore$get_partitions_result.read(ThriftHiveMetastore.java)
at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:86)
at org.apache.hudi.org.apache.hadoop_hive.metastore.api.ThriftHiveMetastore$Client.recv_get_partitions(ThriftHiveMetastore.java:2377)
at org.apache.hudi.org.apache.hadoop_hive.metastore.api.ThriftHiveMetastore$Client.get_partitions(ThriftHiveMetastore.java:2362)
at org.apache.hudi.org.apache.hadoop_hive.metastore.HiveMetaStoreClient.listPartitions(HiveMetaStoreClient.java:1162)
at org.apache.hudi.hive.HoodieHiveClient.scanTablePartitions(HoodieHiveClient.java:240)
at org.apache.hudi.hive.HiveSyncTool.syncPartitions(HiveSyncTool.java:162)
... 95 more
Below is the code:
# Launch spark-shell on YARN in client mode with the Hudi Spark bundle on the classpath.
# Notes on the configuration keys:
#   - Dynamic allocation is switched on with spark.dynamicAllocation.enabled
#     (spark.enable.dynamicAllocation is not a Spark property).
#   - All driver JVM flags go through spark.driver.extraJavaOptions
#     (spark.driver.JavaOptions is not a Spark property), so both -D flags share one setting.
#   - spark.executor.extraJavaOptions is passed exactly once; a repeated --conf key holds a
#     single value, so the later -Dconfig.resource setting is the one kept here.
#     NOTE(review): re-add -XX:MaxPermSize=1024m to that same quoted value if it is still wanted.
spark-shell --master yarn --deploy-mode client --conf spark.shuffle.spill=true \
--conf spark.scheduler.mode=FIFO \
--conf spark.sql.planner.externalSort=true --conf spark.shuffle.manager=sort \
--conf spark.ui.port=8088 --conf spark.executor.memoryOverhead=2g \
--conf spark.rpc.message.maxSize=1024 --conf spark.file.transferTo=false \
--conf spark.driver.maxResultSize=3g --conf spark.rdd.compress=true \
--conf spark.executor.extraJavaOptions="-Dconfig.resource=spark-defaults.conf" \
--conf spark.driver.extraJavaOptions="-Dconfig.file=spark-defaults.conf -Dspark.yarn.app.container.log.dir=/mnt/var/log/hadoop" \
--conf spark.sql.parquet.writeLegacyFormat=true \
--conf spark.dynamicAllocation.enabled=true \
--conf spark.dynamicAllocation.maxExecutors=10 \
--conf spark.dynamicAllocation.minExecutors=1 \
--conf spark.executor.cores=5 \
--conf spark.executor.memory=3g --conf spark.driver.memory=2g \
--conf spark.executor.instances=4 --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
--name gwpl_staging_load_hudi \
--files /etc/spark/conf/hive-site.xml \
--properties-file /usr/lib/spark/conf/spark-defaults.conf \
--jars /home/hadoop/hudi/hudi-spark-bundle-0.5.1-SNAPSHOT.jar
import org.apache.hudi.DataSourceWriteOptions
import org.apache.hudi.config.HoodieWriteConfig
import org.apache.spark.sql._
import org.apache.spark.sql.SaveMode._
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._
import org.joda.time.format.DateTimeFormat
// Naming and location settings for the staging table.
val stagePrefix = "stg_gwpl"
val harmonizedStageDB = "uat_edf_staging"
val harmonizedstagePath = "s3://sa-l3-uat-emr-edl-processed/staging"
val table = "pc_policy"
// Single definition of the table name so the Hudi table name and the Hive sync
// table name cannot drift apart.
val hudiTableName = s"${stagePrefix}_hudi_$table"

// 100-row sample of the source table, cached before the write.
val incrementalData = spark.sql("select * from uat_connect_gwpl_data_processed.pc_policy limit 100").cache()

// Write the sample as a Hudi dataset and sync it to Hive.
// Trailing dots keep the chain pasteable line by line into spark-shell.
incrementalData.write.
  format("org.apache.hudi").
  option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "ID").
  option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "ingestiondt").
  option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY, "UpdateTime").
  option(HoodieWriteConfig.TABLE_NAME, hudiTableName).
  // HiveServer2 JDBC URLs need the "//" authority separator: jdbc:hive2://<host>:<port>.
  option(DataSourceWriteOptions.HIVE_URL_OPT_KEY, "jdbc:hive2://hiveserver:10000").
  option(DataSourceWriteOptions.HIVE_USER_OPT_KEY, "hive").
  option(DataSourceWriteOptions.HIVE_PASS_OPT_KEY, "hive").
  option("hoodie.datasource.hive_sync.enable", true).
  option("hoodie.datasource.hive_sync.database", harmonizedStageDB).
  option("hoodie.datasource.hive_sync.table", hudiTableName).
  option("hoodie.datasource.hive_sync.partition_fields", "ingestiondt").
  mode(SaveMode.Overwrite).
  save(s"${harmonizedstagePath}/hudi/$table")
Please let me know if I can provide any more details.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services