Posted to commits@hudi.apache.org by GitBox <gi...@apache.org> on 2019/10/12 23:56:34 UTC

[GitHub] [incubator-hudi] gfn9cho opened a new issue #954: org.apache.hudi.org.apache.hadoop_hive.metastore.api.NoSuchObjectException: table not found

gfn9cho opened a new issue #954:  org.apache.hudi.org.apache.hadoop_hive.metastore.api.NoSuchObjectException: <hivedb.tableName> table not found
URL: https://github.com/apache/incubator-hudi/issues/954
 
 
   I am using hudi-spark-bundle-0.5.1-SNAPSHOT.jar on EMR and getting the exception below during Hive sync.
   We are using the AWS Glue Data Catalog as the Hive metastore.
   The Hive table is getting created: I can see it in Hive, but it contains no data.
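
   For reference, the "created but empty" observation can be confirmed with a check along these lines, run in the same spark-shell session (the database and table names are taken from the job code further below; the full stack trace of the failing write follows):

   // Does the metastore (Glue) know the table, and does it return any rows?
   spark.sql("show tables in uat_edf_staging like 'stg_gwpl_hudi_pc_policy'").show(false)
   spark.sql("select count(*) from uat_edf_staging.stg_gwpl_hudi_pc_policy").show()
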
   org.apache.hudi.hive.HoodieHiveSyncException: Failed to sync partitions for table <tableName>
     at org.apache.hudi.hive.HiveSyncTool.syncPartitions(HiveSyncTool.java:172)
     at org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:107)
     at org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:67)
     at org.apache.hudi.HoodieSparkSqlWriter$.syncHive(HoodieSparkSqlWriter.scala:235)
     at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:169)
     at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:91)
     at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45)
     at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
     at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
     at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:86)
     at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
     at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
     at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
     at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
     at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
     at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
     at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80)
     at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80)
     at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:668)
     at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:668)
     at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
     at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
     at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
     at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:668)
     at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:276)
     at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:270)
     at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:228)
     ... 69 elided
   Caused by: org.apache.hudi.org.apache.hadoop_hive.metastore.api.NoSuchObjectException: <hiveDB>.<tableName> table not found
     at org.apache.hudi.org.apache.hadoop_hive.metastore.api.ThriftHiveMetastore$get_partitions_result$get_partitions_resultStandardScheme.read(ThriftHiveMetastore.java)
     at org.apache.hudi.org.apache.hadoop_hive.metastore.api.ThriftHiveMetastore$get_partitions_result$get_partitions_resultStandardScheme.read(ThriftHiveMetastore.java)
     at org.apache.hudi.org.apache.hadoop_hive.metastore.api.ThriftHiveMetastore$get_partitions_result.read(ThriftHiveMetastore.java)
     at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:86)
     at org.apache.hudi.org.apache.hadoop_hive.metastore.api.ThriftHiveMetastore$Client.recv_get_partitions(ThriftHiveMetastore.java:2377)
     at org.apache.hudi.org.apache.hadoop_hive.metastore.api.ThriftHiveMetastore$Client.get_partitions(ThriftHiveMetastore.java:2362)
     at org.apache.hudi.org.apache.hadoop_hive.metastore.HiveMetaStoreClient.listPartitions(HiveMetaStoreClient.java:1162)
     at org.apache.hudi.hive.HoodieHiveClient.scanTablePartitions(HoodieHiveClient.java:240)
     at org.apache.hudi.hive.HiveSyncTool.syncPartitions(HiveSyncTool.java:162)
     ... 95 more
   
   Below is the code:
   spark-shell --master yarn --deploy-mode client --conf spark.shuffle.spill=true \
    --conf spark.scheduler.mode=FIFO \
    --conf spark.executor.extraJavaOptions="-XX:MaxPermSize=1024m -Dconfig.resource=spark-defaults.conf" \
    --conf spark.sql.planner.externalSort=true --conf spark.shuffle.manager=sort \
    --conf spark.ui.port=8088 --conf spark.executor.memoryOverhead=2g \
    --conf spark.rpc.message.maxSize=1024 --conf spark.file.transferTo=false \
    --conf spark.driver.maxResultSize=3g --conf spark.rdd.compress=true \
    --conf spark.driver.extraJavaOptions="-Dconfig.file=spark-defaults.conf -Dspark.yarn.app.container.log.dir=/mnt/var/log/hadoop" \
    --conf spark.sql.parquet.writeLegacyFormat=true \
    --conf spark.dynamicAllocation.enabled=true \
    --conf spark.dynamicAllocation.maxExecutors=10 \
    --conf spark.dynamicAllocation.minExecutors=1 \
    --conf spark.executor.cores=5 \
    --conf spark.executor.memory=3g --conf spark.driver.memory=2g \
    --conf spark.executor.instances=4 --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
    --name gwpl_staging_load_hudi \
    --files /etc/spark/conf/hive-site.xml \
    --properties-file /usr/lib/spark/conf/spark-defaults.conf \
    --jars /home/hadoop/hudi/hudi-spark-bundle-0.5.1-SNAPSHOT.jar
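
   Since the Glue Data Catalog is the metastore here, one thing worth confirming from the same session is that the shipped hive-site.xml is actually being picked up and points clients at the Glue implementation. A minimal check, assuming the standard EMR property name (it may return null if the file is not on the session's configuration path):

   // Expected value on EMR with Glue as the metastore:
   // com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory
   spark.sparkContext.hadoopConfiguration.get("hive.metastore.client.factory.class")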
   
   import org.apache.hudi.DataSourceWriteOptions
   import org.apache.hudi.config.HoodieWriteConfig
   import org.apache.spark.sql._
   import org.apache.spark.sql.SaveMode._
   import org.apache.spark.sql.expressions.Window
   import org.apache.spark.sql.functions._
   import org.joda.time.format.DateTimeFormat
   
   val stagePrefix="stg_gwpl"
   val harmonizedStageDB="uat_edf_staging"
   val harmonizedstagePath="s3://sa-l3-uat-emr-edl-processed/staging"
   val table="pc_policy"
   
   val incrementalData=spark.sql("select * from uat_connect_gwpl_data_processed.pc_policy limit 100").cache
   
   incrementalData.write.
     format("org.apache.hudi").
     option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "ID").
     option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "ingestiondt").
     option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY, "UpdateTime").
     option(HoodieWriteConfig.TABLE_NAME, stagePrefix + "_hudi_" + table).
     option(DataSourceWriteOptions.HIVE_URL_OPT_KEY, "jdbc:hive2://hiveserver:10000").
     option(DataSourceWriteOptions.HIVE_USER_OPT_KEY, "hive").
     option(DataSourceWriteOptions.HIVE_PASS_OPT_KEY, "hive").
     option("hoodie.datasource.hive_sync.enable", true).
     option("hoodie.datasource.hive_sync.database", harmonizedStageDB).
     option("hoodie.datasource.hive_sync.table", stagePrefix + "_hudi_" + table).
     option("hoodie.datasource.hive_sync.partition_fields", "ingestiondt").
     mode(SaveMode.Overwrite).
     save(s"${harmonizedstagePath}/hudi/$table")
   
   Please let me know if I can provide more details.
