You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by "Ji Qi (Jira)" <ji...@apache.org> on 2022/05/30 15:13:00 UTC

[jira] [Resolved] (HUDI-4099) hive sync no partition table error

     [ https://issues.apache.org/jira/browse/HUDI-4099?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Ji Qi resolved HUDI-4099.
-------------------------

> hive sync no partition table error
> ----------------------------------
>
>                 Key: HUDI-4099
>                 URL: https://issues.apache.org/jira/browse/HUDI-4099
>             Project: Apache Hudi
>          Issue Type: Bug
>          Components: hive
>    Affects Versions: 0.11.0
>            Reporter: bettermouse
>            Priority: Major
>              Labels: pull-request-available
>             Fix For: 0.11.1
>
>
> 1.spark-sql --packages org.apache.hudi:hudi-spark3.1-bundle_2.12:0.11.0 --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' --conf 'spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension' 
> 2.
> create table hudi_ctas_cow_nonpcf_tbl01
> using hudi
> tblproperties (primaryKey = 'id',hoodie.datasource.hive_sync.mode='jdbc')
> as
> select 1 as id, 'a1' as name, 10 as price;
> 3. ERROR
> Caused by: org.apache.hudi.hive.HoodieHiveSyncException: Failed in executing SQL CREATE EXTERNAL TABLE IF NOT EXISTS `default`.`hudi_ctas_cow_nonpcf_tbl01`( `_hoodie_commit_time` string, `_hoodie_commit_seqno` string, `_hoodie_record_key` string, `_hoodie_partition_path` string, `_hoodie_file_name` string, `id` int, `name` string, `price` int) PARTITIONED BY (` String)   ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' WITH SERDEPROPERTIES ('hoodie.query.as.ro.table'='false','path'='hdfs://node1:9000/user/hive/warehouse/hudi_ctas_cow_nonpcf_tbl01') STORED AS INPUTFORMAT 'org.apache.hudi.hadoop.HoodieParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' LOCATION 'hdfs://node1:9000/user/hive/warehouse/hudi_ctas_cow_nonpcf_tbl01' TBLPROPERTIES('spark.sql.sources.schema.numPartCols'='1','spark.sql.sources.schema.part.0'='\{"type":"struct","fields":[{"name":"_hoodie_commit_time","type":"string","nullable":true,"metadata":{}},\{"name":"_hoodie_commit_seqno","type":"string","nullable":true,"metadata":{}},\{"name":"_hoodie_record_key","type":"string","nullable":true,"metadata":{}},\{"name":"_hoodie_partition_path","type":"string","nullable":true,"metadata":{}},\{"name":"_hoodie_file_name","type":"string","nullable":true,"metadata":{}},\{"name":"id","type":"integer","nullable":true,"metadata":{}},\{"name":"name","type":"string","nullable":true,"metadata":{}},\{"name":"price","type":"integer","nullable":true,"metadata":{}},\{"name":"","type":"string","nullable":false,"metadata":{}}]}','spark.sql.sources.schema.partCol.0'='','spark.sql.sources.schema.numParts'='1','spark.sql.sources.provider'='hudi','spark.sql.create.version'='3.1.3')
>         at org.apache.hudi.hive.ddl.JDBCExecutor.runSQL(JDBCExecutor.java:67)
>         at org.apache.hudi.hive.ddl.QueryBasedDDLExecutor.createTable(QueryBasedDDLExecutor.java:84)
>         at org.apache.hudi.hive.HoodieHiveClient.createTable(HoodieHiveClient.java:168)
>         at org.apache.hudi.hive.HiveSyncTool.syncSchema(HiveSyncTool.java:276)
>         at org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:217)
>         at org.apache.hudi.hive.HiveSyncTool.doSync(HiveSyncTool.java:150)
>         at org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:138)
>         ... 53 more
> Caused by: org.apache.hive.service.cli.HiveSQLException: Error while compiling statement: FAILED: ParseException line 1:1635 cannot recognize input near '<EOF>' '<EOF>' '<EOF>' in column specification
>         at org.apache.hive.jdbc.Utils.verifySuccess(Utils.java:267)
>         at org.apache.hive.jdbc.Utils.verifySuccessWithInfo(Utils.java:253)
>         at org.apache.hive.jdbc.HiveStatement.runAsyncOnServer(HiveStatement.java:313)
>         at org.apache.hive.jdbc.HiveStatement.execute(HiveStatement.java:253)
>         at org.apache.hudi.hive.ddl.JDBCExecutor.runSQL(JDBCExecutor.java:65)
>         ... 59 more
> Caused by: org.apache.hive.service.cli.HiveSQLException: Error while compiling statement: FAILED: ParseException line 1:1635 cannot recognize input near '<EOF>' '<EOF>' '<EOF>' in column specification
>         at org.apache.hive.service.cli.operation.Operation.toSQLException(Operation.java:380)
>         at org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:206)
>         at org.apache.hive.service.cli.operation.SQLOperation.runInternal(SQLOperation.java:290)
>         at org.apache.hive.service.cli.operation.Operation.run(Operation.java:320)
>         at org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:530)
>         at org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementAsync(HiveSessionImpl.java:517)
>         at org.apache.hive.service.cli.CLIService.executeStatementAsync(CLIService.java:310)
>         at org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:530)
>         at org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1437)
>         at org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1422)
>         at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39)
>         at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39)
>         at org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:56)
>         at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286)
>         at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>         at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>         at java.lang.Thread.run(Thread.java:748)
> Caused by: java.lang.RuntimeException: org.apache.hadoop.hive.ql.parse.ParseException:line 1:1635 cannot recognize input near '<EOF>' '<EOF>' '<EOF>' in column specification
>         at org.apache.hadoop.hive.ql.parse.ParseDriver.parse(ParseDriver.java:211)
>         at org.apache.hadoop.hive.ql.parse.ParseUtils.parse(ParseUtils.java:77)
>         at org.apache.hadoop.hive.ql.parse.ParseUtils.parse(ParseUtils.java:70)
>         at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:468)
>         at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1317)
>         at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1295)
>         at org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:204)
> 4. Note the malformed clause PARTITIONED BY (` String) in the generated DDL: the partition column name is empty.
> ------------------
> When using hive_sync.mode='hms' instead:
> create table hudi_ctas_cow_nonpcf_tbl03
> using hudi
> tblproperties (primaryKey = 'id',hoodie.datasource.hive_sync.mode='hms')
> as
> select 1 as id, 'a1' as name, 10 as price;
> The statement succeeds, but an error occurs when querying the table in Hive:
> hive> select * from hudi_ctas_cow_nonpcf_tbl03;
> FAILED: RuntimeException org.apache.hadoop.hive.ql.metadata.HiveException: Failed with exception nulljava.lang.NullPointerException
>         at org.apache.hadoop.hive.ql.exec.FetchOperator.getPartitionKeyOI(FetchOperator.java:237)
>         at org.apache.hadoop.hive.ql.exec.FetchOperator.setupOutputObjectInspector(FetchOperator.java:586)
>         at org.apache.hadoop.hive.ql.exec.FetchOperator.initialize(FetchOperator.java:181)
>         at org.apache.hadoop.hive.ql.exec.FetchOperator.<init>(FetchOperator.java:146)
>         at org.apache.hadoop.hive.ql.exec.FetchTask.initialize(FetchTask.java:87)
>         at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:541)
>         at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1317)
>         at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1457)
>         at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1237)
>         at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1227)
>         at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:233)
>         at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:184)
>         at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:403)
>         at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:821)
>         at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:759)
>         at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:686)
>         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>         at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>         at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>         at java.lang.reflect.Method.invoke(Method.java:498)
>         at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
>         at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
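> In Hudi, a table with no partition is represented by the empty string "" as its partition field.
> However, "".split(",") returns the array [""] (a single empty element), whereas an empty array [] is what should be produced.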



--
This message was sent by Atlassian Jira
(v8.20.7#820007)