You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by "bettermouse (Jira)" <ji...@apache.org> on 2022/05/14 18:48:00 UTC

[jira] [Created] (HUDI-4099) hive sync no partition table error

bettermouse created HUDI-4099:
---------------------------------

             Summary: hive sync no partition table error
                 Key: HUDI-4099
                 URL: https://issues.apache.org/jira/browse/HUDI-4099
             Project: Apache Hudi
          Issue Type: Bug
          Components: hive
            Reporter: bettermouse


1.spark-sql --packages org.apache.hudi:hudi-spark3.1-bundle_2.12:0.11.0 --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' --conf 'spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension' 
2.
create table hudi_ctas_cow_nonpcf_tbl01
using hudi
tblproperties (primaryKey = 'id',hoodie.datasource.hive_sync.mode='jdbc')
as
select 1 as id, 'a1' as name, 10 as price;

3. ERROR
Caused by: org.apache.hudi.hive.HoodieHiveSyncException: Failed in executing SQL CREATE EXTERNAL TABLE IF NOT EXISTS `default`.`hudi_ctas_cow_nonpcf_tbl01`( `_hoodie_commit_time` string, `_hoodie_commit_seqno` string, `_hoodie_record_key` string, `_hoodie_partition_path` string, `_hoodie_file_name` string, `id` int, `name` string, `price` int) PARTITIONED BY (` String)   ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' WITH SERDEPROPERTIES ('hoodie.query.as.ro.table'='false','path'='hdfs://node1:9000/user/hive/warehouse/hudi_ctas_cow_nonpcf_tbl01') STORED AS INPUTFORMAT 'org.apache.hudi.hadoop.HoodieParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' LOCATION 'hdfs://node1:9000/user/hive/warehouse/hudi_ctas_cow_nonpcf_tbl01' TBLPROPERTIES('spark.sql.sources.schema.numPartCols'='1','spark.sql.sources.schema.part.0'='\{"type":"struct","fields":[{"name":"_hoodie_commit_time","type":"string","nullable":true,"metadata":{}},\{"name":"_hoodie_commit_seqno","type":"string","nullable":true,"metadata":{}},\{"name":"_hoodie_record_key","type":"string","nullable":true,"metadata":{}},\{"name":"_hoodie_partition_path","type":"string","nullable":true,"metadata":{}},\{"name":"_hoodie_file_name","type":"string","nullable":true,"metadata":{}},\{"name":"id","type":"integer","nullable":true,"metadata":{}},\{"name":"name","type":"string","nullable":true,"metadata":{}},\{"name":"price","type":"integer","nullable":true,"metadata":{}},\{"name":"","type":"string","nullable":false,"metadata":{}}]}','spark.sql.sources.schema.partCol.0'='','spark.sql.sources.schema.numParts'='1','spark.sql.sources.provider'='hudi','spark.sql.create.version'='3.1.3')
        at org.apache.hudi.hive.ddl.JDBCExecutor.runSQL(JDBCExecutor.java:67)
        at org.apache.hudi.hive.ddl.QueryBasedDDLExecutor.createTable(QueryBasedDDLExecutor.java:84)
        at org.apache.hudi.hive.HoodieHiveClient.createTable(HoodieHiveClient.java:168)
        at org.apache.hudi.hive.HiveSyncTool.syncSchema(HiveSyncTool.java:276)
        at org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:217)
        at org.apache.hudi.hive.HiveSyncTool.doSync(HiveSyncTool.java:150)
        at org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:138)
        ... 53 more
Caused by: org.apache.hive.service.cli.HiveSQLException: Error while compiling statement: FAILED: ParseException line 1:1635 cannot recognize input near '<EOF>' '<EOF>' '<EOF>' in column specification
        at org.apache.hive.jdbc.Utils.verifySuccess(Utils.java:267)
        at org.apache.hive.jdbc.Utils.verifySuccessWithInfo(Utils.java:253)
        at org.apache.hive.jdbc.HiveStatement.runAsyncOnServer(HiveStatement.java:313)
        at org.apache.hive.jdbc.HiveStatement.execute(HiveStatement.java:253)
        at org.apache.hudi.hive.ddl.JDBCExecutor.runSQL(JDBCExecutor.java:65)
        ... 59 more
Caused by: org.apache.hive.service.cli.HiveSQLException: Error while compiling statement: FAILED: ParseException line 1:1635 cannot recognize input near '<EOF>' '<EOF>' '<EOF>' in column specification
        at org.apache.hive.service.cli.operation.Operation.toSQLException(Operation.java:380)
        at org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:206)
        at org.apache.hive.service.cli.operation.SQLOperation.runInternal(SQLOperation.java:290)
        at org.apache.hive.service.cli.operation.Operation.run(Operation.java:320)
        at org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:530)
        at org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementAsync(HiveSessionImpl.java:517)
        at org.apache.hive.service.cli.CLIService.executeStatementAsync(CLIService.java:310)
        at org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:530)
        at org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1437)
        at org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1422)
        at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39)
        at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39)
        at org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:56)
        at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.RuntimeException: org.apache.hadoop.hive.ql.parse.ParseException:line 1:1635 cannot recognize input near '<EOF>' '<EOF>' '<EOF>' in column specification
        at org.apache.hadoop.hive.ql.parse.ParseDriver.parse(ParseDriver.java:211)
        at org.apache.hadoop.hive.ql.parse.ParseUtils.parse(ParseUtils.java:77)
        at org.apache.hadoop.hive.ql.parse.ParseUtils.parse(ParseUtils.java:70)
        at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:468)
        at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1317)
        at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1295)
        at org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:204)
4. Note the malformed clause PARTITIONED BY (` String) in the generated DDL: the partition column name is empty.

------------------
When using hms sync mode (hoodie.datasource.hive_sync.mode='hms'):

create table hudi_ctas_cow_nonpcf_tbl03
using hudi
tblproperties (primaryKey = 'id',hoodie.datasource.hive_sync.mode='hms')
as
select 1 as id, 'a1' as name, 10 as price;

The statement succeeds, but an error occurs when querying the table in Hive:

hive> select * from hudi_ctas_cow_nonpcf_tbl03;
FAILED: RuntimeException org.apache.hadoop.hive.ql.metadata.HiveException: Failed with exception nulljava.lang.NullPointerException
        at org.apache.hadoop.hive.ql.exec.FetchOperator.getPartitionKeyOI(FetchOperator.java:237)
        at org.apache.hadoop.hive.ql.exec.FetchOperator.setupOutputObjectInspector(FetchOperator.java:586)
        at org.apache.hadoop.hive.ql.exec.FetchOperator.initialize(FetchOperator.java:181)
        at org.apache.hadoop.hive.ql.exec.FetchOperator.<init>(FetchOperator.java:146)
        at org.apache.hadoop.hive.ql.exec.FetchTask.initialize(FetchTask.java:87)
        at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:541)
        at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1317)
        at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1457)
        at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1237)
        at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1227)
        at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:233)
        at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:184)
        at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:403)
        at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:821)
        at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:759)
        at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:686)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
        at org.apache.hadoop.util.RunJar.main(RunJar.java:136)


In Hudi, "no partition" is represented by the empty string "".
However, "".split(",") returns the array [""] (one empty element), whereas it should yield the empty array [].



--
This message was sent by Atlassian Jira
(v8.20.7#820007)