You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@hive.apache.org by "Rajkumar Singh (Jira)" <ji...@apache.org> on 2020/09/14 18:40:00 UTC

[jira] [Created] (HIVE-24163) Dynamic Partitioning Insert fail for MM table fail while Move Operation

Rajkumar Singh created HIVE-24163:
-------------------------------------

             Summary: Dynamic Partitioning Insert fail for MM table fail while Move Operation
                 Key: HIVE-24163
                 URL: https://issues.apache.org/jira/browse/HIVE-24163
             Project: Hive
          Issue Type: Bug
          Components: Hive
            Reporter: Rajkumar Singh
             Fix For: 3.1.2


-- create MM table 
{code:java}
CREATE TABLE `part1`(                              |
|   `id` double,                                     |
|   `n` double,                                      |
|   `name` varchar(8),                               |
|   `sex` varchar(1))                                |
| PARTITIONED BY (                                   |
|   `weight` string,                                 |
|   `age` string,                                    |
|   `height` string)                                 |
| ROW FORMAT SERDE                                   |
|   'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'  |
| WITH SERDEPROPERTIES (                             |
|   'field.delim'='\u0001',                          |
|   'line.delim'='\n',                               |
|   'serialization.format'='\u0001')                 |
| STORED AS INPUTFORMAT                              |
|   'org.apache.hadoop.mapred.TextInputFormat'       |
| OUTPUTFORMAT                                       |
|   'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' |
| LOCATION                                           |
|   'hdfs://hostname:8020/warehouse/tablespace/managed/hive/part1' |
| TBLPROPERTIES (                                    |
|   'bucketing_version'='2',                         |
|   'transactional'='true',                          |
|   'transactional_properties'='insert_only',        |
|   'transient_lastDdlTime'='1599053368')    
{code}

-- create managed table 

{code:java}
CREATE TABLE `class`(                              |
|   `name` varchar(8),                               |
|   `sex` varchar(1),                                |
|   `age` double,                                    |
|   `height` double,                                 |
|   `weight` double)                                 |
| ROW FORMAT SERDE                                   |
|   'org.apache.hadoop.hive.ql.io.orc.OrcSerde'      |
| STORED AS INPUTFORMAT                              |
|   'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'  |
| OUTPUTFORMAT                                       |
|   'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' |
| LOCATION                                           |
|   'hdfs://hostname:8020/warehouse/tablespace/managed/hive/class' |
| TBLPROPERTIES (                                    |
|   'bucketing_version'='2',                         |
|   'transactional'='true',                          |
|   'transactional_properties'='default',            |
|   'transient_lastDdlTime'='1599053345')  
{code}


-- Run Insert query

{code:java}
INSERT INTO TABLE `part1` PARTITION (`Weight`,`Age`,`Height`)  SELECT 0, 0, `Name`,`Sex`,`Weight`,`Age`,`Height` FROM `class`;
{code}

It fails during the MoveTask execution:

{code:java}
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: partition hdfs://hostname:8020/warehouse/tablespace/managed/hive/part1/.hive-staging_hive_2020-09-02_13-29-58_765_4475282758764123921-1/-ext-10000/tmpstats-0_FS_3 is not a directory!
        at org.apache.hadoop.hive.ql.metadata.Hive.getValidPartitionsInPath(Hive.java:2769) ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.metadata.Hive.loadDynamicPartitions(Hive.java:2837) ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.exec.MoveTask.handleDynParts(MoveTask.java:562) ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:440) ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213) ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105) ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:359) ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:330) ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:246) ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:109) ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:721) ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.Driver.run(Driver.java:488) ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.Driver.run(Driver.java:482) ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:166) ~[hive-exec-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]
        at org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:225) ~[hive-service-3.1.3000.7.2.0.0-237.jar:3.1.3000.7.2.0.0-237]

{code}

The reason is that the task writes the fs stats while the FileSinkOperator is closing. HS2 then runs the MoveTask to move the data into the destination partition directory; while getting the partition locations, Hive checks whether each path is a directory, and it fails because the stats path (tmpstats-0_FS_3 in the trace above) is not a directory.

-- Hive sets the stats location here:
https://github.com/apache/hive/blob/d700ea54ec5da5364d92a9faaa58f89ea03181e0/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java#L8135

which is relative to the hive-staging directory:

https://github.com/apache/hive/blob/fecad5b0f72c535ed1c53f2cc62b0d6649b651ae/ql/src/java/org/apache/hadoop/hive/ql/Context.java#L617








--
This message was sent by Atlassian Jira
(v8.3.4#803005)