Posted to commits@hudi.apache.org by GitBox <gi...@apache.org> on 2022/07/20 03:04:13 UTC

[GitHub] [hudi] xiarixiaoyao commented on pull request #6124: [HUDI-4404] Fix insert into dynamic partition write misalignment

xiarixiaoyao commented on PR #6124:
URL: https://github.com/apache/hudi/pull/6124#issuecomment-1189754470

   @XuQianJin-Stars please take a look at this test case. The dynamic-partition insert below fails during analysis: the VALUES columns get matched to the wrong target columns, so Spark ends up trying to cast a decimal value into the `date` column `col6`.
   ```
     // Helpers such as test(...), withTempDir, generateTableName and spark are
     // provided by Hudi's Spark SQL test base class; the imports below are what
     // the snippet itself needs.
     import org.apache.hadoop.fs.Path
     import org.apache.hudi.HoodieSparkUtils
     import org.apache.spark.sql.SparkSession

     def createAndPreparePartitionTable(spark: SparkSession, tableName: String, tablePath: String, tableType: String): Unit = {
       // try to clean tablePath
       spark.sql(
         s"""
            |create table $tableName (
            |  id int, comb int, col0 int, col1 bigint, col2 float, col3 double, col4 decimal(10,4), col5 string, col6 date, col7 timestamp, col8 boolean, col9 binary, par date
            |) using hudi
            | location '$tablePath'
            | options (
            |  type = '$tableType',
            |  primaryKey = 'id',
            |  preCombineField = 'comb'
            | )
            | partitioned by (par)
                """.stripMargin)
       spark.sql(
         s"""
            | insert into $tableName values
            | (1,1,11,100001,101.01,1001.0001,100001.0001,'a000001','2021-12-25','2021-12-25 12:01:01',true,'a01','2021-12-25'),
            | (2,2,12,100002,102.02,1002.0002,100002.0002,'a000002','2021-12-25','2021-12-25 12:02:02',true,'a02','2021-12-25'),
            | (3,3,13,100003,103.03,1003.0003,100003.0003,'a000003','2021-12-25','2021-12-25 12:03:03',false,'a03','2021-12-25'),
            | (4,4,14,100004,104.04,1004.0004,100004.0004,'a000004','2021-12-26','2021-12-26 12:04:04',true,'a04','2021-12-26'),
            | (5,5,15,100005,105.05,1005.0005,100005.0005,'a000005','2021-12-26','2021-12-26 12:05:05',false,'a05','2021-12-26')
            |""".stripMargin)
     }
   
     test("base test") {
       withTempDir { tmp =>
         Seq("cow", "mor").foreach { tableType =>
           val tableName = generateTableName
           val tablePath = s"${new Path(tmp.getCanonicalPath, tableName).toUri.toString}"
           if (HoodieSparkUtils.gteqSpark3_1) {
             createAndPreparePartitionTable(spark, tableName, tablePath, tableType)
           }
         }
       }
     }
   ```
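
   Once the misalignment is fixed, the test could also assert that each row lands in the partition given as the last value of its tuple. A minimal sketch of such a check, assuming the `checkAnswer(sql)(rows*)` helper that Hudi's Spark SQL test base provides (the helper is an assumption here, not something this PR changes):
   ```
     // Sketch only: after the insert above succeeds, check ids and partition values.
     checkAnswer(s"select id, cast(par as string) from $tableName order by id")(
       Seq(1, "2021-12-25"),
       Seq(2, "2021-12-25"),
       Seq(3, "2021-12-25"),
       Seq(4, "2021-12-26"),
       Seq(5, "2021-12-26")
     )
   ```
   Running the test as written currently fails with: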
   
   ```
   org.apache.spark.sql.AnalysisException: cannot resolve 'CAST(`col6` AS DATE)' due to data type mismatch: cannot cast decimal(8,4) to date;
   'Project [cast(col1#0 as bigint) AS col1#36L, cast(col2#1 as float) AS col2#37, cast(col3#2 as double) AS col3#38, cast(col4#3 as decimal(10,4)) AS col4#39, cast(col5#4 as string) AS col5#40, cast(col6#5 as date) AS col6#41, cast(col7#6 as timestamp) AS col7#42, cast(col8#7 as boolean) AS col8#43, cast(col9#8 as binary) AS col9#44, cast(col10#9 as timestamp) AS col7#45, col11#10 AS col8#46, cast(col12#11 as binary) AS col9#47, cast(col13#12 as date) AS par#48]
   +- Project [null AS _hoodie_commit_time#13, null AS _hoodie_commit_seqno#14, null AS _hoodie_record_key#15, null AS _hoodie_partition_path#16, null AS _hoodie_file_name#17, col1#0, col2#1, col3#2, col4#3, col5#4, col6#5, col7#6, col8#7, col9#8, col10#9, col11#10, col12#11, col13#12]
      +- LocalRelation [col1#0, col2#1, col3#2, col4#3, col5#4, col6#5, col7#6, col8#7, col9#8, col10#9, col11#10, col12#11, col13#12]
   
   	at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42)
   	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$$nestedInanonfun$checkAnalysis$1$2.applyOrElse(CheckAnalysis.scala:164)
   ```
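
   For comparison, the same rows can also be written with a static partition spec, where only the twelve non-partition columns appear in VALUES. This sketch reuses `spark` and `$tableName` from the test above and is shown only to contrast with the dynamic-partition form; it is not claimed to avoid the misalignment:
   ```
     // Sketch: static-partition form of the first two rows; par comes from the
     // PARTITION clause instead of the last position of each VALUES tuple.
     spark.sql(
       s"""
          | insert into $tableName partition (par = '2021-12-25') values
          | (1,1,11,100001,101.01,1001.0001,100001.0001,'a000001','2021-12-25','2021-12-25 12:01:01',true,'a01'),
          | (2,2,12,100002,102.02,1002.0002,100002.0002,'a000002','2021-12-25','2021-12-25 12:02:02',true,'a02')
          |""".stripMargin)
   ```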
   

