Posted to commits@hudi.apache.org by "Tao Meng (Jira)" <ji...@apache.org> on 2022/02/10 12:52:00 UTC
[jira] [Created] (HUDI-3408) fixed the bug that BUCKET_INDEX cannot process special characters
Tao Meng created HUDI-3408:
------------------------------
Summary: fixed the bug that BUCKET_INDEX cannot process special characters
Key: HUDI-3408
URL: https://issues.apache.org/jira/browse/HUDI-3408
Project: Apache Hudi
Issue Type: Bug
Components: core
Affects Versions: 0.10.1
Environment: spark3.1.1
Reporter: Tao Meng
BucketIdentifier uses split(":") to separate the record key field name (recordKeyName) from the field value (recordKeyValue). When the value itself contains ':' characters, for example the value ":::", that split returns the wrong segments and bucket-id computation fails with an ArrayIndexOutOfBoundsException; the snippet and test below reproduce this.
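For illustration, a small standalone Java snippet (the class name SplitDemo is made up for this demo; the "<name>:<value>" encoding is inferred from the issue description) showing why the split misbehaves: Java's String.split(String) drops trailing empty segments, so a field encoded as "col5::::" collapses to a single-element array, and indexing element 1 throws exactly the exception in the trace below.

public class SplitDemo {
  public static void main(String[] args) {
    // Hudi encodes each composite-key field roughly as "<name>:<value>",
    // so a col5 value of ":::" yields the encoded field "col5::::".
    String field = "col5" + ":" + ":::";

    // split(String) discards trailing empty strings:
    // ["col5", "", "", "", ""] becomes ["col5"], an array of length 1.
    String[] parts = field.split(":");
    System.out.println(parts.length);   // prints 1
    // parts[1] would throw ArrayIndexOutOfBoundsException: 1

    // A limit of 2 splits only at the first ':' and keeps the rest intact.
    String[] limited = field.split(":", 2);
    System.out.println(limited[1]);     // prints ":::"
  }
}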
test("test Bucket") {
withTempDir { tmp =>
Seq("mor").foreach { tableType =>
val tableName = generateTableName
val tablePath = s"${new Path(tmp.getCanonicalPath, tableName).toUri.toString}"
spark.sql(
s"""
|create table $tableName (
| id int, comb int, col0 int, col1 bigint, col2 float, col3 double, col4 decimal(10,4), col5 string, col6 date, col7 timestamp, col8 boolean, col9 binary, par date
|) using hudi
| location '$tablePath'
| options (
| type = '$tableType',
| primaryKey = 'id,col0,col5',
| preCombineField = 'comb',
| hoodie.index.type = 'BUCKET',
| hoodie.bucket.index.num.buckets = '900'
| )
| partitioned by (par)
""".stripMargin)
spark.sql(
s"""
| insert into $tableName values
| (1,1,11,100001,101.01,1001.0001,100001.0001,':::','2021-12-25','2021-12-25 12:01:01',true,'a01','2021-12-25'),
| (2,2,12,100002,102.02,1002.0002,100002.0002,'a000002','2021-12-25','2021-12-25 12:02:02',true,'a02','2021-12-25'),
| (3,3,13,100003,103.03,1003.0003,100003.0003,'a000003','2021-12-25','2021-12-25 12:03:03',false,'a03','2021-12-25'),
| (4,4,14,100004,104.04,1004.0004,100004.0004,'a000004','2021-12-26','2021-12-26 12:04:04',true,'a04','2021-12-26'),
| (5,5,15,100005,105.05,1005.0005,100005.0005,'a000005','2021-12-26','2021-12-26 12:05:05',false,'a05','2021-12-26')
|""".stripMargin)
}
}
}
Caused by: java.lang.ArrayIndexOutOfBoundsException: 1
at org.apache.hudi.index.bucket.BucketIdentifier.lambda$getBucketId$2(BucketIdentifier.java:46)
at java.util.stream.Collectors.lambda$toMap$58(Collectors.java:1321)
at java.util.stream.ReduceOps$3ReducingSink.accept(ReduceOps.java:169)
at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
at java.util.Spliterators$ArraySpliterator.forEachRemaining(Spliterators.java:948)
at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481)
at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471)
at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:708)
at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:499)
at org.apache.hudi.index.bucket.BucketIdentifier.getBucketId(BucketIdentifier.java:46)
at org.apache.hudi.index.bucket.BucketIdentifier.getBucketId(BucketIdentifier.java:36)
at org.apache.hudi.index.bucket.SparkBucketIndex$1.computeNext(SparkBucketIndex.java:80)
at org.apache.hudi.index.bucket.SparkBucketIndex$1.computeNext(SparkBucketIndex.java:70)
at org.apache.hudi.client.utils.LazyIterableIterator.next(LazyIterableIterator.java:125)
... 25 more
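One possible fix, sketched below under the assumption that BucketIdentifier tokenizes the record key as comma-separated "name:value" pairs (the class and method names here are hypothetical, not the actual Hudi source): split each pair with a limit of 2, so only the first ':' is treated as the name/value separator and any further ':' stays in the value.

import java.util.Arrays;
import java.util.Map;
import java.util.stream.Collectors;

// Hypothetical sketch, not the actual BucketIdentifier implementation.
public class BucketKeyParsingSketch {
  static Map<String, String> parseRecordKeyPairs(String recordKey) {
    return Arrays.stream(recordKey.split(","))
        // limit 2: split only at the first ':'; later ':' chars stay in the value
        .map(pair -> pair.split(":", 2))
        .collect(Collectors.toMap(kv -> kv[0], kv -> kv[1]));
  }

  public static void main(String[] args) {
    // Composite key as encoded for primaryKey = 'id,col0,col5'
    Map<String, String> pairs = parseRecordKeyPairs("id:1,col0:11,col5::::");
    System.out.println(pairs.get("col5")); // prints ":::"
  }
}

Note this sketch only addresses ':' inside values (a ',' inside a value would still break the tokenization); whether the committed fix for HUDI-3408 takes this exact approach is not shown here.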
--
This message was sent by Atlassian Jira
(v8.20.1#820001)