Posted to commits@hudi.apache.org by "Tao Meng (Jira)" <ji...@apache.org> on 2022/02/10 12:52:00 UTC
[jira] [Created] (HUDI-3408) fixed the bug that BUCKET_INDEX cannot process special characters
Tao Meng created HUDI-3408:
------------------------------
Summary: fixed the bug that BUCKET_INDEX cannot process special characters
Key: HUDI-3408
URL: https://issues.apache.org/jira/browse/HUDI-3408
Project: Apache Hudi
Issue Type: Bug
Components: core
Affects Versions: 0.10.1
Environment: spark3.1.1
Reporter: Tao Meng
BucketIdentifier uses split(":") to separate the record key field name (recordKeyName) from the field value (recordKeyValue). When the value itself contains ':' characters, for example the value ":::", that split returns the wrong segments and bucket-id computation fails with an ArrayIndexOutOfBoundsException; the snippet and test below reproduce this.
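For illustration, a small standalone Java snippet (the class name SplitDemo is made up for this demo; the "<name>:<value>" encoding is inferred from the issue description) showing why the split misbehaves: Java's String.split(String) drops trailing empty segments, so a field encoded as "col5::::" collapses to a single-element array, and indexing element 1 throws exactly the exception in the trace below.

public class SplitDemo {
  public static void main(String[] args) {
    // Hudi encodes each composite-key field roughly as "<name>:<value>",
    // so a col5 value of ":::" yields the encoded field "col5::::".
    String field = "col5" + ":" + ":::";

    // split(String) discards trailing empty strings:
    // ["col5", "", "", "", ""] becomes ["col5"], an array of length 1.
    String[] parts = field.split(":");
    System.out.println(parts.length);   // prints 1
    // parts[1] would throw ArrayIndexOutOfBoundsException: 1

    // A limit of 2 splits only at the first ':' and keeps the rest intact.
    String[] limited = field.split(":", 2);
    System.out.println(limited[1]);     // prints ":::"
  }
}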
test("test Bucket") {
withTempDir { tmp =>
Seq("mor").foreach { tableType =>
val tableName = generateTableName
val tablePath = s"${new Path(tmp.getCanonicalPath, tableName).toUri.toString}"
spark.sql(
s"""
|create table $tableName (
| id int, comb int, col0 int, col1 bigint, col2 float, col3 double, col4 decimal(10,4), col5 string, col6 date, col7 timestamp, col8 boolean, col9 binary, par date
|) using hudi
| location '$tablePath'
| options (
| type = '$tableType',
| primaryKey = 'id,col0,col5',
| preCombineField = 'comb',
| hoodie.index.type = 'BUCKET',
| hoodie.bucket.index.num.buckets = '900'
| )
| partitioned by (par)
""".stripMargin)
spark.sql(
s"""
| insert into $tableName values
| (1,1,11,100001,101.01,1001.0001,100001.0001,':::','2021-12-25','2021-12-25 12:01:01',true,'a01','2021-12-25'),
| (2,2,12,100002,102.02,1002.0002,100002.0002,'a000002','2021-12-25','2021-12-25 12:02:02',true,'a02','2021-12-25'),
| (3,3,13,100003,103.03,1003.0003,100003.0003,'a000003','2021-12-25','2021-12-25 12:03:03',false,'a03','2021-12-25'),
| (4,4,14,100004,104.04,1004.0004,100004.0004,'a000004','2021-12-26','2021-12-26 12:04:04',true,'a04','2021-12-26'),
| (5,5,15,100005,105.05,1005.0005,100005.0005,'a000005','2021-12-26','2021-12-26 12:05:05',false,'a05','2021-12-26')
|""".stripMargin)
}
}
}
Caused by: java.lang.ArrayIndexOutOfBoundsException: 1
at org.apache.hudi.index.bucket.BucketIdentifier.lambda$getBucketId$2(BucketIdentifier.java:46)
at java.util.stream.Collectors.lambda$toMap$58(Collectors.java:1321)
at java.util.stream.ReduceOps$3ReducingSink.accept(ReduceOps.java:169)
at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
at java.util.Spliterators$ArraySpliterator.forEachRemaining(Spliterators.java:948)
at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481)
at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471)
at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:708)
at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:499)
at org.apache.hudi.index.bucket.BucketIdentifier.getBucketId(BucketIdentifier.java:46)
at org.apache.hudi.index.bucket.BucketIdentifier.getBucketId(BucketIdentifier.java:36)
at org.apache.hudi.index.bucket.SparkBucketIndex$1.computeNext(SparkBucketIndex.java:80)
at org.apache.hudi.index.bucket.SparkBucketIndex$1.computeNext(SparkBucketIndex.java:70)
at org.apache.hudi.client.utils.LazyIterableIterator.next(LazyIterableIterator.java:125)
... 25 more
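One possible fix, sketched below under the assumption that BucketIdentifier tokenizes the record key as comma-separated "name:value" pairs (the class and method names here are hypothetical, not the actual Hudi source): split each pair with a limit of 2, so only the first ':' is treated as the name/value separator and any further ':' stays in the value.

import java.util.Arrays;
import java.util.Map;
import java.util.stream.Collectors;

// Hypothetical sketch, not the actual BucketIdentifier implementation.
public class BucketKeyParsingSketch {
  static Map<String, String> parseRecordKeyPairs(String recordKey) {
    return Arrays.stream(recordKey.split(","))
        // limit 2: split only at the first ':'; later ':' chars stay in the value
        .map(pair -> pair.split(":", 2))
        .collect(Collectors.toMap(kv -> kv[0], kv -> kv[1]));
  }

  public static void main(String[] args) {
    // Composite key as encoded for primaryKey = 'id,col0,col5'
    Map<String, String> pairs = parseRecordKeyPairs("id:1,col0:11,col5::::");
    System.out.println(pairs.get("col5")); // prints ":::"
  }
}

Note this sketch only addresses ':' inside values (a ',' inside a value would still break the tokenization); whether the committed fix for HUDI-3408 takes this exact approach is not shown here.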
--
This message was sent by Atlassian Jira
(v8.20.1#820001)