Posted to commits@hudi.apache.org by yu...@apache.org on 2022/10/06 13:07:57 UTC
[hudi] 01/07: [HUDI-3378][HUDI-3379][HUDI-3381] Migrate usage of HoodieRecordPayload and raw Avro payload to HoodieRecord (#5522)
This is an automated email from the ASF dual-hosted git repository.
yuzhaojing pushed a commit to branch release-feature-rfc46
in repository https://gitbox.apache.org/repos/asf/hudi.git
commit 8a9dcf599bdf9d460b4883bd6c76f9bcfc0ac316
Author: komao <ma...@gmail.com>
AuthorDate: Fri Jun 10 23:20:47 2022 +0800
[HUDI-3378][HUDI-3379][HUDI-3381] Migrate usage of HoodieRecordPayload and raw Avro payload to HoodieRecord (#5522)
Co-authored-by: Alexey Kudinkin <al...@infinilake.com>
Co-authored-by: wangzixuan.wzxuan <wa...@bytedance.com>
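
For orientation before the diff: the hunks below repeatedly rewrite call sites off the raw Avro payload API (HoodieRecordPayload#getInsertValue, IndexedRecord iterators) and onto the HoodieRecord abstraction. A minimal sketch of that recurring pattern follows, assuming the post-migration signatures visible in the hunks; the class and parameter names here are hypothetical and not part of the patch itself:

import org.apache.avro.Schema;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.table.log.block.HoodieDataBlock;
import org.apache.hudi.common.util.Option;

import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

class HoodieRecordMigrationSketch {

  // Replaces: blk.getRecordIterator().forEachRemaining(records::add)
  // and:      hoodieRecord.getData().getInsertValue(schema)
  static List<IndexedRecord> readAvroRecords(HoodieDataBlock blk, HoodieRecord<?> rec, Schema schema) throws Exception {
    List<IndexedRecord> out = new ArrayList<>();

    // Data blocks now hand back HoodieRecords built by a caller-supplied mapper;
    // the raw Avro record is only unwrapped at the edge via getData().
    blk.getRecordIterator(HoodieAvroIndexedRecord::new)
        .forEachRemaining(r -> out.add((IndexedRecord) r.getData()));

    // Payload access moves behind HoodieRecord#toIndexedRecord instead of
    // reaching into HoodieRecordPayload#getInsertValue directly.
    Option<IndexedRecord> value = rec.toIndexedRecord(schema, new Properties());
    if (value.isPresent()) {
      out.add(value.get());
    }
    return out;
  }
}

The same shape recurs throughout the diff, e.g. in ArchivedCommitsCommand, ExportCommand and HoodieLogFileCommand below.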
---
.../hudi/cli/commands/ArchivedCommitsCommand.java | 8 +-
.../apache/hudi/cli/commands/ExportCommand.java | 6 +-
.../hudi/cli/commands/HoodieLogFileCommand.java | 13 +-
.../scala/org/apache/hudi/cli/SparkHelpers.scala | 3 +-
.../cli/commands/TestHoodieLogFileCommand.java | 11 +-
.../java/org/apache/hudi/client/BaseClusterer.java | 3 +-
.../java/org/apache/hudi/client/BaseCompactor.java | 3 +-
.../apache/hudi/client/BaseHoodieWriteClient.java | 3 +-
.../apache/hudi/client/HoodieTimelineArchiver.java | 7 +-
.../apache/hudi/client/utils/MergingIterator.java | 16 +-
.../common/table/log/HoodieFileSliceReader.java | 21 ++-
.../hudi/execution/CopyOnWriteInsertHandler.java | 18 +-
.../hudi/execution/HoodieLazyInsertIterable.java | 34 ++--
.../org/apache/hudi/index/HoodieIndexUtils.java | 4 +-
.../org/apache/hudi/io/AppendHandleFactory.java | 3 +-
.../org/apache/hudi/io/CreateHandleFactory.java | 3 +-
.../org/apache/hudi/io/HoodieAppendHandle.java | 91 +++++----
.../org/apache/hudi/io/HoodieBootstrapHandle.java | 3 +-
.../org/apache/hudi/io/HoodieConcatHandle.java | 13 +-
.../org/apache/hudi/io/HoodieCreateHandle.java | 44 ++---
.../java/org/apache/hudi/io/HoodieIOHandle.java | 3 +-
.../hudi/io/HoodieKeyLocationFetchHandle.java | 3 +-
.../org/apache/hudi/io/HoodieKeyLookupHandle.java | 10 +-
.../java/org/apache/hudi/io/HoodieMergeHandle.java | 71 ++++---
.../org/apache/hudi/io/HoodieRangeInfoHandle.java | 7 +-
.../java/org/apache/hudi/io/HoodieReadHandle.java | 7 +-
.../apache/hudi/io/HoodieSortedMergeHandle.java | 18 +-
.../hudi/io/HoodieUnboundedCreateHandle.java | 3 +-
.../java/org/apache/hudi/io/HoodieWriteHandle.java | 89 +--------
.../hudi/io/SingleFileHandleCreateFactory.java | 3 +-
.../org/apache/hudi/io/WriteHandleFactory.java | 3 +-
...HFileWriter.java => HoodieAvroHFileWriter.java} | 21 +--
...odieOrcWriter.java => HoodieAvroOrcWriter.java} | 10 +-
.../hudi/io/storage/HoodieAvroParquetWriter.java | 6 +-
.../hudi/io/storage/HoodieFileWriterFactory.java | 23 ++-
.../apache/hudi/table/HoodieCompactionHandler.java | 5 +-
.../java/org/apache/hudi/table/HoodieTable.java | 3 +-
.../hudi/table/action/BaseActionExecutor.java | 3 +-
.../action/bootstrap/BootstrapRecordConsumer.java | 12 +-
.../table/action/clean/CleanActionExecutor.java | 3 +-
.../action/clean/CleanPlanActionExecutor.java | 3 +-
.../hudi/table/action/clean/CleanPlanner.java | 3 +-
.../cluster/ClusteringPlanActionExecutor.java | 3 +-
.../strategy/ClusteringExecutionStrategy.java | 3 +-
.../cluster/strategy/ClusteringPlanStrategy.java | 3 +-
.../PartitionAwareClusteringPlanStrategy.java | 3 +-
.../action/cluster/strategy/UpdateStrategy.java | 3 +-
.../table/action/commit/BaseBulkInsertHelper.java | 3 +-
.../action/commit/BaseCommitActionExecutor.java | 3 +-
.../hudi/table/action/commit/BaseDeleteHelper.java | 3 +-
.../hudi/table/action/commit/BaseMergeHelper.java | 48 +++--
.../hudi/table/action/commit/BaseWriteHelper.java | 3 +-
.../table/action/commit/HoodieDeleteHelper.java | 3 +-
.../table/action/commit/HoodieMergeHelper.java | 60 +++++-
.../table/action/commit/HoodieWriteHelper.java | 10 +-
.../hudi/table/action/compact/CompactHelpers.java | 3 +-
.../hudi/table/action/compact/HoodieCompactor.java | 3 +-
.../compact/RunCompactionActionExecutor.java | 3 +-
.../compact/ScheduleCompactionActionExecutor.java | 3 +-
.../table/action/index/RunIndexActionExecutor.java | 3 +-
.../action/index/ScheduleIndexActionExecutor.java | 3 +-
.../action/restore/BaseRestoreActionExecutor.java | 3 +-
.../restore/CopyOnWriteRestoreActionExecutor.java | 3 +-
.../restore/MergeOnReadRestoreActionExecutor.java | 3 +-
.../rollback/BaseRollbackActionExecutor.java | 3 +-
.../rollback/BaseRollbackPlanActionExecutor.java | 3 +-
.../CopyOnWriteRollbackActionExecutor.java | 3 +-
.../rollback/MarkerBasedRollbackStrategy.java | 3 +-
.../MergeOnReadRollbackActionExecutor.java | 3 +-
.../action/rollback/RestorePlanActionExecutor.java | 3 +-
.../action/savepoint/SavepointActionExecutor.java | 3 +-
.../io/storage/TestHoodieHFileReaderWriter.java | 53 +++---
.../hudi/io/storage/TestHoodieOrcReaderWriter.java | 7 +-
.../io/storage/TestHoodieReaderWriterBase.java | 60 ++++--
.../hudi/testutils/HoodieWriteableTestTable.java | 22 ++-
.../apache/hudi/client/HoodieFlinkWriteClient.java | 4 +-
.../hudi/execution/ExplicitWriteHandler.java | 10 +-
.../hudi/execution/FlinkLazyInsertIterable.java | 3 +-
.../org/apache/hudi/index/FlinkHoodieIndex.java | 3 +-
.../apache/hudi/io/ExplicitWriteHandleFactory.java | 3 +-
.../java/org/apache/hudi/io/FlinkAppendHandle.java | 3 +-
.../hudi/io/FlinkConcatAndReplaceHandle.java | 14 +-
.../java/org/apache/hudi/io/FlinkConcatHandle.java | 12 +-
.../java/org/apache/hudi/io/FlinkCreateHandle.java | 3 +-
.../apache/hudi/io/FlinkMergeAndReplaceHandle.java | 3 +-
.../java/org/apache/hudi/io/FlinkMergeHandle.java | 3 +-
.../hudi/table/ExplicitWriteHandleTable.java | 3 +-
.../hudi/table/HoodieFlinkCopyOnWriteTable.java | 5 +-
.../hudi/table/HoodieFlinkMergeOnReadTable.java | 3 +-
.../org/apache/hudi/table/HoodieFlinkTable.java | 13 +-
.../commit/BaseFlinkCommitActionExecutor.java | 3 +-
.../commit/FlinkDeleteCommitActionExecutor.java | 3 +-
.../commit/FlinkInsertCommitActionExecutor.java | 3 +-
.../FlinkInsertOverwriteCommitActionExecutor.java | 3 +-
...nkInsertOverwriteTableCommitActionExecutor.java | 3 +-
.../FlinkInsertPreppedCommitActionExecutor.java | 3 +-
.../hudi/table/action/commit/FlinkMergeHelper.java | 19 +-
.../commit/FlinkUpsertCommitActionExecutor.java | 3 +-
.../FlinkUpsertPreppedCommitActionExecutor.java | 3 +-
.../hudi/table/action/commit/FlinkWriteHelper.java | 13 +-
.../delta/BaseFlinkDeltaCommitActionExecutor.java | 3 +-
.../FlinkUpsertDeltaCommitActionExecutor.java | 3 +-
...linkUpsertPreppedDeltaCommitActionExecutor.java | 3 +-
.../HoodieFlinkMergeOnReadTableCompactor.java | 3 +-
.../testutils/HoodieFlinkWriteableTestTable.java | 3 +-
.../apache/hudi/client/HoodieJavaWriteClient.java | 3 +-
.../JavaSizeBasedClusteringPlanStrategy.java | 3 +-
.../run/strategy/JavaExecutionStrategy.java | 17 +-
.../strategy/JavaSortAndSizeExecutionStrategy.java | 3 +-
.../hudi/execution/JavaLazyInsertIterable.java | 3 +-
.../JavaCustomColumnsSortPartitioner.java | 8 +-
.../bulkinsert/JavaGlobalSortPartitioner.java | 3 +-
.../bulkinsert/JavaNonSortPartitioner.java | 3 +-
.../org/apache/hudi/index/JavaHoodieIndex.java | 3 +-
.../hudi/table/HoodieJavaCopyOnWriteTable.java | 5 +-
.../hudi/table/HoodieJavaMergeOnReadTable.java | 3 +-
.../org/apache/hudi/table/HoodieJavaTable.java | 7 +-
.../JavaExecuteClusteringCommitActionExecutor.java | 3 +-
.../commit/BaseJavaCommitActionExecutor.java | 3 +-
.../commit/JavaBulkInsertCommitActionExecutor.java | 3 +-
.../table/action/commit/JavaBulkInsertHelper.java | 3 +-
.../JavaBulkInsertPreppedCommitActionExecutor.java | 3 +-
.../commit/JavaDeleteCommitActionExecutor.java | 3 +-
.../commit/JavaInsertCommitActionExecutor.java | 3 +-
.../JavaInsertOverwriteCommitActionExecutor.java | 3 +-
...vaInsertOverwriteTableCommitActionExecutor.java | 3 +-
.../JavaInsertPreppedCommitActionExecutor.java | 3 +-
.../hudi/table/action/commit/JavaMergeHelper.java | 15 +-
.../commit/JavaUpsertCommitActionExecutor.java | 3 +-
.../table/action/commit/JavaUpsertPartitioner.java | 3 +-
.../JavaUpsertPreppedCommitActionExecutor.java | 3 +-
.../hudi/table/action/commit/JavaWriteHelper.java | 8 +-
.../HoodieJavaMergeOnReadTableCompactor.java | 3 +-
.../BaseJavaDeltaCommitActionExecutor.java | 3 +-
...JavaUpsertPreppedDeltaCommitActionExecutor.java | 3 +-
.../TestJavaBulkInsertInternalPartitioner.java | 3 +-
.../org/apache/hudi/client/HoodieReadClient.java | 2 +-
.../hudi/client/HoodieSparkClusteringClient.java | 3 +-
.../apache/hudi/client/HoodieSparkCompactor.java | 3 +-
.../apache/hudi/client/SparkRDDWriteClient.java | 3 +-
.../strategy/SparkSingleFileSortPlanStrategy.java | 3 +-
.../SparkSizeBasedClusteringPlanStrategy.java | 3 +-
.../MultipleSparkJobExecutionStrategy.java | 8 +-
.../strategy/SingleSparkJobExecutionStrategy.java | 2 +-
.../SparkSingleFileSortExecutionStrategy.java | 3 +-
.../SparkSortAndSizeExecutionStrategy.java | 3 +-
.../update/strategy/BaseSparkUpdateStrategy.java | 3 +-
.../update/strategy/SparkAllowUpdateStrategy.java | 3 +-
.../update/strategy/SparkRejectUpdateStrategy.java | 3 +-
.../client/validator/SparkPreCommitValidator.java | 3 +-
.../SqlQueryEqualityPreCommitValidator.java | 3 +-
.../SqlQueryInequalityPreCommitValidator.java | 3 +-
.../validator/SqlQueryPreCommitValidator.java | 3 +-
.../SqlQuerySingleResultPreCommitValidator.java | 3 +-
.../hudi/execution/SparkLazyInsertIterable.java | 3 +-
.../bulkinsert/BulkInsertMapFunction.java | 3 +-
.../bulkinsert/GlobalSortPartitioner.java | 3 +-
.../execution/bulkinsert/NonSortPartitioner.java | 3 +-
.../RDDCustomColumnsSortPartitioner.java | 6 +-
.../bulkinsert/RDDPartitionSortPartitioner.java | 3 +-
.../bulkinsert/RDDSpatialCurveSortPartitioner.java | 9 +-
.../org/apache/hudi/index/SparkHoodieIndex.java | 3 +-
.../hudi/io/storage/row/HoodieRowCreateHandle.java | 1 +
.../hudi/table/HoodieSparkCopyOnWriteTable.java | 5 +-
.../hudi/table/HoodieSparkMergeOnReadTable.java | 3 +-
.../org/apache/hudi/table/HoodieSparkTable.java | 11 +-
.../SparkBootstrapCommitActionExecutor.java | 3 +-
.../SparkBootstrapDeltaCommitActionExecutor.java | 3 +-
...SparkExecuteClusteringCommitActionExecutor.java | 3 +-
.../commit/BaseSparkCommitActionExecutor.java | 3 +-
.../commit/BulkInsertDataInternalWriterHelper.java | 2 +-
.../action/commit/SparkBucketIndexPartitioner.java | 3 +-
.../SparkBulkInsertCommitActionExecutor.java | 3 +-
.../table/action/commit/SparkBulkInsertHelper.java | 3 +-
...SparkBulkInsertPreppedCommitActionExecutor.java | 3 +-
.../commit/SparkDeleteCommitActionExecutor.java | 3 +-
.../SparkDeletePartitionCommitActionExecutor.java | 3 +-
.../action/commit/SparkHoodiePartitioner.java | 3 +-
.../commit/SparkInsertCommitActionExecutor.java | 3 +-
.../SparkInsertOverwriteCommitActionExecutor.java | 3 +-
...rkInsertOverwriteTableCommitActionExecutor.java | 3 +-
.../SparkInsertPreppedCommitActionExecutor.java | 3 +-
.../commit/SparkUpsertCommitActionExecutor.java | 3 +-
.../SparkUpsertPreppedCommitActionExecutor.java | 3 +-
.../table/action/commit/UpsertPartitioner.java | 3 +-
.../HoodieSparkMergeOnReadTableCompactor.java | 3 +-
.../BaseSparkDeltaCommitActionExecutor.java | 3 +-
.../SparkBulkInsertDeltaCommitActionExecutor.java | 3 +-
...BulkInsertPreppedDeltaCommitActionExecutor.java | 3 +-
.../SparkDeleteDeltaCommitActionExecutor.java | 3 +-
.../SparkInsertDeltaCommitActionExecutor.java | 3 +-
...parkInsertPreppedDeltaCommitActionExecutor.java | 3 +-
.../SparkUpsertDeltaCommitActionExecutor.java | 3 +-
.../SparkUpsertDeltaCommitPartitioner.java | 3 +-
...parkUpsertPreppedDeltaCommitActionExecutor.java | 3 +-
.../hudi/client/TestUpdateSchemaEvolution.java | 4 +-
.../functional/TestHoodieBackedMetadata.java | 21 ++-
.../functional/TestHoodieBackedTableMetadata.java | 14 +-
.../hudi/execution/TestBoundedInMemoryQueue.java | 23 +--
...y.java => TestHoodieAvroFileWriterFactory.java} | 18 +-
.../io/storage/row/TestHoodieRowCreateHandle.java | 1 +
.../hudi/testutils/HoodieClientTestUtils.java | 2 +-
.../java/org/apache/hudi/avro/HoodieAvroUtils.java | 17 +-
.../hudi/common/model/HoodieAvroIndexedRecord.java | 192 +++++++++++++++++++
.../apache/hudi/common/model/HoodieAvroRecord.java | 203 +++++++++++++++++++++
.../org/apache/hudi/common/model/HoodieRecord.java | 176 ++++++++++++++++--
.../hudi/common/model/HoodieRecordPayload.java | 5 +-
.../model/OverwriteWithLatestAvroPayload.java | 2 +-
.../hudi/common/table/TableSchemaResolver.java | 6 +-
.../table/log/AbstractHoodieLogRecordReader.java | 27 ++-
.../hudi/common/table/log/HoodieLogFileReader.java | 2 +-
.../table/log/HoodieMergedLogRecordScanner.java | 14 +-
.../table/log/block/HoodieAvroDataBlock.java | 54 ++++--
.../common/table/log/block/HoodieDataBlock.java | 54 +++---
.../table/log/block/HoodieHFileDataBlock.java | 51 +++---
.../table/log/block/HoodieParquetDataBlock.java | 21 ++-
.../table/timeline/HoodieArchivedTimeline.java | 7 +-
.../apache/hudi/common/util/CollectionUtils.java | 9 +
.../apache/hudi/common/util/MappingIterator.java | 47 +++++
.../hudi/io/storage/HoodieAvroFileReader.java | 102 +++++++++++
.../hudi/io/storage/HoodieAvroFileWriter.java | 30 ++-
...HFileReader.java => HoodieAvroHFileReader.java} | 78 ++++----
...odieOrcReader.java => HoodieAvroOrcReader.java} | 6 +-
...uetReader.java => HoodieAvroParquetReader.java} | 12 +-
.../apache/hudi/io/storage/HoodieFileReader.java | 28 +--
.../hudi/io/storage/HoodieFileReaderFactory.java | 20 +-
.../apache/hudi/io/storage/HoodieFileWriter.java | 24 +--
.../apache/hudi/io/storage/HoodieHFileUtils.java | 2 +-
.../hudi/io/storage/HoodieParquetStreamWriter.java | 23 ++-
.../hudi/metadata/HoodieBackedTableMetadata.java | 64 ++++---
.../hudi/metadata/HoodieMetadataPayload.java | 4 +-
.../hudi/metadata/HoodieTableMetadataUtil.java | 6 +-
.../common/functional/TestHoodieLogFormat.java | 16 +-
.../TestHoodieLogFormatAppendFailure.java | 5 +-
...y.java => TestHoodieAvroFileReaderFactory.java} | 13 +-
.../examples/quickstart/TestQuickstartData.java | 3 +-
.../hudi/sink/clustering/ClusteringOperator.java | 7 +-
.../hudi/hadoop/HoodieHFileRecordReader.java | 18 +-
.../org/apache/hudi/hadoop/InputSplitUtils.java | 13 +-
.../realtime/RealtimeCompactedRecordReader.java | 8 +-
.../realtime/RealtimeUnmergedRecordReader.java | 2 +-
.../utils/HoodieRealtimeRecordReaderUtils.java | 4 +-
.../hudi/hadoop/testutils/InputFormatTestUtil.java | 8 +-
.../reader/DFSHoodieDatasetInputReader.java | 21 +--
.../scala/org/apache/hudi/HoodieBaseRelation.scala | 4 +-
.../org/apache/hudi/HoodieMergeOnReadRDD.scala | 1 -
.../org/apache/hudi/HoodieSparkSqlWriter.scala | 8 +-
.../HoodieBulkInsertInternalWriterTestBase.java | 10 +-
.../hudi/command/payload/ExpressionPayload.scala | 4 +-
.../apache/hudi/functional/TestCOWDataSource.scala | 2 +-
.../apache/hudi/hive/testutils/HiveTestUtil.java | 6 +-
.../utilities/HoodieMetadataTableValidator.java | 3 +-
.../DeleteSupportSchemaPostProcessor.java | 6 +-
.../MaxwellJsonKafkaSourcePostProcessor.java | 4 +-
.../sources/TestJsonKafkaSourcePostProcessor.java | 4 +-
255 files changed, 1809 insertions(+), 1171 deletions(-)
diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ArchivedCommitsCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ArchivedCommitsCommand.java
index dcd6a2cf3c..bbfb277e4d 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ArchivedCommitsCommand.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ArchivedCommitsCommand.java
@@ -29,7 +29,9 @@ import org.apache.hudi.cli.HoodieCLI;
import org.apache.hudi.cli.HoodiePrintHelper;
import org.apache.hudi.cli.TableHeader;
import org.apache.hudi.common.fs.FSUtils;
+import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
import org.apache.hudi.common.model.HoodieLogFile;
+import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.log.HoodieLogFormat;
import org.apache.hudi.common.table.log.HoodieLogFormat.Reader;
@@ -79,7 +81,7 @@ public class ArchivedCommitsCommand {
// read the avro blocks
while (reader.hasNext()) {
HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next();
- blk.getRecordIterator().forEachRemaining(readRecords::add);
+ blk.getRecordIterator(HoodieAvroIndexedRecord::new).forEachRemaining(r -> readRecords.add((IndexedRecord) r.getData()));
}
List<Comparable[]> readCommits = readRecords.stream().map(r -> (GenericRecord) r)
.filter(r -> r.get("actionType").toString().equals(HoodieTimeline.COMMIT_ACTION)
@@ -153,8 +155,8 @@ public class ArchivedCommitsCommand {
// read the avro blocks
while (reader.hasNext()) {
HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next();
- try (ClosableIterator<IndexedRecord> recordItr = blk.getRecordIterator()) {
- recordItr.forEachRemaining(readRecords::add);
+ try (ClosableIterator<HoodieRecord> recordItr = blk.getRecordIterator(HoodieAvroIndexedRecord::new)) {
+ recordItr.forEachRemaining(r -> readRecords.add((IndexedRecord) r.getData()));
}
}
List<Comparable[]> readCommits = readRecords.stream().map(r -> (GenericRecord) r)
diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ExportCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ExportCommand.java
index 2406eddacf..30b48e4d37 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ExportCommand.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ExportCommand.java
@@ -31,7 +31,9 @@ import org.apache.hudi.avro.model.HoodieRollbackMetadata;
import org.apache.hudi.avro.model.HoodieSavepointMetadata;
import org.apache.hudi.cli.HoodieCLI;
import org.apache.hudi.common.fs.FSUtils;
+import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
import org.apache.hudi.common.model.HoodieLogFile;
+import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.log.HoodieLogFormat;
import org.apache.hudi.common.table.log.HoodieLogFormat.Reader;
@@ -121,9 +123,9 @@ public class ExportCommand {
// read the avro blocks
while (reader.hasNext() && copyCount < limit) {
HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next();
- try (ClosableIterator<IndexedRecord> recordItr = blk.getRecordIterator()) {
+ try (ClosableIterator<HoodieRecord> recordItr = blk.getRecordIterator(HoodieAvroIndexedRecord::new)) {
while (recordItr.hasNext()) {
- IndexedRecord ir = recordItr.next();
+ IndexedRecord ir = (IndexedRecord) recordItr.next().getData();
// Archived instants are saved as arvo encoded HoodieArchivedMetaEntry records. We need to get the
// metadata record from the entry and convert it to json.
HoodieArchivedMetaEntry archiveEntryRecord = (HoodieArchivedMetaEntry) SpecificData.get()
diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieLogFileCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieLogFileCommand.java
index 56e00aa24c..bff6071761 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieLogFileCommand.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieLogFileCommand.java
@@ -30,9 +30,9 @@ import org.apache.hudi.cli.HoodieTableHeaderFields;
import org.apache.hudi.cli.TableHeader;
import org.apache.hudi.common.config.HoodieCommonConfig;
import org.apache.hudi.common.fs.FSUtils;
+import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.TableSchemaResolver;
import org.apache.hudi.common.table.log.HoodieLogFormat;
@@ -61,6 +61,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
+import java.util.Properties;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
@@ -121,7 +122,7 @@ public class HoodieLogFileCommand {
instantTime = "dummy_instant_time_" + dummyInstantTimeCount;
}
if (n instanceof HoodieDataBlock) {
- try (ClosableIterator<IndexedRecord> recordItr = ((HoodieDataBlock) n).getRecordIterator()) {
+ try (ClosableIterator<HoodieRecord> recordItr = ((HoodieDataBlock) n).getRecordIterator(HoodieAvroIndexedRecord::new)) {
recordItr.forEachRemaining(r -> recordCount.incrementAndGet());
}
}
@@ -218,8 +219,8 @@ public class HoodieLogFileCommand {
.withDiskMapType(HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE.defaultValue())
.withBitCaskDiskMapCompressionEnabled(HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.defaultValue())
.build();
- for (HoodieRecord<? extends HoodieRecordPayload> hoodieRecord : scanner) {
- Option<IndexedRecord> record = hoodieRecord.getData().getInsertValue(readerSchema);
+ for (HoodieRecord hoodieRecord : scanner) {
+ Option<IndexedRecord> record = hoodieRecord.toIndexedRecord(readerSchema, new Properties());
if (allRecords.size() < limit) {
allRecords.add(record.get());
}
@@ -235,10 +236,10 @@ public class HoodieLogFileCommand {
HoodieLogBlock n = reader.next();
if (n instanceof HoodieDataBlock) {
HoodieDataBlock blk = (HoodieDataBlock) n;
- try (ClosableIterator<IndexedRecord> recordItr = blk.getRecordIterator()) {
+ try (ClosableIterator<HoodieRecord> recordItr = blk.getRecordIterator(HoodieAvroIndexedRecord::new)) {
recordItr.forEachRemaining(record -> {
if (allRecords.size() < limit) {
- allRecords.add(record);
+ allRecords.add((IndexedRecord) record.getData());
}
});
}
diff --git a/hudi-cli/src/main/scala/org/apache/hudi/cli/SparkHelpers.scala b/hudi-cli/src/main/scala/org/apache/hudi/cli/SparkHelpers.scala
index b9f8df5fc2..ddb6b6cf2f 100644
--- a/hudi-cli/src/main/scala/org/apache/hudi/cli/SparkHelpers.scala
+++ b/hudi-cli/src/main/scala/org/apache/hudi/cli/SparkHelpers.scala
@@ -18,7 +18,6 @@
package org.apache.hudi.cli
import org.apache.avro.Schema
-import org.apache.avro.generic.IndexedRecord
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hudi.avro.HoodieAvroWriteSupport
@@ -49,7 +48,7 @@ object SparkHelpers {
// Add current classLoad for config, if not will throw classNotFound of 'HoodieWrapperFileSystem'.
parquetConfig.getHadoopConf().setClassLoader(Thread.currentThread.getContextClassLoader)
- val writer = new HoodieAvroParquetWriter[IndexedRecord](destinationFile, parquetConfig, instantTime, new SparkTaskContextSupplier(), true)
+ val writer = new HoodieAvroParquetWriter(destinationFile, parquetConfig, instantTime, new SparkTaskContextSupplier(), true)
for (rec <- sourceRecords) {
val key: String = rec.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString
if (!keysToSkip.contains(key)) {
diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestHoodieLogFileCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestHoodieLogFileCommand.java
index e93ad0c8ca..044d229bd0 100644
--- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestHoodieLogFileCommand.java
+++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestHoodieLogFileCommand.java
@@ -28,9 +28,10 @@ import org.apache.hudi.cli.testutils.HoodieTestCommitMetadataGenerator;
import org.apache.hudi.cli.testutils.ShellEvaluationResultUtil;
import org.apache.hudi.common.config.HoodieCommonConfig;
import org.apache.hudi.common.fs.FSUtils;
+import org.apache.hudi.common.model.HoodieAvroRecord;
+import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.table.log.HoodieLogFormat;
import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner;
@@ -111,7 +112,7 @@ public class TestHoodieLogFileCommand extends CLIFunctionalTestHarness {
.withFileId("test-log-fileid1").overBaseCommit("100").withFs(fs).build()) {
// write data to file
- List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
+ List<HoodieRecord> records = SchemaTestUtil.generateTestRecords(0, 100).stream().map(HoodieAvroIndexedRecord::new).collect(Collectors.toList());
Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>();
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, INSTANT_TIME);
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
@@ -191,7 +192,7 @@ public class TestHoodieLogFileCommand extends CLIFunctionalTestHarness {
.withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-log-fileid1").overBaseCommit(INSTANT_TIME).withFs(fs).withSizeThreshold(500).build();
- List<IndexedRecord> records1 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
+ List<HoodieRecord> records1 = SchemaTestUtil.generateHoodieTestRecords(0, 100).stream().map(HoodieAvroIndexedRecord::new).collect(Collectors.toList());
Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>();
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, INSTANT_TIME);
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
@@ -230,12 +231,12 @@ public class TestHoodieLogFileCommand extends CLIFunctionalTestHarness {
.withBitCaskDiskMapCompressionEnabled(HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.defaultValue())
.build();
- Iterator<HoodieRecord<? extends HoodieRecordPayload>> records = scanner.iterator();
+ Iterator<HoodieRecord> records = scanner.iterator();
int num = 0;
int maxSize = 10;
List<IndexedRecord> indexRecords = new ArrayList<>();
while (records.hasNext() && num < maxSize) {
- Option<IndexedRecord> hoodieRecord = records.next().getData().getInsertValue(schema);
+ Option<IndexedRecord> hoodieRecord = ((HoodieAvroRecord)records.next()).getData().getInsertValue(schema);
indexRecords.add(hoodieRecord.get());
num++;
}
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseClusterer.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseClusterer.java
index 648ce805b0..a3f552e640 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseClusterer.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseClusterer.java
@@ -19,7 +19,6 @@
package org.apache.hudi.client;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import java.io.IOException;
@@ -28,7 +27,7 @@ import java.io.Serializable;
/**
* Client will run one round of clustering.
*/
-public abstract class BaseClusterer<T extends HoodieRecordPayload, I, K, O> implements Serializable {
+public abstract class BaseClusterer<T, I, K, O> implements Serializable {
private static final long serialVersionUID = 1L;
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseCompactor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseCompactor.java
index 88737dbcf1..ba4d3f77fd 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseCompactor.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseCompactor.java
@@ -18,7 +18,6 @@
package org.apache.hudi.client;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import java.io.IOException;
@@ -27,7 +26,7 @@ import java.io.Serializable;
/**
* Run one round of compaction.
*/
-public abstract class BaseCompactor<T extends HoodieRecordPayload, I, K, O> implements Serializable {
+public abstract class BaseCompactor<T, I, K, O> implements Serializable {
private static final long serialVersionUID = 1L;
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java
index d9f260e633..851d201f63 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java
@@ -44,7 +44,6 @@ import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy;
import org.apache.hudi.common.model.HoodieKey;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.model.TableServiceType;
import org.apache.hudi.common.model.WriteOperationType;
@@ -125,7 +124,7 @@ import static org.apache.hudi.common.model.HoodieCommitMetadata.SCHEMA_KEY;
* @param <K> Type of keys
* @param <O> Type of outputs
*/
-public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K, O> extends BaseHoodieClient
+public abstract class BaseHoodieWriteClient<T, I, K, O> extends BaseHoodieClient
implements RunsTableService {
protected static final String LOOKUP_STR = "lookup";
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/HoodieTimelineArchiver.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/HoodieTimelineArchiver.java
index 2992f4abd4..16db94bffd 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/HoodieTimelineArchiver.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/HoodieTimelineArchiver.java
@@ -30,8 +30,10 @@ import org.apache.hudi.common.fs.StorageSchemes;
import org.apache.hudi.common.model.HoodieArchivedLogFile;
import org.apache.hudi.common.model.HoodieAvroPayload;
import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy;
+import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodieTableType;
+import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.log.HoodieLogFormat;
import org.apache.hudi.common.table.log.HoodieLogFormat.Writer;
@@ -342,7 +344,7 @@ public class HoodieTimelineArchiver<T extends HoodieAvroPayload, I, K, O> {
// Read the avro blocks
while (reader.hasNext()) {
HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next();
- blk.getRecordIterator().forEachRemaining(records::add);
+ blk.getRecordIterator(HoodieAvroIndexedRecord::new).forEachRemaining(r -> records.add((IndexedRecord) r.getData()));
if (records.size() >= this.config.getCommitArchivalBatchSize()) {
writeToFile(wrapperSchema, records);
}
@@ -667,7 +669,8 @@ public class HoodieTimelineArchiver<T extends HoodieAvroPayload, I, K, O> {
Map<HeaderMetadataType, String> header = new HashMap<>();
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, wrapperSchema.toString());
final String keyField = table.getMetaClient().getTableConfig().getRecordKeyFieldProp();
- HoodieAvroDataBlock block = new HoodieAvroDataBlock(records, header, keyField);
+ List<HoodieRecord> indexRecords = records.stream().map(HoodieAvroIndexedRecord::new).collect(Collectors.toList());
+ HoodieAvroDataBlock block = new HoodieAvroDataBlock(indexRecords, header, keyField);
writer.appendBlock(block);
records.clear();
}
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/utils/MergingIterator.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/utils/MergingIterator.java
index 47dde723e0..f84f1c00d9 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/utils/MergingIterator.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/utils/MergingIterator.java
@@ -18,20 +18,18 @@
package org.apache.hudi.client.utils;
-import java.util.Iterator;
-import java.util.function.Function;
-import org.apache.avro.generic.GenericRecord;
-
import org.apache.hudi.common.util.ValidationUtils;
-import org.apache.hudi.common.util.collection.Pair;
-public class MergingIterator<T extends GenericRecord> implements Iterator<T> {
+import java.util.Iterator;
+import java.util.function.BiFunction;
+
+public class MergingIterator<T> implements Iterator<T> {
private final Iterator<T> leftIterator;
private final Iterator<T> rightIterator;
- private final Function<Pair<T,T>, T> mergeFunction;
+ private final BiFunction<T, T, T> mergeFunction;
- public MergingIterator(Iterator<T> leftIterator, Iterator<T> rightIterator, Function<Pair<T,T>, T> mergeFunction) {
+ public MergingIterator(Iterator<T> leftIterator, Iterator<T> rightIterator, BiFunction<T, T, T> mergeFunction) {
this.leftIterator = leftIterator;
this.rightIterator = rightIterator;
this.mergeFunction = mergeFunction;
@@ -47,6 +45,6 @@ public class MergingIterator<T extends GenericRecord> implements Iterator<T> {
@Override
public T next() {
- return mergeFunction.apply(Pair.of(leftIterator.next(), rightIterator.next()));
+ return mergeFunction.apply(leftIterator.next(), rightIterator.next());
}
}
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/common/table/log/HoodieFileSliceReader.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/common/table/log/HoodieFileSliceReader.java
index a042255cdc..52e411108f 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/common/table/log/HoodieFileSliceReader.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/common/table/log/HoodieFileSliceReader.java
@@ -20,13 +20,12 @@
package org.apache.hudi.common.table.log;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.SpillableMapUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodiePayloadConfig;
import org.apache.hudi.exception.HoodieIOException;
-import org.apache.hudi.io.storage.HoodieFileReader;
+import org.apache.hudi.io.storage.HoodieAvroFileReader;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
@@ -38,28 +37,28 @@ import java.util.stream.StreamSupport;
/**
* Reads records from base file and merges any updates from log files and provides iterable over all records in the file slice.
*/
-public class HoodieFileSliceReader<T extends HoodieRecordPayload> implements Iterator<HoodieRecord<T>> {
+public class HoodieFileSliceReader<T> implements Iterator<HoodieRecord<T>> {
private final Iterator<HoodieRecord<T>> recordsIterator;
public static HoodieFileSliceReader getFileSliceReader(
- Option<HoodieFileReader> baseFileReader, HoodieMergedLogRecordScanner scanner, Schema schema, String payloadClass,
+ Option<HoodieAvroFileReader> baseFileReader, HoodieMergedLogRecordScanner scanner, Schema schema, String payloadClass,
String preCombineField, Option<Pair<String, String>> simpleKeyGenFieldsOpt) throws IOException {
if (baseFileReader.isPresent()) {
Iterator baseIterator = baseFileReader.get().getRecordIterator(schema);
while (baseIterator.hasNext()) {
GenericRecord record = (GenericRecord) baseIterator.next();
- HoodieRecord<? extends HoodieRecordPayload> hoodieRecord = transform(
+ HoodieRecord hoodieRecord = transform(
record, scanner, payloadClass, preCombineField, simpleKeyGenFieldsOpt);
scanner.processNextRecord(hoodieRecord);
}
return new HoodieFileSliceReader(scanner.iterator());
} else {
- Iterable<HoodieRecord<? extends HoodieRecordPayload>> iterable = () -> scanner.iterator();
+ Iterable<HoodieRecord> iterable = () -> scanner.iterator();
HoodiePayloadConfig payloadConfig = HoodiePayloadConfig.newBuilder().withPayloadOrderingField(preCombineField).build();
return new HoodieFileSliceReader(StreamSupport.stream(iterable.spliterator(), false)
.map(e -> {
try {
- GenericRecord record = (GenericRecord) e.getData().getInsertValue(schema, payloadConfig.getProps()).get();
+ GenericRecord record = (GenericRecord) e.toIndexedRecord(schema, payloadConfig.getProps()).get();
return transform(record, scanner, payloadClass, preCombineField, simpleKeyGenFieldsOpt);
} catch (IOException io) {
throw new HoodieIOException("Error while creating reader for file slice with no base file.", io);
@@ -68,9 +67,11 @@ public class HoodieFileSliceReader<T extends HoodieRecordPayload> implements Ite
}
}
- private static HoodieRecord<? extends HoodieRecordPayload> transform(
- GenericRecord record, HoodieMergedLogRecordScanner scanner, String payloadClass,
- String preCombineField, Option<Pair<String, String>> simpleKeyGenFieldsOpt) {
+ private static HoodieRecord transform(GenericRecord record,
+ HoodieMergedLogRecordScanner scanner,
+ String payloadClass,
+ String preCombineField,
+ Option<Pair<String, String>> simpleKeyGenFieldsOpt) {
return simpleKeyGenFieldsOpt.isPresent()
? SpillableMapUtils.convertToHoodieRecordPayload(record,
payloadClass, preCombineField, simpleKeyGenFieldsOpt.get(), scanner.isWithOperationField(), Option.empty())
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/execution/CopyOnWriteInsertHandler.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/execution/CopyOnWriteInsertHandler.java
index 5e1f832b7f..3a8a1ee007 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/execution/CopyOnWriteInsertHandler.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/execution/CopyOnWriteInsertHandler.java
@@ -19,9 +19,9 @@
package org.apache.hudi.execution;
import org.apache.hudi.client.WriteStatus;
+import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.queue.BoundedInMemoryQueueConsumer;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.execution.HoodieLazyInsertIterable.HoodieInsertValueGenResult;
@@ -37,7 +37,7 @@ import java.util.Map;
/**
* Consumes stream of hoodie records from in-memory queue and writes to one or more create-handles.
*/
-public class CopyOnWriteInsertHandler<T extends HoodieRecordPayload>
+public class CopyOnWriteInsertHandler<T>
extends BoundedInMemoryQueueConsumer<HoodieInsertValueGenResult<HoodieRecord>, List<WriteStatus>> {
private HoodieWriteConfig config;
@@ -68,9 +68,9 @@ public class CopyOnWriteInsertHandler<T extends HoodieRecordPayload>
}
@Override
- public void consumeOneRecord(HoodieInsertValueGenResult<HoodieRecord> payload) {
- final HoodieRecord insertPayload = payload.record;
- String partitionPath = insertPayload.getPartitionPath();
+ public void consumeOneRecord(HoodieInsertValueGenResult<HoodieRecord> genResult) {
+ final HoodieRecord record = genResult.getResult();
+ String partitionPath = record.getPartitionPath();
HoodieWriteHandle<?,?,?,?> handle = handles.get(partitionPath);
if (handle == null) {
// If the records are sorted, this means that we encounter a new partition path
@@ -81,19 +81,19 @@ public class CopyOnWriteInsertHandler<T extends HoodieRecordPayload>
}
// Lazily initialize the handle, for the first time
handle = writeHandleFactory.create(config, instantTime, hoodieTable,
- insertPayload.getPartitionPath(), idPrefix, taskContextSupplier);
+ record.getPartitionPath(), idPrefix, taskContextSupplier);
handles.put(partitionPath, handle);
}
- if (!handle.canWrite(payload.record)) {
+ if (!handle.canWrite(genResult.getResult())) {
// Handle is full. Close the handle and add the WriteStatus
statuses.addAll(handle.close());
// Open new handle
handle = writeHandleFactory.create(config, instantTime, hoodieTable,
- insertPayload.getPartitionPath(), idPrefix, taskContextSupplier);
+ record.getPartitionPath(), idPrefix, taskContextSupplier);
handles.put(partitionPath, handle);
}
- handle.write(insertPayload, payload.insertValue, payload.exception);
+ handle.write(record, genResult.schema, new TypedProperties(genResult.props));
}
@Override
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/execution/HoodieLazyInsertIterable.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/execution/HoodieLazyInsertIterable.java
index 1754836c91..b0831f0bc9 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/execution/HoodieLazyInsertIterable.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/execution/HoodieLazyInsertIterable.java
@@ -18,21 +18,17 @@
package org.apache.hudi.execution;
+import org.apache.avro.Schema;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.utils.LazyIterableIterator;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.CollectionUtils;
-import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.io.CreateHandleFactory;
import org.apache.hudi.io.WriteHandleFactory;
import org.apache.hudi.table.HoodieTable;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.IndexedRecord;
-
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
@@ -41,7 +37,7 @@ import java.util.function.Function;
/**
* Lazy Iterable, that writes a stream of HoodieRecords sorted by the partitionPath, into new files.
*/
-public abstract class HoodieLazyInsertIterable<T extends HoodieRecordPayload>
+public abstract class HoodieLazyInsertIterable<T>
extends LazyIterableIterator<HoodieRecord<T>, List<WriteStatus>> {
protected final HoodieWriteConfig hoodieConfig;
@@ -78,19 +74,19 @@ public abstract class HoodieLazyInsertIterable<T extends HoodieRecordPayload>
}
// Used for caching HoodieRecord along with insertValue. We need this to offload computation work to buffering thread.
- public static class HoodieInsertValueGenResult<T extends HoodieRecord> {
- public T record;
- public Option<IndexedRecord> insertValue;
- // It caches the exception seen while fetching insert value.
- public Option<Exception> exception = Option.empty();
+ public static class HoodieInsertValueGenResult<R extends HoodieRecord> {
+ private final R record;
+ public final Schema schema;
+ public final Properties props;
- public HoodieInsertValueGenResult(T record, Schema schema, Properties properties) {
+ public HoodieInsertValueGenResult(R record, Schema schema, Properties properties) {
this.record = record;
- try {
- this.insertValue = ((HoodieRecordPayload) record.getData()).getInsertValue(schema, properties);
- } catch (Exception e) {
- this.exception = Option.of(e);
- }
+ this.schema = schema;
+ this.props = properties;
+ }
+
+ public R getResult() {
+ return record;
}
}
@@ -98,12 +94,12 @@ public abstract class HoodieLazyInsertIterable<T extends HoodieRecordPayload>
* Transformer function to help transform a HoodieRecord. This transformer is used by BufferedIterator to offload some
* expensive operations of transformation to the reader thread.
*/
- static <T extends HoodieRecordPayload> Function<HoodieRecord<T>, HoodieInsertValueGenResult<HoodieRecord>> getTransformFunction(
+ static <T> Function<HoodieRecord<T>, HoodieInsertValueGenResult<HoodieRecord>> getTransformFunction(
Schema schema, HoodieWriteConfig config) {
return hoodieRecord -> new HoodieInsertValueGenResult(hoodieRecord, schema, config.getProps());
}
- static <T extends HoodieRecordPayload> Function<HoodieRecord<T>, HoodieInsertValueGenResult<HoodieRecord>> getTransformFunction(
+ static <T> Function<HoodieRecord<T>, HoodieInsertValueGenResult<HoodieRecord>> getTransformFunction(
Schema schema) {
return hoodieRecord -> new HoodieInsertValueGenResult(hoodieRecord, schema, CollectionUtils.emptyProps());
}
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java
index 61be856d36..69d39fd2d8 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java
@@ -32,7 +32,7 @@ import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.exception.HoodieIndexException;
-import org.apache.hudi.io.storage.HoodieFileReader;
+import org.apache.hudi.io.storage.HoodieAvroFileReader;
import org.apache.hudi.io.storage.HoodieFileReaderFactory;
import org.apache.hudi.table.HoodieTable;
import org.apache.log4j.LogManager;
@@ -153,7 +153,7 @@ public class HoodieIndexUtils {
// Load all rowKeys from the file, to double-confirm
if (!candidateRecordKeys.isEmpty()) {
HoodieTimer timer = new HoodieTimer().startTimer();
- HoodieFileReader fileReader = HoodieFileReaderFactory.getFileReader(configuration, filePath);
+ HoodieAvroFileReader fileReader = HoodieFileReaderFactory.getFileReader(configuration, filePath);
Set<String> fileRowKeys = fileReader.filterRowKeys(new TreeSet<>(candidateRecordKeys));
foundRecordKeys.addAll(fileRowKeys);
LOG.info(String.format("Checked keys against file %s, in %d ms. #candidates (%d) #found (%d)", filePath,
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/AppendHandleFactory.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/AppendHandleFactory.java
index b4c83c141b..fdd232b55a 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/AppendHandleFactory.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/AppendHandleFactory.java
@@ -19,11 +19,10 @@
package org.apache.hudi.io;
import org.apache.hudi.common.engine.TaskContextSupplier;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
-public class AppendHandleFactory<T extends HoodieRecordPayload, I, K, O> extends WriteHandleFactory<T, I, K, O> {
+public class AppendHandleFactory<T, I, K, O> extends WriteHandleFactory<T, I, K, O> {
@Override
public HoodieAppendHandle<T, I, K, O> create(final HoodieWriteConfig hoodieConfig, final String commitTime,
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/CreateHandleFactory.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/CreateHandleFactory.java
index 09131b421f..8dc19816fd 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/CreateHandleFactory.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/CreateHandleFactory.java
@@ -19,13 +19,12 @@
package org.apache.hudi.io;
import org.apache.hudi.common.engine.TaskContextSupplier;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
import java.io.Serializable;
-public class CreateHandleFactory<T extends HoodieRecordPayload, I, K, O> extends WriteHandleFactory<T, I, K, O> implements Serializable {
+public class CreateHandleFactory<T, I, K, O> extends WriteHandleFactory<T, I, K, O> implements Serializable {
private boolean preserveMetadata = false;
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java
index 3cb149427a..abdcfde31c 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java
@@ -19,11 +19,10 @@
package org.apache.hudi.io;
import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.Path;
-import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.client.WriteStatus;
+import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.BaseFile;
@@ -67,6 +66,7 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
+import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
@@ -79,7 +79,7 @@ import static org.apache.hudi.metadata.HoodieTableMetadataUtil.collectColumnRang
/**
* IO Operation to append data onto an existing file.
*/
-public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends HoodieWriteHandle<T, I, K, O> {
+public class HoodieAppendHandle<T, I, K, O> extends HoodieWriteHandle<T, I, K, O> {
private static final Logger LOG = LogManager.getLogger(HoodieAppendHandle.class);
// This acts as the sequenceID for records written
@@ -87,7 +87,7 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
protected final String fileId;
// Buffer for holding records in memory before they are flushed to disk
- private final List<IndexedRecord> recordList = new ArrayList<>();
+ private final List<HoodieRecord> recordList = new ArrayList<>();
// Buffer for holding records (to be deleted) in memory before they are flushed to disk
private final List<DeleteRecord> recordsToDelete = new ArrayList<>();
// Incoming records to be written to logs.
@@ -126,7 +126,7 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
super(config, instantTime, partitionPath, fileId, hoodieTable, taskContextSupplier);
this.fileId = fileId;
this.recordItr = recordItr;
- sizeEstimator = new DefaultSizeEstimator();
+ this.sizeEstimator = new DefaultSizeEstimator();
this.statuses = new ArrayList<>();
this.recordProperties.putAll(config.getProps());
}
@@ -206,8 +206,8 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
return hoodieRecord.getCurrentLocation() != null;
}
- private Option<IndexedRecord> getIndexedRecord(HoodieRecord<T> hoodieRecord) {
- Option<Map<String, String>> recordMetadata = hoodieRecord.getData().getMetadata();
+ private Option<HoodieRecord> prepareRecord(HoodieRecord<T> hoodieRecord) {
+ Option<Map<String, String>> recordMetadata = hoodieRecord.getMetadata();
try {
// Pass the isUpdateRecord to the props for HoodieRecordPayload to judge
// Whether it is an update or insert record.
@@ -215,24 +215,15 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
// If the format can not record the operation field, nullify the DELETE payload manually.
boolean nullifyPayload = HoodieOperation.isDelete(hoodieRecord.getOperation()) && !config.allowOperationMetadataField();
recordProperties.put(HoodiePayloadProps.PAYLOAD_IS_UPDATE_RECORD_FOR_MOR, String.valueOf(isUpdateRecord));
- Option<IndexedRecord> avroRecord = nullifyPayload ? Option.empty() : hoodieRecord.getData().getInsertValue(tableSchema, recordProperties);
- if (avroRecord.isPresent()) {
- if (avroRecord.get().equals(IGNORE_RECORD)) {
- return avroRecord;
+ Option<HoodieRecord> finalRecord = Option.empty();
+ if (!nullifyPayload && hoodieRecord.isPresent(tableSchema, recordProperties)) {
+ if (hoodieRecord.shouldIgnore(tableSchema, recordProperties)) {
+ return Option.of(hoodieRecord);
}
// Convert GenericRecord to GenericRecord with hoodie commit metadata in schema
- GenericRecord rewriteRecord = rewriteRecord((GenericRecord) avroRecord.get());
- avroRecord = Option.of(rewriteRecord);
- String seqId =
- HoodieRecord.generateSequenceId(instantTime, getPartitionId(), RECORD_COUNTER.getAndIncrement());
- if (config.populateMetaFields()) {
- HoodieAvroUtils.addHoodieKeyToRecord(rewriteRecord, hoodieRecord.getRecordKey(),
- hoodieRecord.getPartitionPath(), fileId);
- HoodieAvroUtils.addCommitMetadataToRecord(rewriteRecord, instantTime, seqId);
- }
- if (config.allowOperationMetadataField()) {
- HoodieAvroUtils.addOperationToRecord(rewriteRecord, hoodieRecord.getOperation());
- }
+ HoodieRecord rewrittenRecord = hoodieRecord.rewriteRecord(tableSchema, recordProperties, schemaOnReadEnabled, writeSchemaWithMetaFields);
+ HoodieRecord populatedRecord = populateMetadataFields(rewrittenRecord, tableSchema, recordProperties);
+ finalRecord = Option.of(populatedRecord);
if (isUpdateRecord) {
updatedRecordsWritten++;
} else {
@@ -248,7 +239,7 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
// part of marking
// record successful.
hoodieRecord.deflate();
- return avroRecord;
+ return finalRecord;
} catch (Exception e) {
LOG.error("Error writing record " + hoodieRecord, e);
writeStatus.markFailure(hoodieRecord, e, recordMetadata);
@@ -256,6 +247,24 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
return Option.empty();
}
+ private HoodieRecord populateMetadataFields(HoodieRecord<T> hoodieRecord, Schema schema, Properties prop) throws IOException {
+ Map<HoodieRecord.HoodieMetadataField, String> metadataValues = new HashMap<>();
+ String seqId =
+ HoodieRecord.generateSequenceId(instantTime, getPartitionId(), RECORD_COUNTER.getAndIncrement());
+ if (config.populateMetaFields()) {
+ metadataValues.put(HoodieRecord.HoodieMetadataField.FILENAME_METADATA_FIELD, fileId);
+ metadataValues.put(HoodieRecord.HoodieMetadataField.PARTITION_PATH_METADATA_FIELD, partitionPath);
+ metadataValues.put(HoodieRecord.HoodieMetadataField.RECORD_KEY_METADATA_FIELD, hoodieRecord.getRecordKey());
+ metadataValues.put(HoodieRecord.HoodieMetadataField.COMMIT_TIME_METADATA_FIELD, instantTime);
+ metadataValues.put(HoodieRecord.HoodieMetadataField.COMMIT_SEQNO_METADATA_FIELD, seqId);
+ }
+ if (config.allowOperationMetadataField()) {
+ metadataValues.put(HoodieRecord.HoodieMetadataField.OPERATION_METADATA_FIELD, hoodieRecord.getOperation().getName());
+ }
+
+ return hoodieRecord.addMetadataValues(schema, prop, metadataValues);
+ }
+
private void initNewStatus() {
HoodieDeltaWriteStat prevStat = (HoodieDeltaWriteStat) this.writeStatus.getStat();
// Make a new write status and copy basic fields over.
@@ -331,7 +340,7 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
statuses.add(this.writeStatus);
}
- private void processAppendResult(AppendResult result, List<IndexedRecord> recordList) {
+ private void processAppendResult(AppendResult result, List<HoodieRecord> recordList) throws IOException {
HoodieDeltaWriteStat stat = (HoodieDeltaWriteStat) this.writeStatus.getStat();
if (stat.getPath() == null) {
@@ -364,8 +373,13 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
.collect(Collectors.toList());
}
+ List<IndexedRecord> indexedRecords = new LinkedList<>();
+ for (HoodieRecord hoodieRecord : recordList) {
+ indexedRecords.add((IndexedRecord) hoodieRecord.toIndexedRecord(tableSchema, config.getProps()).get());
+ }
+
Map<String, HoodieColumnRangeMetadata<Comparable>> columnRangesMetadataMap =
- collectColumnRangeMetadata(recordList, fieldsToIndex, stat.getPath());
+ collectColumnRangeMetadata(indexedRecords, fieldsToIndex, stat.getPath());
stat.setRecordsStats(columnRangesMetadataMap);
}
@@ -423,7 +437,7 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
}
@Override
- public void write(HoodieRecord record, Option<IndexedRecord> insertValue) {
+ protected void doWrite(HoodieRecord record, Schema schema, TypedProperties props) {
Option<Map<String, String>> recordMetadata = ((HoodieRecordPayload) record.getData()).getMetadata();
try {
init(record);
@@ -481,7 +495,7 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
if (!partitionPath.equals(record.getPartitionPath())) {
HoodieUpsertException failureEx = new HoodieUpsertException("mismatched partition path, record partition: "
+ record.getPartitionPath() + " but trying to insert into partition: " + partitionPath);
- writeStatus.markFailure(record, failureEx, record.getData().getMetadata());
+ writeStatus.markFailure(record, failureEx, record.getMetadata());
return;
}
@@ -492,12 +506,17 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
record.seal();
}
// fetch the ordering val first in case the record was deflated.
- final Comparable<?> orderingVal = record.getData().getOrderingValue();
- Option<IndexedRecord> indexedRecord = getIndexedRecord(record);
+ final Comparable<?> orderingVal = record.getOrderingValue();
+ Option<HoodieRecord> indexedRecord = prepareRecord(record);
if (indexedRecord.isPresent()) {
// Skip the ignored record.
- if (!indexedRecord.get().equals(IGNORE_RECORD)) {
- recordList.add(indexedRecord.get());
+ try {
+ if (!indexedRecord.get().shouldIgnore(tableSchema, recordProperties)) {
+ recordList.add(indexedRecord.get());
+ }
+ } catch (IOException e) {
+ writeStatus.markFailure(record, e, record.getMetadata());
+ LOG.error("Error writing record " + indexedRecord.get(), e);
}
} else {
recordsToDelete.add(DeleteRecord.create(record.getKey(), orderingVal));
@@ -542,17 +561,17 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
private static HoodieLogBlock getBlock(HoodieWriteConfig writeConfig,
HoodieLogBlock.HoodieLogBlockType logDataBlockFormat,
- List<IndexedRecord> recordList,
+ List<HoodieRecord> records,
Map<HeaderMetadataType, String> header,
String keyField) {
switch (logDataBlockFormat) {
case AVRO_DATA_BLOCK:
- return new HoodieAvroDataBlock(recordList, header, keyField);
+ return new HoodieAvroDataBlock(records, header, keyField);
case HFILE_DATA_BLOCK:
return new HoodieHFileDataBlock(
- recordList, header, writeConfig.getHFileCompressionAlgorithm(), new Path(writeConfig.getBasePath()));
+ records, header, writeConfig.getHFileCompressionAlgorithm(), new Path(writeConfig.getBasePath()));
case PARQUET_DATA_BLOCK:
- return new HoodieParquetDataBlock(recordList, header, keyField, writeConfig.getParquetCompressionCodec());
+ return new HoodieParquetDataBlock(records, header, keyField, writeConfig.getParquetCompressionCodec());
default:
throw new HoodieException("Data block format " + logDataBlockFormat + " not implemented");
}
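Note: with the block helper above now taking List<HoodieRecord>, the append path buffers HoodieRecord instances and hands them to the data block constructors unchanged; Avro conversion only happens later for column-stats collection in processAppendResult(...). A minimal fragment-level sketch of that flow inside HoodieAppendHandle, assuming the usual INSTANT_TIME/SCHEMA headers are populated as before (local names and the exact header/keyField choices here are illustrative, imports elided):

    Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>();
    header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, instantTime);
    header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, writeSchemaWithMetaFields.toString());

    // recordList now holds HoodieRecord (not IndexedRecord)
    HoodieLogBlock dataBlock = getBlock(config, HoodieLogBlock.HoodieLogBlockType.AVRO_DATA_BLOCK,
        recordList, header, HoodieRecord.RECORD_KEY_METADATA_FIELD);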
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieBootstrapHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieBootstrapHandle.java
index 8e7f66467a..f110bf585d 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieBootstrapHandle.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieBootstrapHandle.java
@@ -21,7 +21,6 @@ package org.apache.hudi.io;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
@@ -33,7 +32,7 @@ import org.apache.hudi.table.HoodieTable;
* writing more than 1 skeleton file for the same bootstrap file.
* @param <T> HoodieRecordPayload
*/
-public class HoodieBootstrapHandle<T extends HoodieRecordPayload, I, K, O> extends HoodieCreateHandle<T, I, K, O> {
+public class HoodieBootstrapHandle<T, I, K, O> extends HoodieCreateHandle<T, I, K, O> {
public HoodieBootstrapHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T, I, K, O> hoodieTable,
String partitionPath, String fileId, TaskContextSupplier taskContextSupplier) {
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieConcatHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieConcatHandle.java
index ca245e0c39..c428ab0622 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieConcatHandle.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieConcatHandle.java
@@ -18,20 +18,18 @@
package org.apache.hudi.io;
+import org.apache.avro.Schema;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordLocation;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieUpsertException;
import org.apache.hudi.keygen.BaseKeyGenerator;
-import org.apache.hudi.keygen.KeyGenUtils;
import org.apache.hudi.table.HoodieTable;
-import org.apache.avro.generic.GenericRecord;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
@@ -69,7 +67,7 @@ import java.util.Map;
* happen and every batch should have new records to be inserted. Above example is for illustration purposes only.
*/
@NotThreadSafe
-public class HoodieConcatHandle<T extends HoodieRecordPayload, I, K, O> extends HoodieMergeHandle<T, I, K, O> {
+public class HoodieConcatHandle<T, I, K, O> extends HoodieMergeHandle<T, I, K, O> {
private static final Logger LOG = LogManager.getLogger(HoodieConcatHandle.class);
// a representation of incoming records that tolerates duplicate keys
@@ -94,11 +92,12 @@ public class HoodieConcatHandle<T extends HoodieRecordPayload, I, K, O> extends
* Write old record as is w/o merging with incoming record.
*/
@Override
- public void write(GenericRecord oldRecord) {
- String key = KeyGenUtils.getRecordKeyFromGenericRecord(oldRecord, keyGeneratorOpt);
+ public void write(HoodieRecord oldRecord) {
+ String key = oldRecord.getRecordKey(keyGeneratorOpt);
+ Schema schema = useWriterSchemaForCompaction ? tableSchemaWithMetaFields : tableSchema;
try {
// NOTE: We're enforcing preservation of the record metadata to keep existing semantic
- writeToFile(new HoodieKey(key, partitionPath), oldRecord, true);
+ writeToFile(new HoodieKey(key, partitionPath), oldRecord, schema, config.getProps(), true);
} catch (IOException | RuntimeException e) {
String errMsg = String.format("Failed to write old record into new file for key %s from old file %s to new file %s with writerSchema %s",
key, getOldFilePath(), newFilePath, writeSchemaWithMetaFields.toString(true));
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieCreateHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieCreateHandle.java
index 738e2d6b48..7e55b84135 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieCreateHandle.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieCreateHandle.java
@@ -19,17 +19,15 @@
package org.apache.hudi.io;
import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.client.WriteStatus;
+import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieOperation;
import org.apache.hudi.common.model.HoodiePartitionMetadata;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordLocation;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.model.HoodieWriteStat.RuntimeStats;
import org.apache.hudi.common.model.IOType;
@@ -51,11 +49,11 @@ import java.util.List;
import java.util.Map;
@NotThreadSafe
-public class HoodieCreateHandle<T extends HoodieRecordPayload, I, K, O> extends HoodieWriteHandle<T, I, K, O> {
+public class HoodieCreateHandle<T, I, K, O> extends HoodieWriteHandle<T, I, K, O> {
private static final Logger LOG = LogManager.getLogger(HoodieCreateHandle.class);
- protected final HoodieFileWriter<IndexedRecord> fileWriter;
+ protected final HoodieFileWriter fileWriter;
protected final Path path;
protected long recordsWritten = 0;
protected long insertRecordsWritten = 0;
@@ -130,22 +128,20 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload, I, K, O> extends
* Perform the actual writing of the given record into the backing file.
*/
@Override
- public void write(HoodieRecord record, Option<IndexedRecord> avroRecord) {
- Option recordMetadata = ((HoodieRecordPayload) record.getData()).getMetadata();
- if (HoodieOperation.isDelete(record.getOperation())) {
- avroRecord = Option.empty();
- }
+ protected void doWrite(HoodieRecord record, Schema schema, TypedProperties props) {
+ Option<Map<String, String>> recordMetadata = record.getMetadata();
try {
- if (avroRecord.isPresent()) {
- if (avroRecord.get().equals(IGNORE_RECORD)) {
+ if (!HoodieOperation.isDelete(record.getOperation()) && record.isPresent(schema, config.getProps())) {
+ if (record.shouldIgnore(schema, config.getProps())) {
return;
}
// Convert GenericRecord to GenericRecord with hoodie commit metadata in schema
if (preserveMetadata) {
- fileWriter.writeAvro(record.getRecordKey(),
- rewriteRecordWithMetadata((GenericRecord) avroRecord.get(), path.getName()));
+ fileWriter.write(record.getRecordKey(), record.rewriteRecordWithMetadata(
+ schema, config.getProps(), schemaOnReadEnabled, writeSchemaWithMetaFields, path.getName()), writeSchemaWithMetaFields);
} else {
- fileWriter.writeAvroWithMetadata(record.getKey(), rewriteRecord((GenericRecord) avroRecord.get()));
+ fileWriter.writeWithMetadata(record.getKey(), record.rewriteRecordWithMetadata(
+ schema, config.getProps(), schemaOnReadEnabled, writeSchemaWithMetaFields, path.getName()), writeSchemaWithMetaFields);
}
// update the new location of record, so we know where to find it next
record.unseal();
@@ -180,18 +176,14 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload, I, K, O> extends
} else {
keyIterator = recordMap.keySet().stream().iterator();
}
- try {
- while (keyIterator.hasNext()) {
- final String key = keyIterator.next();
- HoodieRecord<T> record = recordMap.get(key);
- if (useWriterSchema) {
- write(record, record.getData().getInsertValue(tableSchemaWithMetaFields, config.getProps()));
- } else {
- write(record, record.getData().getInsertValue(tableSchema, config.getProps()));
- }
+ while (keyIterator.hasNext()) {
+ final String key = keyIterator.next();
+ HoodieRecord<T> record = recordMap.get(key);
+ if (useWriterSchema) {
+ write(record, tableSchemaWithMetaFields, config.getProps());
+ } else {
+ write(record, useWriterSchema ? tableSchemaWithMetaFields : tableSchema, config.getProps());
}
- } catch (IOException io) {
- throw new HoodieInsertException("Failed to insert records for path " + path, io);
}
}
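Note: the create path above no longer extracts an Avro insert value per record; callers hand the handle each record together with the schema and props to resolve it against. A minimal caller-side sketch under that assumption (the helper method below is hypothetical and not part of this commit):

    static void writeAll(HoodieCreateHandle<?, ?, ?, ?> handle, Iterator<HoodieRecord> records,
                         Schema schema, TypedProperties props) {
      while (records.hasNext()) {
        handle.write(records.next(), schema, props); // dispatches to doWrite(record, schema, props)
      }
      handle.close();                                // seals the file and returns the WriteStatus list
    }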
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieIOHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieIOHandle.java
index 1ad28d14b3..a8b7965d80 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieIOHandle.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieIOHandle.java
@@ -18,7 +18,6 @@
package org.apache.hudi.io;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.config.HoodieWriteConfig;
@@ -26,7 +25,7 @@ import org.apache.hudi.table.HoodieTable;
import org.apache.hadoop.fs.FileSystem;
-public abstract class HoodieIOHandle<T extends HoodieRecordPayload, I, K, O> {
+public abstract class HoodieIOHandle<T, I, K, O> {
protected final String instantTime;
protected final HoodieWriteConfig config;
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLocationFetchHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLocationFetchHandle.java
index ab8b83c14a..f062530368 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLocationFetchHandle.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLocationFetchHandle.java
@@ -21,7 +21,6 @@ package org.apache.hudi.io;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecordLocation;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.BaseFileUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;
@@ -40,7 +39,7 @@ import java.util.stream.Stream;
*
* @param <T>
*/
-public class HoodieKeyLocationFetchHandle<T extends HoodieRecordPayload, I, K, O> extends HoodieReadHandle<T, I, K, O> {
+public class HoodieKeyLocationFetchHandle<T, I, K, O> extends HoodieReadHandle<T, I, K, O> {
private final Pair<String, HoodieBaseFile> partitionPathBaseFilePair;
private final Option<BaseKeyGenerator> keyGeneratorOpt;
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLookupHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLookupHandle.java
index a38ae7f1f1..bc1da8c217 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLookupHandle.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLookupHandle.java
@@ -18,18 +18,16 @@
package org.apache.hudi.io;
+import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.bloom.BloomFilter;
import org.apache.hudi.common.model.HoodieBaseFile;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.HoodieTimer;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieIndexException;
import org.apache.hudi.index.HoodieIndexUtils;
-import org.apache.hudi.io.storage.HoodieFileReader;
+import org.apache.hudi.io.storage.HoodieAvroFileReader;
import org.apache.hudi.table.HoodieTable;
-
-import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
@@ -42,7 +40,7 @@ import static org.apache.hudi.metadata.MetadataPartitionType.BLOOM_FILTERS;
/**
* Takes a bunch of keys and returns ones that are present in the file group.
*/
-public class HoodieKeyLookupHandle<T extends HoodieRecordPayload, I, K, O> extends HoodieReadHandle<T, I, K, O> {
+public class HoodieKeyLookupHandle<T, I, K, O> extends HoodieReadHandle<T, I, K, O> {
private static final Logger LOG = LogManager.getLogger(HoodieKeyLookupHandle.class);
@@ -68,7 +66,7 @@ public class HoodieKeyLookupHandle<T extends HoodieRecordPayload, I, K, O> exten
bloomFilter = hoodieTable.getMetadataTable().getBloomFilter(partitionPathFileIDPair.getLeft(), partitionPathFileIDPair.getRight())
.orElseThrow(() -> new HoodieIndexException("BloomFilter missing for " + partitionPathFileIDPair.getRight()));
} else {
- try (HoodieFileReader reader = createNewFileReader()) {
+ try (HoodieAvroFileReader reader = createNewFileReader()) {
bloomFilter = reader.readBloomFilter();
}
}
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java
index 5515c2552e..543c51a805 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java
@@ -19,6 +19,7 @@
package org.apache.hudi.io;
import org.apache.hudi.client.WriteStatus;
+import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieBaseFile;
@@ -27,7 +28,6 @@ import org.apache.hudi.common.model.HoodieOperation;
import org.apache.hudi.common.model.HoodiePartitionMetadata;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordLocation;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.model.HoodieWriteStat.RuntimeStats;
import org.apache.hudi.common.model.IOType;
@@ -40,16 +40,13 @@ import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieCorruptedDataException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.HoodieUpsertException;
-import org.apache.hudi.io.storage.HoodieFileReader;
+import org.apache.hudi.io.storage.HoodieAvroFileReader;
import org.apache.hudi.io.storage.HoodieFileReaderFactory;
import org.apache.hudi.io.storage.HoodieFileWriter;
import org.apache.hudi.keygen.BaseKeyGenerator;
-import org.apache.hudi.keygen.KeyGenUtils;
import org.apache.hudi.table.HoodieTable;
import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
@@ -63,6 +60,7 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
+import java.util.Properties;
import java.util.Set;
@SuppressWarnings("Duplicates")
@@ -95,13 +93,13 @@ import java.util.Set;
* </p>
*/
@NotThreadSafe
-public class HoodieMergeHandle<T extends HoodieRecordPayload, I, K, O> extends HoodieWriteHandle<T, I, K, O> {
+public class HoodieMergeHandle<T, I, K, O> extends HoodieWriteHandle<T, I, K, O> {
private static final Logger LOG = LogManager.getLogger(HoodieMergeHandle.class);
protected Map<String, HoodieRecord<T>> keyToNewRecords;
protected Set<String> writtenRecordKeys;
- protected HoodieFileWriter<IndexedRecord> fileWriter;
+ protected HoodieFileWriter fileWriter;
private boolean preserveMetadata = false;
protected Path newFilePath;
@@ -268,12 +266,12 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload, I, K, O> extends H
+ ((ExternalSpillableMap) keyToNewRecords).getSizeOfFileOnDiskInBytes());
}
- protected boolean writeUpdateRecord(HoodieRecord<T> hoodieRecord, GenericRecord oldRecord, Option<IndexedRecord> indexedRecord) {
+ protected boolean writeUpdateRecord(HoodieRecord<T> hoodieRecord, HoodieRecord<T> oldRecord, Option<HoodieRecord> combineRecordOp) throws IOException {
boolean isDelete = false;
- if (indexedRecord.isPresent()) {
+ Schema schema = useWriterSchemaForCompaction ? tableSchemaWithMetaFields : tableSchema;
+ if (combineRecordOp.isPresent()) {
updatedRecordsWritten++;
- GenericRecord record = (GenericRecord) indexedRecord.get();
- if (oldRecord != record) {
+ if (oldRecord.getData() != combineRecordOp.get().getData()) {
// the incoming record is chosen
isDelete = HoodieOperation.isDelete(hoodieRecord.getOperation());
} else {
@@ -281,31 +279,30 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload, I, K, O> extends H
return false;
}
}
- return writeRecord(hoodieRecord, indexedRecord, isDelete);
+ return writeRecord(hoodieRecord, combineRecordOp, schema, config.getProps(), isDelete);
}
protected void writeInsertRecord(HoodieRecord<T> hoodieRecord) throws IOException {
Schema schema = useWriterSchemaForCompaction ? tableSchemaWithMetaFields : tableSchema;
- Option<IndexedRecord> insertRecord = hoodieRecord.getData().getInsertValue(schema, config.getProps());
// just skip the ignored record
- if (insertRecord.isPresent() && insertRecord.get().equals(IGNORE_RECORD)) {
+ if (hoodieRecord.shouldIgnore(schema, config.getProps())) {
return;
}
- writeInsertRecord(hoodieRecord, insertRecord);
+ writeInsertRecord(hoodieRecord, Option.of(hoodieRecord), schema, config.getProps());
}
- protected void writeInsertRecord(HoodieRecord<T> hoodieRecord, Option<IndexedRecord> insertRecord) {
- if (writeRecord(hoodieRecord, insertRecord, HoodieOperation.isDelete(hoodieRecord.getOperation()))) {
+ protected void writeInsertRecord(HoodieRecord<T> hoodieRecord, Option<HoodieRecord> insertRecord, Schema schema, Properties prop) {
+ if (writeRecord(hoodieRecord, insertRecord, schema, prop, HoodieOperation.isDelete(hoodieRecord.getOperation()))) {
insertRecordsWritten++;
}
}
- protected boolean writeRecord(HoodieRecord<T> hoodieRecord, Option<IndexedRecord> indexedRecord) {
- return writeRecord(hoodieRecord, indexedRecord, false);
+ protected boolean writeRecord(HoodieRecord<T> hoodieRecord, Option<HoodieRecord> combineRecord, Schema schema, Properties prop) throws IOException {
+ return writeRecord(hoodieRecord, combineRecord, schema, prop, false);
}
- protected boolean writeRecord(HoodieRecord<T> hoodieRecord, Option<IndexedRecord> indexedRecord, boolean isDelete) {
- Option recordMetadata = hoodieRecord.getData().getMetadata();
+ protected boolean writeRecord(HoodieRecord<T> hoodieRecord, Option<HoodieRecord> combineRecord, Schema schema, Properties prop, boolean isDelete) throws IOException {
+ Option recordMetadata = hoodieRecord.getMetadata();
if (!partitionPath.equals(hoodieRecord.getPartitionPath())) {
HoodieUpsertException failureEx = new HoodieUpsertException("mismatched partition path, record partition: "
+ hoodieRecord.getPartitionPath() + " but trying to insert into partition: " + partitionPath);
@@ -313,8 +310,8 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload, I, K, O> extends H
return false;
}
try {
- if (indexedRecord.isPresent() && !isDelete) {
- writeToFile(hoodieRecord.getKey(), (GenericRecord) indexedRecord.get(), preserveMetadata && useWriterSchemaForCompaction);
+ if (combineRecord.isPresent() && combineRecord.get().isPresent(schema, config.getProps()) && !isDelete) {
+ writeToFile(hoodieRecord.getKey(), combineRecord.get(), schema, prop, preserveMetadata && useWriterSchemaForCompaction);
recordsWritten++;
} else {
recordsDeleted++;
@@ -335,23 +332,25 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload, I, K, O> extends H
/**
* Go through an old record. Here if we detect a newer version shows up, we write the new one to the file.
*/
- public void write(GenericRecord oldRecord) {
- String key = KeyGenUtils.getRecordKeyFromGenericRecord(oldRecord, keyGeneratorOpt);
+ public void write(HoodieRecord<T> oldRecord) {
+ String key = oldRecord.getRecordKey(keyGeneratorOpt);
boolean copyOldRecord = true;
+ Schema schema = useWriterSchemaForCompaction ? tableSchemaWithMetaFields : tableSchema;
+ TypedProperties props = config.getPayloadConfig().getProps();
if (keyToNewRecords.containsKey(key)) {
// If we have duplicate records that we are updating, then the hoodie record will be deflated after
// writing the first record. So make a copy of the record to be merged
HoodieRecord<T> hoodieRecord = keyToNewRecords.get(key).newInstance();
try {
- Option<IndexedRecord> combinedAvroRecord =
- hoodieRecord.getData().combineAndGetUpdateValue(oldRecord,
- useWriterSchemaForCompaction ? tableSchemaWithMetaFields : tableSchema,
- config.getPayloadConfig().getProps());
+ Option<HoodieRecord> combinedRecord =
+ hoodieRecord.combineAndGetUpdateValue(oldRecord,
+ schema,
+ props);
- if (combinedAvroRecord.isPresent() && combinedAvroRecord.get().equals(IGNORE_RECORD)) {
+ if (combinedRecord.isPresent() && combinedRecord.get().shouldIgnore(schema, props)) {
// If it is an IGNORE_RECORD, just copy the old record, and do not update the new record.
copyOldRecord = true;
- } else if (writeUpdateRecord(hoodieRecord, oldRecord, combinedAvroRecord)) {
+ } else if (writeUpdateRecord(hoodieRecord, oldRecord, combinedRecord)) {
/*
* ONLY WHEN 1) we have an update for this key AND 2) We are able to successfully
* write the combined new value
@@ -370,7 +369,7 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload, I, K, O> extends H
if (copyOldRecord) {
try {
// NOTE: We're enforcing preservation of the record metadata to keep existing semantic
- writeToFile(new HoodieKey(key, partitionPath), oldRecord, true);
+ writeToFile(new HoodieKey(key, partitionPath), oldRecord, schema, props, true);
} catch (IOException | RuntimeException e) {
String errMsg = String.format("Failed to merge old record into new file for key %s from old file %s to new file %s with writerSchema %s",
key, getOldFilePath(), newFilePath, writeSchemaWithMetaFields.toString(true));
@@ -381,13 +380,13 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload, I, K, O> extends H
}
}
- protected void writeToFile(HoodieKey key, GenericRecord avroRecord, boolean shouldPreserveRecordMetadata) throws IOException {
+ protected void writeToFile(HoodieKey key, HoodieRecord<T> record, Schema schema, Properties prop, boolean shouldPreserveRecordMetadata) throws IOException {
if (shouldPreserveRecordMetadata) {
// NOTE: `FILENAME_METADATA_FIELD` has to be rewritten to correctly point to the
// file holding this record even in cases when overall metadata is preserved
- fileWriter.writeAvro(key.getRecordKey(), rewriteRecordWithMetadata(avroRecord, newFilePath.getName()));
+ fileWriter.write(key.getRecordKey(), record.rewriteRecordWithMetadata(schema, prop, schemaOnReadEnabled, writeSchemaWithMetaFields, newFilePath.getName()), writeSchemaWithMetaFields);
} else {
- fileWriter.writeAvroWithMetadata(key, rewriteRecord(avroRecord));
+ fileWriter.writeWithMetadata(key, record.rewriteRecord(schema, prop, schemaOnReadEnabled, writeSchemaWithMetaFields), writeSchemaWithMetaFields);
}
}
@@ -452,7 +451,7 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload, I, K, O> extends H
long oldNumWrites = 0;
try {
- HoodieFileReader reader = HoodieFileReaderFactory.getFileReader(hoodieTable.getHadoopConf(), oldFilePath);
+ HoodieAvroFileReader reader = HoodieFileReaderFactory.getFileReader(hoodieTable.getHadoopConf(), oldFilePath);
oldNumWrites = reader.getTotalRecords();
} catch (IOException e) {
throw new HoodieUpsertException("Failed to check for merge data validation", e);
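Note: the merge path now stays in HoodieRecord space end to end: combineAndGetUpdateValue is invoked on the incoming record itself, and the skip marker is detected via shouldIgnore rather than by comparing against IGNORE_RECORD. A condensed sketch of that decision, assuming the same schema/props selection as in write(HoodieRecord) above; variable names are illustrative and exception handling is elided:

    Schema schema = useWriterSchemaForCompaction ? tableSchemaWithMetaFields : tableSchema;
    TypedProperties props = config.getPayloadConfig().getProps();

    Option<HoodieRecord> combined = incoming.combineAndGetUpdateValue(oldRecord, schema, props);
    if (combined.isPresent() && combined.get().shouldIgnore(schema, props)) {
      // skip marker: keep the old record as-is, do not rewrite it with the incoming one
    } else if (writeUpdateRecord(incoming, oldRecord, combined)) {
      // the combined value was written; the old record must not be copied again
    }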
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieRangeInfoHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieRangeInfoHandle.java
index abe4a9befe..753d1db301 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieRangeInfoHandle.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieRangeInfoHandle.java
@@ -18,10 +18,9 @@
package org.apache.hudi.io;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieWriteConfig;
-import org.apache.hudi.io.storage.HoodieFileReader;
+import org.apache.hudi.io.storage.HoodieAvroFileReader;
import org.apache.hudi.table.HoodieTable;
import java.io.IOException;
@@ -29,7 +28,7 @@ import java.io.IOException;
/**
* Extract range information for a given file slice.
*/
-public class HoodieRangeInfoHandle<T extends HoodieRecordPayload, I, K, O> extends HoodieReadHandle<T, I, K, O> {
+public class HoodieRangeInfoHandle<T, I, K, O> extends HoodieReadHandle<T, I, K, O> {
public HoodieRangeInfoHandle(HoodieWriteConfig config, HoodieTable<T, I, K, O> hoodieTable,
Pair<String, String> partitionPathFilePair) {
@@ -37,7 +36,7 @@ public class HoodieRangeInfoHandle<T extends HoodieRecordPayload, I, K, O> exten
}
public String[] getMinMaxKeys() throws IOException {
- try (HoodieFileReader reader = createNewFileReader()) {
+ try (HoodieAvroFileReader reader = createNewFileReader()) {
return reader.readMinMaxRecordKeys();
}
}
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieReadHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieReadHandle.java
index fee75b22de..b699e9700c 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieReadHandle.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieReadHandle.java
@@ -21,11 +21,10 @@ package org.apache.hudi.io;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.model.HoodieBaseFile;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieWriteConfig;
-import org.apache.hudi.io.storage.HoodieFileReader;
+import org.apache.hudi.io.storage.HoodieAvroFileReader;
import org.apache.hudi.io.storage.HoodieFileReaderFactory;
import org.apache.hudi.table.HoodieTable;
@@ -34,7 +33,7 @@ import java.io.IOException;
/**
* Base class for read operations done logically on the file group.
*/
-public abstract class HoodieReadHandle<T extends HoodieRecordPayload, I, K, O> extends HoodieIOHandle<T, I, K, O> {
+public abstract class HoodieReadHandle<T, I, K, O> extends HoodieIOHandle<T, I, K, O> {
protected final Pair<String, String> partitionPathFileIDPair;
@@ -62,7 +61,7 @@ public abstract class HoodieReadHandle<T extends HoodieRecordPayload, I, K, O> e
.getLatestBaseFile(partitionPathFileIDPair.getLeft(), partitionPathFileIDPair.getRight()).get();
}
- protected HoodieFileReader createNewFileReader() throws IOException {
+ protected HoodieAvroFileReader createNewFileReader() throws IOException {
return HoodieFileReaderFactory.getFileReader(hoodieTable.getHadoopConf(),
new Path(getLatestDataFile().getPath()));
}
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieSortedMergeHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieSortedMergeHandle.java
index 7dce31a4c3..60c6a2da7f 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieSortedMergeHandle.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieSortedMergeHandle.java
@@ -22,16 +22,12 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieUpsertException;
import org.apache.hudi.keygen.BaseKeyGenerator;
-import org.apache.hudi.keygen.KeyGenUtils;
import org.apache.hudi.table.HoodieTable;
-import org.apache.avro.generic.GenericRecord;
-
import javax.annotation.concurrent.NotThreadSafe;
import java.io.IOException;
@@ -48,7 +44,7 @@ import java.util.Queue;
* keys in newRecordKeys (sorted in-memory).
*/
@NotThreadSafe
-public class HoodieSortedMergeHandle<T extends HoodieRecordPayload, I, K, O> extends HoodieMergeHandle<T, I, K, O> {
+public class HoodieSortedMergeHandle<T, I, K, O> extends HoodieMergeHandle<T, I, K, O> {
private final Queue<String> newRecordKeysSorted = new PriorityQueue<>();
@@ -75,8 +71,8 @@ public class HoodieSortedMergeHandle<T extends HoodieRecordPayload, I, K, O> ext
* Go through an old record. Here if we detect a newer version shows up, we write the new one to the file.
*/
@Override
- public void write(GenericRecord oldRecord) {
- String key = KeyGenUtils.getRecordKeyFromGenericRecord(oldRecord, keyGeneratorOpt);
+ public void write(HoodieRecord oldRecord) {
+ String key = oldRecord.getRecordKey(keyGeneratorOpt);
// To maintain overall sorted order across updates and inserts, write any new inserts whose keys are less than
// the oldRecord's key.
@@ -94,9 +90,9 @@ public class HoodieSortedMergeHandle<T extends HoodieRecordPayload, I, K, O> ext
}
try {
if (useWriterSchemaForCompaction) {
- writeRecord(hoodieRecord, hoodieRecord.getData().getInsertValue(tableSchemaWithMetaFields, config.getProps()));
+ writeRecord(hoodieRecord, Option.of(hoodieRecord), tableSchemaWithMetaFields, config.getProps());
} else {
- writeRecord(hoodieRecord, hoodieRecord.getData().getInsertValue(tableSchema, config.getProps()));
+ writeRecord(hoodieRecord, Option.of(hoodieRecord), tableSchema, config.getProps());
}
insertRecordsWritten++;
writtenRecordKeys.add(keyToPreWrite);
@@ -117,9 +113,9 @@ public class HoodieSortedMergeHandle<T extends HoodieRecordPayload, I, K, O> ext
HoodieRecord<T> hoodieRecord = keyToNewRecords.get(key);
if (!writtenRecordKeys.contains(hoodieRecord.getRecordKey())) {
if (useWriterSchemaForCompaction) {
- writeRecord(hoodieRecord, hoodieRecord.getData().getInsertValue(tableSchemaWithMetaFields, config.getProps()));
+ writeRecord(hoodieRecord, Option.of(hoodieRecord), tableSchemaWithMetaFields, config.getProps());
} else {
- writeRecord(hoodieRecord, hoodieRecord.getData().getInsertValue(tableSchema, config.getProps()));
+ writeRecord(hoodieRecord, Option.of(hoodieRecord), tableSchema, config.getProps());
}
insertRecordsWritten++;
}
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieUnboundedCreateHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieUnboundedCreateHandle.java
index ebbc7a5c28..71a1981687 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieUnboundedCreateHandle.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieUnboundedCreateHandle.java
@@ -20,7 +20,6 @@ package org.apache.hudi.io;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
@@ -36,7 +35,7 @@ import javax.annotation.concurrent.NotThreadSafe;
* Please use this with caution. This can end up creating very large files if not used correctly.
*/
@NotThreadSafe
-public class HoodieUnboundedCreateHandle<T extends HoodieRecordPayload, I, K, O> extends HoodieCreateHandle<T, I, K, O> {
+public class HoodieUnboundedCreateHandle<T, I, K, O> extends HoodieCreateHandle<T, I, K, O> {
private static final Logger LOG = LogManager.getLogger(HoodieUnboundedCreateHandle.class);
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieWriteHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieWriteHandle.java
index abf5c0face..782ce0b7f2 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieWriteHandle.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieWriteHandle.java
@@ -18,14 +18,17 @@
package org.apache.hudi.io;
+import org.apache.avro.Schema;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.client.WriteStatus;
+import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.IOType;
import org.apache.hudi.common.table.log.HoodieLogFormat;
import org.apache.hudi.common.util.HoodieTimer;
@@ -38,48 +41,22 @@ import org.apache.hudi.io.storage.HoodieFileWriter;
import org.apache.hudi.io.storage.HoodieFileWriterFactory;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.marker.WriteMarkersFactory;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.generic.IndexedRecord;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
-import java.util.HashMap;
import static org.apache.hudi.common.util.StringUtils.isNullOrEmpty;
/**
* Base class for all write operations logically performed at the file group level.
*/
-public abstract class HoodieWriteHandle<T extends HoodieRecordPayload, I, K, O> extends HoodieIOHandle<T, I, K, O> {
+public abstract class HoodieWriteHandle<T, I, K, O> extends HoodieIOHandle<T, I, K, O> {
private static final Logger LOG = LogManager.getLogger(HoodieWriteHandle.class);
- /**
- * A special record returned by {@link HoodieRecordPayload}, which means
- * {@link HoodieWriteHandle} should just skip this record.
- * This record is only used for {@link HoodieRecordPayload} currently, so it should not
- * shuffle though network, we can compare the record locally by the equal method.
- * The HoodieRecordPayload#combineAndGetUpdateValue and HoodieRecordPayload#getInsertValue
- * have 3 kind of return:
- * 1、Option.empty
- * This means we should delete this record.
- * 2、IGNORE_RECORD
- * This means we should not process this record,just skip.
- * 3、Other non-empty record
- * This means we should process this record.
- *
- * We can see the usage of IGNORE_RECORD in
- * org.apache.spark.sql.hudi.command.payload.ExpressionPayload
- */
- public static IgnoreRecord IGNORE_RECORD = new IgnoreRecord();
-
/**
* The specified schema of the table. ("specified" denotes that this is configured by the client,
* as opposed to being implicitly fetched out of the commit metadata)
@@ -212,35 +189,15 @@ public abstract class HoodieWriteHandle<T extends HoodieRecordPayload, I, K, O>
/**
* Perform the actual writing of the given record into the backing file.
*/
- public void write(HoodieRecord record, Option<IndexedRecord> insertValue) {
+ protected void doWrite(HoodieRecord record, Schema schema, TypedProperties props) {
// NO_OP
}
/**
* Perform the actual writing of the given record into the backing file.
*/
- public void write(HoodieRecord record, Option<IndexedRecord> avroRecord, Option<Exception> exception) {
- Option recordMetadata = ((HoodieRecordPayload) record.getData()).getMetadata();
- if (exception.isPresent() && exception.get() instanceof Throwable) {
- // Not throwing exception from here, since we don't want to fail the entire job for a single record
- writeStatus.markFailure(record, exception.get(), recordMetadata);
- LOG.error("Error writing record " + record, exception.get());
- } else {
- write(record, avroRecord);
- }
- }
-
- /**
- * Rewrite the GenericRecord with the Schema containing the Hoodie Metadata fields.
- */
- protected GenericRecord rewriteRecord(GenericRecord record) {
- return schemaOnReadEnabled ? HoodieAvroUtils.rewriteRecordWithNewSchema(record, writeSchemaWithMetaFields, new HashMap<>())
- : HoodieAvroUtils.rewriteRecord(record, writeSchemaWithMetaFields);
- }
-
- protected GenericRecord rewriteRecordWithMetadata(GenericRecord record, String fileName) {
- return schemaOnReadEnabled ? HoodieAvroUtils.rewriteEvolutionRecordWithMetadata(record, writeSchemaWithMetaFields, fileName)
- : HoodieAvroUtils.rewriteRecordWithMetadata(record, writeSchemaWithMetaFields, fileName);
+ public void write(HoodieRecord record, Schema schema, TypedProperties props) {
+ doWrite(record, schema, props);
}
public abstract List<WriteStatus> close();
@@ -273,7 +230,7 @@ public abstract class HoodieWriteHandle<T extends HoodieRecordPayload, I, K, O>
}
protected HoodieFileWriter createNewFileWriter(String instantTime, Path path, HoodieTable<T, I, K, O> hoodieTable,
- HoodieWriteConfig config, Schema schema, TaskContextSupplier taskContextSupplier) throws IOException {
+ HoodieWriteConfig config, Schema schema, TaskContextSupplier taskContextSupplier) throws IOException {
return HoodieFileWriterFactory.getFileWriter(instantTime, path, hoodieTable, config, schema, taskContextSupplier);
}
@@ -319,32 +276,4 @@ public abstract class HoodieWriteHandle<T extends HoodieRecordPayload, I, K, O>
+ "file suffix: " + fileSuffix + " error");
}
}
-
- private static class IgnoreRecord implements GenericRecord {
-
- @Override
- public void put(int i, Object v) {
-
- }
-
- @Override
- public Object get(int i) {
- return null;
- }
-
- @Override
- public Schema getSchema() {
- return null;
- }
-
- @Override
- public void put(String key, Object v) {
-
- }
-
- @Override
- public Object get(String key) {
- return null;
- }
- }
}
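Note: with IGNORE_RECORD removed, the base handle's public write(record, schema, props) simply delegates to doWrite(...), and subclasses ask the record itself whether to skip it. A minimal subclass-side sketch under those assumptions (the body is illustrative, not the exact code of any handle above):

    @Override
    protected void doWrite(HoodieRecord record, Schema schema, TypedProperties props) {
      try {
        if (record.shouldIgnore(schema, props)) {
          return; // skip marker produced by the payload (e.g. ExpressionPayload): neither write nor delete
        }
        // ... write when record.isPresent(schema, props), otherwise treat it as a delete ...
      } catch (IOException e) {
        writeStatus.markFailure(record, e, record.getMetadata());
      }
    }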
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/SingleFileHandleCreateFactory.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/SingleFileHandleCreateFactory.java
index a3f7c04ef2..fa5ce2c68b 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/SingleFileHandleCreateFactory.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/SingleFileHandleCreateFactory.java
@@ -19,7 +19,6 @@
package org.apache.hudi.io;
import org.apache.hudi.common.engine.TaskContextSupplier;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.table.HoodieTable;
@@ -32,7 +31,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
* <p>
* Please use this with caution. This can end up creating very large files if not used correctly.
*/
-public class SingleFileHandleCreateFactory<T extends HoodieRecordPayload, I, K, O> extends CreateHandleFactory<T, I, K, O> implements Serializable {
+public class SingleFileHandleCreateFactory<T, I, K, O> extends CreateHandleFactory<T, I, K, O> implements Serializable {
private final AtomicBoolean isHandleCreated = new AtomicBoolean(false);
private final String fileId;
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/WriteHandleFactory.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/WriteHandleFactory.java
index c267b5969d..46a0b1c614 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/WriteHandleFactory.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/WriteHandleFactory.java
@@ -20,13 +20,12 @@ package org.apache.hudi.io;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.fs.FSUtils;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
import java.io.Serializable;
-public abstract class WriteHandleFactory<T extends HoodieRecordPayload, I, K, O> implements Serializable {
+public abstract class WriteHandleFactory<T, I, K, O> implements Serializable {
private int numFilesWritten = 0;
public abstract HoodieWriteHandle<T, I, K, O> create(HoodieWriteConfig config, String commitTime, HoodieTable<T, I, K, O> hoodieTable,
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileWriter.java
similarity index 87%
rename from hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java
rename to hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileWriter.java
index f065608b29..c143f782d4 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileWriter.java
@@ -36,7 +36,6 @@ import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.fs.HoodieWrapperFileSystem;
import org.apache.hudi.common.model.HoodieKey;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
@@ -53,8 +52,8 @@ import java.util.concurrent.atomic.AtomicLong;
* 1. Records should be added in order of keys
* 2. There are no column stats
*/
-public class HoodieHFileWriter<T extends HoodieRecordPayload, R extends IndexedRecord>
- implements HoodieFileWriter<R> {
+public class HoodieAvroHFileWriter
+ implements HoodieAvroFileWriter {
private static AtomicLong recordIndex = new AtomicLong(1);
private final Path file;
@@ -73,8 +72,8 @@ public class HoodieHFileWriter<T extends HoodieRecordPayload, R extends IndexedR
// This is private in CacheConfig so have been copied here.
private static String DROP_BEHIND_CACHE_COMPACTION_KEY = "hbase.hfile.drop.behind.compaction";
- public HoodieHFileWriter(String instantTime, Path file, HoodieHFileConfig hfileConfig, Schema schema,
- TaskContextSupplier taskContextSupplier, boolean populateMetaFields) throws IOException {
+ public HoodieAvroHFileWriter(String instantTime, Path file, HoodieHFileConfig hfileConfig, Schema schema,
+ TaskContextSupplier taskContextSupplier, boolean populateMetaFields) throws IOException {
Configuration conf = FSUtils.registerFileSystem(file, hfileConfig.getHadoopConf());
this.file = HoodieWrapperFileSystem.convertToHoodiePath(file, conf);
@@ -106,11 +105,11 @@ public class HoodieHFileWriter<T extends HoodieRecordPayload, R extends IndexedR
.withFileContext(context)
.create();
- writer.appendFileInfo(HoodieHFileReader.SCHEMA_KEY.getBytes(), schema.toString().getBytes());
+ writer.appendFileInfo(HoodieAvroHFileReader.SCHEMA_KEY.getBytes(), schema.toString().getBytes());
}
@Override
- public void writeAvroWithMetadata(HoodieKey key, R avroRecord) throws IOException {
+ public void writeAvroWithMetadata(HoodieKey key, IndexedRecord avroRecord) throws IOException {
if (populateMetaFields) {
prepRecordWithMetadata(key, avroRecord, instantTime,
taskContextSupplier.getPartitionIdSupplier().get(), recordIndex.getAndIncrement(), file.getName());
@@ -167,11 +166,11 @@ public class HoodieHFileWriter<T extends HoodieRecordPayload, R extends IndexedR
if (maxRecordKey == null) {
maxRecordKey = "";
}
- writer.appendFileInfo(HoodieHFileReader.KEY_MIN_RECORD.getBytes(), minRecordKey.getBytes());
- writer.appendFileInfo(HoodieHFileReader.KEY_MAX_RECORD.getBytes(), maxRecordKey.getBytes());
- writer.appendFileInfo(HoodieHFileReader.KEY_BLOOM_FILTER_TYPE_CODE.getBytes(),
+ writer.appendFileInfo(HoodieAvroHFileReader.KEY_MIN_RECORD.getBytes(), minRecordKey.getBytes());
+ writer.appendFileInfo(HoodieAvroHFileReader.KEY_MAX_RECORD.getBytes(), maxRecordKey.getBytes());
+ writer.appendFileInfo(HoodieAvroHFileReader.KEY_BLOOM_FILTER_TYPE_CODE.getBytes(),
bloomFilter.getBloomFilterTypeCode().toString().getBytes());
- writer.appendMetaBlock(HoodieHFileReader.KEY_BLOOM_FILTER_META_BLOCK, new Writable() {
+ writer.appendMetaBlock(HoodieAvroHFileReader.KEY_BLOOM_FILTER_META_BLOCK, new Writable() {
@Override
public void write(DataOutput out) throws IOException {
out.write(bloomFilter.serializeToString().getBytes());
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieOrcWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroOrcWriter.java
similarity index 93%
rename from hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieOrcWriter.java
rename to hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroOrcWriter.java
index a532ac66c9..acce47f7d5 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieOrcWriter.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroOrcWriter.java
@@ -29,7 +29,6 @@ import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.fs.HoodieWrapperFileSystem;
import org.apache.hudi.common.model.HoodieKey;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.AvroOrcUtils;
import org.apache.orc.OrcFile;
import org.apache.orc.TypeDescription;
@@ -48,8 +47,7 @@ import static org.apache.hudi.avro.HoodieAvroWriteSupport.HOODIE_BLOOM_FILTER_TY
import static org.apache.hudi.avro.HoodieAvroWriteSupport.HOODIE_MAX_RECORD_KEY_FOOTER;
import static org.apache.hudi.avro.HoodieAvroWriteSupport.HOODIE_MIN_RECORD_KEY_FOOTER;
-public class HoodieOrcWriter<T extends HoodieRecordPayload, R extends IndexedRecord>
- implements HoodieFileWriter<R>, Closeable {
+public class HoodieAvroOrcWriter implements HoodieAvroFileWriter, Closeable {
private static final AtomicLong RECORD_INDEX = new AtomicLong(1);
private final long maxFileSize;
@@ -68,8 +66,8 @@ public class HoodieOrcWriter<T extends HoodieRecordPayload, R extends IndexedRec
private String minRecordKey;
private String maxRecordKey;
- public HoodieOrcWriter(String instantTime, Path file, HoodieOrcConfig config, Schema schema,
- TaskContextSupplier taskContextSupplier) throws IOException {
+ public HoodieAvroOrcWriter(String instantTime, Path file, HoodieOrcConfig config, Schema schema,
+ TaskContextSupplier taskContextSupplier) throws IOException {
Configuration conf = FSUtils.registerFileSystem(file, config.getHadoopConf());
this.file = HoodieWrapperFileSystem.convertToHoodiePath(file, conf);
@@ -95,7 +93,7 @@ public class HoodieOrcWriter<T extends HoodieRecordPayload, R extends IndexedRec
}
@Override
- public void writeAvroWithMetadata(HoodieKey key, R avroRecord) throws IOException {
+ public void writeAvroWithMetadata(HoodieKey key, IndexedRecord avroRecord) throws IOException {
prepRecordWithMetadata(key, avroRecord, instantTime,
taskContextSupplier.getPartitionIdSupplier().get(), RECORD_INDEX.getAndIncrement(), file.getName());
writeAvro(key.getRecordKey(), avroRecord);
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetWriter.java
index 06631dc53f..36033d26b0 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetWriter.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetWriter.java
@@ -35,9 +35,9 @@ import java.io.IOException;
* ATTENTION: HoodieParquetWriter is not thread safe and developer should take care of the order of write and close
*/
@NotThreadSafe
-public class HoodieAvroParquetWriter<R extends IndexedRecord>
+public class HoodieAvroParquetWriter
extends HoodieBaseParquetWriter<IndexedRecord>
- implements HoodieFileWriter<R> {
+ implements HoodieAvroFileWriter {
private final String fileName;
private final String instantTime;
@@ -60,7 +60,7 @@ public class HoodieAvroParquetWriter<R extends IndexedRecord>
}
@Override
- public void writeAvroWithMetadata(HoodieKey key, R avroRecord) throws IOException {
+ public void writeAvroWithMetadata(HoodieKey key, IndexedRecord avroRecord) throws IOException {
if (populateMetaFields) {
prepRecordWithMetadata(key, avroRecord, instantTime,
taskContextSupplier.getPartitionIdSupplier().get(), getWrittenRecordCount(), fileName);
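Note: the renamed writers (HoodieAvroHFileWriter, HoodieAvroOrcWriter, HoodieAvroParquetWriter) implement the Avro-specific HoodieAvroFileWriter surface, so the per-record call site takes a plain IndexedRecord. A tiny sketch of that surface, assuming the Avro record has already been produced against the writer schema (the helper name is made up):

    static void writeOne(HoodieAvroFileWriter writer, HoodieKey key, IndexedRecord avroRecord)
        throws IOException {
      // meta fields (_hoodie_commit_time, _hoodie_record_key, ...) are filled in by the
      // writer itself, subject to its populateMetaFields setting
      writer.writeAvroWithMetadata(key, avroRecord);
    }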
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java
index 9ee8571ebd..5083ccf6c0 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java
@@ -23,7 +23,6 @@ import org.apache.hudi.common.bloom.BloomFilter;
import org.apache.hudi.common.bloom.BloomFilterFactory;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.fs.FSUtils;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
@@ -46,7 +45,7 @@ import static org.apache.hudi.io.storage.HoodieHFileConfig.PREFETCH_ON_OPEN;
public class HoodieFileWriterFactory {
- public static <T extends HoodieRecordPayload, R extends IndexedRecord, I, K, O> HoodieFileWriter<R> getFileWriter(
+ public static <T, I, K, O> HoodieFileWriter getFileWriter(
String instantTime, Path path, HoodieTable<T, I, K, O> hoodieTable, HoodieWriteConfig config, Schema schema,
TaskContextSupplier taskContextSupplier) throws IOException {
final String extension = FSUtils.getFileExtension(path.getName());
@@ -64,14 +63,14 @@ public class HoodieFileWriterFactory {
throw new UnsupportedOperationException(extension + " format not supported yet.");
}
- private static <T extends HoodieRecordPayload, R extends IndexedRecord> HoodieFileWriter<R> newParquetFileWriter(
+ private static <T> HoodieAvroFileWriter newParquetFileWriter(
String instantTime, Path path, HoodieWriteConfig config, Schema schema, HoodieTable hoodieTable,
TaskContextSupplier taskContextSupplier, boolean populateMetaFields) throws IOException {
return newParquetFileWriter(instantTime, path, config, schema, hoodieTable.getHadoopConf(),
taskContextSupplier, populateMetaFields, populateMetaFields);
}
- private static <T extends HoodieRecordPayload, R extends IndexedRecord> HoodieFileWriter<R> newParquetFileWriter(
+ private static <T, R extends IndexedRecord> HoodieAvroFileWriter newParquetFileWriter(
String instantTime, Path path, HoodieWriteConfig config, Schema schema, Configuration conf,
TaskContextSupplier taskContextSupplier, boolean populateMetaFields, boolean enableBloomFilter) throws IOException {
Option<BloomFilter> filter = enableBloomFilter ? Option.of(createBloomFilter(config)) : Option.empty();
@@ -81,34 +80,34 @@ public class HoodieFileWriterFactory {
config.getParquetBlockSize(), config.getParquetPageSize(), config.getParquetMaxFileSize(),
conf, config.getParquetCompressionRatio(), config.parquetDictionaryEnabled());
- return new HoodieAvroParquetWriter<>(path, parquetConfig, instantTime, taskContextSupplier, populateMetaFields);
+ return new HoodieAvroParquetWriter(path, parquetConfig, instantTime, taskContextSupplier, populateMetaFields);
}
- static <T extends HoodieRecordPayload, R extends IndexedRecord> HoodieFileWriter<R> newHFileFileWriter(
+ static <T, R extends IndexedRecord> HoodieAvroFileWriter newHFileFileWriter(
String instantTime, Path path, HoodieWriteConfig config, Schema schema, Configuration conf,
TaskContextSupplier taskContextSupplier) throws IOException {
BloomFilter filter = createBloomFilter(config);
HoodieHFileConfig hfileConfig = new HoodieHFileConfig(conf,
config.getHFileCompressionAlgorithm(), config.getHFileBlockSize(), config.getHFileMaxFileSize(),
- HoodieHFileReader.KEY_FIELD_NAME, PREFETCH_ON_OPEN, CACHE_DATA_IN_L1, DROP_BEHIND_CACHE_COMPACTION,
+ HoodieAvroHFileReader.KEY_FIELD_NAME, PREFETCH_ON_OPEN, CACHE_DATA_IN_L1, DROP_BEHIND_CACHE_COMPACTION,
filter, HFILE_COMPARATOR);
- return new HoodieHFileWriter<>(instantTime, path, hfileConfig, schema, taskContextSupplier, config.populateMetaFields());
+ return new HoodieAvroHFileWriter(instantTime, path, hfileConfig, schema, taskContextSupplier, config.populateMetaFields());
}
- private static <T extends HoodieRecordPayload, R extends IndexedRecord> HoodieFileWriter<R> newOrcFileWriter(
+ private static <T, R extends IndexedRecord> HoodieAvroFileWriter newOrcFileWriter(
String instantTime, Path path, HoodieWriteConfig config, Schema schema, Configuration conf,
TaskContextSupplier taskContextSupplier) throws IOException {
BloomFilter filter = createBloomFilter(config);
HoodieOrcConfig orcConfig = new HoodieOrcConfig(conf, config.getOrcCompressionCodec(),
config.getOrcStripeSize(), config.getOrcBlockSize(), config.getOrcMaxFileSize(), filter);
- return new HoodieOrcWriter<>(instantTime, path, orcConfig, schema, taskContextSupplier);
+ return new HoodieAvroOrcWriter(instantTime, path, orcConfig, schema, taskContextSupplier);
}
private static BloomFilter createBloomFilter(HoodieWriteConfig config) {
return BloomFilterFactory.createBloomFilter(config.getBloomFilterNumEntries(), config.getBloomFilterFPP(),
- config.getDynamicBloomFilterMaxNumEntries(),
- config.getBloomFilterType());
+ config.getDynamicBloomFilterMaxNumEntries(),
+ config.getBloomFilterType());
}
}
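Note: HoodieFileWriterFactory.getFileWriter drops the HoodieRecordPayload bound and returns the format-agnostic HoodieFileWriter; the extension of the target path selects which concrete HoodieAvro*Writer above gets built. A short usage sketch, assuming the variables are in scope as in the write handles earlier in this commit:

    HoodieFileWriter fileWriter = HoodieFileWriterFactory.getFileWriter(
        instantTime, path, hoodieTable, config, writeSchemaWithMetaFields, taskContextSupplier);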
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieCompactionHandler.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieCompactionHandler.java
index eeb287abd5..8d44c603c6 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieCompactionHandler.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieCompactionHandler.java
@@ -22,7 +22,6 @@ package org.apache.hudi.table;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import java.io.IOException;
import java.util.Iterator;
@@ -34,10 +33,10 @@ import java.util.Map;
*
* @param <T> HoodieRecordPayload type.
*/
-public interface HoodieCompactionHandler<T extends HoodieRecordPayload> {
+public interface HoodieCompactionHandler<T> {
Iterator<List<WriteStatus>> handleUpdate(String instantTime, String partitionPath, String fileId,
Map<String, HoodieRecord<T>> keyToNewRecords, HoodieBaseFile oldDataFile) throws IOException;
Iterator<List<WriteStatus>> handleInsert(String instantTime, String partitionPath, String fileId,
- Map<String, HoodieRecord<? extends HoodieRecordPayload>> recordMap);
+ Map<String, HoodieRecord> recordMap);
}
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java
index 5ca3aee764..badd68b9c3 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java
@@ -43,7 +43,6 @@ import org.apache.hudi.common.fs.FailSafeConsistencyGuard;
import org.apache.hudi.common.fs.OptimisticConsistencyGuard;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieKey;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
@@ -112,7 +111,7 @@ import static org.apache.hudi.metadata.HoodieTableMetadataUtil.metadataPartition
* @param <K> Type of keys
* @param <O> Type of outputs
*/
-public abstract class HoodieTable<T extends HoodieRecordPayload, I, K, O> implements Serializable {
+public abstract class HoodieTable<T, I, K, O> implements Serializable {
private static final Logger LOG = LogManager.getLogger(HoodieTable.class);
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/BaseActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/BaseActionExecutor.java
index f893b4ccd5..18e5e17a6b 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/BaseActionExecutor.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/BaseActionExecutor.java
@@ -26,12 +26,11 @@ import org.apache.hudi.avro.model.HoodieRestoreMetadata;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieCommitMetadata;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
-public abstract class BaseActionExecutor<T extends HoodieRecordPayload, I, K, O, R> implements Serializable {
+public abstract class BaseActionExecutor<T, I, K, O, R> implements Serializable {
protected final transient HoodieEngineContext context;
protected final transient Configuration hadoopConf;
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapRecordConsumer.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapRecordConsumer.java
index 8966a5d51c..73ad53e4a6 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapRecordConsumer.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapRecordConsumer.java
@@ -18,14 +18,11 @@
package org.apache.hudi.table.action.bootstrap;
+import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.queue.BoundedInMemoryQueueConsumer;
-import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.io.HoodieBootstrapHandle;
-import java.io.IOException;
-
/**
* Consumer that dequeues records from queue and sends to Merge Handle for writing.
*/
@@ -39,12 +36,7 @@ public class BootstrapRecordConsumer extends BoundedInMemoryQueueConsumer<Hoodie
@Override
protected void consumeOneRecord(HoodieRecord record) {
- try {
- bootstrapHandle.write(record, ((HoodieRecordPayload) record.getData())
- .getInsertValue(bootstrapHandle.getWriterSchemaWithMetaFields()));
- } catch (IOException e) {
- throw new HoodieIOException(e.getMessage(), e);
- }
+ bootstrapHandle.write(record, bootstrapHandle.getWriterSchemaWithMetaFields(), new TypedProperties());
}
@Override
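With the payload bound gone, the consumer no longer extracts an Avro IndexedRecord from the payload before writing; the HoodieRecord is handed to the bootstrap handle together with the writer schema and properties, and any conversion happens inside the handle. A short sketch of the call-site difference, assuming an empty TypedProperties as in the patched code:
    // Old call site: materialize the Avro record from the payload and handle IOException here.
    //   bootstrapHandle.write(record,
    //       ((HoodieRecordPayload) record.getData()).getInsertValue(writerSchemaWithMetaFields));
    // New call site: pass the record through; the handle converts it as needed.
    bootstrapHandle.write(record, bootstrapHandle.getWriterSchemaWithMetaFields(), new TypedProperties());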
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java
index 56b01ec77b..8859e0784e 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java
@@ -28,7 +28,6 @@ import org.apache.hudi.client.transaction.TransactionManager;
import org.apache.hudi.common.HoodieCleanStat;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.CleanFileInfo;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
import org.apache.hudi.common.util.CleanerUtils;
@@ -56,7 +55,7 @@ import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
-public class CleanActionExecutor<T extends HoodieRecordPayload, I, K, O> extends BaseActionExecutor<T, I, K, O, HoodieCleanMetadata> {
+public class CleanActionExecutor<T, I, K, O> extends BaseActionExecutor<T, I, K, O, HoodieCleanMetadata> {
private static final long serialVersionUID = 1L;
private static final Logger LOG = LogManager.getLogger(CleanActionExecutor.class);
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanActionExecutor.java
index 7f3b437178..1f6a5a1d79 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanActionExecutor.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanActionExecutor.java
@@ -25,7 +25,6 @@ import org.apache.hudi.avro.model.HoodieCleanerPlan;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.CleanFileInfo;
import org.apache.hudi.common.model.HoodieCleaningPolicy;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
@@ -46,7 +45,7 @@ import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
-public class CleanPlanActionExecutor<T extends HoodieRecordPayload, I, K, O> extends BaseActionExecutor<T, I, K, O, Option<HoodieCleanerPlan>> {
+public class CleanPlanActionExecutor<T, I, K, O> extends BaseActionExecutor<T, I, K, O, Option<HoodieCleanerPlan>> {
private static final Logger LOG = LogManager.getLogger(CleanPlanner.class);
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java
index 9027ab045a..6e1ecfe93d 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java
@@ -30,7 +30,6 @@ import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieFileGroup;
import org.apache.hudi.common.model.HoodieFileGroupId;
import org.apache.hudi.common.model.HoodieLogFile;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.table.cdc.HoodieCDCUtils;
@@ -74,7 +73,7 @@ import java.util.stream.Stream;
* <p>
* 2) It bounds the growth of the files in the file system
*/
-public class CleanPlanner<T extends HoodieRecordPayload, I, K, O> implements Serializable {
+public class CleanPlanner<T, I, K, O> implements Serializable {
private static final Logger LOG = LogManager.getLogger(CleanPlanner.class);
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/ClusteringPlanActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/ClusteringPlanActionExecutor.java
index e0e02bae8e..683be09efe 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/ClusteringPlanActionExecutor.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/ClusteringPlanActionExecutor.java
@@ -21,7 +21,6 @@ package org.apache.hudi.table.action.cluster;
import org.apache.hudi.avro.model.HoodieClusteringPlan;
import org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata;
import org.apache.hudi.common.engine.HoodieEngineContext;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -41,7 +40,7 @@ import java.io.IOException;
import java.util.Collections;
import java.util.Map;
-public class ClusteringPlanActionExecutor<T extends HoodieRecordPayload, I, K, O> extends BaseActionExecutor<T, I, K, O, Option<HoodieClusteringPlan>> {
+public class ClusteringPlanActionExecutor<T, I, K, O> extends BaseActionExecutor<T, I, K, O, Option<HoodieClusteringPlan>> {
private static final Logger LOG = LogManager.getLogger(ClusteringPlanActionExecutor.class);
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/ClusteringExecutionStrategy.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/ClusteringExecutionStrategy.java
index 163947fa34..a4b09d006e 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/ClusteringExecutionStrategy.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/ClusteringExecutionStrategy.java
@@ -20,7 +20,6 @@ package org.apache.hudi.table.action.cluster.strategy;
import org.apache.hudi.avro.model.HoodieClusteringPlan;
import org.apache.hudi.common.engine.HoodieEngineContext;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.HoodieWriteMetadata;
@@ -34,7 +33,7 @@ import java.io.Serializable;
/**
* Pluggable implementation for writing data into new file groups based on ClusteringPlan.
*/
-public abstract class ClusteringExecutionStrategy<T extends HoodieRecordPayload, I, K, O> implements Serializable {
+public abstract class ClusteringExecutionStrategy<T, I, K, O> implements Serializable {
private static final Logger LOG = LogManager.getLogger(ClusteringExecutionStrategy.class);
private final HoodieTable<T, I, K, O> hoodieTable;
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/ClusteringPlanStrategy.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/ClusteringPlanStrategy.java
index dd827ff5a9..7bc504351a 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/ClusteringPlanStrategy.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/ClusteringPlanStrategy.java
@@ -25,7 +25,6 @@ import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.BaseFile;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieFileGroupId;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.view.SyncableFileSystemView;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
@@ -50,7 +49,7 @@ import java.util.stream.Stream;
/**
* Pluggable implementation for scheduling clustering and creating ClusteringPlan.
*/
-public abstract class ClusteringPlanStrategy<T extends HoodieRecordPayload,I,K,O> implements Serializable {
+public abstract class ClusteringPlanStrategy<T,I,K,O> implements Serializable {
private static final Logger LOG = LogManager.getLogger(ClusteringPlanStrategy.class);
public static final int CLUSTERING_PLAN_VERSION_1 = 1;
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/PartitionAwareClusteringPlanStrategy.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/PartitionAwareClusteringPlanStrategy.java
index 7042585f59..334d1f1c0f 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/PartitionAwareClusteringPlanStrategy.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/PartitionAwareClusteringPlanStrategy.java
@@ -24,7 +24,6 @@ import org.apache.hudi.avro.model.HoodieClusteringStrategy;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.FileSlice;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
@@ -44,7 +43,7 @@ import java.util.stream.Stream;
/**
* Scheduling strategy with the restriction that clustering groups can only contain files from the same partition.
*/
-public abstract class PartitionAwareClusteringPlanStrategy<T extends HoodieRecordPayload,I,K,O> extends ClusteringPlanStrategy<T,I,K,O> {
+public abstract class PartitionAwareClusteringPlanStrategy<T,I,K,O> extends ClusteringPlanStrategy<T,I,K,O> {
private static final Logger LOG = LogManager.getLogger(PartitionAwareClusteringPlanStrategy.class);
public PartitionAwareClusteringPlanStrategy(HoodieTable table, HoodieEngineContext engineContext, HoodieWriteConfig writeConfig) {
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/UpdateStrategy.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/UpdateStrategy.java
index c08c3f312d..4463f7887b 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/UpdateStrategy.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/UpdateStrategy.java
@@ -20,7 +20,6 @@ package org.apache.hudi.table.action.cluster.strategy;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieFileGroupId;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.table.HoodieTable;
@@ -30,7 +29,7 @@ import java.util.Set;
/**
* When file groups are under clustering, records written to these file groups need to be checked.
*/
-public abstract class UpdateStrategy<T extends HoodieRecordPayload, I> implements Serializable {
+public abstract class UpdateStrategy<T, I> implements Serializable {
protected final transient HoodieEngineContext engineContext;
protected final HoodieTable table;
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseBulkInsertHelper.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseBulkInsertHelper.java
index aa7196e3db..b559938567 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseBulkInsertHelper.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseBulkInsertHelper.java
@@ -18,7 +18,6 @@
package org.apache.hudi.table.action.commit;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.io.WriteHandleFactory;
@@ -26,7 +25,7 @@ import org.apache.hudi.table.BulkInsertPartitioner;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.HoodieWriteMetadata;
-public abstract class BaseBulkInsertHelper<T extends HoodieRecordPayload, I, K, O, R> {
+public abstract class BaseBulkInsertHelper<T, I, K, O, R> {
/**
* Mark instant as inflight, write input records, update index and return result.
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java
index 1227c444f2..3cd1320ee0 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java
@@ -31,7 +31,6 @@ import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieFileGroupId;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
@@ -70,7 +69,7 @@ import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
-public abstract class BaseCommitActionExecutor<T extends HoodieRecordPayload, I, K, O, R>
+public abstract class BaseCommitActionExecutor<T, I, K, O, R>
extends BaseActionExecutor<T, I, K, O, R> {
private static final Logger LOG = LogManager.getLogger(BaseCommitActionExecutor.class);
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseDeleteHelper.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseDeleteHelper.java
index b119587f47..ceeb2aeb70 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseDeleteHelper.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseDeleteHelper.java
@@ -19,7 +19,6 @@
package org.apache.hudi.table.action.commit;
import org.apache.hudi.common.engine.HoodieEngineContext;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.HoodieWriteMetadata;
@@ -29,7 +28,7 @@ import org.apache.hudi.table.action.HoodieWriteMetadata;
*
* @param <T>
*/
-public abstract class BaseDeleteHelper<T extends HoodieRecordPayload, I, K, O, R> {
+public abstract class BaseDeleteHelper<T, I, K, O, R> {
/**
* Deduplicate Hoodie records, using the given deduplication function.
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseMergeHelper.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseMergeHelper.java
index 5ead348140..393ee9ddb6 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseMergeHelper.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseMergeHelper.java
@@ -18,12 +18,13 @@
package org.apache.hudi.table.action.commit;
-import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.client.utils.MergingIterator;
+import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
import org.apache.hudi.common.model.HoodieBaseFile;
-import org.apache.hudi.common.model.HoodieRecordPayload;
+import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.util.queue.BoundedInMemoryQueueConsumer;
import org.apache.hudi.exception.HoodieException;
+import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.io.HoodieMergeHandle;
import org.apache.hudi.io.storage.HoodieFileReader;
import org.apache.hudi.io.storage.HoodieFileReaderFactory;
@@ -40,6 +41,8 @@ import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
+import javax.annotation.Nonnull;
+
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Iterator;
@@ -47,7 +50,7 @@ import java.util.Iterator;
/**
* Helper to read records from previous version of base file and run Merge.
*/
-public abstract class BaseMergeHelper<T extends HoodieRecordPayload, I, K, O> {
+public abstract class BaseMergeHelper<T, I, K, O> {
/**
* Read records from previous version of base file and merge.
@@ -57,7 +60,7 @@ public abstract class BaseMergeHelper<T extends HoodieRecordPayload, I, K, O> {
*/
public abstract void runMerge(HoodieTable<T, I, K, O> table, HoodieMergeHandle<T, I, K, O> upsertHandle) throws IOException;
- protected GenericRecord transformRecordBasedOnNewSchema(GenericDatumReader<GenericRecord> gReader, GenericDatumWriter<GenericRecord> gWriter,
+ protected HoodieRecord transformRecordBasedOnNewSchema(GenericDatumReader<GenericRecord> gReader, GenericDatumWriter<GenericRecord> gWriter,
ThreadLocal<BinaryEncoder> encoderCache, ThreadLocal<BinaryDecoder> decoderCache,
GenericRecord gRec) {
ByteArrayOutputStream inStream = null;
@@ -71,7 +74,7 @@ public abstract class BaseMergeHelper<T extends HoodieRecordPayload, I, K, O> {
BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(inStream.toByteArray(), decoderCache.get());
decoderCache.set(decoder);
GenericRecord transformedRec = gReader.read(null, decoder);
- return transformedRec;
+ return new HoodieAvroIndexedRecord(transformedRec);
} catch (IOException e) {
throw new HoodieException(e);
} finally {
@@ -87,14 +90,17 @@ public abstract class BaseMergeHelper<T extends HoodieRecordPayload, I, K, O> {
* Create Parquet record iterator that provides a stitched view of record read from skeleton and bootstrap file.
* Skeleton file is a representation of the bootstrap file inside the table, with just the bare bone fields needed
* for indexing, writing and other functionality.
- *
*/
- protected Iterator<GenericRecord> getMergingIterator(HoodieTable<T, I, K, O> table, HoodieMergeHandle<T, I, K, O> mergeHandle,
- HoodieBaseFile baseFile, HoodieFileReader<GenericRecord> reader,
- Schema readSchema, boolean externalSchemaTransformation) throws IOException {
+ protected Iterator<HoodieRecord> getMergingIterator(HoodieTable<T, I, K, O> table,
+ HoodieMergeHandle<T, I, K, O> mergeHandle,
+ HoodieBaseFile baseFile,
+ HoodieFileReader reader,
+ Schema readerSchema,
+ boolean externalSchemaTransformation) throws IOException {
Path externalFilePath = new Path(baseFile.getBootstrapBaseFile().get().getPath());
Configuration bootstrapFileConfig = new Configuration(table.getHadoopConf());
- HoodieFileReader<GenericRecord> bootstrapReader = HoodieFileReaderFactory.<GenericRecord>getFileReader(bootstrapFileConfig, externalFilePath);
+ HoodieFileReader bootstrapReader = HoodieFileReaderFactory.getFileReader(bootstrapFileConfig, externalFilePath);
+
Schema bootstrapReadSchema;
if (externalSchemaTransformation) {
bootstrapReadSchema = bootstrapReader.getSchema();
@@ -102,14 +108,25 @@ public abstract class BaseMergeHelper<T extends HoodieRecordPayload, I, K, O> {
bootstrapReadSchema = mergeHandle.getWriterSchema();
}
- return new MergingIterator<>(reader.getRecordIterator(readSchema), bootstrapReader.getRecordIterator(bootstrapReadSchema),
- (inputRecordPair) -> HoodieAvroUtils.stitchRecords(inputRecordPair.getLeft(), inputRecordPair.getRight(), mergeHandle.getWriterSchemaWithMetaFields()));
+ return new MergingIterator<>(
+ reader.getRecordIterator(readerSchema, HoodieAvroIndexedRecord::new),
+ bootstrapReader.getRecordIterator(bootstrapReadSchema, HoodieAvroIndexedRecord::new),
+ (oneRecord, otherRecord) -> mergeRecords(oneRecord, otherRecord, readerSchema, mergeHandle.getWriterSchemaWithMetaFields()));
+ }
+
+ @Nonnull
+ private static HoodieRecord mergeRecords(HoodieRecord one, HoodieRecord other, Schema readerSchema, Schema writerSchema) {
+ try {
+ return one.mergeWith(other, readerSchema, writerSchema);
+ } catch (IOException e) {
+ throw new HoodieIOException("Failed to merge records", e);
+ }
}
/**
* Consumer that dequeues records from queue and sends to Merge Handle.
*/
- protected static class UpdateHandler extends BoundedInMemoryQueueConsumer<GenericRecord, Void> {
+ protected static class UpdateHandler extends BoundedInMemoryQueueConsumer<HoodieRecord, Void> {
private final HoodieMergeHandle upsertHandle;
@@ -118,12 +135,13 @@ public abstract class BaseMergeHelper<T extends HoodieRecordPayload, I, K, O> {
}
@Override
- protected void consumeOneRecord(GenericRecord record) {
+ protected void consumeOneRecord(HoodieRecord record) {
upsertHandle.write(record);
}
@Override
- protected void finish() {}
+ protected void finish() {
+ }
@Override
protected Void getResult() {
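Taken together, the merging path above now operates on HoodieRecord end to end: the two file readers yield HoodieAvroIndexedRecord instances and mergeWith stitches the skeleton and bootstrap halves into the writer schema. A minimal sketch of assembling the iterator under those assumptions (skeletonReader, bootstrapReader and the schema variables stand for the values resolved in getMergingIterator above):
    // Sketch only: record-level stitching of skeleton and bootstrap files.
    Iterator<HoodieRecord> merged = new MergingIterator<>(
        skeletonReader.getRecordIterator(readerSchema, HoodieAvroIndexedRecord::new),
        bootstrapReader.getRecordIterator(bootstrapReadSchema, HoodieAvroIndexedRecord::new),
        (left, right) -> {
          try {
            // mergeWith combines the two halves into one record in the full writer schema.
            return left.mergeWith(right, readerSchema, writerSchemaWithMetaFields);
          } catch (IOException e) {
            throw new HoodieIOException("Failed to merge records", e);
          }
        });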
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseWriteHelper.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseWriteHelper.java
index c69d8746d1..984deda36d 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseWriteHelper.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseWriteHelper.java
@@ -19,7 +19,6 @@
package org.apache.hudi.table.action.commit;
import org.apache.hudi.common.engine.HoodieEngineContext;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.exception.HoodieUpsertException;
import org.apache.hudi.index.HoodieIndex;
@@ -30,7 +29,7 @@ import org.apache.hudi.table.action.HoodieWriteMetadata;
import java.time.Duration;
import java.time.Instant;
-public abstract class BaseWriteHelper<T extends HoodieRecordPayload, I, K, O, R> {
+public abstract class BaseWriteHelper<T, I, K, O, R> {
public HoodieWriteMetadata<O> write(String instantTime,
I inputRecords,
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/HoodieDeleteHelper.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/HoodieDeleteHelper.java
index fff52eb24d..f54184abb0 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/HoodieDeleteHelper.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/HoodieDeleteHelper.java
@@ -26,7 +26,6 @@ import org.apache.hudi.common.model.EmptyHoodieRecordPayload;
import org.apache.hudi.common.model.HoodieAvroRecord;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieUpsertException;
@@ -45,7 +44,7 @@ import java.util.HashMap;
* @param <T>
*/
@SuppressWarnings("checkstyle:LineLength")
-public class HoodieDeleteHelper<T extends HoodieRecordPayload, R> extends
+public class HoodieDeleteHelper<T, R> extends
BaseDeleteHelper<T, HoodieData<HoodieRecord<T>>, HoodieData<HoodieKey>, HoodieData<WriteStatus>, R> {
private HoodieDeleteHelper() {
}
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/HoodieMergeHelper.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/HoodieMergeHelper.java
index 5d1a55453d..8f3eb67e9d 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/HoodieMergeHelper.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/HoodieMergeHelper.java
@@ -19,14 +19,15 @@
package org.apache.hudi.table.action.commit;
import org.apache.avro.SchemaCompatibility;
-import org.apache.hudi.avro.HoodieAvroUtils;
+
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieBaseFile;
+import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
+import org.apache.hudi.common.util.ClosableIterator;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.InternalSchemaCache;
import org.apache.hudi.common.util.queue.BoundedInMemoryExecutor;
@@ -49,17 +50,22 @@ import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.BinaryEncoder;
import org.apache.hadoop.conf.Configuration;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.HashMap;
import java.util.Map;
+import java.util.Properties;
import java.util.stream.Collectors;
-public class HoodieMergeHelper<T extends HoodieRecordPayload> extends
+public class HoodieMergeHelper<T> extends
BaseMergeHelper<T, HoodieData<HoodieRecord<T>>, HoodieData<HoodieKey>, HoodieData<WriteStatus>> {
+ private static final Logger LOG = LogManager.getLogger(HoodieMergeHelper.class);
+
private HoodieMergeHelper() {
}
@@ -92,7 +98,7 @@ public class HoodieMergeHelper<T extends HoodieRecordPayload> extends
}
BoundedInMemoryExecutor<GenericRecord, GenericRecord, Void> wrapper = null;
- HoodieFileReader<GenericRecord> reader = HoodieFileReaderFactory.getFileReader(cfgForHoodieFile, mergeHandle.getOldFilePath());
+ HoodieFileReader reader = HoodieFileReaderFactory.getFileReader(cfgForHoodieFile, mergeHandle.getOldFilePath());
Option<InternalSchema> querySchemaOpt = SerDeHelper.fromJson(table.getConfig().getInternalSchema());
boolean needToReWriteRecord = false;
@@ -124,14 +130,14 @@ public class HoodieMergeHelper<T extends HoodieRecordPayload> extends
}
try {
- final Iterator<GenericRecord> readerIterator;
+ final Iterator<HoodieRecord> readerIterator;
if (baseFile.getBootstrapBaseFile().isPresent()) {
readerIterator = getMergingIterator(table, mergeHandle, baseFile, reader, readSchema, externalSchemaTransformation);
} else {
if (needToReWriteRecord) {
- readerIterator = HoodieAvroUtils.rewriteRecordWithNewSchema(reader.getRecordIterator(), readSchema, renameCols);
+ readerIterator = new RewriteIterator(reader.getRecordIterator(HoodieAvroIndexedRecord::new), readSchema, readSchema, table.getConfig().getProps(), renameCols);
} else {
- readerIterator = reader.getRecordIterator(readSchema);
+ readerIterator = reader.getRecordIterator(readSchema, HoodieAvroIndexedRecord::new);
}
}
@@ -142,7 +148,8 @@ public class HoodieMergeHelper<T extends HoodieRecordPayload> extends
if (!externalSchemaTransformation) {
return record;
}
- return transformRecordBasedOnNewSchema(gReader, gWriter, encoderCache, decoderCache, (GenericRecord) record);
+ // TODO: other record types need to be handled here as well
+ return transformRecordBasedOnNewSchema(gReader, gWriter, encoderCache, decoderCache, (GenericRecord) ((HoodieRecord)record).getData());
}, table.getPreExecuteRunnable());
wrapper.execute();
} catch (Exception e) {
@@ -160,4 +167,41 @@ public class HoodieMergeHelper<T extends HoodieRecordPayload> extends
mergeHandle.close();
}
}
+
+ class RewriteIterator implements ClosableIterator<HoodieRecord> {
+
+ private final ClosableIterator<HoodieRecord> iter;
+ private final Schema newSchema;
+ private final Schema recordSchema;
+ private final Properties prop;
+ private final Map<String, String> renameCols;
+
+ public RewriteIterator(ClosableIterator<HoodieRecord> iter, Schema newSchema, Schema recordSchema, Properties prop, Map<String, String> renameCols) {
+ this.iter = iter;
+ this.newSchema = newSchema;
+ this.recordSchema = recordSchema;
+ this.prop = prop;
+ this.renameCols = renameCols;
+ }
+
+ @Override
+ public boolean hasNext() {
+ return iter.hasNext();
+ }
+
+ @Override
+ public HoodieRecord next() {
+ try {
+ return iter.next().rewriteRecordWithNewSchema(recordSchema, prop, newSchema, renameCols);
+ } catch (IOException e) {
+ LOG.error("Error rewriting record with new schema", e);
+ throw new HoodieException(e);
+ }
+ }
+
+ @Override
+ public void close() {
+ iter.close();
+ }
+ }
}
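When schema evolution requires rewriting old records (the needToReWriteRecord branch above), the base-file iterator is wrapped so each record is converted lazily as it is pulled by the merge executor. A hedged sketch of how the wrapper is used inside runMerge, with renameCols and readSchema coming from the schema-resolution step earlier in the method:
    // Sketch only: lazily rewriting records from the old base file into the evolved schema.
    ClosableIterator<HoodieRecord> rawIterator = reader.getRecordIterator(HoodieAvroIndexedRecord::new);
    Iterator<HoodieRecord> readerIterator =
        new RewriteIterator(rawIterator, readSchema, readSchema, table.getConfig().getProps(), renameCols);
    // Each next() call applies rewriteRecordWithNewSchema before the record reaches the merge handle;
    // close() is delegated to the underlying reader iterator.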
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/HoodieWriteHelper.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/HoodieWriteHelper.java
index b1d18573fc..201c89b192 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/HoodieWriteHelper.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/HoodieWriteHelper.java
@@ -22,16 +22,14 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.config.SerializableSchema;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.engine.HoodieEngineContext;
-import org.apache.hudi.common.model.HoodieAvroRecord;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.CollectionUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.index.HoodieIndex;
import org.apache.hudi.table.HoodieTable;
-public class HoodieWriteHelper<T extends HoodieRecordPayload, R> extends BaseWriteHelper<T, HoodieData<HoodieRecord<T>>,
+public class HoodieWriteHelper<T, R> extends BaseWriteHelper<T, HoodieData<HoodieRecord<T>>,
HoodieData<HoodieKey>, HoodieData<WriteStatus>, R> {
private HoodieWriteHelper() {
@@ -66,10 +64,10 @@ public class HoodieWriteHelper<T extends HoodieRecordPayload, R> extends BaseWri
return Pair.of(key, record);
}).reduceByKey((rec1, rec2) -> {
@SuppressWarnings("unchecked")
- T reducedData = (T) rec2.getData().preCombine(rec1.getData(), schema.get(), CollectionUtils.emptyProps());
- HoodieKey reducedKey = rec1.getData().equals(reducedData) ? rec1.getKey() : rec2.getKey();
+ HoodieRecord reducedRec = rec2.preCombine(rec1, schema.get(), CollectionUtils.emptyProps());
+ HoodieKey reducedKey = rec1.getData().equals(reducedRec) ? rec1.getKey() : rec2.getKey();
- return new HoodieAvroRecord<>(reducedKey, reducedData);
+ return (HoodieRecord<T>) reducedRec.newInstance(reducedKey);
}, reduceParallelism).map(Pair::getRight);
}
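Deduplication likewise moves from payload-level preCombine to HoodieRecord#preCombine, so the same reduce applies to any record implementation. A minimal sketch of combining two records that collide on a key, mirroring the reduceByKey above (the schema and empty properties are assumed to be supplied as in that code):
    // Sketch only: combining two records with the same key, as in the reduce above.
    HoodieRecord reduced = rec2.preCombine(rec1, schema, CollectionUtils.emptyProps());
    // Per the patched reduce, the key comes from whichever input the combined data matches.
    HoodieKey reducedKey = rec1.getData().equals(reduced) ? rec1.getKey() : rec2.getKey();
    HoodieRecord<T> result = (HoodieRecord<T>) reduced.newInstance(reducedKey);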
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/CompactHelpers.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/CompactHelpers.java
index 3379d16f4c..75fbb6b27a 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/CompactHelpers.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/CompactHelpers.java
@@ -22,7 +22,6 @@ import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.model.HoodieCommitMetadata;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -43,7 +42,7 @@ import java.util.List;
* @param <K> Type of keys
* @param <O> Type of outputs
*/
-public class CompactHelpers<T extends HoodieRecordPayload, I, K, O> {
+public class CompactHelpers<T, I, K, O> {
private static final CompactHelpers SINGLETON_INSTANCE = new CompactHelpers();
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/HoodieCompactor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/HoodieCompactor.java
index 75954872ae..69c6b48fea 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/HoodieCompactor.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/HoodieCompactor.java
@@ -31,7 +31,6 @@ import org.apache.hudi.common.model.CompactionOperation;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieFileGroupId;
import org.apache.hudi.common.model.HoodieLogFile;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.model.HoodieWriteStat.RuntimeStats;
import org.apache.hudi.common.table.HoodieTableMetaClient;
@@ -75,7 +74,7 @@ import static java.util.stream.Collectors.toList;
/**
* A HoodieCompactor runs compaction on a hoodie table.
*/
-public abstract class HoodieCompactor<T extends HoodieRecordPayload, I, K, O> implements Serializable {
+public abstract class HoodieCompactor<T, I, K, O> implements Serializable {
private static final Logger LOG = LogManager.getLogger(HoodieCompactor.class);
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/RunCompactionActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/RunCompactionActionExecutor.java
index fc4ae986e6..bfee3adbd4 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/RunCompactionActionExecutor.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/RunCompactionActionExecutor.java
@@ -25,7 +25,6 @@ import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -45,7 +44,7 @@ import java.io.IOException;
import java.util.List;
@SuppressWarnings("checkstyle:LineLength")
-public class RunCompactionActionExecutor<T extends HoodieRecordPayload> extends
+public class RunCompactionActionExecutor<T> extends
BaseActionExecutor<T, HoodieData<HoodieRecord<T>>, HoodieData<HoodieKey>, HoodieData<WriteStatus>, HoodieWriteMetadata<HoodieData<WriteStatus>>> {
private final HoodieCompactor compactor;
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/ScheduleCompactionActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/ScheduleCompactionActionExecutor.java
index 4fb5f9f7dd..019ec4d947 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/ScheduleCompactionActionExecutor.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/ScheduleCompactionActionExecutor.java
@@ -22,7 +22,6 @@ import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.common.engine.EngineType;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieFileGroupId;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -48,7 +47,7 @@ import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
-public class ScheduleCompactionActionExecutor<T extends HoodieRecordPayload, I, K, O> extends BaseActionExecutor<T, I, K, O, Option<HoodieCompactionPlan>> {
+public class ScheduleCompactionActionExecutor<T, I, K, O> extends BaseActionExecutor<T, I, K, O, Option<HoodieCompactionPlan>> {
private static final Logger LOG = LogManager.getLogger(ScheduleCompactionActionExecutor.class);
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/RunIndexActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/RunIndexActionExecutor.java
index 96d46928e7..c107a83220 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/RunIndexActionExecutor.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/RunIndexActionExecutor.java
@@ -28,7 +28,6 @@ import org.apache.hudi.avro.model.HoodieRollbackMetadata;
import org.apache.hudi.client.transaction.TransactionManager;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieCommitMetadata;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
@@ -82,7 +81,7 @@ import static org.apache.hudi.metadata.HoodieTableMetadataUtil.metadataPartition
* Reads the index plan and executes the plan.
* It also reconciles updates on data timeline while indexing was in progress.
*/
-public class RunIndexActionExecutor<T extends HoodieRecordPayload, I, K, O> extends BaseActionExecutor<T, I, K, O, Option<HoodieIndexCommitMetadata>> {
+public class RunIndexActionExecutor<T, I, K, O> extends BaseActionExecutor<T, I, K, O, Option<HoodieIndexCommitMetadata>> {
private static final Logger LOG = LogManager.getLogger(RunIndexActionExecutor.class);
private static final Integer INDEX_COMMIT_METADATA_VERSION_1 = 1;
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/ScheduleIndexActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/ScheduleIndexActionExecutor.java
index d562dec671..91587b8bd5 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/ScheduleIndexActionExecutor.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/ScheduleIndexActionExecutor.java
@@ -23,7 +23,6 @@ import org.apache.hudi.avro.model.HoodieIndexPartitionInfo;
import org.apache.hudi.avro.model.HoodieIndexPlan;
import org.apache.hudi.client.transaction.TransactionManager;
import org.apache.hudi.common.engine.HoodieEngineContext;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
@@ -60,7 +59,7 @@ import static org.apache.hudi.metadata.HoodieTableMetadataUtil.metadataPartition
* 3. Initialize file groups for the enabled partition types within a transaction.
* </li>
*/
-public class ScheduleIndexActionExecutor<T extends HoodieRecordPayload, I, K, O> extends BaseActionExecutor<T, I, K, O, Option<HoodieIndexPlan>> {
+public class ScheduleIndexActionExecutor<T, I, K, O> extends BaseActionExecutor<T, I, K, O, Option<HoodieIndexPlan>> {
private static final Logger LOG = LogManager.getLogger(ScheduleIndexActionExecutor.class);
private static final Integer INDEX_PLAN_VERSION_1 = 1;
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/restore/BaseRestoreActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/restore/BaseRestoreActionExecutor.java
index 62ecbe2a31..03e2adbd6f 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/restore/BaseRestoreActionExecutor.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/restore/BaseRestoreActionExecutor.java
@@ -24,7 +24,6 @@ import org.apache.hudi.avro.model.HoodieRestorePlan;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
import org.apache.hudi.client.transaction.TransactionManager;
import org.apache.hudi.common.engine.HoodieEngineContext;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -49,7 +48,7 @@ import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
-public abstract class BaseRestoreActionExecutor<T extends HoodieRecordPayload, I, K, O> extends BaseActionExecutor<T, I, K, O, HoodieRestoreMetadata> {
+public abstract class BaseRestoreActionExecutor<T, I, K, O> extends BaseActionExecutor<T, I, K, O, HoodieRestoreMetadata> {
private static final Logger LOG = LogManager.getLogger(BaseRestoreActionExecutor.class);
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/restore/CopyOnWriteRestoreActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/restore/CopyOnWriteRestoreActionExecutor.java
index f6e104e3dc..9dcb340913 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/restore/CopyOnWriteRestoreActionExecutor.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/restore/CopyOnWriteRestoreActionExecutor.java
@@ -21,7 +21,6 @@ package org.apache.hudi.table.action.restore;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
import org.apache.hudi.common.engine.HoodieEngineContext;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -30,7 +29,7 @@ import org.apache.hudi.exception.HoodieRollbackException;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.rollback.CopyOnWriteRollbackActionExecutor;
-public class CopyOnWriteRestoreActionExecutor<T extends HoodieRecordPayload, I, K, O>
+public class CopyOnWriteRestoreActionExecutor<T, I, K, O>
extends BaseRestoreActionExecutor<T, I, K, O> {
public CopyOnWriteRestoreActionExecutor(HoodieEngineContext context,
HoodieWriteConfig config,
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/restore/MergeOnReadRestoreActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/restore/MergeOnReadRestoreActionExecutor.java
index 01c3d44fab..a7e5774515 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/restore/MergeOnReadRestoreActionExecutor.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/restore/MergeOnReadRestoreActionExecutor.java
@@ -21,7 +21,6 @@ package org.apache.hudi.table.action.restore;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
import org.apache.hudi.common.engine.HoodieEngineContext;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -29,7 +28,7 @@ import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.rollback.MergeOnReadRollbackActionExecutor;
-public class MergeOnReadRestoreActionExecutor<T extends HoodieRecordPayload, I, K, O>
+public class MergeOnReadRestoreActionExecutor<T, I, K, O>
extends BaseRestoreActionExecutor<T, I, K, O> {
public MergeOnReadRestoreActionExecutor(HoodieEngineContext context, HoodieWriteConfig config, HoodieTable<T, I, K, O> table,
String instantTime, String restoreInstantTime) {
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/BaseRollbackActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/BaseRollbackActionExecutor.java
index 4add51886f..3bc48f3e79 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/BaseRollbackActionExecutor.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/BaseRollbackActionExecutor.java
@@ -25,7 +25,6 @@ import org.apache.hudi.client.transaction.TransactionManager;
import org.apache.hudi.common.HoodieRollbackStat;
import org.apache.hudi.common.bootstrap.index.BootstrapIndex;
import org.apache.hudi.common.engine.HoodieEngineContext;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -50,7 +49,7 @@ import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
-public abstract class BaseRollbackActionExecutor<T extends HoodieRecordPayload, I, K, O> extends BaseActionExecutor<T, I, K, O, HoodieRollbackMetadata> {
+public abstract class BaseRollbackActionExecutor<T, I, K, O> extends BaseActionExecutor<T, I, K, O, HoodieRollbackMetadata> {
private static final Logger LOG = LogManager.getLogger(BaseRollbackActionExecutor.class);
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/BaseRollbackPlanActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/BaseRollbackPlanActionExecutor.java
index 63b9e8a414..c8aa992cf8 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/BaseRollbackPlanActionExecutor.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/BaseRollbackPlanActionExecutor.java
@@ -22,7 +22,6 @@ import org.apache.hudi.avro.model.HoodieInstantInfo;
import org.apache.hudi.avro.model.HoodieRollbackPlan;
import org.apache.hudi.avro.model.HoodieRollbackRequest;
import org.apache.hudi.common.engine.HoodieEngineContext;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
@@ -44,7 +43,7 @@ import java.util.List;
* Base rollback plan action executor to assist in scheduling rollback requests. This phase serializes the {@link HoodieRollbackPlan}
* to the rollback.requested instant.
*/
-public class BaseRollbackPlanActionExecutor<T extends HoodieRecordPayload, I, K, O> extends BaseActionExecutor<T, I, K, O, Option<HoodieRollbackPlan>> {
+public class BaseRollbackPlanActionExecutor<T, I, K, O> extends BaseActionExecutor<T, I, K, O, Option<HoodieRollbackPlan>> {
private static final Logger LOG = LogManager.getLogger(BaseRollbackPlanActionExecutor.class);
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/CopyOnWriteRollbackActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/CopyOnWriteRollbackActionExecutor.java
index e766dbdc81..a7d43d712b 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/CopyOnWriteRollbackActionExecutor.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/CopyOnWriteRollbackActionExecutor.java
@@ -21,7 +21,6 @@ package org.apache.hudi.table.action.rollback;
import org.apache.hudi.avro.model.HoodieRollbackPlan;
import org.apache.hudi.common.HoodieRollbackStat;
import org.apache.hudi.common.engine.HoodieEngineContext;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.HoodieTimer;
@@ -34,7 +33,7 @@ import org.apache.log4j.Logger;
import java.util.ArrayList;
import java.util.List;
-public class CopyOnWriteRollbackActionExecutor<T extends HoodieRecordPayload, I, K, O> extends BaseRollbackActionExecutor<T, I, K, O> {
+public class CopyOnWriteRollbackActionExecutor<T, I, K, O> extends BaseRollbackActionExecutor<T, I, K, O> {
private static final Logger LOG = LogManager.getLogger(CopyOnWriteRollbackActionExecutor.class);
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/MarkerBasedRollbackStrategy.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/MarkerBasedRollbackStrategy.java
index 87ee7d9472..000ea21af9 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/MarkerBasedRollbackStrategy.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/MarkerBasedRollbackStrategy.java
@@ -23,7 +23,6 @@ import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieLogFile;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.IOType;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.Option;
@@ -48,7 +47,7 @@ import static org.apache.hudi.table.action.rollback.BaseRollbackHelper.EMPTY_STR
/**
* Performs rollback using marker files generated during the write.
*/
-public class MarkerBasedRollbackStrategy<T extends HoodieRecordPayload, I, K, O> implements BaseRollbackPlanActionExecutor.RollbackStrategy {
+public class MarkerBasedRollbackStrategy<T, I, K, O> implements BaseRollbackPlanActionExecutor.RollbackStrategy {
private static final Logger LOG = LogManager.getLogger(MarkerBasedRollbackStrategy.class);
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/MergeOnReadRollbackActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/MergeOnReadRollbackActionExecutor.java
index 46d4d84ebf..682c87c7f5 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/MergeOnReadRollbackActionExecutor.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/MergeOnReadRollbackActionExecutor.java
@@ -22,7 +22,6 @@ package org.apache.hudi.table.action.rollback;
import org.apache.hudi.avro.model.HoodieRollbackPlan;
import org.apache.hudi.common.HoodieRollbackStat;
import org.apache.hudi.common.engine.HoodieEngineContext;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.HoodieTimer;
import org.apache.hudi.config.HoodieWriteConfig;
@@ -34,7 +33,7 @@ import org.apache.log4j.Logger;
import java.util.ArrayList;
import java.util.List;
-public class MergeOnReadRollbackActionExecutor<T extends HoodieRecordPayload, I, K, O> extends BaseRollbackActionExecutor<T, I, K, O> {
+public class MergeOnReadRollbackActionExecutor<T, I, K, O> extends BaseRollbackActionExecutor<T, I, K, O> {
private static final Logger LOG = LogManager.getLogger(MergeOnReadRollbackActionExecutor.class);
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/RestorePlanActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/RestorePlanActionExecutor.java
index e33dffcb7b..34617afc52 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/RestorePlanActionExecutor.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/RestorePlanActionExecutor.java
@@ -22,7 +22,6 @@ package org.apache.hudi.table.action.rollback;
import org.apache.hudi.avro.model.HoodieInstantInfo;
import org.apache.hudi.avro.model.HoodieRestorePlan;
import org.apache.hudi.common.engine.HoodieEngineContext;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -45,7 +44,7 @@ import java.util.stream.Stream;
/**
* Plans the restore action and adds a restore.requested meta file to the timeline.
*/
-public class RestorePlanActionExecutor<T extends HoodieRecordPayload, I, K, O> extends BaseActionExecutor<T, I, K, O, Option<HoodieRestorePlan>> {
+public class RestorePlanActionExecutor<T, I, K, O> extends BaseActionExecutor<T, I, K, O, Option<HoodieRestorePlan>> {
private static final Logger LOG = LogManager.getLogger(RestorePlanActionExecutor.class);
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/savepoint/SavepointActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/savepoint/SavepointActionExecutor.java
index 7f408c1b8d..3bfdd20721 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/savepoint/SavepointActionExecutor.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/savepoint/SavepointActionExecutor.java
@@ -23,7 +23,6 @@ import org.apache.hudi.avro.model.HoodieSavepointMetadata;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieBaseFile;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
@@ -44,7 +43,7 @@ import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
-public class SavepointActionExecutor<T extends HoodieRecordPayload, I, K, O> extends BaseActionExecutor<T, I, K, O, HoodieSavepointMetadata> {
+public class SavepointActionExecutor<T, I, K, O> extends BaseActionExecutor<T, I, K, O, HoodieSavepointMetadata> {
private static final Logger LOG = LogManager.getLogger(SavepointActionExecutor.class);
diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java
index a45b8a9aaa..6fb80c2d85 100644
--- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java
+++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java
@@ -33,6 +33,7 @@ import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.EmptyHoodieRecordPayload;
import org.apache.hudi.common.model.HoodieAvroRecord;
+import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.util.FileIOUtils;
@@ -69,7 +70,7 @@ import static org.apache.hudi.common.testutils.FileSystemTestUtils.RANDOM;
import static org.apache.hudi.common.testutils.SchemaTestUtil.getSchemaFromResource;
import static org.apache.hudi.common.util.CollectionUtils.toStream;
import static org.apache.hudi.io.storage.HoodieHFileConfig.HFILE_COMPARATOR;
-import static org.apache.hudi.io.storage.HoodieHFileReader.SCHEMA_KEY;
+import static org.apache.hudi.io.storage.HoodieAvroHFileReader.SCHEMA_KEY;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
@@ -89,7 +90,7 @@ public class TestHoodieHFileReaderWriter extends TestHoodieReaderWriterBase {
}
@Override
- protected HoodieFileWriter<GenericRecord> createWriter(
+ protected HoodieAvroHFileWriter createWriter(
Schema avroSchema, boolean populateMetaFields) throws Exception {
String instantTime = "000";
HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder()
@@ -104,15 +105,15 @@ public class TestHoodieHFileReaderWriter extends TestHoodieReaderWriterBase {
when(mockTaskContextSupplier.getPartitionIdSupplier()).thenReturn(partitionSupplier);
when(partitionSupplier.get()).thenReturn(10);
- return HoodieFileWriterFactory.newHFileFileWriter(
+ return (HoodieAvroHFileWriter)HoodieFileWriterFactory.newHFileFileWriter(
instantTime, getFilePath(), writeConfig, avroSchema, conf, mockTaskContextSupplier);
}
@Override
- protected HoodieFileReader<GenericRecord> createReader(
+ protected HoodieAvroFileReader createReader(
Configuration conf) throws Exception {
CacheConfig cacheConfig = new CacheConfig(conf);
- return new HoodieHFileReader<>(conf, getFilePath(), cacheConfig, getFilePath().getFileSystem(conf));
+ return new HoodieAvroHFileReader(conf, getFilePath(), cacheConfig, getFilePath().getFileSystem(conf));
}
@Override
@@ -144,7 +145,7 @@ public class TestHoodieHFileReaderWriter extends TestHoodieReaderWriterBase {
@MethodSource("populateMetaFieldsAndTestAvroWithMeta")
public void testWriteReadHFileWithMetaFields(boolean populateMetaFields, boolean testAvroWithMeta) throws Exception {
Schema avroSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleSchemaWithMetaFields.avsc");
- HoodieFileWriter<GenericRecord> writer = createWriter(avroSchema, populateMetaFields);
+ HoodieAvroHFileWriter writer = createWriter(avroSchema, populateMetaFields);
List<String> keys = new ArrayList<>();
Map<String, GenericRecord> recordMap = new TreeMap<>();
for (int i = 0; i < 100; i++) {
@@ -167,8 +168,8 @@ public class TestHoodieHFileReaderWriter extends TestHoodieReaderWriterBase {
writer.close();
Configuration conf = new Configuration();
- HoodieHFileReader hoodieHFileReader = (HoodieHFileReader) createReader(conf);
- List<IndexedRecord> records = HoodieHFileReader.readAllRecords(hoodieHFileReader);
+ HoodieAvroHFileReader hoodieHFileReader = (HoodieAvroHFileReader) createReader(conf);
+ List<IndexedRecord> records = HoodieAvroHFileReader.readAllRecords(hoodieHFileReader);
assertEquals(new ArrayList<>(recordMap.values()), records);
hoodieHFileReader.close();
@@ -182,8 +183,8 @@ public class TestHoodieHFileReaderWriter extends TestHoodieReaderWriterBase {
List<GenericRecord> expectedRecords = rowsList.stream().map(recordMap::get).collect(Collectors.toList());
- hoodieHFileReader = (HoodieHFileReader<GenericRecord>) createReader(conf);
- List<GenericRecord> result = HoodieHFileReader.readRecords(hoodieHFileReader, rowsList);
+ hoodieHFileReader = (HoodieAvroHFileReader) createReader(conf);
+ List<GenericRecord> result = HoodieAvroHFileReader.readRecords(hoodieHFileReader, rowsList).stream().map(r -> (GenericRecord)r).collect(Collectors.toList());
assertEquals(expectedRecords, result);
@@ -212,8 +213,8 @@ public class TestHoodieHFileReaderWriter extends TestHoodieReaderWriterBase {
byte[] content = FileIOUtils.readAsByteArray(
fs.open(getFilePath()), (int) fs.getFileStatus(getFilePath()).getLen());
// Reading byte array in HFile format, without actual file path
- HoodieHFileReader<GenericRecord> hfileReader =
- new HoodieHFileReader<>(fs, new Path(DUMMY_BASE_PATH), content, Option.empty());
+ HoodieAvroHFileReader hfileReader =
+ new HoodieAvroHFileReader(fs, new Path(DUMMY_BASE_PATH), content, Option.empty());
Schema avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc");
assertEquals(NUM_RECORDS, hfileReader.getTotalRecords());
verifySimpleRecords(hfileReader.getRecordIterator(avroSchema));
@@ -222,20 +223,20 @@ public class TestHoodieHFileReaderWriter extends TestHoodieReaderWriterBase {
@Test
public void testReaderGetRecordIterator() throws Exception {
writeFileWithSimpleSchema();
- HoodieHFileReader<GenericRecord> hfileReader =
- (HoodieHFileReader<GenericRecord>) createReader(new Configuration());
+ HoodieAvroHFileReader hfileReader =
+ (HoodieAvroHFileReader) createReader(new Configuration());
List<String> keys =
IntStream.concat(IntStream.range(40, NUM_RECORDS * 2), IntStream.range(10, 20))
.mapToObj(i -> "key" + String.format("%02d", i)).collect(Collectors.toList());
Schema avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc");
- Iterator<GenericRecord> iterator = hfileReader.getRecordsByKeysIterator(keys, avroSchema);
+ Iterator<IndexedRecord> iterator = hfileReader.getRecordsByKeysIterator(keys, avroSchema);
List<Integer> expectedIds =
IntStream.concat(IntStream.range(40, NUM_RECORDS), IntStream.range(10, 20))
.boxed().collect(Collectors.toList());
int index = 0;
while (iterator.hasNext()) {
- GenericRecord record = iterator.next();
+ GenericRecord record = (GenericRecord) iterator.next();
String key = "key" + String.format("%02d", expectedIds.get(index));
assertEquals(key, record.get("_row_key").toString());
assertEquals(Integer.toString(expectedIds.get(index)), record.get("time").toString());
@@ -247,18 +248,18 @@ public class TestHoodieHFileReaderWriter extends TestHoodieReaderWriterBase {
@Test
public void testReaderGetRecordIteratorByKeyPrefixes() throws Exception {
writeFileWithSimpleSchema();
- HoodieHFileReader<GenericRecord> hfileReader =
- (HoodieHFileReader<GenericRecord>) createReader(new Configuration());
+ HoodieAvroHFileReader hfileReader =
+ (HoodieAvroHFileReader) createReader(new Configuration());
Schema avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc");
List<String> keyPrefixes = Collections.singletonList("key");
- Iterator<GenericRecord> iterator =
+ Iterator<IndexedRecord> iterator =
hfileReader.getRecordsByKeyPrefixIterator(keyPrefixes, avroSchema);
- List<GenericRecord> recordsByPrefix = toStream(iterator).collect(Collectors.toList());
+ List<GenericRecord> recordsByPrefix = toStream(iterator).map(r -> (GenericRecord)r).collect(Collectors.toList());
- List<GenericRecord> allRecords = toStream(hfileReader.getRecordIterator()).collect(Collectors.toList());
+ List<GenericRecord> allRecords = toStream(hfileReader.getRecordIterator(HoodieAvroIndexedRecord::new)).map(r -> (GenericRecord)r.getData()).collect(Collectors.toList());
assertEquals(allRecords, recordsByPrefix);
@@ -268,6 +269,7 @@ public class TestHoodieHFileReaderWriter extends TestHoodieReaderWriterBase {
hfileReader.getRecordsByKeyPrefixIterator(Collections.singletonList("key1"), avroSchema);
recordsByPrefix =
StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false)
+ .map(r -> (GenericRecord)r)
.collect(Collectors.toList());
assertEquals(expectedKey1s, recordsByPrefix);
@@ -277,6 +279,7 @@ public class TestHoodieHFileReaderWriter extends TestHoodieReaderWriterBase {
hfileReader.getRecordsByKeyPrefixIterator(Collections.singletonList("key25"), avroSchema);
recordsByPrefix =
StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false)
+ .map(r -> (GenericRecord)r)
.collect(Collectors.toList());
assertEquals(expectedKey25, recordsByPrefix);
@@ -285,6 +288,7 @@ public class TestHoodieHFileReaderWriter extends TestHoodieReaderWriterBase {
hfileReader.getRecordsByKeyPrefixIterator(Collections.singletonList("key99"), avroSchema);
recordsByPrefix =
StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false)
+ .map(r -> (GenericRecord)r)
.collect(Collectors.toList());
assertEquals(Collections.emptyList(), recordsByPrefix);
@@ -293,6 +297,7 @@ public class TestHoodieHFileReaderWriter extends TestHoodieReaderWriterBase {
hfileReader.getRecordsByKeyPrefixIterator(Collections.singletonList("key1234"), avroSchema);
recordsByPrefix =
StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false)
+ .map(r -> (GenericRecord)r)
.collect(Collectors.toList());
assertEquals(Collections.emptyList(), recordsByPrefix);
@@ -353,8 +358,8 @@ public class TestHoodieHFileReaderWriter extends TestHoodieReaderWriterBase {
verifyHFileReader(
HoodieHFileUtils.createHFileReader(fs, new Path(DUMMY_BASE_PATH), content),
hfilePrefix, true, HFILE_COMPARATOR.getClass(), NUM_RECORDS_FIXTURE);
- HoodieHFileReader<GenericRecord> hfileReader =
- new HoodieHFileReader<>(fs, new Path(DUMMY_BASE_PATH), content, Option.empty());
+ HoodieAvroHFileReader hfileReader =
+ new HoodieAvroHFileReader(fs, new Path(DUMMY_BASE_PATH), content, Option.empty());
Schema avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc");
assertEquals(NUM_RECORDS_FIXTURE, hfileReader.getTotalRecords());
verifySimpleRecords(hfileReader.getRecordIterator(avroSchema));
@@ -362,7 +367,7 @@ public class TestHoodieHFileReaderWriter extends TestHoodieReaderWriterBase {
content = readHFileFromResources(complexHFile);
verifyHFileReader(HoodieHFileUtils.createHFileReader(fs, new Path(DUMMY_BASE_PATH), content),
hfilePrefix, true, HFILE_COMPARATOR.getClass(), NUM_RECORDS_FIXTURE);
- hfileReader = new HoodieHFileReader<>(fs, new Path(DUMMY_BASE_PATH), content, Option.empty());
+ hfileReader = new HoodieAvroHFileReader(fs, new Path(DUMMY_BASE_PATH), content, Option.empty());
avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchemaWithUDT.avsc");
assertEquals(NUM_RECORDS_FIXTURE, hfileReader.getTotalRecords());
verifySimpleRecords(hfileReader.getRecordIterator(avroSchema));
diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieOrcReaderWriter.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieOrcReaderWriter.java
index 817fc25a5d..10e0ce2256 100644
--- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieOrcReaderWriter.java
+++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieOrcReaderWriter.java
@@ -25,7 +25,6 @@ import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.config.HoodieStorageConfig;
import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.orc.CompressionKind;
@@ -52,7 +51,7 @@ public class TestHoodieOrcReaderWriter extends TestHoodieReaderWriterBase {
}
@Override
- protected HoodieFileWriter<GenericRecord> createWriter(
+ protected HoodieAvroOrcWriter createWriter(
Schema avroSchema, boolean populateMetaFields) throws Exception {
BloomFilter filter = BloomFilterFactory.createBloomFilter(1000, 0.00001, -1, BloomFilterTypeCode.SIMPLE.name());
Configuration conf = new Configuration();
@@ -65,11 +64,11 @@ public class TestHoodieOrcReaderWriter extends TestHoodieReaderWriterBase {
when(mockTaskContextSupplier.getPartitionIdSupplier()).thenReturn(partitionSupplier);
when(partitionSupplier.get()).thenReturn(10);
String instantTime = "000";
- return new HoodieOrcWriter<>(instantTime, getFilePath(), config, avroSchema, mockTaskContextSupplier);
+ return new HoodieAvroOrcWriter(instantTime, getFilePath(), config, avroSchema, mockTaskContextSupplier);
}
@Override
- protected HoodieFileReader<GenericRecord> createReader(
+ protected HoodieAvroFileReader createReader(
Configuration conf) throws Exception {
return HoodieFileReaderFactory.getFileReader(conf, getFilePath());
}
diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieReaderWriterBase.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieReaderWriterBase.java
index 902f42e38f..cf701bc017 100644
--- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieReaderWriterBase.java
+++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieReaderWriterBase.java
@@ -21,10 +21,13 @@ package org.apache.hudi.io.storage;
import org.apache.hudi.common.bloom.BloomFilter;
import org.apache.hudi.common.model.HoodieKey;
+import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
+import org.apache.hudi.common.model.HoodieRecord;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.junit.jupiter.api.AfterEach;
@@ -60,10 +63,10 @@ public abstract class TestHoodieReaderWriterBase {
protected abstract Path getFilePath();
- protected abstract HoodieFileWriter<GenericRecord> createWriter(
+ protected abstract HoodieAvroFileWriter createWriter(
Schema avroSchema, boolean populateMetaFields) throws Exception;
- protected abstract HoodieFileReader<GenericRecord> createReader(
+ protected abstract HoodieAvroFileReader createReader(
Configuration conf) throws Exception;
protected abstract void verifyMetadata(Configuration conf) throws IOException;
@@ -87,7 +90,7 @@ public abstract class TestHoodieReaderWriterBase {
Configuration conf = new Configuration();
verifyMetadata(conf);
- HoodieFileReader<GenericRecord> hoodieReader = createReader(conf);
+ HoodieAvroFileReader hoodieReader = createReader(conf);
BloomFilter filter = hoodieReader.readBloomFilter();
for (int i = 0; i < NUM_RECORDS; i++) {
String key = "key" + String.format("%02d", i);
@@ -110,7 +113,7 @@ public abstract class TestHoodieReaderWriterBase {
Configuration conf = new Configuration();
verifyMetadata(conf);
verifySchema(conf, schemaPath);
- verifySimpleRecords(createReader(conf).getRecordIterator());
+ verifySimpleRecords(new TransformIterator(createReader(conf).getRecordIterator(HoodieAvroIndexedRecord::new)));
}
@Test
@@ -118,7 +121,7 @@ public abstract class TestHoodieReaderWriterBase {
String schemaPath = "/exampleSchemaWithUDT.avsc";
Schema avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, schemaPath);
Schema udtSchema = avroSchema.getField("driver").schema().getTypes().get(1);
- HoodieFileWriter<GenericRecord> writer = createWriter(avroSchema, true);
+ HoodieAvroFileWriter writer = createWriter(avroSchema, true);
for (int i = 0; i < NUM_RECORDS; i++) {
GenericRecord record = new GenericData.Record(avroSchema);
String key = "key" + String.format("%02d", i);
@@ -137,7 +140,7 @@ public abstract class TestHoodieReaderWriterBase {
Configuration conf = new Configuration();
verifyMetadata(conf);
verifySchema(conf, schemaPath);
- verifyComplexRecords(createReader(conf).getRecordIterator());
+ verifyComplexRecords(new TransformIterator(createReader(conf).getRecordIterator(HoodieAvroIndexedRecord::new)));
}
@Test
@@ -145,7 +148,7 @@ public abstract class TestHoodieReaderWriterBase {
writeFileWithSimpleSchema();
Configuration conf = new Configuration();
- HoodieFileReader<GenericRecord> hoodieReader = createReader(conf);
+ HoodieAvroFileReader hoodieReader = createReader(conf);
String[] schemaList = new String[] {
"/exampleEvolvedSchema.avsc", "/exampleEvolvedSchemaChangeOrder.avsc",
"/exampleEvolvedSchemaColumnRequire.avsc", "/exampleEvolvedSchemaColumnType.avsc",
@@ -166,21 +169,22 @@ public abstract class TestHoodieReaderWriterBase {
protected void writeFileWithSimpleSchema() throws Exception {
Schema avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc");
- HoodieFileWriter<GenericRecord> writer = createWriter(avroSchema, true);
+ HoodieAvroFileWriter writer = createWriter(avroSchema, true);
for (int i = 0; i < NUM_RECORDS; i++) {
GenericRecord record = new GenericData.Record(avroSchema);
String key = "key" + String.format("%02d", i);
record.put("_row_key", key);
record.put("time", Integer.toString(i));
record.put("number", i);
- writer.writeAvro(key, record);
+ HoodieRecord avroRecord = new HoodieAvroIndexedRecord(record);
+ writer.write(key, avroRecord, avroSchema);
}
writer.close();
}
protected void writeFileWithSchemaWithMeta() throws Exception {
Schema avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchemaWithMetaFields.avsc");
- HoodieFileWriter<GenericRecord> writer = createWriter(avroSchema, true);
+ HoodieAvroFileWriter writer = createWriter(avroSchema, true);
for (int i = 0; i < NUM_RECORDS; i++) {
GenericRecord record = new GenericData.Record(avroSchema);
String key = "key" + String.format("%02d", i);
@@ -193,10 +197,10 @@ public abstract class TestHoodieReaderWriterBase {
writer.close();
}
- protected void verifySimpleRecords(Iterator<GenericRecord> iterator) {
+ protected void verifySimpleRecords(Iterator<IndexedRecord> iterator) {
int index = 0;
while (iterator.hasNext()) {
- GenericRecord record = iterator.next();
+ GenericRecord record = (GenericRecord) iterator.next();
String key = "key" + String.format("%02d", index);
assertEquals(key, record.get("_row_key").toString());
assertEquals(Integer.toString(index), record.get("time").toString());
@@ -205,10 +209,10 @@ public abstract class TestHoodieReaderWriterBase {
}
}
- protected void verifyComplexRecords(Iterator<GenericRecord> iterator) {
+ protected void verifyComplexRecords(Iterator<IndexedRecord> iterator) {
int index = 0;
while (iterator.hasNext()) {
- GenericRecord record = iterator.next();
+ GenericRecord record = (GenericRecord) iterator.next();
String key = "key" + String.format("%02d", index);
assertEquals(key, record.get("_row_key").toString());
assertEquals(Integer.toString(index), record.get("time").toString());
@@ -232,7 +236,7 @@ public abstract class TestHoodieReaderWriterBase {
}
}
- private void verifyFilterRowKeys(HoodieFileReader<GenericRecord> hoodieReader) {
+ private void verifyFilterRowKeys(HoodieAvroFileReader hoodieReader) {
Set<String> candidateRowKeys = IntStream.range(40, NUM_RECORDS * 2)
.mapToObj(i -> "key" + String.format("%02d", i)).collect(Collectors.toCollection(TreeSet::new));
List<String> expectedKeys = IntStream.range(40, NUM_RECORDS)
@@ -241,12 +245,12 @@ public abstract class TestHoodieReaderWriterBase {
.stream().sorted().collect(Collectors.toList()));
}
- private void verifyReaderWithSchema(String schemaPath, HoodieFileReader<GenericRecord> hoodieReader) throws IOException {
+ private void verifyReaderWithSchema(String schemaPath, HoodieAvroFileReader hoodieReader) throws IOException {
Schema evolvedSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, schemaPath);
- Iterator<GenericRecord> iter = hoodieReader.getRecordIterator(evolvedSchema);
+ Iterator<IndexedRecord> iter = hoodieReader.getRecordIterator(evolvedSchema);
int index = 0;
while (iter.hasNext()) {
- verifyRecord(schemaPath, iter.next(), index);
+ verifyRecord(schemaPath, (GenericRecord) iter.next(), index);
index++;
}
}
@@ -264,4 +268,24 @@ public abstract class TestHoodieReaderWriterBase {
}
assertNull(record.get("added_field"));
}
+
+ class TransformIterator implements Iterator<IndexedRecord> {
+
+ private final Iterator<HoodieRecord> iter;
+
+ public TransformIterator(Iterator<HoodieRecord> iter) {
+ this.iter = iter;
+ }
+
+ @Override
+ public boolean hasNext() {
+ return iter.hasNext();
+ }
+
+ @Override
+ public IndexedRecord next() {
+ return (GenericRecord) iter.next().getData();
+
+ }
+ }
}
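
For orientation, here is a minimal sketch (not part of the patch) of the record-level write/read round trip the updated tests above exercise. It assumes a HoodieAvroFileWriter and HoodieAvroFileReader over the same file path, created the way createWriter/createReader do in the tests, and the example schema from /exampleSchema.avsc; the helper name roundTrip is invented for illustration.

    import java.util.Iterator;
    import org.apache.avro.Schema;
    import org.apache.avro.generic.GenericData;
    import org.apache.avro.generic.GenericRecord;
    import org.apache.avro.generic.IndexedRecord;
    import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
    import org.apache.hudi.common.model.HoodieRecord;
    import org.apache.hudi.io.storage.HoodieAvroFileReader;
    import org.apache.hudi.io.storage.HoodieAvroFileWriter;

    static void roundTrip(HoodieAvroFileWriter writer, HoodieAvroFileReader reader, Schema schema) throws Exception {
      // Build a plain Avro record as the tests do.
      GenericRecord avro = new GenericData.Record(schema);
      avro.put("_row_key", "key00");
      avro.put("time", "0");
      avro.put("number", 0);

      // Writers now take a HoodieRecord plus the write schema instead of a raw Avro record (writeAvro).
      HoodieRecord record = new HoodieAvroIndexedRecord(avro);
      writer.write("key00", record, schema);
      writer.close();

      // Readers hand back IndexedRecord (or HoodieRecord via a wrapping function) rather than GenericRecord.
      Iterator<IndexedRecord> it = reader.getRecordIterator(schema);
      while (it.hasNext()) {
        GenericRecord read = (GenericRecord) it.next();
        System.out.println(read.get("_row_key"));
      }
    }
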
diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/testutils/HoodieWriteableTestTable.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/testutils/HoodieWriteableTestTable.java
index 8e7df833cc..522bf69134 100644
--- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/testutils/HoodieWriteableTestTable.java
+++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/testutils/HoodieWriteableTestTable.java
@@ -19,16 +19,11 @@
package org.apache.hudi.testutils;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.generic.IndexedRecord;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.avro.HoodieAvroWriteSupport;
import org.apache.hudi.common.bloom.BloomFilter;
import org.apache.hudi.common.engine.TaskContextSupplier;
+import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodieRecord;
@@ -43,11 +38,18 @@ import org.apache.hudi.common.testutils.HoodieMetadataTestTable;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieStorageConfig;
+import org.apache.hudi.io.storage.HoodieAvroOrcWriter;
import org.apache.hudi.io.storage.HoodieAvroParquetWriter;
import org.apache.hudi.io.storage.HoodieOrcConfig;
-import org.apache.hudi.io.storage.HoodieOrcWriter;
import org.apache.hudi.io.storage.HoodieParquetConfig;
import org.apache.hudi.metadata.HoodieTableMetadataWriter;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.orc.CompressionKind;
@@ -111,7 +113,7 @@ public class HoodieWriteableTestTable extends HoodieMetadataTestTable {
HoodieParquetConfig<HoodieAvroWriteSupport> config = new HoodieParquetConfig<>(writeSupport, CompressionCodecName.GZIP,
ParquetWriter.DEFAULT_BLOCK_SIZE, ParquetWriter.DEFAULT_PAGE_SIZE, 120 * 1024 * 1024,
new Configuration(), Double.parseDouble(HoodieStorageConfig.PARQUET_COMPRESSION_RATIO_FRACTION.defaultValue()));
- try (HoodieAvroParquetWriter writer = new HoodieAvroParquetWriter<>(
+ try (HoodieAvroParquetWriter writer = new HoodieAvroParquetWriter(
new Path(Paths.get(basePath, partition, fileName).toString()), config, currentInstantTime,
contextSupplier, populateMetaFields)) {
int seqId = 1;
@@ -133,7 +135,7 @@ public class HoodieWriteableTestTable extends HoodieMetadataTestTable {
int orcBlockSize = Integer.parseInt(HoodieStorageConfig.ORC_BLOCK_SIZE.defaultValue());
int maxFileSize = Integer.parseInt(HoodieStorageConfig.ORC_FILE_MAX_SIZE.defaultValue());
HoodieOrcConfig config = new HoodieOrcConfig(conf, CompressionKind.ZLIB, orcStripSize, orcBlockSize, maxFileSize, filter);
- try (HoodieOrcWriter writer = new HoodieOrcWriter(
+ try (HoodieAvroOrcWriter writer = new HoodieAvroOrcWriter(
currentInstantTime,
new Path(Paths.get(basePath, partition, fileName).toString()),
config, schema, contextSupplier)) {
@@ -174,7 +176,7 @@ public class HoodieWriteableTestTable extends HoodieMetadataTestTable {
LOG.warn("Failed to convert record " + r.toString(), e);
return null;
}
- }).collect(Collectors.toList()), header, HoodieRecord.RECORD_KEY_METADATA_FIELD));
+ }).map(HoodieAvroIndexedRecord::new).collect(Collectors.toList()), header, HoodieRecord.RECORD_KEY_METADATA_FIELD));
return Pair.of(partitionPath, logWriter.getLogFile());
}
}
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/HoodieFlinkWriteClient.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/HoodieFlinkWriteClient.java
index 191eb003b9..9085f392a9 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/HoodieFlinkWriteClient.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/HoodieFlinkWriteClient.java
@@ -27,8 +27,8 @@ import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
+import org.apache.hudi.common.model.HoodieRecordLocation;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.model.TableServiceType;
import org.apache.hudi.common.model.WriteOperationType;
@@ -83,7 +83,7 @@ import java.util.stream.Collectors;
* @param <T> type of the payload
*/
@SuppressWarnings("checkstyle:LineLength")
-public class HoodieFlinkWriteClient<T extends HoodieRecordPayload> extends
+public class HoodieFlinkWriteClient<T> extends
BaseHoodieWriteClient<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> {
private static final Logger LOG = LoggerFactory.getLogger(HoodieFlinkWriteClient.class);
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/execution/ExplicitWriteHandler.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/execution/ExplicitWriteHandler.java
index 46eff58757..ca361bbdc1 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/execution/ExplicitWriteHandler.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/execution/ExplicitWriteHandler.java
@@ -19,8 +19,8 @@
package org.apache.hudi.execution;
import org.apache.hudi.client.WriteStatus;
+import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.queue.BoundedInMemoryQueueConsumer;
import org.apache.hudi.io.HoodieWriteHandle;
@@ -30,7 +30,7 @@ import java.util.List;
/**
* Consumes stream of hoodie records from in-memory queue and writes to one explicit create handle.
*/
-public class ExplicitWriteHandler<T extends HoodieRecordPayload>
+public class ExplicitWriteHandler<T>
extends BoundedInMemoryQueueConsumer<HoodieLazyInsertIterable.HoodieInsertValueGenResult<HoodieRecord>, List<WriteStatus>> {
private final List<WriteStatus> statuses = new ArrayList<>();
@@ -42,9 +42,9 @@ public class ExplicitWriteHandler<T extends HoodieRecordPayload>
}
@Override
- public void consumeOneRecord(HoodieLazyInsertIterable.HoodieInsertValueGenResult<HoodieRecord> payload) {
- final HoodieRecord insertPayload = payload.record;
- handle.write(insertPayload, payload.insertValue, payload.exception);
+ public void consumeOneRecord(HoodieLazyInsertIterable.HoodieInsertValueGenResult<HoodieRecord> genResult) {
+ final HoodieRecord insertPayload = genResult.getResult();
+ handle.write(insertPayload, genResult.schema, new TypedProperties(genResult.props));
}
@Override
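
As a hedged sketch (not part of the patch) of the consumer contract this change moves to: the queue consumer now forwards the HoodieRecord itself together with its schema and properties, and the write handle decides how to materialize it, instead of receiving a pre-built Avro insertValue. The snippet assumes it sits in org.apache.hudi.execution next to ExplicitWriteHandler, so the genResult fields are accessed exactly as the handler above accesses them; the class and method names are invented for illustration.

    package org.apache.hudi.execution;

    import org.apache.hudi.common.config.TypedProperties;
    import org.apache.hudi.common.model.HoodieRecord;
    import org.apache.hudi.io.HoodieWriteHandle;

    class WriteForwardSketch {
      // Forwards one generated result to an explicit write handle, mirroring consumeOneRecord above.
      static void forwardToHandle(HoodieWriteHandle handle,
                                  HoodieLazyInsertIterable.HoodieInsertValueGenResult<HoodieRecord> genResult) {
        HoodieRecord record = genResult.getResult();
        handle.write(record, genResult.schema, new TypedProperties(genResult.props));
      }
    }
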
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/execution/FlinkLazyInsertIterable.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/execution/FlinkLazyInsertIterable.java
index d0ec4e5ae6..94bffc64cd 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/execution/FlinkLazyInsertIterable.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/execution/FlinkLazyInsertIterable.java
@@ -21,7 +21,6 @@ package org.apache.hudi.execution;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.queue.BoundedInMemoryExecutor;
import org.apache.hudi.common.util.queue.IteratorBasedQueueProducer;
@@ -41,7 +40,7 @@ import java.util.List;
*
* @param <T> type of the payload
*/
-public class FlinkLazyInsertIterable<T extends HoodieRecordPayload> extends HoodieLazyInsertIterable<T> {
+public class FlinkLazyInsertIterable<T> extends HoodieLazyInsertIterable<T> {
public FlinkLazyInsertIterable(Iterator<HoodieRecord<T>> recordItr,
boolean areRecordsSorted,
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/index/FlinkHoodieIndex.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/index/FlinkHoodieIndex.java
index be2273a840..f43130234f 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/index/FlinkHoodieIndex.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/index/FlinkHoodieIndex.java
@@ -26,7 +26,6 @@ import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.data.HoodieListData;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieIndexException;
import org.apache.hudi.table.HoodieTable;
@@ -37,7 +36,7 @@ import java.util.stream.Collectors;
/**
* Base flink implementation of {@link HoodieIndex}.
*/
-public abstract class FlinkHoodieIndex<T extends HoodieRecordPayload> extends HoodieIndex<List<HoodieRecord<T>>, List<WriteStatus>> {
+public abstract class FlinkHoodieIndex<T> extends HoodieIndex<List<HoodieRecord<T>>, List<WriteStatus>> {
protected FlinkHoodieIndex(HoodieWriteConfig config) {
super(config);
}
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/ExplicitWriteHandleFactory.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/ExplicitWriteHandleFactory.java
index e598a03375..c040603db4 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/ExplicitWriteHandleFactory.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/ExplicitWriteHandleFactory.java
@@ -19,14 +19,13 @@
package org.apache.hudi.io;
import org.apache.hudi.common.engine.TaskContextSupplier;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
/**
* Create handle factory for Flink writer, use the specified write handle directly.
*/
-public class ExplicitWriteHandleFactory<T extends HoodieRecordPayload, I, K, O>
+public class ExplicitWriteHandleFactory<T, I, K, O>
extends WriteHandleFactory<T, I, K, O> {
private final HoodieWriteHandle<T, I, K, O> writeHandle;
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkAppendHandle.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkAppendHandle.java
index 2258375fdd..4b56d6a442 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkAppendHandle.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkAppendHandle.java
@@ -21,7 +21,6 @@ package org.apache.hudi.io;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.marker.WriteMarkers;
@@ -44,7 +43,7 @@ import java.util.List;
* <p>The back-up writer may roll over on condition (e.g., the filesystem does not support append
* or the file size hits the configured threshold).
*/
-public class FlinkAppendHandle<T extends HoodieRecordPayload, I, K, O>
+public class FlinkAppendHandle<T, I, K, O>
extends HoodieAppendHandle<T, I, K, O> implements MiniBatchHandle {
private static final Logger LOG = LoggerFactory.getLogger(FlinkAppendHandle.class);
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkConcatAndReplaceHandle.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkConcatAndReplaceHandle.java
index 300e8c512b..662e8381e6 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkConcatAndReplaceHandle.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkConcatAndReplaceHandle.java
@@ -18,16 +18,12 @@
package org.apache.hudi.io;
+import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieUpsertException;
-import org.apache.hudi.keygen.KeyGenUtils;
import org.apache.hudi.table.HoodieTable;
-
-import org.apache.avro.generic.GenericRecord;
-import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -41,7 +37,7 @@ import java.util.Iterator;
* <P>The records iterator for super constructor is reset as empty thus the initialization for new records
* does nothing. This handle keeps the iterator for itself to override the write behavior.
*/
-public class FlinkConcatAndReplaceHandle<T extends HoodieRecordPayload, I, K, O>
+public class FlinkConcatAndReplaceHandle<T, I, K, O>
extends FlinkMergeAndReplaceHandle<T, I, K, O> {
private static final Logger LOG = LoggerFactory.getLogger(FlinkConcatAndReplaceHandle.class);
@@ -59,10 +55,10 @@ public class FlinkConcatAndReplaceHandle<T extends HoodieRecordPayload, I, K, O>
* Write old record as is w/o merging with incoming record.
*/
@Override
- public void write(GenericRecord oldRecord) {
- String key = KeyGenUtils.getRecordKeyFromGenericRecord(oldRecord, keyGeneratorOpt);
+ public void write(HoodieRecord oldRecord) {
+ String key = oldRecord.getRecordKey(keyGeneratorOpt);
try {
- fileWriter.writeAvro(key, oldRecord);
+ fileWriter.write(key, oldRecord, writeSchema);
} catch (IOException | RuntimeException e) {
String errMsg = String.format("Failed to write old record into new file for key %s from old file %s to new file %s with writerSchema %s",
key, getOldFilePath(), newFilePath, writeSchemaWithMetaFields.toString(true));
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkConcatHandle.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkConcatHandle.java
index 812155c3d2..8e4fb50e52 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkConcatHandle.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkConcatHandle.java
@@ -20,13 +20,9 @@ package org.apache.hudi.io;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieUpsertException;
-import org.apache.hudi.keygen.KeyGenUtils;
import org.apache.hudi.table.HoodieTable;
-
-import org.apache.avro.generic.GenericRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -40,7 +36,7 @@ import java.util.Iterator;
* <P>The records iterator for super constructor is reset as empty thus the initialization for new records
* does nothing. This handle keeps the iterator for itself to override the write behavior.
*/
-public class FlinkConcatHandle<T extends HoodieRecordPayload, I, K, O>
+public class FlinkConcatHandle<T, I, K, O>
extends FlinkMergeHandle<T, I, K, O> {
private static final Logger LOG = LoggerFactory.getLogger(FlinkConcatHandle.class);
@@ -58,10 +54,10 @@ public class FlinkConcatHandle<T extends HoodieRecordPayload, I, K, O>
* Write old record as is w/o merging with incoming record.
*/
@Override
- public void write(GenericRecord oldRecord) {
- String key = KeyGenUtils.getRecordKeyFromGenericRecord(oldRecord, keyGeneratorOpt);
+ public void write(HoodieRecord oldRecord) {
+ String key = oldRecord.getRecordKey(keyGeneratorOpt);
try {
- fileWriter.writeAvro(key, oldRecord);
+ fileWriter.write(key, oldRecord, writeSchema);
} catch (IOException | RuntimeException e) {
String errMsg = String.format("Failed to write old record into new file for key %s from old file %s to new file %s with writerSchema %s",
key, getOldFilePath(), newFilePath, writeSchemaWithMetaFields.toString(true));
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkCreateHandle.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkCreateHandle.java
index 777e228c95..460329f446 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkCreateHandle.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkCreateHandle.java
@@ -22,7 +22,6 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieException;
@@ -48,7 +47,7 @@ import java.util.List;
*
* @see FlinkMergeAndReplaceHandle
*/
-public class FlinkCreateHandle<T extends HoodieRecordPayload, I, K, O>
+public class FlinkCreateHandle<T, I, K, O>
extends HoodieCreateHandle<T, I, K, O> implements MiniBatchHandle {
private static final Logger LOG = LogManager.getLogger(FlinkCreateHandle.class);
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkMergeAndReplaceHandle.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkMergeAndReplaceHandle.java
index 9fea0a9718..cc987b16bf 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkMergeAndReplaceHandle.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkMergeAndReplaceHandle.java
@@ -23,7 +23,6 @@ import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieException;
@@ -50,7 +49,7 @@ import java.util.List;
* then closes the file and rename to the old file name,
* behaves like the new data buffer are appended to the old file.
*/
-public class FlinkMergeAndReplaceHandle<T extends HoodieRecordPayload, I, K, O>
+public class FlinkMergeAndReplaceHandle<T, I, K, O>
extends HoodieMergeHandle<T, I, K, O>
implements MiniBatchHandle {
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkMergeHandle.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkMergeHandle.java
index 69121a9a04..ac68d92b91 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkMergeHandle.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkMergeHandle.java
@@ -22,7 +22,6 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieException;
@@ -51,7 +50,7 @@ import java.util.List;
*
* @see FlinkMergeAndReplaceHandle
*/
-public class FlinkMergeHandle<T extends HoodieRecordPayload, I, K, O>
+public class FlinkMergeHandle<T, I, K, O>
extends HoodieMergeHandle<T, I, K, O>
implements MiniBatchHandle {
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/ExplicitWriteHandleTable.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/ExplicitWriteHandleTable.java
index b95894bed8..4145c9630c 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/ExplicitWriteHandleTable.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/ExplicitWriteHandleTable.java
@@ -22,7 +22,6 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.io.HoodieWriteHandle;
import org.apache.hudi.table.action.HoodieWriteMetadata;
@@ -32,7 +31,7 @@ import java.util.List;
* HoodieTable that need to pass in the
* {@link org.apache.hudi.io.HoodieWriteHandle} explicitly.
*/
-public interface ExplicitWriteHandleTable<T extends HoodieRecordPayload> {
+public interface ExplicitWriteHandleTable<T> {
/**
* Upsert a batch of new records into Hoodie table at the supplied instantTime.
*
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkCopyOnWriteTable.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkCopyOnWriteTable.java
index 7d2be6cb93..95bde05f0a 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkCopyOnWriteTable.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkCopyOnWriteTable.java
@@ -35,7 +35,6 @@ import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -84,7 +83,7 @@ import java.util.Map;
* <p>
* UPDATES - Produce a new version of the file, just replacing the updated records with new values
*/
-public class HoodieFlinkCopyOnWriteTable<T extends HoodieRecordPayload>
+public class HoodieFlinkCopyOnWriteTable<T>
extends HoodieFlinkTable<T> implements HoodieCompactionHandler<T> {
private static final Logger LOG = LoggerFactory.getLogger(HoodieFlinkCopyOnWriteTable.class);
@@ -408,7 +407,7 @@ public class HoodieFlinkCopyOnWriteTable<T extends HoodieRecordPayload>
@Override
public Iterator<List<WriteStatus>> handleInsert(
String instantTime, String partitionPath, String fileId,
- Map<String, HoodieRecord<? extends HoodieRecordPayload>> recordMap) {
+ Map<String, HoodieRecord> recordMap) {
HoodieCreateHandle<?, ?, ?, ?> createHandle =
new HoodieCreateHandle(config, instantTime, this, partitionPath, fileId, recordMap, taskContextSupplier);
createHandle.write();
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkMergeOnReadTable.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkMergeOnReadTable.java
index aa8adde735..9d5311d810 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkMergeOnReadTable.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkMergeOnReadTable.java
@@ -24,7 +24,6 @@ import org.apache.hudi.avro.model.HoodieRollbackPlan;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -48,7 +47,7 @@ import java.util.Map;
/**
* Flink MERGE_ON_READ table.
*/
-public class HoodieFlinkMergeOnReadTable<T extends HoodieRecordPayload>
+public class HoodieFlinkMergeOnReadTable<T>
extends HoodieFlinkCopyOnWriteTable<T> {
HoodieFlinkMergeOnReadTable(
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkTable.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkTable.java
index 4e7dbe36c4..2730b992ce 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkTable.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkTable.java
@@ -24,7 +24,6 @@ import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
import org.apache.hudi.common.util.Option;
@@ -40,10 +39,12 @@ import org.apache.avro.specific.SpecificRecordBase;
import java.util.List;
+import static org.apache.hudi.common.data.HoodieList.getList;
+
/**
* Impl of a flink hoodie table.
*/
-public abstract class HoodieFlinkTable<T extends HoodieRecordPayload>
+public abstract class HoodieFlinkTable<T>
extends HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>>
implements ExplicitWriteHandleTable<T> {
@@ -51,7 +52,7 @@ public abstract class HoodieFlinkTable<T extends HoodieRecordPayload>
super(config, context, metaClient);
}
- public static <T extends HoodieRecordPayload> HoodieFlinkTable<T> create(HoodieWriteConfig config, HoodieFlinkEngineContext context) {
+ public static <T> HoodieFlinkTable<T> create(HoodieWriteConfig config, HoodieFlinkEngineContext context) {
HoodieTableMetaClient metaClient =
HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(config.getBasePath())
.setLoadActiveTimelineOnLoad(true).setConsistencyGuardConfig(config.getConsistencyGuardConfig())
@@ -60,9 +61,9 @@ public abstract class HoodieFlinkTable<T extends HoodieRecordPayload>
return HoodieFlinkTable.create(config, context, metaClient);
}
- public static <T extends HoodieRecordPayload> HoodieFlinkTable<T> create(HoodieWriteConfig config,
- HoodieFlinkEngineContext context,
- HoodieTableMetaClient metaClient) {
+ public static <T> HoodieFlinkTable<T> create(HoodieWriteConfig config,
+ HoodieFlinkEngineContext context,
+ HoodieTableMetaClient metaClient) {
final HoodieFlinkTable<T> hoodieFlinkTable;
switch (metaClient.getTableType()) {
case COPY_ON_WRITE:
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/BaseFlinkCommitActionExecutor.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/BaseFlinkCommitActionExecutor.java
index 51138cd29d..fc670d1f4b 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/BaseFlinkCommitActionExecutor.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/BaseFlinkCommitActionExecutor.java
@@ -23,7 +23,6 @@ import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
@@ -67,7 +66,7 @@ import java.util.stream.Collectors;
* <p>Computing the records batch locations all at a time is a pressure to the engine,
* we should avoid that in streaming system.
*/
-public abstract class BaseFlinkCommitActionExecutor<T extends HoodieRecordPayload> extends
+public abstract class BaseFlinkCommitActionExecutor<T> extends
BaseCommitActionExecutor<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>, HoodieWriteMetadata> {
private static final Logger LOG = LogManager.getLogger(BaseFlinkCommitActionExecutor.class);
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkDeleteCommitActionExecutor.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkDeleteCommitActionExecutor.java
index 23e3c01eac..115d9555e2 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkDeleteCommitActionExecutor.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkDeleteCommitActionExecutor.java
@@ -21,7 +21,6 @@ package org.apache.hudi.table.action.commit;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieKey;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.io.HoodieWriteHandle;
@@ -33,7 +32,7 @@ import java.util.List;
/**
* Flink delete commit action executor.
*/
-public class FlinkDeleteCommitActionExecutor<T extends HoodieRecordPayload<T>> extends BaseFlinkCommitActionExecutor<T> {
+public class FlinkDeleteCommitActionExecutor<T> extends BaseFlinkCommitActionExecutor<T> {
private final List<HoodieKey> keys;
public FlinkDeleteCommitActionExecutor(HoodieEngineContext context,
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkInsertCommitActionExecutor.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkInsertCommitActionExecutor.java
index 3ae6802e7c..387aae2976 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkInsertCommitActionExecutor.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkInsertCommitActionExecutor.java
@@ -21,7 +21,6 @@ package org.apache.hudi.table.action.commit;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.io.HoodieWriteHandle;
@@ -33,7 +32,7 @@ import java.util.List;
/**
* Flink insert commit action executor.
*/
-public class FlinkInsertCommitActionExecutor<T extends HoodieRecordPayload<T>> extends BaseFlinkCommitActionExecutor<T> {
+public class FlinkInsertCommitActionExecutor<T> extends BaseFlinkCommitActionExecutor<T> {
private List<HoodieRecord<T>> inputRecords;
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkInsertOverwriteCommitActionExecutor.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkInsertOverwriteCommitActionExecutor.java
index 5f6d06ea54..d4fdbffa0e 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkInsertOverwriteCommitActionExecutor.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkInsertOverwriteCommitActionExecutor.java
@@ -21,7 +21,6 @@ package org.apache.hudi.table.action.commit;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.config.HoodieWriteConfig;
@@ -34,7 +33,7 @@ import java.util.List;
/**
* Flink INSERT OVERWRITE commit action executor.
*/
-public class FlinkInsertOverwriteCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class FlinkInsertOverwriteCommitActionExecutor<T>
extends BaseFlinkCommitActionExecutor<T> {
protected List<HoodieRecord<T>> inputRecords;
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkInsertOverwriteTableCommitActionExecutor.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkInsertOverwriteTableCommitActionExecutor.java
index f52b2d9c98..d1149614c4 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkInsertOverwriteTableCommitActionExecutor.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkInsertOverwriteTableCommitActionExecutor.java
@@ -21,7 +21,6 @@ package org.apache.hudi.table.action.commit;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.io.HoodieWriteHandle;
@@ -33,7 +32,7 @@ import java.util.List;
/**
* Flink INSERT OVERWRITE TABLE commit action executor.
*/
-public class FlinkInsertOverwriteTableCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class FlinkInsertOverwriteTableCommitActionExecutor<T>
extends FlinkInsertOverwriteCommitActionExecutor<T> {
public FlinkInsertOverwriteTableCommitActionExecutor(HoodieEngineContext context,
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkInsertPreppedCommitActionExecutor.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkInsertPreppedCommitActionExecutor.java
index 240b04d7eb..78c63f15a6 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkInsertPreppedCommitActionExecutor.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkInsertPreppedCommitActionExecutor.java
@@ -21,7 +21,6 @@ package org.apache.hudi.table.action.commit;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.io.HoodieWriteHandle;
@@ -33,7 +32,7 @@ import java.util.List;
/**
* Flink insert prepped commit action executor.
*/
-public class FlinkInsertPreppedCommitActionExecutor<T extends HoodieRecordPayload<T>> extends BaseFlinkCommitActionExecutor<T> {
+public class FlinkInsertPreppedCommitActionExecutor<T> extends BaseFlinkCommitActionExecutor<T> {
private final List<HoodieRecord<T>> preppedRecords;
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkMergeHelper.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkMergeHelper.java
index 01466484d6..ea5e139785 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkMergeHelper.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkMergeHelper.java
@@ -20,17 +20,18 @@ package org.apache.hudi.table.action.commit;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.model.HoodieBaseFile;
+import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.queue.BoundedInMemoryExecutor;
import org.apache.hudi.common.util.queue.IteratorBasedQueueProducer;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.io.HoodieMergeHandle;
-import org.apache.hudi.io.storage.HoodieFileReader;
+import org.apache.hudi.io.storage.HoodieAvroFileReader;
import org.apache.hudi.io.storage.HoodieFileReaderFactory;
import org.apache.hudi.table.HoodieTable;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
@@ -42,12 +43,11 @@ import org.apache.hadoop.conf.Configuration;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
/**
* Flink merge helper.
*/
-public class FlinkMergeHelper<T extends HoodieRecordPayload> extends BaseMergeHelper<T, List<HoodieRecord<T>>,
+public class FlinkMergeHelper<T> extends BaseMergeHelper<T, List<HoodieRecord<T>>,
List<HoodieKey>, List<WriteStatus>> {
private FlinkMergeHelper() {
@@ -82,23 +82,24 @@ public class FlinkMergeHelper<T extends HoodieRecordPayload> extends BaseMergeHe
BoundedInMemoryExecutor<GenericRecord, GenericRecord, Void> wrapper = null;
Configuration cfgForHoodieFile = new Configuration(table.getHadoopConf());
- HoodieFileReader<GenericRecord> reader = HoodieFileReaderFactory.<GenericRecord>getFileReader(cfgForHoodieFile, mergeHandle.getOldFilePath());
+ HoodieAvroFileReader reader = HoodieFileReaderFactory.getFileReader(cfgForHoodieFile, mergeHandle.getOldFilePath());
try {
- final Iterator<GenericRecord> readerIterator;
+ final Iterator<HoodieRecord> readerIterator;
if (baseFile.getBootstrapBaseFile().isPresent()) {
readerIterator = getMergingIterator(table, mergeHandle, baseFile, reader, readSchema, externalSchemaTransformation);
} else {
- readerIterator = reader.getRecordIterator(readSchema);
+ readerIterator = reader.getRecordIterator(readSchema, HoodieAvroIndexedRecord::new);
}
ThreadLocal<BinaryEncoder> encoderCache = new ThreadLocal<>();
ThreadLocal<BinaryDecoder> decoderCache = new ThreadLocal<>();
- wrapper = new BoundedInMemoryExecutor<>(table.getConfig().getWriteBufferLimitBytes(), new IteratorBasedQueueProducer<>(readerIterator),
+ wrapper = new BoundedInMemoryExecutor(table.getConfig().getWriteBufferLimitBytes(), new IteratorBasedQueueProducer<>(readerIterator),
Option.of(new UpdateHandler(mergeHandle)), record -> {
if (!externalSchemaTransformation) {
return record;
}
- return transformRecordBasedOnNewSchema(gReader, gWriter, encoderCache, decoderCache, (GenericRecord) record);
+      // TODO: handle record types other than Avro here
+ return transformRecordBasedOnNewSchema(gReader, gWriter, encoderCache, decoderCache, (GenericRecord) ((HoodieRecord)record).getData());
});
wrapper.execute();
} catch (Exception e) {
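In both merge helpers the reader now hands back HoodieRecord objects by passing a wrapper constructor into getRecordIterator (HoodieAvroIndexedRecord::new above). A minimal sketch of that shape, using simplified stand-in types rather than the real HoodieAvroFileReader API:

import java.util.Iterator;
import java.util.List;
import java.util.function.Function;

public class WrappedIteratorSketch {

  // Lazily maps each raw record through the supplied wrapper constructor.
  static <R, W> Iterator<W> wrap(Iterator<R> raw, Function<R, W> wrapper) {
    return new Iterator<W>() {
      @Override public boolean hasNext() { return raw.hasNext(); }
      @Override public W next() { return wrapper.apply(raw.next()); }
    };
  }

  // Stand-in for HoodieAvroIndexedRecord: holds the raw record behind a common type.
  static final class AvroIndexedRecord {
    final Object data;
    AvroIndexedRecord(Object data) { this.data = data; }
  }

  public static void main(String[] args) {
    Iterator<Object> rawRecords = List.<Object>of("rec-1", "rec-2").iterator();
    Iterator<AvroIndexedRecord> wrapped = wrap(rawRecords, AvroIndexedRecord::new);
    wrapped.forEachRemaining(r -> System.out.println(r.data));
  }
}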
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkUpsertCommitActionExecutor.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkUpsertCommitActionExecutor.java
index 1e7dbb85b1..7194593e2a 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkUpsertCommitActionExecutor.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkUpsertCommitActionExecutor.java
@@ -21,7 +21,6 @@ package org.apache.hudi.table.action.commit;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.io.HoodieWriteHandle;
@@ -33,7 +32,7 @@ import java.util.List;
/**
* Flink upsert commit action executor.
*/
-public class FlinkUpsertCommitActionExecutor<T extends HoodieRecordPayload<T>> extends BaseFlinkCommitActionExecutor<T> {
+public class FlinkUpsertCommitActionExecutor<T> extends BaseFlinkCommitActionExecutor<T> {
private List<HoodieRecord<T>> inputRecords;
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkUpsertPreppedCommitActionExecutor.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkUpsertPreppedCommitActionExecutor.java
index 8fb7bc6de2..1865cb0c4d 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkUpsertPreppedCommitActionExecutor.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkUpsertPreppedCommitActionExecutor.java
@@ -21,7 +21,6 @@ package org.apache.hudi.table.action.commit;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.io.HoodieWriteHandle;
@@ -33,7 +32,7 @@ import java.util.List;
/**
* Flink upsert prepped commit action executor.
*/
-public class FlinkUpsertPreppedCommitActionExecutor<T extends HoodieRecordPayload<T>> extends BaseFlinkCommitActionExecutor<T> {
+public class FlinkUpsertPreppedCommitActionExecutor<T> extends BaseFlinkCommitActionExecutor<T> {
private final List<HoodieRecord<T>> preppedRecords;
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkWriteHelper.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkWriteHelper.java
index ee57ecb29a..cfccf8584f 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkWriteHelper.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkWriteHelper.java
@@ -21,11 +21,9 @@ package org.apache.hudi.table.action.commit;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.data.HoodieListData;
import org.apache.hudi.common.engine.HoodieEngineContext;
-import org.apache.hudi.common.model.HoodieAvroRecord;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieOperation;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.util.CollectionUtils;
import org.apache.hudi.exception.HoodieUpsertException;
@@ -51,7 +49,7 @@ import java.util.stream.Collectors;
* <p>Computing the records batch locations all at a time is a pressure to the engine,
* we should avoid that in streaming system.
*/
-public class FlinkWriteHelper<T extends HoodieRecordPayload, R> extends BaseWriteHelper<T, List<HoodieRecord<T>>,
+public class FlinkWriteHelper<T, R> extends BaseWriteHelper<T, List<HoodieRecord<T>>,
List<HoodieKey>, List<WriteStatus>, R> {
private FlinkWriteHelper() {
@@ -99,17 +97,14 @@ public class FlinkWriteHelper<T extends HoodieRecordPayload, R> extends BaseWrit
// caution that the avro schema is not serializable
final Schema schema = new Schema.Parser().parse(schemaStr);
return keyedRecords.values().stream().map(x -> x.stream().reduce((rec1, rec2) -> {
- final T data1 = rec1.getData();
- final T data2 = rec2.getData();
-
- @SuppressWarnings("unchecked") final T reducedData = (T) data2.preCombine(data1, schema, CollectionUtils.emptyProps());
+ @SuppressWarnings("unchecked") final HoodieRecord reducedRec = rec2.preCombine(rec1);
// we cannot allow the user to change the key or partitionPath, since that will affect
// everything
// so pick it from one of the records.
- boolean choosePrev = data1 == reducedData;
+ boolean choosePrev = rec1 == reducedRec;
HoodieKey reducedKey = choosePrev ? rec1.getKey() : rec2.getKey();
HoodieOperation operation = choosePrev ? rec1.getOperation() : rec2.getOperation();
- HoodieRecord<T> hoodieRecord = new HoodieAvroRecord<>(reducedKey, reducedData, operation);
+ HoodieRecord<T> hoodieRecord = reducedRec.newInstance(reducedKey, operation);
// reuse the location from the first record.
hoodieRecord.setCurrentLocation(rec1.getCurrentLocation());
return hoodieRecord;
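The dedup/reduce path changes from payload-level preCombine to record-level preCombine, with the surviving record rebuilt through newInstance so the key (and operation) stay fixed. A self-contained sketch of that reduce step, using hypothetical simplified types with a numeric ordering value standing in for the payload's ordering logic:

import java.util.List;
import java.util.Optional;

public class PreCombineSketch {

  static final class Rec {
    final String key;
    final long orderingValue;
    final String data;
    Rec(String key, long orderingValue, String data) {
      this.key = key; this.orderingValue = orderingValue; this.data = data;
    }
    // Record-level preCombine: keep the record with the larger ordering value.
    Rec preCombine(Rec other) { return this.orderingValue >= other.orderingValue ? this : other; }
    // newInstance-style rebuild with an explicitly chosen key.
    Rec newInstance(String chosenKey) { return new Rec(chosenKey, orderingValue, data); }
  }

  public static void main(String[] args) {
    List<Rec> duplicates = List.of(new Rec("k1", 1L, "old"), new Rec("k1", 2L, "new"));
    Optional<Rec> reduced = duplicates.stream()
        .reduce((rec1, rec2) -> {
          Rec winner = rec2.preCombine(rec1);
          // The key must not change during combining, so pick it from one input record.
          return winner.newInstance(rec1.key);
        });
    reduced.ifPresent(r -> System.out.println(r.key + " -> " + r.data));
  }
}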
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/delta/BaseFlinkDeltaCommitActionExecutor.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/delta/BaseFlinkDeltaCommitActionExecutor.java
index d8ea958266..3fd8e23098 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/delta/BaseFlinkDeltaCommitActionExecutor.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/delta/BaseFlinkDeltaCommitActionExecutor.java
@@ -21,7 +21,6 @@ package org.apache.hudi.table.action.commit.delta;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.execution.FlinkLazyInsertIterable;
@@ -37,7 +36,7 @@ import java.util.List;
/**
* Base flink delta commit action executor.
*/
-public abstract class BaseFlinkDeltaCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public abstract class BaseFlinkDeltaCommitActionExecutor<T>
extends BaseFlinkCommitActionExecutor<T> {
public BaseFlinkDeltaCommitActionExecutor(HoodieEngineContext context,
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/delta/FlinkUpsertDeltaCommitActionExecutor.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/delta/FlinkUpsertDeltaCommitActionExecutor.java
index c95a6c1c7b..7e17e38832 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/delta/FlinkUpsertDeltaCommitActionExecutor.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/delta/FlinkUpsertDeltaCommitActionExecutor.java
@@ -20,7 +20,6 @@ package org.apache.hudi.table.action.commit.delta;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.io.FlinkAppendHandle;
@@ -33,7 +32,7 @@ import java.util.List;
/**
* Flink upsert delta commit action executor.
*/
-public class FlinkUpsertDeltaCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class FlinkUpsertDeltaCommitActionExecutor<T>
extends BaseFlinkDeltaCommitActionExecutor<T> {
private final List<HoodieRecord<T>> inputRecords;
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/delta/FlinkUpsertPreppedDeltaCommitActionExecutor.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/delta/FlinkUpsertPreppedDeltaCommitActionExecutor.java
index 94bcbc586c..683c7726c2 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/delta/FlinkUpsertPreppedDeltaCommitActionExecutor.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/delta/FlinkUpsertPreppedDeltaCommitActionExecutor.java
@@ -21,7 +21,6 @@ package org.apache.hudi.table.action.commit.delta;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.io.FlinkAppendHandle;
@@ -33,7 +32,7 @@ import java.util.List;
/**
* Flink upsert prepped delta commit action executor.
*/
-public class FlinkUpsertPreppedDeltaCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class FlinkUpsertPreppedDeltaCommitActionExecutor<T>
extends BaseFlinkDeltaCommitActionExecutor<T> {
private final List<HoodieRecord<T>> preppedRecords;
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/compact/HoodieFlinkMergeOnReadTableCompactor.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/compact/HoodieFlinkMergeOnReadTableCompactor.java
index 03b9f8e7ee..0dec2615bb 100644
--- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/compact/HoodieFlinkMergeOnReadTableCompactor.java
+++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/compact/HoodieFlinkMergeOnReadTableCompactor.java
@@ -22,7 +22,6 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.config.HoodieWriteConfig;
@@ -38,7 +37,7 @@ import java.util.List;
* <p>Note: the compaction logic is invoked through the flink pipeline.
*/
@SuppressWarnings("checkstyle:LineLength")
-public class HoodieFlinkMergeOnReadTableCompactor<T extends HoodieRecordPayload>
+public class HoodieFlinkMergeOnReadTableCompactor<T>
extends HoodieCompactor<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> {
@Override
diff --git a/hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/testutils/HoodieFlinkWriteableTestTable.java b/hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/testutils/HoodieFlinkWriteableTestTable.java
index 2a69e6fd67..b5d774c654 100644
--- a/hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/testutils/HoodieFlinkWriteableTestTable.java
+++ b/hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/testutils/HoodieFlinkWriteableTestTable.java
@@ -23,6 +23,7 @@ import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.common.bloom.BloomFilter;
import org.apache.hudi.common.bloom.BloomFilterFactory;
import org.apache.hudi.common.bloom.BloomFilterTypeCode;
+import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordLocation;
@@ -144,7 +145,7 @@ public class HoodieFlinkWriteableTestTable extends HoodieWriteableTestTable {
LOG.warn("Failed to convert record " + r.toString(), e);
return null;
}
- }).collect(Collectors.toList()), header, HoodieRecord.RECORD_KEY_METADATA_FIELD));
+ }).map(HoodieAvroIndexedRecord::new).collect(Collectors.toList()), header, HoodieRecord.RECORD_KEY_METADATA_FIELD));
return Pair.of(partitionPath, logWriter.getLogFile());
}
}
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/HoodieJavaWriteClient.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/HoodieJavaWriteClient.java
index b6951bc6b7..f3acae1c40 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/HoodieJavaWriteClient.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/HoodieJavaWriteClient.java
@@ -25,7 +25,6 @@ import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.HoodieTableMetaClient;
@@ -48,7 +47,7 @@ import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
-public class HoodieJavaWriteClient<T extends HoodieRecordPayload> extends
+public class HoodieJavaWriteClient<T> extends
BaseHoodieWriteClient<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> {
public HoodieJavaWriteClient(HoodieEngineContext context, HoodieWriteConfig clientConfig) {
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/JavaSizeBasedClusteringPlanStrategy.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/JavaSizeBasedClusteringPlanStrategy.java
index b229165241..739c2ed48b 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/JavaSizeBasedClusteringPlanStrategy.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/JavaSizeBasedClusteringPlanStrategy.java
@@ -26,7 +26,6 @@ import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieWriteConfig;
@@ -48,7 +47,7 @@ import static org.apache.hudi.config.HoodieClusteringConfig.PLAN_STRATEGY_SORT_C
* 1) Creates clustering groups based on max size allowed per group.
* 2) Excludes files that are greater than 'small.file.limit' from clustering plan.
*/
-public class JavaSizeBasedClusteringPlanStrategy<T extends HoodieRecordPayload<T>>
+public class JavaSizeBasedClusteringPlanStrategy<T>
extends PartitionAwareClusteringPlanStrategy<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> {
private static final Logger LOG = LogManager.getLogger(JavaSizeBasedClusteringPlanStrategy.class);
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/JavaExecutionStrategy.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/JavaExecutionStrategy.java
index 456bb3cb47..7db20bdf9b 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/JavaExecutionStrategy.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/JavaExecutionStrategy.java
@@ -19,6 +19,10 @@
package org.apache.hudi.client.clustering.run.strategy;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
+import org.apache.hadoop.fs.Path;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.avro.model.HoodieClusteringGroup;
import org.apache.hudi.avro.model.HoodieClusteringPlan;
@@ -44,7 +48,7 @@ import org.apache.hudi.exception.HoodieClusteringException;
import org.apache.hudi.execution.bulkinsert.JavaBulkInsertInternalPartitionerFactory;
import org.apache.hudi.execution.bulkinsert.JavaCustomColumnsSortPartitioner;
import org.apache.hudi.io.IOUtils;
-import org.apache.hudi.io.storage.HoodieFileReader;
+import org.apache.hudi.io.storage.HoodieAvroFileReader;
import org.apache.hudi.io.storage.HoodieFileReaderFactory;
import org.apache.hudi.keygen.BaseKeyGenerator;
import org.apache.hudi.keygen.KeyGenUtils;
@@ -52,11 +56,6 @@ import org.apache.hudi.table.BulkInsertPartitioner;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.HoodieWriteMetadata;
import org.apache.hudi.table.action.cluster.strategy.ClusteringExecutionStrategy;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.generic.IndexedRecord;
-import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
@@ -73,7 +72,7 @@ import static org.apache.hudi.config.HoodieClusteringConfig.PLAN_STRATEGY_SORT_C
/**
* Clustering strategy for Java engine.
*/
-public abstract class JavaExecutionStrategy<T extends HoodieRecordPayload<T>>
+public abstract class JavaExecutionStrategy<T>
extends ClusteringExecutionStrategy<T, HoodieData<HoodieRecord<T>>, HoodieData<HoodieKey>, HoodieData<WriteStatus>> {
private static final Logger LOG = LogManager.getLogger(JavaExecutionStrategy.class);
@@ -195,7 +194,7 @@ public abstract class JavaExecutionStrategy<T extends HoodieRecordPayload<T>>
.withBitCaskDiskMapCompressionEnabled(config.getCommonConfig().isBitCaskDiskMapCompressionEnabled())
.build();
- Option<HoodieFileReader> baseFileReader = StringUtils.isNullOrEmpty(clusteringOp.getDataFilePath())
+ Option<HoodieAvroFileReader> baseFileReader = StringUtils.isNullOrEmpty(clusteringOp.getDataFilePath())
? Option.empty()
: Option.of(HoodieFileReaderFactory.getFileReader(table.getHadoopConf(), new Path(clusteringOp.getDataFilePath())));
HoodieTableConfig tableConfig = table.getMetaClient().getTableConfig();
@@ -221,7 +220,7 @@ public abstract class JavaExecutionStrategy<T extends HoodieRecordPayload<T>>
clusteringOps.forEach(clusteringOp -> {
try {
Schema readerSchema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(getWriteConfig().getSchema()));
- HoodieFileReader<IndexedRecord> baseFileReader = HoodieFileReaderFactory.getFileReader(getHoodieTable().getHadoopConf(), new Path(clusteringOp.getDataFilePath()));
+ HoodieAvroFileReader baseFileReader = HoodieFileReaderFactory.getFileReader(getHoodieTable().getHadoopConf(), new Path(clusteringOp.getDataFilePath()));
Iterator<IndexedRecord> recordIterator = baseFileReader.getRecordIterator(readerSchema);
recordIterator.forEachRemaining(record -> records.add(transform(record)));
} catch (IOException e) {
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/JavaSortAndSizeExecutionStrategy.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/JavaSortAndSizeExecutionStrategy.java
index d34673c2d9..8258c5e3e0 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/JavaSortAndSizeExecutionStrategy.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/JavaSortAndSizeExecutionStrategy.java
@@ -24,7 +24,6 @@ import org.apache.hudi.common.engine.EngineType;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieFileGroupId;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.config.HoodieStorageConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.io.CreateHandleFactory;
@@ -43,7 +42,7 @@ import java.util.Map;
* 1) Java execution engine.
* 2) Uses bulk_insert to write data into new files.
*/
-public class JavaSortAndSizeExecutionStrategy<T extends HoodieRecordPayload<T>>
+public class JavaSortAndSizeExecutionStrategy<T>
extends JavaExecutionStrategy<T> {
private static final Logger LOG = LogManager.getLogger(JavaSortAndSizeExecutionStrategy.class);
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/execution/JavaLazyInsertIterable.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/execution/JavaLazyInsertIterable.java
index 9821aedc87..ddf6345926 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/execution/JavaLazyInsertIterable.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/execution/JavaLazyInsertIterable.java
@@ -21,7 +21,6 @@ package org.apache.hudi.execution;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.queue.BoundedInMemoryExecutor;
import org.apache.hudi.common.util.queue.IteratorBasedQueueProducer;
@@ -35,7 +34,7 @@ import org.apache.avro.Schema;
import java.util.Iterator;
import java.util.List;
-public class JavaLazyInsertIterable<T extends HoodieRecordPayload> extends HoodieLazyInsertIterable<T> {
+public class JavaLazyInsertIterable<T> extends HoodieLazyInsertIterable<T> {
public JavaLazyInsertIterable(Iterator<HoodieRecord<T>> recordItr,
boolean areRecordsSorted,
HoodieWriteConfig config,
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/execution/bulkinsert/JavaCustomColumnsSortPartitioner.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/execution/bulkinsert/JavaCustomColumnsSortPartitioner.java
index b9e466485f..540cac2ab7 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/execution/bulkinsert/JavaCustomColumnsSortPartitioner.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/execution/bulkinsert/JavaCustomColumnsSortPartitioner.java
@@ -20,8 +20,8 @@
package org.apache.hudi.execution.bulkinsert;
import org.apache.hudi.avro.HoodieAvroUtils;
+import org.apache.hudi.common.model.HoodieAvroRecord;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.table.BulkInsertPartitioner;
import org.apache.avro.Schema;
@@ -34,7 +34,7 @@ import java.util.stream.Collectors;
*
* @param <T> HoodieRecordPayload type
*/
-public class JavaCustomColumnsSortPartitioner<T extends HoodieRecordPayload>
+public class JavaCustomColumnsSortPartitioner<T>
implements BulkInsertPartitioner<List<HoodieRecord<T>>> {
private final String[] sortColumnNames;
@@ -51,8 +51,8 @@ public class JavaCustomColumnsSortPartitioner<T extends HoodieRecordPayload>
public List<HoodieRecord<T>> repartitionRecords(
List<HoodieRecord<T>> records, int outputPartitions) {
return records.stream().sorted((o1, o2) -> {
- Object values1 = HoodieAvroUtils.getRecordColumnValues(o1, sortColumnNames, schema, consistentLogicalTimestampEnabled);
- Object values2 = HoodieAvroUtils.getRecordColumnValues(o2, sortColumnNames, schema, consistentLogicalTimestampEnabled);
+ Object values1 = HoodieAvroUtils.getRecordColumnValues((HoodieAvroRecord)o1, sortColumnNames, schema, consistentLogicalTimestampEnabled);
+ Object values2 = HoodieAvroUtils.getRecordColumnValues((HoodieAvroRecord)o2, sortColumnNames, schema, consistentLogicalTimestampEnabled);
return values1.toString().compareTo(values2.toString());
}).collect(Collectors.toList());
}
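The partitioner above now casts to HoodieAvroRecord before extracting sort-column values. Stripped of the Avro plumbing, the sort itself amounts to "concatenate the configured column values per record and order lexicographically"; a rough sketch with plain map-backed records (not Hudi types):

import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class CustomColumnSortSketch {

  // Concatenates the values of the sort columns into a single comparable string.
  static String columnValues(Map<String, Object> record, String[] sortColumns) {
    StringBuilder sb = new StringBuilder();
    for (String col : sortColumns) {
      sb.append(record.getOrDefault(col, ""));   // missing columns sort first
    }
    return sb.toString();
  }

  public static void main(String[] args) {
    String[] sortColumns = {"city", "ts"};
    List<Map<String, Object>> records = List.of(
        Map.of("city", "b", "ts", 2, "id", "r1"),
        Map.of("city", "a", "ts", 9, "id", "r2"));
    List<Map<String, Object>> sorted = records.stream()
        .sorted(Comparator.comparing((Map<String, Object> r) -> columnValues(r, sortColumns)))
        .collect(Collectors.toList());
    sorted.forEach(r -> System.out.println(r.get("id")));   // prints r2 then r1
  }
}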
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/execution/bulkinsert/JavaGlobalSortPartitioner.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/execution/bulkinsert/JavaGlobalSortPartitioner.java
index d272849a19..5317914a9c 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/execution/bulkinsert/JavaGlobalSortPartitioner.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/execution/bulkinsert/JavaGlobalSortPartitioner.java
@@ -19,7 +19,6 @@
package org.apache.hudi.execution.bulkinsert;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.table.BulkInsertPartitioner;
import java.util.Comparator;
@@ -32,7 +31,7 @@ import java.util.List;
*
* @param <T> HoodieRecordPayload type
*/
-public class JavaGlobalSortPartitioner<T extends HoodieRecordPayload>
+public class JavaGlobalSortPartitioner<T>
implements BulkInsertPartitioner<List<HoodieRecord<T>>> {
@Override
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/execution/bulkinsert/JavaNonSortPartitioner.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/execution/bulkinsert/JavaNonSortPartitioner.java
index b40459d838..acd9f4621a 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/execution/bulkinsert/JavaNonSortPartitioner.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/execution/bulkinsert/JavaNonSortPartitioner.java
@@ -19,7 +19,6 @@
package org.apache.hudi.execution.bulkinsert;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.table.BulkInsertPartitioner;
import java.util.List;
@@ -30,7 +29,7 @@ import java.util.List;
*
* @param <T> HoodieRecordPayload type
*/
-public class JavaNonSortPartitioner<T extends HoodieRecordPayload>
+public class JavaNonSortPartitioner<T>
implements BulkInsertPartitioner<List<HoodieRecord<T>>> {
@Override
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/index/JavaHoodieIndex.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/index/JavaHoodieIndex.java
index dcc9d050dc..f8efb141fd 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/index/JavaHoodieIndex.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/index/JavaHoodieIndex.java
@@ -26,7 +26,6 @@ import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.data.HoodieListData;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieIndexException;
import org.apache.hudi.table.HoodieTable;
@@ -34,7 +33,7 @@ import org.apache.hudi.table.HoodieTable;
import java.util.List;
import java.util.stream.Collectors;
-public abstract class JavaHoodieIndex<T extends HoodieRecordPayload> extends HoodieIndex<List<HoodieRecord<T>>, List<WriteStatus>> {
+public abstract class JavaHoodieIndex<T> extends HoodieIndex<List<HoodieRecord<T>>, List<WriteStatus>> {
protected JavaHoodieIndex(HoodieWriteConfig config) {
super(config);
}
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaCopyOnWriteTable.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaCopyOnWriteTable.java
index 8e72682725..96d2c6f8bd 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaCopyOnWriteTable.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaCopyOnWriteTable.java
@@ -35,7 +35,6 @@ import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -79,7 +78,7 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
-public class HoodieJavaCopyOnWriteTable<T extends HoodieRecordPayload>
+public class HoodieJavaCopyOnWriteTable<T>
extends HoodieJavaTable<T> implements HoodieCompactionHandler<T> {
private static final Logger LOG = LoggerFactory.getLogger(HoodieJavaCopyOnWriteTable.class);
@@ -306,7 +305,7 @@ public class HoodieJavaCopyOnWriteTable<T extends HoodieRecordPayload>
@Override
public Iterator<List<WriteStatus>> handleInsert(
String instantTime, String partitionPath, String fileId,
- Map<String, HoodieRecord<? extends HoodieRecordPayload>> recordMap) {
+ Map<String, HoodieRecord> recordMap) {
HoodieCreateHandle<?, ?, ?, ?> createHandle =
new HoodieCreateHandle(config, instantTime, this, partitionPath, fileId, recordMap, taskContextSupplier);
createHandle.write();
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaMergeOnReadTable.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaMergeOnReadTable.java
index 32d30f704e..9f89e0f084 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaMergeOnReadTable.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaMergeOnReadTable.java
@@ -23,7 +23,6 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.common.HoodieJavaEngineContext;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.util.Option;
@@ -38,7 +37,7 @@ import org.apache.hudi.table.action.deltacommit.JavaUpsertPreppedDeltaCommitActi
import java.util.List;
import java.util.Map;
-public class HoodieJavaMergeOnReadTable<T extends HoodieRecordPayload> extends HoodieJavaCopyOnWriteTable<T> {
+public class HoodieJavaMergeOnReadTable<T> extends HoodieJavaCopyOnWriteTable<T> {
protected HoodieJavaMergeOnReadTable(HoodieWriteConfig config, HoodieEngineContext context, HoodieTableMetaClient metaClient) {
super(config, context, metaClient);
}
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaTable.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaTable.java
index 3c878cbc14..7b25f389cf 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaTable.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaTable.java
@@ -24,7 +24,6 @@ import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
import org.apache.hudi.common.util.Option;
@@ -36,13 +35,13 @@ import org.apache.hudi.table.action.HoodieWriteMetadata;
import java.util.List;
-public abstract class HoodieJavaTable<T extends HoodieRecordPayload>
+public abstract class HoodieJavaTable<T>
extends HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> {
protected HoodieJavaTable(HoodieWriteConfig config, HoodieEngineContext context, HoodieTableMetaClient metaClient) {
super(config, context, metaClient);
}
- public static <T extends HoodieRecordPayload> HoodieJavaTable<T> create(HoodieWriteConfig config, HoodieEngineContext context) {
+ public static <T> HoodieJavaTable<T> create(HoodieWriteConfig config, HoodieEngineContext context) {
HoodieTableMetaClient metaClient =
HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(config.getBasePath())
.setLoadActiveTimelineOnLoad(true).setConsistencyGuardConfig(config.getConsistencyGuardConfig())
@@ -50,7 +49,7 @@ public abstract class HoodieJavaTable<T extends HoodieRecordPayload>
return HoodieJavaTable.create(config, (HoodieJavaEngineContext) context, metaClient);
}
- public static <T extends HoodieRecordPayload> HoodieJavaTable<T> create(HoodieWriteConfig config,
+ public static <T> HoodieJavaTable<T> create(HoodieWriteConfig config,
HoodieJavaEngineContext context,
HoodieTableMetaClient metaClient) {
switch (metaClient.getTableType()) {
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/cluster/JavaExecuteClusteringCommitActionExecutor.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/cluster/JavaExecuteClusteringCommitActionExecutor.java
index 168d558143..717f8446a2 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/cluster/JavaExecuteClusteringCommitActionExecutor.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/cluster/JavaExecuteClusteringCommitActionExecutor.java
@@ -23,7 +23,6 @@ import org.apache.hudi.avro.model.HoodieClusteringPlan;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.engine.HoodieEngineContext;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.util.ClusteringUtils;
@@ -36,7 +35,7 @@ import org.apache.hudi.table.action.commit.BaseJavaCommitActionExecutor;
import java.util.List;
-public class JavaExecuteClusteringCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class JavaExecuteClusteringCommitActionExecutor<T>
extends BaseJavaCommitActionExecutor<T> {
private final HoodieClusteringPlan clusteringPlan;
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/BaseJavaCommitActionExecutor.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/BaseJavaCommitActionExecutor.java
index 7762fd5ea3..a889a5e727 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/BaseJavaCommitActionExecutor.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/BaseJavaCommitActionExecutor.java
@@ -25,7 +25,6 @@ import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordLocation;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.HoodieTableMetaClient;
@@ -63,7 +62,7 @@ import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
-public abstract class BaseJavaCommitActionExecutor<T extends HoodieRecordPayload> extends
+public abstract class BaseJavaCommitActionExecutor<T> extends
BaseCommitActionExecutor<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>, HoodieWriteMetadata> {
private static final Logger LOG = LogManager.getLogger(BaseJavaCommitActionExecutor.class);
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertCommitActionExecutor.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertCommitActionExecutor.java
index d5c7a0b0b5..0a4b6a65f2 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertCommitActionExecutor.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertCommitActionExecutor.java
@@ -21,7 +21,6 @@ package org.apache.hudi.table.action.commit;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.common.HoodieJavaEngineContext;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
@@ -33,7 +32,7 @@ import org.apache.hudi.table.action.HoodieWriteMetadata;
import java.util.List;
import java.util.Map;
-public class JavaBulkInsertCommitActionExecutor<T extends HoodieRecordPayload<T>> extends BaseJavaCommitActionExecutor<T> {
+public class JavaBulkInsertCommitActionExecutor<T> extends BaseJavaCommitActionExecutor<T> {
private final List<HoodieRecord<T>> inputRecords;
private final Option<BulkInsertPartitioner> bulkInsertPartitioner;
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertHelper.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertHelper.java
index e126372aa9..0c76ea168a 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertHelper.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertHelper.java
@@ -22,7 +22,6 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.ReflectionUtils;
@@ -45,7 +44,7 @@ import java.util.List;
* @param <T>
*/
@SuppressWarnings("checkstyle:LineLength")
-public class JavaBulkInsertHelper<T extends HoodieRecordPayload, R> extends BaseBulkInsertHelper<T, List<HoodieRecord<T>>,
+public class JavaBulkInsertHelper<T, R> extends BaseBulkInsertHelper<T, List<HoodieRecord<T>>,
List<HoodieKey>, List<WriteStatus>, R> {
private JavaBulkInsertHelper() {
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertPreppedCommitActionExecutor.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertPreppedCommitActionExecutor.java
index 14c4c8a93e..b7a5811605 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertPreppedCommitActionExecutor.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertPreppedCommitActionExecutor.java
@@ -21,7 +21,6 @@ package org.apache.hudi.table.action.commit;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.common.HoodieJavaEngineContext;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
@@ -32,7 +31,7 @@ import org.apache.hudi.table.action.HoodieWriteMetadata;
import java.util.List;
-public class JavaBulkInsertPreppedCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class JavaBulkInsertPreppedCommitActionExecutor<T>
extends BaseJavaCommitActionExecutor<T> {
private final List<HoodieRecord<T>> preppedInputRecord;
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaDeleteCommitActionExecutor.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaDeleteCommitActionExecutor.java
index 72c2332645..13cd8e5945 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaDeleteCommitActionExecutor.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaDeleteCommitActionExecutor.java
@@ -21,7 +21,6 @@ package org.apache.hudi.table.action.commit;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieKey;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
@@ -29,7 +28,7 @@ import org.apache.hudi.table.action.HoodieWriteMetadata;
import java.util.List;
-public class JavaDeleteCommitActionExecutor<T extends HoodieRecordPayload<T>> extends BaseJavaCommitActionExecutor<T> {
+public class JavaDeleteCommitActionExecutor<T> extends BaseJavaCommitActionExecutor<T> {
private final List<HoodieKey> keys;
public JavaDeleteCommitActionExecutor(HoodieEngineContext context,
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaInsertCommitActionExecutor.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaInsertCommitActionExecutor.java
index c1fae07a9d..ec4f987df6 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaInsertCommitActionExecutor.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaInsertCommitActionExecutor.java
@@ -21,7 +21,6 @@ package org.apache.hudi.table.action.commit;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
@@ -29,7 +28,7 @@ import org.apache.hudi.table.action.HoodieWriteMetadata;
import java.util.List;
-public class JavaInsertCommitActionExecutor<T extends HoodieRecordPayload<T>> extends BaseJavaCommitActionExecutor<T> {
+public class JavaInsertCommitActionExecutor<T> extends BaseJavaCommitActionExecutor<T> {
private List<HoodieRecord<T>> inputRecords;
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaInsertOverwriteCommitActionExecutor.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaInsertOverwriteCommitActionExecutor.java
index a99485e67b..1b64adf5b7 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaInsertOverwriteCommitActionExecutor.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaInsertOverwriteCommitActionExecutor.java
@@ -21,7 +21,6 @@ package org.apache.hudi.table.action.commit;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.util.collection.Pair;
@@ -33,7 +32,7 @@ import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
-public class JavaInsertOverwriteCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class JavaInsertOverwriteCommitActionExecutor<T>
extends BaseJavaCommitActionExecutor<T> {
private final List<HoodieRecord<T>> inputRecords;
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaInsertOverwriteTableCommitActionExecutor.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaInsertOverwriteTableCommitActionExecutor.java
index a52ab6e0f3..3b4131e823 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaInsertOverwriteTableCommitActionExecutor.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaInsertOverwriteTableCommitActionExecutor.java
@@ -22,7 +22,6 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieWriteConfig;
@@ -34,7 +33,7 @@ import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
-public class JavaInsertOverwriteTableCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class JavaInsertOverwriteTableCommitActionExecutor<T>
extends JavaInsertOverwriteCommitActionExecutor<T> {
public JavaInsertOverwriteTableCommitActionExecutor(HoodieEngineContext context,
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaInsertPreppedCommitActionExecutor.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaInsertPreppedCommitActionExecutor.java
index 349cf69dcc..7f3bcc4d8d 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaInsertPreppedCommitActionExecutor.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaInsertPreppedCommitActionExecutor.java
@@ -21,7 +21,6 @@ package org.apache.hudi.table.action.commit;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.common.HoodieJavaEngineContext;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
@@ -30,7 +29,7 @@ import org.apache.hudi.table.action.HoodieWriteMetadata;
import java.util.List;
-public class JavaInsertPreppedCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class JavaInsertPreppedCommitActionExecutor<T>
extends BaseJavaCommitActionExecutor<T> {
private final List<HoodieRecord<T>> preppedRecords;
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaMergeHelper.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaMergeHelper.java
index 46dd30a7cb..4dcd0a6f7b 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaMergeHelper.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaMergeHelper.java
@@ -20,9 +20,9 @@ package org.apache.hudi.table.action.commit;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.model.HoodieBaseFile;
+import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.queue.BoundedInMemoryExecutor;
import org.apache.hudi.common.util.queue.IteratorBasedQueueProducer;
@@ -44,7 +44,7 @@ import java.io.IOException;
import java.util.Iterator;
import java.util.List;
-public class JavaMergeHelper<T extends HoodieRecordPayload> extends BaseMergeHelper<T, List<HoodieRecord<T>>,
+public class JavaMergeHelper<T> extends BaseMergeHelper<T, List<HoodieRecord<T>>,
List<HoodieKey>, List<WriteStatus>> {
private JavaMergeHelper() {
@@ -80,23 +80,24 @@ public class JavaMergeHelper<T extends HoodieRecordPayload> extends BaseMergeHel
}
BoundedInMemoryExecutor<GenericRecord, GenericRecord, Void> wrapper = null;
- HoodieFileReader<GenericRecord> reader = HoodieFileReaderFactory.<GenericRecord>getFileReader(cfgForHoodieFile, mergeHandle.getOldFilePath());
+ HoodieFileReader reader = HoodieFileReaderFactory.getFileReader(cfgForHoodieFile, mergeHandle.getOldFilePath());
try {
- final Iterator<GenericRecord> readerIterator;
+ final Iterator<HoodieRecord> readerIterator;
if (baseFile.getBootstrapBaseFile().isPresent()) {
readerIterator = getMergingIterator(table, mergeHandle, baseFile, reader, readSchema, externalSchemaTransformation);
} else {
- readerIterator = reader.getRecordIterator(readSchema);
+ readerIterator = reader.getRecordIterator(readSchema, HoodieAvroIndexedRecord::new);
}
ThreadLocal<BinaryEncoder> encoderCache = new ThreadLocal<>();
ThreadLocal<BinaryDecoder> decoderCache = new ThreadLocal<>();
- wrapper = new BoundedInMemoryExecutor<>(table.getConfig().getWriteBufferLimitBytes(), new IteratorBasedQueueProducer<>(readerIterator),
+ wrapper = new BoundedInMemoryExecutor(table.getConfig().getWriteBufferLimitBytes(), new IteratorBasedQueueProducer<>(readerIterator),
Option.of(new UpdateHandler(mergeHandle)), record -> {
if (!externalSchemaTransformation) {
return record;
}
- return transformRecordBasedOnNewSchema(gReader, gWriter, encoderCache, decoderCache, (GenericRecord) record);
+      // TODO: handle record types other than Avro here
+ return transformRecordBasedOnNewSchema(gReader, gWriter, encoderCache, decoderCache, (GenericRecord) ((HoodieRecord)record).getData());
});
wrapper.execute();
} catch (Exception e) {
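When external schema transformation is enabled, the merge helper unwraps the Avro payload from the HoodieRecord (the (GenericRecord) ((HoodieRecord) record).getData() cast above) and re-encodes it against the new schema. A sketch of that re-encoding step using the plain Avro API; rewriteRecord below is an illustrative helper under those assumptions, not Hudi's transformRecordBasedOnNewSchema:

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;

public class SchemaTransformSketch {

  // Serializes the record with the schema it was read with, then decodes it
  // into the target schema, letting Avro handle the field-level resolution.
  static GenericRecord rewriteRecord(GenericRecord source, Schema readSchema, Schema targetSchema)
      throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
    new GenericDatumWriter<GenericRecord>(readSchema).write(source, encoder);
    encoder.flush();
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(out.toByteArray(), null);
    return new GenericDatumReader<GenericRecord>(readSchema, targetSchema).read(null, decoder);
  }

  public static void main(String[] args) throws IOException {
    Schema schema = SchemaBuilder.record("r").fields().requiredString("id").endRecord();
    GenericRecord rec = new GenericData.Record(schema);
    rec.put("id", "abc");
    // Same schema on both sides here, just to show the round trip.
    System.out.println(rewriteRecord(rec, schema, schema));
  }
}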
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaUpsertCommitActionExecutor.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaUpsertCommitActionExecutor.java
index ed0af44028..34ec4e792c 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaUpsertCommitActionExecutor.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaUpsertCommitActionExecutor.java
@@ -21,7 +21,6 @@ package org.apache.hudi.table.action.commit;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
@@ -29,7 +28,7 @@ import org.apache.hudi.table.action.HoodieWriteMetadata;
import java.util.List;
-public class JavaUpsertCommitActionExecutor<T extends HoodieRecordPayload<T>> extends BaseJavaCommitActionExecutor<T> {
+public class JavaUpsertCommitActionExecutor<T> extends BaseJavaCommitActionExecutor<T> {
private List<HoodieRecord<T>> inputRecords;
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaUpsertPartitioner.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaUpsertPartitioner.java
index fb19259b55..40cd2eb577 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaUpsertPartitioner.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaUpsertPartitioner.java
@@ -24,7 +24,6 @@ import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecordLocation;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -52,7 +51,7 @@ import java.util.stream.Collectors;
/**
* Packs incoming records to be upserted, into buckets.
*/
-public class JavaUpsertPartitioner<T extends HoodieRecordPayload<T>> implements Partitioner {
+public class JavaUpsertPartitioner<T> implements Partitioner {
private static final Logger LOG = LogManager.getLogger(JavaUpsertPartitioner.class);
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaUpsertPreppedCommitActionExecutor.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaUpsertPreppedCommitActionExecutor.java
index 8eea5b5105..23fcf1f9ca 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaUpsertPreppedCommitActionExecutor.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaUpsertPreppedCommitActionExecutor.java
@@ -21,7 +21,6 @@ package org.apache.hudi.table.action.commit;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.common.HoodieJavaEngineContext;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
@@ -30,7 +29,7 @@ import org.apache.hudi.table.action.HoodieWriteMetadata;
import java.util.List;
-public class JavaUpsertPreppedCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class JavaUpsertPreppedCommitActionExecutor<T>
extends BaseJavaCommitActionExecutor<T> {
private final List<HoodieRecord<T>> preppedRecords;
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaWriteHelper.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaWriteHelper.java
index 6294b046d8..977dd4c690 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaWriteHelper.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaWriteHelper.java
@@ -21,10 +21,8 @@ package org.apache.hudi.table.action.commit;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.data.HoodieListData;
import org.apache.hudi.common.engine.HoodieEngineContext;
-import org.apache.hudi.common.model.HoodieAvroRecord;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.CollectionUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.index.HoodieIndex;
@@ -37,7 +35,7 @@ import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;
-public class JavaWriteHelper<T extends HoodieRecordPayload,R> extends BaseWriteHelper<T, List<HoodieRecord<T>>,
+public class JavaWriteHelper<T,R> extends BaseWriteHelper<T, List<HoodieRecord<T>>,
List<HoodieKey>, List<WriteStatus>, R> {
private JavaWriteHelper() {
@@ -70,11 +68,11 @@ public class JavaWriteHelper<T extends HoodieRecordPayload,R> extends BaseWriteH
final Schema schema = new Schema.Parser().parse(schemaStr);
return keyedRecords.values().stream().map(x -> x.stream().map(Pair::getRight).reduce((rec1, rec2) -> {
@SuppressWarnings("unchecked")
- T reducedData = (T) rec1.getData().preCombine(rec2.getData(), schema, CollectionUtils.emptyProps());
+ HoodieRecord reducedRec = rec2.preCombine(rec1, schema, CollectionUtils.emptyProps());
// we cannot allow the user to change the key or partitionPath, since that will affect
// everything
// so pick it from one of the records.
- return new HoodieAvroRecord<T>(rec1.getKey(), reducedData);
+ return (HoodieRecord<T>) reducedRec.newInstance(rec1.getKey());
}).orElse(null)).filter(Objects::nonNull).collect(Collectors.toList());
}
}
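In the hunk above, deduplication no longer reaches into the payload: records are combined through HoodieRecord#preCombine and the surviving record is rebound to the original key via newInstance. A minimal sketch of that reduce step, using only the calls visible in the hunk (class, method, and variable names here are illustrative, not from the repository):

    import org.apache.avro.Schema;
    import org.apache.hudi.common.model.HoodieRecord;
    import org.apache.hudi.common.util.CollectionUtils;

    final class PreCombineSketch {
      // Combine two records sharing the same HoodieKey; keep the key from rec1.
      @SuppressWarnings("unchecked")
      static <T> HoodieRecord<T> combine(HoodieRecord<T> rec1, HoodieRecord<T> rec2, Schema schema) {
        // preCombine is now invoked on the record itself instead of on its payload.
        HoodieRecord reduced = rec2.preCombine(rec1, schema, CollectionUtils.emptyProps());
        // The key and partition path must not change, so rebind the winner to rec1's key.
        return (HoodieRecord<T>) reduced.newInstance(rec1.getKey());
      }
    }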
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/compact/HoodieJavaMergeOnReadTableCompactor.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/compact/HoodieJavaMergeOnReadTableCompactor.java
index 30bdcda759..d1f1a8c24e 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/compact/HoodieJavaMergeOnReadTableCompactor.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/compact/HoodieJavaMergeOnReadTableCompactor.java
@@ -23,7 +23,6 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.config.HoodieWriteConfig;
@@ -36,7 +35,7 @@ import java.util.List;
* compactions, passes it through a CompactionFilter, executes all the compactions, writes
* a new version of the base files, and makes a normal commit.
*/
-public class HoodieJavaMergeOnReadTableCompactor<T extends HoodieRecordPayload>
+public class HoodieJavaMergeOnReadTableCompactor<T>
extends HoodieCompactor<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> {
@Override
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/deltacommit/BaseJavaDeltaCommitActionExecutor.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/deltacommit/BaseJavaDeltaCommitActionExecutor.java
index 0b4a654074..dda399712b 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/deltacommit/BaseJavaDeltaCommitActionExecutor.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/deltacommit/BaseJavaDeltaCommitActionExecutor.java
@@ -20,13 +20,12 @@
package org.apache.hudi.table.action.deltacommit;
import org.apache.hudi.common.engine.HoodieEngineContext;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.commit.BaseJavaCommitActionExecutor;
-public abstract class BaseJavaDeltaCommitActionExecutor<T extends HoodieRecordPayload<T>> extends BaseJavaCommitActionExecutor<T> {
+public abstract class BaseJavaDeltaCommitActionExecutor<T> extends BaseJavaCommitActionExecutor<T> {
public BaseJavaDeltaCommitActionExecutor(HoodieEngineContext context, HoodieWriteConfig config, HoodieTable table,
String instantTime, WriteOperationType operationType) {
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/deltacommit/JavaUpsertPreppedDeltaCommitActionExecutor.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/deltacommit/JavaUpsertPreppedDeltaCommitActionExecutor.java
index f6faa28bbb..ae01312f1f 100644
--- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/deltacommit/JavaUpsertPreppedDeltaCommitActionExecutor.java
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/deltacommit/JavaUpsertPreppedDeltaCommitActionExecutor.java
@@ -22,7 +22,6 @@ package org.apache.hudi.table.action.deltacommit;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.common.HoodieJavaEngineContext;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;
@@ -41,7 +40,7 @@ import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
-public class JavaUpsertPreppedDeltaCommitActionExecutor<T extends HoodieRecordPayload<T>> extends BaseJavaDeltaCommitActionExecutor<T> {
+public class JavaUpsertPreppedDeltaCommitActionExecutor<T> extends BaseJavaDeltaCommitActionExecutor<T> {
private static final Logger LOG = LogManager.getLogger(JavaUpsertPreppedDeltaCommitActionExecutor.class);
diff --git a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/execution/bulkinsert/TestJavaBulkInsertInternalPartitioner.java b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/execution/bulkinsert/TestJavaBulkInsertInternalPartitioner.java
index ee507b6045..4608e94a56 100644
--- a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/execution/bulkinsert/TestJavaBulkInsertInternalPartitioner.java
+++ b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/execution/bulkinsert/TestJavaBulkInsertInternalPartitioner.java
@@ -20,6 +20,7 @@
package org.apache.hudi.execution.bulkinsert;
import org.apache.hudi.avro.HoodieAvroUtils;
+import org.apache.hudi.common.model.HoodieAvroRecord;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
import org.apache.hudi.common.util.Option;
@@ -70,7 +71,7 @@ public class TestJavaBulkInsertInternalPartitioner extends HoodieJavaClientTestB
private Comparator<HoodieRecord> getCustomColumnComparator(Schema schema, String[] sortColumns) {
return Comparator.comparing(
- record -> HoodieAvroUtils.getRecordColumnValues(record, sortColumns, schema, false).toString());
+ record -> HoodieAvroUtils.getRecordColumnValues((HoodieAvroRecord)record, sortColumns, schema, false).toString());
}
private void verifyRecordAscendingOrder(List<HoodieRecord> records,
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieReadClient.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieReadClient.java
index 7277479f64..02f86cdc3e 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieReadClient.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieReadClient.java
@@ -31,7 +31,7 @@ import org.apache.spark.sql.SQLContext;
* @deprecated Use {@link SparkRDDReadClient} instead.
*/
@Deprecated
-public class HoodieReadClient<T extends HoodieRecordPayload<T>> extends SparkRDDReadClient<T> {
+public class HoodieReadClient<T> extends SparkRDDReadClient<T> {
public HoodieReadClient(HoodieSparkEngineContext context, String basePath) {
super(context, basePath);
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieSparkClusteringClient.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieSparkClusteringClient.java
index 0812b366aa..73037df40c 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieSparkClusteringClient.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieSparkClusteringClient.java
@@ -22,7 +22,6 @@ package org.apache.hudi.client;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.table.timeline.HoodieInstant;
@@ -37,7 +36,7 @@ import java.util.stream.Stream;
/**
* Async clustering client for Spark datasource.
*/
-public class HoodieSparkClusteringClient<T extends HoodieRecordPayload> extends
+public class HoodieSparkClusteringClient<T> extends
BaseClusterer<T, JavaRDD<HoodieRecord<T>>, JavaRDD<HoodieKey>, JavaRDD<WriteStatus>> {
private static final Logger LOG = LogManager.getLogger(HoodieSparkClusteringClient.class);
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieSparkCompactor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieSparkCompactor.java
index b3dc27b6fc..ae826a6bf2 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieSparkCompactor.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieSparkCompactor.java
@@ -21,7 +21,6 @@ package org.apache.hudi.client;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.Option;
@@ -34,7 +33,7 @@ import org.apache.spark.api.java.JavaRDD;
import java.util.List;
-public class HoodieSparkCompactor<T extends HoodieRecordPayload> extends BaseCompactor<T,
+public class HoodieSparkCompactor<T> extends BaseCompactor<T,
JavaRDD<HoodieRecord<T>>, JavaRDD<HoodieKey>, JavaRDD<WriteStatus>> {
private static final Logger LOG = LogManager.getLogger(HoodieSparkCompactor.class);
private transient HoodieEngineContext context;
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDWriteClient.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDWriteClient.java
index 7110e26bb0..650c617e41 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDWriteClient.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDWriteClient.java
@@ -30,7 +30,6 @@ import org.apache.hudi.common.metrics.Registry;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.model.TableServiceType;
@@ -73,7 +72,7 @@ import java.util.Map;
import java.util.stream.Collectors;
@SuppressWarnings("checkstyle:LineLength")
-public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
+public class SparkRDDWriteClient<T> extends
BaseHoodieWriteClient<T, JavaRDD<HoodieRecord<T>>, JavaRDD<HoodieKey>, JavaRDD<WriteStatus>> {
private static final Logger LOG = LogManager.getLogger(SparkRDDWriteClient.class);
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/SparkSingleFileSortPlanStrategy.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/SparkSingleFileSortPlanStrategy.java
index acb8dadf0a..ba3f917d04 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/SparkSingleFileSortPlanStrategy.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/SparkSingleFileSortPlanStrategy.java
@@ -22,7 +22,6 @@ package org.apache.hudi.client.clustering.plan.strategy;
import org.apache.hudi.avro.model.HoodieClusteringGroup;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.FileSlice;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
@@ -36,7 +35,7 @@ import java.util.stream.Stream;
* In this strategy, clustering group for each partition is built in the same way as {@link SparkSizeBasedClusteringPlanStrategy}.
* The difference is that the number of output groups is 1 and the file group id remains the same.
*/
-public class SparkSingleFileSortPlanStrategy<T extends HoodieRecordPayload<T>>
+public class SparkSingleFileSortPlanStrategy<T>
extends SparkSizeBasedClusteringPlanStrategy<T> {
public SparkSingleFileSortPlanStrategy(HoodieTable table, HoodieEngineContext engineContext, HoodieWriteConfig writeConfig) {
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/SparkSizeBasedClusteringPlanStrategy.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/SparkSizeBasedClusteringPlanStrategy.java
index 46d1851d13..5b048c2da7 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/SparkSizeBasedClusteringPlanStrategy.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/SparkSizeBasedClusteringPlanStrategy.java
@@ -25,7 +25,6 @@ import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieWriteConfig;
@@ -49,7 +48,7 @@ import static org.apache.hudi.config.HoodieClusteringConfig.PLAN_STRATEGY_SORT_C
* 1) Creates clustering groups based on max size allowed per group.
* 2) Excludes files that are greater than 'small.file.limit' from the clustering plan.
*/
-public class SparkSizeBasedClusteringPlanStrategy<T extends HoodieRecordPayload<T>>
+public class SparkSizeBasedClusteringPlanStrategy<T>
extends PartitionAwareClusteringPlanStrategy<T, JavaRDD<HoodieRecord<T>>, JavaRDD<HoodieKey>, JavaRDD<WriteStatus>> {
private static final Logger LOG = LogManager.getLogger(SparkSizeBasedClusteringPlanStrategy.class);
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/MultipleSparkJobExecutionStrategy.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/MultipleSparkJobExecutionStrategy.java
index eab98f2f19..cedeb33c8e 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/MultipleSparkJobExecutionStrategy.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/MultipleSparkJobExecutionStrategy.java
@@ -55,7 +55,7 @@ import org.apache.hudi.execution.bulkinsert.RDDSpatialCurveSortPartitioner;
import org.apache.hudi.execution.bulkinsert.RowCustomColumnsSortPartitioner;
import org.apache.hudi.execution.bulkinsert.RowSpatialCurveSortPartitioner;
import org.apache.hudi.io.IOUtils;
-import org.apache.hudi.io.storage.HoodieFileReader;
+import org.apache.hudi.io.storage.HoodieAvroFileReader;
import org.apache.hudi.io.storage.HoodieFileReaderFactory;
import org.apache.hudi.keygen.BaseKeyGenerator;
import org.apache.hudi.keygen.KeyGenUtils;
@@ -97,7 +97,7 @@ import static org.apache.hudi.config.HoodieClusteringConfig.PLAN_STRATEGY_SORT_C
/**
* Clustering strategy to submit multiple spark jobs and union the results.
*/
-public abstract class MultipleSparkJobExecutionStrategy<T extends HoodieRecordPayload<T>>
+public abstract class MultipleSparkJobExecutionStrategy<T>
extends ClusteringExecutionStrategy<T, HoodieData<HoodieRecord<T>>, HoodieData<HoodieKey>, HoodieData<WriteStatus>> {
private static final Logger LOG = LogManager.getLogger(MultipleSparkJobExecutionStrategy.class);
@@ -298,7 +298,7 @@ public abstract class MultipleSparkJobExecutionStrategy<T extends HoodieRecordPa
.withBitCaskDiskMapCompressionEnabled(config.getCommonConfig().isBitCaskDiskMapCompressionEnabled())
.build();
- Option<HoodieFileReader> baseFileReader = StringUtils.isNullOrEmpty(clusteringOp.getDataFilePath())
+ Option<HoodieAvroFileReader> baseFileReader = StringUtils.isNullOrEmpty(clusteringOp.getDataFilePath())
? Option.empty()
: Option.of(HoodieFileReaderFactory.getFileReader(table.getHadoopConf(), new Path(clusteringOp.getDataFilePath())));
HoodieTableConfig tableConfig = table.getMetaClient().getTableConfig();
@@ -333,7 +333,7 @@ public abstract class MultipleSparkJobExecutionStrategy<T extends HoodieRecordPa
clusteringOpsPartition.forEachRemaining(clusteringOp -> {
try {
Schema readerSchema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(writeConfig.getSchema()));
- HoodieFileReader<IndexedRecord> baseFileReader = HoodieFileReaderFactory.getFileReader(hadoopConf.get(), new Path(clusteringOp.getDataFilePath()));
+ HoodieAvroFileReader baseFileReader = HoodieFileReaderFactory.getFileReader(hadoopConf.get(), new Path(clusteringOp.getDataFilePath()));
iteratorsForPartition.add(baseFileReader.getRecordIterator(readerSchema));
} catch (IOException e) {
throw new HoodieClusteringException("Error reading input data for " + clusteringOp.getDataFilePath()
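The reader handle in this file changes from the generic HoodieFileReader<IndexedRecord> to the Avro-specific HoodieAvroFileReader, while the factory call stays the same. A minimal sketch of opening a clustering input base file with the calls shown in this hunk (the helper name is illustrative; the iterator's element type is whatever getRecordIterator yields on this branch):

    import java.io.IOException;
    import java.util.Iterator;
    import org.apache.avro.Schema;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hudi.io.storage.HoodieAvroFileReader;
    import org.apache.hudi.io.storage.HoodieFileReaderFactory;

    final class BaseFileReaderSketch {
      // Open one base file and expose its records through the Avro-typed reader.
      static Iterator<?> openBaseFile(Configuration hadoopConf, Path dataFilePath, Schema readerSchema)
          throws IOException {
        HoodieAvroFileReader baseFileReader =
            HoodieFileReaderFactory.getFileReader(hadoopConf, dataFilePath);
        return baseFileReader.getRecordIterator(readerSchema);
      }
    }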
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SingleSparkJobExecutionStrategy.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SingleSparkJobExecutionStrategy.java
index bb6d3df5f1..c268992192 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SingleSparkJobExecutionStrategy.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SingleSparkJobExecutionStrategy.java
@@ -72,7 +72,7 @@ import java.util.stream.StreamSupport;
* Clustering strategy to submit single spark jobs.
* MultipleSparkJobExecution strategy is not ideal for use cases that require a large number of clustering groups
*/
-public abstract class SingleSparkJobExecutionStrategy<T extends HoodieRecordPayload<T>>
+public abstract class SingleSparkJobExecutionStrategy<T>
extends ClusteringExecutionStrategy<T, HoodieData<HoodieRecord<T>>, HoodieData<HoodieKey>, HoodieData<WriteStatus>> {
private static final Logger LOG = LogManager.getLogger(SingleSparkJobExecutionStrategy.class);
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SparkSingleFileSortExecutionStrategy.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SparkSingleFileSortExecutionStrategy.java
index f2ae9a922d..902528a675 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SparkSingleFileSortExecutionStrategy.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SparkSingleFileSortExecutionStrategy.java
@@ -25,7 +25,6 @@ import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieFileGroupId;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.config.HoodieStorageConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieClusteringException;
@@ -46,7 +45,7 @@ import java.util.Map;
* This strategy is similar to {@link SparkSortAndSizeExecutionStrategy} with the difference being that
* there should be only one large file group per clustering group.
*/
-public class SparkSingleFileSortExecutionStrategy<T extends HoodieRecordPayload<T>>
+public class SparkSingleFileSortExecutionStrategy<T>
extends MultipleSparkJobExecutionStrategy<T> {
private static final Logger LOG = LogManager.getLogger(SparkSingleFileSortExecutionStrategy.class);
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SparkSortAndSizeExecutionStrategy.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SparkSortAndSizeExecutionStrategy.java
index 35c8f288bc..86c603c3ca 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SparkSortAndSizeExecutionStrategy.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SparkSortAndSizeExecutionStrategy.java
@@ -24,7 +24,6 @@ import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieFileGroupId;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.config.HoodieStorageConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.io.CreateHandleFactory;
@@ -45,7 +44,7 @@ import java.util.Map;
* 1) Spark execution engine.
* 2) Uses bulk_insert to write data into new files.
*/
-public class SparkSortAndSizeExecutionStrategy<T extends HoodieRecordPayload<T>>
+public class SparkSortAndSizeExecutionStrategy<T>
extends MultipleSparkJobExecutionStrategy<T> {
private static final Logger LOG = LogManager.getLogger(SparkSortAndSizeExecutionStrategy.class);
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/update/strategy/BaseSparkUpdateStrategy.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/update/strategy/BaseSparkUpdateStrategy.java
index 8263c20921..751e2a2858 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/update/strategy/BaseSparkUpdateStrategy.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/update/strategy/BaseSparkUpdateStrategy.java
@@ -22,7 +22,6 @@ import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieFileGroupId;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.cluster.strategy.UpdateStrategy;
@@ -33,7 +32,7 @@ import java.util.Set;
* Spark base update strategy: records written to file groups that are under clustering
* need to be checked. Spark-related implementations should extend this base class.
*/
-public abstract class BaseSparkUpdateStrategy<T extends HoodieRecordPayload<T>> extends UpdateStrategy<T, HoodieData<HoodieRecord<T>>> {
+public abstract class BaseSparkUpdateStrategy<T> extends UpdateStrategy<T, HoodieData<HoodieRecord<T>>> {
public BaseSparkUpdateStrategy(HoodieEngineContext engineContext, HoodieTable table,
Set<HoodieFileGroupId> fileGroupsInPendingClustering) {
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/update/strategy/SparkAllowUpdateStrategy.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/update/strategy/SparkAllowUpdateStrategy.java
index acb6d82ae1..49d29ee010 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/update/strategy/SparkAllowUpdateStrategy.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/update/strategy/SparkAllowUpdateStrategy.java
@@ -22,7 +22,6 @@ import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieFileGroupId;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.table.HoodieTable;
@@ -33,7 +32,7 @@ import java.util.stream.Collectors;
/**
* Allow ingestion commits during clustering job.
*/
-public class SparkAllowUpdateStrategy<T extends HoodieRecordPayload<T>> extends BaseSparkUpdateStrategy<T> {
+public class SparkAllowUpdateStrategy<T> extends BaseSparkUpdateStrategy<T> {
public SparkAllowUpdateStrategy(
HoodieEngineContext engineContext, HoodieTable table, Set<HoodieFileGroupId> fileGroupsInPendingClustering) {
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/update/strategy/SparkRejectUpdateStrategy.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/update/strategy/SparkRejectUpdateStrategy.java
index 2b05942f00..654ddc7be1 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/update/strategy/SparkRejectUpdateStrategy.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/update/strategy/SparkRejectUpdateStrategy.java
@@ -22,7 +22,6 @@ import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieFileGroupId;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.exception.HoodieClusteringUpdateException;
import org.apache.hudi.table.HoodieTable;
@@ -38,7 +37,7 @@ import java.util.Set;
* Update strategy based on the following rule:
* if some file groups have update records, throw an exception.
*/
-public class SparkRejectUpdateStrategy<T extends HoodieRecordPayload<T>> extends BaseSparkUpdateStrategy<T> {
+public class SparkRejectUpdateStrategy<T> extends BaseSparkUpdateStrategy<T> {
private static final Logger LOG = LogManager.getLogger(SparkRejectUpdateStrategy.class);
public SparkRejectUpdateStrategy(HoodieEngineContext engineContext, HoodieTable table, Set<HoodieFileGroupId> fileGroupsInPendingClustering) {
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SparkPreCommitValidator.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SparkPreCommitValidator.java
index f08d11b571..8c4814f3e2 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SparkPreCommitValidator.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SparkPreCommitValidator.java
@@ -21,7 +21,6 @@ package org.apache.hudi.client.validator;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.engine.HoodieEngineContext;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.util.HoodieTimer;
import org.apache.hudi.config.HoodieWriteConfig;
@@ -42,7 +41,7 @@ import java.util.stream.Collectors;
/**
* Validator can be configured pre-commit.
*/
-public abstract class SparkPreCommitValidator<T extends HoodieRecordPayload, I, K, O extends HoodieData<WriteStatus>> {
+public abstract class SparkPreCommitValidator<T, I, K, O extends HoodieData<WriteStatus>> {
private static final Logger LOG = LogManager.getLogger(SparkPreCommitValidator.class);
private HoodieSparkTable<T> table;
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SqlQueryEqualityPreCommitValidator.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SqlQueryEqualityPreCommitValidator.java
index 2506d52b4a..98b930f7a7 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SqlQueryEqualityPreCommitValidator.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SqlQueryEqualityPreCommitValidator.java
@@ -21,7 +21,6 @@ package org.apache.hudi.client.validator;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.engine.HoodieEngineContext;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.config.HoodiePreCommitValidatorConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieValidationException;
@@ -40,7 +39,7 @@ import org.apache.spark.sql.SQLContext;
*
* Expects both queries to return the same result.
*/
-public class SqlQueryEqualityPreCommitValidator<T extends HoodieRecordPayload, I, K, O extends HoodieData<WriteStatus>> extends SqlQueryPreCommitValidator<T, I, K, O> {
+public class SqlQueryEqualityPreCommitValidator<T, I, K, O extends HoodieData<WriteStatus>> extends SqlQueryPreCommitValidator<T, I, K, O> {
private static final Logger LOG = LogManager.getLogger(SqlQueryEqualityPreCommitValidator.class);
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SqlQueryInequalityPreCommitValidator.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SqlQueryInequalityPreCommitValidator.java
index 8a25150651..0821774619 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SqlQueryInequalityPreCommitValidator.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SqlQueryInequalityPreCommitValidator.java
@@ -21,7 +21,6 @@ package org.apache.hudi.client.validator;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.engine.HoodieEngineContext;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.config.HoodiePreCommitValidatorConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieValidationException;
@@ -40,7 +39,7 @@ import org.apache.spark.sql.SQLContext;
* <p>
* Expects that the query results do not match.
*/
-public class SqlQueryInequalityPreCommitValidator<T extends HoodieRecordPayload, I, K, O extends HoodieData<WriteStatus>> extends SqlQueryPreCommitValidator<T, I, K, O> {
+public class SqlQueryInequalityPreCommitValidator<T, I, K, O extends HoodieData<WriteStatus>> extends SqlQueryPreCommitValidator<T, I, K, O> {
private static final Logger LOG = LogManager.getLogger(SqlQueryInequalityPreCommitValidator.class);
public SqlQueryInequalityPreCommitValidator(HoodieSparkTable<T> table, HoodieEngineContext engineContext, HoodieWriteConfig config) {
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SqlQueryPreCommitValidator.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SqlQueryPreCommitValidator.java
index 3a88d54d36..e0120c2961 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SqlQueryPreCommitValidator.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SqlQueryPreCommitValidator.java
@@ -22,7 +22,6 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.engine.HoodieEngineContext;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieValidationException;
@@ -42,7 +41,7 @@ import java.util.concurrent.atomic.AtomicInteger;
/**
* Validator framework to run sql queries and compare table state at different locations.
*/
-public abstract class SqlQueryPreCommitValidator<T extends HoodieRecordPayload, I, K, O extends HoodieData<WriteStatus>> extends SparkPreCommitValidator<T, I, K, O> {
+public abstract class SqlQueryPreCommitValidator<T, I, K, O extends HoodieData<WriteStatus>> extends SparkPreCommitValidator<T, I, K, O> {
private static final Logger LOG = LogManager.getLogger(SqlQueryPreCommitValidator.class);
private static final AtomicInteger TABLE_COUNTER = new AtomicInteger(0);
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SqlQuerySingleResultPreCommitValidator.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SqlQuerySingleResultPreCommitValidator.java
index b1942244d3..c62187a8f2 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SqlQuerySingleResultPreCommitValidator.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SqlQuerySingleResultPreCommitValidator.java
@@ -21,7 +21,6 @@ package org.apache.hudi.client.validator;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.engine.HoodieEngineContext;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.config.HoodiePreCommitValidatorConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieValidationException;
@@ -40,7 +39,7 @@ import java.util.List;
* <p>
* Example configuration: "query1#expectedResult1;query2#expectedResult2;"
*/
-public class SqlQuerySingleResultPreCommitValidator<T extends HoodieRecordPayload, I, K, O extends HoodieData<WriteStatus>> extends SqlQueryPreCommitValidator<T, I, K, O> {
+public class SqlQuerySingleResultPreCommitValidator<T, I, K, O extends HoodieData<WriteStatus>> extends SqlQueryPreCommitValidator<T, I, K, O> {
private static final Logger LOG = LogManager.getLogger(SqlQueryInequalityPreCommitValidator.class);
public SqlQuerySingleResultPreCommitValidator(HoodieSparkTable<T> table, HoodieEngineContext engineContext, HoodieWriteConfig config) {
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/SparkLazyInsertIterable.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/SparkLazyInsertIterable.java
index df5bd2d3f4..f1ae56d70b 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/SparkLazyInsertIterable.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/SparkLazyInsertIterable.java
@@ -22,7 +22,6 @@ import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.queue.BoundedInMemoryExecutor;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieException;
@@ -34,7 +33,7 @@ import org.apache.avro.Schema;
import java.util.Iterator;
import java.util.List;
-public class SparkLazyInsertIterable<T extends HoodieRecordPayload> extends HoodieLazyInsertIterable<T> {
+public class SparkLazyInsertIterable<T> extends HoodieLazyInsertIterable<T> {
private boolean useWriterSchema;
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/BulkInsertMapFunction.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/BulkInsertMapFunction.java
index 66c3bdddcb..c54d579224 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/BulkInsertMapFunction.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/BulkInsertMapFunction.java
@@ -20,7 +20,6 @@ package org.apache.hudi.execution.bulkinsert;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.execution.SparkLazyInsertIterable;
import org.apache.hudi.io.WriteHandleFactory;
@@ -35,7 +34,7 @@ import java.util.List;
/**
* Map function that handles a stream of HoodieRecords.
*/
-public class BulkInsertMapFunction<T extends HoodieRecordPayload>
+public class BulkInsertMapFunction<T>
implements Function2<Integer, Iterator<HoodieRecord<T>>, Iterator<List<WriteStatus>>> {
private String instantTime;
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/GlobalSortPartitioner.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/GlobalSortPartitioner.java
index a184c009a1..c966781795 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/GlobalSortPartitioner.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/GlobalSortPartitioner.java
@@ -19,7 +19,6 @@
package org.apache.hudi.execution.bulkinsert;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.table.BulkInsertPartitioner;
import org.apache.spark.api.java.JavaRDD;
@@ -31,7 +30,7 @@ import org.apache.spark.api.java.JavaRDD;
*
* @param <T> HoodieRecordPayload type
*/
-public class GlobalSortPartitioner<T extends HoodieRecordPayload>
+public class GlobalSortPartitioner<T>
implements BulkInsertPartitioner<JavaRDD<HoodieRecord<T>>> {
@Override
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/NonSortPartitioner.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/NonSortPartitioner.java
index 19c90ecb1a..75ededbd19 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/NonSortPartitioner.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/NonSortPartitioner.java
@@ -19,7 +19,6 @@
package org.apache.hudi.execution.bulkinsert;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.table.BulkInsertPartitioner;
import org.apache.spark.api.java.JavaRDD;
@@ -30,7 +29,7 @@ import org.apache.spark.api.java.JavaRDD;
*
* @param <T> HoodieRecordPayload type
*/
-public class NonSortPartitioner<T extends HoodieRecordPayload>
+public class NonSortPartitioner<T>
implements BulkInsertPartitioner<JavaRDD<HoodieRecord<T>>> {
@Override
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/RDDCustomColumnsSortPartitioner.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/RDDCustomColumnsSortPartitioner.java
index dc80498c7a..120677f811 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/RDDCustomColumnsSortPartitioner.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/RDDCustomColumnsSortPartitioner.java
@@ -20,8 +20,8 @@ package org.apache.hudi.execution.bulkinsert;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.common.config.SerializableSchema;
+import org.apache.hudi.common.model.HoodieAvroRecord;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.BulkInsertPartitioner;
@@ -36,7 +36,7 @@ import java.util.Arrays;
*
* @param <T> HoodieRecordPayload type
*/
-public class RDDCustomColumnsSortPartitioner<T extends HoodieRecordPayload>
+public class RDDCustomColumnsSortPartitioner<T>
implements BulkInsertPartitioner<JavaRDD<HoodieRecord<T>>> {
private final String[] sortColumnNames;
@@ -63,7 +63,7 @@ public class RDDCustomColumnsSortPartitioner<T extends HoodieRecordPayload>
final boolean consistentLogicalTimestampEnabled = this.consistentLogicalTimestampEnabled;
return records.sortBy(
record -> {
- Object recordValue = HoodieAvroUtils.getRecordColumnValues(record, sortColumns, schema, consistentLogicalTimestampEnabled);
+ Object recordValue = HoodieAvroUtils.getRecordColumnValues((HoodieAvroRecord)record, sortColumns, schema, consistentLogicalTimestampEnabled);
// null values are replaced with empty string for null_first order
if (recordValue == null) {
return StringUtils.EMPTY_STRING;
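Because getRecordColumnValues now operates on the Avro-backed record type, the partitioner casts each HoodieRecord before extracting its sort columns. A minimal sketch of computing the per-record sort key, mirroring the calls in this hunk and the Schema-based overload used in the earlier test change (the helper name is illustrative):

    import org.apache.avro.Schema;
    import org.apache.hudi.avro.HoodieAvroUtils;
    import org.apache.hudi.common.model.HoodieAvroRecord;
    import org.apache.hudi.common.model.HoodieRecord;
    import org.apache.hudi.common.util.StringUtils;

    final class SortKeySketch {
      // Column values are read through the Avro-backed record; nulls map to the empty
      // string so that they group first, as in the partitioner above.
      static Object sortKey(HoodieRecord<?> record, String[] sortColumns, Schema schema,
                            boolean consistentLogicalTimestampEnabled) {
        Object value = HoodieAvroUtils.getRecordColumnValues(
            (HoodieAvroRecord) record, sortColumns, schema, consistentLogicalTimestampEnabled);
        return value == null ? StringUtils.EMPTY_STRING : value;
      }
    }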
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/RDDPartitionSortPartitioner.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/RDDPartitionSortPartitioner.java
index 9526ad5856..6145ea2219 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/RDDPartitionSortPartitioner.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/RDDPartitionSortPartitioner.java
@@ -19,7 +19,6 @@
package org.apache.hudi.execution.bulkinsert;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.table.BulkInsertPartitioner;
import org.apache.spark.api.java.JavaRDD;
@@ -37,7 +36,7 @@ import scala.Tuple2;
*
* @param <T> HoodieRecordPayload type
*/
-public class RDDPartitionSortPartitioner<T extends HoodieRecordPayload>
+public class RDDPartitionSortPartitioner<T>
implements BulkInsertPartitioner<JavaRDD<HoodieRecord<T>>> {
@Override
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/RDDSpatialCurveSortPartitioner.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/RDDSpatialCurveSortPartitioner.java
index 2ab9107fa5..428b4f8687 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/RDDSpatialCurveSortPartitioner.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/RDDSpatialCurveSortPartitioner.java
@@ -25,7 +25,6 @@ import org.apache.hudi.common.config.SerializableSchema;
import org.apache.hudi.common.model.HoodieAvroRecord;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.RewriteAvroPayload;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieClusteringConfig;
@@ -36,12 +35,16 @@ import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+
/**
* A partitioner that does spatial curve optimization sorting based on specified column values for each RDD partition.
* Supports z-curve optimization; Hilbert curve support will come soon.
* @param <T> HoodieRecordPayload type
*/
-public class RDDSpatialCurveSortPartitioner<T extends HoodieRecordPayload>
+public class RDDSpatialCurveSortPartitioner<T>
extends SpatialCurveSortPartitionerBase<JavaRDD<HoodieRecord<T>>> {
private final transient HoodieSparkEngineContext sparkEngineContext;
@@ -60,7 +63,7 @@ public class RDDSpatialCurveSortPartitioner<T extends HoodieRecordPayload>
@Override
public JavaRDD<HoodieRecord<T>> repartitionRecords(JavaRDD<HoodieRecord<T>> records, int outputSparkPartitions) {
JavaRDD<GenericRecord> genericRecordsRDD =
- records.map(f -> (GenericRecord) f.getData().getInsertValue(schema.get()).get());
+ records.map(f -> (GenericRecord) f.toIndexedRecord(schema.get(), new Properties()).get());
Dataset<Row> sourceDataset =
AvroConversionUtils.createDataFrame(
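Producing the Avro view of each record now goes through HoodieRecord#toIndexedRecord rather than the payload's getInsertValue. A minimal sketch of that per-record conversion as used in the map above (declared to throw broadly, since the Spark map function tolerates checked exceptions; the helper name is illustrative):

    import java.util.Properties;
    import org.apache.avro.Schema;
    import org.apache.avro.generic.GenericRecord;
    import org.apache.hudi.common.model.HoodieRecord;

    final class ToAvroSketch {
      // Render one HoodieRecord as a GenericRecord for the spatial-curve sort.
      // The .get() assumes the record carries data, as the original lambda does.
      static GenericRecord toAvro(HoodieRecord<?> record, Schema schema) throws Exception {
        return (GenericRecord) record.toIndexedRecord(schema, new Properties()).get();
      }
    }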
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/SparkHoodieIndex.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/SparkHoodieIndex.java
index aece86a387..76606e316c 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/SparkHoodieIndex.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/SparkHoodieIndex.java
@@ -25,7 +25,6 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.data.HoodieJavaRDD;
import org.apache.hudi.exception.HoodieIndexException;
@@ -34,7 +33,7 @@ import org.apache.hudi.table.HoodieTable;
import org.apache.spark.api.java.JavaRDD;
@SuppressWarnings("checkstyle:LineLength")
-public abstract class SparkHoodieIndex<T extends HoodieRecordPayload<T>>
+public abstract class SparkHoodieIndex<T>
extends HoodieIndex<JavaRDD<HoodieRecord<T>>, JavaRDD<WriteStatus>> {
protected SparkHoodieIndex(HoodieWriteConfig config) {
super(config);
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowCreateHandle.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowCreateHandle.java
index 9da04f7260..12c444d850 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowCreateHandle.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowCreateHandle.java
@@ -23,6 +23,7 @@ import org.apache.hudi.client.model.HoodieInternalRow;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodiePartitionMetadata;
import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.model.HoodieRecord.HoodieMetadataField;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.model.IOType;
import org.apache.hudi.common.util.HoodieTimer;
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkCopyOnWriteTable.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkCopyOnWriteTable.java
index 115aea06f2..014ffe1b01 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkCopyOnWriteTable.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkCopyOnWriteTable.java
@@ -37,7 +37,6 @@ import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -94,7 +93,7 @@ import java.util.Map;
* <p>
* UPDATES - Produce a new version of the file, just replacing the updated records with new values
*/
-public class HoodieSparkCopyOnWriteTable<T extends HoodieRecordPayload>
+public class HoodieSparkCopyOnWriteTable<T>
extends HoodieSparkTable<T> implements HoodieCompactionHandler<T> {
private static final Logger LOG = LogManager.getLogger(HoodieSparkCopyOnWriteTable.class);
@@ -257,7 +256,7 @@ public class HoodieSparkCopyOnWriteTable<T extends HoodieRecordPayload>
@Override
public Iterator<List<WriteStatus>> handleInsert(
String instantTime, String partitionPath, String fileId,
- Map<String, HoodieRecord<? extends HoodieRecordPayload>> recordMap) {
+ Map<String, HoodieRecord> recordMap) {
HoodieCreateHandle<?, ?, ?, ?> createHandle =
new HoodieCreateHandle(config, instantTime, this, partitionPath, fileId, recordMap, taskContextSupplier);
createHandle.write();
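handleInsert now accepts its records without a payload bound on the value type. A minimal sketch of assembling that recordMap argument (assuming, as elsewhere in Hudi, that the map is keyed by record key; the helper name is illustrative):

    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    import org.apache.hudi.common.model.HoodieRecord;

    final class RecordMapSketch {
      // Build the recordMap handed to handleInsert: one entry per record key,
      // values are plain HoodieRecords with no payload-specific type bound.
      static Map<String, HoodieRecord> toRecordMap(List<HoodieRecord> records) {
        Map<String, HoodieRecord> recordMap = new HashMap<>();
        for (HoodieRecord record : records) {
          recordMap.put(record.getRecordKey(), record);
        }
        return recordMap;
      }
    }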
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkMergeOnReadTable.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkMergeOnReadTable.java
index efc667af29..8a68c53c66 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkMergeOnReadTable.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkMergeOnReadTable.java
@@ -28,7 +28,6 @@ import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
@@ -73,7 +72,7 @@ import java.util.Map;
* action
* </p>
*/
-public class HoodieSparkMergeOnReadTable<T extends HoodieRecordPayload> extends HoodieSparkCopyOnWriteTable<T> {
+public class HoodieSparkMergeOnReadTable<T> extends HoodieSparkCopyOnWriteTable<T> {
HoodieSparkMergeOnReadTable(HoodieWriteConfig config, HoodieEngineContext context, HoodieTableMetaClient metaClient) {
super(config, context, metaClient);
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkTable.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkTable.java
index 66d51c9128..789f044cf5 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkTable.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkTable.java
@@ -24,7 +24,6 @@ import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
import org.apache.hudi.common.util.Option;
@@ -44,7 +43,7 @@ import org.apache.spark.TaskContext$;
import java.io.IOException;
-public abstract class HoodieSparkTable<T extends HoodieRecordPayload>
+public abstract class HoodieSparkTable<T>
extends HoodieTable<T, HoodieData<HoodieRecord<T>>, HoodieData<HoodieKey>, HoodieData<WriteStatus>> {
private volatile boolean isMetadataTableExists = false;
@@ -53,7 +52,7 @@ public abstract class HoodieSparkTable<T extends HoodieRecordPayload>
super(config, context, metaClient);
}
- public static <T extends HoodieRecordPayload> HoodieSparkTable<T> create(HoodieWriteConfig config, HoodieEngineContext context) {
+ public static <T> HoodieSparkTable<T> create(HoodieWriteConfig config, HoodieEngineContext context) {
HoodieTableMetaClient metaClient =
HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(config.getBasePath())
.setLoadActiveTimelineOnLoad(true).setConsistencyGuardConfig(config.getConsistencyGuardConfig())
@@ -63,9 +62,9 @@ public abstract class HoodieSparkTable<T extends HoodieRecordPayload>
return HoodieSparkTable.create(config, (HoodieSparkEngineContext) context, metaClient);
}
- public static <T extends HoodieRecordPayload> HoodieSparkTable<T> create(HoodieWriteConfig config,
- HoodieSparkEngineContext context,
- HoodieTableMetaClient metaClient) {
+ public static <T> HoodieSparkTable<T> create(HoodieWriteConfig config,
+ HoodieSparkEngineContext context,
+ HoodieTableMetaClient metaClient) {
HoodieSparkTable<T> hoodieSparkTable;
switch (metaClient.getTableType()) {
case COPY_ON_WRITE:
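With the payload bound removed from the factory methods above, callers can instantiate the table for any record type. A minimal sketch of going through the relaxed factory (a thin wrapper for illustration only):

    import org.apache.hudi.common.engine.HoodieEngineContext;
    import org.apache.hudi.config.HoodieWriteConfig;
    import org.apache.hudi.table.HoodieSparkTable;

    final class TableFactorySketch {
      // T is now an unconstrained record type; the factory still resolves
      // COPY_ON_WRITE vs MERGE_ON_READ from the table's meta client.
      static <T> HoodieSparkTable<T> openTable(HoodieWriteConfig config, HoodieEngineContext context) {
        return HoodieSparkTable.create(config, context);
      }
    }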
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapCommitActionExecutor.java
index 88f6a54e0d..56e8f81405 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapCommitActionExecutor.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapCommitActionExecutor.java
@@ -38,7 +38,6 @@ import org.apache.hudi.common.model.BootstrapFileMapping;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.HoodieTableMetaClient;
@@ -86,7 +85,7 @@ import static org.apache.hudi.common.util.ValidationUtils.checkArgument;
import static org.apache.hudi.config.HoodieWriteConfig.WRITE_STATUS_STORAGE_LEVEL_VALUE;
import static org.apache.hudi.table.action.bootstrap.MetadataBootstrapHandlerFactory.getMetadataHandler;
-public class SparkBootstrapCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class SparkBootstrapCommitActionExecutor<T>
extends BaseCommitActionExecutor<T, HoodieData<HoodieRecord<T>>, HoodieData<HoodieKey>, HoodieData<WriteStatus>, HoodieBootstrapWriteMetadata<HoodieData<WriteStatus>>> {
private static final Logger LOG = LogManager.getLogger(SparkBootstrapCommitActionExecutor.class);
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapDeltaCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapDeltaCommitActionExecutor.java
index 0d2ac6ceef..ea30cbf678 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapDeltaCommitActionExecutor.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapDeltaCommitActionExecutor.java
@@ -21,7 +21,6 @@ package org.apache.hudi.table.action.bootstrap;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
@@ -31,7 +30,7 @@ import org.apache.hudi.table.action.deltacommit.SparkBulkInsertDeltaCommitAction
import java.util.Map;
-public class SparkBootstrapDeltaCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class SparkBootstrapDeltaCommitActionExecutor<T>
extends SparkBootstrapCommitActionExecutor<T> {
public SparkBootstrapDeltaCommitActionExecutor(HoodieSparkEngineContext context,
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/cluster/SparkExecuteClusteringCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/cluster/SparkExecuteClusteringCommitActionExecutor.java
index 7d2a4c0baa..ee61b57a10 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/cluster/SparkExecuteClusteringCommitActionExecutor.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/cluster/SparkExecuteClusteringCommitActionExecutor.java
@@ -22,7 +22,6 @@ import org.apache.hudi.avro.model.HoodieClusteringPlan;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.engine.HoodieEngineContext;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.util.ClusteringUtils;
@@ -33,7 +32,7 @@ import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.HoodieWriteMetadata;
import org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor;
-public class SparkExecuteClusteringCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class SparkExecuteClusteringCommitActionExecutor<T>
extends BaseSparkCommitActionExecutor<T> {
private final HoodieClusteringPlan clusteringPlan;
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/BaseSparkCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/BaseSparkCommitActionExecutor.java
index 8c7d9e41ea..ac5b8555b0 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/BaseSparkCommitActionExecutor.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/BaseSparkCommitActionExecutor.java
@@ -27,7 +27,6 @@ import org.apache.hudi.common.model.HoodieFileGroupId;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordLocation;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
@@ -81,7 +80,7 @@ import scala.Tuple2;
import static org.apache.hudi.common.util.ClusteringUtils.getAllFileGroupsInPendingClusteringPlans;
import static org.apache.hudi.config.HoodieWriteConfig.WRITE_STATUS_STORAGE_LEVEL_VALUE;
-public abstract class BaseSparkCommitActionExecutor<T extends HoodieRecordPayload> extends
+public abstract class BaseSparkCommitActionExecutor<T> extends
BaseCommitActionExecutor<T, HoodieData<HoodieRecord<T>>, HoodieData<HoodieKey>, HoodieData<WriteStatus>, HoodieWriteMetadata<HoodieData<WriteStatus>>> {
private static final Logger LOG = LogManager.getLogger(BaseSparkCommitActionExecutor.class);
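The recurring change across these executors is relaxing the class bound from <T extends HoodieRecordPayload<T>> to a plain <T>, so the executors are typed against HoodieRecord<T> rather than an Avro payload. A minimal, self-contained sketch of the pattern (Payload, Record, OldExecutor and NewExecutor below are hypothetical stand-ins, not Hudi classes):

    class Payload {}                      // stand-in for HoodieRecordPayload
    class Record<T> { T data; }           // stand-in for the engine-neutral HoodieRecord<T>

    // before: only payload-backed records could parameterize an executor
    abstract class OldExecutor<T extends Payload> { abstract void execute(Record<T> rec); }

    // after: any record representation (Avro, InternalRow, RowData, ...) may be plugged in
    abstract class NewExecutor<T> { abstract void execute(Record<T> rec); }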
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/BulkInsertDataInternalWriterHelper.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/BulkInsertDataInternalWriterHelper.java
index 12e9dda81a..009123b4a6 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/BulkInsertDataInternalWriterHelper.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/BulkInsertDataInternalWriterHelper.java
@@ -20,7 +20,7 @@ package org.apache.hudi.table.action.commit;
import org.apache.hudi.client.HoodieInternalWriteStatus;
import org.apache.hudi.common.config.TypedProperties;
-import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.model.HoodieRecord.HoodieMetadataField;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieIOException;
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBucketIndexPartitioner.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBucketIndexPartitioner.java
index 65a45e1c6a..2318258759 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBucketIndexPartitioner.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBucketIndexPartitioner.java
@@ -33,7 +33,6 @@ import scala.Tuple2;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecordLocation;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieWriteConfig;
@@ -46,7 +45,7 @@ import org.apache.hudi.table.WorkloadStat;
/**
* Packs incoming records to be inserted into buckets (1 bucket = 1 RDD partition).
*/
-public class SparkBucketIndexPartitioner<T extends HoodieRecordPayload<T>> extends
+public class SparkBucketIndexPartitioner<T> extends
SparkHoodiePartitioner<T> {
private final int numBuckets;
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertCommitActionExecutor.java
index f4b01c887b..9a1d5bf1cc 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertCommitActionExecutor.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertCommitActionExecutor.java
@@ -22,7 +22,6 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
@@ -33,7 +32,7 @@ import org.apache.hudi.table.action.HoodieWriteMetadata;
import java.util.Map;
-public class SparkBulkInsertCommitActionExecutor<T extends HoodieRecordPayload<T>> extends BaseSparkCommitActionExecutor<T> {
+public class SparkBulkInsertCommitActionExecutor<T> extends BaseSparkCommitActionExecutor<T> {
private final HoodieData<HoodieRecord<T>> inputRecordsRDD;
private final Option<BulkInsertPartitioner> bulkInsertPartitioner;
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertHelper.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertHelper.java
index 5768520a05..51407eb026 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertHelper.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertHelper.java
@@ -22,7 +22,6 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
@@ -45,7 +44,7 @@ import java.util.List;
* @param <T>
*/
@SuppressWarnings("checkstyle:LineLength")
-public class SparkBulkInsertHelper<T extends HoodieRecordPayload, R> extends BaseBulkInsertHelper<T, HoodieData<HoodieRecord<T>>,
+public class SparkBulkInsertHelper<T, R> extends BaseBulkInsertHelper<T, HoodieData<HoodieRecord<T>>,
HoodieData<HoodieKey>, HoodieData<WriteStatus>, R> {
private SparkBulkInsertHelper() {
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertPreppedCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertPreppedCommitActionExecutor.java
index 8862981c2a..d84279ad2b 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertPreppedCommitActionExecutor.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertPreppedCommitActionExecutor.java
@@ -22,7 +22,6 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
@@ -31,7 +30,7 @@ import org.apache.hudi.table.BulkInsertPartitioner;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.HoodieWriteMetadata;
-public class SparkBulkInsertPreppedCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class SparkBulkInsertPreppedCommitActionExecutor<T>
extends BaseSparkCommitActionExecutor<T> {
private final HoodieData<HoodieRecord<T>> preppedInputRecordRdd;
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkDeleteCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkDeleteCommitActionExecutor.java
index a6fc996b71..9551ecae83 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkDeleteCommitActionExecutor.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkDeleteCommitActionExecutor.java
@@ -22,13 +22,12 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.model.HoodieKey;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.HoodieWriteMetadata;
-public class SparkDeleteCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class SparkDeleteCommitActionExecutor<T>
extends BaseSparkCommitActionExecutor<T> {
private final HoodieData<HoodieKey> keys;
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkDeletePartitionCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkDeletePartitionCommitActionExecutor.java
index 149aef03e2..c4239d407a 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkDeletePartitionCommitActionExecutor.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkDeletePartitionCommitActionExecutor.java
@@ -28,7 +28,6 @@ import org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.engine.HoodieEngineContext;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
@@ -45,7 +44,7 @@ import org.apache.hudi.table.action.HoodieWriteMetadata;
import static org.apache.hudi.common.table.timeline.HoodieInstant.State.REQUESTED;
import static org.apache.hudi.common.table.timeline.HoodieTimeline.REPLACE_COMMIT_ACTION;
-public class SparkDeletePartitionCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class SparkDeletePartitionCommitActionExecutor<T>
extends SparkInsertOverwriteCommitActionExecutor<T> {
private List<String> partitions;
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkHoodiePartitioner.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkHoodiePartitioner.java
index 4a5bff4215..3a74578edf 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkHoodiePartitioner.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkHoodiePartitioner.java
@@ -18,7 +18,6 @@
package org.apache.hudi.table.action.commit;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.WorkloadProfile;
import org.apache.spark.Partitioner;
@@ -26,7 +25,7 @@ import org.apache.spark.Partitioner;
/**
* Packs incoming records to be inserted into buckets (1 bucket = 1 RDD partition).
*/
-public abstract class SparkHoodiePartitioner<T extends HoodieRecordPayload<T>> extends Partitioner
+public abstract class SparkHoodiePartitioner<T> extends Partitioner
implements org.apache.hudi.table.action.commit.Partitioner {
/**
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertCommitActionExecutor.java
index 479b51322f..93ed6f8879 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertCommitActionExecutor.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertCommitActionExecutor.java
@@ -22,13 +22,12 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.HoodieWriteMetadata;
-public class SparkInsertCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class SparkInsertCommitActionExecutor<T>
extends BaseSparkCommitActionExecutor<T> {
private final HoodieData<HoodieRecord<T>> inputRecordsRDD;
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertOverwriteCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertOverwriteCommitActionExecutor.java
index 518063ed34..ee3b31cc57 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertOverwriteCommitActionExecutor.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertOverwriteCommitActionExecutor.java
@@ -23,7 +23,6 @@ import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.util.collection.Pair;
@@ -39,7 +38,7 @@ import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
-public class SparkInsertOverwriteCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class SparkInsertOverwriteCommitActionExecutor<T>
extends BaseSparkCommitActionExecutor<T> {
private final HoodieData<HoodieRecord<T>> inputRecordsRDD;
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertOverwriteTableCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertOverwriteTableCommitActionExecutor.java
index 93d0a8124c..868c8e2b42 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertOverwriteTableCommitActionExecutor.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertOverwriteTableCommitActionExecutor.java
@@ -23,7 +23,6 @@ import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieWriteConfig;
@@ -35,7 +34,7 @@ import java.util.Collections;
import java.util.List;
import java.util.Map;
-public class SparkInsertOverwriteTableCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class SparkInsertOverwriteTableCommitActionExecutor<T>
extends SparkInsertOverwriteCommitActionExecutor<T> {
public SparkInsertOverwriteTableCommitActionExecutor(HoodieEngineContext context,
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertPreppedCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertPreppedCommitActionExecutor.java
index ff1a7e2b9b..55e05a3205 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertPreppedCommitActionExecutor.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertPreppedCommitActionExecutor.java
@@ -22,13 +22,12 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.HoodieWriteMetadata;
-public class SparkInsertPreppedCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class SparkInsertPreppedCommitActionExecutor<T>
extends BaseSparkCommitActionExecutor<T> {
private final HoodieData<HoodieRecord<T>> preppedRecords;
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkUpsertCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkUpsertCommitActionExecutor.java
index ccee9cf5a7..6b699f25c3 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkUpsertCommitActionExecutor.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkUpsertCommitActionExecutor.java
@@ -22,13 +22,12 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.HoodieWriteMetadata;
-public class SparkUpsertCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class SparkUpsertCommitActionExecutor<T>
extends BaseSparkCommitActionExecutor<T> {
private final HoodieData<HoodieRecord<T>> inputRecordsRDD;
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkUpsertPreppedCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkUpsertPreppedCommitActionExecutor.java
index 73d408593b..08b758e602 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkUpsertPreppedCommitActionExecutor.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkUpsertPreppedCommitActionExecutor.java
@@ -22,13 +22,12 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.HoodieWriteMetadata;
-public class SparkUpsertPreppedCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class SparkUpsertPreppedCommitActionExecutor<T>
extends BaseSparkCommitActionExecutor<T> {
private final HoodieData<HoodieRecord<T>> preppedRecords;
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/UpsertPartitioner.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/UpsertPartitioner.java
index 134cfd8d2c..802c9b3b4c 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/UpsertPartitioner.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/UpsertPartitioner.java
@@ -25,7 +25,6 @@ import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecordLocation;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -61,7 +60,7 @@ import static org.apache.hudi.common.table.timeline.HoodieTimeline.COMMIT_ACTION
/**
* Packs incoming records to be upserted, into buckets (1 bucket = 1 RDD partition).
*/
-public class UpsertPartitioner<T extends HoodieRecordPayload<T>> extends SparkHoodiePartitioner<T> {
+public class UpsertPartitioner<T> extends SparkHoodiePartitioner<T> {
private static final Logger LOG = LogManager.getLogger(UpsertPartitioner.class);
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/compact/HoodieSparkMergeOnReadTableCompactor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/compact/HoodieSparkMergeOnReadTableCompactor.java
index 61cb1ffd27..edbd3b7eb6 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/compact/HoodieSparkMergeOnReadTableCompactor.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/compact/HoodieSparkMergeOnReadTableCompactor.java
@@ -22,7 +22,6 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.config.HoodieWriteConfig;
@@ -36,7 +35,7 @@ import static org.apache.hudi.config.HoodieWriteConfig.WRITE_STATUS_STORAGE_LEVE
* a normal commit
*/
@SuppressWarnings("checkstyle:LineLength")
-public class HoodieSparkMergeOnReadTableCompactor<T extends HoodieRecordPayload>
+public class HoodieSparkMergeOnReadTableCompactor<T>
extends HoodieCompactor<T, HoodieData<HoodieRecord<T>>, HoodieData<HoodieKey>, HoodieData<WriteStatus>> {
@Override
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/BaseSparkDeltaCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/BaseSparkDeltaCommitActionExecutor.java
index fe467ddbff..3e55ffeef8 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/BaseSparkDeltaCommitActionExecutor.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/BaseSparkDeltaCommitActionExecutor.java
@@ -21,7 +21,6 @@ package org.apache.hudi.table.action.deltacommit;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
@@ -43,7 +42,7 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
-public abstract class BaseSparkDeltaCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public abstract class BaseSparkDeltaCommitActionExecutor<T>
extends BaseSparkCommitActionExecutor<T> {
private static final Logger LOG = LogManager.getLogger(BaseSparkDeltaCommitActionExecutor.class);
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkBulkInsertDeltaCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkBulkInsertDeltaCommitActionExecutor.java
index 190a714e44..1855ed47a5 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkBulkInsertDeltaCommitActionExecutor.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkBulkInsertDeltaCommitActionExecutor.java
@@ -22,7 +22,6 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
@@ -34,7 +33,7 @@ import org.apache.hudi.table.action.commit.SparkBulkInsertHelper;
import java.util.Map;
-public class SparkBulkInsertDeltaCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class SparkBulkInsertDeltaCommitActionExecutor<T>
extends BaseSparkDeltaCommitActionExecutor<T> {
private final HoodieData<HoodieRecord<T>> inputRecordsRDD;
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkBulkInsertPreppedDeltaCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkBulkInsertPreppedDeltaCommitActionExecutor.java
index c01bce2b9c..fb4f43aa87 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkBulkInsertPreppedDeltaCommitActionExecutor.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkBulkInsertPreppedDeltaCommitActionExecutor.java
@@ -22,7 +22,6 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
@@ -32,7 +31,7 @@ import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.HoodieWriteMetadata;
import org.apache.hudi.table.action.commit.SparkBulkInsertHelper;
-public class SparkBulkInsertPreppedDeltaCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class SparkBulkInsertPreppedDeltaCommitActionExecutor<T>
extends BaseSparkDeltaCommitActionExecutor<T> {
private final HoodieData<HoodieRecord<T>> preppedInputRecordRdd;
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkDeleteDeltaCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkDeleteDeltaCommitActionExecutor.java
index 9a5b08df28..2aeb7bd3b1 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkDeleteDeltaCommitActionExecutor.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkDeleteDeltaCommitActionExecutor.java
@@ -22,14 +22,13 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.model.HoodieKey;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.HoodieWriteMetadata;
import org.apache.hudi.table.action.commit.HoodieDeleteHelper;
-public class SparkDeleteDeltaCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class SparkDeleteDeltaCommitActionExecutor<T>
extends BaseSparkDeltaCommitActionExecutor<T> {
private final HoodieData<HoodieKey> keys;
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkInsertDeltaCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkInsertDeltaCommitActionExecutor.java
index 4889460c46..1d6febc666 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkInsertDeltaCommitActionExecutor.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkInsertDeltaCommitActionExecutor.java
@@ -22,14 +22,13 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.HoodieWriteMetadata;
import org.apache.hudi.table.action.commit.HoodieWriteHelper;
-public class SparkInsertDeltaCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class SparkInsertDeltaCommitActionExecutor<T>
extends BaseSparkDeltaCommitActionExecutor<T> {
private final HoodieData<HoodieRecord<T>> inputRecordsRDD;
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkInsertPreppedDeltaCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkInsertPreppedDeltaCommitActionExecutor.java
index dbf0cbc676..d6d155be18 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkInsertPreppedDeltaCommitActionExecutor.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkInsertPreppedDeltaCommitActionExecutor.java
@@ -22,13 +22,12 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.HoodieWriteMetadata;
-public class SparkInsertPreppedDeltaCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class SparkInsertPreppedDeltaCommitActionExecutor<T>
extends BaseSparkDeltaCommitActionExecutor<T> {
private final HoodieData<HoodieRecord<T>> preppedRecords;
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkUpsertDeltaCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkUpsertDeltaCommitActionExecutor.java
index 67ecb9a8cb..270ac86401 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkUpsertDeltaCommitActionExecutor.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkUpsertDeltaCommitActionExecutor.java
@@ -22,14 +22,13 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.HoodieWriteMetadata;
import org.apache.hudi.table.action.commit.HoodieWriteHelper;
-public class SparkUpsertDeltaCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class SparkUpsertDeltaCommitActionExecutor<T>
extends BaseSparkDeltaCommitActionExecutor<T> {
private final HoodieData<HoodieRecord<T>> inputRecordsRDD;
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkUpsertDeltaCommitPartitioner.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkUpsertDeltaCommitPartitioner.java
index e498019c41..a5dd997893 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkUpsertDeltaCommitPartitioner.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkUpsertDeltaCommitPartitioner.java
@@ -23,7 +23,6 @@ import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodieRecordLocation;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.config.HoodieWriteConfig;
@@ -43,7 +42,7 @@ import java.util.stream.Collectors;
* UpsertPartitioner for MergeOnRead table type, this allows auto correction of small parquet files to larger ones
* without the need for an index in the logFile.
*/
-public class SparkUpsertDeltaCommitPartitioner<T extends HoodieRecordPayload<T>> extends UpsertPartitioner<T> {
+public class SparkUpsertDeltaCommitPartitioner<T> extends UpsertPartitioner<T> {
public SparkUpsertDeltaCommitPartitioner(WorkloadProfile profile, HoodieSparkEngineContext context, HoodieTable table,
HoodieWriteConfig config) {
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkUpsertPreppedDeltaCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkUpsertPreppedDeltaCommitActionExecutor.java
index 9540030eba..7f8b21c57f 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkUpsertPreppedDeltaCommitActionExecutor.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkUpsertPreppedDeltaCommitActionExecutor.java
@@ -22,13 +22,12 @@ import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.HoodieWriteMetadata;
-public class SparkUpsertPreppedDeltaCommitActionExecutor<T extends HoodieRecordPayload<T>>
+public class SparkUpsertPreppedDeltaCommitActionExecutor<T>
extends BaseSparkDeltaCommitActionExecutor<T> {
private final HoodieData<HoodieRecord<T>> preppedRecords;
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestUpdateSchemaEvolution.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestUpdateSchemaEvolution.java
index a5926196ea..45a3029352 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestUpdateSchemaEvolution.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestUpdateSchemaEvolution.java
@@ -20,6 +20,7 @@ package org.apache.hudi.client;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieAvroRecord;
+import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordLocation;
@@ -133,7 +134,8 @@ public class TestUpdateSchemaEvolution extends HoodieClientTestHarness {
new Path(updateTable.getConfig().getBasePath() + "/" + insertResult.getStat().getPath()),
mergeHandle.getWriterSchemaWithMetaFields());
for (GenericRecord rec : oldRecords) {
- mergeHandle.write(rec);
+ // TODO: create a HoodieRecord from rec that can resolve getRecordKey
+ mergeHandle.write(new HoodieAvroIndexedRecord(rec));
}
mergeHandle.close();
};
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java
index a3f9b639af..e3a90becdd 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java
@@ -41,6 +41,7 @@ import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieFileGroup;
import org.apache.hudi.common.model.HoodieFileGroupId;
+import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodieRecord;
@@ -83,7 +84,7 @@ import org.apache.hudi.config.HoodieStorageConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieMetadataException;
import org.apache.hudi.index.HoodieIndex;
-import org.apache.hudi.io.storage.HoodieHFileReader;
+import org.apache.hudi.io.storage.HoodieAvroHFileReader;
import org.apache.hudi.metadata.FileSystemBackedTableMetadata;
import org.apache.hudi.metadata.HoodieBackedTableMetadataWriter;
import org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader;
@@ -764,9 +765,9 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
table.getHoodieView().sync();
List<FileSlice> fileSlices = table.getSliceView().getLatestFileSlices("files").collect(Collectors.toList());
HoodieBaseFile baseFile = fileSlices.get(0).getBaseFile().get();
- HoodieHFileReader hoodieHFileReader = new HoodieHFileReader(context.getHadoopConf().get(), new Path(baseFile.getPath()),
+ HoodieAvroHFileReader hoodieHFileReader = new HoodieAvroHFileReader(context.getHadoopConf().get(), new Path(baseFile.getPath()),
new CacheConfig(context.getHadoopConf().get()));
- List<IndexedRecord> records = HoodieHFileReader.readAllRecords(hoodieHFileReader);
+ List<IndexedRecord> records = HoodieAvroHFileReader.readAllRecords(hoodieHFileReader);
records.forEach(entry -> {
if (populateMetaFields) {
assertNotNull(((GenericRecord) entry).get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
@@ -1011,9 +1012,9 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
while (logFileReader.hasNext()) {
HoodieLogBlock logBlock = logFileReader.next();
if (logBlock instanceof HoodieDataBlock) {
- try (ClosableIterator<IndexedRecord> recordItr = ((HoodieDataBlock) logBlock).getRecordIterator()) {
+ try (ClosableIterator<HoodieRecord> recordItr = ((HoodieDataBlock) logBlock).getRecordIterator(HoodieAvroIndexedRecord::new)) {
recordItr.forEachRemaining(indexRecord -> {
- final GenericRecord record = (GenericRecord) indexRecord;
+ final GenericRecord record = (GenericRecord) indexRecord.getData();
if (enableMetaFields) {
// Metadata table records should have meta fields!
assertNotNull(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
@@ -1070,7 +1071,7 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
logRecordReader.scan();
}, "Metadata log records materialization failed");
- for (Map.Entry<String, HoodieRecord<? extends HoodieRecordPayload>> entry : logRecordReader.getRecords().entrySet()) {
+ for (Map.Entry<String, HoodieRecord> entry : logRecordReader.getRecords().entrySet()) {
assertFalse(entry.getKey().isEmpty());
assertFalse(entry.getValue().getRecordKey().isEmpty());
assertEquals(entry.getKey(), entry.getValue().getRecordKey());
@@ -1094,10 +1095,10 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
}
final HoodieBaseFile baseFile = fileSlices.get(0).getBaseFile().get();
- HoodieHFileReader hoodieHFileReader = new HoodieHFileReader(context.getHadoopConf().get(),
+ HoodieAvroHFileReader hoodieHFileReader = new HoodieAvroHFileReader(context.getHadoopConf().get(),
new Path(baseFile.getPath()),
new CacheConfig(context.getHadoopConf().get()));
- List<IndexedRecord> records = HoodieHFileReader.readAllRecords(hoodieHFileReader);
+ List<IndexedRecord> records = HoodieAvroHFileReader.readAllRecords(hoodieHFileReader);
records.forEach(entry -> {
if (enableMetaFields) {
assertNotNull(((GenericRecord) entry).get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
@@ -2539,9 +2540,9 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
while (logFileReader.hasNext()) {
HoodieLogBlock logBlock = logFileReader.next();
if (logBlock instanceof HoodieDataBlock) {
- try (ClosableIterator<IndexedRecord> recordItr = ((HoodieDataBlock) logBlock).getRecordIterator()) {
+ try (ClosableIterator<HoodieRecord> recordItr = ((HoodieDataBlock) logBlock).getRecordIterator(HoodieAvroIndexedRecord::new)) {
recordItr.forEachRemaining(indexRecord -> {
- final GenericRecord record = (GenericRecord) indexRecord;
+ final GenericRecord record = (GenericRecord) indexRecord.getData();
final GenericRecord colStatsRecord = (GenericRecord) record.get(HoodieMetadataPayload.SCHEMA_FIELD_ID_COLUMN_STATS);
assertNotNull(colStatsRecord);
assertNotNull(colStatsRecord.get(HoodieMetadataPayload.COLUMN_STATS_FIELD_COLUMN_NAME));
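As the hunks above show, data-block reads now hand back HoodieRecord wrappers instead of raw IndexedRecords, with the Avro view reached through getData(). A hedged sketch of the consumption pattern (dataBlock stands in for a HoodieDataBlock obtained from a log reader, as in the test above):

    try (ClosableIterator<HoodieRecord> it = dataBlock.getRecordIterator(HoodieAvroIndexedRecord::new)) {
      it.forEachRemaining(rec -> {
        GenericRecord avro = (GenericRecord) rec.getData();  // unwrap the Avro record
        // field-level assertions, e.g. on avro.get(HoodieRecord.RECORD_KEY_METADATA_FIELD), go here
      });
    }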
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedTableMetadata.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedTableMetadata.java
index 719f914816..4f69d1e449 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedTableMetadata.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedTableMetadata.java
@@ -23,9 +23,9 @@ import org.apache.hudi.avro.model.HoodieMetadataRecord;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieBaseFile;
+import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.TableSchemaResolver;
@@ -37,7 +37,7 @@ import org.apache.hudi.common.testutils.HoodieTestTable;
import org.apache.hudi.common.util.ClosableIterator;
import org.apache.hudi.common.util.collection.ExternalSpillableMap;
import org.apache.hudi.config.HoodieWriteConfig;
-import org.apache.hudi.io.storage.HoodieHFileReader;
+import org.apache.hudi.io.storage.HoodieAvroHFileReader;
import org.apache.hudi.metadata.HoodieBackedTableMetadata;
import org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader;
import org.apache.hudi.metadata.HoodieMetadataPayload;
@@ -353,9 +353,9 @@ public class TestHoodieBackedTableMetadata extends TestHoodieMetadataBase {
while (logFileReader.hasNext()) {
HoodieLogBlock logBlock = logFileReader.next();
if (logBlock instanceof HoodieDataBlock) {
- try (ClosableIterator<IndexedRecord> recordItr = ((HoodieDataBlock) logBlock).getRecordIterator()) {
+ try (ClosableIterator<HoodieRecord> recordItr = ((HoodieDataBlock) logBlock).getRecordIterator(HoodieAvroIndexedRecord::new)) {
recordItr.forEachRemaining(indexRecord -> {
- final GenericRecord record = (GenericRecord) indexRecord;
+ final GenericRecord record = (GenericRecord) indexRecord.getData();
assertNull(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
assertNull(record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD));
final String key = String.valueOf(record.get(HoodieMetadataPayload.KEY_FIELD_NAME));
@@ -396,7 +396,7 @@ public class TestHoodieBackedTableMetadata extends TestHoodieMetadataBase {
logRecordReader.scan();
}, "Metadata log records materialization failed");
- for (Map.Entry<String, HoodieRecord<? extends HoodieRecordPayload>> entry : logRecordReader.getRecords().entrySet()) {
+ for (Map.Entry<String, HoodieRecord> entry : logRecordReader.getRecords().entrySet()) {
assertFalse(entry.getKey().isEmpty());
assertFalse(entry.getValue().getRecordKey().isEmpty());
assertEquals(entry.getKey(), entry.getValue().getRecordKey());
@@ -419,10 +419,10 @@ public class TestHoodieBackedTableMetadata extends TestHoodieMetadataBase {
}
final HoodieBaseFile baseFile = fileSlices.get(0).getBaseFile().get();
- HoodieHFileReader hoodieHFileReader = new HoodieHFileReader(context.getHadoopConf().get(),
+ HoodieAvroHFileReader hoodieHFileReader = new HoodieAvroHFileReader(context.getHadoopConf().get(),
new Path(baseFile.getPath()),
new CacheConfig(context.getHadoopConf().get()));
- List<IndexedRecord> records = HoodieHFileReader.readAllRecords(hoodieHFileReader);
+ List<IndexedRecord> records = HoodieAvroHFileReader.readAllRecords(hoodieHFileReader);
records.forEach(entry -> {
assertNull(((GenericRecord) entry).get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
final String keyInPayload = (String) ((GenericRecord) entry)
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/execution/TestBoundedInMemoryQueue.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/execution/TestBoundedInMemoryQueue.java
index 4707a68072..9fb1862c5f 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/execution/TestBoundedInMemoryQueue.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/execution/TestBoundedInMemoryQueue.java
@@ -44,6 +44,7 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.Properties;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.Semaphore;
@@ -97,12 +98,12 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
final HoodieAvroRecord originalRecord = (HoodieAvroRecord) originalRecordIterator.next();
final Option<IndexedRecord> originalInsertValue =
originalRecord.getData().getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA);
- final HoodieLazyInsertIterable.HoodieInsertValueGenResult<HoodieRecord> payload = queue.iterator().next();
+ final HoodieLazyInsertIterable.HoodieInsertValueGenResult<HoodieRecord> genResult = queue.iterator().next();
// Ensure that record ordering is guaranteed.
- assertEquals(originalRecord, payload.record);
+ assertEquals(originalRecord, genResult.getResult());
// cached insert value matches the expected insert value.
assertEquals(originalInsertValue,
- ((HoodieAvroRecord) payload.record).getData().getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA));
+ ((HoodieAvroRecord) genResult.getResult()).getData().getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA));
recordsRead++;
}
assertFalse(queue.iterator().hasNext() || originalRecordIterator.hasNext());
@@ -189,8 +190,8 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
// Read recs and ensure we have covered all producer recs.
while (queue.iterator().hasNext()) {
- final HoodieLazyInsertIterable.HoodieInsertValueGenResult payload = queue.iterator().next();
- final HoodieRecord rec = payload.record;
+ final HoodieLazyInsertIterable.HoodieInsertValueGenResult genResult = queue.iterator().next();
+ final HoodieRecord rec = genResult.getResult();
Tuple2<Integer, Integer> producerPos = keyToProducerAndIndexMap.get(rec.getRecordKey());
Integer lastSeenPos = lastSeenMap.get(producerPos._1());
countMap.put(producerPos._1(), countMap.get(producerPos._1()) + 1);
@@ -218,9 +219,9 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
// maximum number of records to keep in memory.
final int recordLimit = 5;
final SizeEstimator<HoodieLazyInsertIterable.HoodieInsertValueGenResult> sizeEstimator = new DefaultSizeEstimator<>();
- HoodieLazyInsertIterable.HoodieInsertValueGenResult payload =
+ HoodieLazyInsertIterable.HoodieInsertValueGenResult genResult =
getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA).apply((HoodieAvroRecord) hoodieRecords.get(0));
- final long objSize = sizeEstimator.sizeEstimate(payload);
+ final long objSize = sizeEstimator.sizeEstimate(genResult);
final long memoryLimitInBytes = recordLimit * objSize;
final BoundedInMemoryQueue<HoodieRecord, HoodieLazyInsertIterable.HoodieInsertValueGenResult> queue =
new BoundedInMemoryQueue(memoryLimitInBytes, getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA));
@@ -240,8 +241,8 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
assertEquals(recordLimit - 1, queue.samplingRecordCounter.get());
// try to read 2 records.
- assertEquals(hoodieRecords.get(0), queue.iterator().next().record);
- assertEquals(hoodieRecords.get(1), queue.iterator().next().record);
+ assertEquals(hoodieRecords.get(0), queue.iterator().next().getResult());
+ assertEquals(hoodieRecords.get(1), queue.iterator().next().getResult());
// waiting for permits to expire.
while (!isQueueFull(queue.rateLimiter)) {
@@ -267,9 +268,9 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
final List<HoodieRecord> hoodieRecords = dataGen.generateInserts(instantTime, numRecords);
final SizeEstimator<Tuple2<HoodieRecord, Option<IndexedRecord>>> sizeEstimator = new DefaultSizeEstimator<>();
// queue memory limit
- HoodieLazyInsertIterable.HoodieInsertValueGenResult payload =
+ HoodieLazyInsertIterable.HoodieInsertValueGenResult genResult =
getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA).apply((HoodieAvroRecord) hoodieRecords.get(0));
- final long objSize = sizeEstimator.sizeEstimate(new Tuple2<>(payload.record, payload.insertValue));
+ final long objSize = sizeEstimator.sizeEstimate(new Tuple2<>(genResult.getResult(), genResult.getResult().toIndexedRecord(HoodieTestDataGenerator.AVRO_SCHEMA, new Properties())));
final long memoryLimitInBytes = 4 * objSize;
// first let us throw exception from queueIterator reader and test that queueing thread
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/TestHoodieFileWriterFactory.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/TestHoodieAvroFileWriterFactory.java
similarity index 81%
rename from hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/TestHoodieFileWriterFactory.java
rename to hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/TestHoodieAvroFileWriterFactory.java
index 66016305d7..d9377fc4e8 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/TestHoodieFileWriterFactory.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/TestHoodieAvroFileWriterFactory.java
@@ -18,15 +18,13 @@
package org.apache.hudi.io.storage;
+import org.apache.hadoop.fs.Path;
import org.apache.hudi.client.SparkTaskContextSupplier;
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieSparkTable;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.testutils.HoodieClientTestBase;
-
-import org.apache.avro.generic.IndexedRecord;
-import org.apache.hadoop.fs.Path;
import org.junit.jupiter.api.Test;
import java.io.IOException;
@@ -37,7 +35,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
/**
* Tests for {@link HoodieFileWriterFactory}.
*/
-public class TestHoodieFileWriterFactory extends HoodieClientTestBase {
+public class TestHoodieAvroFileWriterFactory extends HoodieClientTestBase {
@Test
public void testGetFileWriter() throws IOException {
@@ -47,26 +45,26 @@ public class TestHoodieFileWriterFactory extends HoodieClientTestBase {
final HoodieWriteConfig cfg = getConfig();
HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
SparkTaskContextSupplier supplier = new SparkTaskContextSupplier();
- HoodieFileWriter<IndexedRecord> parquetWriter = HoodieFileWriterFactory.getFileWriter(instantTime,
+ HoodieFileWriter parquetWriter = HoodieFileWriterFactory.getFileWriter(instantTime,
parquetPath, table, cfg, HoodieTestDataGenerator.AVRO_SCHEMA, supplier);
assertTrue(parquetWriter instanceof HoodieAvroParquetWriter);
// hfile format.
final Path hfilePath = new Path(basePath + "/partition/path/f1_1-0-1_000.hfile");
- HoodieFileWriter<IndexedRecord> hfileWriter = HoodieFileWriterFactory.getFileWriter(instantTime,
+ HoodieFileWriter hfileWriter = HoodieFileWriterFactory.getFileWriter(instantTime,
hfilePath, table, cfg, HoodieTestDataGenerator.AVRO_SCHEMA, supplier);
- assertTrue(hfileWriter instanceof HoodieHFileWriter);
+ assertTrue(hfileWriter instanceof HoodieAvroHFileWriter);
// orc file format.
final Path orcPath = new Path(basePath + "/partition/path/f1_1-0-1_000.orc");
- HoodieFileWriter<IndexedRecord> orcFileWriter = HoodieFileWriterFactory.getFileWriter(instantTime,
+ HoodieFileWriter orcFileWriter = HoodieFileWriterFactory.getFileWriter(instantTime,
orcPath, table, cfg, HoodieTestDataGenerator.AVRO_SCHEMA, supplier);
- assertTrue(orcFileWriter instanceof HoodieOrcWriter);
+ assertTrue(orcFileWriter instanceof HoodieAvroOrcWriter);
// other file format exception.
final Path logPath = new Path(basePath + "/partition/path/f.b51192a8-574b-4a85-b246-bcfec03ac8bf_100.log.2_1-0-1");
final Throwable thrown = assertThrows(UnsupportedOperationException.class, () -> {
- HoodieFileWriter<IndexedRecord> logWriter = HoodieFileWriterFactory.getFileWriter(instantTime, logPath,
+ HoodieFileWriter logWriter = HoodieFileWriterFactory.getFileWriter(instantTime, logPath,
table, cfg, HoodieTestDataGenerator.AVRO_SCHEMA, supplier);
}, "should fail since log storage writer is not supported yet.");
assertTrue(thrown.getMessage().contains("format not supported yet."));
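With the factory no longer parameterized by IndexedRecord, writers are obtained untyped and narrowed by file format, as the renamed test exercises. A hedged usage sketch (instantTime, table, cfg and supplier as set up in the test; the path is a placeholder; getFileWriter may throw IOException):

    HoodieFileWriter hfileWriter = HoodieFileWriterFactory.getFileWriter(
        instantTime, new Path(basePath + "/partition/path/f1_1-0-1_000.hfile"),
        table, cfg, HoodieTestDataGenerator.AVRO_SCHEMA, supplier);
    // an .hfile path yields a HoodieAvroHFileWriter; .orc yields a HoodieAvroOrcWriter,
    // and .parquet a HoodieAvroParquetWriter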
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/row/TestHoodieRowCreateHandle.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/row/TestHoodieRowCreateHandle.java
index ad73a256a6..b4e75a02b2 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/row/TestHoodieRowCreateHandle.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/row/TestHoodieRowCreateHandle.java
@@ -21,6 +21,7 @@ package org.apache.hudi.io.storage.row;
import org.apache.hudi.client.HoodieInternalWriteStatus;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.model.HoodieRecord.HoodieMetadataField;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
import org.apache.hudi.common.util.StringUtils;
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java
index 458af3ad9e..c11be5f4ed 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java
@@ -67,7 +67,7 @@ import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
-import static org.apache.hudi.io.storage.HoodieHFileReader.SCHEMA_KEY;
+import static org.apache.hudi.io.storage.HoodieAvroHFileReader.SCHEMA_KEY;
/**
* Utility methods to aid testing inside the HoodieClient module.
diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java
index a352e86b96..9f1c25d647 100644
--- a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java
+++ b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java
@@ -19,9 +19,10 @@
package org.apache.hudi.avro;
import org.apache.hudi.common.config.SerializableSchema;
+import org.apache.hudi.common.model.HoodieAvroRecord;
import org.apache.hudi.common.model.HoodieOperation;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
+import org.apache.hudi.common.model.HoodieRecord.HoodieMetadataField;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.common.util.collection.Pair;
@@ -76,6 +77,7 @@ import java.util.Map;
import java.util.Deque;
import java.util.LinkedList;
import java.util.Set;
+import java.util.Properties;
import java.util.TimeZone;
import java.util.stream.Collectors;
@@ -107,6 +109,13 @@ public class HoodieAvroUtils {
public static final Schema RECORD_KEY_SCHEMA = initRecordKeySchema();
+ /**
+ * TODO: support serializing other record types.
+ */
+ public static Option<byte[]> recordToBytes(HoodieRecord record, Schema schema) throws IOException {
+ return Option.of(HoodieAvroUtils.indexedRecordToBytes((IndexedRecord) record.toIndexedRecord(schema, new Properties()).get()));
+ }
+
/**
* Convert a given avro record to bytes.
*/
@@ -683,11 +692,11 @@ public class HoodieAvroUtils {
* @param schema {@link Schema} instance.
* @return Column value if a single column, or concatenated String values by comma.
*/
- public static Object getRecordColumnValues(HoodieRecord<? extends HoodieRecordPayload> record,
+ public static Object getRecordColumnValues(HoodieAvroRecord record,
String[] columns,
Schema schema, boolean consistentLogicalTimestampEnabled) {
try {
- GenericRecord genericRecord = (GenericRecord) record.getData().getInsertValue(schema).get();
+ GenericRecord genericRecord = (GenericRecord) record.toIndexedRecord(schema, new Properties()).get();
if (columns.length == 1) {
return HoodieAvroUtils.getNestedFieldVal(genericRecord, columns[0], true, consistentLogicalTimestampEnabled);
} else {
@@ -712,7 +721,7 @@ public class HoodieAvroUtils {
* @param schema {@link SerializableSchema} instance.
* @return Column value if a single column, or concatenated String values by comma.
*/
- public static Object getRecordColumnValues(HoodieRecord<? extends HoodieRecordPayload> record,
+ public static Object getRecordColumnValues(HoodieAvroRecord record,
String[] columns,
SerializableSchema schema, boolean consistentLogicalTimestampEnabled) {
return getRecordColumnValues(record, columns, schema.get(), consistentLogicalTimestampEnabled);
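For orientation only (not part of this commit), a minimal sketch of how the new recordToBytes helper can be exercised; the schema and record below are made up for illustration, and the new model classes are assumed to be on the classpath:

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
import org.apache.hudi.common.util.Option;

import java.io.IOException;

public class RecordToBytesSketch {
  public static void main(String[] args) throws IOException {
    // Illustrative schema; real callers pass the table's write schema.
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Row\",\"fields\":["
            + "{\"name\":\"id\",\"type\":\"string\"},{\"name\":\"ts\",\"type\":\"long\"}]}");

    GenericRecord avro = new GenericData.Record(schema);
    avro.put("id", "key-1");
    avro.put("ts", 1L);

    // The helper goes through HoodieRecord#toIndexedRecord and Avro-encodes the result.
    Option<byte[]> bytes =
        HoodieAvroUtils.recordToBytes(new HoodieAvroIndexedRecord(avro), schema);
    System.out.println("serialized " + bytes.get().length + " bytes");
  }
}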
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieAvroIndexedRecord.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieAvroIndexedRecord.java
new file mode 100644
index 0000000000..ac2df00151
--- /dev/null
+++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieAvroIndexedRecord.java
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.common.model;
+
+import org.apache.hudi.avro.HoodieAvroUtils;
+import org.apache.hudi.common.config.TypedProperties;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.common.util.ValidationUtils;
+import org.apache.hudi.keygen.BaseKeyGenerator;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+
+/**
+ * This is only used for records returned by a reader.
+ */
+public class HoodieAvroIndexedRecord extends HoodieRecord<IndexedRecord> {
+
+ public HoodieAvroIndexedRecord(IndexedRecord data) {
+ super(null, data);
+ }
+
+ public HoodieAvroIndexedRecord(HoodieKey key, IndexedRecord data) {
+ super(key, data);
+ }
+
+ public HoodieAvroIndexedRecord(HoodieKey key, IndexedRecord data, HoodieOperation operation) {
+ super(key, data, operation);
+ }
+
+ public HoodieAvroIndexedRecord(HoodieRecord<IndexedRecord> record) {
+ super(record);
+ }
+
+ public HoodieAvroIndexedRecord() {
+ }
+
+ @Override
+ public Option<IndexedRecord> toIndexedRecord(Schema schema, Properties prop) {
+ return Option.of(data);
+ }
+
+ public Option<IndexedRecord> toIndexedRecord() {
+ return Option.of(data);
+ }
+
+ @Override
+ public Comparable<?> getOrderingValue() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public HoodieRecord newInstance() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public HoodieRecord<IndexedRecord> newInstance(HoodieKey key, HoodieOperation op) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public HoodieRecord<IndexedRecord> newInstance(HoodieKey key) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public String getRecordKey(Option<BaseKeyGenerator> keyGeneratorOpt) {
+ return keyGeneratorOpt.isPresent() ? keyGeneratorOpt.get().getRecordKey((GenericRecord) data) : ((GenericRecord) data).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
+ }
+
+ @Override
+ public String getRecordKey(String keyFieldName) {
+ return Option.ofNullable(data.getSchema().getField(keyFieldName))
+ .map(keyField -> data.get(keyField.pos()))
+ .map(Object::toString).orElse(null);
+ }
+
+ @Override
+ public HoodieRecord preCombine(HoodieRecord<IndexedRecord> previousRecord) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public Option<HoodieRecord> combineAndGetUpdateValue(HoodieRecord previousRecord, Schema schema, Properties props) throws IOException {
+ return Option.empty();
+ }
+
+ @Override
+ public HoodieRecord mergeWith(HoodieRecord other, Schema readerSchema, Schema writerSchema) throws IOException {
+ ValidationUtils.checkState(other instanceof HoodieAvroIndexedRecord);
+ GenericRecord record = HoodieAvroUtils.stitchRecords((GenericRecord) data, (GenericRecord) other.getData(), writerSchema);
+ return new HoodieAvroIndexedRecord(record);
+ }
+
+ @Override
+ public HoodieRecord rewriteRecord(Schema recordSchema, Schema targetSchema, TypedProperties props) throws IOException {
+ GenericRecord avroPayloadInNewSchema =
+ HoodieAvroUtils.rewriteRecord((GenericRecord) data, targetSchema);
+ return new HoodieAvroIndexedRecord(avroPayloadInNewSchema);
+ }
+
+ @Override
+ public HoodieRecord rewriteRecord(Schema recordSchema, Properties prop, boolean schemaOnReadEnabled, Schema writeSchemaWithMetaFields) throws IOException {
+ GenericRecord rewriteRecord = schemaOnReadEnabled ? HoodieAvroUtils.rewriteRecordWithNewSchema(data, writeSchemaWithMetaFields, new HashMap<>())
+ : HoodieAvroUtils.rewriteRecord((GenericRecord) data, writeSchemaWithMetaFields);
+ return new HoodieAvroIndexedRecord(rewriteRecord);
+ }
+
+ @Override
+ public HoodieRecord rewriteRecordWithMetadata(Schema recordSchema, Properties prop, boolean schemaOnReadEnabled, Schema writeSchemaWithMetaFields, String fileName) throws IOException {
+ GenericRecord rewriteRecord = schemaOnReadEnabled ? HoodieAvroUtils.rewriteEvolutionRecordWithMetadata((GenericRecord) data, writeSchemaWithMetaFields, fileName)
+ : HoodieAvroUtils.rewriteRecordWithMetadata((GenericRecord) data, writeSchemaWithMetaFields, fileName);
+ return new HoodieAvroIndexedRecord(rewriteRecord);
+ }
+
+ @Override
+ public HoodieRecord rewriteRecordWithNewSchema(Schema recordSchema, Properties prop, Schema newSchema, Map<String, String> renameCols) throws IOException {
+ GenericRecord rewriteRecord = HoodieAvroUtils.rewriteRecordWithNewSchema(data, newSchema, renameCols);
+ return new HoodieAvroIndexedRecord(rewriteRecord);
+ }
+
+ @Override
+ public HoodieRecord rewriteRecordWithNewSchema(Schema recordSchema, Properties prop, Schema newSchema, Map<String, String> renameCols, Mapper mapper) throws IOException {
+ GenericRecord oldRecord = (GenericRecord) getData();
+ GenericRecord rewriteRecord = HoodieAvroUtils.rewriteRecordWithNewSchema(oldRecord, newSchema, renameCols);
+ return mapper.apply(rewriteRecord);
+ }
+
+ @Override
+ public HoodieRecord rewriteRecordWithNewSchema(Schema recordSchema, Properties prop, Schema newSchema) throws IOException {
+ GenericRecord oldRecord = (GenericRecord) data;
+ GenericRecord rewriteRecord = HoodieAvroUtils.rewriteRecord(oldRecord, newSchema);
+ return new HoodieAvroIndexedRecord(rewriteRecord);
+ }
+
+ @Override
+ public HoodieRecord addMetadataValues(Schema recordSchema, Properties prop, Map<HoodieMetadataField, String> metadataValues) throws IOException {
+ Arrays.stream(HoodieMetadataField.values()).forEach(metadataField -> {
+ String value = metadataValues.get(metadataField);
+ if (value != null) {
+ ((GenericRecord) data).put(metadataField.getFieldName(), value);
+ }
+ });
+
+ return new HoodieAvroIndexedRecord(data);
+ }
+
+ @Override
+ public HoodieRecord overrideMetadataFieldValue(Schema recordSchema, Properties prop, int pos, String newValue) throws IOException {
+ data.put(pos, newValue);
+ return this;
+ }
+
+ @Override
+ public boolean shouldIgnore(Schema schema, Properties prop) throws IOException {
+ return getData().equals(SENTINEL);
+ }
+
+ @Override
+ public Option<Map<String, String>> getMetadata() {
+ return Option.empty();
+ }
+
+ @Override
+ public boolean isPresent(Schema schema, Properties prop) {
+ return true;
+ }
+}
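As a quick illustration (again, not part of the commit) of the wrapper introduced above, the sketch below wraps a plain Avro record in a HoodieAvroIndexedRecord; the schema and field names are assumptions:

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.common.model.HoodieAvroIndexedRecord;

public class HoodieAvroIndexedRecordSketch {
  public static void main(String[] args) {
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Row\",\"fields\":["
            + "{\"name\":\"id\",\"type\":\"string\"},{\"name\":\"ts\",\"type\":\"long\"}]}");

    GenericRecord avro = new GenericData.Record(schema);
    avro.put("id", "key-1");
    avro.put("ts", 1L);

    // Wrap the raw Avro record without any HoodieRecordPayload in between.
    HoodieAvroIndexedRecord record = new HoodieAvroIndexedRecord(avro);

    // Key extraction now happens on the record itself, given the key field's name.
    System.out.println(record.getRecordKey("id"));      // prints: key-1
    // toIndexedRecord() simply hands the wrapped Avro record back to the caller.
    System.out.println(record.toIndexedRecord().get());
  }
}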
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieAvroRecord.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieAvroRecord.java
index 9a9bbb2b74..9a9011da37 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieAvroRecord.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieAvroRecord.java
@@ -19,6 +19,28 @@
package org.apache.hudi.common.model;
+import org.apache.hudi.avro.HoodieAvroUtils;
+import org.apache.hudi.common.config.TypedProperties;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.common.util.ReflectionUtils;
+import org.apache.hudi.common.util.ValidationUtils;
+import org.apache.hudi.keygen.BaseKeyGenerator;
+import org.apache.hudi.metadata.HoodieMetadataPayload;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
+
+import javax.annotation.Nonnull;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+
+import static org.apache.hudi.TypeUtils.unsafeCast;
+
public class HoodieAvroRecord<T extends HoodieRecordPayload> extends HoodieRecord<T> {
public HoodieAvroRecord(HoodieKey key, T data) {
super(key, data);
@@ -40,6 +62,16 @@ public class HoodieAvroRecord<T extends HoodieRecordPayload> extends HoodieRecor
return new HoodieAvroRecord<>(this);
}
+ @Override
+ public HoodieRecord<T> newInstance(HoodieKey key, HoodieOperation op) {
+ return new HoodieAvroRecord<>(key, data, op);
+ }
+
+ @Override
+ public HoodieRecord<T> newInstance(HoodieKey key) {
+ return new HoodieAvroRecord<>(key, data);
+ }
+
@Override
public T getData() {
if (data == null) {
@@ -47,4 +79,175 @@ public class HoodieAvroRecord<T extends HoodieRecordPayload> extends HoodieRecor
}
return data;
}
+
+ @Override
+ public String getRecordKey(Option<BaseKeyGenerator> keyGeneratorOpt) {
+ return getRecordKey();
+ }
+
+ @Override
+ public String getRecordKey(String keyFieldName) {
+ return getRecordKey();
+ }
+
+ @Override
+ public Comparable<?> getOrderingValue() {
+ return data.getOrderingValue();
+ }
+
+ @Override
+ public Option<IndexedRecord> toIndexedRecord(Schema schema, Properties prop) throws IOException {
+ return getData().getInsertValue(schema, prop);
+ }
+
+ //////////////////////////////////////////////////////////////////////////////
+
+ //
+ // NOTE: These methods duplicate the ones on HoodieRecordPayload and are placed here
+ // for the duration of the RFC-46 implementation, until the migration off `HoodieRecordPayload`
+ // is complete
+ //
+ // TODO cleanup
+
+ // NOTE: This method assumes the semantic that the `preCombine` operation is bound to pick one or the other
+ // object, and may not create a new one
+ @Override
+ public HoodieRecord<T> preCombine(HoodieRecord<T> previousRecord) {
+ T picked = unsafeCast(getData().preCombine(previousRecord.getData()));
+ if (picked instanceof HoodieMetadataPayload) {
+ // NOTE: HoodieMetadataPayload returns a new payload
+ return new HoodieAvroRecord<>(getKey(), picked, getOperation());
+ }
+ return picked.equals(getData()) ? this : previousRecord;
+ }
+
+ // NOTE: This method assumes the semantic that only records bearing the same (partition, key) can
+ // be combined
+ @Override
+ public Option<HoodieRecord> combineAndGetUpdateValue(HoodieRecord previousRecord, Schema schema, Properties props) throws IOException {
+ Option<IndexedRecord> previousRecordAvroPayload = previousRecord.toIndexedRecord(schema, props);
+ if (!previousRecordAvroPayload.isPresent()) {
+ return Option.empty();
+ }
+
+ return getData().combineAndGetUpdateValue(previousRecordAvroPayload.get(), schema, props)
+ .map(combinedAvroPayload -> new HoodieAvroIndexedRecord((IndexedRecord) combinedAvroPayload));
+ }
+
+ @Override
+ public HoodieRecord mergeWith(HoodieRecord other, Schema readerSchema, Schema writerSchema) throws IOException {
+ ValidationUtils.checkState(other instanceof HoodieAvroRecord);
+ GenericRecord mergedPayload = HoodieAvroUtils.stitchRecords(
+ (GenericRecord) toIndexedRecord(readerSchema, new Properties()).get(),
+ (GenericRecord) other.toIndexedRecord(readerSchema, new Properties()).get(),
+ writerSchema);
+ return new HoodieAvroRecord(getKey(), instantiateRecordPayloadWrapper(mergedPayload, getPrecombineValue(getData())), getOperation());
+ }
+
+ @Override
+ public HoodieRecord rewriteRecord(Schema recordSchema, Schema targetSchema, TypedProperties props) throws IOException {
+ Option<IndexedRecord> avroRecordPayloadOpt = getData().getInsertValue(recordSchema, props);
+ GenericRecord avroPayloadInNewSchema =
+ HoodieAvroUtils.rewriteRecord((GenericRecord) avroRecordPayloadOpt.get(), targetSchema);
+ return new HoodieAvroRecord<>(getKey(), new RewriteAvroPayload(avroPayloadInNewSchema), getOperation());
+ }
+
+ @Override
+ public HoodieRecord rewriteRecord(Schema recordSchema, Properties prop, boolean schemaOnReadEnabled, Schema writeSchemaWithMetaFields) throws IOException {
+ GenericRecord record = (GenericRecord) getData().getInsertValue(recordSchema, prop).get();
+ GenericRecord rewriteRecord = schemaOnReadEnabled ? HoodieAvroUtils.rewriteRecordWithNewSchema(record, writeSchemaWithMetaFields, new HashMap<>())
+ : HoodieAvroUtils.rewriteRecord(record, writeSchemaWithMetaFields);
+ return new HoodieAvroRecord<>(getKey(), new RewriteAvroPayload(rewriteRecord), getOperation());
+ }
+
+ @Override
+ public HoodieRecord rewriteRecordWithMetadata(Schema recordSchema, Properties prop, boolean schemaOnReadEnabled, Schema writeSchemaWithMetaFields, String fileName) throws IOException {
+ GenericRecord record = (GenericRecord) getData().getInsertValue(recordSchema, prop).get();
+ GenericRecord rewriteRecord = schemaOnReadEnabled ? HoodieAvroUtils.rewriteEvolutionRecordWithMetadata(record, writeSchemaWithMetaFields, fileName)
+ : HoodieAvroUtils.rewriteRecordWithMetadata(record, writeSchemaWithMetaFields, fileName);
+ return new HoodieAvroRecord<>(getKey(), new RewriteAvroPayload(rewriteRecord), getOperation());
+ }
+
+ @Override
+ public HoodieRecord rewriteRecordWithNewSchema(Schema recordSchema, Properties prop, Schema newSchema, Map<String, String> renameCols) throws IOException {
+ GenericRecord oldRecord = (GenericRecord) getData().getInsertValue(recordSchema, prop).get();
+ GenericRecord rewriteRecord = HoodieAvroUtils.rewriteRecordWithNewSchema(oldRecord, newSchema, renameCols);
+ return new HoodieAvroRecord<>(getKey(), new RewriteAvroPayload(rewriteRecord), getOperation());
+ }
+
+ @Override
+ public HoodieRecord rewriteRecordWithNewSchema(Schema recordSchema, Properties prop, Schema newSchema) throws IOException {
+ GenericRecord oldRecord = (GenericRecord) getData().getInsertValue(recordSchema, prop).get();
+ GenericRecord rewriteRecord = HoodieAvroUtils.rewriteRecord(oldRecord, newSchema);
+ return new HoodieAvroRecord<>(getKey(), new RewriteAvroPayload(rewriteRecord), getOperation());
+ }
+
+ @Override
+ public HoodieRecord rewriteRecordWithNewSchema(Schema recordSchema, Properties prop, Schema newSchema, Map<String, String> renameCols, Mapper mapper) throws IOException {
+ GenericRecord oldRecord = (GenericRecord) getData().getInsertValue(recordSchema, prop).get();
+ GenericRecord rewriteRecord = HoodieAvroUtils.rewriteRecordWithNewSchema(oldRecord, newSchema, renameCols);
+ return mapper.apply(rewriteRecord);
+ }
+
+ @Override
+ public HoodieRecord overrideMetadataFieldValue(Schema recordSchema, Properties prop, int pos, String newValue) throws IOException {
+ IndexedRecord record = (IndexedRecord) data.getInsertValue(recordSchema, prop).get();
+ record.put(pos, newValue);
+ return new HoodieAvroRecord<>(getKey(), new RewriteAvroPayload((GenericRecord) record), getOperation());
+ }
+
+ @Override
+ public HoodieRecord addMetadataValues(Schema recordSchema, Properties prop, Map<HoodieMetadataField, String> metadataValues) throws IOException {
+ // NOTE: RewriteAvroPayload is expected here
+ GenericRecord avroRecordPayload = (GenericRecord) getData().getInsertValue(recordSchema, prop).get();
+
+ Arrays.stream(HoodieMetadataField.values()).forEach(metadataField -> {
+ String value = metadataValues.get(metadataField);
+ if (value != null) {
+ avroRecordPayload.put(metadataField.getFieldName(), value);
+ }
+ });
+
+ return new HoodieAvroRecord<>(getKey(), new RewriteAvroPayload(avroRecordPayload), getOperation());
+ }
+
+ public Option<Map<String, String>> getMetadata() {
+ return getData().getMetadata();
+ }
+
+ @Override
+ public boolean isPresent(Schema schema, Properties prop) throws IOException {
+ return getData().getInsertValue(schema, prop).isPresent();
+ }
+
+ @Override
+ public boolean shouldIgnore(Schema schema, Properties prop) throws IOException {
+ Option<IndexedRecord> insertRecord = getData().getInsertValue(schema, prop);
+ // just skip the ignored record
+ if (insertRecord.isPresent() && insertRecord.get().equals(SENTINEL)) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ @Nonnull
+ private T instantiateRecordPayloadWrapper(Object combinedAvroPayload, Comparable newPreCombineVal) {
+ return unsafeCast(
+ ReflectionUtils.loadPayload(
+ getData().getClass().getCanonicalName(),
+ new Object[]{combinedAvroPayload, newPreCombineVal},
+ GenericRecord.class,
+ Comparable.class));
+ }
+
+ private static <T extends HoodieRecordPayload> Comparable getPrecombineValue(T data) {
+ if (data instanceof BaseAvroPayload) {
+ return ((BaseAvroPayload) data).orderingVal;
+ }
+
+ return -1;
+ }
+
+ //////////////////////////////////////////////////////////////////////////////
}
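To make the pick-one-or-the-other semantics of the relocated preCombine concrete, here is a small hypothetical sketch built on OverwriteWithLatestAvroPayload; the key, partition path and ordering values are illustrative:

import org.apache.hudi.common.model.HoodieAvroRecord;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload;

public class PreCombineSketch {
  public static void main(String[] args) {
    HoodieKey key = new HoodieKey("key-1", "2022/06/10");

    // Two versions of the same key; the payload carries the ordering value.
    HoodieAvroRecord<OverwriteWithLatestAvroPayload> older =
        new HoodieAvroRecord<>(key, new OverwriteWithLatestAvroPayload(null, 1L));
    HoodieAvroRecord<OverwriteWithLatestAvroPayload> newer =
        new HoodieAvroRecord<>(key, new OverwriteWithLatestAvroPayload(null, 2L));

    // preCombine now lives on HoodieRecord and picks one of the two existing records;
    // it does not create a new one (except for HoodieMetadataPayload, per the NOTE above).
    HoodieRecord<OverwriteWithLatestAvroPayload> winner = newer.preCombine(older);
    System.out.println(winner == newer);   // true: the higher ordering value wins
  }
}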
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecord.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecord.java
index 2a3edafb8f..60ba6cfa72 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecord.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecord.java
@@ -18,15 +18,22 @@
package org.apache.hudi.common.model;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
+import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.util.CollectionUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;
+import org.apache.hudi.keygen.BaseKeyGenerator;
+import java.io.IOException;
import java.io.Serializable;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
+import java.util.Properties;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
@@ -35,13 +42,50 @@ import java.util.stream.IntStream;
*/
public abstract class HoodieRecord<T> implements Serializable {
- public static final String COMMIT_TIME_METADATA_FIELD = "_hoodie_commit_time";
- public static final String COMMIT_SEQNO_METADATA_FIELD = "_hoodie_commit_seqno";
- public static final String RECORD_KEY_METADATA_FIELD = "_hoodie_record_key";
- public static final String PARTITION_PATH_METADATA_FIELD = "_hoodie_partition_path";
- public static final String FILENAME_METADATA_FIELD = "_hoodie_file_name";
- public static final String OPERATION_METADATA_FIELD = "_hoodie_operation";
- public static final String HOODIE_IS_DELETED = "_hoodie_is_deleted";
+ public static final String COMMIT_TIME_METADATA_FIELD = HoodieMetadataField.COMMIT_TIME_METADATA_FIELD.getFieldName();
+ public static final String COMMIT_SEQNO_METADATA_FIELD = HoodieMetadataField.COMMIT_SEQNO_METADATA_FIELD.getFieldName();
+ public static final String RECORD_KEY_METADATA_FIELD = HoodieMetadataField.RECORD_KEY_METADATA_FIELD.getFieldName();
+ public static final String PARTITION_PATH_METADATA_FIELD = HoodieMetadataField.PARTITION_PATH_METADATA_FIELD.getFieldName();
+ public static final String FILENAME_METADATA_FIELD = HoodieMetadataField.FILENAME_METADATA_FIELD.getFieldName();
+ public static final String OPERATION_METADATA_FIELD = HoodieMetadataField.OPERATION_METADATA_FIELD.getFieldName();
+ public static final String HOODIE_IS_DELETED_FIELD = "_hoodie_is_deleted";
+
+ public enum HoodieMetadataField {
+ COMMIT_TIME_METADATA_FIELD("_hoodie_commit_time"),
+ COMMIT_SEQNO_METADATA_FIELD("_hoodie_commit_seqno"),
+ RECORD_KEY_METADATA_FIELD("_hoodie_record_key"),
+ PARTITION_PATH_METADATA_FIELD("_hoodie_partition_path"),
+ FILENAME_METADATA_FIELD("_hoodie_file_name"),
+ OPERATION_METADATA_FIELD("_hoodie_operation");
+
+ private final String fieldName;
+
+ HoodieMetadataField(String fieldName) {
+ this.fieldName = fieldName;
+ }
+
+ public String getFieldName() {
+ return fieldName;
+ }
+ }
+
+ /**
+ * A special record returned by {@link HoodieRecordPayload} indicating that the record should simply be skipped.
+ * This record is currently only used with {@link HoodieRecordPayload}, so it should never be
+ * shuffled over the network; the record can be compared locally via the equals method.
+ * HoodieRecordPayload#combineAndGetUpdateValue and HoodieRecordPayload#getInsertValue
+ * have three kinds of return values:
+ * 1. Option.empty
+ * This means the record should be deleted.
+ * 2. IGNORE_RECORD
+ * This means the record should not be processed, just skipped.
+ * 3. Any other non-empty record
+ * This means the record should be processed.
+ *
+ * The usage of IGNORE_RECORD can be seen in
+ * org.apache.spark.sql.hudi.command.payload.ExpressionPayload
+ */
+ public static final EmptyRecord SENTINEL = new EmptyRecord();
public static final List<String> HOODIE_META_COLUMNS =
CollectionUtils.createImmutableList(COMMIT_TIME_METADATA_FIELD, COMMIT_SEQNO_METADATA_FIELD,
@@ -120,6 +164,10 @@ public abstract class HoodieRecord<T> implements Serializable {
public abstract HoodieRecord<T> newInstance();
+ public abstract HoodieRecord<T> newInstance(HoodieKey key, HoodieOperation op);
+
+ public abstract HoodieRecord<T> newInstance(HoodieKey key);
+
public HoodieKey getKey() {
return key;
}
@@ -135,6 +183,8 @@ public abstract class HoodieRecord<T> implements Serializable {
return data;
}
+ public abstract Comparable<?> getOrderingValue();
+
/**
* Release the actual payload, to ease memory pressure. To be called after the record has been written to storage.
* Once deflated, cannot be inflated.
@@ -203,10 +253,6 @@ public abstract class HoodieRecord<T> implements Serializable {
return sb.toString();
}
- public static String generateSequenceId(String instantTime, int partitionId, long recordIndex) {
- return instantTime + "_" + partitionId + "_" + recordIndex;
- }
-
public String getPartitionPath() {
assert key != null;
return key.getPartitionPath();
@@ -217,6 +263,10 @@ public abstract class HoodieRecord<T> implements Serializable {
return key.getRecordKey();
}
+ public abstract String getRecordKey(Option<BaseKeyGenerator> keyGeneratorOpt);
+
+ public abstract String getRecordKey(String keyFieldName);
+
public void seal() {
this.sealed = true;
}
@@ -230,4 +280,108 @@ public abstract class HoodieRecord<T> implements Serializable {
throw new UnsupportedOperationException("Not allowed to modify after sealed");
}
}
+
+ //////////////////////////////////////////////////////////////////////////////
+
+ //
+ // NOTE: These methods duplicate the ones on HoodieRecordPayload and are placed here
+ // for the duration of the RFC-46 implementation, until the migration off `HoodieRecordPayload`
+ // is complete
+ //
+ // TODO cleanup
+
+ // NOTE: This method assumes the semantic that the `preCombine` operation is bound to pick one or the other
+ // object, and may not create a new one
+ public abstract HoodieRecord<T> preCombine(HoodieRecord<T> previousRecord);
+
+ // NOTE: This method assumes the semantic that only records bearing the same (partition, key) can
+ // be combined
+ public abstract Option<HoodieRecord> combineAndGetUpdateValue(HoodieRecord previousRecord, Schema schema, Properties props) throws IOException;
+
+ public abstract HoodieRecord mergeWith(HoodieRecord other, Schema readerSchema, Schema writerSchema) throws IOException;
+
+ public abstract HoodieRecord rewriteRecord(Schema recordSchema, Schema targetSchema, TypedProperties props) throws IOException;
+
+ /**
+ * Rewrite the GenericRecord with the Schema containing the Hoodie Metadata fields.
+ */
+ public abstract HoodieRecord rewriteRecord(Schema recordSchema, Properties prop, boolean schemaOnReadEnabled, Schema writeSchemaWithMetaFields) throws IOException;
+
+ public abstract HoodieRecord rewriteRecordWithMetadata(Schema recordSchema, Properties prop, boolean schemaOnReadEnabled, Schema writeSchemaWithMetaFields, String fileName) throws IOException;
+
+ public abstract HoodieRecord rewriteRecordWithNewSchema(Schema recordSchema, Properties prop, Schema newSchema, Map<String, String> renameCols) throws IOException;
+
+ public abstract HoodieRecord rewriteRecordWithNewSchema(Schema recordSchema, Properties prop, Schema newSchema, Map<String, String> renameCols, Mapper mapper) throws IOException;
+
+ public abstract HoodieRecord rewriteRecordWithNewSchema(Schema recordSchema, Properties prop, Schema newSchema) throws IOException;
+
+ public abstract HoodieRecord overrideMetadataFieldValue(Schema recordSchema, Properties prop, int pos, String newValue) throws IOException;
+
+ public abstract HoodieRecord addMetadataValues(Schema recordSchema, Properties prop, Map<HoodieMetadataField, String> metadataValues) throws IOException;
+
+ public abstract Option<Map<String, String>> getMetadata();
+
+ public abstract boolean isPresent(Schema schema, Properties prop) throws IOException;
+
+ public abstract boolean shouldIgnore(Schema schema, Properties prop) throws IOException;
+
+ public abstract Option<IndexedRecord> toIndexedRecord(Schema schema, Properties prop) throws IOException;
+
+ //////////////////////////////////////////////////////////////////////////////
+
+ public static String generateSequenceId(String instantTime, int partitionId, long recordIndex) {
+ return instantTime + "_" + partitionId + "_" + recordIndex;
+ }
+
+ /**
+ * NOTE: This is a temporary transition construct that makes it possible to construct
+ * HoodieRecord instances w/o excessively wiring into a lot of components
+ * details that are irrelevant to them.
+ * TODO remove
+ */
+ @FunctionalInterface
+ public interface Mapper {
+ HoodieRecord apply(IndexedRecord avroPayload);
+ }
+
+ /**
+ * A special record returned by {@link HoodieRecordPayload} indicating that the record should simply be skipped.
+ * This record is currently only used with {@link HoodieRecordPayload}, so it should never be
+ * shuffled over the network; the record can be compared locally via the equals method.
+ * HoodieRecordPayload#combineAndGetUpdateValue and HoodieRecordPayload#getInsertValue
+ * have three kinds of return values:
+ * 1. Option.empty
+ * This means the record should be deleted.
+ * 2. IGNORE_RECORD
+ * This means the record should not be processed, just skipped.
+ * 3. Any other non-empty record
+ * This means the record should be processed.
+ *
+ * The usage of IGNORE_RECORD can be seen in
+ * org.apache.spark.sql.hudi.command.payload.ExpressionPayload
+ */
+ private static class EmptyRecord implements GenericRecord {
+ private EmptyRecord() {}
+
+ @Override
+ public void put(int i, Object v) {}
+
+ @Override
+ public Object get(int i) {
+ return null;
+ }
+
+ @Override
+ public Schema getSchema() {
+ return null;
+ }
+
+ @Override
+ public void put(String key, Object v) {}
+
+ @Override
+ public Object get(String key) {
+ return null;
+ }
+ }
}
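A tiny illustrative sketch of how the new HoodieMetadataField enum relates to the pre-existing string constants, and of the relocated generateSequenceId helper (the instant time and indices are made up):

import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecord.HoodieMetadataField;

public class MetadataFieldSketch {
  public static void main(String[] args) {
    // The legacy string constants are now derived from the enum, so both forms
    // resolve to the same physical meta column name.
    System.out.println(HoodieRecord.RECORD_KEY_METADATA_FIELD);                       // _hoodie_record_key
    System.out.println(HoodieMetadataField.RECORD_KEY_METADATA_FIELD.getFieldName()); // _hoodie_record_key

    // generateSequenceId is unchanged, just relocated further down the class.
    System.out.println(HoodieRecord.generateSequenceId("20220610232047", 1, 42L));    // 20220610232047_1_42
  }
}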
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecordPayload.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecordPayload.java
index d4e61da9bb..5885c9702a 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecordPayload.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecordPayload.java
@@ -18,14 +18,13 @@
package org.apache.hudi.common.model;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.IndexedRecord;
import org.apache.hudi.ApiMaturityLevel;
import org.apache.hudi.PublicAPIClass;
import org.apache.hudi.PublicAPIMethod;
import org.apache.hudi.common.util.Option;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.IndexedRecord;
-
import java.io.IOException;
import java.io.Serializable;
import java.util.Map;
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/OverwriteWithLatestAvroPayload.java b/hudi-common/src/main/java/org/apache/hudi/common/model/OverwriteWithLatestAvroPayload.java
index d8469ed5a1..5268d76281 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/model/OverwriteWithLatestAvroPayload.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/model/OverwriteWithLatestAvroPayload.java
@@ -85,7 +85,7 @@ public class OverwriteWithLatestAvroPayload extends BaseAvroPayload
* @returns {@code true} if record represents a delete record. {@code false} otherwise.
*/
protected boolean isDeleteRecord(GenericRecord genericRecord) {
- final String isDeleteKey = HoodieRecord.HOODIE_IS_DELETED;
+ final String isDeleteKey = HoodieRecord.HOODIE_IS_DELETED_FIELD;
// Modify to be compatible with new version Avro.
// The new version Avro throws for GenericRecord.get if the field name
// does not exist in the schema.
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java b/hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java
index 657ac57c63..473e313ab9 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java
@@ -41,8 +41,6 @@ import org.apache.hudi.exception.InvalidTableException;
import org.apache.hudi.internal.schema.InternalSchema;
import org.apache.hudi.internal.schema.io.FileBasedInternalSchemaStorageManager;
import org.apache.hudi.internal.schema.utils.SerDeHelper;
-import org.apache.hudi.io.storage.HoodieHFileReader;
-import org.apache.hudi.io.storage.HoodieOrcReader;
import org.apache.hudi.util.Lazy;
import org.apache.avro.JsonProperties;
@@ -447,7 +445,7 @@ public class TableSchemaResolver {
FileSystem fs = metaClient.getRawFs();
CacheConfig cacheConfig = new CacheConfig(fs.getConf());
- HoodieHFileReader<IndexedRecord> hFileReader = new HoodieHFileReader<>(fs.getConf(), hFilePath, cacheConfig);
+ HoodieAvroHFileReader hFileReader = new HoodieAvroHFileReader(fs.getConf(), hFilePath, cacheConfig);
return convertAvroSchemaToParquet(hFileReader.getSchema());
}
@@ -455,7 +453,7 @@ public class TableSchemaResolver {
LOG.info("Reading schema from " + orcFilePath);
FileSystem fs = metaClient.getRawFs();
- HoodieOrcReader<IndexedRecord> orcReader = new HoodieOrcReader<>(fs.getConf(), orcFilePath);
+ HoodieAvroOrcReader orcReader = new HoodieAvroOrcReader(fs.getConf(), orcFilePath);
return convertAvroSchemaToParquet(orcReader.getSchema());
}
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java
index 5bfb395dbc..4b1d4e8a95 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java
@@ -19,7 +19,6 @@
package org.apache.hudi.common.table.log;
import org.apache.hudi.common.model.DeleteRecord;
-import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.common.model.HoodieAvroRecord;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodiePayloadProps;
@@ -28,6 +27,7 @@ import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock;
+import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock.RecordIterator;
import org.apache.hudi.common.table.log.block.HoodieCommandBlock;
import org.apache.hudi.common.table.log.block.HoodieDataBlock;
import org.apache.hudi.common.table.log.block.HoodieDeleteBlock;
@@ -382,13 +382,22 @@ public abstract class AbstractHoodieLogRecordReader {
* handle it.
*/
private void processDataBlock(HoodieDataBlock dataBlock, Option<KeySpec> keySpecOpt) throws Exception {
- try (ClosableIterator<IndexedRecord> recordIterator = getRecordsIterator(dataBlock, keySpecOpt)) {
+ HoodieRecord.Mapper mapper = (rec) -> createHoodieRecord(rec, this.hoodieTableMetaClient.getTableConfig(),
+ this.payloadClassFQN, this.preCombineField, this.withOperationField, this.simpleKeyGenFields, this.partitionName);
+
+ try (ClosableIterator<HoodieRecord> recordIterator = getRecordsIterator(dataBlock, keySpecOpt, mapper)) {
Option<Schema> schemaOption = getMergedSchema(dataBlock);
+ Schema finalReadSchema;
+ if (recordIterator instanceof RecordIterator) {
+ finalReadSchema = ((RecordIterator) recordIterator).getFinalReadSchema();
+ } else {
+ finalReadSchema = dataBlock.getSchema();
+ }
while (recordIterator.hasNext()) {
- IndexedRecord currentRecord = recordIterator.next();
- IndexedRecord record = schemaOption.isPresent() ? HoodieAvroUtils.rewriteRecordWithNewSchema(currentRecord, schemaOption.get(), Collections.emptyMap()) : currentRecord;
- processNextRecord(createHoodieRecord(record, this.hoodieTableMetaClient.getTableConfig(), this.payloadClassFQN,
- this.preCombineField, this.withOperationField, this.simpleKeyGenFields, this.partitionName));
+ HoodieRecord currentRecord = recordIterator.next();
+ HoodieRecord record = schemaOption.isPresent()
+ ? currentRecord.rewriteRecordWithNewSchema(finalReadSchema, new Properties(), schemaOption.get(), Collections.emptyMap(), mapper) : currentRecord;
+ processNextRecord(record);
totalLogRecords.incrementAndGet();
}
}
@@ -489,13 +498,13 @@ public abstract class AbstractHoodieLogRecordReader {
progress = (numLogFilesSeen - 1) / logFilePaths.size();
}
- private ClosableIterator<IndexedRecord> getRecordsIterator(HoodieDataBlock dataBlock, Option<KeySpec> keySpecOpt) throws IOException {
+ private ClosableIterator<HoodieRecord> getRecordsIterator(HoodieDataBlock dataBlock, Option<KeySpec> keySpecOpt, HoodieRecord.Mapper mapper) throws IOException {
if (keySpecOpt.isPresent()) {
KeySpec keySpec = keySpecOpt.get();
- return dataBlock.getRecordIterator(keySpec.keys, keySpec.fullKey);
+ return dataBlock.getRecordIterator(keySpec.keys, keySpec.fullKey, mapper);
}
- return dataBlock.getRecordIterator();
+ return dataBlock.getRecordIterator(mapper);
}
/**
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java
index 2e2af79823..219eb39346 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java
@@ -272,7 +272,7 @@ public class HoodieLogFileReader implements HoodieLogFormat.Reader {
private boolean isBlockCorrupted(int blocksize) throws IOException {
long currentPos = inputStream.getPos();
long blockSizeFromFooter;
-
+
try {
// check if the blocksize mentioned in the footer is the same as the header;
// by seeking and checking the length of a long. We do not seek `currentPos + blocksize`
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieMergedLogRecordScanner.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieMergedLogRecordScanner.java
index d7e725544a..dd079de364 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieMergedLogRecordScanner.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieMergedLogRecordScanner.java
@@ -67,13 +67,13 @@ import static org.apache.hudi.common.util.ValidationUtils.checkState;
*/
public class HoodieMergedLogRecordScanner extends AbstractHoodieLogRecordReader
- implements Iterable<HoodieRecord<? extends HoodieRecordPayload>> {
+ implements Iterable<HoodieRecord> {
private static final Logger LOG = LogManager.getLogger(HoodieMergedLogRecordScanner.class);
// A timer for calculating elapsed time in millis
public final HoodieTimer timer = new HoodieTimer();
// Final map of compacted/merged records
- protected final ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records;
+ protected final ExternalSpillableMap<String, HoodieRecord> records;
// count of merged records in log
private long numMergedRecordsInLog;
private long maxMemorySizeInBytes;
@@ -122,12 +122,12 @@ public class HoodieMergedLogRecordScanner extends AbstractHoodieLogRecordReader
}
@Override
- public Iterator<HoodieRecord<? extends HoodieRecordPayload>> iterator() {
+ public Iterator<HoodieRecord> iterator() {
checkState(forceFullScan, "Record reader has to be in full-scan mode to use this API");
return records.iterator();
}
- public Map<String, HoodieRecord<? extends HoodieRecordPayload>> getRecords() {
+ public Map<String, HoodieRecord> getRecords() {
checkState(forceFullScan, "Record reader has to be in full-scan mode to use this API");
return records;
}
@@ -144,7 +144,7 @@ public class HoodieMergedLogRecordScanner extends AbstractHoodieLogRecordReader
}
@Override
- protected void processNextRecord(HoodieRecord<? extends HoodieRecordPayload> hoodieRecord) throws IOException {
+ protected void processNextRecord(HoodieRecord hoodieRecord) throws IOException {
String key = hoodieRecord.getRecordKey();
if (records.containsKey(key)) {
// Merge and store the merged record. The HoodieRecordPayload implementation is free to decide what should be
@@ -152,7 +152,7 @@ public class HoodieMergedLogRecordScanner extends AbstractHoodieLogRecordReader
HoodieRecord<? extends HoodieRecordPayload> oldRecord = records.get(key);
HoodieRecordPayload oldValue = oldRecord.getData();
- HoodieRecordPayload combinedValue = hoodieRecord.getData().preCombine(oldValue, readerSchema, this.getPayloadProps());
+ HoodieRecordPayload combinedValue = (HoodieRecordPayload)hoodieRecord.preCombine(oldRecord, readerSchema, this.getPayloadProps()).getData();
// If combinedValue is oldValue, no need rePut oldRecord
if (combinedValue != oldValue) {
HoodieOperation operation = hoodieRecord.getOperation();
@@ -173,7 +173,7 @@ public class HoodieMergedLogRecordScanner extends AbstractHoodieLogRecordReader
// should be deleted or be kept. The old record is kept only if the DELETE record has smaller ordering val.
// For same ordering values, uses the natural order(arrival time semantics).
- Comparable curOrderingVal = oldRecord.getData().getOrderingValue();
+ Comparable curOrderingVal = oldRecord.getOrderingValue();
Comparable deleteOrderingVal = deleteRecord.getOrderingValue();
// Checks the ordering value does not equal to 0
// because we use 0 as the default value which means natural order
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java
index 9e74d14c04..2712d000ac 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java
@@ -30,6 +30,7 @@ import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hudi.common.fs.SizeAwareDataInputStream;
+import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.util.ClosableIterator;
import org.apache.hudi.common.util.Option;
@@ -37,6 +38,7 @@ import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.internal.schema.InternalSchema;
import javax.annotation.Nonnull;
+
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
@@ -51,6 +53,8 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.Properties;
+import java.util.stream.Collectors;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.InflaterInputStream;
@@ -85,9 +89,10 @@ public class HoodieAvroDataBlock extends HoodieDataBlock {
super(content, inputStream, readBlockLazily, Option.of(logBlockContentLocation), readerSchema, header, footer, keyField, false);
}
- public HoodieAvroDataBlock(@Nonnull List<IndexedRecord> records,
- @Nonnull Map<HeaderMetadataType, String> header,
- @Nonnull String keyField) {
+ public HoodieAvroDataBlock(@Nonnull List<HoodieRecord> records,
+ @Nonnull Map<HeaderMetadataType, String> header,
+ @Nonnull String keyField
+ ) {
super(records, header, new HashMap<>(), keyField);
}
@@ -97,7 +102,7 @@ public class HoodieAvroDataBlock extends HoodieDataBlock {
}
@Override
- protected byte[] serializeRecords(List<IndexedRecord> records) throws IOException {
+ protected byte[] serializeRecords(List<HoodieRecord> records) throws IOException {
Schema schema = new Schema.Parser().parse(super.getLogBlockHeader().get(HeaderMetadataType.SCHEMA));
GenericDatumWriter<IndexedRecord> writer = new GenericDatumWriter<>(schema);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
@@ -110,13 +115,14 @@ public class HoodieAvroDataBlock extends HoodieDataBlock {
output.writeInt(records.size());
// 3. Write the records
- for (IndexedRecord s : records) {
+ for (HoodieRecord s : records) {
ByteArrayOutputStream temp = new ByteArrayOutputStream();
BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(temp, encoderCache.get());
encoderCache.set(encoder);
try {
// Encode the record into bytes
- writer.write(s, encoder);
+ IndexedRecord data = (IndexedRecord) s.toIndexedRecord(schema, new Properties()).get();
+ writer.write(data, encoder);
encoder.flush();
// Get the size of the bytes
@@ -136,22 +142,25 @@ public class HoodieAvroDataBlock extends HoodieDataBlock {
// TODO (na) - Break down content into smaller chunks of byte [] to be GC as they are used
@Override
- protected ClosableIterator<IndexedRecord> deserializeRecords(byte[] content) throws IOException {
+ protected ClosableIterator<HoodieRecord> deserializeRecords(byte[] content, HoodieRecord.Mapper mapper) throws IOException {
checkState(this.readerSchema != null, "Reader's schema has to be non-null");
- return RecordIterator.getInstance(this, content, internalSchema);
+ return RecordIterator.getInstance(this, content, internalSchema, mapper);
}
- private static class RecordIterator implements ClosableIterator<IndexedRecord> {
+ public static class RecordIterator implements ClosableIterator<HoodieRecord> {
private byte[] content;
private final SizeAwareDataInputStream dis;
private final GenericDatumReader<IndexedRecord> reader;
private final ThreadLocal<BinaryDecoder> decoderCache = new ThreadLocal<>();
+ private final HoodieRecord.Mapper mapper;
+ private final Schema finalReadSchema;
private int totalRecords = 0;
private int readRecords = 0;
- private RecordIterator(Schema readerSchema, Schema writerSchema, byte[] content, InternalSchema internalSchema) throws IOException {
+ private RecordIterator(Schema readerSchema, Schema writerSchema, byte[] content, InternalSchema internalSchema, HoodieRecord.Mapper mapper) throws IOException {
this.content = content;
+ this.mapper = mapper;
this.dis = new SizeAwareDataInputStream(new DataInputStream(new ByteArrayInputStream(this.content)));
@@ -168,6 +177,7 @@ public class HoodieAvroDataBlock extends HoodieDataBlock {
finalReadSchema = writerSchema;
}
+ this.finalReadSchema = finalReadSchema;
this.reader = new GenericDatumReader<>(writerSchema, finalReadSchema);
if (logBlockVersion.hasRecordCount()) {
@@ -175,10 +185,14 @@ public class HoodieAvroDataBlock extends HoodieDataBlock {
}
}
- public static RecordIterator getInstance(HoodieAvroDataBlock dataBlock, byte[] content, InternalSchema internalSchema) throws IOException {
+ public static RecordIterator getInstance(HoodieAvroDataBlock dataBlock, byte[] content, InternalSchema internalSchema, HoodieRecord.Mapper mapper) throws IOException {
// Get schema from the header
Schema writerSchema = new Schema.Parser().parse(dataBlock.getLogBlockHeader().get(HeaderMetadataType.SCHEMA));
- return new RecordIterator(dataBlock.readerSchema, writerSchema, content, internalSchema);
+ return new RecordIterator(dataBlock.readerSchema, writerSchema, content, internalSchema, mapper);
+ }
+
+ public Schema getFinalReadSchema() {
+ return finalReadSchema;
}
@Override
@@ -198,7 +212,7 @@ public class HoodieAvroDataBlock extends HoodieDataBlock {
}
@Override
- public IndexedRecord next() {
+ public HoodieRecord next() {
try {
int recordLength = this.dis.readInt();
BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(this.content, this.dis.getNumberOfBytesRead(),
@@ -207,7 +221,7 @@ public class HoodieAvroDataBlock extends HoodieDataBlock {
IndexedRecord record = this.reader.read(null, decoder);
this.dis.skipBytes(recordLength);
this.readRecords++;
- return record;
+ return mapper.apply(record);
} catch (IOException e) {
throw new HoodieIOException("Unable to convert bytes to record.", e);
}
@@ -226,7 +240,7 @@ public class HoodieAvroDataBlock extends HoodieDataBlock {
* HoodieLogFormat V1.
*/
@Deprecated
- public HoodieAvroDataBlock(List<IndexedRecord> records, Schema schema) {
+ public HoodieAvroDataBlock(List<HoodieRecord> records, Schema schema) {
super(records, Collections.singletonMap(HeaderMetadataType.SCHEMA, schema.toString()), new HashMap<>(), HoodieRecord.RECORD_KEY_METADATA_FIELD);
}
@@ -271,7 +285,7 @@ public class HoodieAvroDataBlock extends HoodieDataBlock {
dis.skipBytes(recordLength);
}
dis.close();
- return new HoodieAvroDataBlock(records, readerSchema);
+ return new HoodieAvroDataBlock(records.stream().map(HoodieAvroIndexedRecord::new).collect(Collectors.toList()), readerSchema);
}
private static byte[] compress(String text) {
@@ -313,8 +327,8 @@ public class HoodieAvroDataBlock extends HoodieDataBlock {
output.writeInt(schemaContent.length);
output.write(schemaContent);
- List<IndexedRecord> records = new ArrayList<>();
- try (ClosableIterator<IndexedRecord> recordItr = getRecordIterator()) {
+ List<HoodieRecord> records = new ArrayList<>();
+ try (ClosableIterator<HoodieRecord> recordItr = getRecordIterator(HoodieAvroIndexedRecord::new)) {
recordItr.forEachRemaining(records::add);
}
@@ -322,9 +336,9 @@ public class HoodieAvroDataBlock extends HoodieDataBlock {
output.writeInt(records.size());
// 3. Write the records
- Iterator<IndexedRecord> itr = records.iterator();
+ Iterator<HoodieRecord> itr = records.iterator();
while (itr.hasNext()) {
- IndexedRecord s = itr.next();
+ IndexedRecord s = (IndexedRecord)itr.next().getData();
ByteArrayOutputStream temp = new ByteArrayOutputStream();
Encoder encoder = EncoderFactory.get().binaryEncoder(temp, null);
try {
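For context (not part of the commit), a hypothetical write-path sketch against the new List<HoodieRecord>-based constructor of HoodieAvroDataBlock; the schema, record and header below are assumptions:

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock;
import org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class AvroDataBlockWriteSketch {
  public static void main(String[] args) {
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Row\",\"fields\":["
            + "{\"name\":\"_hoodie_record_key\",\"type\":\"string\"},{\"name\":\"ts\",\"type\":\"long\"}]}");

    GenericRecord avro = new GenericData.Record(schema);
    avro.put("_hoodie_record_key", "key-1");
    avro.put("ts", 1L);

    // Raw Avro records are wrapped into HoodieAvroIndexedRecord before handing them to the block.
    List<HoodieRecord> records = new ArrayList<>();
    records.add(new HoodieAvroIndexedRecord(avro));

    // The block still reads its writer schema from the SCHEMA header entry.
    Map<HeaderMetadataType, String> header = new HashMap<>();
    header.put(HeaderMetadataType.SCHEMA, schema.toString());

    HoodieAvroDataBlock block =
        new HoodieAvroDataBlock(records, header, HoodieRecord.RECORD_KEY_METADATA_FIELD);
    System.out.println(block.getBlockType());
  }
}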
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java
index c83b3bc82d..d88fff750f 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java
@@ -23,8 +23,8 @@ import org.apache.hudi.common.util.Option;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.avro.Schema;
-import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.internal.schema.InternalSchema;
import java.io.IOException;
@@ -50,7 +50,7 @@ public abstract class HoodieDataBlock extends HoodieLogBlock {
// TODO rebase records/content to leverage Either to warrant
// that they are mutex (used by read/write flows respectively)
- private final Option<List<IndexedRecord>> records;
+ private final Option<List<HoodieRecord>> records;
/**
* Key field's name w/in the record's schema
@@ -66,7 +66,7 @@ public abstract class HoodieDataBlock extends HoodieLogBlock {
/**
* NOTE: This ctor is used on the write-path (ie when records ought to be written into the log)
*/
- public HoodieDataBlock(List<IndexedRecord> records,
+ public HoodieDataBlock(List<HoodieRecord> records,
Map<HeaderMetadataType, String> header,
Map<HeaderMetadataType, String> footer,
String keyFieldName) {
@@ -138,13 +138,13 @@ public abstract class HoodieDataBlock extends HoodieLogBlock {
/**
* Returns all the records iterator contained w/in this block.
*/
- public final ClosableIterator<IndexedRecord> getRecordIterator() {
+ public final ClosableIterator<HoodieRecord> getRecordIterator(HoodieRecord.Mapper mapper) {
if (records.isPresent()) {
return list2Iterator(records.get());
}
try {
// in case records are absent, read content lazily and then convert to IndexedRecords
- return readRecordsFromBlockPayload();
+ return readRecordsFromBlockPayload(mapper);
} catch (IOException io) {
throw new HoodieIOException("Unable to convert content bytes to records", io);
}
@@ -162,15 +162,15 @@ public abstract class HoodieDataBlock extends HoodieLogBlock {
* @return List of IndexedRecords for the keys of interest.
* @throws IOException in case of failures encountered when reading/parsing records
*/
- public final ClosableIterator<IndexedRecord> getRecordIterator(List<String> keys, boolean fullKey) throws IOException {
+ public final ClosableIterator<HoodieRecord> getRecordIterator(List<String> keys, boolean fullKey, HoodieRecord.Mapper mapper) throws IOException {
boolean fullScan = keys.isEmpty();
if (enablePointLookups && !fullScan) {
- return lookupRecords(keys, fullKey);
+ return lookupRecords(keys, fullKey, mapper);
}
// Otherwise, we fetch all the records and filter out all the records, but the
// ones requested
- ClosableIterator<IndexedRecord> allRecords = getRecordIterator();
+ ClosableIterator<HoodieRecord> allRecords = getRecordIterator(mapper);
if (fullScan) {
return allRecords;
}
@@ -179,29 +179,29 @@ public abstract class HoodieDataBlock extends HoodieLogBlock {
return FilteringIterator.getInstance(allRecords, keySet, fullKey, this::getRecordKey);
}
- protected ClosableIterator<IndexedRecord> readRecordsFromBlockPayload() throws IOException {
+ protected ClosableIterator<HoodieRecord> readRecordsFromBlockPayload(HoodieRecord.Mapper mapper) throws IOException {
if (readBlockLazily && !getContent().isPresent()) {
// read log block contents from disk
inflate();
}
try {
- return deserializeRecords(getContent().get());
+ return deserializeRecords(getContent().get(), mapper);
} finally {
// Free up content to be GC'd by deflating the block
deflate();
}
}
- protected ClosableIterator<IndexedRecord> lookupRecords(List<String> keys, boolean fullKey) throws IOException {
+ protected ClosableIterator<HoodieRecord> lookupRecords(List<String> keys, boolean fullKey, HoodieRecord.Mapper mapper) throws IOException {
throw new UnsupportedOperationException(
String.format("Point lookups are not supported by this Data block type (%s)", getBlockType())
);
}
- protected abstract byte[] serializeRecords(List<IndexedRecord> records) throws IOException;
+ protected abstract byte[] serializeRecords(List<HoodieRecord> records) throws IOException;
- protected abstract ClosableIterator<IndexedRecord> deserializeRecords(byte[] content) throws IOException;
+ protected abstract ClosableIterator<HoodieRecord> deserializeRecords(byte[] content, HoodieRecord.Mapper mapper) throws IOException;
public abstract HoodieLogBlockType getBlockType();
@@ -209,10 +209,8 @@ public abstract class HoodieDataBlock extends HoodieLogBlock {
return Option.ofNullable(schema.getField(keyFieldName));
}
- protected Option<String> getRecordKey(IndexedRecord record) {
- return getKeyField(record.getSchema())
- .map(keyField -> record.get(keyField.pos()))
- .map(Object::toString);
+ protected Option<String> getRecordKey(HoodieRecord record) {
+ return Option.ofNullable(record.getRecordKey(keyFieldName));
}
/**
@@ -245,32 +243,30 @@ public abstract class HoodieDataBlock extends HoodieLogBlock {
/**
* A {@link ClosableIterator} that supports filtering strategy with given keys.
* User should supply the key extraction function for fetching string format keys.
- *
- * @param <T> the element type
*/
- private static class FilteringIterator<T extends IndexedRecord> implements ClosableIterator<T> {
- private final ClosableIterator<T> nested; // nested iterator
+ private static class FilteringIterator implements ClosableIterator<HoodieRecord> {
+ private final ClosableIterator<HoodieRecord> nested; // nested iterator
private final Set<String> keys; // the filtering keys
private final boolean fullKey;
- private final Function<T, Option<String>> keyExtract; // function to extract the key
+ private final Function<HoodieRecord, Option<String>> keyExtract; // function to extract the key
- private T next;
+ private HoodieRecord next;
- private FilteringIterator(ClosableIterator<T> nested, Set<String> keys, boolean fullKey, Function<T, Option<String>> keyExtract) {
+ private FilteringIterator(ClosableIterator<HoodieRecord> nested, Set<String> keys, boolean fullKey, Function<HoodieRecord, Option<String>> keyExtract) {
this.nested = nested;
this.keys = keys;
this.fullKey = fullKey;
this.keyExtract = keyExtract;
}
- public static <T extends IndexedRecord> FilteringIterator<T> getInstance(
- ClosableIterator<T> nested,
+ public static FilteringIterator getInstance(
+ ClosableIterator<HoodieRecord> nested,
Set<String> keys,
boolean fullKey,
- Function<T, Option<String>> keyExtract) {
- return new FilteringIterator<>(nested, keys, fullKey, keyExtract);
+ Function<HoodieRecord, Option<String>> keyExtract) {
+ return new FilteringIterator(nested, keys, fullKey, keyExtract);
}
@Override
@@ -296,7 +292,7 @@ public abstract class HoodieDataBlock extends HoodieLogBlock {
}
@Override
- public T next() {
+ public HoodieRecord next() {
return this.next;
}
}
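And a matching hypothetical read-path sketch: the Mapper argument introduced above decides which HoodieRecord flavour gets materialized from a block; the dataBlock parameter is assumed to come from a log-file reader:

import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.table.log.block.HoodieDataBlock;
import org.apache.hudi.common.util.ClosableIterator;

public class DataBlockReadSketch {
  // `dataBlock` is hypothetical; in practice it comes from log scanning.
  static void printKeys(HoodieDataBlock dataBlock) {
    // The mapper turns each deserialized Avro record into a HoodieRecord implementation.
    HoodieRecord.Mapper mapper = HoodieAvroIndexedRecord::new;
    try (ClosableIterator<HoodieRecord> iterator = dataBlock.getRecordIterator(mapper)) {
      while (iterator.hasNext()) {
        HoodieRecord record = iterator.next();
        // Assuming the block's schema carries the meta columns.
        System.out.println(record.getRecordKey(HoodieRecord.RECORD_KEY_METADATA_FIELD));
      }
    }
  }
}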
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java
index d923c59270..297d7683f2 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java
@@ -39,7 +39,7 @@ import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.io.storage.HoodieHBaseKVComparator;
-import org.apache.hudi.io.storage.HoodieHFileReader;
+import org.apache.hudi.io.storage.HoodieAvroHFileReader;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
@@ -51,6 +51,7 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.Properties;
import java.util.TreeMap;
import static org.apache.hudi.common.util.ValidationUtils.checkState;
@@ -78,16 +79,16 @@ public class HoodieHFileDataBlock extends HoodieDataBlock {
Map<HeaderMetadataType, String> footer,
boolean enablePointLookups,
Path pathForReader) {
- super(content, inputStream, readBlockLazily, Option.of(logBlockContentLocation), readerSchema, header, footer, HoodieHFileReader.KEY_FIELD_NAME, enablePointLookups);
+ super(content, inputStream, readBlockLazily, Option.of(logBlockContentLocation), readerSchema, header, footer, HoodieAvroHFileReader.KEY_FIELD_NAME, enablePointLookups);
this.compressionAlgorithm = Option.empty();
this.pathForReader = pathForReader;
}
- public HoodieHFileDataBlock(List<IndexedRecord> records,
+ public HoodieHFileDataBlock(List<HoodieRecord> records,
Map<HeaderMetadataType, String> header,
Compression.Algorithm compressionAlgorithm,
Path pathForReader) {
- super(records, header, new HashMap<>(), HoodieHFileReader.KEY_FIELD_NAME);
+ super(records, header, new HashMap<>(), HoodieAvroHFileReader.KEY_FIELD_NAME);
this.compressionAlgorithm = Option.of(compressionAlgorithm);
this.pathForReader = pathForReader;
}
@@ -98,7 +99,7 @@ public class HoodieHFileDataBlock extends HoodieDataBlock {
}
@Override
- protected byte[] serializeRecords(List<IndexedRecord> records) throws IOException {
+ protected byte[] serializeRecords(List<HoodieRecord> records) throws IOException {
HFileContext context = new HFileContextBuilder()
.withBlockSize(DEFAULT_BLOCK_SIZE)
.withCompression(compressionAlgorithm.get())
@@ -117,11 +118,13 @@ public class HoodieHFileDataBlock extends HoodieDataBlock {
// Serialize records into bytes
Map<String, byte[]> sortedRecordsMap = new TreeMap<>();
- Iterator<IndexedRecord> itr = records.iterator();
+ // Get writer schema
+ Schema writerSchema = new Schema.Parser().parse(super.getLogBlockHeader().get(HeaderMetadataType.SCHEMA));
+ Iterator<HoodieRecord> itr = records.iterator();
int id = 0;
while (itr.hasNext()) {
- IndexedRecord record = itr.next();
+ HoodieRecord record = itr.next();
String recordKey;
if (useIntegerKey) {
recordKey = String.format("%" + keyWidth + "s", id++);
@@ -129,7 +132,7 @@ public class HoodieHFileDataBlock extends HoodieDataBlock {
recordKey = getRecordKey(record).get();
}
- final byte[] recordBytes = serializeRecord(record);
+ final byte[] recordBytes = serializeRecord(record, writerSchema);
ValidationUtils.checkState(!sortedRecordsMap.containsKey(recordKey),
"Writing multiple records with same key not supported for " + this.getClass().getName());
sortedRecordsMap.put(recordKey, recordBytes);
@@ -148,7 +151,7 @@ public class HoodieHFileDataBlock extends HoodieDataBlock {
}
});
- writer.appendFileInfo(HoodieHFileReader.SCHEMA_KEY.getBytes(), getSchema().toString().getBytes());
+ writer.appendFileInfo(HoodieAvroHFileReader.SCHEMA_KEY.getBytes(), getSchema().toString().getBytes());
writer.close();
ostream.flush();
@@ -158,16 +161,16 @@ public class HoodieHFileDataBlock extends HoodieDataBlock {
}
@Override
- protected ClosableIterator<IndexedRecord> deserializeRecords(byte[] content) throws IOException {
+ protected ClosableIterator<HoodieRecord> deserializeRecords(byte[] content, HoodieRecord.Mapper mapper) throws IOException {
checkState(readerSchema != null, "Reader's schema has to be non-null");
// Get schema from the header
Schema writerSchema = new Schema.Parser().parse(super.getLogBlockHeader().get(HeaderMetadataType.SCHEMA));
// Read the content
- HoodieHFileReader<IndexedRecord> reader = new HoodieHFileReader<>(null, pathForReader, content, Option.of(writerSchema));
+ HoodieAvroHFileReader reader = new HoodieAvroHFileReader(null, pathForReader, content, Option.of(writerSchema));
Iterator<IndexedRecord> recordIterator = reader.getRecordIterator(readerSchema);
- return new ClosableIterator<IndexedRecord>() {
+ return new ClosableIterator<HoodieRecord>() {
@Override
public void close() {
reader.close();
@@ -179,15 +182,15 @@ public class HoodieHFileDataBlock extends HoodieDataBlock {
}
@Override
- public IndexedRecord next() {
- return recordIterator.next();
+ public HoodieRecord next() {
+ return mapper.apply(recordIterator.next());
}
};
}
// TODO abstract this w/in HoodieDataBlock
@Override
- protected ClosableIterator<IndexedRecord> lookupRecords(List<String> keys, boolean fullKey) throws IOException {
+ protected ClosableIterator<HoodieRecord> lookupRecords(List<String> keys, boolean fullKey, HoodieRecord.Mapper mapper) throws IOException {
HoodieLogBlockContentLocation blockContentLoc = getBlockContentLocation().get();
// NOTE: It's important to extend Hadoop configuration here to make sure configuration
@@ -207,22 +210,22 @@ public class HoodieHFileDataBlock extends HoodieDataBlock {
List<String> sortedKeys = new ArrayList<>(keys);
Collections.sort(sortedKeys);
- final HoodieHFileReader<IndexedRecord> reader =
- new HoodieHFileReader<>(inlineConf, inlinePath, new CacheConfig(inlineConf), inlinePath.getFileSystem(inlineConf));
+ final HoodieAvroHFileReader reader =
+ new HoodieAvroHFileReader(inlineConf, inlinePath, new CacheConfig(inlineConf), inlinePath.getFileSystem(inlineConf));
// Get writer's schema from the header
final ClosableIterator<IndexedRecord> recordIterator =
fullKey ? reader.getRecordsByKeysIterator(sortedKeys, readerSchema) : reader.getRecordsByKeyPrefixIterator(sortedKeys, readerSchema);
- return new ClosableIterator<IndexedRecord>() {
+ return new ClosableIterator<HoodieRecord>() {
@Override
public boolean hasNext() {
return recordIterator.hasNext();
}
@Override
- public IndexedRecord next() {
- return recordIterator.next();
+ public HoodieRecord next() {
+ return mapper.apply(recordIterator.next());
}
@Override
@@ -233,12 +236,12 @@ public class HoodieHFileDataBlock extends HoodieDataBlock {
};
}
- private byte[] serializeRecord(IndexedRecord record) {
- Option<Schema.Field> keyField = getKeyField(record.getSchema());
+ private byte[] serializeRecord(HoodieRecord record, Schema schema) throws IOException {
+ Option<Schema.Field> keyField = getKeyField(schema);
// Reset key value w/in the record to avoid duplicating the key w/in payload
if (keyField.isPresent()) {
- record.put(keyField.get().pos(), StringUtils.EMPTY_STRING);
+ record.overrideMetadataFieldValue(schema, new Properties(), keyField.get().pos(), StringUtils.EMPTY_STRING);
}
- return HoodieAvroUtils.indexedRecordToBytes(record);
+ return HoodieAvroUtils.recordToBytes(record, schema).get();
}
}
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieParquetDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieParquetDataBlock.java
index afb448f844..ebe53fe471 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieParquetDataBlock.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieParquetDataBlock.java
@@ -28,9 +28,11 @@ import org.apache.hudi.avro.HoodieAvroWriteSupport;
import org.apache.hudi.common.fs.inline.InLineFSUtils;
import org.apache.hudi.common.fs.inline.InLineFileSystem;
import org.apache.hudi.common.util.ClosableIterator;
+import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.ParquetReaderIterator;
import org.apache.hudi.io.storage.HoodieParquetConfig;
+import org.apache.hudi.io.storage.HoodieAvroFileReader.HoodieRecordTransformIterator;
import org.apache.hudi.io.storage.HoodieParquetStreamWriter;
import org.apache.parquet.avro.AvroParquetReader;
import org.apache.parquet.avro.AvroReadSupport;
@@ -69,7 +71,7 @@ public class HoodieParquetDataBlock extends HoodieDataBlock {
}
public HoodieParquetDataBlock(
- @Nonnull List<IndexedRecord> records,
+ @Nonnull List<HoodieRecord> records,
@Nonnull Map<HeaderMetadataType, String> header,
@Nonnull String keyField,
@Nonnull CompressionCodecName compressionCodecName
@@ -85,7 +87,7 @@ public class HoodieParquetDataBlock extends HoodieDataBlock {
}
@Override
- protected byte[] serializeRecords(List<IndexedRecord> records) throws IOException {
+ protected byte[] serializeRecords(List<HoodieRecord> records) throws IOException {
if (records.size() == 0) {
return new byte[0];
}
@@ -108,10 +110,10 @@ public class HoodieParquetDataBlock extends HoodieDataBlock {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
try (FSDataOutputStream outputStream = new FSDataOutputStream(baos)) {
- try (HoodieParquetStreamWriter<IndexedRecord> parquetWriter = new HoodieParquetStreamWriter<>(outputStream, avroParquetConfig)) {
- for (IndexedRecord record : records) {
+ try (HoodieParquetStreamWriter parquetWriter = new HoodieParquetStreamWriter(outputStream, avroParquetConfig)) {
+ for (HoodieRecord record : records) {
String recordKey = getRecordKey(record).orElse(null);
- parquetWriter.writeAvro(recordKey, record);
+ parquetWriter.write(recordKey, record, writerSchema);
}
outputStream.flush();
}
@@ -137,7 +139,7 @@ public class HoodieParquetDataBlock extends HoodieDataBlock {
* requested by the caller (providing projected Reader's schema)
*/
@Override
- protected ClosableIterator<IndexedRecord> readRecordsFromBlockPayload() throws IOException {
+ protected ClosableIterator<HoodieRecord> readRecordsFromBlockPayload(HoodieRecord.Mapper mapper) throws IOException {
HoodieLogBlockContentLocation blockContentLoc = getBlockContentLocation().get();
// NOTE: It's important to extend Hadoop configuration here to make sure configuration
@@ -151,14 +153,15 @@ public class HoodieParquetDataBlock extends HoodieDataBlock {
blockContentLoc.getContentPositionInLogFile(),
blockContentLoc.getBlockSize());
- return getProjectedParquetRecordsIterator(
+ ClosableIterator<IndexedRecord> iterator = getProjectedParquetRecordsIterator(
inlineConf,
readerSchema,
HadoopInputFile.fromPath(inlineLogFilePath, inlineConf));
+ return new HoodieRecordTransformIterator(iterator, mapper);
}
@Override
- protected ClosableIterator<IndexedRecord> deserializeRecords(byte[] content) {
+ protected ClosableIterator<HoodieRecord> deserializeRecords(byte[] content, HoodieRecord.Mapper mapper) throws IOException {
throw new UnsupportedOperationException("Should not be invoked");
}
-}
\ No newline at end of file
+}
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieArchivedTimeline.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieArchivedTimeline.java
index 4df30b115e..22dd2b7ee3 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieArchivedTimeline.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieArchivedTimeline.java
@@ -22,8 +22,10 @@ import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.avro.model.HoodieArchivedMetaEntry;
import org.apache.hudi.avro.model.HoodieMergeArchiveFilePlan;
import org.apache.hudi.common.fs.HoodieWrapperFileSystem;
+import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodiePartitionMetadata;
+import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.log.HoodieLogFormat;
import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock;
@@ -257,10 +259,11 @@ public class HoodieArchivedTimeline extends HoodieDefaultTimeline {
HoodieAvroDataBlock avroBlock = (HoodieAvroDataBlock) block;
// TODO If we can store additional metadata in datablock, we can skip parsing records
// (such as startTime, endTime of records in the block)
- try (ClosableIterator<IndexedRecord> itr = avroBlock.getRecordIterator()) {
+ try (ClosableIterator<HoodieRecord> itr = avroBlock.getRecordIterator(HoodieAvroIndexedRecord::new)) {
StreamSupport.stream(Spliterators.spliteratorUnknownSize(itr, Spliterator.IMMUTABLE), true)
// Filter blocks in desired time window
- .filter(r -> commitsFilter.apply((GenericRecord) r))
+ .map(r -> (GenericRecord) ((HoodieAvroIndexedRecord) r).toIndexedRecord().get())
+ .filter(commitsFilter::apply)
.map(r -> readCommit((GenericRecord) r, loadInstantDetails))
.filter(c -> filter == null || filter.isInRange(c))
.forEach(instantsInRange::add);
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/CollectionUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/CollectionUtils.java
index 90d6e6ae90..0c411b1264 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/util/CollectionUtils.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/util/CollectionUtils.java
@@ -234,6 +234,15 @@ public class CollectionUtils {
return Collections.unmodifiableMap(map);
}
+ @SafeVarargs
+ public static <K,V> HashMap<K, V> createHashMap(final Pair<K, V>... elements) {
+ HashMap<K,V> map = new HashMap<>();
+ for (Pair<K,V> pair: elements) {
+ map.put(pair.getLeft(), pair.getRight());
+ }
+ return map;
+ }
+
@SafeVarargs
public static <T> Set<T> createImmutableSet(final T... elements) {
return Collections.unmodifiableSet(createSet(elements));
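A usage sketch for the new createHashMap helper; Pair is org.apache.hudi.common.util.collection.Pair, and writerSchema/minKey/maxKey are placeholder values assumed to be in scope:

    // Builds a small mutable map from varargs pairs, e.g. for file-info style metadata.
    HashMap<String, String> fileInfo = CollectionUtils.createHashMap(
        Pair.of("schema", writerSchema.toString()),
        Pair.of("minRecordKey", minKey),
        Pair.of("maxRecordKey", maxKey));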
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/MappingIterator.java b/hudi-common/src/main/java/org/apache/hudi/common/util/MappingIterator.java
new file mode 100644
index 0000000000..3ab80d9634
--- /dev/null
+++ b/hudi-common/src/main/java/org/apache/hudi/common/util/MappingIterator.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.common.util;
+
+import java.util.function.Function;
+
+public class MappingIterator<T, R> implements ClosableIterator<R> {
+
+ private final ClosableIterator<T> sourceIterator;
+ private final Function<T, R> mapper;
+
+ public MappingIterator(ClosableIterator<T> sourceIterator, Function<T, R> mapper) {
+ this.sourceIterator = sourceIterator;
+ this.mapper = mapper;
+ }
+
+ @Override
+ public boolean hasNext() {
+ return sourceIterator.hasNext();
+ }
+
+ @Override
+ public R next() {
+ return mapper.apply(sourceIterator.next());
+ }
+
+ @Override
+ public void close() {
+ sourceIterator.close();
+ }
+}
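A brief sketch of the new MappingIterator adapting an Avro iterator into HoodieRecords; avroIterator is assumed to be a ClosableIterator<IndexedRecord> obtained from a reader:

    // Lazily wraps each IndexedRecord into a HoodieAvroIndexedRecord as the caller iterates;
    // closing the mapped iterator closes the underlying source iterator.
    ClosableIterator<HoodieRecord> records =
        new MappingIterator<>(avroIterator, HoodieAvroIndexedRecord::new);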
diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileReader.java
new file mode 100644
index 0000000000..f454fa28f1
--- /dev/null
+++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileReader.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.io.storage;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.IndexedRecord;
+
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.model.HoodieRecord.Mapper;
+import org.apache.hudi.common.util.ClosableIterator;
+import org.apache.hudi.common.util.MappingIterator;
+import org.apache.hudi.common.util.Option;
+
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.util.List;
+
+public interface HoodieAvroFileReader extends HoodieFileReader, AutoCloseable {
+
+ ClosableIterator<IndexedRecord> getRecordIterator(Schema readerSchema) throws IOException;
+
+ default Option<IndexedRecord> getRecordByKey(String key, Schema readerSchema) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ default ClosableIterator<IndexedRecord> getRecordsByKeysIterator(List<String> keys, Schema schema) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ default ClosableIterator<IndexedRecord> getRecordsByKeysIterator(List<String> keys) throws IOException {
+ return getRecordsByKeysIterator(keys, getSchema());
+ }
+
+ default ClosableIterator<IndexedRecord> getRecordsByKeyPrefixIterator(List<String> keyPrefixes, Schema schema) throws IOException {
+ throw new UnsupportedEncodingException();
+ }
+
+ default ClosableIterator<IndexedRecord> getRecordsByKeyPrefixIterator(List<String> keyPrefixes) throws IOException {
+ return getRecordsByKeyPrefixIterator(keyPrefixes, getSchema());
+ }
+
+ default ClosableIterator<HoodieRecord> getRecordsByKeysIterator(List<String> keys, Schema schema, HoodieRecord.Mapper mapper) throws IOException {
+ ClosableIterator<IndexedRecord> iterator = getRecordsByKeysIterator(keys, schema);
+ return new HoodieRecordTransformIterator(iterator, mapper);
+ }
+
+ default ClosableIterator<HoodieRecord> getRecordsByKeyPrefixIterator(List<String> keyPrefixes, Schema schema, HoodieRecord.Mapper mapper) throws IOException {
+ ClosableIterator<IndexedRecord> iterator = getRecordsByKeyPrefixIterator(keyPrefixes, schema);
+ return new HoodieRecordTransformIterator(iterator, mapper);
+ }
+
+ @Override
+ default ClosableIterator<HoodieRecord> getRecordIterator(Schema schema, HoodieRecord.Mapper mapper) throws IOException {
+ return new MappingIterator<>(getRecordIterator(schema), mapper::apply);
+ }
+
+ @Override
+ default Option<HoodieRecord> getRecordByKey(String key, Schema readerSchema, HoodieRecord.Mapper mapper) throws IOException {
+ return getRecordByKey(key, readerSchema).map(mapper::apply);
+ }
+
+ class HoodieRecordTransformIterator implements ClosableIterator<HoodieRecord> {
+ private final ClosableIterator<IndexedRecord> dataIterator;
+ private final HoodieRecord.Mapper mapper;
+
+ public HoodieRecordTransformIterator(ClosableIterator<IndexedRecord> dataIterator, Mapper mapper) {
+ this.dataIterator = dataIterator;
+ this.mapper = mapper;
+ }
+
+ @Override
+ public boolean hasNext() {
+ return dataIterator.hasNext();
+ }
+
+ @Override
+ public HoodieRecord next() {
+ return mapper.apply(dataIterator.next());
+ }
+
+ @Override
+ public void close() {
+ dataIterator.close();
+ }
+ }
+}
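A minimal read-loop sketch against the new HoodieAvroFileReader surface, assuming reader is an open instance and using HoodieAvroIndexedRecord::new as the mapper:

    // The default method bridges the Avro iterator into HoodieRecords via MappingIterator.
    ClosableIterator<HoodieRecord> it =
        reader.getRecordIterator(reader.getSchema(), HoodieAvroIndexedRecord::new);
    try {
      while (it.hasNext()) {
        HoodieRecord record = it.next();
        // ... hand the record to the caller (merging, indexing, etc.)
      }
    } finally {
      it.close();
    }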
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriter.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileWriter.java
similarity index 60%
copy from hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriter.java
copy to hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileWriter.java
index cce59d3b66..947b83fe86 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriter.java
+++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileWriter.java
@@ -18,28 +18,42 @@
package org.apache.hudi.io.storage;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.generic.IndexedRecord;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
-import java.io.IOException;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
-public interface HoodieFileWriter<R extends IndexedRecord> {
+import java.io.IOException;
+import java.util.Properties;
- void writeAvroWithMetadata(HoodieKey key, R newRecord) throws IOException;
+public interface HoodieAvroFileWriter extends HoodieFileWriter {
boolean canWrite();
void close() throws IOException;
- void writeAvro(String key, R oldRecord) throws IOException;
+ void writeAvroWithMetadata(HoodieKey key, IndexedRecord avroRecord) throws IOException;
+
+ void writeAvro(String recordKey, IndexedRecord record) throws IOException;
+
+ @Override
+ default void writeWithMetadata(HoodieKey key, HoodieRecord record, Schema schema, Properties props) throws IOException {
+ IndexedRecord avroPayload = (IndexedRecord)record.toIndexedRecord(schema, props).get();
+ writeAvroWithMetadata(key, avroPayload);
+ }
+
+ @Override
+ default void write(String recordKey, HoodieRecord record, Schema schema, Properties props) throws IOException {
+ IndexedRecord avroPayload = (IndexedRecord)record.toIndexedRecord(schema, props).get();
+ writeAvro(recordKey, avroPayload);
+ }
- default void prepRecordWithMetadata(HoodieKey key, R avroRecord, String instantTime, Integer partitionId, long recordIndex, String fileName) {
+ default void prepRecordWithMetadata(HoodieKey key, IndexedRecord avroRecord, String instantTime, Integer partitionId, long recordIndex, String fileName) {
String seqId = HoodieRecord.generateSequenceId(instantTime, partitionId, recordIndex);
HoodieAvroUtils.addHoodieKeyToRecord((GenericRecord) avroRecord, key.getRecordKey(), key.getPartitionPath(), fileName);
HoodieAvroUtils.addCommitMetadataToRecord((GenericRecord) avroRecord, instantTime, seqId);
- return;
}
}
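A caller-side sketch of the bridged writer API; writer, record and schema are assumed to be in scope, with record carrying an Avro-convertible payload:

    // The default methods convert the HoodieRecord to an IndexedRecord (toIndexedRecord)
    // and delegate to writeAvroWithMetadata / writeAvro, so Avro writers need no changes.
    writer.writeWithMetadata(record.getKey(), record, schema, new Properties());
    writer.write(record.getRecordKey(), record, schema, new Properties());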
diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReader.java
similarity index 88%
rename from hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java
rename to hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReader.java
index 3e5b3ff6ac..6ac2a94932 100644
--- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java
+++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReader.java
@@ -53,8 +53,8 @@ import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import java.util.Set;
-import java.util.TreeSet;
import java.util.stream.Collectors;
+import java.util.TreeSet;
import static org.apache.hudi.common.util.CollectionUtils.toStream;
import static org.apache.hudi.common.util.ValidationUtils.checkState;
@@ -64,7 +64,7 @@ import static org.apache.hudi.common.util.ValidationUtils.checkState;
* <p>
* {@link HoodieFileReader} implementation allowing to read from {@link HFile}.
*/
-public class HoodieHFileReader<R extends IndexedRecord> implements HoodieFileReader<R> {
+public class HoodieAvroHFileReader implements HoodieAvroFileReader {
// TODO HoodieHFileReader right now tightly coupled to MT, we should break that coupling
public static final String SCHEMA_KEY = "schema";
@@ -75,7 +75,7 @@ public class HoodieHFileReader<R extends IndexedRecord> implements HoodieFileRea
public static final String KEY_MIN_RECORD = "minRecordKey";
public static final String KEY_MAX_RECORD = "maxRecordKey";
- private static final Logger LOG = LogManager.getLogger(HoodieHFileReader.class);
+ private static final Logger LOG = LogManager.getLogger(HoodieAvroHFileReader.class);
private final Path path;
@@ -90,21 +90,21 @@ public class HoodieHFileReader<R extends IndexedRecord> implements HoodieFileRea
private final Object sharedScannerLock = new Object();
- public HoodieHFileReader(Configuration hadoopConf, Path path, CacheConfig cacheConfig) throws IOException {
+ public HoodieAvroHFileReader(Configuration hadoopConf, Path path, CacheConfig cacheConfig) throws IOException {
this(path,
HoodieHFileUtils.createHFileReader(FSUtils.getFs(path.toString(), hadoopConf), path, cacheConfig, hadoopConf),
Option.empty());
}
- public HoodieHFileReader(Configuration hadoopConf, Path path, CacheConfig cacheConfig, FileSystem fs) throws IOException {
+ public HoodieAvroHFileReader(Configuration hadoopConf, Path path, CacheConfig cacheConfig, FileSystem fs) throws IOException {
this(path, HoodieHFileUtils.createHFileReader(fs, path, cacheConfig, hadoopConf), Option.empty());
}
- public HoodieHFileReader(FileSystem fs, Path dummyPath, byte[] content, Option<Schema> schemaOpt) throws IOException {
+ public HoodieAvroHFileReader(FileSystem fs, Path dummyPath, byte[] content, Option<Schema> schemaOpt) throws IOException {
this(null, HoodieHFileUtils.createHFileReader(fs, dummyPath, content), schemaOpt);
}
- public HoodieHFileReader(Path path, HFile.Reader reader, Option<Schema> schemaOpt) throws IOException {
+ public HoodieAvroHFileReader(Path path, HFile.Reader reader, Option<Schema> schemaOpt) throws IOException {
this.path = path;
this.reader = reader;
// For shared scanner, which is primarily used for point-lookups, we're caching blocks
@@ -171,38 +171,36 @@ public class HoodieHFileReader<R extends IndexedRecord> implements HoodieFileRea
@SuppressWarnings("unchecked")
@Override
- public Option<R> getRecordByKey(String key, Schema readerSchema) throws IOException {
+ public Option<IndexedRecord> getRecordByKey(String key, Schema readerSchema) throws IOException {
synchronized (sharedScannerLock) {
- return (Option<R>) fetchRecordByKeyInternal(sharedScanner, key, getSchema(), readerSchema);
+ return fetchRecordByKeyInternal(sharedScanner, key, getSchema(), readerSchema);
}
}
- @SuppressWarnings("unchecked")
- @Override
- public ClosableIterator<R> getRecordIterator(Schema readerSchema) throws IOException {
+ public ClosableIterator<IndexedRecord> getRecordIterator(Schema readerSchema) throws IOException {
// TODO eval whether seeking scanner would be faster than pread
HFileScanner scanner = getHFileScanner(reader, false);
- return (ClosableIterator<R>) new RecordIterator(scanner, getSchema(), readerSchema);
+ return new RecordIterator(scanner, getSchema(), readerSchema);
}
@SuppressWarnings("unchecked")
@Override
- public ClosableIterator<R> getRecordsByKeysIterator(List<String> keys, Schema readerSchema) throws IOException {
+ public ClosableIterator<IndexedRecord> getRecordsByKeysIterator(List<String> keys, Schema readerSchema) throws IOException {
// We're caching blocks for this scanner to minimize amount of traffic
// to the underlying storage as we fetched (potentially) sparsely distributed
// keys
HFileScanner scanner = getHFileScanner(reader, true);
- return (ClosableIterator<R>) new RecordByKeyIterator(scanner, keys, getSchema(), readerSchema);
+ return new RecordByKeyIterator(scanner, keys, getSchema(), readerSchema);
}
@SuppressWarnings("unchecked")
@Override
- public ClosableIterator<R> getRecordsByKeyPrefixIterator(List<String> keyPrefixes, Schema readerSchema) throws IOException {
+ public ClosableIterator<IndexedRecord> getRecordsByKeyPrefixIterator(List<String> keyPrefixes, Schema readerSchema) throws IOException {
// We're caching blocks for this scanner to minimize amount of traffic
// to the underlying storage as we fetched (potentially) sparsely distributed
// keys
HFileScanner scanner = getHFileScanner(reader, true);
- return (ClosableIterator<R>) new RecordByKeyPrefixIterator(scanner, keyPrefixes, getSchema(), readerSchema);
+ return new RecordByKeyPrefixIterator(scanner, keyPrefixes, getSchema(), readerSchema);
}
@Override
@@ -227,7 +225,7 @@ public class HoodieHFileReader<R extends IndexedRecord> implements HoodieFileRea
return keyScanner.seekTo(kv) == 0;
}
- private static Iterator<GenericRecord> getRecordByKeyPrefixIteratorInternal(HFileScanner scanner,
+ private static Iterator<IndexedRecord> getRecordByKeyPrefixIteratorInternal(HFileScanner scanner,
String keyPrefix,
Schema writerSchema,
Schema readerSchema) throws IOException {
@@ -264,8 +262,8 @@ public class HoodieHFileReader<R extends IndexedRecord> implements HoodieFileRea
scanner.seekTo();
}
- class KeyPrefixIterator implements Iterator<GenericRecord> {
- private GenericRecord next = null;
+ class KeyPrefixIterator implements Iterator<IndexedRecord> {
+ private IndexedRecord next = null;
private boolean eof = false;
@Override
@@ -298,8 +296,8 @@ public class HoodieHFileReader<R extends IndexedRecord> implements HoodieFileRea
}
@Override
- public GenericRecord next() {
- GenericRecord next = this.next;
+ public IndexedRecord next() {
+ IndexedRecord next = this.next;
this.next = null;
return next;
}
@@ -308,7 +306,7 @@ public class HoodieHFileReader<R extends IndexedRecord> implements HoodieFileRea
return new KeyPrefixIterator();
}
- private static Option<GenericRecord> fetchRecordByKeyInternal(HFileScanner scanner, String key, Schema writerSchema, Schema readerSchema) throws IOException {
+ private static Option<IndexedRecord> fetchRecordByKeyInternal(HFileScanner scanner, String key, Schema writerSchema, Schema readerSchema) throws IOException {
KeyValue kv = new KeyValue(key.getBytes(), null, null, null);
if (scanner.seekTo(kv) != 0) {
return Option.empty();
@@ -372,7 +370,7 @@ public class HoodieHFileReader<R extends IndexedRecord> implements HoodieFileRea
* <p>
* Reads all the records with given schema
*/
- public static <R extends IndexedRecord> List<R> readAllRecords(HoodieHFileReader<R> reader) throws IOException {
+ public static List<IndexedRecord> readAllRecords(HoodieAvroHFileReader reader) throws IOException {
Schema schema = reader.getSchema();
return toStream(reader.getRecordIterator(schema))
.collect(Collectors.toList());
@@ -383,7 +381,7 @@ public class HoodieHFileReader<R extends IndexedRecord> implements HoodieFileRea
* <p>
* Reads all the records with given schema and filtering keys.
*/
- public static <R extends IndexedRecord> List<R> readRecords(HoodieHFileReader<R> reader,
+ public static List<IndexedRecord> readRecords(HoodieAvroHFileReader reader,
List<String> keys) throws IOException {
return readRecords(reader, keys, reader.getSchema());
}
@@ -393,7 +391,7 @@ public class HoodieHFileReader<R extends IndexedRecord> implements HoodieFileRea
* <p>
* Reads all the records with given schema and filtering keys.
*/
- public static <R extends IndexedRecord> List<R> readRecords(HoodieHFileReader<R> reader,
+ public static List<IndexedRecord> readRecords(HoodieAvroHFileReader reader,
List<String> keys,
Schema schema) throws IOException {
Collections.sort(keys);
@@ -411,16 +409,16 @@ public class HoodieHFileReader<R extends IndexedRecord> implements HoodieFileRea
return Option.ofNullable(schema.getField(KEY_FIELD_NAME));
}
- private static class RecordByKeyPrefixIterator implements ClosableIterator<GenericRecord> {
+ private static class RecordByKeyPrefixIterator implements ClosableIterator<IndexedRecord> {
private final Iterator<String> keyPrefixesIterator;
- private Iterator<GenericRecord> recordsIterator;
+ private Iterator<IndexedRecord> recordsIterator;
private final HFileScanner scanner;
private final Schema writerSchema;
private final Schema readerSchema;
- private GenericRecord next = null;
+ private IndexedRecord next = null;
RecordByKeyPrefixIterator(HFileScanner scanner, List<String> keyPrefixes, Schema writerSchema, Schema readerSchema) throws IOException {
this.keyPrefixesIterator = keyPrefixes.iterator();
@@ -456,8 +454,8 @@ public class HoodieHFileReader<R extends IndexedRecord> implements HoodieFileRea
}
@Override
- public GenericRecord next() {
- GenericRecord next = this.next;
+ public IndexedRecord next() {
+ IndexedRecord next = this.next;
this.next = null;
return next;
}
@@ -468,7 +466,7 @@ public class HoodieHFileReader<R extends IndexedRecord> implements HoodieFileRea
}
}
- private static class RecordByKeyIterator implements ClosableIterator<GenericRecord> {
+ private static class RecordByKeyIterator implements ClosableIterator<IndexedRecord> {
private final Iterator<String> keyIterator;
private final HFileScanner scanner;
@@ -476,7 +474,7 @@ public class HoodieHFileReader<R extends IndexedRecord> implements HoodieFileRea
private final Schema readerSchema;
private final Schema writerSchema;
- private GenericRecord next = null;
+ private IndexedRecord next = null;
RecordByKeyIterator(HFileScanner scanner, List<String> keys, Schema writerSchema, Schema readerSchema) throws IOException {
this.keyIterator = keys.iterator();
@@ -497,7 +495,7 @@ public class HoodieHFileReader<R extends IndexedRecord> implements HoodieFileRea
}
while (keyIterator.hasNext()) {
- Option<GenericRecord> value = fetchRecordByKeyInternal(scanner, keyIterator.next(), writerSchema, readerSchema);
+ Option<IndexedRecord> value = fetchRecordByKeyInternal(scanner, keyIterator.next(), writerSchema, readerSchema);
if (value.isPresent()) {
next = value.get();
return true;
@@ -510,8 +508,8 @@ public class HoodieHFileReader<R extends IndexedRecord> implements HoodieFileRea
}
@Override
- public GenericRecord next() {
- GenericRecord next = this.next;
+ public IndexedRecord next() {
+ IndexedRecord next = this.next;
this.next = null;
return next;
}
@@ -522,13 +520,13 @@ public class HoodieHFileReader<R extends IndexedRecord> implements HoodieFileRea
}
}
- private static class RecordIterator implements ClosableIterator<GenericRecord> {
+ private static class RecordIterator implements ClosableIterator<IndexedRecord> {
private final HFileScanner scanner;
private final Schema writerSchema;
private final Schema readerSchema;
- private GenericRecord next = null;
+ private IndexedRecord next = null;
RecordIterator(HFileScanner scanner, Schema writerSchema, Schema readerSchema) {
this.scanner = scanner;
@@ -563,8 +561,8 @@ public class HoodieHFileReader<R extends IndexedRecord> implements HoodieFileRea
}
@Override
- public GenericRecord next() {
- GenericRecord next = this.next;
+ public IndexedRecord next() {
+ IndexedRecord next = this.next;
this.next = null;
return next;
}
diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieOrcReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroOrcReader.java
similarity index 92%
rename from hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieOrcReader.java
rename to hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroOrcReader.java
index 5431bf3782..8f953d25b5 100644
--- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieOrcReader.java
+++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroOrcReader.java
@@ -38,12 +38,12 @@ import org.apache.orc.TypeDescription;
import java.io.IOException;
import java.util.Set;
-public class HoodieOrcReader<R extends IndexedRecord> implements HoodieFileReader {
+public class HoodieAvroOrcReader implements HoodieAvroFileReader {
private Path path;
private Configuration conf;
private final BaseFileUtils orcUtils;
- public HoodieOrcReader(Configuration configuration, Path path) {
+ public HoodieAvroOrcReader(Configuration configuration, Path path) {
this.conf = configuration;
this.path = path;
this.orcUtils = BaseFileUtils.getInstance(HoodieFileFormat.ORC);
@@ -65,7 +65,7 @@ public class HoodieOrcReader<R extends IndexedRecord> implements HoodieFileReade
}
@Override
- public ClosableIterator<R> getRecordIterator(Schema schema) throws IOException {
+ public ClosableIterator<IndexedRecord> getRecordIterator(Schema schema) throws IOException {
try {
Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
TypeDescription orcSchema = AvroOrcUtils.createOrcSchema(schema);
diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetReader.java
similarity index 85%
rename from hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetReader.java
rename to hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetReader.java
index 804e4354c7..83ffaf589f 100644
--- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetReader.java
+++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetReader.java
@@ -36,14 +36,14 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Set;
-public class HoodieParquetReader<R extends IndexedRecord> implements HoodieFileReader<R> {
-
+public class HoodieAvroParquetReader implements HoodieAvroFileReader {
+
private final Path path;
private final Configuration conf;
private final BaseFileUtils parquetUtils;
private List<ParquetReaderIterator> readerIterators = new ArrayList<>();
- public HoodieParquetReader(Configuration configuration, Path path) {
+ public HoodieAvroParquetReader(Configuration configuration, Path path) {
this.conf = configuration;
this.path = path;
this.parquetUtils = BaseFileUtils.getInstance(HoodieFileFormat.PARQUET);
@@ -65,10 +65,10 @@ public class HoodieParquetReader<R extends IndexedRecord> implements HoodieFileR
}
@Override
- public ClosableIterator<R> getRecordIterator(Schema schema) throws IOException {
+ public ClosableIterator<IndexedRecord> getRecordIterator(Schema schema) throws IOException {
AvroReadSupport.setAvroReadSchema(conf, schema);
- ParquetReader<R> reader = AvroParquetReader.<R>builder(path).withConf(conf).build();
- ParquetReaderIterator<R> parquetReaderIterator = new ParquetReaderIterator<>(reader);
+ ParquetReader<IndexedRecord> reader = AvroParquetReader.<IndexedRecord>builder(path).withConf(conf).build();
+ ParquetReaderIterator<IndexedRecord> parquetReaderIterator = new ParquetReaderIterator<>(reader);
readerIterators.add(parquetReaderIterator);
return parquetReaderIterator;
}
diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileReader.java
index 6490425c42..175722bb9b 100644
--- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileReader.java
+++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileReader.java
@@ -19,8 +19,8 @@
package org.apache.hudi.io.storage;
import org.apache.avro.Schema;
-import org.apache.avro.generic.IndexedRecord;
import org.apache.hudi.common.bloom.BloomFilter;
+import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.util.ClosableIterator;
import org.apache.hudi.common.util.Option;
@@ -29,7 +29,7 @@ import java.io.UnsupportedEncodingException;
import java.util.List;
import java.util.Set;
-public interface HoodieFileReader<R extends IndexedRecord> extends AutoCloseable {
+public interface HoodieFileReader extends AutoCloseable {
String[] readMinMaxRecordKeys();
@@ -37,34 +37,34 @@ public interface HoodieFileReader<R extends IndexedRecord> extends AutoCloseable
Set<String> filterRowKeys(Set<String> candidateRowKeys);
- ClosableIterator<R> getRecordIterator(Schema readerSchema) throws IOException;
+ ClosableIterator<HoodieRecord> getRecordIterator(Schema readerSchema, HoodieRecord.Mapper mapper) throws IOException;
- default ClosableIterator<R> getRecordIterator() throws IOException {
- return getRecordIterator(getSchema());
+ default ClosableIterator<HoodieRecord> getRecordIterator(HoodieRecord.Mapper mapper) throws IOException {
+ return getRecordIterator(getSchema(), mapper);
}
- default Option<R> getRecordByKey(String key, Schema readerSchema) throws IOException {
+ default Option<HoodieRecord> getRecordByKey(String key, Schema readerSchema, HoodieRecord.Mapper mapper) throws IOException {
throw new UnsupportedOperationException();
}
- default Option<R> getRecordByKey(String key) throws IOException {
- return getRecordByKey(key, getSchema());
+ default Option<HoodieRecord> getRecordByKey(String key, HoodieRecord.Mapper mapper) throws IOException {
+ return getRecordByKey(key, getSchema(), mapper);
}
- default ClosableIterator<R> getRecordsByKeysIterator(List<String> keys, Schema schema) throws IOException {
+ default ClosableIterator<HoodieRecord> getRecordsByKeysIterator(List<String> keys, Schema schema, HoodieRecord.Mapper mapper) throws IOException {
throw new UnsupportedOperationException();
}
- default ClosableIterator<R> getRecordsByKeysIterator(List<String> keys) throws IOException {
- return getRecordsByKeysIterator(keys, getSchema());
+ default ClosableIterator<HoodieRecord> getRecordsByKeysIterator(List<String> keys, HoodieRecord.Mapper mapper) throws IOException {
+ return getRecordsByKeysIterator(keys, getSchema(), mapper);
}
- default ClosableIterator<R> getRecordsByKeyPrefixIterator(List<String> keyPrefixes, Schema schema) throws IOException {
+ default ClosableIterator<HoodieRecord> getRecordsByKeyPrefixIterator(List<String> keyPrefixes, Schema schema, HoodieRecord.Mapper mapper) throws IOException {
throw new UnsupportedEncodingException();
}
- default ClosableIterator<R> getRecordsByKeyPrefixIterator(List<String> keyPrefixes) throws IOException {
- return getRecordsByKeyPrefixIterator(keyPrefixes, getSchema());
+ default ClosableIterator<HoodieRecord> getRecordsByKeyPrefixIterator(List<String> keyPrefixes, HoodieRecord.Mapper mapper) throws IOException {
+ return getRecordsByKeyPrefixIterator(keyPrefixes, getSchema(), mapper);
}
Schema getSchema();
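A point-lookup sketch against the reworked HoodieFileReader interface, assuming reader and key are in the caller's scope:

    // The mapper decides which concrete HoodieRecord flavor the hit is materialized as.
    Option<HoodieRecord> hit = reader.getRecordByKey(key, HoodieAvroIndexedRecord::new);
    if (hit.isPresent()) {
      // ... merge the looked-up record with the incoming change
    }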
diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileReaderFactory.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileReaderFactory.java
index f913df7e15..d2e35eb450 100644
--- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileReaderFactory.java
+++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileReaderFactory.java
@@ -18,22 +18,20 @@
package org.apache.hudi.io.storage;
-import org.apache.hudi.common.fs.FSUtils;
-
-import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
+import org.apache.hudi.common.fs.FSUtils;
import java.io.IOException;
+import static org.apache.hudi.common.model.HoodieFileFormat.HFILE;
import static org.apache.hudi.common.model.HoodieFileFormat.ORC;
import static org.apache.hudi.common.model.HoodieFileFormat.PARQUET;
-import static org.apache.hudi.common.model.HoodieFileFormat.HFILE;
public class HoodieFileReaderFactory {
- public static <R extends IndexedRecord> HoodieFileReader<R> getFileReader(Configuration conf, Path path) throws IOException {
+ public static HoodieAvroFileReader getFileReader(Configuration conf, Path path) throws IOException {
final String extension = FSUtils.getFileExtension(path.toString());
if (PARQUET.getFileExtension().equals(extension)) {
return newParquetFileReader(conf, path);
@@ -48,16 +46,16 @@ public class HoodieFileReaderFactory {
throw new UnsupportedOperationException(extension + " format not supported yet.");
}
- private static <R extends IndexedRecord> HoodieFileReader<R> newParquetFileReader(Configuration conf, Path path) {
- return new HoodieParquetReader<>(conf, path);
+ private static HoodieAvroFileReader newParquetFileReader(Configuration conf, Path path) {
+ return new HoodieAvroParquetReader(conf, path);
}
- private static <R extends IndexedRecord> HoodieFileReader<R> newHFileFileReader(Configuration conf, Path path) throws IOException {
+ private static HoodieAvroFileReader newHFileFileReader(Configuration conf, Path path) throws IOException {
CacheConfig cacheConfig = new CacheConfig(conf);
- return new HoodieHFileReader<>(conf, path, cacheConfig);
+ return new HoodieAvroHFileReader(conf, path, cacheConfig);
}
- private static <R extends IndexedRecord> HoodieFileReader<R> newOrcFileReader(Configuration conf, Path path) {
- return new HoodieOrcReader<>(conf, path);
+ private static HoodieAvroFileReader newOrcFileReader(Configuration conf, Path path) {
+ return new HoodieAvroOrcReader(conf, path);
}
}
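An end-to-end sketch combining the factory with the Avro reader surface; conf and baseFilePath are assumed to point at an existing Parquet, ORC or HFile base file:

    // Dispatches on the file extension and returns an Avro-oriented reader for all formats.
    HoodieAvroFileReader reader = HoodieFileReaderFactory.getFileReader(conf, baseFilePath);
    ClosableIterator<IndexedRecord> avroRecords = reader.getRecordIterator(reader.getSchema());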
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriter.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriter.java
similarity index 56%
rename from hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriter.java
rename to hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriter.java
index cce59d3b66..d0f2ef0251 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriter.java
+++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriter.java
@@ -18,28 +18,28 @@
package org.apache.hudi.io.storage;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.generic.IndexedRecord;
-import org.apache.hudi.avro.HoodieAvroUtils;
+import org.apache.avro.Schema;
+
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
import java.io.IOException;
+import java.util.Properties;
-public interface HoodieFileWriter<R extends IndexedRecord> {
+public interface HoodieFileWriter {
+ boolean canWrite();
- void writeAvroWithMetadata(HoodieKey key, R newRecord) throws IOException;
+ void writeWithMetadata(HoodieKey key, HoodieRecord record, Schema schema, Properties props) throws IOException;
- boolean canWrite();
+ void write(String recordKey, HoodieRecord record, Schema schema, Properties props) throws IOException;
void close() throws IOException;
- void writeAvro(String key, R oldRecord) throws IOException;
+ default void writeWithMetadata(HoodieKey key, HoodieRecord record, Schema schema) throws IOException {
+ writeWithMetadata(key, record, schema, new Properties());
+ }
- default void prepRecordWithMetadata(HoodieKey key, R avroRecord, String instantTime, Integer partitionId, long recordIndex, String fileName) {
- String seqId = HoodieRecord.generateSequenceId(instantTime, partitionId, recordIndex);
- HoodieAvroUtils.addHoodieKeyToRecord((GenericRecord) avroRecord, key.getRecordKey(), key.getPartitionPath(), fileName);
- HoodieAvroUtils.addCommitMetadataToRecord((GenericRecord) avroRecord, instantTime, seqId);
- return;
+ default void write(String recordKey, HoodieRecord record, Schema schema) throws IOException {
+ write(recordKey, record, schema, new Properties());
}
}
diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileUtils.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileUtils.java
index 878a3c563b..7e888842e6 100644
--- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileUtils.java
+++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileUtils.java
@@ -70,7 +70,7 @@ public class HoodieHFileUtils {
// Avoid loading default configs, from the FS, since this configuration is mostly
// used as a stub to initialize HFile reader
Configuration conf = new Configuration(false);
- HoodieHFileReader.SeekableByteArrayInputStream bis = new HoodieHFileReader.SeekableByteArrayInputStream(content);
+ HoodieAvroHFileReader.SeekableByteArrayInputStream bis = new HoodieAvroHFileReader.SeekableByteArrayInputStream(content);
FSDataInputStream fsdis = new FSDataInputStream(bis);
FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fsdis);
ReaderContext context = new ReaderContextBuilder()
diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetStreamWriter.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetStreamWriter.java
index c8f78c3501..91a202308d 100644
--- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetStreamWriter.java
+++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetStreamWriter.java
@@ -23,6 +23,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.avro.HoodieAvroWriteSupport;
+import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.parquet.io.OutputStreamBackedOutputFile;
import org.apache.parquet.hadoop.ParquetFileWriter;
import org.apache.parquet.hadoop.ParquetWriter;
@@ -32,15 +33,15 @@ import org.apache.parquet.io.OutputFile;
import java.io.IOException;
// TODO(HUDI-3035) unify w/ HoodieParquetWriter
-public class HoodieParquetStreamWriter<R extends IndexedRecord> implements AutoCloseable {
+public class HoodieParquetStreamWriter implements HoodieAvroFileWriter, AutoCloseable {
- private final ParquetWriter<R> writer;
+ private final ParquetWriter<IndexedRecord> writer;
private final HoodieAvroWriteSupport writeSupport;
public HoodieParquetStreamWriter(FSDataOutputStream outputStream,
HoodieParquetConfig<HoodieAvroWriteSupport> parquetConfig) throws IOException {
this.writeSupport = parquetConfig.getWriteSupport();
- this.writer = new Builder<R>(new OutputStreamBackedOutputFile(outputStream), writeSupport)
+ this.writer = new Builder<IndexedRecord>(new OutputStreamBackedOutputFile(outputStream), writeSupport)
.withWriteMode(ParquetFileWriter.Mode.CREATE)
.withCompressionCodec(parquetConfig.getCompressionCodecName())
.withRowGroupSize(parquetConfig.getBlockSize())
@@ -52,11 +53,23 @@ public class HoodieParquetStreamWriter<R extends IndexedRecord> implements AutoC
.build();
}
- public void writeAvro(String key, R object) throws IOException {
- writer.write(object);
+ @Override
+ public boolean canWrite() {
+ return true;
+ }
+
+ @Override
+ public void writeAvro(String key, IndexedRecord record) throws IOException {
+ writer.write(record);
writeSupport.add(key);
}
+ @Override
+ public void writeAvroWithMetadata(HoodieKey key, IndexedRecord avroRecord) throws IOException {
+ // TODO support populating the metadata
+ this.writeAvro(key.getRecordKey(), avroRecord);
+ }
+
@Override
public void close() throws IOException {
writer.close();
diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
index cb9fb8da14..2d94a13c0f 100644
--- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
+++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
@@ -32,6 +32,7 @@ import org.apache.hudi.common.function.SerializableFunction;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieAvroRecord;
import org.apache.hudi.common.model.HoodieBaseFile;
+import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.table.HoodieTableConfig;
@@ -49,7 +50,7 @@ import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.HoodieMetadataException;
import org.apache.hudi.exception.TableNotFoundException;
-import org.apache.hudi.io.storage.HoodieFileReader;
+import org.apache.hudi.io.storage.HoodieAvroFileReader;
import org.apache.hudi.io.storage.HoodieFileReaderFactory;
import org.apache.avro.Schema;
@@ -96,7 +97,7 @@ public class HoodieBackedTableMetadata extends BaseTableMetadata {
private final boolean reuse;
// Readers for the latest file slice corresponding to file groups in the metadata partition
- private Map<Pair<String, String>, Pair<HoodieFileReader, HoodieMetadataMergedLogRecordReader>> partitionReaders =
+ private Map<Pair<String, String>, Pair<HoodieAvroFileReader, HoodieMetadataMergedLogRecordReader>> partitionReaders =
new ConcurrentHashMap<>();
public HoodieBackedTableMetadata(HoodieEngineContext engineContext, HoodieMetadataConfig metadataConfig,
@@ -157,18 +158,18 @@ public class HoodieBackedTableMetadata extends BaseTableMetadata {
List<FileSlice> partitionFileSlices =
HoodieTableMetadataUtil.getPartitionLatestMergedFileSlices(metadataMetaClient, partitionName);
- return (shouldLoadInMemory ? HoodieListData.lazy(partitionFileSlices) : engineContext.parallelize(partitionFileSlices))
- .flatMap((SerializableFunction<FileSlice, Iterator<HoodieRecord<HoodieMetadataPayload>>>) fileSlice -> {
- // NOTE: Since this will be executed by executors, we can't access previously cached
- // readers, and therefore have to always open new ones
- Pair<HoodieFileReader, HoodieMetadataMergedLogRecordReader> readers =
- openReaders(partitionName, fileSlice);
+ return engineContext.parallelize(partitionFileSlices)
+ .flatMap(
+ (SerializableFunction<FileSlice, Iterator<Pair<String, Option<HoodieRecord<HoodieMetadataPayload>>>>>) fileSlice -> {
+ // NOTE: Since this will be executed by executors, we can't access previously cached
+ // readers, and therefore have to always open new ones
+ Pair<HoodieAvroFileReader, HoodieMetadataMergedLogRecordReader> readers =
+ openReaders(partitionName, fileSlice);
+ try {
+ List<Long> timings = new ArrayList<>();
- try {
- List<Long> timings = new ArrayList<>();
-
- HoodieFileReader baseFileReader = readers.getKey();
- HoodieMetadataMergedLogRecordReader logRecordScanner = readers.getRight();
+ HoodieAvroFileReader baseFileReader = readers.getKey();
+ HoodieMetadataMergedLogRecordReader logRecordScanner = readers.getRight();
if (baseFileReader == null && logRecordScanner == null) {
// TODO: what do we do if both does not exist? should we throw an exception and let caller do the fallback ?
@@ -208,11 +209,11 @@ public class HoodieBackedTableMetadata extends BaseTableMetadata {
List<Pair<String, Option<HoodieRecord<HoodieMetadataPayload>>>> result = new ArrayList<>();
AtomicInteger fileSlicesKeysCount = new AtomicInteger();
partitionFileSliceToKeysMap.forEach((partitionFileSlicePair, fileSliceKeys) -> {
- Pair<HoodieFileReader, HoodieMetadataMergedLogRecordReader> readers =
+ Pair<HoodieAvroFileReader, HoodieMetadataMergedLogRecordReader> readers =
getOrCreateReaders(partitionName, partitionFileSlicePair.getRight());
try {
List<Long> timings = new ArrayList<>();
- HoodieFileReader baseFileReader = readers.getKey();
+ HoodieAvroFileReader baseFileReader = readers.getKey();
HoodieMetadataMergedLogRecordReader logRecordScanner = readers.getRight();
if (baseFileReader == null && logRecordScanner == null) {
return;
@@ -279,7 +280,7 @@ public class HoodieBackedTableMetadata extends BaseTableMetadata {
return logRecords;
}
- private List<Pair<String, Option<HoodieRecord<HoodieMetadataPayload>>>> readFromBaseAndMergeWithLogRecords(HoodieFileReader baseFileReader,
+ private List<Pair<String, Option<HoodieRecord<HoodieMetadataPayload>>>> readFromBaseAndMergeWithLogRecords(HoodieAvroFileReader baseFileReader,
List<String> keys,
boolean fullKeys,
Map<String, Option<HoodieRecord<HoodieMetadataPayload>>> logRecords,
@@ -340,17 +341,20 @@ public class HoodieBackedTableMetadata extends BaseTableMetadata {
}
}
- private Map<String, HoodieRecord<HoodieMetadataPayload>> fetchBaseFileRecordsByKeys(HoodieFileReader<GenericRecord> baseFileReader,
+ private Map<String, HoodieRecord<HoodieMetadataPayload>> fetchBaseFileRecordsByKeys(HoodieAvroFileReader baseFileReader,
List<String> keys,
boolean fullKeys,
String partitionName) throws IOException {
- ClosableIterator<GenericRecord> records = fullKeys ? baseFileReader.getRecordsByKeysIterator(keys)
- : baseFileReader.getRecordsByKeyPrefixIterator(keys);
+ ClosableIterator<HoodieRecord> records = fullKeys ? baseFileReader.getRecordsByKeysIterator(keys, HoodieAvroIndexedRecord::new)
+ : baseFileReader.getRecordsByKeyPrefixIterator(keys, HoodieAvroIndexedRecord::new);
return toStream(records)
- .map(record -> Pair.of(
- (String) record.get(HoodieMetadataPayload.KEY_FIELD_NAME),
- composeRecord(record, partitionName)))
+ .map(record -> {
+ GenericRecord data = (GenericRecord) record.getData();
+ return Pair.of(
+ (String) (data).get(HoodieMetadataPayload.KEY_FIELD_NAME),
+ composeRecord(data, partitionName));
+ })
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
}
@@ -397,7 +401,7 @@ public class HoodieBackedTableMetadata extends BaseTableMetadata {
* @param slice - The file slice to open readers for
* @return File reader and the record scanner pair for the requested file slice
*/
- private Pair<HoodieFileReader, HoodieMetadataMergedLogRecordReader> getOrCreateReaders(String partitionName, FileSlice slice) {
+ private Pair<HoodieAvroFileReader, HoodieMetadataMergedLogRecordReader> getOrCreateReaders(String partitionName, FileSlice slice) {
if (reuse) {
return partitionReaders.computeIfAbsent(Pair.of(partitionName, slice.getFileId()), k -> {
return openReaders(partitionName, slice); });
@@ -406,12 +410,12 @@ public class HoodieBackedTableMetadata extends BaseTableMetadata {
}
}
- private Pair<HoodieFileReader, HoodieMetadataMergedLogRecordReader> openReaders(String partitionName, FileSlice slice) {
+ private Pair<HoodieAvroFileReader, HoodieMetadataMergedLogRecordReader> openReaders(String partitionName, FileSlice slice) {
try {
HoodieTimer timer = new HoodieTimer().startTimer();
// Open base file reader
- Pair<HoodieFileReader, Long> baseFileReaderOpenTimePair = getBaseFileReader(slice, timer);
- HoodieFileReader baseFileReader = baseFileReaderOpenTimePair.getKey();
+ Pair<HoodieAvroFileReader, Long> baseFileReaderOpenTimePair = getBaseFileReader(slice, timer);
+ HoodieAvroFileReader baseFileReader = baseFileReaderOpenTimePair.getKey();
final long baseFileOpenMs = baseFileReaderOpenTimePair.getValue();
// Open the log record scanner using the log files from the latest file slice
@@ -429,8 +433,8 @@ public class HoodieBackedTableMetadata extends BaseTableMetadata {
}
}
- private Pair<HoodieFileReader, Long> getBaseFileReader(FileSlice slice, HoodieTimer timer) throws IOException {
- HoodieFileReader baseFileReader = null;
+ private Pair<HoodieAvroFileReader, Long> getBaseFileReader(FileSlice slice, HoodieTimer timer) throws IOException {
+ HoodieAvroFileReader baseFileReader = null;
Long baseFileOpenMs;
// If the base file is present then create a reader
Option<HoodieBaseFile> basefile = slice.getBaseFile();
@@ -567,7 +571,7 @@ public class HoodieBackedTableMetadata extends BaseTableMetadata {
* @param partitionFileSlicePair - Partition and FileSlice
*/
private synchronized void close(Pair<String, String> partitionFileSlicePair) {
- Pair<HoodieFileReader, HoodieMetadataMergedLogRecordReader> readers =
+ Pair<HoodieAvroFileReader, HoodieMetadataMergedLogRecordReader> readers =
partitionReaders.remove(partitionFileSlicePair);
closeReader(readers);
}
@@ -582,7 +586,7 @@ public class HoodieBackedTableMetadata extends BaseTableMetadata {
partitionReaders.clear();
}
- private void closeReader(Pair<HoodieFileReader, HoodieMetadataMergedLogRecordReader> readers) {
+ private void closeReader(Pair<HoodieAvroFileReader, HoodieMetadataMergedLogRecordReader> readers) {
if (readers != null) {
try {
if (readers.getKey() != null) {
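For orientation, a minimal sketch of the new key-lookup path the hunks above introduce. It is a fragment in the style of fetchBaseFileRecordsByKeys, not the actual implementation; imports are omitted, and composeRecord / partitionName are the surrounding class's own members:

    // Sketch: base file records now come back as HoodieRecord wrappers, so the
    // Avro payload has to be unwrapped via getData() before any field access.
    ClosableIterator<HoodieRecord> it =
        baseFileReader.getRecordsByKeysIterator(keys, HoodieAvroIndexedRecord::new);
    Map<String, HoodieRecord<HoodieMetadataPayload>> byKey = new HashMap<>();
    while (it.hasNext()) {
      GenericRecord data = (GenericRecord) it.next().getData();
      String key = (String) data.get(HoodieMetadataPayload.KEY_FIELD_NAME);
      byKey.put(key, composeRecord(data, partitionName));
    }
    it.close();
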
diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java
index 0575177800..ca77c98c12 100644
--- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java
+++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java
@@ -54,7 +54,7 @@ import org.apache.hudi.common.util.hash.FileIndexID;
import org.apache.hudi.common.util.hash.PartitionIndexID;
import org.apache.hudi.exception.HoodieMetadataException;
import org.apache.hudi.hadoop.CachingPath;
-import org.apache.hudi.io.storage.HoodieHFileReader;
+import org.apache.hudi.io.storage.HoodieAvroHFileReader;
import org.apache.hudi.util.Lazy;
import java.io.IOException;
@@ -120,7 +120,7 @@ public class HoodieMetadataPayload implements HoodieRecordPayload<HoodieMetadata
protected static final int METADATA_TYPE_BLOOM_FILTER = 4;
// HoodieMetadata schema field ids
- public static final String KEY_FIELD_NAME = HoodieHFileReader.KEY_FIELD_NAME;
+ public static final String KEY_FIELD_NAME = HoodieAvroHFileReader.KEY_FIELD_NAME;
public static final String SCHEMA_FIELD_NAME_TYPE = "type";
public static final String SCHEMA_FIELD_NAME_METADATA = "filesystemMetadata";
public static final String SCHEMA_FIELD_ID_COLUMN_STATS = "ColumnStatsMetadata";
diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
index 1fb872f683..20938b38d8 100644
--- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
+++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
@@ -53,7 +53,7 @@ import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.HoodieMetadataException;
-import org.apache.hudi.io.storage.HoodieFileReader;
+import org.apache.hudi.io.storage.HoodieAvroFileReader;
import org.apache.hudi.io.storage.HoodieFileReaderFactory;
import org.apache.hudi.util.Lazy;
@@ -417,7 +417,7 @@ public class HoodieTableMetadataUtil {
}
final Path writeFilePath = new Path(recordsGenerationParams.getDataMetaClient().getBasePath(), pathWithPartition);
- try (HoodieFileReader<IndexedRecord> fileReader =
+ try (HoodieAvroFileReader fileReader =
HoodieFileReaderFactory.getFileReader(recordsGenerationParams.getDataMetaClient().getHadoopConf(), writeFilePath)) {
try {
final BloomFilter fileBloomFilter = fileReader.readBloomFilter();
@@ -877,7 +877,7 @@ public class HoodieTableMetadataUtil {
}
final String pathWithPartition = partitionName + "/" + appendedFile;
final Path appendedFilePath = new Path(recordsGenerationParams.getDataMetaClient().getBasePath(), pathWithPartition);
- try (HoodieFileReader<IndexedRecord> fileReader =
+ try (HoodieAvroFileReader fileReader =
HoodieFileReaderFactory.getFileReader(recordsGenerationParams.getDataMetaClient().getHadoopConf(), appendedFilePath)) {
final BloomFilter fileBloomFilter = fileReader.readBloomFilter();
if (fileBloomFilter == null) {
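The two hunks above retype the metadata-index readers to HoodieAvroFileReader. A minimal sketch of the same open-and-read pattern, assuming only the factory call and readBloomFilter() shown above:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hudi.io.storage.HoodieAvroFileReader;
    import org.apache.hudi.io.storage.HoodieFileReaderFactory;

    public class BloomFilterReadSketch {
      // Sketch: open a base file through the factory (now typed HoodieAvroFileReader)
      // and check whether it carries a bloom filter, mirroring the hunks above.
      public static boolean hasBloomFilter(Configuration conf, Path baseFilePath) throws java.io.IOException {
        try (HoodieAvroFileReader fileReader = HoodieFileReaderFactory.getFileReader(conf, baseFilePath)) {
          return fileReader.readBloomFilter() != null;
        }
      }
    }
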
diff --git a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java
index f87e5a41b8..237dd9fd45 100755
--- a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java
+++ b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java
@@ -35,8 +35,10 @@ import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.DeleteRecord;
import org.apache.hudi.common.model.HoodieArchivedLogFile;
import org.apache.hudi.common.model.HoodieAvroRecord;
+import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.table.cdc.HoodieCDCSupplementalLoggingMode;
import org.apache.hudi.common.table.cdc.HoodieCDCUtils;
@@ -1169,7 +1171,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey()));
scanner.forEach(s -> {
try {
- if (!s.getData().getInsertValue(schema).isPresent()) {
+ if (!((HoodieRecordPayload)s.getData()).getInsertValue(schema).isPresent()) {
emptyPayloads.add(true);
}
} catch (IOException io) {
@@ -1310,7 +1312,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
scanner.forEach(s -> readKeys.add(s.getRecordKey()));
scanner.forEach(s -> {
try {
- if (!s.getData().getInsertValue(schema).isPresent()) {
+ if (!((HoodieRecordPayload)s.getData()).getInsertValue(schema).isPresent()) {
emptyPayloadKeys.add(s.getRecordKey());
}
} catch (IOException io) {
@@ -2031,7 +2033,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
List<IndexedRecord> recordsCopy = new ArrayList<>(records);
assertEquals(100, records.size());
assertEquals(100, recordsCopy.size());
- HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, schema);
+ HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records.stream().map(HoodieAvroIndexedRecord::new).collect(Collectors.toList()), schema);
byte[] content = dataBlock.getBytes(schema);
assertTrue(content.length > 0);
@@ -2118,10 +2120,10 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
private HoodieDataBlock getDataBlock(HoodieLogBlockType dataBlockType, List<IndexedRecord> records,
Map<HeaderMetadataType, String> header) {
- return getDataBlock(dataBlockType, records, header, new Path("dummy_path"));
+ return getDataBlock(dataBlockType, records.stream().map(HoodieAvroIndexedRecord::new).collect(Collectors.toList()), header, new Path("dummy_path"));
}
- private HoodieDataBlock getDataBlock(HoodieLogBlockType dataBlockType, List<IndexedRecord> records,
+ private HoodieDataBlock getDataBlock(HoodieLogBlockType dataBlockType, List<HoodieRecord> records,
Map<HeaderMetadataType, String> header, Path pathForReader) {
switch (dataBlockType) {
case CDC_DATA_BLOCK:
@@ -2155,10 +2157,10 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
* Utility to convert the given iterator to a List.
*/
private static List<IndexedRecord> getRecords(HoodieDataBlock dataBlock) {
- ClosableIterator<IndexedRecord> itr = dataBlock.getRecordIterator();
+ ClosableIterator<HoodieRecord> itr = dataBlock.getRecordIterator(HoodieAvroIndexedRecord::new);
List<IndexedRecord> elements = new ArrayList<>();
- itr.forEachRemaining(elements::add);
+ itr.forEachRemaining(r -> elements.add((IndexedRecord) r.getData()));
return elements;
}
}
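The test changes above capture the log-block side of the migration: plain Avro IndexedRecords are wrapped into HoodieAvroIndexedRecord before a data block is built, and the block's iterator now yields HoodieRecords that must be unwrapped again on read. A hedged round-trip fragment, where avroRecords, schema and the block read back from the log (readBlock) are assumed to be set up by the caller and imports match the test above:

    // Write side: the data block constructors now take List<HoodieRecord>.
    List<HoodieRecord> hoodieRecords = avroRecords.stream()
        .map(HoodieAvroIndexedRecord::new)
        .collect(Collectors.toList());
    HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(hoodieRecords, schema);

    // Read side: the iterator is HoodieRecord-typed; getData() recovers the Avro record.
    ClosableIterator<HoodieRecord> itr = readBlock.getRecordIterator(HoodieAvroIndexedRecord::new);
    List<IndexedRecord> elements = new ArrayList<>();
    itr.forEachRemaining(r -> elements.add((IndexedRecord) r.getData()));
    itr.close();
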
diff --git a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormatAppendFailure.java b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormatAppendFailure.java
index 6c4d69a05b..77a01bd024 100644
--- a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormatAppendFailure.java
+++ b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormatAppendFailure.java
@@ -19,6 +19,7 @@
package org.apache.hudi.common.functional;
import org.apache.hudi.common.model.HoodieArchivedLogFile;
+import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.table.log.HoodieLogFormat;
import org.apache.hudi.common.table.log.HoodieLogFormat.Writer;
@@ -28,7 +29,6 @@ import org.apache.hudi.common.table.log.block.HoodieLogBlock;
import org.apache.hudi.common.testutils.SchemaTestUtil;
import org.apache.hudi.common.testutils.minicluster.MiniClusterUtil;
-import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
@@ -52,6 +52,7 @@ import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.TimeoutException;
+import java.util.stream.Collectors;
import static org.apache.hudi.common.testutils.SchemaTestUtil.getSimpleSchema;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
@@ -101,7 +102,7 @@ public class TestHoodieLogFormatAppendFailure {
fs.mkdirs(testPath);
// Some data & append.
- List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 10);
+ List<HoodieRecord> records = SchemaTestUtil.generateTestRecords(0, 10).stream().map(HoodieAvroIndexedRecord::new).collect(Collectors.toList());
Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>(2);
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
diff --git a/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieFileReaderFactory.java b/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieAvroFileReaderFactory.java
similarity index 78%
rename from hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieFileReaderFactory.java
rename to hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieAvroFileReaderFactory.java
index ec334bde1e..f049033688 100644
--- a/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieFileReaderFactory.java
+++ b/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieAvroFileReaderFactory.java
@@ -18,7 +18,6 @@
package org.apache.hudi.io.storage;
-import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.junit.jupiter.api.Test;
@@ -32,7 +31,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
/**
* Tests for {@link HoodieFileReaderFactory}.
*/
-public class TestHoodieFileReaderFactory {
+public class TestHoodieAvroFileReaderFactory {
@TempDir
public java.nio.file.Path tempDir;
@@ -41,19 +40,19 @@ public class TestHoodieFileReaderFactory {
// parquet file format.
final Configuration hadoopConf = new Configuration();
final Path parquetPath = new Path("/partition/path/f1_1-0-1_000.parquet");
- HoodieFileReader<IndexedRecord> parquetReader = HoodieFileReaderFactory.getFileReader(hadoopConf, parquetPath);
- assertTrue(parquetReader instanceof HoodieParquetReader);
+ HoodieAvroFileReader parquetReader = HoodieFileReaderFactory.getFileReader(hadoopConf, parquetPath);
+ assertTrue(parquetReader instanceof HoodieAvroParquetReader);
// log file format.
final Path logPath = new Path("/partition/path/f.b51192a8-574b-4a85-b246-bcfec03ac8bf_100.log.2_1-0-1");
final Throwable thrown = assertThrows(UnsupportedOperationException.class, () -> {
- HoodieFileReader<IndexedRecord> logWriter = HoodieFileReaderFactory.getFileReader(hadoopConf, logPath);
+ HoodieAvroFileReader logWriter = HoodieFileReaderFactory.getFileReader(hadoopConf, logPath);
}, "should fail since log storage reader is not supported yet.");
assertTrue(thrown.getMessage().contains("format not supported yet."));
// Orc file format.
final Path orcPath = new Path("/partition/path/f1_1-0-1_000.orc");
- HoodieFileReader<IndexedRecord> orcReader = HoodieFileReaderFactory.getFileReader(hadoopConf, orcPath);
- assertTrue(orcReader instanceof HoodieOrcReader);
+ HoodieAvroFileReader orcReader = HoodieFileReaderFactory.getFileReader(hadoopConf, orcPath);
+ assertTrue(orcReader instanceof HoodieAvroOrcReader);
}
}
diff --git a/hudi-examples/hudi-examples-flink/src/test/java/org/apache/hudi/examples/quickstart/TestQuickstartData.java b/hudi-examples/hudi-examples-flink/src/test/java/org/apache/hudi/examples/quickstart/TestQuickstartData.java
index 67691a3ec7..7fc93c776f 100644
--- a/hudi-examples/hudi-examples-flink/src/test/java/org/apache/hudi/examples/quickstart/TestQuickstartData.java
+++ b/hudi-examples/hudi-examples-flink/src/test/java/org/apache/hudi/examples/quickstart/TestQuickstartData.java
@@ -37,6 +37,7 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.config.HoodieCommonConfig;
import org.apache.hudi.common.fs.FSUtils;
+import org.apache.hudi.common.model.HoodieAvroRecord;
import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner;
import org.apache.hudi.examples.quickstart.utils.QuickstartConfigurations;
import org.apache.parquet.Strings;
@@ -322,7 +323,7 @@ public class TestQuickstartData {
.map(hoodieRecord -> {
try {
// in case it is a delete
- GenericRecord record = (GenericRecord) hoodieRecord.getData()
+ GenericRecord record = (GenericRecord) ((HoodieAvroRecord)hoodieRecord).getData()
.getInsertValue(schema, new Properties())
.orElse(null);
return record == null ? (String) null : filterOutVariables(record);
diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringOperator.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringOperator.java
index e30a3577f0..7e1aa7217c 100644
--- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringOperator.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringOperator.java
@@ -38,7 +38,7 @@ import org.apache.hudi.configuration.OptionsResolver;
import org.apache.hudi.exception.HoodieClusteringException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.io.IOUtils;
-import org.apache.hudi.io.storage.HoodieFileReader;
+import org.apache.hudi.io.storage.HoodieAvroFileReader;
import org.apache.hudi.io.storage.HoodieFileReaderFactory;
import org.apache.hudi.sink.bulk.BulkInsertWriterHelper;
import org.apache.hudi.sink.bulk.sort.SortOperatorGen;
@@ -80,6 +80,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
+import java.util.Properties;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.stream.Collectors;
@@ -251,7 +252,7 @@ public class ClusteringOperator extends TableStreamOperator<ClusteringCommitEven
for (ClusteringOperation clusteringOp : clusteringOps) {
try {
- Option<HoodieFileReader> baseFileReader = StringUtils.isNullOrEmpty(clusteringOp.getDataFilePath())
+ Option<HoodieAvroFileReader> baseFileReader = StringUtils.isNullOrEmpty(clusteringOp.getDataFilePath())
? Option.empty()
: Option.of(HoodieFileReaderFactory.getFileReader(table.getHadoopConf(), new Path(clusteringOp.getDataFilePath())));
HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
@@ -278,7 +279,7 @@ public class ClusteringOperator extends TableStreamOperator<ClusteringCommitEven
recordIterators.add(StreamSupport.stream(Spliterators.spliteratorUnknownSize(hoodieFileSliceReader, Spliterator.NONNULL), false).map(hoodieRecord -> {
try {
- return this.transform((IndexedRecord) hoodieRecord.getData().getInsertValue(readerSchema).get());
+ return this.transform(hoodieRecord.toIndexedRecord(readerSchema, new Properties()).get());
} catch (IOException e) {
throw new HoodieIOException("Failed to read next record", e);
}
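ClusteringOperator no longer reaches into the payload with getData().getInsertValue(); HoodieRecord.toIndexedRecord(schema, props) is the replacement used above. A minimal sketch of that conversion, assuming only the call shown in the hunk; the class and method names are illustrative:

    import java.io.IOException;
    import java.util.Properties;
    import org.apache.avro.Schema;
    import org.apache.avro.generic.IndexedRecord;
    import org.apache.hudi.common.model.HoodieRecord;
    import org.apache.hudi.common.util.Option;

    public class ToIndexedRecordSketch {
      // Sketch: materialize the Avro view of a HoodieRecord. An empty Option
      // (e.g. for a delete payload) maps to null here; callers must handle it.
      public static IndexedRecord toAvro(HoodieRecord record, Schema readerSchema) throws IOException {
        Option<?> avro = record.toIndexedRecord(readerSchema, new Properties());
        return avro.isPresent() ? (IndexedRecord) avro.get() : null;
      }
    }
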
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieHFileRecordReader.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieHFileRecordReader.java
index 53ccb7413f..4fa62e79ea 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieHFileRecordReader.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieHFileRecordReader.java
@@ -18,11 +18,8 @@
package org.apache.hudi.hadoop;
-import java.io.IOException;
-import java.util.Iterator;
-
import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
@@ -34,20 +31,23 @@ import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils;
-import org.apache.hudi.io.storage.HoodieHFileReader;
+import org.apache.hudi.io.storage.HoodieAvroHFileReader;
+
+import java.io.IOException;
+import java.util.Iterator;
public class HoodieHFileRecordReader implements RecordReader<NullWritable, ArrayWritable> {
private long count = 0;
private ArrayWritable valueObj;
- private HoodieHFileReader reader;
- private Iterator<GenericRecord> recordIterator;
+ private HoodieAvroHFileReader reader;
+ private Iterator<IndexedRecord> recordIterator;
private Schema schema;
public HoodieHFileRecordReader(Configuration conf, InputSplit split, JobConf job) throws IOException {
FileSplit fileSplit = (FileSplit) split;
Path path = fileSplit.getPath();
- reader = new HoodieHFileReader(conf, path, new CacheConfig(conf));
+ reader = new HoodieAvroHFileReader(conf, path, new CacheConfig(conf));
schema = reader.getSchema();
valueObj = new ArrayWritable(Writable.class, new Writable[schema.getFields().size()]);
@@ -63,7 +63,7 @@ public class HoodieHFileRecordReader implements RecordReader<NullWritable, Array
return false;
}
- GenericRecord record = recordIterator.next();
+ IndexedRecord record = recordIterator.next();
ArrayWritable aWritable = (ArrayWritable) HoodieRealtimeRecordReaderUtils.avroToArrayWritable(record, schema);
value.set(aWritable.get());
count++;
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/InputSplitUtils.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/InputSplitUtils.java
index e485e72c25..ae550cb335 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/InputSplitUtils.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/InputSplitUtils.java
@@ -18,19 +18,20 @@
package org.apache.hudi.hadoop;
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.nio.charset.StandardCharsets;
import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils;
-import org.apache.hudi.io.storage.HoodieFileReader;
+import org.apache.hudi.io.storage.HoodieAvroFileReader;
import org.apache.hudi.io.storage.HoodieFileReaderFactory;
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+
public class InputSplitUtils {
public static void writeString(String str, DataOutput out) throws IOException {
@@ -63,7 +64,7 @@ public class InputSplitUtils {
public static Schema getBaseFileSchema(FileSplit split, Configuration conf) {
try {
if (split instanceof BootstrapBaseFileSplit) {
- HoodieFileReader storageReader = HoodieFileReaderFactory.getFileReader(conf,
+ HoodieAvroFileReader storageReader = HoodieFileReaderFactory.getFileReader(conf,
((BootstrapBaseFileSplit)(split)).getBootstrapFileSplit().getPath());
return HoodieAvroUtils.addMetadataFields(storageReader.getSchema());
}
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java
index b917f004bc..4fa3781aea 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java
@@ -27,9 +27,7 @@ import org.apache.hadoop.mapred.RecordReader;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.common.config.HoodieCommonConfig;
import org.apache.hudi.common.fs.FSUtils;
-import org.apache.hudi.common.model.HoodieAvroRecord;
import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.hadoop.config.HoodieRealtimeConfig;
@@ -50,7 +48,7 @@ class RealtimeCompactedRecordReader extends AbstractRealtimeRecordReader
private static final Logger LOG = LogManager.getLogger(AbstractRealtimeRecordReader.class);
protected final RecordReader<NullWritable, ArrayWritable> parquetReader;
- private final Map<String, HoodieRecord<? extends HoodieRecordPayload>> deltaRecordMap;
+ private final Map<String, HoodieRecord> deltaRecordMap;
private final Set<String> deltaRecordKeys;
private final HoodieMergedLogRecordScanner mergedLogRecordScanner;
@@ -96,9 +94,9 @@ class RealtimeCompactedRecordReader extends AbstractRealtimeRecordReader
private Option<GenericRecord> buildGenericRecordwithCustomPayload(HoodieRecord record) throws IOException {
if (usesCustomPayload) {
- return ((HoodieAvroRecord) record).getData().getInsertValue(getWriterSchema(), payloadProps);
+ return record.toIndexedRecord(getWriterSchema(), payloadProps);
} else {
- return ((HoodieAvroRecord) record).getData().getInsertValue(getReaderSchema(), payloadProps);
+ return record.toIndexedRecord(getReaderSchema(), payloadProps);
}
}
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeUnmergedRecordReader.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeUnmergedRecordReader.java
index 84c8088650..e418bebb50 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeUnmergedRecordReader.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeUnmergedRecordReader.java
@@ -92,7 +92,7 @@ class RealtimeUnmergedRecordReader extends AbstractRealtimeRecordReader
.withBufferSize(this.jobConf.getInt(HoodieRealtimeConfig.MAX_DFS_STREAM_BUFFER_SIZE_PROP, HoodieRealtimeConfig.DEFAULT_MAX_DFS_STREAM_BUFFER_SIZE))
.withLogRecordScannerCallback(record -> {
// convert Hoodie log record to Hadoop AvroWritable and buffer
- GenericRecord rec = (GenericRecord) record.getData().getInsertValue(getReaderSchema(), payloadProps).get();
+ GenericRecord rec = (GenericRecord) record.toIndexedRecord(getReaderSchema(), payloadProps).get();
ArrayWritable aWritable = (ArrayWritable) HoodieRealtimeRecordReaderUtils.avroToArrayWritable(rec, getHiveSchema());
this.executor.getQueue().insertRecord(aWritable);
})
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeRecordReaderUtils.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeRecordReaderUtils.java
index bf4cbff666..0e7dce2b0f 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeRecordReaderUtils.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeRecordReaderUtils.java
@@ -45,7 +45,7 @@ import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.hadoop.config.HoodieRealtimeConfig;
-import org.apache.hudi.io.storage.HoodieFileReader;
+import org.apache.hudi.io.storage.HoodieAvroFileReader;
import org.apache.hudi.io.storage.HoodieFileReaderFactory;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
@@ -72,7 +72,7 @@ public class HoodieRealtimeRecordReaderUtils {
*/
public static Schema readSchema(Configuration conf, Path filePath) {
try {
- HoodieFileReader storageReader = HoodieFileReaderFactory.getFileReader(conf, filePath);
+ HoodieAvroFileReader storageReader = HoodieFileReaderFactory.getFileReader(conf, filePath);
return storageReader.getSchema();
} catch (IOException e) {
throw new HoodieIOException("Failed to read schema from " + filePath, e);
diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/testutils/InputFormatTestUtil.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/testutils/InputFormatTestUtil.java
index a4471845c3..1241de1de8 100644
--- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/testutils/InputFormatTestUtil.java
+++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/testutils/InputFormatTestUtil.java
@@ -21,6 +21,7 @@ package org.apache.hudi.hadoop.testutils;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieFileFormat;
+import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodiePartitionMetadata;
import org.apache.hudi.common.model.HoodieRecord;
@@ -395,13 +396,14 @@ public class InputFormatTestUtil {
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, newCommit);
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, writeSchema.toString());
HoodieDataBlock dataBlock = null;
+ List<HoodieRecord> hoodieRecords = records.stream().map(HoodieAvroIndexedRecord::new).collect(Collectors.toList());
if (logBlockType == HoodieLogBlock.HoodieLogBlockType.HFILE_DATA_BLOCK) {
dataBlock = new HoodieHFileDataBlock(
- records, header, Compression.Algorithm.GZ, writer.getLogFile().getPath());
+ hoodieRecords, header, Compression.Algorithm.GZ, writer.getLogFile().getPath());
} else if (logBlockType == HoodieLogBlock.HoodieLogBlockType.PARQUET_DATA_BLOCK) {
- dataBlock = new HoodieParquetDataBlock(records, header, HoodieRecord.RECORD_KEY_METADATA_FIELD, CompressionCodecName.GZIP);
+ dataBlock = new HoodieParquetDataBlock(hoodieRecords, header, HoodieRecord.RECORD_KEY_METADATA_FIELD, CompressionCodecName.GZIP);
} else {
- dataBlock = new HoodieAvroDataBlock(records, header, HoodieRecord.RECORD_KEY_METADATA_FIELD);
+ dataBlock = new HoodieAvroDataBlock(hoodieRecords, header, HoodieRecord.RECORD_KEY_METADATA_FIELD);
}
writer.appendBlock(dataBlock);
return writer;
diff --git a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/reader/DFSHoodieDatasetInputReader.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/reader/DFSHoodieDatasetInputReader.java
index 2648740f54..db11bd6717 100644
--- a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/reader/DFSHoodieDatasetInputReader.java
+++ b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/reader/DFSHoodieDatasetInputReader.java
@@ -18,6 +18,10 @@
package org.apache.hudi.integ.testsuite.reader;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
+import org.apache.hadoop.fs.Path;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.config.HoodieCommonConfig;
@@ -25,6 +29,7 @@ import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieFileFormat;
+import org.apache.hudi.common.model.HoodieAvroRecord;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.HoodieTableMetaClient;
@@ -34,19 +39,15 @@ import org.apache.hudi.common.table.view.TableFileSystemView;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.config.HoodieMemoryConfig;
-import org.apache.hudi.io.storage.HoodieFileReader;
+import org.apache.hudi.io.storage.HoodieAvroFileReader;
import org.apache.hudi.io.storage.HoodieFileReaderFactory;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.generic.IndexedRecord;
-import org.apache.hadoop.fs.Path;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import scala.Tuple2;
import java.io.IOException;
import java.io.UncheckedIOException;
@@ -61,8 +62,6 @@ import java.util.NoSuchElementException;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
-import scala.Tuple2;
-
import static java.util.Map.Entry.comparingByValue;
import static java.util.stream.Collectors.toMap;
@@ -264,7 +263,7 @@ public class DFSHoodieDatasetInputReader extends DFSDeltaInputReader {
if (fileSlice.getBaseFile().isPresent()) {
// Read the base files using the latest writer schema.
Schema schema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(schemaStr));
- HoodieFileReader reader = HoodieFileReaderFactory.getFileReader(metaClient.getHadoopConf(),
+ HoodieAvroFileReader reader = HoodieFileReaderFactory.getFileReader(metaClient.getHadoopConf(),
new Path(fileSlice.getBaseFile().get().getPath()));
return reader.getRecordIterator(schema);
} else {
@@ -287,12 +286,12 @@ public class DFSHoodieDatasetInputReader extends DFSDeltaInputReader {
.withBitCaskDiskMapCompressionEnabled(HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.defaultValue())
.build();
// readAvro log files
- Iterable<HoodieRecord<? extends HoodieRecordPayload>> iterable = () -> scanner.iterator();
+ Iterable<HoodieRecord> iterable = () -> scanner.iterator();
Schema schema = new Schema.Parser().parse(schemaStr);
return StreamSupport.stream(iterable.spliterator(), false)
.map(e -> {
try {
- return (IndexedRecord) e.getData().getInsertValue(schema).get();
+ return (IndexedRecord) ((HoodieAvroRecord)e).getData().getInsertValue(schema).get();
} catch (IOException io) {
throw new UncheckedIOException(io);
}
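Call sites that still need the legacy payload API now cast to HoodieAvroRecord explicitly, as in the hunk above. A small sketch of that pattern (the imports all appear in the file above; the class and method names are illustrative):

    import java.io.IOException;
    import org.apache.avro.Schema;
    import org.apache.avro.generic.IndexedRecord;
    import org.apache.hudi.common.model.HoodieAvroRecord;
    import org.apache.hudi.common.model.HoodieRecord;

    public class LegacyPayloadAccessSketch {
      // Sketch: records from the merged log scanner are plain HoodieRecord now;
      // payload-based access goes through an explicit HoodieAvroRecord cast.
      public static IndexedRecord legacyAvroView(HoodieRecord record, Schema schema) throws IOException {
        return (IndexedRecord) ((HoodieAvroRecord) record).getData().getInsertValue(schema).get();
      }
    }
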
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala
index afc0781eb1..bff829cff6 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala
@@ -43,7 +43,9 @@ import org.apache.hudi.internal.schema.convert.AvroInternalSchemaConverter
import org.apache.hudi.internal.schema.utils.{InternalSchemaUtils, SerDeHelper}
import org.apache.hudi.internal.schema.{HoodieSchemaException, InternalSchema}
import org.apache.hudi.io.storage.HoodieHFileReader
+import org.apache.hudi.io.storage.HoodieAvroHFileReader
+import org.apache.spark.SerializableWritable
import org.apache.spark.execution.datasources.HoodieInMemoryFileIndex
import org.apache.spark.internal.Logging
import org.apache.spark.rdd.RDD
@@ -734,7 +736,7 @@ object HoodieBaseRelation extends SparkAdapterSupport {
partitionedFile => {
val hadoopConf = hadoopConfBroadcast.value.get()
- val reader = new HoodieHFileReader[GenericRecord](hadoopConf, new Path(partitionedFile.filePath),
+ val reader = new HoodieAvroHFileReader(hadoopConf, new Path(partitionedFile.filePath),
new CacheConfig(hadoopConf))
val requiredRowSchema = requiredDataSchema.structTypeSchema
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala
index f122c9f9d6..ad587ab444 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala
@@ -30,7 +30,6 @@ import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils.getMaxCompac
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.{Partition, SerializableWritable, SparkContext, TaskContext}
-
import java.io.Closeable
case class HoodieMergeOnReadPartition(index: Int, split: HoodieMergeOnReadFileSplit) extends Partition
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
index 7bbc64782c..0c0d1e2feb 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
@@ -520,10 +520,10 @@ object HoodieSparkSqlWriter {
}
def validateSchemaForHoodieIsDeleted(schema: Schema): Unit = {
- if (schema.getField(HoodieRecord.HOODIE_IS_DELETED) != null &&
- AvroConversionUtils.resolveAvroTypeNullability(schema.getField(HoodieRecord.HOODIE_IS_DELETED).schema())._2.getType != Schema.Type.BOOLEAN) {
- throw new HoodieException(HoodieRecord.HOODIE_IS_DELETED + " has to be BOOLEAN type. Passed in dataframe's schema has type "
- + schema.getField(HoodieRecord.HOODIE_IS_DELETED).schema().getType)
+ if (schema.getField(HoodieRecord.HOODIE_IS_DELETED_FIELD) != null &&
+ AvroConversionUtils.resolveAvroTypeNullability(schema.getField(HoodieRecord.HOODIE_IS_DELETED_FIELD).schema())._2.getType != Schema.Type.BOOLEAN) {
+ throw new HoodieException(HoodieRecord.HOODIE_IS_DELETED_FIELD + " has to be BOOLEAN type. Passed in dataframe's schema has type "
+ + schema.getField(HoodieRecord.HOODIE_IS_DELETED_FIELD).schema().getType)
}
}
diff --git a/hudi-spark-datasource/hudi-spark-common/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java b/hudi-spark-datasource/hudi-spark-common/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java
index b9f77bccfd..f94540d3b0 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java
+++ b/hudi-spark-datasource/hudi-spark-common/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java
@@ -21,6 +21,7 @@ package org.apache.hudi.internal;
import org.apache.hudi.DataSourceWriteOptions;
import org.apache.hudi.client.HoodieInternalWriteStatus;
import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.model.HoodieRecord.HoodieMetadataField;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
@@ -155,13 +156,12 @@ public class HoodieBulkInsertInternalWriterTestBase extends HoodieClientTestHarn
if (populateMetaColumns) {
// verify 3 meta fields that are filled in within create handle
actualRows.collectAsList().forEach(entry -> {
- assertEquals(entry.get(HoodieRecord.HOODIE_META_COLUMNS_NAME_TO_POS.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD)).toString(), instantTime);
- assertFalse(entry.isNullAt(HoodieRecord.HOODIE_META_COLUMNS_NAME_TO_POS.get(HoodieRecord.FILENAME_METADATA_FIELD)));
+ assertEquals(entry.get(HoodieMetadataField.COMMIT_TIME_METADATA_FIELD.ordinal()).toString(), instantTime);
+ assertFalse(entry.isNullAt(HoodieMetadataField.FILENAME_METADATA_FIELD.ordinal()));
if (fileNames.isPresent()) {
- assertTrue(fileNames.get().contains(entry.get(HoodieRecord.HOODIE_META_COLUMNS_NAME_TO_POS
- .get(HoodieRecord.FILENAME_METADATA_FIELD))));
+ assertTrue(fileNames.get().contains(entry.get(HoodieMetadataField.FILENAME_METADATA_FIELD.ordinal())));
}
- assertFalse(entry.isNullAt(HoodieRecord.HOODIE_META_COLUMNS_NAME_TO_POS.get(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD)));
+ assertFalse(entry.isNullAt(HoodieMetadataField.COMMIT_SEQNO_METADATA_FIELD.ordinal()));
});
// after trimming 2 of the meta fields, rest of the fields should match
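The assertions above switch from the name-to-position map to the HoodieMetadataField enum, whose ordinal is used as the meta-column position. A small sketch of that access pattern, assuming a Spark Row written with populated meta columns (class and method names are illustrative):

    import org.apache.hudi.common.model.HoodieRecord.HoodieMetadataField;
    import org.apache.spark.sql.Row;

    public class MetaFieldSketch {
      // Sketch: read Hudi meta columns from a row by enum ordinal, as the test above does.
      public static String commitTimeOf(Row row) {
        return row.get(HoodieMetadataField.COMMIT_TIME_METADATA_FIELD.ordinal()).toString();
      }

      public static boolean hasFileName(Row row) {
        return !row.isNullAt(HoodieMetadataField.FILENAME_METADATA_FIELD.ordinal());
      }
    }
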
diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/payload/ExpressionPayload.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/payload/ExpressionPayload.scala
index 55932237d9..0f11cbf954 100644
--- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/payload/ExpressionPayload.scala
+++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/payload/ExpressionPayload.scala
@@ -131,7 +131,7 @@ class ExpressionPayload(record: GenericRecord,
if (resultRecordOpt == null) {
// If there is no condition matched, just filter this record.
// here we return a IGNORE_RECORD, HoodieMergeHandle will not handle it.
- HOption.of(HoodieWriteHandle.IGNORE_RECORD)
+ HOption.of(HoodieRecord.SENTINEL)
} else {
resultRecordOpt
}
@@ -169,7 +169,7 @@ class ExpressionPayload(record: GenericRecord,
} else {
// If there is no condition matched, just filter this record.
// Here we return a IGNORE_RECORD, HoodieCreateHandle will not handle it.
- HOption.of(HoodieWriteHandle.IGNORE_RECORD)
+ HOption.of(HoodieRecord.SENTINEL)
}
}
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala
index 8ba5db5e5f..1b8e312192 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala
@@ -136,7 +136,7 @@ class TestCOWDataSource extends HoodieClientTestBase {
// Insert Operation
val records = recordsToStrings(dataGen.generateInserts("000", 100)).toList
val inputDF = spark.read.json(spark.sparkContext.parallelize(records, 2))
- val df = inputDF.withColumn(HoodieRecord.HOODIE_IS_DELETED, lit("abc"))
+ val df = inputDF.withColumn(HoodieRecord.HOODIE_IS_DELETED_FIELD, lit("abc"))
assertThrows(classOf[HoodieException], new Executable {
override def execute(): Unit = {
diff --git a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestUtil.java b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestUtil.java
index a58f835dab..131fdab6db 100644
--- a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestUtil.java
+++ b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestUtil.java
@@ -28,6 +28,7 @@ import org.apache.hudi.common.model.HoodieAvroPayload;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieDeltaWriteStat;
+import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
@@ -84,6 +85,7 @@ import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.UUID;
+import java.util.stream.Collectors;
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_BATCH_SYNC_PARTITION_NUM;
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_PASS;
@@ -448,8 +450,8 @@ public class HiveTestUtil {
Writer logWriter = HoodieLogFormat.newWriterBuilder().onParentPath(parquetFilePath.getParent())
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId(dataFile.getFileId())
.overBaseCommit(dataFile.getCommitTime()).withFs(fileSystem).build();
- List<IndexedRecord> records = (isLogSchemaSimple ? SchemaTestUtil.generateTestRecords(0, 100)
- : SchemaTestUtil.generateEvolvedTestRecords(100, 100));
+ List<HoodieRecord> records = (isLogSchemaSimple ? SchemaTestUtil.generateTestRecords(0, 100)
+ : SchemaTestUtil.generateEvolvedTestRecords(100, 100)).stream().map(HoodieAvroIndexedRecord::new).collect(Collectors.toList());
Map<HeaderMetadataType, String> header = new HashMap<>(2);
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, dataFile.getCommitTime());
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java
index 9d1b0cbd87..cccee5fe1a 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java
@@ -63,7 +63,6 @@ import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import jline.internal.Log;
import org.apache.avro.Schema;
-import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
@@ -1046,7 +1045,7 @@ public class HoodieMetadataTableValidator implements Serializable {
private Option<BloomFilterData> readBloomFilterFromFile(String partitionPath, String filename) {
Path path = new Path(FSUtils.getPartitionPath(metaClient.getBasePath(), partitionPath), filename);
- HoodieFileReader<IndexedRecord> fileReader;
+ HoodieFileReader fileReader;
try {
fileReader = HoodieFileReaderFactory.getFileReader(metaClient.getHadoopConf(), path);
} catch (IOException e) {
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/postprocessor/DeleteSupportSchemaPostProcessor.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/postprocessor/DeleteSupportSchemaPostProcessor.java
index dd1084d2e1..66fff680ad 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/postprocessor/DeleteSupportSchemaPostProcessor.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/postprocessor/DeleteSupportSchemaPostProcessor.java
@@ -45,8 +45,8 @@ public class DeleteSupportSchemaPostProcessor extends SchemaPostProcessor {
@Override
public Schema processSchema(Schema schema) {
- if (schema.getField(HoodieRecord.HOODIE_IS_DELETED) != null) {
- LOG.warn(String.format("column %s already exists!", HoodieRecord.HOODIE_IS_DELETED));
+ if (schema.getField(HoodieRecord.HOODIE_IS_DELETED_FIELD) != null) {
+ LOG.warn(String.format("column %s already exists!", HoodieRecord.HOODIE_IS_DELETED_FIELD));
return schema;
}
@@ -57,7 +57,7 @@ public class DeleteSupportSchemaPostProcessor extends SchemaPostProcessor {
targetFields.add(new Schema.Field(sourceField.name(), sourceField.schema(), sourceField.doc(), sourceField.defaultVal()));
}
// add _hoodie_is_deleted column
- targetFields.add(new Schema.Field(HoodieRecord.HOODIE_IS_DELETED, Schema.create(Schema.Type.BOOLEAN), null, false));
+ targetFields.add(new Schema.Field(HoodieRecord.HOODIE_IS_DELETED_FIELD, Schema.create(Schema.Type.BOOLEAN), null, false));
return Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), false, targetFields);
}
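The rename from HOODIE_IS_DELETED to HOODIE_IS_DELETED_FIELD runs through both the writer-side validation and this post-processor. A hedged sketch of appending the _hoodie_is_deleted column to an Avro schema, mirroring processSchema above (class and method names are illustrative):

    import java.util.ArrayList;
    import java.util.List;
    import org.apache.avro.Schema;
    import org.apache.hudi.common.model.HoodieRecord;

    public class IsDeletedFieldSketch {
      // Sketch: copy the source fields and append a BOOLEAN _hoodie_is_deleted
      // column defaulting to false, as the post-processor above does.
      public static Schema withIsDeleted(Schema source) {
        if (source.getField(HoodieRecord.HOODIE_IS_DELETED_FIELD) != null) {
          return source; // column already present
        }
        List<Schema.Field> fields = new ArrayList<>();
        for (Schema.Field f : source.getFields()) {
          fields.add(new Schema.Field(f.name(), f.schema(), f.doc(), f.defaultVal()));
        }
        fields.add(new Schema.Field(HoodieRecord.HOODIE_IS_DELETED_FIELD,
            Schema.create(Schema.Type.BOOLEAN), null, false));
        return Schema.createRecord(source.getName(), source.getDoc(), source.getNamespace(), false, fields);
      }
    }
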
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/processor/maxwell/MaxwellJsonKafkaSourcePostProcessor.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/processor/maxwell/MaxwellJsonKafkaSourcePostProcessor.java
index e008c04e33..be15aa6d12 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/processor/maxwell/MaxwellJsonKafkaSourcePostProcessor.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/processor/maxwell/MaxwellJsonKafkaSourcePostProcessor.java
@@ -119,7 +119,7 @@ public class MaxwellJsonKafkaSourcePostProcessor extends JsonKafkaSourcePostProc
// insert or update
if (INSERT.equals(type) || UPDATE.equals(type)) {
... 37 lines suppressed ...