You are viewing a plain text version of this content. The hyperlink to the canonical version was not preserved in this plain text copy.
Posted to commits@hudi.apache.org by vi...@apache.org on 2020/08/04 03:19:34 UTC
[hudi] branch master updated (266bce1 -> 539621b)
This is an automated email from the ASF dual-hosted git repository.
vinoth pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git.
from 266bce1 [MINOR] Fixing usage of right config value for parallelism to dedup in Bulk Insert (#1905)
add 539621b [HUDI-242] Support for RFC-12/Bootstrapping of external datasets to hudi (#1876)
No new revisions were added by this update.
Summary of changes:
LICENSE | 1 +
docker/demo/compaction.commands | 5 +-
docker/demo/hive-batch1.commands | 8 +
docker/demo/hive-batch2-after-compaction.commands | 6 +
docker/demo/hive-incremental-cow.commands | 6 +
docker/demo/hive-incremental-mor-ro.commands | 6 +
docker/demo/hive-incremental-mor-rt.commands | 6 +
docker/demo/sparksql-batch1.commands | 10 +
docker/demo/sparksql-batch2.commands | 10 +
.../demo/sparksql-bootstrap-prep-source.commands | 20 +-
docker/demo/sparksql-incremental.commands | 34 +-
hudi-cli/hudi-cli.sh | 5 +-
hudi-cli/pom.xml | 26 +-
.../hudi/cli/commands/CompactionCommand.java | 2 +-
.../hudi/cli/commands/FileSystemViewCommand.java | 5 +-
.../hudi/cli/commands/HoodieLogFileCommand.java | 6 +-
.../org/apache/hudi/cli/commands/StatsCommand.java | 2 +-
.../org/apache/hudi/cli/commands/TableCommand.java | 3 +-
.../cli/commands/TestArchivedCommitsCommand.java | 1 +
.../org/apache/hudi/client/HoodieWriteClient.java | 37 +-
.../hudi/client/bootstrap/BootstrapMode.java | 18 +-
.../client/bootstrap/BootstrapRecordPayload.java | 23 +-
.../client/bootstrap/BootstrapSchemaProvider.java | 77 +++
.../bootstrap/BootstrapWriteStatus.java} | 33 +-
.../bootstrap/FullRecordBootstrapDataProvider.java | 58 ++
.../bootstrap/selector/BootstrapModeSelector.java | 48 ++
.../selector/BootstrapRegexModeSelector.java | 56 ++
.../selector/FullRecordBootstrapModeSelector.java} | 13 +-
.../MetadataOnlyBootstrapModeSelector.java} | 13 +-
.../selector/UniformBootstrapModeSelector.java | 48 ++
.../BootstrapPartitionPathTranslator.java | 28 +-
.../IdentityBootstrapPartitionPathTranslator.java} | 23 +-
.../apache/hudi/client/utils/MergingIterator.java | 52 ++
.../apache/hudi/config/HoodieBootstrapConfig.java | 135 +++++
.../org/apache/hudi/config/HoodieWriteConfig.java | 76 ++-
.../org/apache/hudi/io/HoodieAppendHandle.java | 4 +-
.../org/apache/hudi/io/HoodieBootstrapHandle.java | 49 ++
.../org/apache/hudi/io/HoodieCreateHandle.java | 21 +-
.../java/org/apache/hudi/io/HoodieMergeHandle.java | 27 +-
.../java/org/apache/hudi/io/HoodieWriteHandle.java | 33 +-
.../apache/hudi/keygen/BuiltinKeyGenerator.java | 88 +++
.../java/org/apache/hudi/keygen/KeyGenUtils.java | 110 ++++
.../java/org/apache/hudi/keygen/KeyGenerator.java | 13 +-
.../apache/hudi/table/HoodieCopyOnWriteTable.java | 50 +-
.../apache/hudi/table/HoodieMergeOnReadTable.java | 18 +-
.../java/org/apache/hudi/table/HoodieTable.java | 15 +
.../bootstrap/BootstrapCommitActionExecutor.java | 356 +++++++++++++
.../BootstrapDeltaCommitActionExecutor.java} | 33 +-
.../action/bootstrap/BootstrapRecordConsumer.java | 55 ++
.../table/action/bootstrap/BootstrapUtils.java | 77 +++
.../bootstrap/HoodieBootstrapWriteMetadata.java | 35 +-
.../action/commit/BaseCommitActionExecutor.java | 49 +-
.../commit/BulkInsertCommitActionExecutor.java | 31 +-
.../hudi/table/action/commit/BulkInsertHelper.java | 9 +-
.../BulkInsertPreppedCommitActionExecutor.java | 4 +-
.../table/action/commit/CommitActionExecutor.java | 67 +--
.../hudi/table/action/commit/MergeHelper.java | 185 +++++++
.../compact/ScheduleCompactionActionExecutor.java | 13 +-
.../BulkInsertDeltaCommitActionExecutor.java | 24 +-
...BulkInsertPreppedDeltaCommitActionExecutor.java | 4 +-
.../deltacommit/DeltaCommitActionExecutor.java | 15 +-
.../rollback/BaseRollbackActionExecutor.java | 8 +
.../CopyOnWriteRollbackActionExecutor.java | 3 +
.../MergeOnReadRollbackActionExecutor.java | 2 +
.../TestHoodieClientOnCopyOnWriteStorage.java | 4 +-
.../hudi/client/TestUpdateSchemaEvolution.java | 2 +-
.../bootstrap/TestBootstrapRegexModeSelector.java | 75 +++
.../TestUniformBootstrapModeSelector.java | 66 +++
.../table/action/bootstrap/TestBootstrapUtils.java | 85 +++
.../strategy/TestHoodieCompactionStrategy.java | 5 +-
.../hudi/testutils/HoodieMergeOnReadTestUtils.java | 84 ++-
.../test/resources/log4j-surefire-quiet.properties | 1 +
.../src/test/resources/log4j-surefire.properties | 1 +
hudi-common/pom.xml | 29 +-
....avsc => HoodieBootstrapFilePartitionInfo.avsc} | 41 +-
...Metadata.avsc => HoodieBootstrapIndexInfo.avsc} | 41 +-
.../avro/HoodieBootstrapPartitionMetadata.avsc | 45 +-
.../src/main/avro/HoodieCompactionOperation.avsc | 5 +
...estoreMetadata.avsc => HoodieFSPermission.avsc} | 46 +-
hudi-common/src/main/avro/HoodieFileStatus.avsc | 84 +++
.../simple-test.avsc => main/avro/HoodiePath.avsc} | 23 +-
.../java/org/apache/hudi/avro/HoodieAvroUtils.java | 122 ++++-
.../hudi/common/bootstrap/FileStatusUtils.java | 123 +++++
.../common/bootstrap/index/BootstrapIndex.java | 161 ++++++
.../bootstrap/index/HFileBootstrapIndex.java | 534 +++++++++++++++++++
.../java/org/apache/hudi/common/fs/FSUtils.java | 44 +-
.../model/{HoodieBaseFile.java => BaseFile.java} | 29 +-
...dLogFile.java => BootstrapBaseFileMapping.java} | 31 +-
.../hudi/common/model/BootstrapFileMapping.java | 110 ++++
.../hudi/common/model/CompactionOperation.java | 22 +-
.../org/apache/hudi/common/model/FileSlice.java | 8 +
.../apache/hudi/common/model/HoodieBaseFile.java | 82 +--
.../apache/hudi/common/model/HoodieFileFormat.java | 4 +-
.../apache/hudi/common/model/HoodieFileGroup.java | 11 +
.../hudi/common/model/HoodieFileGroupId.java | 11 +-
.../apache/hudi/common/model/HoodieLogFile.java | 6 +
.../hudi/common/model/WriteOperationType.java | 1 +
.../hudi/common/table/HoodieTableConfig.java | 22 +
.../hudi/common/table/HoodieTableMetaClient.java | 104 +++-
.../hudi/common/table/timeline/HoodieInstant.java | 10 +-
.../hudi/common/table/timeline/HoodieTimeline.java | 7 +
.../common/table/timeline/dto/BaseFileDTO.java | 34 +-
.../common/table/timeline/dto/CompactionOpDTO.java | 8 +-
.../common/table/timeline/dto/FileStatusDTO.java | 2 +-
.../table/view/AbstractTableFileSystemView.java | 166 +++++-
.../table/view/FileSystemViewStorageConfig.java | 20 +-
.../table/view/HoodieTableFileSystemView.java | 55 ++
.../IncrementalTimelineSyncFileSystemView.java | 1 -
.../table/view/RocksDbBasedFileSystemView.java | 57 ++
.../view/SpillableMapBasedFileSystemView.java | 37 +-
.../apache/hudi/common/util/CompactionUtils.java | 4 +
.../org/apache/hudi/common/util/ParquetUtils.java | 2 +-
.../hudi/common/util/RocksDBSchemaHelper.java | 13 +-
.../org/apache/hudi/common/util/StringUtils.java | 4 +
.../hudi/common/bootstrap/TestBootstrapIndex.java | 180 +++++++
.../table/view/TestHoodieTableFileSystemView.java | 160 +++++-
.../common/testutils/HoodieCommonTestHarness.java | 8 +-
.../common/testutils/HoodieTestDataGenerator.java | 37 +-
.../hudi/common/testutils/HoodieTestUtils.java | 6 +
.../test/resources/log4j-surefire-quiet.properties | 1 +
.../src/test/resources/log4j-surefire.properties | 1 +
.../hadoop/hive/HoodieCombineHiveInputFormat.java | 3 +-
.../apache/hudi/hadoop/BootstrapBaseFileSplit.java | 71 +++
.../BootstrapColumnStichingRecordReader.java | 110 ++++
.../hadoop/FileStatusWithBootstrapBaseFile.java | 23 +-
.../hudi/hadoop/HoodieColumnProjectionUtils.java | 300 +++++++++++
.../hudi/hadoop/HoodieParquetInputFormat.java | 100 +++-
.../org/apache/hudi/hadoop/InputSplitUtils.java | 67 +++
.../LocatedFileStatusWithBootstrapBaseFile.java | 27 +-
.../hudi/hadoop/PathWithBootstrapFileStatus.java | 28 +-
.../realtime/AbstractRealtimeRecordReader.java | 15 +-
.../realtime/HoodieParquetRealtimeInputFormat.java | 32 +-
.../hadoop/realtime/HoodieRealtimeFileSplit.java | 35 +-
.../realtime/HoodieRealtimeRecordReader.java | 4 +-
.../realtime/RealtimeBootstrapBaseFileSplit.java | 94 ++++
.../realtime/RealtimeCompactedRecordReader.java | 7 +-
.../apache/hudi/hadoop/realtime/RealtimeSplit.java | 107 ++++
.../realtime/RealtimeUnmergedRecordReader.java | 2 +-
.../hudi/hadoop/utils/HoodieInputFormatUtils.java | 28 +-
.../utils/HoodieRealtimeInputFormatUtils.java | 30 +-
.../testsuite/HoodieDeltaStreamerWrapper.java | 13 +-
.../hudi/integ/testsuite/HoodieTestSuiteJob.java | 7 +-
.../integ/testsuite/generator/DeltaGenerator.java | 10 +-
.../java/org/apache/hudi/integ/ITTestBase.java | 9 +-
.../org/apache/hudi/integ/ITTestHoodieDemo.java | 139 +++--
.../main/java/org/apache/hudi/DataSourceUtils.java | 20 +-
.../SparkParquetBootstrapDataProvider.java | 82 +++
.../apache/hudi/keygen/ComplexKeyGenerator.java | 64 +--
.../org/apache/hudi/keygen/CustomKeyGenerator.java | 30 +-
.../hudi/keygen/GlobalDeleteKeyGenerator.java | 46 +-
.../hudi/keygen/NonpartitionedKeyGenerator.java | 21 +-
.../org/apache/hudi/keygen/SimpleKeyGenerator.java | 57 +-
.../hudi/keygen/TimestampBasedKeyGenerator.java | 40 +-
.../scala/org/apache/hudi/DataSourceOptions.scala | 3 +-
.../org/apache/hudi/HoodieSparkSqlWriter.scala | 10 +-
.../java/org/apache/hudi/TestDataSourceUtils.java | 11 +-
.../java/org/apache/hudi/client/TestBootstrap.java | 589 +++++++++++++++++++++
.../test/resources/log4j-surefire-quiet.properties | 1 +
.../src/test/resources/log4j-surefire.properties | 1 +
.../checkpointing/InitialCheckPointProvider.java | 14 +-
...heckpointFromAnotherHoodieTimelineProvider.java | 64 +++
.../utilities/deltastreamer/BootstrapExecutor.java | 181 +++++++
.../hudi/utilities/deltastreamer/DeltaSync.java | 31 +-
.../deltastreamer/HoodieDeltaStreamer.java | 85 ++-
.../functional/TestHoodieDeltaStreamer.java | 32 +-
.../resources/delta-streamer-config/source.avsc | 3 +-
.../resources/delta-streamer-config/target.avsc | 3 +-
.../test/resources/log4j-surefire-quiet.properties | 1 +
.../src/test/resources/log4j-surefire.properties | 1 +
packaging/hudi-hadoop-mr-bundle/pom.xml | 56 ++
packaging/hudi-presto-bundle/pom.xml | 13 +
packaging/hudi-spark-bundle/pom.xml | 50 ++
packaging/hudi-timeline-server-bundle/pom.xml | 4 +
packaging/hudi-utilities-bundle/pom.xml | 51 ++
pom.xml | 1 +
175 files changed, 6949 insertions(+), 1050 deletions(-)
copy hudi-cli/src/main/java/org/apache/hudi/cli/utils/TempViewProvider.java => docker/demo/sparksql-bootstrap-prep-source.commands (68%)
copy hudi-common/src/main/java/org/apache/hudi/common/util/SizeEstimator.java => hudi-client/src/main/java/org/apache/hudi/client/bootstrap/BootstrapMode.java (66%)
copy hudi-common/src/main/java/org/apache/hudi/common/model/EmptyHoodieRecordPayload.java => hudi-client/src/main/java/org/apache/hudi/client/bootstrap/BootstrapRecordPayload.java (71%)
create mode 100644 hudi-client/src/main/java/org/apache/hudi/client/bootstrap/BootstrapSchemaProvider.java
copy hudi-client/src/main/java/org/apache/hudi/{io/HoodieRangeInfoHandle.java => client/bootstrap/BootstrapWriteStatus.java} (50%)
create mode 100644 hudi-client/src/main/java/org/apache/hudi/client/bootstrap/FullRecordBootstrapDataProvider.java
create mode 100644 hudi-client/src/main/java/org/apache/hudi/client/bootstrap/selector/BootstrapModeSelector.java
create mode 100644 hudi-client/src/main/java/org/apache/hudi/client/bootstrap/selector/BootstrapRegexModeSelector.java
copy hudi-client/src/main/java/org/apache/hudi/{exception/HoodieRollbackException.java => client/bootstrap/selector/FullRecordBootstrapModeSelector.java} (68%)
copy hudi-client/src/main/java/org/apache/hudi/{exception/HoodieRollbackException.java => client/bootstrap/selector/MetadataOnlyBootstrapModeSelector.java} (68%)
create mode 100644 hudi-client/src/main/java/org/apache/hudi/client/bootstrap/selector/UniformBootstrapModeSelector.java
copy hudi-spark/src/main/java/org/apache/hudi/keygen/KeyGenerator.java => hudi-client/src/main/java/org/apache/hudi/client/bootstrap/translator/BootstrapPartitionPathTranslator.java (63%)
copy hudi-client/src/main/java/org/apache/hudi/{metrics/InMemoryMetricsReporter.java => client/bootstrap/translator/IdentityBootstrapPartitionPathTranslator.java} (64%)
create mode 100644 hudi-client/src/main/java/org/apache/hudi/client/utils/MergingIterator.java
create mode 100644 hudi-client/src/main/java/org/apache/hudi/config/HoodieBootstrapConfig.java
create mode 100644 hudi-client/src/main/java/org/apache/hudi/io/HoodieBootstrapHandle.java
create mode 100644 hudi-client/src/main/java/org/apache/hudi/keygen/BuiltinKeyGenerator.java
create mode 100644 hudi-client/src/main/java/org/apache/hudi/keygen/KeyGenUtils.java
rename {hudi-spark => hudi-client}/src/main/java/org/apache/hudi/keygen/KeyGenerator.java (76%)
create mode 100644 hudi-client/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapCommitActionExecutor.java
copy hudi-client/src/main/java/org/apache/hudi/table/action/{commit/UpsertCommitActionExecutor.java => bootstrap/BootstrapDeltaCommitActionExecutor.java} (52%)
create mode 100644 hudi-client/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapRecordConsumer.java
create mode 100644 hudi-client/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapUtils.java
copy hudi-common/src/main/java/org/apache/hudi/common/model/EmptyHoodieRecordPayload.java => hudi-client/src/main/java/org/apache/hudi/table/action/bootstrap/HoodieBootstrapWriteMetadata.java (51%)
create mode 100644 hudi-client/src/main/java/org/apache/hudi/table/action/commit/MergeHelper.java
create mode 100644 hudi-client/src/test/java/org/apache/hudi/client/bootstrap/TestBootstrapRegexModeSelector.java
create mode 100644 hudi-client/src/test/java/org/apache/hudi/client/bootstrap/TestUniformBootstrapModeSelector.java
create mode 100644 hudi-client/src/test/java/org/apache/hudi/table/action/bootstrap/TestBootstrapUtils.java
copy hudi-common/src/main/avro/{HoodieRestoreMetadata.avsc => HoodieBootstrapFilePartitionInfo.avsc} (61%)
copy hudi-common/src/main/avro/{HoodieRestoreMetadata.avsc => HoodieBootstrapIndexInfo.avsc} (61%)
copy hudi-client/src/test/resources/exampleSchema.txt => hudi-common/src/main/avro/HoodieBootstrapPartitionMetadata.avsc (55%)
copy hudi-common/src/main/avro/{HoodieRestoreMetadata.avsc => HoodieFSPermission.avsc} (60%)
create mode 100644 hudi-common/src/main/avro/HoodieFileStatus.avsc
copy hudi-common/src/{test/resources/simple-test.avsc => main/avro/HoodiePath.avsc} (72%)
create mode 100644 hudi-common/src/main/java/org/apache/hudi/common/bootstrap/FileStatusUtils.java
create mode 100644 hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/BootstrapIndex.java
create mode 100644 hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java
copy hudi-common/src/main/java/org/apache/hudi/common/model/{HoodieBaseFile.java => BaseFile.java} (79%)
copy hudi-common/src/main/java/org/apache/hudi/common/model/{HoodieArchivedLogFile.java => BootstrapBaseFileMapping.java} (50%)
create mode 100644 hudi-common/src/main/java/org/apache/hudi/common/model/BootstrapFileMapping.java
create mode 100644 hudi-common/src/test/java/org/apache/hudi/common/bootstrap/TestBootstrapIndex.java
create mode 100644 hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/BootstrapBaseFileSplit.java
create mode 100644 hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/BootstrapColumnStichingRecordReader.java
copy hudi-common/src/main/java/org/apache/hudi/common/model/HoodieArchivedLogFile.java => hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/FileStatusWithBootstrapBaseFile.java (62%)
create mode 100644 hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieColumnProjectionUtils.java
create mode 100644 hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/InputSplitUtils.java
copy hudi-common/src/main/java/org/apache/hudi/common/model/HoodieArchivedLogFile.java => hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/LocatedFileStatusWithBootstrapBaseFile.java (57%)
copy hudi-common/src/main/java/org/apache/hudi/common/model/HoodieArchivedLogFile.java => hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/PathWithBootstrapFileStatus.java (58%)
create mode 100644 hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeBootstrapBaseFileSplit.java
create mode 100644 hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeSplit.java
create mode 100644 hudi-spark/src/main/java/org/apache/hudi/bootstrap/SparkParquetBootstrapDataProvider.java
create mode 100644 hudi-spark/src/test/java/org/apache/hudi/client/TestBootstrap.java
create mode 100644 hudi-utilities/src/main/java/org/apache/hudi/utilities/checkpointing/InitialCheckpointFromAnotherHoodieTimelineProvider.java
create mode 100644 hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/BootstrapExecutor.java