You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by vi...@apache.org on 2020/08/04 03:19:34 UTC

[hudi] branch master updated (266bce1 -> 539621b)

This is an automated email from the ASF dual-hosted git repository.

vinoth pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git.


    from 266bce1  [MINOR] Fixing usage of right config value for parallelism to dedup in Bulk Insert (#1905)
     add 539621b  [HUDI-242] Support for RFC-12/Bootstrapping of external datasets to hudi (#1876)

No new revisions were added by this update.

Summary of changes:
 LICENSE                                            |   1 +
 docker/demo/compaction.commands                    |   5 +-
 docker/demo/hive-batch1.commands                   |   8 +
 docker/demo/hive-batch2-after-compaction.commands  |   6 +
 docker/demo/hive-incremental-cow.commands          |   6 +
 docker/demo/hive-incremental-mor-ro.commands       |   6 +
 docker/demo/hive-incremental-mor-rt.commands       |   6 +
 docker/demo/sparksql-batch1.commands               |  10 +
 docker/demo/sparksql-batch2.commands               |  10 +
 .../demo/sparksql-bootstrap-prep-source.commands   |  20 +-
 docker/demo/sparksql-incremental.commands          |  34 +-
 hudi-cli/hudi-cli.sh                               |   5 +-
 hudi-cli/pom.xml                                   |  26 +-
 .../hudi/cli/commands/CompactionCommand.java       |   2 +-
 .../hudi/cli/commands/FileSystemViewCommand.java   |   5 +-
 .../hudi/cli/commands/HoodieLogFileCommand.java    |   6 +-
 .../org/apache/hudi/cli/commands/StatsCommand.java |   2 +-
 .../org/apache/hudi/cli/commands/TableCommand.java |   3 +-
 .../cli/commands/TestArchivedCommitsCommand.java   |   1 +
 .../org/apache/hudi/client/HoodieWriteClient.java  |  37 +-
 .../hudi/client/bootstrap/BootstrapMode.java       |  18 +-
 .../client/bootstrap/BootstrapRecordPayload.java   |  23 +-
 .../client/bootstrap/BootstrapSchemaProvider.java  |  77 +++
 .../bootstrap/BootstrapWriteStatus.java}           |  33 +-
 .../bootstrap/FullRecordBootstrapDataProvider.java |  58 ++
 .../bootstrap/selector/BootstrapModeSelector.java  |  48 ++
 .../selector/BootstrapRegexModeSelector.java       |  56 ++
 .../selector/FullRecordBootstrapModeSelector.java} |  13 +-
 .../MetadataOnlyBootstrapModeSelector.java}        |  13 +-
 .../selector/UniformBootstrapModeSelector.java     |  48 ++
 .../BootstrapPartitionPathTranslator.java          |  28 +-
 .../IdentityBootstrapPartitionPathTranslator.java} |  23 +-
 .../apache/hudi/client/utils/MergingIterator.java  |  52 ++
 .../apache/hudi/config/HoodieBootstrapConfig.java  | 135 +++++
 .../org/apache/hudi/config/HoodieWriteConfig.java  |  76 ++-
 .../org/apache/hudi/io/HoodieAppendHandle.java     |   4 +-
 .../org/apache/hudi/io/HoodieBootstrapHandle.java  |  49 ++
 .../org/apache/hudi/io/HoodieCreateHandle.java     |  21 +-
 .../java/org/apache/hudi/io/HoodieMergeHandle.java |  27 +-
 .../java/org/apache/hudi/io/HoodieWriteHandle.java |  33 +-
 .../apache/hudi/keygen/BuiltinKeyGenerator.java    |  88 +++
 .../java/org/apache/hudi/keygen/KeyGenUtils.java   | 110 ++++
 .../java/org/apache/hudi/keygen/KeyGenerator.java  |  13 +-
 .../apache/hudi/table/HoodieCopyOnWriteTable.java  |  50 +-
 .../apache/hudi/table/HoodieMergeOnReadTable.java  |  18 +-
 .../java/org/apache/hudi/table/HoodieTable.java    |  15 +
 .../bootstrap/BootstrapCommitActionExecutor.java   | 356 +++++++++++++
 .../BootstrapDeltaCommitActionExecutor.java}       |  33 +-
 .../action/bootstrap/BootstrapRecordConsumer.java  |  55 ++
 .../table/action/bootstrap/BootstrapUtils.java     |  77 +++
 .../bootstrap/HoodieBootstrapWriteMetadata.java    |  35 +-
 .../action/commit/BaseCommitActionExecutor.java    |  49 +-
 .../commit/BulkInsertCommitActionExecutor.java     |  31 +-
 .../hudi/table/action/commit/BulkInsertHelper.java |   9 +-
 .../BulkInsertPreppedCommitActionExecutor.java     |   4 +-
 .../table/action/commit/CommitActionExecutor.java  |  67 +--
 .../hudi/table/action/commit/MergeHelper.java      | 185 +++++++
 .../compact/ScheduleCompactionActionExecutor.java  |  13 +-
 .../BulkInsertDeltaCommitActionExecutor.java       |  24 +-
 ...BulkInsertPreppedDeltaCommitActionExecutor.java |   4 +-
 .../deltacommit/DeltaCommitActionExecutor.java     |  15 +-
 .../rollback/BaseRollbackActionExecutor.java       |   8 +
 .../CopyOnWriteRollbackActionExecutor.java         |   3 +
 .../MergeOnReadRollbackActionExecutor.java         |   2 +
 .../TestHoodieClientOnCopyOnWriteStorage.java      |   4 +-
 .../hudi/client/TestUpdateSchemaEvolution.java     |   2 +-
 .../bootstrap/TestBootstrapRegexModeSelector.java  |  75 +++
 .../TestUniformBootstrapModeSelector.java          |  66 +++
 .../table/action/bootstrap/TestBootstrapUtils.java |  85 +++
 .../strategy/TestHoodieCompactionStrategy.java     |   5 +-
 .../hudi/testutils/HoodieMergeOnReadTestUtils.java |  84 ++-
 .../test/resources/log4j-surefire-quiet.properties |   1 +
 .../src/test/resources/log4j-surefire.properties   |   1 +
 hudi-common/pom.xml                                |  29 +-
 ....avsc => HoodieBootstrapFilePartitionInfo.avsc} |  41 +-
 ...Metadata.avsc => HoodieBootstrapIndexInfo.avsc} |  41 +-
 .../avro/HoodieBootstrapPartitionMetadata.avsc     |  45 +-
 .../src/main/avro/HoodieCompactionOperation.avsc   |   5 +
 ...estoreMetadata.avsc => HoodieFSPermission.avsc} |  46 +-
 hudi-common/src/main/avro/HoodieFileStatus.avsc    |  84 +++
 .../simple-test.avsc => main/avro/HoodiePath.avsc} |  23 +-
 .../java/org/apache/hudi/avro/HoodieAvroUtils.java | 122 ++++-
 .../hudi/common/bootstrap/FileStatusUtils.java     | 123 +++++
 .../common/bootstrap/index/BootstrapIndex.java     | 161 ++++++
 .../bootstrap/index/HFileBootstrapIndex.java       | 534 +++++++++++++++++++
 .../java/org/apache/hudi/common/fs/FSUtils.java    |  44 +-
 .../model/{HoodieBaseFile.java => BaseFile.java}   |  29 +-
 ...dLogFile.java => BootstrapBaseFileMapping.java} |  31 +-
 .../hudi/common/model/BootstrapFileMapping.java    | 110 ++++
 .../hudi/common/model/CompactionOperation.java     |  22 +-
 .../org/apache/hudi/common/model/FileSlice.java    |   8 +
 .../apache/hudi/common/model/HoodieBaseFile.java   |  82 +--
 .../apache/hudi/common/model/HoodieFileFormat.java |   4 +-
 .../apache/hudi/common/model/HoodieFileGroup.java  |  11 +
 .../hudi/common/model/HoodieFileGroupId.java       |  11 +-
 .../apache/hudi/common/model/HoodieLogFile.java    |   6 +
 .../hudi/common/model/WriteOperationType.java      |   1 +
 .../hudi/common/table/HoodieTableConfig.java       |  22 +
 .../hudi/common/table/HoodieTableMetaClient.java   | 104 +++-
 .../hudi/common/table/timeline/HoodieInstant.java  |  10 +-
 .../hudi/common/table/timeline/HoodieTimeline.java |   7 +
 .../common/table/timeline/dto/BaseFileDTO.java     |  34 +-
 .../common/table/timeline/dto/CompactionOpDTO.java |   8 +-
 .../common/table/timeline/dto/FileStatusDTO.java   |   2 +-
 .../table/view/AbstractTableFileSystemView.java    | 166 +++++-
 .../table/view/FileSystemViewStorageConfig.java    |  20 +-
 .../table/view/HoodieTableFileSystemView.java      |  55 ++
 .../IncrementalTimelineSyncFileSystemView.java     |   1 -
 .../table/view/RocksDbBasedFileSystemView.java     |  57 ++
 .../view/SpillableMapBasedFileSystemView.java      |  37 +-
 .../apache/hudi/common/util/CompactionUtils.java   |   4 +
 .../org/apache/hudi/common/util/ParquetUtils.java  |   2 +-
 .../hudi/common/util/RocksDBSchemaHelper.java      |  13 +-
 .../org/apache/hudi/common/util/StringUtils.java   |   4 +
 .../hudi/common/bootstrap/TestBootstrapIndex.java  | 180 +++++++
 .../table/view/TestHoodieTableFileSystemView.java  | 160 +++++-
 .../common/testutils/HoodieCommonTestHarness.java  |   8 +-
 .../common/testutils/HoodieTestDataGenerator.java  |  37 +-
 .../hudi/common/testutils/HoodieTestUtils.java     |   6 +
 .../test/resources/log4j-surefire-quiet.properties |   1 +
 .../src/test/resources/log4j-surefire.properties   |   1 +
 .../hadoop/hive/HoodieCombineHiveInputFormat.java  |   3 +-
 .../apache/hudi/hadoop/BootstrapBaseFileSplit.java |  71 +++
 .../BootstrapColumnStichingRecordReader.java       | 110 ++++
 .../hadoop/FileStatusWithBootstrapBaseFile.java    |  23 +-
 .../hudi/hadoop/HoodieColumnProjectionUtils.java   | 300 +++++++++++
 .../hudi/hadoop/HoodieParquetInputFormat.java      | 100 +++-
 .../org/apache/hudi/hadoop/InputSplitUtils.java    |  67 +++
 .../LocatedFileStatusWithBootstrapBaseFile.java    |  27 +-
 .../hudi/hadoop/PathWithBootstrapFileStatus.java   |  28 +-
 .../realtime/AbstractRealtimeRecordReader.java     |  15 +-
 .../realtime/HoodieParquetRealtimeInputFormat.java |  32 +-
 .../hadoop/realtime/HoodieRealtimeFileSplit.java   |  35 +-
 .../realtime/HoodieRealtimeRecordReader.java       |   4 +-
 .../realtime/RealtimeBootstrapBaseFileSplit.java   |  94 ++++
 .../realtime/RealtimeCompactedRecordReader.java    |   7 +-
 .../apache/hudi/hadoop/realtime/RealtimeSplit.java | 107 ++++
 .../realtime/RealtimeUnmergedRecordReader.java     |   2 +-
 .../hudi/hadoop/utils/HoodieInputFormatUtils.java  |  28 +-
 .../utils/HoodieRealtimeInputFormatUtils.java      |  30 +-
 .../testsuite/HoodieDeltaStreamerWrapper.java      |  13 +-
 .../hudi/integ/testsuite/HoodieTestSuiteJob.java   |   7 +-
 .../integ/testsuite/generator/DeltaGenerator.java  |  10 +-
 .../java/org/apache/hudi/integ/ITTestBase.java     |   9 +-
 .../org/apache/hudi/integ/ITTestHoodieDemo.java    | 139 +++--
 .../main/java/org/apache/hudi/DataSourceUtils.java |  20 +-
 .../SparkParquetBootstrapDataProvider.java         |  82 +++
 .../apache/hudi/keygen/ComplexKeyGenerator.java    |  64 +--
 .../org/apache/hudi/keygen/CustomKeyGenerator.java |  30 +-
 .../hudi/keygen/GlobalDeleteKeyGenerator.java      |  46 +-
 .../hudi/keygen/NonpartitionedKeyGenerator.java    |  21 +-
 .../org/apache/hudi/keygen/SimpleKeyGenerator.java |  57 +-
 .../hudi/keygen/TimestampBasedKeyGenerator.java    |  40 +-
 .../scala/org/apache/hudi/DataSourceOptions.scala  |   3 +-
 .../org/apache/hudi/HoodieSparkSqlWriter.scala     |  10 +-
 .../java/org/apache/hudi/TestDataSourceUtils.java  |  11 +-
 .../java/org/apache/hudi/client/TestBootstrap.java | 589 +++++++++++++++++++++
 .../test/resources/log4j-surefire-quiet.properties |   1 +
 .../src/test/resources/log4j-surefire.properties   |   1 +
 .../checkpointing/InitialCheckPointProvider.java   |  14 +-
 ...heckpointFromAnotherHoodieTimelineProvider.java |  64 +++
 .../utilities/deltastreamer/BootstrapExecutor.java | 181 +++++++
 .../hudi/utilities/deltastreamer/DeltaSync.java    |  31 +-
 .../deltastreamer/HoodieDeltaStreamer.java         |  85 ++-
 .../functional/TestHoodieDeltaStreamer.java        |  32 +-
 .../resources/delta-streamer-config/source.avsc    |   3 +-
 .../resources/delta-streamer-config/target.avsc    |   3 +-
 .../test/resources/log4j-surefire-quiet.properties |   1 +
 .../src/test/resources/log4j-surefire.properties   |   1 +
 packaging/hudi-hadoop-mr-bundle/pom.xml            |  56 ++
 packaging/hudi-presto-bundle/pom.xml               |  13 +
 packaging/hudi-spark-bundle/pom.xml                |  50 ++
 packaging/hudi-timeline-server-bundle/pom.xml      |   4 +
 packaging/hudi-utilities-bundle/pom.xml            |  51 ++
 pom.xml                                            |   1 +
 175 files changed, 6949 insertions(+), 1050 deletions(-)
 copy hudi-cli/src/main/java/org/apache/hudi/cli/utils/TempViewProvider.java => docker/demo/sparksql-bootstrap-prep-source.commands (68%)
 copy hudi-common/src/main/java/org/apache/hudi/common/util/SizeEstimator.java => hudi-client/src/main/java/org/apache/hudi/client/bootstrap/BootstrapMode.java (66%)
 copy hudi-common/src/main/java/org/apache/hudi/common/model/EmptyHoodieRecordPayload.java => hudi-client/src/main/java/org/apache/hudi/client/bootstrap/BootstrapRecordPayload.java (71%)
 create mode 100644 hudi-client/src/main/java/org/apache/hudi/client/bootstrap/BootstrapSchemaProvider.java
 copy hudi-client/src/main/java/org/apache/hudi/{io/HoodieRangeInfoHandle.java => client/bootstrap/BootstrapWriteStatus.java} (50%)
 create mode 100644 hudi-client/src/main/java/org/apache/hudi/client/bootstrap/FullRecordBootstrapDataProvider.java
 create mode 100644 hudi-client/src/main/java/org/apache/hudi/client/bootstrap/selector/BootstrapModeSelector.java
 create mode 100644 hudi-client/src/main/java/org/apache/hudi/client/bootstrap/selector/BootstrapRegexModeSelector.java
 copy hudi-client/src/main/java/org/apache/hudi/{exception/HoodieRollbackException.java => client/bootstrap/selector/FullRecordBootstrapModeSelector.java} (68%)
 copy hudi-client/src/main/java/org/apache/hudi/{exception/HoodieRollbackException.java => client/bootstrap/selector/MetadataOnlyBootstrapModeSelector.java} (68%)
 create mode 100644 hudi-client/src/main/java/org/apache/hudi/client/bootstrap/selector/UniformBootstrapModeSelector.java
 copy hudi-spark/src/main/java/org/apache/hudi/keygen/KeyGenerator.java => hudi-client/src/main/java/org/apache/hudi/client/bootstrap/translator/BootstrapPartitionPathTranslator.java (63%)
 copy hudi-client/src/main/java/org/apache/hudi/{metrics/InMemoryMetricsReporter.java => client/bootstrap/translator/IdentityBootstrapPartitionPathTranslator.java} (64%)
 create mode 100644 hudi-client/src/main/java/org/apache/hudi/client/utils/MergingIterator.java
 create mode 100644 hudi-client/src/main/java/org/apache/hudi/config/HoodieBootstrapConfig.java
 create mode 100644 hudi-client/src/main/java/org/apache/hudi/io/HoodieBootstrapHandle.java
 create mode 100644 hudi-client/src/main/java/org/apache/hudi/keygen/BuiltinKeyGenerator.java
 create mode 100644 hudi-client/src/main/java/org/apache/hudi/keygen/KeyGenUtils.java
 rename {hudi-spark => hudi-client}/src/main/java/org/apache/hudi/keygen/KeyGenerator.java (76%)
 create mode 100644 hudi-client/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapCommitActionExecutor.java
 copy hudi-client/src/main/java/org/apache/hudi/table/action/{commit/UpsertCommitActionExecutor.java => bootstrap/BootstrapDeltaCommitActionExecutor.java} (52%)
 create mode 100644 hudi-client/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapRecordConsumer.java
 create mode 100644 hudi-client/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapUtils.java
 copy hudi-common/src/main/java/org/apache/hudi/common/model/EmptyHoodieRecordPayload.java => hudi-client/src/main/java/org/apache/hudi/table/action/bootstrap/HoodieBootstrapWriteMetadata.java (51%)
 create mode 100644 hudi-client/src/main/java/org/apache/hudi/table/action/commit/MergeHelper.java
 create mode 100644 hudi-client/src/test/java/org/apache/hudi/client/bootstrap/TestBootstrapRegexModeSelector.java
 create mode 100644 hudi-client/src/test/java/org/apache/hudi/client/bootstrap/TestUniformBootstrapModeSelector.java
 create mode 100644 hudi-client/src/test/java/org/apache/hudi/table/action/bootstrap/TestBootstrapUtils.java
 copy hudi-common/src/main/avro/{HoodieRestoreMetadata.avsc => HoodieBootstrapFilePartitionInfo.avsc} (61%)
 copy hudi-common/src/main/avro/{HoodieRestoreMetadata.avsc => HoodieBootstrapIndexInfo.avsc} (61%)
 copy hudi-client/src/test/resources/exampleSchema.txt => hudi-common/src/main/avro/HoodieBootstrapPartitionMetadata.avsc (55%)
 copy hudi-common/src/main/avro/{HoodieRestoreMetadata.avsc => HoodieFSPermission.avsc} (60%)
 create mode 100644 hudi-common/src/main/avro/HoodieFileStatus.avsc
 copy hudi-common/src/{test/resources/simple-test.avsc => main/avro/HoodiePath.avsc} (72%)
 create mode 100644 hudi-common/src/main/java/org/apache/hudi/common/bootstrap/FileStatusUtils.java
 create mode 100644 hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/BootstrapIndex.java
 create mode 100644 hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java
 copy hudi-common/src/main/java/org/apache/hudi/common/model/{HoodieBaseFile.java => BaseFile.java} (79%)
 copy hudi-common/src/main/java/org/apache/hudi/common/model/{HoodieArchivedLogFile.java => BootstrapBaseFileMapping.java} (50%)
 create mode 100644 hudi-common/src/main/java/org/apache/hudi/common/model/BootstrapFileMapping.java
 create mode 100644 hudi-common/src/test/java/org/apache/hudi/common/bootstrap/TestBootstrapIndex.java
 create mode 100644 hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/BootstrapBaseFileSplit.java
 create mode 100644 hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/BootstrapColumnStichingRecordReader.java
 copy hudi-common/src/main/java/org/apache/hudi/common/model/HoodieArchivedLogFile.java => hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/FileStatusWithBootstrapBaseFile.java (62%)
 create mode 100644 hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieColumnProjectionUtils.java
 create mode 100644 hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/InputSplitUtils.java
 copy hudi-common/src/main/java/org/apache/hudi/common/model/HoodieArchivedLogFile.java => hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/LocatedFileStatusWithBootstrapBaseFile.java (57%)
 copy hudi-common/src/main/java/org/apache/hudi/common/model/HoodieArchivedLogFile.java => hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/PathWithBootstrapFileStatus.java (58%)
 create mode 100644 hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeBootstrapBaseFileSplit.java
 create mode 100644 hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeSplit.java
 create mode 100644 hudi-spark/src/main/java/org/apache/hudi/bootstrap/SparkParquetBootstrapDataProvider.java
 create mode 100644 hudi-spark/src/test/java/org/apache/hudi/client/TestBootstrap.java
 create mode 100644 hudi-utilities/src/main/java/org/apache/hudi/utilities/checkpointing/InitialCheckpointFromAnotherHoodieTimelineProvider.java
 create mode 100644 hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/BootstrapExecutor.java