You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by vi...@apache.org on 2019/11/27 13:32:50 UTC
[incubator-hudi] branch master updated: [HUDI-366] Refactor some
module codes based on new ImportOrder code style rule (#1055)
This is an automated email from the ASF dual-hosted git repository.
vinoyang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hudi.git
The following commit(s) were added to refs/heads/master by this push:
new f9139c0 [HUDI-366] Refactor some module codes based on new ImportOrder code style rule (#1055)
f9139c0 is described below
commit f9139c0f616775f4b3d0df95772f2621f0e7c9f1
Author: 谢磊 <la...@163.com>
AuthorDate: Wed Nov 27 21:32:43 2019 +0800
[HUDI-366] Refactor some module codes based on new ImportOrder code style rule (#1055)
[HUDI-366] Refactor hudi-hadoop-mr / hudi-timeline-service / hudi-spark / hudi-integ-test / hudi- utilities based on new ImportOrder code style rule
---
.../hudi/hadoop/HoodieParquetInputFormat.java | 34 +++++++------
.../hudi/hadoop/HoodieROTablePathFilter.java | 20 ++++----
.../hudi/hadoop/RecordReaderValueIterator.java | 10 ++--
.../hadoop/SafeParquetRecordReaderWrapper.java | 3 +-
.../hadoop/hive/HoodieCombineHiveInputFormat.java | 38 +++++++-------
.../realtime/AbstractRealtimeRecordReader.java | 34 +++++++------
.../realtime/HoodieParquetRealtimeInputFormat.java | 46 +++++++++--------
.../hadoop/realtime/HoodieRealtimeFileSplit.java | 3 +-
.../realtime/HoodieRealtimeRecordReader.java | 6 ++-
.../realtime/RealtimeCompactedRecordReader.java | 18 ++++---
.../realtime/RealtimeUnmergedRecordReader.java | 20 ++++----
.../apache/hudi/hadoop/InputFormatTestUtil.java | 28 +++++-----
.../org/apache/hudi/hadoop/TestAnnotation.java | 5 +-
.../apache/hudi/hadoop/TestHoodieInputFormat.java | 10 ++--
.../hudi/hadoop/TestHoodieROTablePathFilter.java | 16 +++---
.../hudi/hadoop/TestRecordReaderValueIterator.java | 12 +++--
.../realtime/TestHoodieRealtimeRecordReader.java | 59 ++++++++++++----------
.../java/org/apache/hudi/integ/ITTestBase.java | 18 ++++---
.../org/apache/hudi/integ/ITTestHoodieDemo.java | 6 ++-
.../org/apache/hudi/integ/ITTestHoodieSanity.java | 1 +
.../main/java/org/apache/hudi/BaseAvroPayload.java | 8 +--
.../java/org/apache/hudi/ComplexKeyGenerator.java | 8 +--
.../main/java/org/apache/hudi/DataSourceUtils.java | 18 ++++---
.../org/apache/hudi/HoodieDataSourceHelpers.java | 10 ++--
.../main/java/org/apache/hudi/KeyGenerator.java | 6 ++-
.../apache/hudi/NonpartitionedKeyGenerator.java | 3 +-
.../hudi/OverwriteWithLatestAvroPayload.java | 10 ++--
.../main/java/org/apache/hudi/QuickstartUtils.java | 18 ++++---
.../java/org/apache/hudi/SimpleKeyGenerator.java | 3 +-
hudi-spark/src/test/java/DataSourceTestUtils.java | 7 +--
hudi-spark/src/test/java/HoodieJavaApp.java | 12 +++--
.../src/test/java/HoodieJavaStreamingApp.java | 18 ++++---
.../timeline/service/FileSystemViewHandler.java | 24 +++++----
.../hudi/timeline/service/TimelineService.java | 16 +++---
.../timeline/service/handlers/DataFileHandler.java | 8 +--
.../service/handlers/FileSliceHandler.java | 12 +++--
.../hudi/timeline/service/handlers/Handler.java | 6 ++-
.../timeline/service/handlers/TimelineHandler.java | 10 ++--
.../view/TestRemoteHoodieTableFileSystemView.java | 1 +
.../apache/hudi/utilities/HDFSParquetImporter.java | 43 ++++++++--------
.../hudi/utilities/HiveIncrementalPuller.java | 32 ++++++------
.../org/apache/hudi/utilities/HoodieCleaner.java | 18 ++++---
.../hudi/utilities/HoodieCompactionAdminTool.java | 18 ++++---
.../org/apache/hudi/utilities/HoodieCompactor.java | 16 +++---
.../hudi/utilities/HoodieSnapshotCopier.java | 25 +++++----
.../hudi/utilities/HoodieWithTimelineServer.java | 11 ++--
.../org/apache/hudi/utilities/UtilHelpers.java | 26 +++++-----
.../adhoc/UpgradePayloadFromUberToApache.java | 20 ++++----
.../AbstractDeltaStreamerService.java | 8 +--
.../hudi/utilities/deltastreamer/Compactor.java | 6 ++-
.../hudi/utilities/deltastreamer/DeltaSync.java | 42 ++++++++-------
.../deltastreamer/HoodieDeltaStreamer.java | 50 +++++++++---------
.../deltastreamer/HoodieDeltaStreamerMetrics.java | 3 +-
.../deltastreamer/SchedulerConfGenerator.java | 12 +++--
.../deltastreamer/SourceFormatAdapter.java | 11 ++--
.../exception/HoodieIncrementalPullException.java | 3 +-
.../keygen/TimestampBasedKeyGenerator.java | 18 ++++---
.../hudi/utilities/perf/TimelineServerPerf.java | 36 ++++++-------
.../utilities/schema/FilebasedSchemaProvider.java | 12 +++--
.../schema/NullTargetSchemaRegistryProvider.java | 3 +-
.../utilities/schema/RowBasedSchemaProvider.java | 3 +-
.../hudi/utilities/schema/SchemaProvider.java | 6 ++-
.../utilities/schema/SchemaRegistryProvider.java | 12 +++--
.../hudi/utilities/sources/AvroDFSSource.java | 9 ++--
.../hudi/utilities/sources/AvroKafkaSource.java | 7 +--
.../apache/hudi/utilities/sources/AvroSource.java | 3 +-
.../hudi/utilities/sources/HiveIncrPullSource.java | 28 +++++-----
.../hudi/utilities/sources/HoodieIncrSource.java | 4 +-
.../hudi/utilities/sources/JsonDFSSource.java | 1 +
.../hudi/utilities/sources/JsonKafkaSource.java | 3 +-
.../apache/hudi/utilities/sources/JsonSource.java | 1 +
.../hudi/utilities/sources/ParquetDFSSource.java | 3 +-
.../hudi/utilities/sources/ParquetSource.java | 3 +-
.../apache/hudi/utilities/sources/RowSource.java | 1 +
.../org/apache/hudi/utilities/sources/Source.java | 4 +-
.../utilities/sources/helpers/AvroConvertor.java | 8 +--
.../utilities/sources/helpers/DFSPathSelector.java | 26 +++++-----
.../sources/helpers/IncrSourceHelper.java | 3 +-
.../utilities/sources/helpers/KafkaOffsetGen.java | 17 ++++---
.../utilities/transform/FlatteningTransformer.java | 4 +-
.../utilities/transform/IdentityTransformer.java | 1 +
.../transform/SqlQueryBasedTransformer.java | 4 +-
.../hudi/utilities/transform/Transformer.java | 1 +
.../hudi/utilities/TestFlatteningTransformer.java | 5 +-
.../hudi/utilities/TestHDFSParquetImporter.java | 40 ++++++++-------
.../hudi/utilities/TestHoodieDeltaStreamer.java | 36 ++++++-------
.../hudi/utilities/TestHoodieSnapshotCopier.java | 20 ++++----
.../hudi/utilities/TestSchedulerConfGenerator.java | 10 ++--
.../apache/hudi/utilities/UtilitiesTestBase.java | 32 ++++++------
.../utilities/sources/AbstractBaseTestSource.java | 16 +++---
.../sources/DistributedTestDataSource.java | 10 ++--
.../hudi/utilities/sources/TestDFSSource.java | 22 ++++----
.../hudi/utilities/sources/TestDataSource.java | 8 +--
.../hudi/utilities/sources/TestKafkaSource.java | 14 ++---
94 files changed, 765 insertions(+), 598 deletions(-)
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java
index 5dde86a..bd9672c 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java
@@ -18,12 +18,17 @@
package org.apache.hudi.hadoop;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.stream.Collectors;
+import org.apache.hudi.common.model.HoodieDataFile;
+import org.apache.hudi.common.model.HoodiePartitionMetadata;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.table.HoodieTimeline;
+import org.apache.hudi.common.table.TableFileSystemView.ReadOptimizedView;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
+import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
+import org.apache.hudi.exception.DatasetNotFoundException;
+import org.apache.hudi.exception.HoodieIOException;
+import org.apache.hudi.exception.InvalidDatasetException;
+
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
@@ -37,19 +42,16 @@ import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.Job;
-import org.apache.hudi.common.model.HoodieDataFile;
-import org.apache.hudi.common.model.HoodiePartitionMetadata;
-import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.apache.hudi.common.table.HoodieTimeline;
-import org.apache.hudi.common.table.TableFileSystemView.ReadOptimizedView;
-import org.apache.hudi.common.table.timeline.HoodieInstant;
-import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
-import org.apache.hudi.exception.DatasetNotFoundException;
-import org.apache.hudi.exception.HoodieIOException;
-import org.apache.hudi.exception.InvalidDatasetException;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
/**
* HoodieInputFormat which understands the Hoodie File Structure and filters files based on the Hoodie Mode. If paths
* that does not correspond to a hoodie dataset then they are passed in as is (as what FileInputFormat.listStatus()
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java
index bbd53ec..51a9805 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java
@@ -18,24 +18,26 @@
package org.apache.hudi.hadoop;
-import java.io.Serializable;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.stream.Collectors;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
import org.apache.hudi.common.model.HoodieDataFile;
import org.apache.hudi.common.model.HoodiePartitionMetadata;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
import org.apache.hudi.exception.DatasetNotFoundException;
import org.apache.hudi.exception.HoodieException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
+import java.io.Serializable;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.stream.Collectors;
+
/**
* Given a path is a part of - Hoodie dataset = accepts ONLY the latest version of each path - Non-Hoodie dataset = then
* always accept
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/RecordReaderValueIterator.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/RecordReaderValueIterator.java
index d68afb4..4201470 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/RecordReaderValueIterator.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/RecordReaderValueIterator.java
@@ -18,14 +18,16 @@
package org.apache.hudi.hadoop;
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.NoSuchElementException;
-import org.apache.hadoop.mapred.RecordReader;
import org.apache.hudi.exception.HoodieException;
+
+import org.apache.hadoop.mapred.RecordReader;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
/**
* Provides Iterator Interface to iterate value entries read from record reader
*
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/SafeParquetRecordReaderWrapper.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/SafeParquetRecordReaderWrapper.java
index 8a42b13..15b9a2b 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/SafeParquetRecordReaderWrapper.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/SafeParquetRecordReaderWrapper.java
@@ -18,12 +18,13 @@
package org.apache.hudi.hadoop;
-import java.io.IOException;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.RecordReader;
+import java.io.IOException;
+
/**
* Record Reader for parquet. Records read from this reader is safe to be buffered for concurrent processing.
*
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/hive/HoodieCombineHiveInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/hive/HoodieCombineHiveInputFormat.java
index 5a39590..6512d94 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/hive/HoodieCombineHiveInputFormat.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/hive/HoodieCombineHiveInputFormat.java
@@ -18,23 +18,10 @@
package org.apache.hudi.hadoop.hive;
+import org.apache.hudi.hadoop.HoodieParquetInputFormat;
+import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat;
+
import com.google.common.annotations.VisibleForTesting;
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -70,11 +57,26 @@ import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.CombineFileInputFormat;
import org.apache.hadoop.mapred.lib.CombineFileSplit;
import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hudi.hadoop.HoodieParquetInputFormat;
-import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
/**
* This is just a copy of the org.apache.hadoop.hive.ql.io.CombineHiveInputFormat from Hive 2.x Search for **MOD** to
* see minor modifications to support custom inputformat in CombineHiveInputFormat. See
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/AbstractRealtimeRecordReader.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/AbstractRealtimeRecordReader.java
index 215193f..68bf517 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/AbstractRealtimeRecordReader.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/AbstractRealtimeRecordReader.java
@@ -18,15 +18,14 @@
package org.apache.hudi.hadoop.realtime;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.LinkedHashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeMap;
-import java.util.stream.Collectors;
+import org.apache.hudi.common.model.HoodieAvroPayload;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.util.HoodieAvroUtils;
+import org.apache.hudi.common.util.LogReaderUtils;
+import org.apache.hudi.common.util.collection.Pair;
+import org.apache.hudi.exception.HoodieException;
+import org.apache.hudi.exception.HoodieIOException;
+
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.generic.GenericArray;
@@ -46,19 +45,22 @@ import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
-import org.apache.hudi.common.model.HoodieAvroPayload;
-import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.apache.hudi.common.util.HoodieAvroUtils;
-import org.apache.hudi.common.util.LogReaderUtils;
-import org.apache.hudi.common.util.collection.Pair;
-import org.apache.hudi.exception.HoodieException;
-import org.apache.hudi.exception.HoodieIOException;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.parquet.avro.AvroSchemaConverter;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.schema.MessageType;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.stream.Collectors;
+
/**
* Record Reader implementation to merge fresh avro data with base parquet data, to support real time queries.
*/
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java
index 7a3492e..6d46728 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java
@@ -18,17 +18,22 @@
package org.apache.hudi.hadoop.realtime;
+import org.apache.hudi.common.model.FileSlice;
+import org.apache.hudi.common.model.HoodieLogFile;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.table.HoodieTimeline;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
+import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
+import org.apache.hudi.common.util.FSUtils;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.exception.HoodieException;
+import org.apache.hudi.exception.HoodieIOException;
+import org.apache.hudi.hadoop.HoodieParquetInputFormat;
+import org.apache.hudi.hadoop.UseFileSplitsFromInputFormat;
+
import com.google.common.base.Preconditions;
import com.google.common.collect.Sets;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.function.Function;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
@@ -41,22 +46,19 @@ import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
-import org.apache.hudi.common.model.FileSlice;
-import org.apache.hudi.common.model.HoodieLogFile;
-import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.apache.hudi.common.table.HoodieTimeline;
-import org.apache.hudi.common.table.timeline.HoodieInstant;
-import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
-import org.apache.hudi.common.util.FSUtils;
-import org.apache.hudi.common.util.Option;
-import org.apache.hudi.exception.HoodieException;
-import org.apache.hudi.exception.HoodieIOException;
-import org.apache.hudi.hadoop.HoodieParquetInputFormat;
-import org.apache.hudi.hadoop.UseFileSplitsFromInputFormat;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
/**
* Input Format, that provides a real-time view of data in a Hoodie dataset
*/
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeFileSplit.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeFileSplit.java
index 5ae344e..0a050be 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeFileSplit.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeFileSplit.java
@@ -18,13 +18,14 @@
package org.apache.hudi.hadoop.realtime;
+import org.apache.hadoop.mapred.FileSplit;
+
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
-import org.apache.hadoop.mapred.FileSplit;
/**
* Filesplit that wraps the base split and a list of log files to merge deltas from.
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeRecordReader.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeRecordReader.java
index fb11d39..e156316 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeRecordReader.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeRecordReader.java
@@ -18,15 +18,17 @@
package org.apache.hudi.hadoop.realtime;
-import java.io.IOException;
+import org.apache.hudi.exception.HoodieException;
+
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hudi.exception.HoodieException;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
+import java.io.IOException;
+
/**
* Realtime Record Reader which can do compacted (merge-on-read) record reading or unmerged reading (parquet and log
* files read in parallel) based on job configuration.
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java
index 7019907..deeaaf4 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java
@@ -18,23 +18,25 @@
package org.apache.hudi.hadoop.realtime;
-import java.io.IOException;
-import java.util.Map;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.hadoop.io.ArrayWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner;
import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.common.util.HoodieAvroUtils;
import org.apache.hudi.common.util.Option;
+
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
+import java.io.IOException;
+import java.util.Map;
+
class RealtimeCompactedRecordReader extends AbstractRealtimeRecordReader
implements RecordReader<NullWritable, ArrayWritable> {
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeUnmergedRecordReader.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeUnmergedRecordReader.java
index f7a51e8..cd6f41d 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeUnmergedRecordReader.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeUnmergedRecordReader.java
@@ -18,15 +18,6 @@
package org.apache.hudi.hadoop.realtime;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.hadoop.io.ArrayWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
import org.apache.hudi.common.table.log.HoodieUnMergedLogRecordScanner;
import org.apache.hudi.common.util.DefaultSizeEstimator;
import org.apache.hudi.common.util.FSUtils;
@@ -38,6 +29,17 @@ import org.apache.hudi.common.util.queue.IteratorBasedQueueProducer;
import org.apache.hudi.hadoop.RecordReaderValueIterator;
import org.apache.hudi.hadoop.SafeParquetRecordReaderWrapper;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
class RealtimeUnmergedRecordReader extends AbstractRealtimeRecordReader
implements RecordReader<NullWritable, ArrayWritable> {
diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/InputFormatTestUtil.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/InputFormatTestUtil.java
index 7a5b7d4..8dd2f07 100644
--- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/InputFormatTestUtil.java
+++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/InputFormatTestUtil.java
@@ -18,28 +18,30 @@
package org.apache.hudi.hadoop;
-import java.io.File;
-import java.io.FilenameFilter;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.List;
-import java.util.UUID;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.generic.IndexedRecord;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.JobConf;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieTestUtils;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.common.util.HoodieAvroUtils;
import org.apache.hudi.common.util.SchemaTestUtil;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.JobConf;
import org.apache.parquet.avro.AvroParquetWriter;
import org.junit.rules.TemporaryFolder;
+import java.io.File;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.UUID;
+
public class InputFormatTestUtil {
private static String TEST_WRITE_TOKEN = "1-0-1";
diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestAnnotation.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestAnnotation.java
index 7e8f46d..3bdaa64 100644
--- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestAnnotation.java
+++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestAnnotation.java
@@ -18,10 +18,11 @@
package org.apache.hudi.hadoop;
-import static org.junit.Assert.assertTrue;
+import org.junit.Test;
import java.lang.annotation.Annotation;
-import org.junit.Test;
+
+import static org.junit.Assert.assertTrue;
public class TestAnnotation {
diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieInputFormat.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieInputFormat.java
index 3bcf97d..0c3a002 100644
--- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieInputFormat.java
+++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieInputFormat.java
@@ -18,10 +18,8 @@
package org.apache.hudi.hadoop;
-import static org.junit.Assert.assertEquals;
+import org.apache.hudi.common.util.FSUtils;
-import java.io.File;
-import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.io.ArrayWritable;
@@ -30,12 +28,16 @@ import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hudi.common.util.FSUtils;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
+import java.io.File;
+import java.io.IOException;
+
+import static org.junit.Assert.assertEquals;
+
public class TestHoodieInputFormat {
private HoodieParquetInputFormat inputFormat;
diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieROTablePathFilter.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieROTablePathFilter.java
index 3d17140..8d46b10 100644
--- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieROTablePathFilter.java
+++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieROTablePathFilter.java
@@ -18,20 +18,22 @@
package org.apache.hudi.hadoop;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.HoodieCommonTestHarness;
import org.apache.hudi.common.model.HoodieTestUtils;
import org.apache.hudi.common.table.HoodieTableMetaClient;
+
+import org.apache.hadoop.fs.Path;
import org.junit.Before;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
/**
*
*/
diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestRecordReaderValueIterator.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestRecordReaderValueIterator.java
index 9c050cd..2dfd036 100644
--- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestRecordReaderValueIterator.java
+++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestRecordReaderValueIterator.java
@@ -18,17 +18,19 @@
package org.apache.hudi.hadoop;
-import java.io.IOException;
-import java.util.List;
-import java.util.stream.Collectors;
-import java.util.stream.IntStream;
+import org.apache.hudi.common.util.collection.Pair;
+
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hudi.common.util.collection.Pair;
import org.junit.Assert;
import org.junit.Test;
+import java.io.IOException;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
public class TestRecordReaderValueIterator {
@Test
diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java
index 021bf78..69c3385 100644
--- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java
+++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java
@@ -18,18 +18,26 @@
package org.apache.hudi.hadoop.realtime;
-import static org.junit.Assert.assertTrue;
+import org.apache.hudi.common.model.FileSlice;
+import org.apache.hudi.common.model.HoodieLogFile;
+import org.apache.hudi.common.model.HoodieTableType;
+import org.apache.hudi.common.model.HoodieTestUtils;
+import org.apache.hudi.common.table.HoodieTimeline;
+import org.apache.hudi.common.table.log.HoodieLogFormat;
+import org.apache.hudi.common.table.log.HoodieLogFormat.Writer;
+import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock;
+import org.apache.hudi.common.table.log.block.HoodieCommandBlock;
+import org.apache.hudi.common.table.log.block.HoodieCommandBlock.HoodieCommandBlockTypeEnum;
+import org.apache.hudi.common.table.log.block.HoodieLogBlock;
+import org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType;
+import org.apache.hudi.common.util.FSUtils;
+import org.apache.hudi.common.util.HoodieAvroUtils;
+import org.apache.hudi.common.util.SchemaTestUtil;
+import org.apache.hudi.common.util.collection.Pair;
+import org.apache.hudi.exception.HoodieException;
+import org.apache.hudi.hadoop.InputFormatTestUtil;
import com.google.common.collect.Maps;
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.stream.Collectors;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.generic.IndexedRecord;
@@ -51,30 +59,25 @@ import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hudi.common.model.FileSlice;
-import org.apache.hudi.common.model.HoodieLogFile;
-import org.apache.hudi.common.model.HoodieTableType;
-import org.apache.hudi.common.model.HoodieTestUtils;
-import org.apache.hudi.common.table.HoodieTimeline;
-import org.apache.hudi.common.table.log.HoodieLogFormat;
-import org.apache.hudi.common.table.log.HoodieLogFormat.Writer;
-import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock;
-import org.apache.hudi.common.table.log.block.HoodieCommandBlock;
-import org.apache.hudi.common.table.log.block.HoodieCommandBlock.HoodieCommandBlockTypeEnum;
-import org.apache.hudi.common.table.log.block.HoodieLogBlock;
-import org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType;
-import org.apache.hudi.common.util.FSUtils;
-import org.apache.hudi.common.util.HoodieAvroUtils;
-import org.apache.hudi.common.util.SchemaTestUtil;
-import org.apache.hudi.common.util.collection.Pair;
-import org.apache.hudi.exception.HoodieException;
-import org.apache.hudi.hadoop.InputFormatTestUtil;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+
+import static org.junit.Assert.assertTrue;
+
public class TestHoodieRealtimeRecordReader {
private static final String PARTITION_COLUMN = "datestr";
diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java
index 647a390..62e9711 100644
--- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java
+++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java
@@ -18,8 +18,8 @@
package org.apache.hudi.integ;
-import static java.util.concurrent.TimeUnit.SECONDS;
-import static org.awaitility.Awaitility.await;
+import org.apache.hudi.common.util.FileIOUtils;
+import org.apache.hudi.common.util.collection.Pair;
import com.github.dockerjava.api.DockerClient;
import com.github.dockerjava.api.command.DockerCmdExecFactory;
@@ -31,6 +31,11 @@ import com.github.dockerjava.core.DockerClientBuilder;
import com.github.dockerjava.core.DockerClientConfig;
import com.github.dockerjava.core.command.ExecStartResultCallback;
import com.github.dockerjava.jaxrs.JerseyDockerCmdExecFactory;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+import org.junit.Assert;
+import org.junit.Before;
+
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
@@ -38,12 +43,9 @@ import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
-import org.apache.hudi.common.util.FileIOUtils;
-import org.apache.hudi.common.util.collection.Pair;
-import org.apache.log4j.LogManager;
-import org.apache.log4j.Logger;
-import org.junit.Assert;
-import org.junit.Before;
+
+import static java.util.concurrent.TimeUnit.SECONDS;
+import static org.awaitility.Awaitility.await;
public abstract class ITTestBase {
diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java
index e92ef7b..99082ec 100644
--- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java
+++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java
@@ -18,11 +18,13 @@
package org.apache.hudi.integ;
-import com.google.common.collect.ImmutableList;
-import java.util.List;
import org.apache.hudi.common.util.collection.Pair;
+
+import com.google.common.collect.ImmutableList;
import org.junit.Test;
+import java.util.List;
+
/**
* Goes through steps described in https://hudi.incubator.apache.org/docker_demo.html
*
diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java
index 7e73460..ac54df9 100644
--- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java
+++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java
@@ -20,6 +20,7 @@ package org.apache.hudi.integ;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.common.model.HoodieTableType;
+
import org.junit.Assert;
import org.junit.Test;
diff --git a/hudi-spark/src/main/java/org/apache/hudi/BaseAvroPayload.java b/hudi-spark/src/main/java/org/apache/hudi/BaseAvroPayload.java
index 30c3fdd..1268362 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/BaseAvroPayload.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/BaseAvroPayload.java
@@ -18,13 +18,15 @@
package org.apache.hudi;
-import java.io.IOException;
-import java.io.Serializable;
-import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.common.util.HoodieAvroUtils;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
+import org.apache.avro.generic.GenericRecord;
+
+import java.io.IOException;
+import java.io.Serializable;
+
/**
* Base class for all AVRO record based payloads, that can be ordered based on a field
*/
diff --git a/hudi-spark/src/main/java/org/apache/hudi/ComplexKeyGenerator.java b/hudi-spark/src/main/java/org/apache/hudi/ComplexKeyGenerator.java
index 15f47d3..9441947 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/ComplexKeyGenerator.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/ComplexKeyGenerator.java
@@ -18,13 +18,15 @@
package org.apache.hudi;
-import java.util.Arrays;
-import java.util.List;
-import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.exception.HoodieKeyException;
+import org.apache.avro.generic.GenericRecord;
+
+import java.util.Arrays;
+import java.util.List;
+
/**
* Complex key generator, which takes names of fields to be used for recordKey and partitionPath as configs.
*/
diff --git a/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java b/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java
index 6938e4b..e7cfcc8 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java
@@ -18,14 +18,6 @@
package org.apache.hudi;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-import java.util.stream.Collectors;
-import org.apache.avro.Schema.Field;
-import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.client.embedded.EmbeddedTimelineService;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
@@ -43,9 +35,19 @@ import org.apache.hudi.hive.HiveSyncConfig;
import org.apache.hudi.hive.PartitionValueExtractor;
import org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor;
import org.apache.hudi.index.HoodieIndex;
+
+import org.apache.avro.Schema.Field;
+import org.apache.avro.generic.GenericRecord;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
/**
* Utilities used throughout the data source
*/
diff --git a/hudi-spark/src/main/java/org/apache/hudi/HoodieDataSourceHelpers.java b/hudi-spark/src/main/java/org/apache/hudi/HoodieDataSourceHelpers.java
index d310368..f9df30a 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/HoodieDataSourceHelpers.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/HoodieDataSourceHelpers.java
@@ -18,16 +18,18 @@
package org.apache.hudi;
-import com.google.common.collect.Sets;
-import java.util.List;
-import java.util.stream.Collectors;
-import org.apache.hadoop.fs.FileSystem;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.HoodieTimeline;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
+import com.google.common.collect.Sets;
+import org.apache.hadoop.fs.FileSystem;
+
+import java.util.List;
+import java.util.stream.Collectors;
+
/**
* List of helpers to aid, construction of instanttime for read and write operations using datasource
*/
diff --git a/hudi-spark/src/main/java/org/apache/hudi/KeyGenerator.java b/hudi-spark/src/main/java/org/apache/hudi/KeyGenerator.java
index c35663e..4b8084b 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/KeyGenerator.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/KeyGenerator.java
@@ -18,11 +18,13 @@
package org.apache.hudi;
-import java.io.Serializable;
-import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.util.TypedProperties;
+import org.apache.avro.generic.GenericRecord;
+
+import java.io.Serializable;
+
/**
* Abstract class to extend for plugging in extraction of {@link HoodieKey} from an Avro record
*/
diff --git a/hudi-spark/src/main/java/org/apache/hudi/NonpartitionedKeyGenerator.java b/hudi-spark/src/main/java/org/apache/hudi/NonpartitionedKeyGenerator.java
index 4cfbd55..8c0a664 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/NonpartitionedKeyGenerator.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/NonpartitionedKeyGenerator.java
@@ -18,11 +18,12 @@
package org.apache.hudi;
-import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.exception.HoodieKeyException;
+import org.apache.avro.generic.GenericRecord;
+
/**
* Simple Key generator for unpartitioned Hive Tables
*/
diff --git a/hudi-spark/src/main/java/org/apache/hudi/OverwriteWithLatestAvroPayload.java b/hudi-spark/src/main/java/org/apache/hudi/OverwriteWithLatestAvroPayload.java
index f2646cc..e860837 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/OverwriteWithLatestAvroPayload.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/OverwriteWithLatestAvroPayload.java
@@ -18,14 +18,16 @@
package org.apache.hudi;
-import java.io.IOException;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.generic.IndexedRecord;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.HoodieAvroUtils;
import org.apache.hudi.common.util.Option;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
+
+import java.io.IOException;
+
/**
* Default payload used for delta streamer.
* <p>
diff --git a/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java b/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java
index 504ad18..d09716d 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java
@@ -18,6 +18,16 @@
package org.apache.hudi;
+import org.apache.hudi.common.model.HoodieKey;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.util.HoodieAvroUtils;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.exception.HoodieIOException;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
+
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
@@ -29,14 +39,6 @@ import java.util.UUID;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.hudi.common.model.HoodieKey;
-import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.util.HoodieAvroUtils;
-import org.apache.hudi.common.util.Option;
-import org.apache.hudi.exception.HoodieIOException;
/**
* Class to be used in quickstart guide for generating inserts and updates against a corpus. Test data uses a toy Uber
diff --git a/hudi-spark/src/main/java/org/apache/hudi/SimpleKeyGenerator.java b/hudi-spark/src/main/java/org/apache/hudi/SimpleKeyGenerator.java
index 8f59484..f458906 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/SimpleKeyGenerator.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/SimpleKeyGenerator.java
@@ -18,11 +18,12 @@
package org.apache.hudi;
-import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.exception.HoodieKeyException;
+import org.apache.avro.generic.GenericRecord;
+
/**
* Simple key generator, which takes names of fields to be used for recordKey and partitionPath as configs.
*/
diff --git a/hudi-spark/src/test/java/DataSourceTestUtils.java b/hudi-spark/src/test/java/DataSourceTestUtils.java
index 15fea33..ab76444 100644
--- a/hudi-spark/src/test/java/DataSourceTestUtils.java
+++ b/hudi-spark/src/test/java/DataSourceTestUtils.java
@@ -16,14 +16,15 @@
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.List;
-import java.util.stream.Collectors;
import org.apache.hudi.common.TestRawTripPayload;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.util.Option;
+import java.io.IOException;
+import java.util.List;
+import java.util.stream.Collectors;
+
/**
* Test utils for data source tests.
*/
diff --git a/hudi-spark/src/test/java/HoodieJavaApp.java b/hudi-spark/src/test/java/HoodieJavaApp.java
index 50ac65c..389c015 100644
--- a/hudi-spark/src/test/java/HoodieJavaApp.java
+++ b/hudi-spark/src/test/java/HoodieJavaApp.java
@@ -16,11 +16,6 @@
* limitations under the License.
*/
-import com.beust.jcommander.JCommander;
-import com.beust.jcommander.Parameter;
-import java.util.ArrayList;
-import java.util.List;
-import org.apache.hadoop.fs.FileSystem;
import org.apache.hudi.DataSourceReadOptions;
import org.apache.hudi.DataSourceWriteOptions;
import org.apache.hudi.HoodieDataSourceHelpers;
@@ -33,6 +28,10 @@ import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.hive.MultiPartKeysValueExtractor;
import org.apache.hudi.hive.NonPartitionedExtractor;
+
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
+import org.apache.hadoop.fs.FileSystem;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaSparkContext;
@@ -42,6 +41,9 @@ import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
+import java.util.ArrayList;
+import java.util.List;
+
/**
* Sample program that writes & reads hoodie datasets via the Spark datasource
*/
diff --git a/hudi-spark/src/test/java/HoodieJavaStreamingApp.java b/hudi-spark/src/test/java/HoodieJavaStreamingApp.java
index 492cc98..c448abb 100644
--- a/hudi-spark/src/test/java/HoodieJavaStreamingApp.java
+++ b/hudi-spark/src/test/java/HoodieJavaStreamingApp.java
@@ -16,14 +16,6 @@
* limitations under the License.
*/
-import com.beust.jcommander.JCommander;
-import com.beust.jcommander.Parameter;
-import java.util.List;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
import org.apache.hudi.DataSourceReadOptions;
import org.apache.hudi.DataSourceWriteOptions;
import org.apache.hudi.HoodieDataSourceHelpers;
@@ -31,6 +23,11 @@ import org.apache.hudi.common.HoodieTestDataGenerator;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.hive.MultiPartKeysValueExtractor;
+
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaSparkContext;
@@ -42,6 +39,11 @@ import org.apache.spark.sql.streaming.DataStreamWriter;
import org.apache.spark.sql.streaming.OutputMode;
import org.apache.spark.sql.streaming.ProcessingTime;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
/**
* Sample program that writes & reads hoodie datasets via the Spark datasource streaming
*/
diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/FileSystemViewHandler.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/FileSystemViewHandler.java
index cb3d8a7..da2f305 100644
--- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/FileSystemViewHandler.java
+++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/FileSystemViewHandler.java
@@ -18,17 +18,6 @@
package org.apache.hudi.timeline.service;
-import com.fasterxml.jackson.core.JsonProcessingException;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.google.common.base.Preconditions;
-import io.javalin.Context;
-import io.javalin.Handler;
-import io.javalin.Javalin;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.List;
-import java.util.stream.Collectors;
-import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.table.HoodieTimeline;
import org.apache.hudi.common.table.SyncableFileSystemView;
import org.apache.hudi.common.table.timeline.dto.CompactionOpDTO;
@@ -42,10 +31,23 @@ import org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView;
import org.apache.hudi.timeline.service.handlers.DataFileHandler;
import org.apache.hudi.timeline.service.handlers.FileSliceHandler;
import org.apache.hudi.timeline.service.handlers.TimelineHandler;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.base.Preconditions;
+import io.javalin.Context;
+import io.javalin.Handler;
+import io.javalin.Javalin;
+import org.apache.hadoop.conf.Configuration;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.jetbrains.annotations.NotNull;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
/**
* Main REST Handler class that handles local view staleness and delegates calls to slice/data-file/timeline handlers
*/
diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/TimelineService.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/TimelineService.java
index 2272757..16ff2f7 100644
--- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/TimelineService.java
+++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/TimelineService.java
@@ -18,21 +18,23 @@
package org.apache.hudi.timeline.service;
-import com.beust.jcommander.JCommander;
-import com.beust.jcommander.Parameter;
-import io.javalin.Javalin;
-import java.io.IOException;
-import java.io.Serializable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
import org.apache.hudi.common.SerializableConfiguration;
import org.apache.hudi.common.table.view.FileSystemViewManager;
import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
import org.apache.hudi.common.table.view.FileSystemViewStorageType;
import org.apache.hudi.common.util.FSUtils;
+
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
+import io.javalin.Javalin;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
+import java.io.IOException;
+import java.io.Serializable;
+
/**
* A stand alone timeline service exposing File-System View interfaces to clients
*/
diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/DataFileHandler.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/DataFileHandler.java
index f336449..2b92bbf 100644
--- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/DataFileHandler.java
+++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/DataFileHandler.java
@@ -18,14 +18,16 @@
package org.apache.hudi.timeline.service.handlers;
+import org.apache.hudi.common.table.timeline.dto.DataFileDTO;
+import org.apache.hudi.common.table.view.FileSystemViewManager;
+
+import org.apache.hadoop.conf.Configuration;
+
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hudi.common.table.timeline.dto.DataFileDTO;
-import org.apache.hudi.common.table.view.FileSystemViewManager;
/**
* REST Handler servicing data-file requests
diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/FileSliceHandler.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/FileSliceHandler.java
index eb283fb..e58f835 100644
--- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/FileSliceHandler.java
+++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/FileSliceHandler.java
@@ -18,16 +18,18 @@
package org.apache.hudi.timeline.service.handlers;
+import org.apache.hudi.common.table.timeline.dto.CompactionOpDTO;
+import org.apache.hudi.common.table.timeline.dto.FileGroupDTO;
+import org.apache.hudi.common.table.timeline.dto.FileSliceDTO;
+import org.apache.hudi.common.table.view.FileSystemViewManager;
+
+import org.apache.hadoop.conf.Configuration;
+
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hudi.common.table.timeline.dto.CompactionOpDTO;
-import org.apache.hudi.common.table.timeline.dto.FileGroupDTO;
-import org.apache.hudi.common.table.timeline.dto.FileSliceDTO;
-import org.apache.hudi.common.table.view.FileSystemViewManager;
/**
* REST Handler servicing file-slice requests
diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/Handler.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/Handler.java
index 03be706..884bc42 100644
--- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/Handler.java
+++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/Handler.java
@@ -18,10 +18,12 @@
package org.apache.hudi.timeline.service.handlers;
-import java.io.IOException;
+import org.apache.hudi.common.table.view.FileSystemViewManager;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
-import org.apache.hudi.common.table.view.FileSystemViewManager;
+
+import java.io.IOException;
public abstract class Handler {
diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/TimelineHandler.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/TimelineHandler.java
index 49fc2ce..faa81a4 100644
--- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/TimelineHandler.java
+++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/TimelineHandler.java
@@ -18,14 +18,16 @@
package org.apache.hudi.timeline.service.handlers;
+import org.apache.hudi.common.table.timeline.dto.InstantDTO;
+import org.apache.hudi.common.table.timeline.dto.TimelineDTO;
+import org.apache.hudi.common.table.view.FileSystemViewManager;
+
+import org.apache.hadoop.conf.Configuration;
+
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hudi.common.table.timeline.dto.InstantDTO;
-import org.apache.hudi.common.table.timeline.dto.TimelineDTO;
-import org.apache.hudi.common.table.view.FileSystemViewManager;
/**
* REST Handler servicing timeline requests
diff --git a/hudi-timeline-service/src/test/java/org/apache/hudi/timeline/table/view/TestRemoteHoodieTableFileSystemView.java b/hudi-timeline-service/src/test/java/org/apache/hudi/timeline/table/view/TestRemoteHoodieTableFileSystemView.java
index 7c55949..d69a828 100644
--- a/hudi-timeline-service/src/test/java/org/apache/hudi/timeline/table/view/TestRemoteHoodieTableFileSystemView.java
+++ b/hudi-timeline-service/src/test/java/org/apache/hudi/timeline/table/view/TestRemoteHoodieTableFileSystemView.java
@@ -27,6 +27,7 @@ import org.apache.hudi.common.table.view.FileSystemViewStorageType;
import org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView;
import org.apache.hudi.common.table.view.TestHoodieTableFileSystemView;
import org.apache.hudi.timeline.service.TimelineService;
+
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HDFSParquetImporter.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HDFSParquetImporter.java
index 34552bb..62d2aa7 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HDFSParquetImporter.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HDFSParquetImporter.java
@@ -18,26 +18,6 @@
package org.apache.hudi.utilities;
-import com.beust.jcommander.IValueValidator;
-import com.beust.jcommander.JCommander;
-import com.beust.jcommander.Parameter;
-import com.beust.jcommander.ParameterException;
-import com.google.common.annotations.VisibleForTesting;
-import java.io.IOException;
-import java.io.Serializable;
-import java.time.Instant;
-import java.time.ZoneId;
-import java.time.format.DateTimeFormatter;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Properties;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hudi.HoodieWriteClient;
import org.apache.hudi.WriteStatus;
import org.apache.hudi.common.HoodieJsonPayload;
@@ -50,12 +30,35 @@ import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.exception.HoodieIOException;
+
+import com.beust.jcommander.IValueValidator;
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
+import com.beust.jcommander.ParameterException;
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.parquet.avro.AvroReadSupport;
import org.apache.parquet.hadoop.ParquetInputFormat;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.time.Instant;
+import java.time.ZoneId;
+import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+
import scala.Tuple2;
/**
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HiveIncrementalPuller.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HiveIncrementalPuller.java
index 2cba5fe..62ae769 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HiveIncrementalPuller.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HiveIncrementalPuller.java
@@ -18,8 +18,25 @@
package org.apache.hudi.utilities;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
+import org.apache.hudi.common.util.FileIOUtils;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.exception.HoodieException;
+import org.apache.hudi.utilities.exception.HoodieIncrementalPullException;
+import org.apache.hudi.utilities.exception.HoodieIncrementalPullSQLException;
+
import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsAction;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+import org.stringtemplate.v4.ST;
+
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
@@ -32,21 +49,6 @@ import java.sql.Statement;
import java.util.List;
import java.util.Scanner;
import java.util.stream.Collectors;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.permission.FsAction;
-import org.apache.hadoop.fs.permission.FsPermission;
-import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.apache.hudi.common.table.timeline.HoodieInstant;
-import org.apache.hudi.common.util.FileIOUtils;
-import org.apache.hudi.common.util.Option;
-import org.apache.hudi.exception.HoodieException;
-import org.apache.hudi.utilities.exception.HoodieIncrementalPullException;
-import org.apache.hudi.utilities.exception.HoodieIncrementalPullSQLException;
-import org.apache.log4j.LogManager;
-import org.apache.log4j.Logger;
-import org.stringtemplate.v4.ST;
/**
* Utility to pull data after a given commit, based on the supplied HiveQL and save the delta as another hive temporary
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCleaner.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCleaner.java
index 8ca8cd1..27c3220 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCleaner.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCleaner.java
@@ -18,22 +18,24 @@
package org.apache.hudi.utilities;
-import com.beust.jcommander.JCommander;
-import com.beust.jcommander.Parameter;
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
import org.apache.hudi.HoodieWriteClient;
import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.config.HoodieWriteConfig;
+
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaSparkContext;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+
public class HoodieCleaner {
private static volatile Logger log = LogManager.getLogger(HoodieCleaner.class);
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactionAdminTool.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactionAdminTool.java
index 63bf441..d42a45d 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactionAdminTool.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactionAdminTool.java
@@ -18,22 +18,24 @@
package org.apache.hudi.utilities;
-import com.beust.jcommander.JCommander;
-import com.beust.jcommander.Parameter;
-import java.io.ObjectOutputStream;
-import java.io.Serializable;
-import java.util.List;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
import org.apache.hudi.CompactionAdminClient;
import org.apache.hudi.CompactionAdminClient.RenameOpResult;
import org.apache.hudi.CompactionAdminClient.ValidationOpResult;
import org.apache.hudi.common.model.HoodieFileGroupId;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.util.FSUtils;
+
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
import org.apache.spark.api.java.JavaSparkContext;
+import java.io.ObjectOutputStream;
+import java.io.Serializable;
+import java.util.List;
+
public class HoodieCompactionAdminTool {
private final Config cfg;
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactor.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactor.java
index 4f72b24..48fbbaf 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactor.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactor.java
@@ -18,23 +18,25 @@
package org.apache.hudi.utilities;
-import com.beust.jcommander.JCommander;
-import com.beust.jcommander.Parameter;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
import org.apache.hudi.HoodieWriteClient;
import org.apache.hudi.WriteStatus;
import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.TypedProperties;
+
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+
public class HoodieCompactor {
private static volatile Logger logger = LogManager.getLogger(HoodieCompactor.class);
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java
index a37f7da..4f76f43 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java
@@ -18,17 +18,6 @@
package org.apache.hudi.utilities;
-import com.beust.jcommander.JCommander;
-import com.beust.jcommander.Parameter;
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.stream.Stream;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.SerializableConfiguration;
import org.apache.hudi.common.model.HoodieDataFile;
import org.apache.hudi.common.model.HoodiePartitionMetadata;
@@ -40,10 +29,24 @@ import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.common.util.Option;
+
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Stream;
+
import scala.Tuple2;
/**
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieWithTimelineServer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieWithTimelineServer.java
index dc49ebd..16ccb14 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieWithTimelineServer.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieWithTimelineServer.java
@@ -22,6 +22,12 @@ import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.google.common.base.Preconditions;
import io.javalin.Javalin;
+import org.apache.http.HttpResponse;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClientBuilder;
+import org.apache.spark.api.java.JavaSparkContext;
+
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
@@ -31,11 +37,6 @@ import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.IntStream;
-import org.apache.http.HttpResponse;
-import org.apache.http.client.methods.HttpGet;
-import org.apache.http.impl.client.CloseableHttpClient;
-import org.apache.http.impl.client.HttpClientBuilder;
-import org.apache.spark.api.java.JavaSparkContext;
public class HoodieWithTimelineServer implements Serializable {
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java
index bbb18af..1f33ca6 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java
@@ -18,18 +18,6 @@
package org.apache.hudi.utilities;
-import com.google.common.base.Preconditions;
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.StringReader;
-import java.nio.ByteBuffer;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
import org.apache.hudi.HoodieWriteClient;
import org.apache.hudi.WriteStatus;
import org.apache.hudi.common.util.DFSPropertiesConfiguration;
@@ -44,6 +32,11 @@ import org.apache.hudi.index.HoodieIndex;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.Source;
import org.apache.hudi.utilities.transform.Transformer;
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.Accumulator;
@@ -52,6 +45,15 @@ import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.StringReader;
+import java.nio.ByteBuffer;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
/**
* Bunch of helper methods
*/
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/adhoc/UpgradePayloadFromUberToApache.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/adhoc/UpgradePayloadFromUberToApache.java
index 8c04f67..6793d94 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/adhoc/UpgradePayloadFromUberToApache.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/adhoc/UpgradePayloadFromUberToApache.java
@@ -18,8 +18,19 @@
package org.apache.hudi.utilities.adhoc;
+import org.apache.hudi.common.model.HoodieTableType;
+import org.apache.hudi.common.table.HoodieTableConfig;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.util.FSUtils;
+import org.apache.hudi.config.HoodieCompactionConfig;
+
import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
@@ -27,15 +38,6 @@ import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hudi.common.model.HoodieTableType;
-import org.apache.hudi.common.table.HoodieTableConfig;
-import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.apache.hudi.common.util.FSUtils;
-import org.apache.hudi.config.HoodieCompactionConfig;
-import org.apache.log4j.LogManager;
-import org.apache.log4j.Logger;
/**
* This is an one-time use class meant for migrating the configuration for "hoodie.compaction.payload.class" in
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/AbstractDeltaStreamerService.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/AbstractDeltaStreamerService.java
index ceb745a..bd4d8a2 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/AbstractDeltaStreamerService.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/AbstractDeltaStreamerService.java
@@ -18,6 +18,11 @@
package org.apache.hudi.utilities.deltastreamer;
+import org.apache.hudi.common.util.collection.Pair;
+
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+
import java.io.Serializable;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
@@ -25,9 +30,6 @@ import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;
-import org.apache.hudi.common.util.collection.Pair;
-import org.apache.log4j.LogManager;
-import org.apache.log4j.Logger;
/**
* Base Class for running delta-sync/compaction in separate thread and controlling their life-cyle
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/Compactor.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/Compactor.java
index a72b68a..3285ba7 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/Compactor.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/Compactor.java
@@ -18,18 +18,20 @@
package org.apache.hudi.utilities.deltastreamer;
-import java.io.IOException;
-import java.io.Serializable;
import org.apache.hudi.HoodieWriteClient;
import org.apache.hudi.WriteStatus;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.exception.HoodieException;
+
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
+import java.io.IOException;
+import java.io.Serializable;
+
/**
* Run one round of compaction
*/
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java
index 73ef8a3..532ec69 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java
@@ -18,25 +18,6 @@
package org.apache.hudi.utilities.deltastreamer;
-import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE;
-import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME;
-
-import com.codahale.metrics.Timer;
-import com.google.common.base.Preconditions;
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Objects;
-import java.util.function.Function;
-import java.util.stream.Collectors;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hudi.AvroConversionUtils;
import org.apache.hudi.DataSourceUtils;
import org.apache.hudi.HoodieWriteClient;
@@ -66,6 +47,15 @@ import org.apache.hudi.utilities.schema.RowBasedSchemaProvider;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.InputBatch;
import org.apache.hudi.utilities.transform.Transformer;
+
+import com.codahale.metrics.Timer;
+import com.google.common.base.Preconditions;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaRDD;
@@ -73,8 +63,22 @@ import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Objects;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
import scala.collection.JavaConversions;
+import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE;
+import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME;
+
+
/**
* Sync's one batch of data to hoodie dataset
*/
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java
index db8b40a..9893f0d 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java
@@ -18,30 +18,6 @@
package org.apache.hudi.utilities.deltastreamer;
-import com.beust.jcommander.IStringConverter;
-import com.beust.jcommander.JCommander;
-import com.beust.jcommander.Parameter;
-import com.beust.jcommander.ParameterException;
-import com.google.common.base.Preconditions;
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.BlockingQueue;
-import java.util.concurrent.CompletableFuture;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.locks.Condition;
-import java.util.concurrent.locks.ReentrantLock;
-import java.util.stream.Collectors;
-import java.util.stream.IntStream;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hudi.HoodieWriteClient;
import org.apache.hudi.OverwriteWithLatestAvroPayload;
import org.apache.hudi.common.model.HoodieTableType;
@@ -60,11 +36,37 @@ import org.apache.hudi.utilities.HiveIncrementalPuller;
import org.apache.hudi.utilities.UtilHelpers;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.JsonDFSSource;
+
+import com.beust.jcommander.IStringConverter;
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
+import com.beust.jcommander.ParameterException;
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.locks.Condition;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
/**
* An Utility which can incrementally take the output from {@link HiveIncrementalPuller} and apply it to the target
* dataset. Does not maintain any state, queries at runtime to see how far behind the target dataset is from the source
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerMetrics.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerMetrics.java
index 4132f5b..19f8e10 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerMetrics.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerMetrics.java
@@ -18,10 +18,11 @@
package org.apache.hudi.utilities.deltastreamer;
-import com.codahale.metrics.Timer;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.metrics.Metrics;
+import com.codahale.metrics.Timer;
+
public class HoodieDeltaStreamerMetrics {
private HoodieWriteConfig config = null;
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SchedulerConfGenerator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SchedulerConfGenerator.java
index cb4477a..66d2c47 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SchedulerConfGenerator.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SchedulerConfGenerator.java
@@ -18,6 +18,13 @@
package org.apache.hudi.utilities.deltastreamer;
+import org.apache.hudi.common.model.HoodieTableType;
+import org.apache.hudi.common.util.Option;
+
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+import org.apache.spark.SparkConf;
+
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
@@ -25,11 +32,6 @@ import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
-import org.apache.hudi.common.model.HoodieTableType;
-import org.apache.hudi.common.util.Option;
-import org.apache.log4j.LogManager;
-import org.apache.log4j.Logger;
-import org.apache.spark.SparkConf;
/**
* Utility Class to generate Spark Scheduling allocation file. This kicks in only when user sets
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SourceFormatAdapter.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SourceFormatAdapter.java
index ab3b070..b41efcd 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SourceFormatAdapter.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SourceFormatAdapter.java
@@ -18,11 +18,6 @@
package org.apache.hudi.utilities.deltastreamer;
-import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE;
-import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.AvroConversionUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.utilities.sources.AvroSource;
@@ -32,11 +27,17 @@ import org.apache.hudi.utilities.sources.ParquetSource;
import org.apache.hudi.utilities.sources.RowSource;
import org.apache.hudi.utilities.sources.Source;
import org.apache.hudi.utilities.sources.helpers.AvroConvertor;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.types.StructType;
+import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE;
+import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME;
+
/**
* Adapts data-format provided by the source to the data-format required by the client (DeltaStreamer)
*/
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/exception/HoodieIncrementalPullException.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/exception/HoodieIncrementalPullException.java
index 58cad2c..bab12b0 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/exception/HoodieIncrementalPullException.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/exception/HoodieIncrementalPullException.java
@@ -18,9 +18,10 @@
package org.apache.hudi.utilities.exception;
-import java.sql.SQLException;
import org.apache.hudi.exception.HoodieException;
+import java.sql.SQLException;
+
public class HoodieIncrementalPullException extends HoodieException {
public HoodieIncrementalPullException(String msg, SQLException e) {
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java
index 4f91f95..04c6ece 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java
@@ -18,14 +18,6 @@
package org.apache.hudi.utilities.keygen;
-import java.io.Serializable;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Date;
-import java.util.TimeZone;
-import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.DataSourceUtils;
import org.apache.hudi.SimpleKeyGenerator;
import org.apache.hudi.common.model.HoodieKey;
@@ -34,6 +26,16 @@ import org.apache.hudi.exception.HoodieKeyException;
import org.apache.hudi.exception.HoodieNotSupportedException;
import org.apache.hudi.utilities.exception.HoodieDeltaStreamerException;
+import org.apache.avro.generic.GenericRecord;
+
+import java.io.Serializable;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Date;
+import java.util.TimeZone;
+
/**
* Key generator, that relies on timestamps for partitioning field. Still picks record key by name.
*/
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java
index ef1e986..a840ff4 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java
@@ -18,11 +18,30 @@
package org.apache.hudi.utilities.perf;
+import org.apache.hudi.common.model.FileSlice;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.table.SyncableFileSystemView;
+import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
+import org.apache.hudi.common.table.view.FileSystemViewStorageType;
+import org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView;
+import org.apache.hudi.common.util.FSUtils;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.timeline.service.TimelineService;
+import org.apache.hudi.utilities.UtilHelpers;
+
import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.codahale.metrics.Histogram;
import com.codahale.metrics.Snapshot;
import com.codahale.metrics.UniformReservoir;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
@@ -36,23 +55,6 @@ import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hudi.common.model.FileSlice;
-import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.apache.hudi.common.table.SyncableFileSystemView;
-import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
-import org.apache.hudi.common.table.view.FileSystemViewStorageType;
-import org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView;
-import org.apache.hudi.common.util.FSUtils;
-import org.apache.hudi.common.util.Option;
-import org.apache.hudi.timeline.service.TimelineService;
-import org.apache.hudi.utilities.UtilHelpers;
-import org.apache.log4j.LogManager;
-import org.apache.log4j.Logger;
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
public class TimelineServerPerf implements Serializable {
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/FilebasedSchemaProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/FilebasedSchemaProvider.java
index 3eb3a44..5776984 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/FilebasedSchemaProvider.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/FilebasedSchemaProvider.java
@@ -18,17 +18,19 @@
package org.apache.hudi.utilities.schema;
-import java.io.IOException;
-import java.util.Collections;
-import org.apache.avro.Schema;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
import org.apache.hudi.DataSourceUtils;
import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.exception.HoodieIOException;
+
+import org.apache.avro.Schema;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
import org.apache.spark.api.java.JavaSparkContext;
+import java.io.IOException;
+import java.util.Collections;
+
/**
* A simple schema provider, that reads off files on DFS
*/
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/NullTargetSchemaRegistryProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/NullTargetSchemaRegistryProvider.java
index d7415a0..7b3172c 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/NullTargetSchemaRegistryProvider.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/NullTargetSchemaRegistryProvider.java
@@ -18,8 +18,9 @@
package org.apache.hudi.utilities.schema;
-import org.apache.avro.Schema;
import org.apache.hudi.common.util.TypedProperties;
+
+import org.apache.avro.Schema;
import org.apache.spark.api.java.JavaSparkContext;
/**
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/RowBasedSchemaProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/RowBasedSchemaProvider.java
index 3cec79c..4b708fa 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/RowBasedSchemaProvider.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/RowBasedSchemaProvider.java
@@ -18,8 +18,9 @@
package org.apache.hudi.utilities.schema;
-import org.apache.avro.Schema;
import org.apache.hudi.AvroConversionUtils;
+
+import org.apache.avro.Schema;
import org.apache.spark.sql.types.StructType;
public class RowBasedSchemaProvider extends SchemaProvider {
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaProvider.java
index c1f7c34..8378383 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaProvider.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaProvider.java
@@ -18,11 +18,13 @@
package org.apache.hudi.utilities.schema;
-import java.io.Serializable;
-import org.apache.avro.Schema;
import org.apache.hudi.common.util.TypedProperties;
+
+import org.apache.avro.Schema;
import org.apache.spark.api.java.JavaSparkContext;
+import java.io.Serializable;
+
/**
* Class to provide schema for reading data and also writing into a Hoodie table
*/
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaRegistryProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaRegistryProvider.java
index 7e2ac16..d03c6da 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaRegistryProvider.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaRegistryProvider.java
@@ -18,16 +18,18 @@
package org.apache.hudi.utilities.schema;
+import org.apache.hudi.DataSourceUtils;
+import org.apache.hudi.common.util.TypedProperties;
+import org.apache.hudi.exception.HoodieIOException;
+
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
+import org.apache.avro.Schema;
+import org.apache.spark.api.java.JavaSparkContext;
+
import java.io.IOException;
import java.net.URL;
import java.util.Collections;
-import org.apache.avro.Schema;
-import org.apache.hudi.DataSourceUtils;
-import org.apache.hudi.common.util.TypedProperties;
-import org.apache.hudi.exception.HoodieIOException;
-import org.apache.spark.api.java.JavaSparkContext;
/**
* Obtains latest schema from the Confluent/Kafka schema-registry
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroDFSSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroDFSSource.java
index b7f6f8c..4cd8f07 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroDFSSource.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroDFSSource.java
@@ -18,15 +18,16 @@
package org.apache.hudi.utilities.sources;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.mapred.AvroKey;
-import org.apache.avro.mapreduce.AvroKeyInputFormat;
-import org.apache.hadoop.io.NullWritable;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.helpers.DFSPathSelector;
+
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.mapred.AvroKey;
+import org.apache.avro.mapreduce.AvroKeyInputFormat;
+import org.apache.hadoop.io.NullWritable;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroKafkaSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroKafkaSource.java
index 2ce8b43..9588a81 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroKafkaSource.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroKafkaSource.java
@@ -18,14 +18,15 @@
package org.apache.hudi.utilities.sources;
-import io.confluent.kafka.serializers.KafkaAvroDecoder;
-import kafka.serializer.StringDecoder;
-import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen;
import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.CheckpointUtils;
+
+import io.confluent.kafka.serializers.KafkaAvroDecoder;
+import kafka.serializer.StringDecoder;
+import org.apache.avro.generic.GenericRecord;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaRDD;
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroSource.java
index 3137cd6..24d7878 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroSource.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroSource.java
@@ -18,9 +18,10 @@
package org.apache.hudi.utilities.sources;
-import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.utilities.schema.SchemaProvider;
+
+import org.apache.avro.generic.GenericRecord;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HiveIncrPullSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HiveIncrPullSource.java
index b37a219..4e4d603 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HiveIncrPullSource.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HiveIncrPullSource.java
@@ -18,19 +18,6 @@
package org.apache.hudi.utilities.sources;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.List;
-import java.util.stream.Collectors;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.mapred.AvroKey;
-import org.apache.avro.mapreduce.AvroKeyInputFormat;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.NullWritable;
import org.apache.hudi.DataSourceUtils;
import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.common.util.Option;
@@ -38,6 +25,14 @@ import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.utilities.HiveIncrementalPuller;
import org.apache.hudi.utilities.schema.SchemaProvider;
+
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.mapred.AvroKey;
+import org.apache.avro.mapreduce.AvroKeyInputFormat;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.NullWritable;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaPairRDD;
@@ -45,6 +40,13 @@ import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
/**
* Source to read deltas produced by {@link HiveIncrementalPuller}, commit by commit and apply to the target table
* <p>
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HoodieIncrSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HoodieIncrSource.java
index f5ed4e3..3edb296 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HoodieIncrSource.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HoodieIncrSource.java
@@ -18,7 +18,6 @@
package org.apache.hudi.utilities.sources;
-import java.util.Arrays;
import org.apache.hudi.DataSourceReadOptions;
import org.apache.hudi.DataSourceUtils;
import org.apache.hudi.common.model.HoodieRecord;
@@ -28,12 +27,15 @@ import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.helpers.IncrSourceHelper;
+
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrameReader;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
+import java.util.Arrays;
+
public class HoodieIncrSource extends RowSource {
protected static class Config {
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonDFSSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonDFSSource.java
index 6be3a54..ed9b82a 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonDFSSource.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonDFSSource.java
@@ -23,6 +23,7 @@ import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.helpers.DFSPathSelector;
+
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonKafkaSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonKafkaSource.java
index 8e95a8c..ba68ac9 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonKafkaSource.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonKafkaSource.java
@@ -18,12 +18,13 @@
package org.apache.hudi.utilities.sources;
-import kafka.serializer.StringDecoder;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen;
import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.CheckpointUtils;
+
+import kafka.serializer.StringDecoder;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaRDD;
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonSource.java
index 5c9db18..fb569e9 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonSource.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonSource.java
@@ -20,6 +20,7 @@ package org.apache.hudi.utilities.sources;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.utilities.schema.SchemaProvider;
+
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ParquetDFSSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ParquetDFSSource.java
index 22ac3f9..3695fce 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ParquetDFSSource.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ParquetDFSSource.java
@@ -18,12 +18,13 @@
package org.apache.hudi.utilities.sources;
-import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.helpers.DFSPathSelector;
+
+import org.apache.avro.generic.GenericRecord;
import org.apache.parquet.avro.AvroParquetInputFormat;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ParquetSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ParquetSource.java
index edcc688..58fe5ad 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ParquetSource.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ParquetSource.java
@@ -18,9 +18,10 @@
package org.apache.hudi.utilities.sources;
-import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.utilities.schema.SchemaProvider;
+
+import org.apache.avro.generic.GenericRecord;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/RowSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/RowSource.java
index 467c667..9e289f1 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/RowSource.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/RowSource.java
@@ -23,6 +23,7 @@ import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.utilities.schema.RowBasedSchemaProvider;
import org.apache.hudi.utilities.schema.SchemaProvider;
+
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/Source.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/Source.java
index 0ed1e6c..48e3bd7 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/Source.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/Source.java
@@ -18,15 +18,17 @@
package org.apache.hudi.utilities.sources;
-import java.io.Serializable;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.utilities.schema.SchemaProvider;
+
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
+import java.io.Serializable;
+
/**
* Represents a source from which we can tail data. Assumes a constructor that takes properties.
*/
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/AvroConvertor.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/AvroConvertor.java
index d5c4621..97bb937 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/AvroConvertor.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/AvroConvertor.java
@@ -18,13 +18,15 @@
package org.apache.hudi.utilities.sources.helpers;
+import org.apache.hudi.avro.MercifulJsonConverter;
+
import com.twitter.bijection.Injection;
import com.twitter.bijection.avro.GenericAvroCodecs;
-import java.io.IOException;
-import java.io.Serializable;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
-import org.apache.hudi.avro.MercifulJsonConverter;
+
+import java.io.IOException;
+import java.io.Serializable;
/**
* Convert a variety of datum into Avro GenericRecords. Has a bunch of lazy fields to circumvent issues around
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/DFSPathSelector.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/DFSPathSelector.java
index ce979d6..2bc0466 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/DFSPathSelector.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/DFSPathSelector.java
@@ -18,18 +18,6 @@
package org.apache.hudi.utilities.sources.helpers;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Comparator;
-import java.util.List;
-import java.util.stream.Collectors;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.LocatedFileStatus;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hudi.DataSourceUtils;
import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.common.util.Option;
@@ -38,6 +26,20 @@ import org.apache.hudi.common.util.collection.ImmutablePair;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.exception.HoodieIOException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.List;
+import java.util.stream.Collectors;
+
public class DFSPathSelector {
/**
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/IncrSourceHelper.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/IncrSourceHelper.java
index 62d35cd..c6430ea 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/IncrSourceHelper.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/IncrSourceHelper.java
@@ -18,12 +18,13 @@
package org.apache.hudi.utilities.sources.helpers;
-import com.google.common.base.Preconditions;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.HoodieTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;
+
+import com.google.common.base.Preconditions;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Row;
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/KafkaOffsetGen.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/KafkaOffsetGen.java
index 873e793..6bde10f 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/KafkaOffsetGen.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/KafkaOffsetGen.java
@@ -18,23 +18,26 @@
package org.apache.hudi.utilities.sources.helpers;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.stream.Collectors;
-import kafka.common.TopicAndPartition;
import org.apache.hudi.DataSourceUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.exception.HoodieNotSupportedException;
import org.apache.hudi.utilities.exception.HoodieDeltaStreamerException;
+
+import kafka.common.TopicAndPartition;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.streaming.kafka.KafkaCluster;
import org.apache.spark.streaming.kafka.KafkaCluster.LeaderOffset;
import org.apache.spark.streaming.kafka.OffsetRange;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.stream.Collectors;
+
import scala.Predef;
import scala.collection.JavaConverters;
import scala.collection.immutable.Map;
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/FlatteningTransformer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/FlatteningTransformer.java
index 858fbd8..7c41e8d 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/FlatteningTransformer.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/FlatteningTransformer.java
@@ -18,8 +18,8 @@
package org.apache.hudi.utilities.transform;
-import java.util.UUID;
import org.apache.hudi.common.util.TypedProperties;
+
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaSparkContext;
@@ -29,6 +29,8 @@ import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
+import java.util.UUID;
+
/**
* Transformer that can flatten nested objects. It currently doesn't unnest arrays.
*/
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/IdentityTransformer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/IdentityTransformer.java
index ffc0fad..f74291f 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/IdentityTransformer.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/IdentityTransformer.java
@@ -19,6 +19,7 @@
package org.apache.hudi.utilities.transform;
import org.apache.hudi.common.util.TypedProperties;
+
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/SqlQueryBasedTransformer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/SqlQueryBasedTransformer.java
index b5e2e37..ff563a1 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/SqlQueryBasedTransformer.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/SqlQueryBasedTransformer.java
@@ -18,8 +18,8 @@
package org.apache.hudi.utilities.transform;
-import java.util.UUID;
import org.apache.hudi.common.util.TypedProperties;
+
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaSparkContext;
@@ -27,6 +27,8 @@ import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
+import java.util.UUID;
+
/**
* A transformer that allows a sql-query template be used to transform the source before writing to Hudi data-set.
*
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/Transformer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/Transformer.java
index 7433f6f..f97b302 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/Transformer.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/Transformer.java
@@ -19,6 +19,7 @@
package org.apache.hudi.utilities.transform;
import org.apache.hudi.common.util.TypedProperties;
+
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestFlatteningTransformer.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestFlatteningTransformer.java
index 3e7d217..d119102 100644
--- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestFlatteningTransformer.java
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestFlatteningTransformer.java
@@ -18,15 +18,16 @@
package org.apache.hudi.utilities;
-import static org.junit.Assert.assertEquals;
-
import org.apache.hudi.utilities.transform.FlatteningTransformer;
+
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+
public class TestFlatteningTransformer {
@Test
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHDFSParquetImporter.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHDFSParquetImporter.java
index 045042f..6ed937c 100644
--- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHDFSParquetImporter.java
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHDFSParquetImporter.java
@@ -18,19 +18,14 @@
package org.apache.hudi.utilities;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import org.apache.hudi.HoodieReadClient;
+import org.apache.hudi.HoodieWriteClient;
+import org.apache.hudi.common.HoodieTestDataGenerator;
+import org.apache.hudi.common.minicluster.HdfsTestService;
+import org.apache.hudi.common.model.HoodieTestUtils;
+import org.apache.hudi.common.table.HoodieTimeline;
+import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
-import java.io.IOException;
-import java.io.Serializable;
-import java.text.ParseException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.LocatedFileStatus;
@@ -38,13 +33,6 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hudi.HoodieReadClient;
-import org.apache.hudi.HoodieWriteClient;
-import org.apache.hudi.common.HoodieTestDataGenerator;
-import org.apache.hudi.common.minicluster.HdfsTestService;
-import org.apache.hudi.common.model.HoodieTestUtils;
-import org.apache.hudi.common.table.HoodieTimeline;
-import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.parquet.avro.AvroParquetWriter;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.spark.SparkConf;
@@ -54,6 +42,20 @@ import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
+import java.io.IOException;
+import java.io.Serializable;
+import java.text.ParseException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
public class TestHDFSParquetImporter implements Serializable {
private static String dfsBasePath;
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieDeltaStreamer.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieDeltaStreamer.java
index 5cdb532..d2be913 100644
--- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieDeltaStreamer.java
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieDeltaStreamer.java
@@ -18,23 +18,6 @@
package org.apache.hudi.utilities;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Random;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.TimeUnit;
-import java.util.function.Function;
-import java.util.stream.Collectors;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
import org.apache.hudi.DataSourceWriteOptions;
import org.apache.hudi.SimpleKeyGenerator;
import org.apache.hudi.common.model.HoodieCommitMetadata;
@@ -62,6 +45,11 @@ import org.apache.hudi.utilities.sources.TestDataSource;
import org.apache.hudi.utilities.sources.config.TestSourceConfig;
import org.apache.hudi.utilities.transform.SqlQueryBasedTransformer;
import org.apache.hudi.utilities.transform.Transformer;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaRDD;
@@ -80,6 +68,20 @@ import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
/**
* Basic tests against {@link HoodieDeltaStreamer}, by issuing bulk_inserts, upserts, inserts. Check counts at the end.
*/
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieSnapshotCopier.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieSnapshotCopier.java
index f64c38b..349b5aa 100644
--- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieSnapshotCopier.java
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieSnapshotCopier.java
@@ -18,25 +18,27 @@
package org.apache.hudi.utilities;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
-import java.io.File;
-import java.io.IOException;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.HoodieCommonTestHarness;
import org.apache.hudi.common.HoodieTestDataGenerator;
import org.apache.hudi.common.model.HoodieTestUtils;
import org.apache.hudi.common.util.FSUtils;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
+import java.io.File;
+import java.io.IOException;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
public class TestHoodieSnapshotCopier extends HoodieCommonTestHarness {
private static String TEST_WRITE_TOKEN = "1-0-1";
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestSchedulerConfGenerator.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestSchedulerConfGenerator.java
index 3f83752..af1f566 100644
--- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestSchedulerConfGenerator.java
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestSchedulerConfGenerator.java
@@ -18,15 +18,17 @@
package org.apache.hudi.utilities;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
-
-import java.util.Map;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer;
import org.apache.hudi.utilities.deltastreamer.SchedulerConfGenerator;
+
import org.junit.Test;
+import java.util.Map;
+
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+
public class TestSchedulerConfGenerator {
@Test
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/UtilitiesTestBase.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/UtilitiesTestBase.java
index 46b0dab..cfe6798 100644
--- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/UtilitiesTestBase.java
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/UtilitiesTestBase.java
@@ -18,21 +18,6 @@
package org.apache.hudi.utilities;
-import com.google.common.collect.ImmutableList;
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.List;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.generic.IndexedRecord;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hdfs.DistributedFileSystem;
-import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hive.service.server.HiveServer2;
import org.apache.hudi.common.HoodieTestDataGenerator;
import org.apache.hudi.common.TestRawTripPayload;
import org.apache.hudi.common.minicluster.HdfsTestService;
@@ -46,6 +31,16 @@ import org.apache.hudi.hive.HiveSyncConfig;
import org.apache.hudi.hive.HoodieHiveClient;
import org.apache.hudi.hive.util.HiveTestService;
import org.apache.hudi.utilities.sources.TestDataSource;
+
+import com.google.common.collect.ImmutableList;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hive.service.server.HiveServer2;
import org.apache.parquet.avro.AvroParquetWriter;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.spark.api.java.JavaSparkContext;
@@ -56,6 +51,13 @@ import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.List;
+
/**
* Abstract test that provides a dfs & spark contexts.
*
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/AbstractBaseTestSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/AbstractBaseTestSource.java
index 58a7d58..3dc6b33 100644
--- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/AbstractBaseTestSource.java
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/AbstractBaseTestSource.java
@@ -18,13 +18,6 @@
package org.apache.hudi.utilities.sources;
-import java.io.File;
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.stream.Stream;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.generic.IndexedRecord;
import org.apache.hudi.common.HoodieTestDataGenerator;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.util.Option;
@@ -33,9 +26,18 @@ import org.apache.hudi.common.util.collection.RocksDBBasedMap;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.config.TestSourceConfig;
+
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.stream.Stream;
+
public abstract class AbstractBaseTestSource extends AvroSource {
static final int DEFAULT_PARTITION_NUM = 0;
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/DistributedTestDataSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/DistributedTestDataSource.java
index 8e87767..9e8b3c4 100644
--- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/DistributedTestDataSource.java
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/DistributedTestDataSource.java
@@ -18,18 +18,20 @@
package org.apache.hudi.utilities.sources;
-import java.util.Iterator;
-import java.util.stream.Collectors;
-import java.util.stream.IntStream;
-import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.config.TestSourceConfig;
+
+import org.apache.avro.generic.GenericRecord;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
+import java.util.Iterator;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
/**
* A Test DataSource which scales test-data generation by using spark parallelism.
*/
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestDFSSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestDFSSource.java
index 4d4fafb..f8b4869 100644
--- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestDFSSource.java
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestDFSSource.java
@@ -18,16 +18,6 @@
package org.apache.hudi.utilities.sources;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-import java.util.List;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.LocatedFileStatus;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hudi.AvroConversionUtils;
import org.apache.hudi.common.HoodieTestDataGenerator;
import org.apache.hudi.common.util.Option;
@@ -35,6 +25,12 @@ import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.utilities.UtilitiesTestBase;
import org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter;
import org.apache.hudi.utilities.schema.FilebasedSchemaProvider;
+
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
@@ -45,6 +41,12 @@ import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
+import java.io.IOException;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
/**
* Basic tests against all subclasses of {@link JsonDFSSource} and {@link ParquetDFSSource}
*/
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestDataSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestDataSource.java
index 1ba75ea..c6130a6 100644
--- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestDataSource.java
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestDataSource.java
@@ -18,18 +18,20 @@
package org.apache.hudi.utilities.sources;
-import java.util.List;
-import java.util.stream.Collectors;
-import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.utilities.schema.SchemaProvider;
+
+import org.apache.avro.generic.GenericRecord;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
+import java.util.List;
+import java.util.stream.Collectors;
+
/**
* An implementation of {@link Source}, that emits test upserts.
*/
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestKafkaSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestKafkaSource.java
index c1ca1f0..f2aa794 100644
--- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestKafkaSource.java
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestKafkaSource.java
@@ -18,12 +18,6 @@
package org.apache.hudi.utilities.sources;
-import static org.junit.Assert.assertEquals;
-
-import java.io.IOException;
-import java.util.HashMap;
-import kafka.common.TopicAndPartition;
-import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.AvroConversionUtils;
import org.apache.hudi.common.HoodieTestDataGenerator;
import org.apache.hudi.common.util.Option;
@@ -33,6 +27,9 @@ import org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter;
import org.apache.hudi.utilities.schema.FilebasedSchemaProvider;
import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.CheckpointUtils;
import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.Config;
+
+import kafka.common.TopicAndPartition;
+import org.apache.avro.generic.GenericRecord;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
@@ -45,6 +42,11 @@ import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
+import java.io.IOException;
+import java.util.HashMap;
+
+import static org.junit.Assert.assertEquals;
+
/**
* Tests against {@link AvroKafkaSource}
*/