Posted to commits@hudi.apache.org by vi...@apache.org on 2020/08/13 07:33:48 UTC
[hudi] branch master updated: [HUDI-1013] Adding Bulk Insert V2 implementation (#1834)
This is an automated email from the ASF dual-hosted git repository.
vinoth pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 379cf07 [HUDI-1013] Adding Bulk Insert V2 implementation (#1834)
379cf07 is described below
commit 379cf0786fe9fea94ec8c0da7d467ae2fb30dd0b
Author: Sivabalan Narayanan <si...@uber.com>
AuthorDate: Thu Aug 13 03:33:39 2020 -0400
[HUDI-1013] Adding Bulk Insert V2 implementation (#1834)
- Adding the ability to use native Spark row writing for bulk_insert
- Controlled by the `ENABLE_ROW_WRITER_OPT_KEY` datasource write option (see the usage sketch after the commit message)
- Introduced KeyGeneratorInterface in hudi-client, moved KeyGenerator back to hudi-spark
- Simplified the new API additions to just two new methods: getRecordKey(row), getPartitionPath(row)
- Fixed all built-in key generators to implement the new APIs
- Made the field position map lazily created upon the first call to the row-based APIs
- Implemented native row-based key generators for CustomKeyGenerator
- Fixed all the tests to work with the new APIs
Co-authored-by: Balaji Varadarajan <va...@uber.com>
Co-authored-by: Vinoth Chandar <vi...@apache.org>
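For illustration, a minimal Spark write that exercises this new path might look like the sketch below. The literal option key "hoodie.datasource.write.row.writer.enable" is an assumption for what `ENABLE_ROW_WRITER_OPT_KEY` resolves to, and the remaining option names are the standard Hudi datasource write configs shown only for context; none of these are taken from this diff.

    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Row;
    import org.apache.spark.sql.SaveMode;

    public class BulkInsertRowWriterExample {
      // Bulk-inserts a DataFrame through the native row writing path (option names are illustrative).
      public static void bulkInsert(Dataset<Row> df, String basePath) {
        df.write().format("hudi")
            .option("hoodie.table.name", "example_tbl")
            .option("hoodie.datasource.write.operation", "bulk_insert")
            // assumed literal value of ENABLE_ROW_WRITER_OPT_KEY
            .option("hoodie.datasource.write.row.writer.enable", "true")
            .option("hoodie.datasource.write.recordkey.field", "_row_key")
            .option("hoodie.datasource.write.partitionpath.field", "partition")
            .mode(SaveMode.Overwrite)
            .save(basePath);
      }
    }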
---
.../scala/org/apache/hudi/cli/SparkHelpers.scala | 4 +-
.../hudi/client/AbstractHoodieWriteClient.java | 11 +-
.../hudi/client/HoodieInternalWriteStatus.java | 150 ++++++++++
.../hudi/client/model/HoodieInternalRow.java | 243 ++++++++++++++++
.../org/apache/hudi/io/HoodieRowCreateHandle.java | 203 +++++++++++++
.../hudi/io/storage/HoodieAvroParquetConfig.java | 28 +-
...uetConfig.java => HoodieBaseParquetConfig.java} | 23 +-
.../hudi/io/storage/HoodieFileWriterFactory.java | 2 +-
.../io/storage/HoodieInternalRowFileWriter.java | 40 +--
.../HoodieInternalRowFileWriterFactory.java | 79 +++++
.../io/storage/HoodieInternalRowParquetWriter.java | 72 +++++
.../hudi/io/storage/HoodieParquetWriter.java | 2 +-
.../hudi/io/storage/HoodieRowParquetConfig.java | 28 +-
.../io/storage/HoodieRowParquetWriteSupport.java | 89 ++++++
.../apache/hudi/keygen/BuiltinKeyGenerator.java | 88 ------
.../apache/hudi/keygen/KeyGeneratorInterface.java | 35 +--
.../bootstrap/BootstrapCommitActionExecutor.java | 6 +-
.../hudi/client/TestHoodieInternalWriteStatus.java | 87 ++++++
.../hudi/client/model/TestHoodieInternalRow.java | 239 +++++++++++++++
.../apache/hudi/io/TestHoodieRowCreateHandle.java | 231 +++++++++++++++
.../TestHoodieInternalRowParquetWriter.java | 117 ++++++++
.../hudi/testutils/HoodieClientTestUtils.java | 4 +-
.../hudi/testutils/SparkDatasetTestUtils.java | 175 +++++++++++
.../org/apache/hudi/common/model/HoodieRecord.java | 8 +
.../src/test/resources/timestamp-test-evolved.avsc | 2 +-
.../main/java/org/apache/hudi/DataSourceUtils.java | 57 ++--
.../apache/hudi/HoodieDatasetBulkInsertHelper.java | 108 +++++++
.../org/apache/hudi/internal/DefaultSource.java | 91 ++++++
.../HoodieBulkInsertDataInternalWriter.java | 119 ++++++++
.../HoodieBulkInsertDataInternalWriterFactory.java | 52 ++++
.../internal/HoodieDataSourceInternalWriter.java | 119 ++++++++
.../HoodieWriterCommitMessage.java} | 33 +--
.../apache/hudi/keygen/BuiltinKeyGenerator.java | 130 +++++++++
.../apache/hudi/keygen/ComplexKeyGenerator.java | 33 +--
.../org/apache/hudi/keygen/CustomKeyGenerator.java | 75 +++--
.../hudi/keygen/GlobalDeleteKeyGenerator.java | 28 +-
.../java/org/apache/hudi/keygen/KeyGenUtils.java | 0
.../java/org/apache/hudi/keygen/KeyGenerator.java | 35 ++-
.../hudi/keygen/NonpartitionedKeyGenerator.java | 15 +-
.../apache/hudi/keygen/RowKeyGeneratorHelper.java | 202 +++++++++++++
.../org/apache/hudi/keygen/SimpleKeyGenerator.java | 48 ++-
.../hudi/keygen/TimestampBasedKeyGenerator.java | 117 +++++---
.../scala/org/apache/hudi/DataSourceOptions.scala | 11 +-
.../main/scala/org/apache/hudi/DefaultSource.scala | 6 +-
.../org/apache/hudi/HoodieSparkSqlWriter.scala | 100 +++----
.../scala/org/apache/hudi/HoodieWriterUtils.scala | 77 +++++
hudi-spark/src/test/java/HoodieJavaApp.java | 6 +-
.../src/test/java/HoodieJavaStreamingApp.java | 2 +-
.../hudi/TestHoodieDatasetBulkInsertHelper.java | 156 ++++++++++
.../TestHoodieBulkInsertDataInternalWriter.java | 213 ++++++++++++++
.../TestHoodieDataSourceInternalWriter.java | 321 +++++++++++++++++++++
.../hudi/keygen/TestComplexKeyGenerator.java | 12 +-
.../apache/hudi/keygen/TestCustomKeyGenerator.java | 67 ++++-
.../hudi/keygen/TestGlobalDeleteKeyGenerator.java | 13 +-
.../apache/hudi/keygen/TestSimpleKeyGenerator.java | 29 +-
.../keygen/TestTimestampBasedKeyGenerator.java | 236 ++++++++++-----
.../apache/hudi/testutils/DataSourceTestUtils.java | 106 +++++++
.../KeyGeneratorTestUtilities.java} | 34 ++-
.../src/test/resources/exampleSchema.txt | 26 +-
.../org/apache/hudi/TestDataSourceDefaults.scala | 301 +++++++++++++++++--
.../functional/HoodieSparkSqlWriterSuite.scala | 139 ++++++++-
style/checkstyle-suppressions.xml | 2 +
62 files changed, 4522 insertions(+), 563 deletions(-)
diff --git a/hudi-cli/src/main/scala/org/apache/hudi/cli/SparkHelpers.scala b/hudi-cli/src/main/scala/org/apache/hudi/cli/SparkHelpers.scala
index 7de5f42..6859f70 100644
--- a/hudi-cli/src/main/scala/org/apache/hudi/cli/SparkHelpers.scala
+++ b/hudi-cli/src/main/scala/org/apache/hudi/cli/SparkHelpers.scala
@@ -28,7 +28,7 @@ import org.apache.hudi.common.bloom.{BloomFilter, BloomFilterFactory}
import org.apache.hudi.common.model.HoodieRecord
import org.apache.hudi.common.util.ParquetUtils
import org.apache.hudi.config.{HoodieIndexConfig, HoodieStorageConfig}
-import org.apache.hudi.io.storage.{HoodieParquetConfig, HoodieParquetWriter}
+import org.apache.hudi.io.storage.{HoodieAvroParquetConfig, HoodieParquetWriter}
import org.apache.parquet.avro.AvroSchemaConverter
import org.apache.parquet.hadoop.metadata.CompressionCodecName
import org.apache.spark.sql.{DataFrame, SQLContext}
@@ -45,7 +45,7 @@ object SparkHelpers {
val filter: BloomFilter = BloomFilterFactory.createBloomFilter(HoodieIndexConfig.DEFAULT_BLOOM_FILTER_NUM_ENTRIES.toInt, HoodieIndexConfig.DEFAULT_BLOOM_FILTER_FPP.toDouble,
HoodieIndexConfig.DEFAULT_HOODIE_BLOOM_INDEX_FILTER_DYNAMIC_MAX_ENTRIES.toInt, HoodieIndexConfig.DEFAULT_BLOOM_INDEX_FILTER_TYPE);
val writeSupport: HoodieAvroWriteSupport = new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema, filter)
- val parquetConfig: HoodieParquetConfig = new HoodieParquetConfig(writeSupport, CompressionCodecName.GZIP, HoodieStorageConfig.DEFAULT_PARQUET_BLOCK_SIZE_BYTES.toInt, HoodieStorageConfig.DEFAULT_PARQUET_PAGE_SIZE_BYTES.toInt, HoodieStorageConfig.DEFAULT_PARQUET_FILE_MAX_BYTES.toInt, fs.getConf, HoodieStorageConfig.DEFAULT_STREAM_COMPRESSION_RATIO.toDouble)
+ val parquetConfig: HoodieAvroParquetConfig = new HoodieAvroParquetConfig(writeSupport, CompressionCodecName.GZIP, HoodieStorageConfig.DEFAULT_PARQUET_BLOCK_SIZE_BYTES.toInt, HoodieStorageConfig.DEFAULT_PARQUET_PAGE_SIZE_BYTES.toInt, HoodieStorageConfig.DEFAULT_PARQUET_FILE_MAX_BYTES.toInt, fs.getConf, HoodieStorageConfig.DEFAULT_STREAM_COMPRESSION_RATIO.toDouble)
// Add current classLoad for config, if not will throw classNotFound of 'HoodieWrapperFileSystem'.
parquetConfig.getHadoopConf().setClassLoader(Thread.currentThread.getContextClassLoader)
diff --git a/hudi-client/src/main/java/org/apache/hudi/client/AbstractHoodieWriteClient.java b/hudi-client/src/main/java/org/apache/hudi/client/AbstractHoodieWriteClient.java
index 81b0161..d0526b0 100644
--- a/hudi-client/src/main/java/org/apache/hudi/client/AbstractHoodieWriteClient.java
+++ b/hudi-client/src/main/java/org/apache/hudi/client/AbstractHoodieWriteClient.java
@@ -95,20 +95,19 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload> e
*/
public boolean commit(String instantTime, JavaRDD<WriteStatus> writeStatuses,
Option<Map<String, String>> extraMetadata) {
- HoodieTableMetaClient metaClient = createMetaClient(false);
- return commit(instantTime, writeStatuses, extraMetadata, metaClient.getCommitActionType());
+ List<HoodieWriteStat> stats = writeStatuses.map(WriteStatus::getStat).collect();
+ return commitStats(instantTime, stats, extraMetadata);
}
- private boolean commit(String instantTime, JavaRDD<WriteStatus> writeStatuses,
- Option<Map<String, String>> extraMetadata, String actionType) {
-
+ public boolean commitStats(String instantTime, List<HoodieWriteStat> stats, Option<Map<String, String>> extraMetadata) {
LOG.info("Committing " + instantTime);
+ HoodieTableMetaClient metaClient = createMetaClient(false);
+ String actionType = metaClient.getCommitActionType();
// Create a Hoodie table which encapsulated the commits and files visible
HoodieTable<T> table = HoodieTable.create(config, hadoopConf);
HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
HoodieCommitMetadata metadata = new HoodieCommitMetadata();
- List<HoodieWriteStat> stats = writeStatuses.map(WriteStatus::getStat).collect();
stats.forEach(stat -> metadata.addWriteStat(stat.getPartitionPath(), stat));
// Finalize write
diff --git a/hudi-client/src/main/java/org/apache/hudi/client/HoodieInternalWriteStatus.java b/hudi-client/src/main/java/org/apache/hudi/client/HoodieInternalWriteStatus.java
new file mode 100644
index 0000000..87a117b
--- /dev/null
+++ b/hudi-client/src/main/java/org/apache/hudi/client/HoodieInternalWriteStatus.java
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.client;
+
+import org.apache.hudi.common.model.HoodieWriteStat;
+import org.apache.hudi.common.util.collection.Pair;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+/**
+ * Hoodie's internal write status used in datasource implementation of bulk insert.
+ */
+public class HoodieInternalWriteStatus implements Serializable {
+
+ private static final long serialVersionUID = 1L;
+ private static final long RANDOM_SEED = 9038412832L;
+
+ private String fileId;
+ private String partitionPath;
+ private List<String> successRecordKeys = new ArrayList<>();
+ private List<Pair<String, Throwable>> failedRecordKeys = new ArrayList<>();
+
+ private HoodieWriteStat stat;
+
+ private long totalRecords = 0;
+ private long totalErrorRecords = 0;
+ private Throwable globalError = null;
+
+ private final double failureFraction;
+ private final boolean trackSuccessRecords;
+ private final transient Random random;
+
+ public HoodieInternalWriteStatus(Boolean trackSuccessRecords, Double failureFraction) {
+ this.trackSuccessRecords = trackSuccessRecords;
+ this.failureFraction = failureFraction;
+ this.random = new Random(RANDOM_SEED);
+ }
+
+ public void markSuccess(String recordKey) {
+ if (trackSuccessRecords) {
+ this.successRecordKeys.add(recordKey);
+ }
+ totalRecords++;
+ }
+
+ public void markFailure(String recordKey, Throwable t) {
+ if (failedRecordKeys.isEmpty() || (random.nextDouble() <= failureFraction)) {
+ failedRecordKeys.add(Pair.of(recordKey, t));
+ }
+ totalRecords++;
+ }
+
+ public boolean hasErrors() {
+ return failedRecordKeys.size() != 0;
+ }
+
+ public HoodieWriteStat getStat() {
+ return stat;
+ }
+
+ public void setStat(HoodieWriteStat stat) {
+ this.stat = stat;
+ }
+
+ public String getFileId() {
+ return fileId;
+ }
+
+ public void setFileId(String fileId) {
+ this.fileId = fileId;
+ }
+
+ public String getPartitionPath() {
+ return partitionPath;
+ }
+
+ public void setPartitionPath(String partitionPath) {
+ this.partitionPath = partitionPath;
+ }
+
+ public List<String> getSuccessRecordKeys() {
+ return successRecordKeys;
+ }
+
+ public long getFailedRowsSize() {
+ return failedRecordKeys.size();
+ }
+
+ public List<Pair<String, Throwable>> getFailedRecordKeys() {
+ return failedRecordKeys;
+ }
+
+ public void setFailedRecordKeys(List<Pair<String, Throwable>> failedRecordKeys) {
+ this.failedRecordKeys = failedRecordKeys;
+ }
+
+ public long getTotalRecords() {
+ return totalRecords;
+ }
+
+ public void setTotalRecords(long totalRecords) {
+ this.totalRecords = totalRecords;
+ }
+
+ public long getTotalErrorRecords() {
+ return totalErrorRecords;
+ }
+
+ public void setTotalErrorRecords(long totalErrorRecords) {
+ this.totalErrorRecords = totalErrorRecords;
+ }
+
+ public Throwable getGlobalError() {
+ return globalError;
+ }
+
+ public void setGlobalError(Throwable globalError) {
+ this.globalError = globalError;
+ }
+
+ public void setSuccessRecordKeys(List<String> successRecordKeys) {
+ this.successRecordKeys = successRecordKeys;
+ }
+
+ @Override
+ public String toString() {
+ return "PartitionPath " + partitionPath + ", FileID " + fileId + ", Success records "
+ + totalRecords + ", errored Rows " + totalErrorRecords
+ + ", global error " + (globalError != null);
+ }
+}
diff --git a/hudi-client/src/main/java/org/apache/hudi/client/model/HoodieInternalRow.java b/hudi-client/src/main/java/org/apache/hudi/client/model/HoodieInternalRow.java
new file mode 100644
index 0000000..5fcd1dc
--- /dev/null
+++ b/hudi-client/src/main/java/org/apache/hudi/client/model/HoodieInternalRow.java
@@ -0,0 +1,243 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.client.model;
+
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.catalyst.util.ArrayData;
+import org.apache.spark.sql.catalyst.util.MapData;
+import org.apache.spark.sql.types.DataType;
+import org.apache.spark.sql.types.Decimal;
+import org.apache.spark.unsafe.types.CalendarInterval;
+import org.apache.spark.unsafe.types.UTF8String;
+
+/**
+ * Internal Row implementation for Hoodie Row. It wraps an {@link InternalRow} and keeps the meta columns locally. The wrapped {@link InternalRow}
+ * still contains the meta columns as well, but {@link HoodieInternalRow} intercepts accesses to the meta columns and serves them from its
+ * own copies rather than fetching them from the wrapped {@link InternalRow}.
+ */
+public class HoodieInternalRow extends InternalRow {
+
+ private String commitTime;
+ private String commitSeqNumber;
+ private String recordKey;
+ private String partitionPath;
+ private String fileName;
+ private InternalRow row;
+
+ public HoodieInternalRow(String commitTime, String commitSeqNumber, String recordKey, String partitionPath,
+ String fileName, InternalRow row) {
+ this.commitTime = commitTime;
+ this.commitSeqNumber = commitSeqNumber;
+ this.recordKey = recordKey;
+ this.partitionPath = partitionPath;
+ this.fileName = fileName;
+ this.row = row;
+ }
+
+ @Override
+ public int numFields() {
+ return row.numFields();
+ }
+
+ @Override
+ public void setNullAt(int i) {
+ if (i < HoodieRecord.HOODIE_META_COLUMNS.size()) {
+ switch (i) {
+ case 0: {
+ this.commitTime = null;
+ break;
+ }
+ case 1: {
+ this.commitSeqNumber = null;
+ break;
+ }
+ case 2: {
+ this.recordKey = null;
+ break;
+ }
+ case 3: {
+ this.partitionPath = null;
+ break;
+ }
+ case 4: {
+ this.fileName = null;
+ break;
+ }
+ default: throw new IllegalArgumentException("Not expected");
+ }
+ } else {
+ row.setNullAt(i);
+ }
+ }
+
+ @Override
+ public void update(int i, Object value) {
+ if (i < HoodieRecord.HOODIE_META_COLUMNS.size()) {
+ switch (i) {
+ case 0: {
+ this.commitTime = value.toString();
+ break;
+ }
+ case 1: {
+ this.commitSeqNumber = value.toString();
+ break;
+ }
+ case 2: {
+ this.recordKey = value.toString();
+ break;
+ }
+ case 3: {
+ this.partitionPath = value.toString();
+ break;
+ }
+ case 4: {
+ this.fileName = value.toString();
+ break;
+ }
+ default: throw new IllegalArgumentException("Not expected");
+ }
+ } else {
+ row.update(i, value);
+ }
+ }
+
+ private String getMetaColumnVal(int ordinal) {
+ switch (ordinal) {
+ case 0: {
+ return commitTime;
+ }
+ case 1: {
+ return commitSeqNumber;
+ }
+ case 2: {
+ return recordKey;
+ }
+ case 3: {
+ return partitionPath;
+ }
+ case 4: {
+ return fileName;
+ }
+ default: throw new IllegalArgumentException("Not expected");
+ }
+ }
+
+ @Override
+ public boolean isNullAt(int ordinal) {
+ if (ordinal < HoodieRecord.HOODIE_META_COLUMNS.size()) {
+ return null == getMetaColumnVal(ordinal);
+ }
+ return row.isNullAt(ordinal);
+ }
+
+ @Override
+ public boolean getBoolean(int ordinal) {
+ return row.getBoolean(ordinal);
+ }
+
+ @Override
+ public byte getByte(int ordinal) {
+ return row.getByte(ordinal);
+ }
+
+ @Override
+ public short getShort(int ordinal) {
+ return row.getShort(ordinal);
+ }
+
+ @Override
+ public int getInt(int ordinal) {
+ return row.getInt(ordinal);
+ }
+
+ @Override
+ public long getLong(int ordinal) {
+ return row.getLong(ordinal);
+ }
+
+ @Override
+ public float getFloat(int ordinal) {
+ return row.getFloat(ordinal);
+ }
+
+ @Override
+ public double getDouble(int ordinal) {
+ return row.getDouble(ordinal);
+ }
+
+ @Override
+ public Decimal getDecimal(int ordinal, int precision, int scale) {
+ return row.getDecimal(ordinal, precision, scale);
+ }
+
+ @Override
+ public UTF8String getUTF8String(int ordinal) {
+ if (ordinal < HoodieRecord.HOODIE_META_COLUMNS.size()) {
+ return UTF8String.fromBytes(getMetaColumnVal(ordinal).getBytes());
+ }
+ return row.getUTF8String(ordinal);
+ }
+
+ @Override
+ public String getString(int ordinal) {
+ if (ordinal < HoodieRecord.HOODIE_META_COLUMNS.size()) {
+ return new String(getMetaColumnVal(ordinal).getBytes());
+ }
+ return row.getString(ordinal);
+ }
+
+ @Override
+ public byte[] getBinary(int ordinal) {
+ return row.getBinary(ordinal);
+ }
+
+ @Override
+ public CalendarInterval getInterval(int ordinal) {
+ return row.getInterval(ordinal);
+ }
+
+ @Override
+ public InternalRow getStruct(int ordinal, int numFields) {
+ return row.getStruct(ordinal, numFields);
+ }
+
+ @Override
+ public ArrayData getArray(int ordinal) {
+ return row.getArray(ordinal);
+ }
+
+ @Override
+ public MapData getMap(int ordinal) {
+ return row.getMap(ordinal);
+ }
+
+ @Override
+ public Object get(int ordinal, DataType dataType) {
+ if (ordinal < HoodieRecord.HOODIE_META_COLUMNS.size()) {
+ return UTF8String.fromBytes(getMetaColumnVal(ordinal).getBytes());
+ }
+ return row.get(ordinal, dataType);
+ }
+
+ @Override
+ public InternalRow copy() {
+ return new HoodieInternalRow(commitTime, commitSeqNumber, recordKey, partitionPath, fileName, row.copy());
+ }
+}
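To make the interception described in the class comment above concrete, here is a purely illustrative sketch: meta-column ordinals 0 to 4 are served from the handle's own copies, while every other ordinal delegates to the wrapped row. The GenericInternalRow setup is a test-style construction, not how Hudi wires this internally.

    import org.apache.hudi.client.model.HoodieInternalRow;
    import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;
    import org.apache.spark.unsafe.types.UTF8String;

    public class HoodieInternalRowSketch {
      public static void main(String[] args) {
        // Five meta column slots (left empty in the wrapped row) followed by one data column.
        Object[] values = new Object[] {null, null, null, null, null, UTF8String.fromString("some-data")};
        GenericInternalRow source = new GenericInternalRow(values);
        HoodieInternalRow row = new HoodieInternalRow(
            "20200813073339", "20200813073339_0_1", "key1", "2020/08/13", "file1.parquet", source);
        System.out.println(row.getUTF8String(2)); // "key1": intercepted, served from the local copy
        System.out.println(row.getUTF8String(5)); // "some-data": delegated to the wrapped InternalRow
      }
    }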
diff --git a/hudi-client/src/main/java/org/apache/hudi/io/HoodieRowCreateHandle.java b/hudi-client/src/main/java/org/apache/hudi/io/HoodieRowCreateHandle.java
new file mode 100644
index 0000000..723d9f9
--- /dev/null
+++ b/hudi-client/src/main/java/org/apache/hudi/io/HoodieRowCreateHandle.java
@@ -0,0 +1,203 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.io;
+
+import org.apache.hudi.client.HoodieInternalWriteStatus;
+import org.apache.hudi.client.model.HoodieInternalRow;
+import org.apache.hudi.common.fs.FSUtils;
+import org.apache.hudi.common.model.HoodiePartitionMetadata;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.model.HoodieWriteStat;
+import org.apache.hudi.common.table.HoodieTableConfig;
+import org.apache.hudi.common.util.HoodieTimer;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.exception.HoodieIOException;
+import org.apache.hudi.exception.HoodieInsertException;
+import org.apache.hudi.io.storage.HoodieInternalRowFileWriter;
+import org.apache.hudi.io.storage.HoodieInternalRowFileWriterFactory;
+import org.apache.hudi.table.HoodieTable;
+import org.apache.hudi.table.MarkerFiles;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.types.StructType;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.concurrent.atomic.AtomicLong;
+
+/**
+ * Create handle with InternalRow for the datasource implementation of bulk insert.
+ */
+public class HoodieRowCreateHandle implements Serializable {
+
+ private static final long serialVersionUID = 1L;
+ private static final Logger LOG = LogManager.getLogger(HoodieRowCreateHandle.class);
+ private static final AtomicLong SEQGEN = new AtomicLong(1);
+
+ private final String instantTime;
+ private final int taskPartitionId;
+ private final long taskId;
+ private final long taskEpochId;
+ private final HoodieTable table;
+ private final HoodieWriteConfig writeConfig;
+ private final HoodieInternalRowFileWriter fileWriter;
+ private final String partitionPath;
+ private final Path path;
+ private final String fileId;
+ private final FileSystem fs;
+ private final HoodieInternalWriteStatus writeStatus;
+ private final HoodieTimer currTimer;
+
+ public HoodieRowCreateHandle(HoodieTable table, HoodieWriteConfig writeConfig, String partitionPath, String fileId,
+ String instantTime, int taskPartitionId, long taskId, long taskEpochId,
+ StructType structType) {
+ this.partitionPath = partitionPath;
+ this.table = table;
+ this.writeConfig = writeConfig;
+ this.instantTime = instantTime;
+ this.taskPartitionId = taskPartitionId;
+ this.taskId = taskId;
+ this.taskEpochId = taskEpochId;
+ this.fileId = fileId;
+ this.currTimer = new HoodieTimer();
+ this.currTimer.startTimer();
+ this.fs = table.getMetaClient().getFs();
+ this.path = makeNewPath(partitionPath);
+ this.writeStatus = new HoodieInternalWriteStatus(!table.getIndex().isImplicitWithStorage(),
+ writeConfig.getWriteStatusFailureFraction());
+ writeStatus.setPartitionPath(partitionPath);
+ writeStatus.setFileId(fileId);
+ try {
+ HoodiePartitionMetadata partitionMetadata =
+ new HoodiePartitionMetadata(
+ fs,
+ instantTime,
+ new Path(writeConfig.getBasePath()),
+ FSUtils.getPartitionPath(writeConfig.getBasePath(), partitionPath));
+ partitionMetadata.trySave(taskPartitionId);
+ createMarkerFile(partitionPath, FSUtils.makeDataFileName(this.instantTime, getWriteToken(), this.fileId, table.getBaseFileExtension()));
+ this.fileWriter = createNewFileWriter(path, table, writeConfig, structType);
+ } catch (IOException e) {
+ throw new HoodieInsertException("Failed to initialize file writer for path " + path, e);
+ }
+ LOG.info("New handle created for partition :" + partitionPath + " with fileId " + fileId);
+ }
+
+ /**
+ * Writes an {@link InternalRow} to the underlying HoodieInternalRowFileWriter. Before writing, values for the meta columns are computed as required
+ * and the record is wrapped in a {@link HoodieInternalRow}; the {@link HoodieInternalRow} is what gets written to the HoodieInternalRowFileWriter.
+ * @param record instance of {@link InternalRow} that needs to be written to the fileWriter.
+ * @throws IOException
+ */
+ public void write(InternalRow record) throws IOException {
+ try {
+ String partitionPath = record.getUTF8String(HoodieRecord.HOODIE_META_COLUMNS_NAME_TO_POS.get(
+ HoodieRecord.PARTITION_PATH_METADATA_FIELD)).toString();
+ String seqId = HoodieRecord.generateSequenceId(instantTime, taskPartitionId, SEQGEN.getAndIncrement());
+ String recordKey = record.getUTF8String(HoodieRecord.HOODIE_META_COLUMNS_NAME_TO_POS.get(
+ HoodieRecord.RECORD_KEY_METADATA_FIELD)).toString();
+ HoodieInternalRow internalRow = new HoodieInternalRow(instantTime, seqId, recordKey, partitionPath, path.getName(),
+ record);
+ try {
+ fileWriter.writeRow(recordKey, internalRow);
+ writeStatus.markSuccess(recordKey);
+ } catch (Throwable t) {
+ writeStatus.markFailure(recordKey, t);
+ }
+ } catch (Throwable ge) {
+ writeStatus.setGlobalError(ge);
+ throw ge;
+ }
+ }
+
+ /**
+ * @return {@code true} if this handle can take in more writes, else {@code false}.
+ */
+ public boolean canWrite() {
+ return fileWriter.canWrite();
+ }
+
+ /**
+ * Closes the {@link HoodieRowCreateHandle} and returns an instance of {@link HoodieInternalWriteStatus} containing the stats and
+ * status of the writes to this handle.
+ * @return the {@link HoodieInternalWriteStatus} containing the stats and status of the writes to this handle.
+ * @throws IOException
+ */
+ public HoodieInternalWriteStatus close() throws IOException {
+ fileWriter.close();
+ HoodieWriteStat stat = new HoodieWriteStat();
+ stat.setPartitionPath(partitionPath);
+ stat.setNumWrites(writeStatus.getTotalRecords());
+ stat.setNumDeletes(0);
+ stat.setNumInserts(writeStatus.getTotalRecords());
+ stat.setPrevCommit(HoodieWriteStat.NULL_COMMIT);
+ stat.setFileId(fileId);
+ stat.setPath(new Path(writeConfig.getBasePath()), path);
+ long fileSizeInBytes = FSUtils.getFileSize(table.getMetaClient().getFs(), path);
+ stat.setTotalWriteBytes(fileSizeInBytes);
+ stat.setFileSizeInBytes(fileSizeInBytes);
+ stat.setTotalWriteErrors(writeStatus.getFailedRowsSize());
+ HoodieWriteStat.RuntimeStats runtimeStats = new HoodieWriteStat.RuntimeStats();
+ runtimeStats.setTotalCreateTime(currTimer.endTimer());
+ stat.setRuntimeStats(runtimeStats);
+ writeStatus.setStat(stat);
+ return writeStatus;
+ }
+
+ public String getFileName() {
+ return path.getName();
+ }
+
+ private Path makeNewPath(String partitionPath) {
+ Path path = FSUtils.getPartitionPath(writeConfig.getBasePath(), partitionPath);
+ try {
+ fs.mkdirs(path); // create a new partition as needed.
+ } catch (IOException e) {
+ throw new HoodieIOException("Failed to make dir " + path, e);
+ }
+ HoodieTableConfig tableConfig = table.getMetaClient().getTableConfig();
+ return new Path(path.toString(), FSUtils.makeDataFileName(instantTime, getWriteToken(), fileId,
+ tableConfig.getBaseFileFormat().getFileExtension()));
+ }
+
+ /**
+ * Creates an empty marker file corresponding to storage writer path.
+ *
+ * @param partitionPath Partition path
+ */
+ private void createMarkerFile(String partitionPath, String dataFileName) {
+ MarkerFiles markerFiles = new MarkerFiles(table, instantTime);
+ markerFiles.create(partitionPath, dataFileName, IOType.CREATE);
+ }
+
+ private String getWriteToken() {
+ return taskPartitionId + "-" + taskId + "-" + taskEpochId;
+ }
+
+ private HoodieInternalRowFileWriter createNewFileWriter(
+ Path path, HoodieTable hoodieTable, HoodieWriteConfig config, StructType schema)
+ throws IOException {
+ return HoodieInternalRowFileWriterFactory.getInternalRowFileWriter(
+ path, hoodieTable, config, schema);
+ }
+}
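As a rough sketch of how a task-side writer could drive this handle (the HoodieBulkInsertDataInternalWriter listed in the diffstat plays this role; the wiring below is illustrative rather than lifted from that class):

    import java.io.IOException;
    import java.util.Iterator;

    import org.apache.hudi.client.HoodieInternalWriteStatus;
    import org.apache.hudi.config.HoodieWriteConfig;
    import org.apache.hudi.io.HoodieRowCreateHandle;
    import org.apache.hudi.table.HoodieTable;
    import org.apache.spark.sql.catalyst.InternalRow;
    import org.apache.spark.sql.types.StructType;

    public class RowCreateHandleSketch {
      // Writes rows for a single partitionPath/fileId pair and returns the resulting write status.
      public static HoodieInternalWriteStatus writeAll(HoodieTable table, HoodieWriteConfig config,
          StructType structType, String instantTime, String partitionPath, String fileId,
          int taskPartitionId, long taskId, long epochId, Iterator<InternalRow> rows) throws IOException {
        HoodieRowCreateHandle handle = new HoodieRowCreateHandle(table, config, partitionPath, fileId,
            instantTime, taskPartitionId, taskId, epochId, structType);
        // canWrite() guards the parquet file size budget; a real writer would roll over to a new handle here.
        while (rows.hasNext() && handle.canWrite()) {
          handle.write(rows.next());
        }
        return handle.close(); // close() fills in the HoodieWriteStat consumed at commit time
      }
    }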
diff --git a/hudi-spark/src/test/java/org/apache/hudi/keygen/TestKeyGeneratorUtilities.java b/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetConfig.java
similarity index 50%
copy from hudi-spark/src/test/java/org/apache/hudi/keygen/TestKeyGeneratorUtilities.java
copy to hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetConfig.java
index c0d027e..f934a8a 100644
--- a/hudi-spark/src/test/java/org/apache/hudi/keygen/TestKeyGeneratorUtilities.java
+++ b/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetConfig.java
@@ -16,25 +16,21 @@
* limitations under the License.
*/
-package org.apache.hudi.keygen;
+package org.apache.hudi.io.storage;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericRecord;
+import org.apache.hudi.avro.HoodieAvroWriteSupport;
-public class TestKeyGeneratorUtilities {
+import org.apache.hadoop.conf.Configuration;
+import org.apache.parquet.hadoop.metadata.CompressionCodecName;
- public String exampleSchema = "{\"type\": \"record\",\"name\": \"testrec\",\"fields\": [ "
- + "{\"name\": \"timestamp\",\"type\": \"long\"},{\"name\": \"_row_key\", \"type\": \"string\"},"
- + "{\"name\": \"ts_ms\", \"type\": \"string\"},"
- + "{\"name\": \"pii_col\", \"type\": \"string\"}]}";
+/**
+ * ParquetConfig for writing avro records in Parquet files.
+ */
+public class HoodieAvroParquetConfig extends HoodieBaseParquetConfig<HoodieAvroWriteSupport> {
- public GenericRecord getRecord() {
- GenericRecord record = new GenericData.Record(new Schema.Parser().parse(exampleSchema));
- record.put("timestamp", 4357686);
- record.put("_row_key", "key1");
- record.put("ts_ms", "2020-03-21");
- record.put("pii_col", "pi");
- return record;
+ public HoodieAvroParquetConfig(HoodieAvroWriteSupport writeSupport, CompressionCodecName compressionCodecName,
+ int blockSize, int pageSize, long maxFileSize, Configuration hadoopConf,
+ double compressionRatio) {
+ super(writeSupport, compressionCodecName, blockSize, pageSize, maxFileSize, hadoopConf, compressionRatio);
}
}
diff --git a/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieParquetConfig.java b/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieBaseParquetConfig.java
similarity index 81%
rename from hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieParquetConfig.java
rename to hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieBaseParquetConfig.java
index ca63ee2..6e6f66c 100644
--- a/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieParquetConfig.java
+++ b/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieBaseParquetConfig.java
@@ -18,14 +18,15 @@
package org.apache.hudi.io.storage;
-import org.apache.hudi.avro.HoodieAvroWriteSupport;
-
import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
-public class HoodieParquetConfig {
-
- private HoodieAvroWriteSupport writeSupport;
+/**
+ * Base ParquetConfig to hold config params for writing to Parquet.
+ * @param <T>
+ */
+public class HoodieBaseParquetConfig<T> {
+ private final T writeSupport;
private CompressionCodecName compressionCodecName;
private int blockSize;
private int pageSize;
@@ -33,8 +34,8 @@ public class HoodieParquetConfig {
private Configuration hadoopConf;
private double compressionRatio;
- public HoodieParquetConfig(HoodieAvroWriteSupport writeSupport, CompressionCodecName compressionCodecName,
- int blockSize, int pageSize, long maxFileSize, Configuration hadoopConf, double compressionRatio) {
+ public HoodieBaseParquetConfig(T writeSupport, CompressionCodecName compressionCodecName, int blockSize,
+ int pageSize, long maxFileSize, Configuration hadoopConf, double compressionRatio) {
this.writeSupport = writeSupport;
this.compressionCodecName = compressionCodecName;
this.blockSize = blockSize;
@@ -44,10 +45,6 @@ public class HoodieParquetConfig {
this.compressionRatio = compressionRatio;
}
- public HoodieAvroWriteSupport getWriteSupport() {
- return writeSupport;
- }
-
public CompressionCodecName getCompressionCodecName() {
return compressionCodecName;
}
@@ -71,4 +68,8 @@ public class HoodieParquetConfig {
public double getCompressionRatio() {
return compressionRatio;
}
+
+ public T getWriteSupport() {
+ return writeSupport;
+ }
}
diff --git a/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java b/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java
index 90566fb..0fab31e 100644
--- a/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java
+++ b/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java
@@ -58,7 +58,7 @@ public class HoodieFileWriterFactory {
HoodieAvroWriteSupport writeSupport =
new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema, filter);
- HoodieParquetConfig parquetConfig = new HoodieParquetConfig(writeSupport, config.getParquetCompressionCodec(),
+ HoodieAvroParquetConfig parquetConfig = new HoodieAvroParquetConfig(writeSupport, config.getParquetCompressionCodec(),
config.getParquetBlockSize(), config.getParquetPageSize(), config.getParquetMaxFileSize(),
hoodieTable.getHadoopConf(), config.getParquetCompressionRatio());
diff --git a/hudi-spark/src/test/java/org/apache/hudi/keygen/TestKeyGeneratorUtilities.java b/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieInternalRowFileWriter.java
similarity index 50%
copy from hudi-spark/src/test/java/org/apache/hudi/keygen/TestKeyGeneratorUtilities.java
copy to hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieInternalRowFileWriter.java
index c0d027e..6ab80b6 100644
--- a/hudi-spark/src/test/java/org/apache/hudi/keygen/TestKeyGeneratorUtilities.java
+++ b/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieInternalRowFileWriter.java
@@ -16,25 +16,31 @@
* limitations under the License.
*/
-package org.apache.hudi.keygen;
+package org.apache.hudi.io.storage;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericRecord;
+import org.apache.spark.sql.catalyst.InternalRow;
-public class TestKeyGeneratorUtilities {
+import java.io.IOException;
- public String exampleSchema = "{\"type\": \"record\",\"name\": \"testrec\",\"fields\": [ "
- + "{\"name\": \"timestamp\",\"type\": \"long\"},{\"name\": \"_row_key\", \"type\": \"string\"},"
- + "{\"name\": \"ts_ms\", \"type\": \"string\"},"
- + "{\"name\": \"pii_col\", \"type\": \"string\"}]}";
+/**
+ * Abstraction to assist in writing {@link InternalRow}s, used in the datasource implementation of bulk insert.
+ */
+public interface HoodieInternalRowFileWriter {
+
+ /**
+ * @return {@code true} if this RowFileWriter can take in more writes, else {@code false}.
+ */
+ boolean canWrite();
+
+ /**
+ * Writes an {@link InternalRow} to the HoodieInternalRowFileWriter.
+ *
+ * @throws IOException on any exception while writing.
+ */
+ void writeRow(String key, InternalRow row) throws IOException;
- public GenericRecord getRecord() {
- GenericRecord record = new GenericData.Record(new Schema.Parser().parse(exampleSchema));
- record.put("timestamp", 4357686);
- record.put("_row_key", "key1");
- record.put("ts_ms", "2020-03-21");
- record.put("pii_col", "pi");
- return record;
- }
+ /**
+ * Closes the {@link HoodieInternalRowFileWriter}; no further writes can be taken in after closing.
+ */
+ void close() throws IOException;
}
diff --git a/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieInternalRowFileWriterFactory.java b/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieInternalRowFileWriterFactory.java
new file mode 100644
index 0000000..cb238bb
--- /dev/null
+++ b/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieInternalRowFileWriterFactory.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.io.storage;
+
+import org.apache.hudi.common.bloom.BloomFilter;
+import org.apache.hudi.common.bloom.BloomFilterFactory;
+import org.apache.hudi.common.fs.FSUtils;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.table.HoodieTable;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.spark.sql.types.StructType;
+
+import static org.apache.hudi.common.model.HoodieFileFormat.PARQUET;
+
+import java.io.IOException;
+
+/**
+ * Factory to assist in instantiating a new {@link HoodieInternalRowFileWriter}.
+ */
+public class HoodieInternalRowFileWriterFactory {
+
+ /**
+ * Factory method to assist in instantiating an instance of {@link HoodieInternalRowFileWriter}.
+ * @param path path of the RowFileWriter.
+ * @param hoodieTable instance of {@link HoodieTable} in use.
+ * @param config instance of {@link HoodieWriteConfig} to use.
+ * @param schema schema of the dataset in use.
+ * @return the instantiated {@link HoodieInternalRowFileWriter}.
+ * @throws IOException if the format is not supported or if any exception occurs while instantiating the RowFileWriter.
+ *
+ */
+ public static HoodieInternalRowFileWriter getInternalRowFileWriter(
+ Path path, HoodieTable hoodieTable, HoodieWriteConfig config, StructType schema)
+ throws IOException {
+ final String extension = FSUtils.getFileExtension(path.getName());
+ if (PARQUET.getFileExtension().equals(extension)) {
+ return newParquetInternalRowFileWriter(path, config, schema, hoodieTable);
+ }
+ throw new UnsupportedOperationException(extension + " format not supported yet.");
+ }
+
+ private static HoodieInternalRowFileWriter newParquetInternalRowFileWriter(
+ Path path, HoodieWriteConfig writeConfig, StructType structType, HoodieTable table)
+ throws IOException {
+ BloomFilter filter = BloomFilterFactory.createBloomFilter(
+ writeConfig.getBloomFilterNumEntries(),
+ writeConfig.getBloomFilterFPP(),
+ writeConfig.getDynamicBloomFilterMaxNumEntries(),
+ writeConfig.getBloomFilterType());
+ HoodieRowParquetWriteSupport writeSupport =
+ new HoodieRowParquetWriteSupport(table.getHadoopConf(), structType, filter);
+ return new HoodieInternalRowParquetWriter(
+ path, new HoodieRowParquetConfig(
+ writeSupport,
+ writeConfig.getParquetCompressionCodec(),
+ writeConfig.getParquetBlockSize(),
+ writeConfig.getParquetPageSize(),
+ writeConfig.getParquetMaxFileSize(),
+ writeSupport.getHadoopConf(),
+ writeConfig.getParquetCompressionRatio()));
+ }
+}
diff --git a/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieInternalRowParquetWriter.java b/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieInternalRowParquetWriter.java
new file mode 100644
index 0000000..7d05163
--- /dev/null
+++ b/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieInternalRowParquetWriter.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.io.storage;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hudi.common.fs.FSUtils;
+import org.apache.hudi.common.fs.HoodieWrapperFileSystem;
+
+import org.apache.parquet.hadoop.ParquetFileWriter;
+import org.apache.parquet.hadoop.ParquetWriter;
+import org.apache.spark.sql.catalyst.InternalRow;
+
+import java.io.IOException;
+
+/**
+ * Parquet implementation of {@link HoodieInternalRowFileWriter} to write {@link InternalRow}s.
+ */
+public class HoodieInternalRowParquetWriter extends ParquetWriter<InternalRow>
+ implements HoodieInternalRowFileWriter {
+
+ private final Path file;
+ private final HoodieWrapperFileSystem fs;
+ private final long maxFileSize;
+ private final HoodieRowParquetWriteSupport writeSupport;
+
+ public HoodieInternalRowParquetWriter(Path file, HoodieRowParquetConfig parquetConfig)
+ throws IOException {
+ super(HoodieWrapperFileSystem.convertToHoodiePath(file, parquetConfig.getHadoopConf()),
+ ParquetFileWriter.Mode.CREATE, parquetConfig.getWriteSupport(), parquetConfig.getCompressionCodecName(),
+ parquetConfig.getBlockSize(), parquetConfig.getPageSize(), parquetConfig.getPageSize(),
+ ParquetWriter.DEFAULT_IS_DICTIONARY_ENABLED, ParquetWriter.DEFAULT_IS_VALIDATING_ENABLED,
+ ParquetWriter.DEFAULT_WRITER_VERSION, FSUtils.registerFileSystem(file, parquetConfig.getHadoopConf()));
+ this.file = HoodieWrapperFileSystem.convertToHoodiePath(file, parquetConfig.getHadoopConf());
+ this.fs = (HoodieWrapperFileSystem) this.file.getFileSystem(FSUtils.registerFileSystem(file,
+ parquetConfig.getHadoopConf()));
+ this.maxFileSize = parquetConfig.getMaxFileSize()
+ + Math.round(parquetConfig.getMaxFileSize() * parquetConfig.getCompressionRatio());
+ this.writeSupport = parquetConfig.getWriteSupport();
+ }
+
+ @Override
+ public boolean canWrite() {
+ return fs.getBytesWritten(file) < maxFileSize;
+ }
+
+ @Override
+ public void writeRow(String key, InternalRow row) throws IOException {
+ super.write(row);
+ writeSupport.add(key);
+ }
+
+ @Override
+ public void close() throws IOException {
+ super.close();
+ }
+}
diff --git a/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieParquetWriter.java b/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieParquetWriter.java
index 8c4c7e6..5c8f99f 100644
--- a/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieParquetWriter.java
+++ b/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieParquetWriter.java
@@ -52,7 +52,7 @@ public class HoodieParquetWriter<T extends HoodieRecordPayload, R extends Indexe
private final String instantTime;
private final SparkTaskContextSupplier sparkTaskContextSupplier;
- public HoodieParquetWriter(String instantTime, Path file, HoodieParquetConfig parquetConfig,
+ public HoodieParquetWriter(String instantTime, Path file, HoodieAvroParquetConfig parquetConfig,
Schema schema, SparkTaskContextSupplier sparkTaskContextSupplier) throws IOException {
super(HoodieWrapperFileSystem.convertToHoodiePath(file, parquetConfig.getHadoopConf()),
ParquetFileWriter.Mode.CREATE, parquetConfig.getWriteSupport(), parquetConfig.getCompressionCodecName(),
diff --git a/hudi-spark/src/test/java/org/apache/hudi/keygen/TestKeyGeneratorUtilities.java b/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieRowParquetConfig.java
similarity index 50%
copy from hudi-spark/src/test/java/org/apache/hudi/keygen/TestKeyGeneratorUtilities.java
copy to hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieRowParquetConfig.java
index c0d027e..d993005 100644
--- a/hudi-spark/src/test/java/org/apache/hudi/keygen/TestKeyGeneratorUtilities.java
+++ b/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieRowParquetConfig.java
@@ -16,25 +16,19 @@
* limitations under the License.
*/
-package org.apache.hudi.keygen;
+package org.apache.hudi.io.storage;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.parquet.hadoop.metadata.CompressionCodecName;
-public class TestKeyGeneratorUtilities {
-
- public String exampleSchema = "{\"type\": \"record\",\"name\": \"testrec\",\"fields\": [ "
- + "{\"name\": \"timestamp\",\"type\": \"long\"},{\"name\": \"_row_key\", \"type\": \"string\"},"
- + "{\"name\": \"ts_ms\", \"type\": \"string\"},"
- + "{\"name\": \"pii_col\", \"type\": \"string\"}]}";
+/**
+ * ParquetConfig for datasource implementation with {@link org.apache.hudi.client.model.HoodieInternalRow}.
+ */
+public class HoodieRowParquetConfig extends HoodieBaseParquetConfig<HoodieRowParquetWriteSupport> {
- public GenericRecord getRecord() {
- GenericRecord record = new GenericData.Record(new Schema.Parser().parse(exampleSchema));
- record.put("timestamp", 4357686);
- record.put("_row_key", "key1");
- record.put("ts_ms", "2020-03-21");
- record.put("pii_col", "pi");
- return record;
+ public HoodieRowParquetConfig(HoodieRowParquetWriteSupport writeSupport, CompressionCodecName compressionCodecName,
+ int blockSize, int pageSize, long maxFileSize, Configuration hadoopConf,
+ double compressionRatio) {
+ super(writeSupport, compressionCodecName, blockSize, pageSize, maxFileSize, hadoopConf, compressionRatio);
}
}
diff --git a/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieRowParquetWriteSupport.java b/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieRowParquetWriteSupport.java
new file mode 100644
index 0000000..f6cef20
--- /dev/null
+++ b/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieRowParquetWriteSupport.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.io.storage;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hudi.common.bloom.BloomFilter;
+import org.apache.hudi.common.bloom.HoodieDynamicBoundedBloomFilter;
+import org.apache.parquet.hadoop.api.WriteSupport;
+import org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport;
+import org.apache.spark.sql.types.StructType;
+
+import java.util.HashMap;
+
+import static org.apache.hudi.avro.HoodieAvroWriteSupport.HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY;
+import static org.apache.hudi.avro.HoodieAvroWriteSupport.HOODIE_BLOOM_FILTER_TYPE_CODE;
+import static org.apache.hudi.avro.HoodieAvroWriteSupport.HOODIE_MAX_RECORD_KEY_FOOTER;
+import static org.apache.hudi.avro.HoodieAvroWriteSupport.HOODIE_MIN_RECORD_KEY_FOOTER;
+
+/**
+ * Hoodie Write Support for directly writing Row to Parquet.
+ */
+public class HoodieRowParquetWriteSupport extends ParquetWriteSupport {
+
+ private Configuration hadoopConf;
+ private BloomFilter bloomFilter;
+ private String minRecordKey;
+ private String maxRecordKey;
+
+ public HoodieRowParquetWriteSupport(Configuration conf, StructType structType, BloomFilter bloomFilter) {
+ super();
+ Configuration hadoopConf = new Configuration(conf);
+ hadoopConf.set("spark.sql.parquet.writeLegacyFormat", "false");
+ hadoopConf.set("spark.sql.parquet.outputTimestampType", "TIMESTAMP_MILLIS");
+ this.hadoopConf = hadoopConf;
+ setSchema(structType, hadoopConf);
+ this.bloomFilter = bloomFilter;
+ }
+
+ public Configuration getHadoopConf() {
+ return hadoopConf;
+ }
+
+ @Override
+ public WriteSupport.FinalizedWriteContext finalizeWrite() {
+ HashMap<String, String> extraMetaData = new HashMap<>();
+ if (bloomFilter != null) {
+ extraMetaData.put(HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY, bloomFilter.serializeToString());
+ if (minRecordKey != null && maxRecordKey != null) {
+ extraMetaData.put(HOODIE_MIN_RECORD_KEY_FOOTER, minRecordKey);
+ extraMetaData.put(HOODIE_MAX_RECORD_KEY_FOOTER, maxRecordKey);
+ }
+ if (bloomFilter.getBloomFilterTypeCode().name().contains(HoodieDynamicBoundedBloomFilter.TYPE_CODE_PREFIX)) {
+ extraMetaData.put(HOODIE_BLOOM_FILTER_TYPE_CODE, bloomFilter.getBloomFilterTypeCode().name());
+ }
+ }
+ return new WriteSupport.FinalizedWriteContext(extraMetaData);
+ }
+
+ public void add(String recordKey) {
+ this.bloomFilter.add(recordKey);
+ if (minRecordKey != null) {
+ minRecordKey = minRecordKey.compareTo(recordKey) <= 0 ? minRecordKey : recordKey;
+ } else {
+ minRecordKey = recordKey;
+ }
+
+ if (maxRecordKey != null) {
+ maxRecordKey = maxRecordKey.compareTo(recordKey) >= 0 ? maxRecordKey : recordKey;
+ } else {
+ maxRecordKey = recordKey;
+ }
+ }
+}
diff --git a/hudi-client/src/main/java/org/apache/hudi/keygen/BuiltinKeyGenerator.java b/hudi-client/src/main/java/org/apache/hudi/keygen/BuiltinKeyGenerator.java
deleted file mode 100644
index 7c3edf7..0000000
--- a/hudi-client/src/main/java/org/apache/hudi/keygen/BuiltinKeyGenerator.java
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hudi.keygen;
-
-import org.apache.hudi.common.config.TypedProperties;
-import org.apache.hudi.common.model.HoodieKey;
-import org.apache.hudi.exception.HoodieKeyException;
-
-import org.apache.avro.generic.GenericRecord;
-
-import java.util.List;
-import java.util.stream.Collectors;
-
-/**
- * Base class for all the built-in key generators. Contains methods structured for
- * code reuse amongst them.
- */
-public abstract class BuiltinKeyGenerator extends KeyGenerator {
-
- protected BuiltinKeyGenerator(TypedProperties config) {
- super(config);
- }
-
- /**
- * Generate a record Key out of provided generic record.
- */
- public abstract String getRecordKey(GenericRecord record);
-
- /**
- * Generate a partition path out of provided generic record.
- */
- public abstract String getPartitionPath(GenericRecord record);
-
- /**
- * Generate a Hoodie Key out of provided generic record.
- */
- public final HoodieKey getKey(GenericRecord record) {
- if (getRecordKeyFields() == null || getPartitionPathFields() == null) {
- throw new HoodieKeyException("Unable to find field names for record key or partition path in cfg");
- }
- return new HoodieKey(getRecordKey(record), getPartitionPath(record));
- }
-
- /**
- * Return fields that constitute record key. Used by Metadata bootstrap.
- * Have a base implementation inorder to prevent forcing custom KeyGenerator implementation
- * to implement this method
- * @return list of record key fields
- */
- public List<String> getRecordKeyFields() {
- throw new IllegalStateException("This method is expected to be overridden by subclasses");
- }
-
- /**
- * Return fields that constiture partition path. Used by Metadata bootstrap.
- * Have a base implementation inorder to prevent forcing custom KeyGenerator implementation
- * to implement this method
- * @return list of partition path fields
- */
- public List<String> getPartitionPathFields() {
- throw new IllegalStateException("This method is expected to be overridden by subclasses");
- }
-
- @Override
- public final List<String> getRecordKeyFieldNames() {
- // For nested columns, pick top level column name
- return getRecordKeyFields().stream().map(k -> {
- int idx = k.indexOf('.');
- return idx > 0 ? k.substring(0, idx) : k;
- }).collect(Collectors.toList());
- }
-}
diff --git a/hudi-spark/src/test/java/org/apache/hudi/keygen/TestKeyGeneratorUtilities.java b/hudi-client/src/main/java/org/apache/hudi/keygen/KeyGeneratorInterface.java
similarity index 55%
copy from hudi-spark/src/test/java/org/apache/hudi/keygen/TestKeyGeneratorUtilities.java
copy to hudi-client/src/main/java/org/apache/hudi/keygen/KeyGeneratorInterface.java
index c0d027e..6412a2f 100644
--- a/hudi-spark/src/test/java/org/apache/hudi/keygen/TestKeyGeneratorUtilities.java
+++ b/hudi-client/src/main/java/org/apache/hudi/keygen/KeyGeneratorInterface.java
@@ -18,23 +18,24 @@
package org.apache.hudi.keygen;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
+import org.apache.hudi.common.model.HoodieKey;
+import org.apache.spark.sql.Row;
+
+import java.io.Serializable;
+import java.util.List;
+
+/**
+ * Represents the interface key generators need to adhere to.
+ */
+public interface KeyGeneratorInterface extends Serializable {
+
+ HoodieKey getKey(GenericRecord record);
+
+ List<String> getRecordKeyFieldNames();
+
+ String getRecordKey(Row row);
+
+ String getPartitionPath(Row row);
-public class TestKeyGeneratorUtilities {
-
- public String exampleSchema = "{\"type\": \"record\",\"name\": \"testrec\",\"fields\": [ "
- + "{\"name\": \"timestamp\",\"type\": \"long\"},{\"name\": \"_row_key\", \"type\": \"string\"},"
- + "{\"name\": \"ts_ms\", \"type\": \"string\"},"
- + "{\"name\": \"pii_col\", \"type\": \"string\"}]}";
-
- public GenericRecord getRecord() {
- GenericRecord record = new GenericData.Record(new Schema.Parser().parse(exampleSchema));
- record.put("timestamp", 4357686);
- record.put("_row_key", "key1");
- record.put("ts_ms", "2020-03-21");
- record.put("pii_col", "pi");
- return record;
- }
}
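A hedged sketch of a key generator written against the new interface shape shown above: the two row-based methods from the commit message (getRecordKey(row), getPartitionPath(row)) work directly on Spark Rows without any Avro conversion. The field names "_row_key" and "partition" are illustrative assumptions; real implementations would read the configured fields from TypedProperties as the built-in generators do.

    import java.util.Collections;
    import java.util.List;

    import org.apache.avro.generic.GenericRecord;
    import org.apache.hudi.common.model.HoodieKey;
    import org.apache.hudi.keygen.KeyGeneratorInterface;
    import org.apache.spark.sql.Row;

    public class ExampleRowKeyGenerator implements KeyGeneratorInterface {

      @Override
      public HoodieKey getKey(GenericRecord record) {
        // Avro-based path, unchanged by this commit.
        return new HoodieKey(record.get("_row_key").toString(), record.get("partition").toString());
      }

      @Override
      public List<String> getRecordKeyFieldNames() {
        return Collections.singletonList("_row_key");
      }

      @Override
      public String getRecordKey(Row row) {
        // New row-based path: read the record key directly from the Spark Row.
        return row.getAs("_row_key").toString();
      }

      @Override
      public String getPartitionPath(Row row) {
        return row.getAs("partition").toString();
      }
    }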
diff --git a/hudi-client/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapCommitActionExecutor.java b/hudi-client/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapCommitActionExecutor.java
index 3b7c501..47791d9 100644
--- a/hudi-client/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapCommitActionExecutor.java
+++ b/hudi-client/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapCommitActionExecutor.java
@@ -53,7 +53,7 @@ import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.execution.SparkBoundedInMemoryExecutor;
import org.apache.hudi.io.HoodieBootstrapHandle;
-import org.apache.hudi.keygen.KeyGenerator;
+import org.apache.hudi.keygen.KeyGeneratorInterface;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.WorkloadProfile;
import org.apache.hudi.table.action.HoodieWriteMetadata;
@@ -225,7 +225,7 @@ public class BootstrapCommitActionExecutor<T extends HoodieRecordPayload<T>>
}
private BootstrapWriteStatus handleMetadataBootstrap(String srcPartitionPath, String partitionPath,
- HoodieFileStatus srcFileStatus, KeyGenerator keyGenerator) {
+ HoodieFileStatus srcFileStatus, KeyGeneratorInterface keyGenerator) {
Path sourceFilePath = FileStatusUtils.toPath(srcFileStatus.getPath());
HoodieBootstrapHandle bootstrapHandle = new HoodieBootstrapHandle(config, HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS,
@@ -311,7 +311,7 @@ public class BootstrapCommitActionExecutor<T extends HoodieRecordPayload<T>>
TypedProperties properties = new TypedProperties();
properties.putAll(config.getProps());
- KeyGenerator keyGenerator = (KeyGenerator) ReflectionUtils.loadClass(config.getBootstrapKeyGeneratorClass(),
+ KeyGeneratorInterface keyGenerator = (KeyGeneratorInterface) ReflectionUtils.loadClass(config.getBootstrapKeyGeneratorClass(),
properties);
BootstrapPartitionPathTranslator translator = (BootstrapPartitionPathTranslator) ReflectionUtils.loadClass(
config.getBootstrapPartitionPathTranslatorClass(), properties);
diff --git a/hudi-client/src/test/java/org/apache/hudi/client/TestHoodieInternalWriteStatus.java b/hudi-client/src/test/java/org/apache/hudi/client/TestHoodieInternalWriteStatus.java
new file mode 100644
index 0000000..3f69c65
--- /dev/null
+++ b/hudi-client/src/test/java/org/apache/hudi/client/TestHoodieInternalWriteStatus.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.client;
+
+import org.junit.jupiter.api.Test;
+
+import java.util.UUID;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Unit tests {@link HoodieInternalWriteStatus}.
+ */
+public class TestHoodieInternalWriteStatus {
+
+ @Test
+ public void testFailureFraction() {
+ HoodieInternalWriteStatus status = new HoodieInternalWriteStatus(true, 0.1);
+ String fileId = UUID.randomUUID().toString();
+ String partitionPath = UUID.randomUUID().toString();
+ status.setFileId(fileId);
+ status.setPartitionPath(partitionPath);
+ Throwable t = new Exception("some error in writing");
+ for (int i = 0; i < 1000; i++) {
+ status.markFailure(UUID.randomUUID().toString(), t);
+ }
+ // verification
+ assertEquals(fileId, status.getFileId());
+ assertEquals(partitionPath, status.getPartitionPath());
+ assertTrue(status.getFailedRecordKeys().size() > 0);
+ assertTrue(status.getFailedRecordKeys().size() < 150); // 150 instead of 100, to prevent flaky test
+ assertTrue(status.hasErrors());
+ }
+
+ @Test
+ public void testSuccessRecordTracking() {
+ boolean[] vals = {true, false};
+ for (boolean trackSuccess : vals) {
+ HoodieInternalWriteStatus status = new HoodieInternalWriteStatus(trackSuccess, 1.0);
+ String fileId = UUID.randomUUID().toString();
+ status.setFileId(fileId);
+ String partitionPath = UUID.randomUUID().toString();
+ status.setPartitionPath(partitionPath);
+ Throwable t = new Exception("some error in writing");
+ for (int i = 0; i < 1000; i++) {
+ status.markSuccess(UUID.randomUUID().toString());
+ status.markFailure(UUID.randomUUID().toString(), t);
+ }
+ // verification
+ assertEquals(fileId, status.getFileId());
+ assertEquals(partitionPath, status.getPartitionPath());
+ assertEquals(1000, status.getFailedRecordKeys().size());
+ assertTrue(status.hasErrors());
+ if (trackSuccess) {
+ assertEquals(1000, status.getSuccessRecordKeys().size());
+ } else {
+ assertTrue(status.getSuccessRecordKeys().isEmpty());
+ }
+ assertEquals(2000, status.getTotalRecords());
+ }
+ }
+
+ @Test
+ public void testGlobalError() {
+ HoodieInternalWriteStatus status = new HoodieInternalWriteStatus(true, 0.1);
+ Throwable t = new Exception("some error in writing");
+ status.setGlobalError(t);
+ assertEquals(t, status.getGlobalError());
+ }
+}
diff --git a/hudi-client/src/test/java/org/apache/hudi/client/model/TestHoodieInternalRow.java b/hudi-client/src/test/java/org/apache/hudi/client/model/TestHoodieInternalRow.java
new file mode 100644
index 0000000..bfcb012
--- /dev/null
+++ b/hudi-client/src/test/java/org/apache/hudi/client/model/TestHoodieInternalRow.java
@@ -0,0 +1,239 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.client.model;
+
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;
+import org.apache.spark.sql.types.DataTypes;
+import org.junit.jupiter.api.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.UUID;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Unit tests {@link HoodieInternalRow}.
+ */
+public class TestHoodieInternalRow {
+
+ private static final Random RANDOM = new Random();
+ private static final int INTEGER_INDEX = 5;
+ private static final int STRING_INDEX = 6;
+ private static final int BOOLEAN_INDEX = 7;
+ private static final int SHORT_INDEX = 8;
+ private static final int BYTE_INDEX = 9;
+ private static final int LONG_INDEX = 10;
+ private static final int FLOAT_INDEX = 11;
+ private static final int DOUBLE_INDEX = 12;
+ private static final int DECIMAL_INDEX = 13;
+ private static final int BINARY_INDEX = 14;
+ private static final int STRUCT_INDEX = 15;
+ // TODO: array and map types are not covered yet
+ private static final int ARRAY_INDEX = 16;
+ private static final int MAP_INDEX = 17;
+
+ private List<Integer> nullIndices;
+
+ public TestHoodieInternalRow() {
+ this.nullIndices = new ArrayList<>();
+ }
+
+ @Test
+ public void testGet() {
+ Object[] values = getRandomValue(true);
+
+ InternalRow row = new GenericInternalRow(values);
+ HoodieInternalRow hoodieInternalRow = new HoodieInternalRow("commitTime", "commitSeqNo", "recordKey", "partitionPath", "fileName", row);
+
+ assertValues(hoodieInternalRow, "commitTime", "commitSeqNo", "recordKey", "partitionPath",
+ "fileName", values, nullIndices);
+ }
+
+ @Test
+ public void testUpdate() {
+ Object[] values = getRandomValue(true);
+ InternalRow row = new GenericInternalRow(values);
+ HoodieInternalRow hoodieInternalRow = new HoodieInternalRow("commitTime", "commitSeqNo", "recordKey", "partitionPath", "fileName", row);
+
+ hoodieInternalRow.update(0, "commitTime_updated");
+ hoodieInternalRow.update(1, "commitSeqNo_updated");
+ hoodieInternalRow.update(2, "recordKey_updated");
+ hoodieInternalRow.update(3, "partitionPath_updated");
+ hoodieInternalRow.update(4, "fileName_updated");
+
+ values = getRandomValue(true);
+ hoodieInternalRow.update(INTEGER_INDEX, values[INTEGER_INDEX]);
+ hoodieInternalRow.update(BOOLEAN_INDEX, values[BOOLEAN_INDEX]);
+ hoodieInternalRow.update(SHORT_INDEX, values[SHORT_INDEX]);
+ hoodieInternalRow.update(BYTE_INDEX, values[BYTE_INDEX]);
+ hoodieInternalRow.update(LONG_INDEX, values[LONG_INDEX]);
+ hoodieInternalRow.update(FLOAT_INDEX, values[FLOAT_INDEX]);
+ hoodieInternalRow.update(DOUBLE_INDEX, values[DOUBLE_INDEX]);
+ //hoodieInternalRow.update(decimalIndex, values[decimalIndex]);
+ hoodieInternalRow.update(BINARY_INDEX, values[BINARY_INDEX]);
+ hoodieInternalRow.update(STRUCT_INDEX, values[STRUCT_INDEX]);
+ hoodieInternalRow.update(STRING_INDEX, values[STRING_INDEX].toString());
+
+ assertValues(hoodieInternalRow, "commitTime_updated", "commitSeqNo_updated", "recordKey_updated", "partitionPath_updated",
+ "fileName_updated", values, nullIndices);
+ }
+
+ @Test
+ public void testIsNullCheck() {
+
+ for (int i = 0; i < 16; i++) {
+ Object[] values = getRandomValue(true);
+
+ InternalRow row = new GenericInternalRow(values);
+ HoodieInternalRow hoodieInternalRow = new HoodieInternalRow("commitTime", "commitSeqNo", "recordKey", "partitionPath", "fileName", row);
+
+ hoodieInternalRow.setNullAt(i);
+ nullIndices.clear();
+ nullIndices.add(i);
+ assertValues(hoodieInternalRow, "commitTime", "commitSeqNo", "recordKey", "partitionPath",
+ "fileName", values, nullIndices);
+ }
+
+ // try setting multiple values as null
+ // run it for 5 rounds
+ for (int i = 0; i < 5; i++) {
+ int numNullValues = 1 + RANDOM.nextInt(4);
+ List<Integer> nullsSoFar = new ArrayList<>();
+ while (nullsSoFar.size() < numNullValues) {
+ int randomIndex = RANDOM.nextInt(16);
+ if (!nullsSoFar.contains(randomIndex)) {
+ nullsSoFar.add(randomIndex);
+ }
+ }
+
+ Object[] values = getRandomValue(true);
+ InternalRow row = new GenericInternalRow(values);
+ HoodieInternalRow hoodieInternalRow = new HoodieInternalRow("commitTime", "commitSeqNo", "recordKey", "partitionPath", "fileName", row);
+
+ nullIndices.clear();
+
+ for (Integer index : nullsSoFar) {
+ hoodieInternalRow.setNullAt(index);
+ nullIndices.add(index);
+ }
+ assertValues(hoodieInternalRow, "commitTime", "commitSeqNo", "recordKey", "partitionPath",
+ "fileName", values, nullIndices);
+ }
+ }
+
+ /**
+ * Fetches a random Object[] of values for testing.
+ *
+ * @param withStructType true if a struct type value needs to be added as one of the elements in the Object[]
+ * @return the random Object[] thus generated
+ */
+ private Object[] getRandomValue(boolean withStructType) {
+ Object[] values = new Object[16];
+ values[INTEGER_INDEX] = RANDOM.nextInt();
+ values[STRING_INDEX] = UUID.randomUUID().toString();
+ values[BOOLEAN_INDEX] = RANDOM.nextBoolean();
+ values[SHORT_INDEX] = (short) RANDOM.nextInt(2);
+ byte[] bytes = new byte[1];
+ RANDOM.nextBytes(bytes);
+ values[BYTE_INDEX] = bytes[0];
+ values[LONG_INDEX] = RANDOM.nextLong();
+ values[FLOAT_INDEX] = RANDOM.nextFloat();
+ values[DOUBLE_INDEX] = RANDOM.nextDouble();
+ // TODO fix decimal type.
+ values[DECIMAL_INDEX] = RANDOM.nextFloat();
+ bytes = new byte[20];
+ RANDOM.nextBytes(bytes);
+ values[BINARY_INDEX] = bytes;
+ if (withStructType) {
+ Object[] structField = getRandomValue(false);
+ values[STRUCT_INDEX] = new GenericInternalRow(structField);
+ }
+ return values;
+ }
+
+ private void assertValues(HoodieInternalRow hoodieInternalRow, String commitTime, String commitSeqNo, String recordKey, String partitionPath, String filename, Object[] values,
+ List<Integer> nullIndexes) {
+ for (Integer index : nullIndexes) {
+ assertTrue(hoodieInternalRow.isNullAt(index));
+ }
+ for (int i = 0; i < 16; i++) {
+ if (!nullIndexes.contains(i)) {
+ assertFalse(hoodieInternalRow.isNullAt(i));
+ }
+ }
+ if (!nullIndexes.contains(0)) {
+ assertEquals(commitTime, hoodieInternalRow.get(0, DataTypes.StringType).toString());
+ }
+ if (!nullIndexes.contains(1)) {
+ assertEquals(commitSeqNo, hoodieInternalRow.get(1, DataTypes.StringType).toString());
+ }
+ if (!nullIndexes.contains(2)) {
+ assertEquals(recordKey, hoodieInternalRow.get(2, DataTypes.StringType).toString());
+ }
+ if (!nullIndexes.contains(3)) {
+ assertEquals(partitionPath, hoodieInternalRow.get(3, DataTypes.StringType).toString());
+ }
+ if (!nullIndexes.contains(4)) {
+ assertEquals(filename, hoodieInternalRow.get(4, DataTypes.StringType).toString());
+ }
+ if (!nullIndexes.contains(INTEGER_INDEX)) {
+ assertEquals(values[INTEGER_INDEX], hoodieInternalRow.getInt(INTEGER_INDEX));
+ assertEquals(values[INTEGER_INDEX], hoodieInternalRow.get(INTEGER_INDEX, DataTypes.IntegerType));
+ }
+ if (!nullIndexes.contains(STRING_INDEX)) {
+ assertEquals(values[STRING_INDEX].toString(), hoodieInternalRow.get(STRING_INDEX, DataTypes.StringType));
+ }
+ if (!nullIndexes.contains(BOOLEAN_INDEX)) {
+ assertEquals(values[BOOLEAN_INDEX], hoodieInternalRow.getBoolean(BOOLEAN_INDEX));
+ assertEquals(values[BOOLEAN_INDEX], hoodieInternalRow.get(BOOLEAN_INDEX, DataTypes.BooleanType));
+ }
+ if (!nullIndexes.contains(SHORT_INDEX)) {
+ assertEquals(values[SHORT_INDEX], hoodieInternalRow.getShort(SHORT_INDEX));
+ assertEquals(values[SHORT_INDEX], hoodieInternalRow.get(SHORT_INDEX, DataTypes.ShortType));
+ }
+ if (!nullIndexes.contains(BYTE_INDEX)) {
+ assertEquals(values[BYTE_INDEX], hoodieInternalRow.getByte(BYTE_INDEX));
+ assertEquals(values[BYTE_INDEX], hoodieInternalRow.get(BYTE_INDEX, DataTypes.ByteType));
+ }
+ if (!nullIndexes.contains(LONG_INDEX)) {
+ assertEquals(values[LONG_INDEX], hoodieInternalRow.getLong(LONG_INDEX));
+ assertEquals(values[LONG_INDEX], hoodieInternalRow.get(LONG_INDEX, DataTypes.LongType));
+ }
+ if (!nullIndexes.contains(FLOAT_INDEX)) {
+ assertEquals(values[FLOAT_INDEX], hoodieInternalRow.getFloat(FLOAT_INDEX));
+ assertEquals(values[FLOAT_INDEX], hoodieInternalRow.get(FLOAT_INDEX, DataTypes.FloatType));
+ }
+ if (!nullIndexes.contains(DOUBLE_INDEX)) {
+ assertEquals(values[DOUBLE_INDEX], hoodieInternalRow.getDouble(DOUBLE_INDEX));
+ assertEquals(values[DOUBLE_INDEX], hoodieInternalRow.get(DOUBLE_INDEX, DataTypes.DoubleType));
+ }
+ if (!nullIndexes.contains(BINARY_INDEX)) {
+ assertEquals(values[BINARY_INDEX], hoodieInternalRow.getBinary(BINARY_INDEX));
+ assertEquals(values[BINARY_INDEX], hoodieInternalRow.get(BINARY_INDEX, DataTypes.BinaryType));
+ }
+ if (!nullIndexes.contains(STRUCT_INDEX)) {
+ assertEquals(values[STRUCT_INDEX], hoodieInternalRow.getStruct(STRUCT_INDEX, 18));
+ }
+ }
+}
diff --git a/hudi-client/src/test/java/org/apache/hudi/io/TestHoodieRowCreateHandle.java b/hudi-client/src/test/java/org/apache/hudi/io/TestHoodieRowCreateHandle.java
new file mode 100644
index 0000000..93a0253
--- /dev/null
+++ b/hudi-client/src/test/java/org/apache/hudi/io/TestHoodieRowCreateHandle.java
@@ -0,0 +1,231 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.io;
+
+import org.apache.hudi.client.HoodieInternalWriteStatus;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.model.HoodieWriteStat;
+import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.exception.HoodieInsertException;
+import org.apache.hudi.table.HoodieTable;
+import org.apache.hudi.testutils.HoodieClientTestHarness;
+
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.UUID;
+
+import static org.apache.hudi.testutils.SparkDatasetTestUtils.ENCODER;
+import static org.apache.hudi.testutils.SparkDatasetTestUtils.STRUCT_TYPE;
+import static org.apache.hudi.testutils.SparkDatasetTestUtils.getConfigBuilder;
+import static org.apache.hudi.testutils.SparkDatasetTestUtils.getInternalRowWithError;
+import static org.apache.hudi.testutils.SparkDatasetTestUtils.getRandomRows;
+import static org.apache.hudi.testutils.SparkDatasetTestUtils.toInternalRows;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.fail;
+
+/**
+ * Unit tests {@link HoodieRowCreateHandle}.
+ */
+public class TestHoodieRowCreateHandle extends HoodieClientTestHarness {
+
+ private static final Random RANDOM = new Random();
+
+ @BeforeEach
+ public void setUp() throws Exception {
+ initSparkContexts("TestHoodieRowCreateHandle");
+ initPath();
+ initFileSystem();
+ initTestDataGenerator();
+ initMetaClient();
+ }
+
+ @AfterEach
+ public void tearDown() throws Exception {
+ cleanupResources();
+ }
+
+ @Test
+ public void testRowCreateHandle() throws IOException {
+ // init config and table
+ HoodieWriteConfig cfg = getConfigBuilder(basePath).build();
+ HoodieTable table = HoodieTable.create(metaClient, cfg, hadoopConf);
+ List<String> fileNames = new ArrayList<>();
+ List<String> fileAbsPaths = new ArrayList<>();
+
+ Dataset<Row> totalInputRows = null;
+ // one round per partition
+ for (int i = 0; i < 5; i++) {
+ String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[i % 3];
+
+ // init some args
+ String fileId = UUID.randomUUID().toString();
+ String instantTime = "000";
+
+ HoodieRowCreateHandle handle = new HoodieRowCreateHandle(table, cfg, partitionPath, fileId, instantTime, RANDOM.nextInt(100000), RANDOM.nextLong(), RANDOM.nextLong(), STRUCT_TYPE);
+ int size = 10 + RANDOM.nextInt(1000);
+ // Generate inputs
+ Dataset<Row> inputRows = getRandomRows(sqlContext, size, partitionPath, false);
+ if (totalInputRows == null) {
+ totalInputRows = inputRows;
+ } else {
+ totalInputRows = totalInputRows.union(inputRows);
+ }
+
+ // issue writes
+ HoodieInternalWriteStatus writeStatus = writeAndGetWriteStatus(inputRows, handle);
+
+ fileAbsPaths.add(basePath + "/" + writeStatus.getStat().getPath());
+ fileNames.add(handle.getFileName());
+ // verify output
+ assertOutput(writeStatus, size, fileId, partitionPath, instantTime, totalInputRows, fileNames, fileAbsPaths);
+ }
+ }
+
+ /**
+ * Issues a few corrupted or wrongly schematized InternalRows after some valid InternalRows so that a global error is thrown:
+ * write batch 1 of valid records, then batch 2 of invalid records; a global error should be thrown.
+ */
+ @Test
+ public void testGlobalFailure() throws IOException {
+ // init config and table
+ HoodieWriteConfig cfg = getConfigBuilder(basePath).build();
+ HoodieTable table = HoodieTable.create(metaClient, cfg, hadoopConf);
+ String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[0];
+
+ // init some args
+ String fileId = UUID.randomUUID().toString();
+ String instantTime = "000";
+
+ HoodieRowCreateHandle handle = new HoodieRowCreateHandle(table, cfg, partitionPath, fileId, instantTime, RANDOM.nextInt(100000), RANDOM.nextLong(), RANDOM.nextLong(), STRUCT_TYPE);
+ int size = 10 + RANDOM.nextInt(1000);
+ int totalFailures = 5;
+ // Generate first batch of valid rows
+ Dataset<Row> inputRows = getRandomRows(sqlContext, size / 2, partitionPath, false);
+ List<InternalRow> internalRows = toInternalRows(inputRows, ENCODER);
+
+ // generate some failure rows
+ for (int i = 0; i < totalFailures; i++) {
+ internalRows.add(getInternalRowWithError(partitionPath));
+ }
+
+ // generate 2nd batch of valid rows
+ Dataset<Row> inputRows2 = getRandomRows(sqlContext, size / 2, partitionPath, false);
+ internalRows.addAll(toInternalRows(inputRows2, ENCODER));
+
+ // issue writes
+ try {
+ for (InternalRow internalRow : internalRows) {
+ handle.write(internalRow);
+ }
+ fail("Should have failed");
+ } catch (Throwable e) {
+ // expected
+ }
+ // close the create handle
+ HoodieInternalWriteStatus writeStatus = handle.close();
+
+ List<String> fileNames = new ArrayList<>();
+ fileNames.add(handle.getFileName());
+ // verify write status
+ assertNotNull(writeStatus.getGlobalError());
+ assertTrue(writeStatus.getGlobalError().getMessage().contains("java.lang.String cannot be cast to org.apache.spark.unsafe.types.UTF8String"));
+ assertEquals(writeStatus.getFileId(), fileId);
+ assertEquals(writeStatus.getPartitionPath(), partitionPath);
+
+ // verify rows
+ Dataset<Row> result = sqlContext.read().parquet(basePath + "/" + partitionPath);
+ // pass only the first batch of inputRows, since the global error would have been thrown after the first batch
+ assertRows(inputRows, result, instantTime, fileNames);
+ }
+
+ @Test
+ public void testInstantiationFailure() throws IOException {
+ // init config and table
+ HoodieWriteConfig cfg = getConfigBuilder(basePath).withPath("/dummypath/abc/").build();
+ HoodieTable table = HoodieTable.create(metaClient, cfg, hadoopConf);
+
+ try {
+ new HoodieRowCreateHandle(table, cfg, " def", UUID.randomUUID().toString(), "001", RANDOM.nextInt(100000), RANDOM.nextLong(), RANDOM.nextLong(), STRUCT_TYPE);
+ fail("Should have thrown exception");
+ } catch (HoodieInsertException ioe) {
+ // expected
+ }
+ }
+
+ private HoodieInternalWriteStatus writeAndGetWriteStatus(Dataset<Row> inputRows, HoodieRowCreateHandle handle) throws IOException {
+ List<InternalRow> internalRows = toInternalRows(inputRows, ENCODER);
+ // issue writes
+ for (InternalRow internalRow : internalRows) {
+ handle.write(internalRow);
+ }
+ // close the create handle
+ return handle.close();
+ }
+
+ private void assertOutput(HoodieInternalWriteStatus writeStatus, int size, String fileId, String partitionPath, String instantTime, Dataset<Row> inputRows, List<String> filenames,
+ List<String> fileAbsPaths) {
+ assertEquals(writeStatus.getPartitionPath(), partitionPath);
+ assertEquals(writeStatus.getTotalRecords(), size);
+ assertEquals(writeStatus.getFailedRowsSize(), 0);
+ assertEquals(writeStatus.getTotalErrorRecords(), 0);
+ assertFalse(writeStatus.hasErrors());
+ assertNull(writeStatus.getGlobalError());
+ assertEquals(writeStatus.getFileId(), fileId);
+ HoodieWriteStat writeStat = writeStatus.getStat();
+ assertEquals(size, writeStat.getNumInserts());
+ assertEquals(size, writeStat.getNumWrites());
+ assertEquals(fileId, writeStat.getFileId());
+ assertEquals(partitionPath, writeStat.getPartitionPath());
+ assertEquals(0, writeStat.getNumDeletes());
+ assertEquals(0, writeStat.getNumUpdateWrites());
+ assertEquals(0, writeStat.getTotalWriteErrors());
+
+ // verify rows
+ Dataset<Row> result = sqlContext.read().parquet(fileAbsPaths.toArray(new String[0]));
+ assertRows(inputRows, result, instantTime, filenames);
+ }
+
+ private void assertRows(Dataset<Row> expectedRows, Dataset<Row> actualRows, String instantTime, List<String> filenames) {
+ // verify 3 meta fields that are filled in within create handle
+ actualRows.collectAsList().forEach(entry -> {
+ assertEquals(entry.get(HoodieRecord.HOODIE_META_COLUMNS_NAME_TO_POS.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD)).toString(), instantTime);
+ assertTrue(filenames.contains(entry.get(HoodieRecord.HOODIE_META_COLUMNS_NAME_TO_POS.get(HoodieRecord.FILENAME_METADATA_FIELD)).toString()));
+ assertFalse(entry.isNullAt(HoodieRecord.HOODIE_META_COLUMNS_NAME_TO_POS.get(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD)));
+ });
+
+ // after trimming 2 of the meta fields, rest of the fields should match
+ Dataset<Row> trimmedExpected = expectedRows.drop(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, HoodieRecord.COMMIT_TIME_METADATA_FIELD, HoodieRecord.FILENAME_METADATA_FIELD);
+ Dataset<Row> trimmedActual = actualRows.drop(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, HoodieRecord.COMMIT_TIME_METADATA_FIELD, HoodieRecord.FILENAME_METADATA_FIELD);
+ assertEquals(0, trimmedActual.except(trimmedExpected).count());
+ }
+}
diff --git a/hudi-client/src/test/java/org/apache/hudi/io/storage/TestHoodieInternalRowParquetWriter.java b/hudi-client/src/test/java/org/apache/hudi/io/storage/TestHoodieInternalRowParquetWriter.java
new file mode 100644
index 0000000..bcb5aa6
--- /dev/null
+++ b/hudi-client/src/test/java/org/apache/hudi/io/storage/TestHoodieInternalRowParquetWriter.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.io.storage;
+
+import org.apache.hudi.common.bloom.BloomFilter;
+import org.apache.hudi.common.bloom.BloomFilterFactory;
+import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.testutils.HoodieClientTestHarness;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.parquet.hadoop.metadata.CompressionCodecName;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Random;
+import java.util.UUID;
+
+import static org.apache.hudi.testutils.SparkDatasetTestUtils.ENCODER;
+import static org.apache.hudi.testutils.SparkDatasetTestUtils.STRUCT_TYPE;
+import static org.apache.hudi.testutils.SparkDatasetTestUtils.getConfigBuilder;
+import static org.apache.hudi.testutils.SparkDatasetTestUtils.getRandomRows;
+import static org.apache.hudi.testutils.SparkDatasetTestUtils.toInternalRows;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+/**
+ * Unit tests {@link HoodieInternalRowParquetWriter}.
+ */
+public class TestHoodieInternalRowParquetWriter extends HoodieClientTestHarness {
+
+ private static final Random RANDOM = new Random();
+
+ @BeforeEach
+ public void setUp() throws Exception {
+ initSparkContexts("TestHoodieInternalRowParquetWriter");
+ initPath();
+ initFileSystem();
+ initTestDataGenerator();
+ initMetaClient();
+ }
+
+ @AfterEach
+ public void tearDown() throws Exception {
+ cleanupResources();
+ }
+
+ @Test
+ public void endToEndTest() throws IOException {
+ HoodieWriteConfig cfg = getConfigBuilder(basePath).build();
+ for (int i = 0; i < 5; i++) {
+ // init write support and parquet config
+ HoodieRowParquetWriteSupport writeSupport = getWriteSupport(cfg, hadoopConf);
+ HoodieRowParquetConfig parquetConfig = new HoodieRowParquetConfig(writeSupport,
+ CompressionCodecName.SNAPPY, cfg.getParquetBlockSize(), cfg.getParquetPageSize(), cfg.getParquetMaxFileSize(),
+ writeSupport.getHadoopConf(), cfg.getParquetCompressionRatio());
+
+ // prepare path
+ String fileId = UUID.randomUUID().toString();
+ Path filePath = new Path(basePath + "/" + fileId);
+ String partitionPath = HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH;
+ metaClient.getFs().mkdirs(new Path(basePath));
+
+ // init writer
+ HoodieInternalRowParquetWriter writer = new HoodieInternalRowParquetWriter(filePath, parquetConfig);
+
+ // generate input
+ int size = 10 + RANDOM.nextInt(100);
+ // Generate inputs
+ Dataset<Row> inputRows = getRandomRows(sqlContext, size, partitionPath, false);
+ List<InternalRow> internalRows = toInternalRows(inputRows, ENCODER);
+
+ // issue writes
+ for (InternalRow internalRow : internalRows) {
+ writer.write(internalRow);
+ }
+
+ // close the writer
+ writer.close();
+
+ // verify rows
+ Dataset<Row> result = sqlContext.read().parquet(basePath);
+ assertEquals(0, inputRows.except(result).count());
+ }
+ }
+
+ private HoodieRowParquetWriteSupport getWriteSupport(HoodieWriteConfig writeConfig, Configuration hadoopConf) {
+ BloomFilter filter = BloomFilterFactory.createBloomFilter(
+ writeConfig.getBloomFilterNumEntries(),
+ writeConfig.getBloomFilterFPP(),
+ writeConfig.getDynamicBloomFilterMaxNumEntries(),
+ writeConfig.getBloomFilterType());
+ return new HoodieRowParquetWriteSupport(hadoopConf, STRUCT_TYPE, filter);
+ }
+}
diff --git a/hudi-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java b/hudi-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java
index 6db6529..4aaf585 100644
--- a/hudi-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java
+++ b/hudi-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java
@@ -39,7 +39,7 @@ import org.apache.hudi.common.testutils.HoodieTestUtils;
import org.apache.hudi.config.HoodieStorageConfig;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.io.IOType;
-import org.apache.hudi.io.storage.HoodieParquetConfig;
+import org.apache.hudi.io.storage.HoodieAvroParquetConfig;
import org.apache.hudi.io.storage.HoodieParquetWriter;
import org.apache.avro.Schema;
@@ -255,7 +255,7 @@ public class HoodieClientTestUtils {
HoodieAvroWriteSupport writeSupport =
new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema, filter);
String instantTime = FSUtils.getCommitTime(filename);
- HoodieParquetConfig config = new HoodieParquetConfig(writeSupport, CompressionCodecName.GZIP,
+ HoodieAvroParquetConfig config = new HoodieAvroParquetConfig(writeSupport, CompressionCodecName.GZIP,
ParquetWriter.DEFAULT_BLOCK_SIZE, ParquetWriter.DEFAULT_PAGE_SIZE, 120 * 1024 * 1024,
HoodieTestUtils.getDefaultHadoopConf(), Double.valueOf(HoodieStorageConfig.DEFAULT_STREAM_COMPRESSION_RATIO));
HoodieParquetWriter writer =
diff --git a/hudi-client/src/test/java/org/apache/hudi/testutils/SparkDatasetTestUtils.java b/hudi-client/src/test/java/org/apache/hudi/testutils/SparkDatasetTestUtils.java
new file mode 100644
index 0000000..83b966a
--- /dev/null
+++ b/hudi-client/src/test/java/org/apache/hudi/testutils/SparkDatasetTestUtils.java
@@ -0,0 +1,175 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.testutils;
+
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
+import org.apache.hudi.config.HoodieCompactionConfig;
+import org.apache.hudi.config.HoodieIndexConfig;
+import org.apache.hudi.config.HoodieStorageConfig;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.index.HoodieIndex;
+
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer$;
+import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder;
+import org.apache.spark.sql.catalyst.encoders.RowEncoder;
+import org.apache.spark.sql.catalyst.expressions.Attribute;
+import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;
+import org.apache.spark.sql.catalyst.expressions.GenericRow;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.UUID;
+import java.util.stream.Collectors;
+
+import scala.collection.JavaConversions;
+import scala.collection.JavaConverters;
+
+import static org.apache.hudi.common.testutils.FileSystemTestUtils.RANDOM;
+
+/**
+ * Dataset test utils.
+ */
+public class SparkDatasetTestUtils {
+
+ public static final StructType STRUCT_TYPE = new StructType(new StructField[] {
+ new StructField(HoodieRecord.COMMIT_TIME_METADATA_FIELD, DataTypes.StringType, false, Metadata.empty()),
+ new StructField(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, DataTypes.StringType, false, Metadata.empty()),
+ new StructField(HoodieRecord.RECORD_KEY_METADATA_FIELD, DataTypes.StringType, false, Metadata.empty()),
+ new StructField(HoodieRecord.PARTITION_PATH_METADATA_FIELD, DataTypes.StringType, false, Metadata.empty()),
+ new StructField(HoodieRecord.FILENAME_METADATA_FIELD, DataTypes.StringType, false, Metadata.empty()),
+ new StructField("randomInt", DataTypes.IntegerType, false, Metadata.empty()),
+ new StructField("randomLong", DataTypes.LongType, false, Metadata.empty())});
+
+ public static final StructType ERROR_STRUCT_TYPE = new StructType(new StructField[] {
+ new StructField(HoodieRecord.COMMIT_TIME_METADATA_FIELD, DataTypes.StringType, false, Metadata.empty()),
+ new StructField(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, DataTypes.LongType, false, Metadata.empty()),
+ new StructField(HoodieRecord.RECORD_KEY_METADATA_FIELD, DataTypes.StringType, false, Metadata.empty()),
+ new StructField(HoodieRecord.PARTITION_PATH_METADATA_FIELD, DataTypes.StringType, false, Metadata.empty()),
+ new StructField(HoodieRecord.FILENAME_METADATA_FIELD, DataTypes.StringType, false, Metadata.empty()),
+ new StructField("randomInt", DataTypes.IntegerType, false, Metadata.empty()),
+ new StructField("randomStr", DataTypes.StringType, false, Metadata.empty())});
+
+ public static final ExpressionEncoder ENCODER = getEncoder(STRUCT_TYPE);
+ public static final ExpressionEncoder ERROR_ENCODER = getEncoder(ERROR_STRUCT_TYPE);
+
+ /**
+ * Generate an Encoder for the passed in {@link StructType}.
+ *
+ * @param schema instance of {@link StructType} for which encoder is requested.
+ * @return the encoder thus generated.
+ */
+ private static ExpressionEncoder getEncoder(StructType schema) {
+ List<Attribute> attributes = JavaConversions.asJavaCollection(schema.toAttributes()).stream()
+ .map(Attribute::toAttribute).collect(Collectors.toList());
+ return RowEncoder.apply(schema)
+ .resolveAndBind(JavaConverters.asScalaBufferConverter(attributes).asScala().toSeq(),
+ SimpleAnalyzer$.MODULE$);
+ }
+
+ /**
+ * Generate random Rows.
+ *
+ * @param count total number of Rows to be generated.
+ * @param partitionPath partition path to be set
+ * @return the Dataset<Row> thus generated.
+ */
+ public static Dataset<Row> getRandomRows(SQLContext sqlContext, int count, String partitionPath, boolean isError) {
+ List<Row> records = new ArrayList<>();
+ for (long recordNum = 0; recordNum < count; recordNum++) {
+ records.add(getRandomValue(partitionPath, isError));
+ }
+ return sqlContext.createDataFrame(records, isError ? ERROR_STRUCT_TYPE : STRUCT_TYPE);
+ }
+
+ /**
+ * Generate random Row.
+ *
+ * @param partitionPath partition path to be set in the Row.
+ * @return the Row thus generated.
+ */
+ public static Row getRandomValue(String partitionPath, boolean isError) {
+ // order commit time, seq no, record key, partition path, file name
+ Object[] values = new Object[7];
+ values[0] = ""; //commit time
+ if (!isError) {
+ values[1] = ""; // commit seq no
+ } else {
+ values[1] = RANDOM.nextLong();
+ }
+ values[2] = UUID.randomUUID().toString();
+ values[3] = partitionPath;
+ values[4] = ""; // filename
+ values[5] = RANDOM.nextInt();
+ if (!isError) {
+ values[6] = RANDOM.nextLong();
+ } else {
+ values[6] = UUID.randomUUID().toString();
+ }
+ return new GenericRow(values);
+ }
+
+ /**
+ * Convert Dataset<Row>s to List of {@link InternalRow}s.
+ *
+ * @param rows Dataset<Row>s to be converted
+ * @return the List of {@link InternalRow}s thus converted.
+ */
+ public static List<InternalRow> toInternalRows(Dataset<Row> rows, ExpressionEncoder encoder) {
+ List<InternalRow> toReturn = new ArrayList<>();
+ List<Row> rowList = rows.collectAsList();
+ for (Row row : rowList) {
+ toReturn.add(encoder.toRow(row).copy());
+ }
+ return toReturn;
+ }
+
+ public static InternalRow getInternalRowWithError(String partitionPath) {
+ // order commit time, seq no, record key, partition path, file name
+ String recordKey = UUID.randomUUID().toString();
+ Object[] values = new Object[7];
+ values[0] = "";
+ values[1] = "";
+ values[2] = recordKey;
+ values[3] = partitionPath;
+ values[4] = "";
+ values[5] = RANDOM.nextInt();
+ values[6] = RANDOM.nextBoolean();
+ return new GenericInternalRow(values);
+ }
+
+ public static HoodieWriteConfig.Builder getConfigBuilder(String basePath) {
+ return HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
+ .withParallelism(2, 2)
+ .withCompactionConfig(HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024).build())
+ .withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1024 * 1024).build())
+ .forTable("test-trip-table")
+ .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
+ .withBulkInsertParallelism(2);
+ }
+
+}
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecord.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecord.java
index df7843f..ce2562d 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecord.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecord.java
@@ -18,12 +18,16 @@
package org.apache.hudi.common.model;
+import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
import org.apache.hudi.common.util.CollectionUtils;
import org.apache.hudi.common.util.Option;
import java.io.Serializable;
import java.util.List;
import java.util.Objects;
+import org.apache.hudi.common.util.collection.Pair;
/**
* A Single Record managed by Hoodie.
@@ -40,6 +44,10 @@ public class HoodieRecord<T extends HoodieRecordPayload> implements Serializable
CollectionUtils.createImmutableList(COMMIT_TIME_METADATA_FIELD, COMMIT_SEQNO_METADATA_FIELD,
RECORD_KEY_METADATA_FIELD, PARTITION_PATH_METADATA_FIELD, FILENAME_METADATA_FIELD);
+ public static final Map<String, Integer> HOODIE_META_COLUMNS_NAME_TO_POS =
+ IntStream.range(0, HOODIE_META_COLUMNS.size()).mapToObj(idx -> Pair.of(HOODIE_META_COLUMNS.get(idx), idx))
+ .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
+
/**
* Identifies the record across the table.
*/
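
Illustrative only (not part of the diff): the new name-to-position map lets callers address the meta columns of a Row by position, which is how the new row-writer tests verify them. 'row' below is an assumed org.apache.spark.sql.Row whose first five columns are the hoodie meta columns.

int commitTimePos = HoodieRecord.HOODIE_META_COLUMNS_NAME_TO_POS
    .get(HoodieRecord.COMMIT_TIME_METADATA_FIELD); // 0, the first meta column
String commitTime = row.getString(commitTimePos);
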
diff --git a/hudi-common/src/test/resources/timestamp-test-evolved.avsc b/hudi-common/src/test/resources/timestamp-test-evolved.avsc
index 421c672..beb3632 100644
--- a/hudi-common/src/test/resources/timestamp-test-evolved.avsc
+++ b/hudi-common/src/test/resources/timestamp-test-evolved.avsc
@@ -21,6 +21,6 @@
"name": "User",
"fields": [
{"name": "field1", "type": ["null", "string"], "default": null},
- {"name": "createTime", "type": ["null", "string"], "default": null}
+ {"name": "createTime", "type": ["null", "long"], "default": null}
]
}
\ No newline at end of file
diff --git a/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java b/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java
index 5647e65..ea2cc5c 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java
@@ -18,8 +18,6 @@
package org.apache.hudi;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
import org.apache.hudi.client.HoodieReadClient;
import org.apache.hudi.client.HoodieWriteClient;
import org.apache.hudi.client.WriteStatus;
@@ -48,6 +46,8 @@ import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaRDD;
@@ -106,7 +106,7 @@ public class DataSourceUtils {
public static String getTablePath(FileSystem fs, Path[] userProvidedPaths) throws IOException {
LOG.info("Getting table path..");
- for (Path path: userProvidedPaths) {
+ for (Path path : userProvidedPaths) {
try {
Option<Path> tablePath = TablePathUtils.getTablePath(fs, path);
if (tablePath.isPresent()) {
@@ -123,8 +123,7 @@ public class DataSourceUtils {
/**
* This method converts values for fields with certain Avro/Parquet data types that require special handling.
*
- * Logical Date Type is converted to actual Date value instead of Epoch Integer which is how it is
- * represented/stored in parquet.
+ * Logical Date Type is converted to actual Date value instead of Epoch Integer which is how it is represented/stored in parquet.
*
* @param fieldSchema avro field schema
* @param fieldValue avro field value
@@ -157,9 +156,8 @@ public class DataSourceUtils {
/**
* Create a key generator class via reflection, passing in any configs needed.
* <p>
- * If the class name of key generator is configured through the properties file, i.e., {@code props}, use the
- * corresponding key generator class; otherwise, use the default key generator class specified in {@code
- * DataSourceWriteOptions}.
+ * If the class name of key generator is configured through the properties file, i.e., {@code props}, use the corresponding key generator class; otherwise, use the default key generator class
+ * specified in {@code DataSourceWriteOptions}.
*/
public static KeyGenerator createKeyGenerator(TypedProperties props) throws IOException {
String keyGeneratorClass = props.getString(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY(),
@@ -173,10 +171,6 @@ public class DataSourceUtils {
/**
* Create a date time parser class for TimestampBasedKeyGenerator, passing in any configs needed.
- * @param props
- * @param parserClass
- * @return
- * @throws IOException
*/
public static HoodieDateTimeParser createDateTimeParser(TypedProperties props, String parserClass) throws IOException {
try {
@@ -190,6 +184,7 @@ public class DataSourceUtils {
* Create a UserDefinedBulkInsertPartitioner class via reflection,
* <br>
* if the class name of UserDefinedBulkInsertPartitioner is configured through the HoodieWriteConfig.
+ *
* @see HoodieWriteConfig#getUserDefinedBulkInsertPartitionerClass()
*/
private static Option<BulkInsertPartitioner> createUserDefinedBulkInsertPartitioner(HoodieWriteConfig config)
@@ -225,35 +220,35 @@ public class DataSourceUtils {
});
}
- public static HoodieWriteClient createHoodieClient(JavaSparkContext jssc, String schemaStr, String basePath,
+ public static HoodieWriteConfig createHoodieConfig(String schemaStr, String basePath,
String tblName, Map<String, String> parameters) {
- boolean asyncCompact = Boolean.parseBoolean(parameters.get(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE_KEY()));
- // inline compaction is on by default for MOR
+ boolean asyncCompact = Boolean.parseBoolean(parameters.get(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE_OPT_KEY()));
boolean inlineCompact = !asyncCompact && parameters.get(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY())
.equals(DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL());
- return createHoodieClient(jssc, schemaStr, basePath, tblName, parameters, inlineCompact);
- }
-
- public static HoodieWriteClient createHoodieClient(JavaSparkContext jssc, String schemaStr, String basePath,
- String tblName, Map<String, String> parameters, boolean inlineCompact) {
-
// insert/bulk-insert combining to be true, if filtering for duplicates
boolean combineInserts = Boolean.parseBoolean(parameters.get(DataSourceWriteOptions.INSERT_DROP_DUPS_OPT_KEY()));
+ HoodieWriteConfig.Builder builder = HoodieWriteConfig.newBuilder()
+ .withPath(basePath).withAutoCommit(false).combineInput(combineInserts, true);
+ if (schemaStr != null) {
+ builder = builder.withSchema(schemaStr);
+ }
- HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath(basePath).withAutoCommit(false)
- .combineInput(combineInserts, true).withSchema(schemaStr).forTable(tblName)
+ return builder.forTable(tblName)
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
.withPayloadClass(parameters.get(DataSourceWriteOptions.PAYLOAD_CLASS_OPT_KEY()))
.withInlineCompaction(inlineCompact).build())
// override above with Hoodie configs specified as options.
.withProps(parameters).build();
+ }
- return new HoodieWriteClient<>(jssc, writeConfig, true);
+ public static HoodieWriteClient createHoodieClient(JavaSparkContext jssc, String schemaStr, String basePath,
+ String tblName, Map<String, String> parameters) {
+ return new HoodieWriteClient<>(jssc, createHoodieConfig(schemaStr, basePath, tblName, parameters), true);
}
public static JavaRDD<WriteStatus> doWriteOperation(HoodieWriteClient client, JavaRDD<HoodieRecord> hoodieRecords,
- String instantTime, String operation) throws HoodieException {
+ String instantTime, String operation) throws HoodieException {
if (operation.equals(DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL())) {
Option<BulkInsertPartitioner> userDefinedBulkInsertPartitioner =
createUserDefinedBulkInsertPartitioner(client.getConfig());
@@ -267,12 +262,12 @@ public class DataSourceUtils {
}
public static JavaRDD<WriteStatus> doDeleteOperation(HoodieWriteClient client, JavaRDD<HoodieKey> hoodieKeys,
- String instantTime) {
+ String instantTime) {
return client.delete(hoodieKeys, instantTime);
}
public static HoodieRecord createHoodieRecord(GenericRecord gr, Comparable orderingVal, HoodieKey hKey,
- String payloadClass) throws IOException {
+ String payloadClass) throws IOException {
HoodieRecordPayload payload = DataSourceUtils.createPayload(payloadClass, gr, orderingVal);
return new HoodieRecord<>(hKey, payload);
}
@@ -280,13 +275,13 @@ public class DataSourceUtils {
/**
* Drop records already present in the dataset.
*
- * @param jssc JavaSparkContext
+ * @param jssc JavaSparkContext
* @param incomingHoodieRecords HoodieRecords to deduplicate
- * @param writeConfig HoodieWriteConfig
+ * @param writeConfig HoodieWriteConfig
*/
@SuppressWarnings("unchecked")
public static JavaRDD<HoodieRecord> dropDuplicates(JavaSparkContext jssc, JavaRDD<HoodieRecord> incomingHoodieRecords,
- HoodieWriteConfig writeConfig) {
+ HoodieWriteConfig writeConfig) {
try {
HoodieReadClient client = new HoodieReadClient<>(jssc, writeConfig);
return client.tagLocation(incomingHoodieRecords)
@@ -300,7 +295,7 @@ public class DataSourceUtils {
@SuppressWarnings("unchecked")
public static JavaRDD<HoodieRecord> dropDuplicates(JavaSparkContext jssc, JavaRDD<HoodieRecord> incomingHoodieRecords,
- Map<String, String> parameters) {
+ Map<String, String> parameters) {
HoodieWriteConfig writeConfig =
HoodieWriteConfig.newBuilder().withPath(parameters.get("path")).withProps(parameters).build();
return dropDuplicates(jssc, incomingHoodieRecords, writeConfig);
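
A hedged usage sketch of the refactored helpers above (not from the commit): the split lets the row-writer path build a HoodieWriteConfig without a JavaSparkContext, while RDD-based writes still create a full client. 'jssc' and 'avroSchemaStr' are assumed to exist; the table type, payload class and paths are placeholder values.

Map<String, String> parameters = new HashMap<>();
parameters.put(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY(), DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL());
parameters.put(DataSourceWriteOptions.PAYLOAD_CLASS_OPT_KEY(), DataSourceWriteOptions.DEFAULT_PAYLOAD_OPT_VAL());

// Row-writer path: config only; the schema may be null, as DefaultSource does below.
HoodieWriteConfig config = DataSourceUtils.createHoodieConfig(null, "/tmp/hudi/trips", "trips", parameters);

// RDD path: config plus a write client bound to the Spark context.
HoodieWriteClient client = DataSourceUtils.createHoodieClient(jssc, avroSchemaStr, "/tmp/hudi/trips", "trips", parameters);
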
diff --git a/hudi-spark/src/main/java/org/apache/hudi/HoodieDatasetBulkInsertHelper.java b/hudi-spark/src/main/java/org/apache/hudi/HoodieDatasetBulkInsertHelper.java
new file mode 100644
index 0000000..b3ed7ae
--- /dev/null
+++ b/hudi-spark/src/main/java/org/apache/hudi/HoodieDatasetBulkInsertHelper.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi;
+
+import static org.apache.spark.sql.functions.callUDF;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import org.apache.hudi.common.config.TypedProperties;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.util.ReflectionUtils;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.keygen.KeyGenerator;
+
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+import org.apache.spark.sql.Column;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.api.java.UDF1;
+import org.apache.spark.sql.functions;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.StructType;
+import scala.collection.JavaConverters;
+
+/**
+ * Helper class to assist in preparing {@link Dataset<Row>}s for bulk insert with the datasource implementation.
+ */
+public class HoodieDatasetBulkInsertHelper {
+
+ private static final Logger LOG = LogManager.getLogger(HoodieDatasetBulkInsertHelper.class);
+
+ private static final String RECORD_KEY_UDF_FN = "hudi_recordkey_gen_function";
+ private static final String PARTITION_PATH_UDF_FN = "hudi_partition_gen_function";
+
+ /**
+ * Prepares the input spark dataset for bulk insert. It does the following steps:
+ * 1. Uses the KeyGenerator to generate hoodie record keys and partition paths.
+ * 2. Adds the hoodie meta columns to the input spark dataset.
+ * 3. Reorders the input dataset columns so that hoodie columns appear at the beginning.
+ * 4. Sorts the input dataset by hoodie partition path and record key.
+ *
+ * @param sqlContext SQL Context
+ * @param config Hoodie Write Config
+ * @param rows Spark Input dataset
+ * @return hoodie dataset which is ready for bulk insert.
+ */
+ public static Dataset<Row> prepareHoodieDatasetForBulkInsert(SQLContext sqlContext,
+ HoodieWriteConfig config, Dataset<Row> rows, String structName, String recordNamespace) {
+ List<Column> originalFields =
+ Arrays.stream(rows.schema().fields()).map(f -> new Column(f.name())).collect(Collectors.toList());
+
+ TypedProperties properties = new TypedProperties();
+ properties.putAll(config.getProps());
+ String keyGeneratorClass = properties.getString(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY());
+ KeyGenerator keyGenerator = (KeyGenerator) ReflectionUtils.loadClass(keyGeneratorClass, properties);
+ StructType structTypeForUDF = rows.schema();
+
+ sqlContext.udf().register(RECORD_KEY_UDF_FN, (UDF1<Row, String>) keyGenerator::getRecordKey, DataTypes.StringType);
+ sqlContext.udf().register(PARTITION_PATH_UDF_FN, (UDF1<Row, String>) keyGenerator::getPartitionPath, DataTypes.StringType);
+
+ final Dataset<Row> rowDatasetWithRecordKeys = rows.withColumn(HoodieRecord.RECORD_KEY_METADATA_FIELD,
+ callUDF(RECORD_KEY_UDF_FN, org.apache.spark.sql.functions.struct(
+ JavaConverters.collectionAsScalaIterableConverter(originalFields).asScala().toSeq())));
+
+ final Dataset<Row> rowDatasetWithRecordKeysAndPartitionPath =
+ rowDatasetWithRecordKeys.withColumn(HoodieRecord.PARTITION_PATH_METADATA_FIELD,
+ callUDF(PARTITION_PATH_UDF_FN,
+ org.apache.spark.sql.functions.struct(
+ JavaConverters.collectionAsScalaIterableConverter(originalFields).asScala().toSeq())));
+
+ // Add other empty hoodie fields which will be populated before writing to parquet.
+ Dataset<Row> rowDatasetWithHoodieColumns =
+ rowDatasetWithRecordKeysAndPartitionPath.withColumn(HoodieRecord.COMMIT_TIME_METADATA_FIELD,
+ functions.lit("").cast(DataTypes.StringType))
+ .withColumn(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD,
+ functions.lit("").cast(DataTypes.StringType))
+ .withColumn(HoodieRecord.FILENAME_METADATA_FIELD,
+ functions.lit("").cast(DataTypes.StringType));
+ List<Column> orderedFields = Stream.concat(HoodieRecord.HOODIE_META_COLUMNS.stream().map(Column::new),
+ originalFields.stream()).collect(Collectors.toList());
+ Dataset<Row> colOrderedDataset = rowDatasetWithHoodieColumns.select(
+ JavaConverters.collectionAsScalaIterableConverter(orderedFields).asScala().toSeq());
+
+ return colOrderedDataset
+ .sort(functions.col(HoodieRecord.PARTITION_PATH_METADATA_FIELD), functions.col(HoodieRecord.RECORD_KEY_METADATA_FIELD))
+ .coalesce(config.getBulkInsertShuffleParallelism());
+ }
+}
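
A minimal usage sketch of the helper above (not part of the commit). 'sqlContext', 'writeConfig' and 'df' are assumed inputs, and the struct name and namespace are placeholder values; the write config is expected to carry the key generator class in its properties, since the helper resolves it via DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY().

Dataset<Row> preparedDf = HoodieDatasetBulkInsertHelper.prepareHoodieDatasetForBulkInsert(
    sqlContext, writeConfig, df, "hoodie_trips_record", "hoodie.trips");
// preparedDf now carries the five _hoodie_* meta columns first (record key and partition
// path filled in by the key generator UDFs, the other three left empty), sorted by
// partition path and record key and coalesced to the bulk insert shuffle parallelism.
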
diff --git a/hudi-spark/src/main/java/org/apache/hudi/internal/DefaultSource.java b/hudi-spark/src/main/java/org/apache/hudi/internal/DefaultSource.java
new file mode 100644
index 0000000..5fb71df
--- /dev/null
+++ b/hudi-spark/src/main/java/org/apache/hudi/internal/DefaultSource.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.internal;
+
+import org.apache.hudi.DataSourceUtils;
+import org.apache.hudi.config.HoodieWriteConfig;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+import org.apache.spark.sql.SaveMode;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.sources.DataSourceRegister;
+import org.apache.spark.sql.sources.v2.DataSourceOptions;
+import org.apache.spark.sql.sources.v2.DataSourceV2;
+import org.apache.spark.sql.sources.v2.ReadSupport;
+import org.apache.spark.sql.sources.v2.WriteSupport;
+import org.apache.spark.sql.sources.v2.reader.DataSourceReader;
+import org.apache.spark.sql.sources.v2.writer.DataSourceWriter;
+import org.apache.spark.sql.types.StructType;
+
+import java.util.Optional;
+
+/**
+ * DataSource V2 implementation for managing internal write logic. Only called internally.
+ */
+public class DefaultSource implements DataSourceV2, ReadSupport, WriteSupport,
+ DataSourceRegister {
+
+ private static final Logger LOG = LogManager
+ .getLogger(DefaultSource.class);
+
+ private SparkSession sparkSession = null;
+ private Configuration configuration = null;
+
+ @Override
+ public String shortName() {
+ return "hudi_internal";
+ }
+
+ @Override
+ public DataSourceReader createReader(StructType schema, DataSourceOptions options) {
+ return null;
+ }
+
+ @Override
+ public DataSourceReader createReader(DataSourceOptions options) {
+ return null;
+ }
+
+ @Override
+ public Optional<DataSourceWriter> createWriter(String writeUUID, StructType schema, SaveMode mode,
+ DataSourceOptions options) {
+ String instantTime = options.get(HoodieDataSourceInternalWriter.INSTANT_TIME_OPT_KEY).get();
+ String path = options.get("path").get();
+ String tblName = options.get(HoodieWriteConfig.TABLE_NAME).get();
+ HoodieWriteConfig config = DataSourceUtils.createHoodieConfig(null, path, tblName, options.asMap());
+ return Optional.of(new HoodieDataSourceInternalWriter(instantTime, config, schema, getSparkSession(),
+ getConfiguration()));
+ }
+
+ private SparkSession getSparkSession() {
+ if (sparkSession == null) {
+ sparkSession = SparkSession.builder().getOrCreate();
+ }
+ return sparkSession;
+ }
+
+ private Configuration getConfiguration() {
+ if (configuration == null) {
+ this.configuration = getSparkSession().sparkContext().hadoopConfiguration();
+ }
+ return configuration;
+ }
+}
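
For illustration only, here is a minimal sketch (not part of this patch) of how this internal source could be invoked from the driver once a Dataset<Row> has already been prepared with the Hudi meta columns. The option keys mirror the constants read in createWriter(); the table name, base path and dataset variable are hypothetical.

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;

public class InternalSourceWriteSketch {
  // prepared: rows already populated with the Hudi meta columns and sorted/coalesced upstream
  public static void bulkInsert(Dataset<Row> prepared, String basePath, String tableName, String instantTime) {
    prepared.write()
        .format("hudi_internal")                        // short name registered by DefaultSource
        .option("hoodie.instant.time", instantTime)     // HoodieDataSourceInternalWriter.INSTANT_TIME_OPT_KEY
        .option("path", basePath)
        .option("hoodie.table.name", tableName)         // HoodieWriteConfig.TABLE_NAME
        .mode(SaveMode.Append)
        .save();
  }
}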
diff --git a/hudi-spark/src/main/java/org/apache/hudi/internal/HoodieBulkInsertDataInternalWriter.java b/hudi-spark/src/main/java/org/apache/hudi/internal/HoodieBulkInsertDataInternalWriter.java
new file mode 100644
index 0000000..7aa0fc6
--- /dev/null
+++ b/hudi-spark/src/main/java/org/apache/hudi/internal/HoodieBulkInsertDataInternalWriter.java
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.internal;
+
+import org.apache.hudi.client.HoodieInternalWriteStatus;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.io.HoodieRowCreateHandle;
+import org.apache.hudi.table.HoodieTable;
+
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.sources.v2.writer.DataWriter;
+import org.apache.spark.sql.sources.v2.writer.WriterCommitMessage;
+import org.apache.spark.sql.types.StructType;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.UUID;
+
+/**
+ * Hoodie's implementation of {@link DataWriter<InternalRow>}, used in the datasource implementation of bulk insert.
+ */
+public class HoodieBulkInsertDataInternalWriter implements DataWriter<InternalRow> {
+
+ private static final long serialVersionUID = 1L;
+ private static final Logger LOG = LogManager.getLogger(HoodieBulkInsertDataInternalWriter.class);
+
+ private final String instantTime;
+ private final int taskPartitionId;
+ private final long taskId;
+ private final long taskEpochId;
+ private final HoodieTable hoodieTable;
+ private final HoodieWriteConfig writeConfig;
+ private final StructType structType;
+ private final List<HoodieInternalWriteStatus> writeStatusList = new ArrayList<>();
+
+ private HoodieRowCreateHandle handle;
+ private String lastKnownPartitionPath = null;
+ private String fileIdPrefix = null;
+ private int numFilesWritten = 0;
+
+ public HoodieBulkInsertDataInternalWriter(HoodieTable hoodieTable, HoodieWriteConfig writeConfig,
+ String instantTime, int taskPartitionId, long taskId, long taskEpochId,
+ StructType structType) {
+ this.hoodieTable = hoodieTable;
+ this.writeConfig = writeConfig;
+ this.instantTime = instantTime;
+ this.taskPartitionId = taskPartitionId;
+ this.taskId = taskId;
+ this.taskEpochId = taskEpochId;
+ this.structType = structType;
+ this.fileIdPrefix = UUID.randomUUID().toString();
+ }
+
+ @Override
+ public void write(InternalRow record) throws IOException {
+ try {
+ String partitionPath = record.getUTF8String(
+ HoodieRecord.HOODIE_META_COLUMNS_NAME_TO_POS.get(HoodieRecord.PARTITION_PATH_METADATA_FIELD)).toString();
+
+ if ((lastKnownPartitionPath == null) || !lastKnownPartitionPath.equals(partitionPath) || !handle.canWrite()) {
+ LOG.info("Creating new file for partition path " + partitionPath);
+ createNewHandle(partitionPath);
+ lastKnownPartitionPath = partitionPath;
+ }
+ handle.write(record);
+ } catch (Throwable t) {
+ LOG.error("Global error thrown while trying to write records in HoodieRowCreateHandle ", t);
+ throw t;
+ }
+ }
+
+ @Override
+ public WriterCommitMessage commit() throws IOException {
+ close();
+ return new HoodieWriterCommitMessage(writeStatusList);
+ }
+
+ @Override
+ public void abort() throws IOException {
+ }
+
+ private void createNewHandle(String partitionPath) throws IOException {
+ if (null != handle) {
+ close();
+ }
+ handle = new HoodieRowCreateHandle(hoodieTable, writeConfig, partitionPath, getNextFileId(),
+ instantTime, taskPartitionId, taskId, taskEpochId, structType);
+ }
+
+ public void close() throws IOException {
+ if (null != handle) {
+ writeStatusList.add(handle.close());
+ }
+ }
+
+ protected String getNextFileId() {
+ return String.format("%s-%d", fileIdPrefix, numFilesWritten++);
+ }
+}
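
To make the per-task lifecycle concrete: Spark's DataSourceV2 write path obtains one writer per input partition and drives it through write(), then commit() or abort(). Below is a hedged sketch of that loop, not code from this patch; the factory, row iterator and id values are placeholders.

import java.io.IOException;
import java.util.Iterator;

import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.sources.v2.writer.DataWriter;
import org.apache.spark.sql.sources.v2.writer.DataWriterFactory;
import org.apache.spark.sql.sources.v2.writer.WriterCommitMessage;

public class TaskWriteSketch {
  // Drives one writer through the write/commit/abort lifecycle, mirroring what Spark's
  // data writing task does for each partition of the incoming dataset.
  public static WriterCommitMessage writePartition(DataWriterFactory<InternalRow> factory,
      Iterator<InternalRow> rows, int partitionId, long taskId, long epochId) throws IOException {
    DataWriter<InternalRow> writer = factory.createDataWriter(partitionId, taskId, epochId);
    try {
      while (rows.hasNext()) {
        // A new HoodieRowCreateHandle is opened whenever the partition path changes
        // or the current handle can no longer accept rows.
        writer.write(rows.next());
      }
      // Closes the open handle and returns the accumulated HoodieInternalWriteStatus list.
      return writer.commit();
    } catch (IOException e) {
      writer.abort();
      throw e;
    }
  }
}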
diff --git a/hudi-spark/src/main/java/org/apache/hudi/internal/HoodieBulkInsertDataInternalWriterFactory.java b/hudi-spark/src/main/java/org/apache/hudi/internal/HoodieBulkInsertDataInternalWriterFactory.java
new file mode 100644
index 0000000..1dd0aa3
--- /dev/null
+++ b/hudi-spark/src/main/java/org/apache/hudi/internal/HoodieBulkInsertDataInternalWriterFactory.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.internal;
+
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.table.HoodieTable;
+
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.sources.v2.writer.DataWriter;
+import org.apache.spark.sql.sources.v2.writer.DataWriterFactory;
+import org.apache.spark.sql.types.StructType;
+
+/**
+ * Factory to assist in instantiating {@link HoodieBulkInsertDataInternalWriter}.
+ */
+public class HoodieBulkInsertDataInternalWriterFactory implements DataWriterFactory<InternalRow> {
+
+ private final String instantTime;
+ private final HoodieTable hoodieTable;
+ private final HoodieWriteConfig writeConfig;
+ private final StructType structType;
+
+ public HoodieBulkInsertDataInternalWriterFactory(HoodieTable hoodieTable, HoodieWriteConfig writeConfig,
+ String instantTime, StructType structType) {
+ this.hoodieTable = hoodieTable;
+ this.writeConfig = writeConfig;
+ this.instantTime = instantTime;
+ this.structType = structType;
+ }
+
+ @Override
+ public DataWriter<InternalRow> createDataWriter(int partitionId, long taskId, long epochId) {
+ return new HoodieBulkInsertDataInternalWriter(hoodieTable, writeConfig, instantTime, partitionId, taskId, epochId,
+ structType);
+ }
+}
diff --git a/hudi-spark/src/main/java/org/apache/hudi/internal/HoodieDataSourceInternalWriter.java b/hudi-spark/src/main/java/org/apache/hudi/internal/HoodieDataSourceInternalWriter.java
new file mode 100644
index 0000000..6e67dc9
--- /dev/null
+++ b/hudi-spark/src/main/java/org/apache/hudi/internal/HoodieDataSourceInternalWriter.java
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.internal;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hudi.client.HoodieWriteClient;
+import org.apache.hudi.common.model.HoodieWriteStat;
+import org.apache.hudi.common.model.WriteOperationType;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
+import org.apache.hudi.common.table.timeline.HoodieInstant.State;
+import org.apache.hudi.common.table.timeline.HoodieTimeline;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.exception.HoodieException;
+import org.apache.hudi.table.HoodieTable;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.sources.v2.writer.DataSourceWriter;
+import org.apache.spark.sql.sources.v2.writer.DataWriterFactory;
+import org.apache.spark.sql.sources.v2.writer.WriterCommitMessage;
+import org.apache.spark.sql.types.StructType;
+
+/**
+ * Implementation of {@link DataSourceWriter} for the "hudi_internal" datasource, used in the datasource
+ * implementation of bulk insert.
+ */
+public class HoodieDataSourceInternalWriter implements DataSourceWriter {
+
+ private static final long serialVersionUID = 1L;
+ private static final Logger LOG = LogManager.getLogger(HoodieDataSourceInternalWriter.class);
+ public static final String INSTANT_TIME_OPT_KEY = "hoodie.instant.time";
+
+ private final String instantTime;
+ private final HoodieTableMetaClient metaClient;
+ private final HoodieWriteConfig writeConfig;
+ private final StructType structType;
+ private final HoodieWriteClient writeClient;
+ private final HoodieTable hoodieTable;
+ private final WriteOperationType operationType;
+
+ public HoodieDataSourceInternalWriter(String instantTime, HoodieWriteConfig writeConfig, StructType structType,
+ SparkSession sparkSession, Configuration configuration) {
+ this.instantTime = instantTime;
+ this.writeConfig = writeConfig;
+ this.structType = structType;
+ this.operationType = WriteOperationType.BULK_INSERT;
+ this.writeClient = new HoodieWriteClient<>(new JavaSparkContext(sparkSession.sparkContext()), writeConfig, true);
+ writeClient.setOperationType(operationType);
+ writeClient.startCommitWithTime(instantTime);
+ this.metaClient = new HoodieTableMetaClient(configuration, writeConfig.getBasePath());
+ this.hoodieTable = HoodieTable.create(metaClient, writeConfig, metaClient.getHadoopConf());
+ }
+
+ @Override
+ public DataWriterFactory<InternalRow> createWriterFactory() {
+ metaClient.getActiveTimeline().transitionRequestedToInflight(
+ new HoodieInstant(State.REQUESTED, HoodieTimeline.COMMIT_ACTION, instantTime), Option.empty());
+ if (WriteOperationType.BULK_INSERT == operationType) {
+ return new HoodieBulkInsertDataInternalWriterFactory(hoodieTable, writeConfig, instantTime, structType);
+ } else {
+ throw new IllegalArgumentException("Write Operation Type " + operationType + " not supported");
+ }
+ }
+
+ @Override
+ public boolean useCommitCoordinator() {
+ return true;
+ }
+
+ @Override
+ public void onDataWriterCommit(WriterCommitMessage message) {
+ LOG.info("Received commit of a data writer =" + message);
+ }
+
+ @Override
+ public void commit(WriterCommitMessage[] messages) {
+ List<HoodieWriteStat> writeStatList = Arrays.stream(messages).map(m -> (HoodieWriterCommitMessage) m)
+ .flatMap(m -> m.getWriteStatuses().stream().map(m2 -> m2.getStat())).collect(Collectors.toList());
+
+ try {
+ writeClient.commitStats(instantTime, writeStatList, Option.empty());
+ } catch (Exception ioe) {
+ throw new HoodieException(ioe.getMessage(), ioe);
+ } finally {
+ writeClient.close();
+ }
+ }
+
+ @Override
+ public void abort(WriterCommitMessage[] messages) {
+ LOG.error("Commit " + instantTime + " aborted ");
+ writeClient.rollback(instantTime);
+ writeClient.close();
+ }
+}
diff --git a/hudi-spark/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java b/hudi-spark/src/main/java/org/apache/hudi/internal/HoodieWriterCommitMessage.java
similarity index 54%
copy from hudi-spark/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java
copy to hudi-spark/src/main/java/org/apache/hudi/internal/HoodieWriterCommitMessage.java
index de4f50b..757000c 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/internal/HoodieWriterCommitMessage.java
@@ -16,37 +16,30 @@
* limitations under the License.
*/
-package org.apache.hudi.keygen;
-
-import org.apache.hudi.DataSourceWriteOptions;
-import org.apache.hudi.common.config.TypedProperties;
-
-import org.apache.avro.generic.GenericRecord;
+package org.apache.hudi.internal;
import java.util.ArrayList;
import java.util.List;
+import org.apache.hudi.client.HoodieInternalWriteStatus;
+import org.apache.spark.sql.sources.v2.writer.WriterCommitMessage;
/**
- * Simple Key generator for unpartitioned Hive Tables.
+ * Hoodie's {@link WriterCommitMessage} used in the datasource implementation.
*/
-public class NonpartitionedKeyGenerator extends SimpleKeyGenerator {
+public class HoodieWriterCommitMessage implements WriterCommitMessage {
- private static final String EMPTY_PARTITION = "";
+ private List<HoodieInternalWriteStatus> writeStatuses = new ArrayList<>();
- protected final String recordKeyField;
-
- public NonpartitionedKeyGenerator(TypedProperties props) {
- super(props);
- this.recordKeyField = props.getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY());
+ public HoodieWriterCommitMessage(List<HoodieInternalWriteStatus> writeStatuses) {
+ this.writeStatuses = writeStatuses;
}
- @Override
- public String getPartitionPath(GenericRecord record) {
- return EMPTY_PARTITION;
+ public List<HoodieInternalWriteStatus> getWriteStatuses() {
+ return writeStatuses;
}
@Override
- public List<String> getPartitionPathFields() {
- return new ArrayList<>();
+ public String toString() {
+ return "HoodieWriterCommitMessage{" + "writeStatuses=" + writeStatuses + '}';
}
-}
\ No newline at end of file
+}
diff --git a/hudi-spark/src/main/java/org/apache/hudi/keygen/BuiltinKeyGenerator.java b/hudi-spark/src/main/java/org/apache/hudi/keygen/BuiltinKeyGenerator.java
new file mode 100644
index 0000000..8c973a6
--- /dev/null
+++ b/hudi-spark/src/main/java/org/apache/hudi/keygen/BuiltinKeyGenerator.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.keygen;
+
+import org.apache.hudi.DataSourceWriteOptions;
+import org.apache.hudi.common.config.TypedProperties;
+import org.apache.hudi.common.model.HoodieKey;
+import org.apache.hudi.exception.HoodieKeyException;
+
+import org.apache.avro.generic.GenericRecord;
+import org.apache.spark.sql.types.StructType;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * Base class for all the built-in key generators. Contains methods structured for
+ * code reuse amongst them.
+ */
+public abstract class BuiltinKeyGenerator extends KeyGenerator {
+
+ protected List<String> recordKeyFields;
+ protected List<String> partitionPathFields;
+ protected final boolean encodePartitionPath;
+ protected final boolean hiveStylePartitioning;
+
+ protected Map<String, List<Integer>> recordKeyPositions = new HashMap<>();
+ protected Map<String, List<Integer>> partitionPathPositions = new HashMap<>();
+ protected StructType structType;
+
+ protected BuiltinKeyGenerator(TypedProperties config) {
+ super(config);
+ this.encodePartitionPath = config.getBoolean(DataSourceWriteOptions.URL_ENCODE_PARTITIONING_OPT_KEY(),
+ Boolean.parseBoolean(DataSourceWriteOptions.DEFAULT_URL_ENCODE_PARTITIONING_OPT_VAL()));
+ this.hiveStylePartitioning = config.getBoolean(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING_OPT_KEY(),
+ Boolean.parseBoolean(DataSourceWriteOptions.DEFAULT_HIVE_STYLE_PARTITIONING_OPT_VAL()));
+ }
+
+ /**
+ * Generate a record key from the provided generic record.
+ */
+ public abstract String getRecordKey(GenericRecord record);
+
+ /**
+ * Generate a partition path from the provided generic record.
+ */
+ public abstract String getPartitionPath(GenericRecord record);
+
+ /**
+ * Generate a {@link HoodieKey} from the provided generic record.
+ */
+ public final HoodieKey getKey(GenericRecord record) {
+ if (getRecordKeyFields() == null || getPartitionPathFields() == null) {
+ throw new HoodieKeyException("Unable to find field names for record key or partition path in cfg");
+ }
+ return new HoodieKey(getRecordKey(record), getPartitionPath(record));
+ }
+
+ @Override
+ public final List<String> getRecordKeyFieldNames() {
+ // For nested columns, pick top level column name
+ return getRecordKeyFields().stream().map(k -> {
+ int idx = k.indexOf('.');
+ return idx > 0 ? k.substring(0, idx) : k;
+ }).collect(Collectors.toList());
+ }
+
+ void buildFieldPositionMapIfNeeded(StructType structType) {
+ if (this.structType == null) {
+ // parse simple fields
+ getRecordKeyFields().stream()
+ .filter(f -> !(f.contains(".")))
+ .forEach(f -> {
+ if (structType.getFieldIndex(f).isDefined()) {
+ recordKeyPositions.put(f, Collections.singletonList((Integer) (structType.getFieldIndex(f).get())));
+ } else {
+ throw new HoodieKeyException("record key field not found in schema: \"" + f + "\"");
+ }
+ });
+ // parse nested fields
+ getRecordKeyFields().stream()
+ .filter(f -> f.contains("."))
+ .forEach(f -> recordKeyPositions.put(f, RowKeyGeneratorHelper.getNestedFieldIndices(structType, f, true)));
+ // parse simple fields
+ if (getPartitionPathFields() != null) {
+ getPartitionPathFields().stream().filter(f -> !f.isEmpty()).filter(f -> !(f.contains(".")))
+ .forEach(f -> {
+ if (structType.getFieldIndex(f).isDefined()) {
+ partitionPathPositions.put(f,
+ Collections.singletonList((Integer) (structType.getFieldIndex(f).get())));
+ } else {
+ partitionPathPositions.put(f, Collections.singletonList(-1));
+ }
+ });
+ // parse nested fields
+ getPartitionPathFields().stream().filter(f -> !f.isEmpty()).filter(f -> f.contains("."))
+ .forEach(f -> partitionPathPositions.put(f,
+ RowKeyGeneratorHelper.getNestedFieldIndices(structType, f, false)));
+ }
+ this.structType = structType;
+ }
+ }
+
+ public List<String> getRecordKeyFields() {
+ return recordKeyFields;
+ }
+
+ public List<String> getPartitionPathFields() {
+ return partitionPathFields;
+ }
+}
diff --git a/hudi-spark/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java b/hudi-spark/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java
index e899642..664824c 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java
@@ -24,8 +24,8 @@ import org.apache.hudi.common.config.TypedProperties;
import org.apache.avro.generic.GenericRecord;
import java.util.Arrays;
-import java.util.List;
import java.util.stream.Collectors;
+import org.apache.spark.sql.Row;
/**
* Complex key generator, which takes names of fields to be used for recordKey and partitionPath as configs.
@@ -34,39 +34,34 @@ public class ComplexKeyGenerator extends BuiltinKeyGenerator {
public static final String DEFAULT_RECORD_KEY_SEPARATOR = ":";
- protected final List<String> recordKeyFields;
- protected final List<String> partitionPathFields;
- protected final boolean hiveStylePartitioning;
- protected final boolean encodePartitionPath;
-
public ComplexKeyGenerator(TypedProperties props) {
super(props);
- this.recordKeyFields = Arrays.stream(props.getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY()).split(",")).map(String::trim).collect(Collectors.toList());
- this.partitionPathFields =
- Arrays.stream(props.getString(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY()).split(",")).map(String::trim).collect(Collectors.toList());
- this.hiveStylePartitioning = props.getBoolean(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING_OPT_KEY(),
- Boolean.parseBoolean(DataSourceWriteOptions.DEFAULT_HIVE_STYLE_PARTITIONING_OPT_VAL()));
- this.encodePartitionPath = props.getBoolean(DataSourceWriteOptions.URL_ENCODE_PARTITIONING_OPT_KEY(),
- Boolean.parseBoolean(DataSourceWriteOptions.DEFAULT_URL_ENCODE_PARTITIONING_OPT_VAL()));
+ this.recordKeyFields = Arrays.stream(props.getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY())
+ .split(",")).map(String::trim).collect(Collectors.toList());
+ this.partitionPathFields = Arrays.stream(props.getString(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY())
+ .split(",")).map(String::trim).collect(Collectors.toList());
}
@Override
public String getRecordKey(GenericRecord record) {
- return KeyGenUtils.getRecordKey(record, recordKeyFields);
+ return KeyGenUtils.getRecordKey(record, getRecordKeyFields());
}
@Override
public String getPartitionPath(GenericRecord record) {
- return KeyGenUtils.getRecordPartitionPath(record, partitionPathFields, hiveStylePartitioning, encodePartitionPath);
+ return KeyGenUtils.getRecordPartitionPath(record, getPartitionPathFields(), hiveStylePartitioning, encodePartitionPath);
}
@Override
- public List<String> getRecordKeyFields() {
- return recordKeyFields;
+ public String getRecordKey(Row row) {
+ buildFieldPositionMapIfNeeded(row.schema());
+ return RowKeyGeneratorHelper.getRecordKeyFromRow(row, getRecordKeyFields(), recordKeyPositions, true);
}
@Override
- public List<String> getPartitionPathFields() {
- return partitionPathFields;
+ public String getPartitionPath(Row row) {
+ buildFieldPositionMapIfNeeded(row.schema());
+ return RowKeyGeneratorHelper.getPartitionPathFromRow(row, getPartitionPathFields(),
+ hiveStylePartitioning, partitionPathPositions);
}
}
\ No newline at end of file
diff --git a/hudi-spark/src/main/java/org/apache/hudi/keygen/CustomKeyGenerator.java b/hudi-spark/src/main/java/org/apache/hudi/keygen/CustomKeyGenerator.java
index e5cc61c..2ac6b77 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/keygen/CustomKeyGenerator.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/keygen/CustomKeyGenerator.java
@@ -20,37 +20,32 @@ package org.apache.hudi.keygen;
import org.apache.hudi.DataSourceWriteOptions;
import org.apache.hudi.common.config.TypedProperties;
-
-import org.apache.avro.generic.GenericRecord;
+import org.apache.hudi.common.util.Option;
import org.apache.hudi.exception.HoodieDeltaStreamerException;
import org.apache.hudi.exception.HoodieKeyException;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.spark.sql.Row;
+
import java.io.IOException;
import java.util.Arrays;
-import java.util.List;
import java.util.stream.Collectors;
/**
- * This is a generic implementation of KeyGenerator where users can configure record key as a single field or a combination of fields.
- * Similarly partition path can be configured to have multiple fields or only one field. This class expects value for prop
- * "hoodie.datasource.write.partitionpath.field" in a specific format. For example:
+ * This is a generic implementation of KeyGenerator where users can configure record key as a single field or a combination of fields. Similarly partition path can be configured to have multiple
+ * fields or only one field. This class expects value for prop "hoodie.datasource.write.partitionpath.field" in a specific format. For example:
*
* properties.put("hoodie.datasource.write.partitionpath.field", "field1:PartitionKeyType1,field2:PartitionKeyType2").
*
* The complete partition path is created as <value for field1 basis PartitionKeyType1>/<value for field2 basis PartitionKeyType2> and so on.
*
- * Few points to consider:
- * 1. If you want to customize some partition path field on a timestamp basis, you can use field1:timestampBased
- * 2. If you simply want to have the value of your configured field in the partition path, use field1:simple
- * 3. If you want your table to be non partitioned, simply leave it as blank.
+ * Few points to consider: 1. If you want to customize some partition path field on a timestamp basis, you can use field1:timestampBased 2. If you simply want to have the value of your configured
+ * field in the partition path, use field1:simple 3. If you want your table to be non partitioned, simply leave it as blank.
*
* RecordKey is internally generated using either SimpleKeyGenerator or ComplexKeyGenerator.
*/
public class CustomKeyGenerator extends BuiltinKeyGenerator {
- protected final List<String> recordKeyFields;
- protected final List<String> partitionPathFields;
- protected final TypedProperties properties;
private static final String DEFAULT_PARTITION_PATH_SEPARATOR = "/";
private static final String SPLIT_REGEX = ":";
@@ -63,15 +58,22 @@ public class CustomKeyGenerator extends BuiltinKeyGenerator {
public CustomKeyGenerator(TypedProperties props) {
super(props);
- this.properties = props;
this.recordKeyFields = Arrays.stream(props.getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY()).split(",")).map(String::trim).collect(Collectors.toList());
- this.partitionPathFields =
- Arrays.stream(props.getString(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY()).split(",")).map(String::trim).collect(Collectors.toList());
+ this.partitionPathFields = Arrays.stream(props.getString(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY()).split(",")).map(String::trim).collect(Collectors.toList());
+ }
+
+ @Override
+ public String getPartitionPath(Row row) {
+ return getPartitionPath(Option.empty(), Option.of(row));
}
@Override
public String getPartitionPath(GenericRecord record) {
- if (partitionPathFields == null) {
+ return getPartitionPath(Option.of(record), Option.empty());
+ }
+
+ private String getPartitionPath(Option<GenericRecord> record, Option<Row> row) {
+ if (getPartitionPathFields() == null) {
throw new HoodieKeyException("Unable to find field names for partition path in cfg");
}
@@ -79,10 +81,10 @@ public class CustomKeyGenerator extends BuiltinKeyGenerator {
StringBuilder partitionPath = new StringBuilder();
//Corresponds to no partition case
- if (partitionPathFields.size() == 1 && partitionPathFields.get(0).isEmpty()) {
+ if (getPartitionPathFields().size() == 1 && getPartitionPathFields().get(0).isEmpty()) {
return "";
}
- for (String field : partitionPathFields) {
+ for (String field : getPartitionPathFields()) {
String[] fieldWithType = field.split(SPLIT_REGEX);
if (fieldWithType.length != 2) {
throw new HoodieKeyException("Unable to find field names for partition path in proper format");
@@ -92,11 +94,19 @@ public class CustomKeyGenerator extends BuiltinKeyGenerator {
PartitionKeyType keyType = PartitionKeyType.valueOf(fieldWithType[1].toUpperCase());
switch (keyType) {
case SIMPLE:
- partitionPath.append(new SimpleKeyGenerator(properties, partitionPathField).getPartitionPath(record));
+ if (record.isPresent()) {
+ partitionPath.append(new SimpleKeyGenerator(config, partitionPathField).getPartitionPath(record.get()));
+ } else {
+ partitionPath.append(new SimpleKeyGenerator(config, partitionPathField).getPartitionPath(row.get()));
+ }
break;
case TIMESTAMP:
try {
- partitionPath.append(new TimestampBasedKeyGenerator(properties, partitionPathField).getPartitionPath(record));
+ if (record.isPresent()) {
+ partitionPath.append(new TimestampBasedKeyGenerator(config, partitionPathField).getPartitionPath(record.get()));
+ } else {
+ partitionPath.append(new TimestampBasedKeyGenerator(config, partitionPathField).getPartitionPath(row.get()));
+ }
} catch (IOException ioe) {
throw new HoodieDeltaStreamerException("Unable to initialise TimestampBasedKeyGenerator class");
}
@@ -114,20 +124,23 @@ public class CustomKeyGenerator extends BuiltinKeyGenerator {
@Override
public String getRecordKey(GenericRecord record) {
- if (recordKeyFields == null || recordKeyFields.isEmpty()) {
- throw new HoodieKeyException("Unable to find field names for record key in cfg");
- }
-
- return recordKeyFields.size() == 1 ? new SimpleKeyGenerator(properties).getRecordKey(record) : new ComplexKeyGenerator(properties).getRecordKey(record);
+ validateRecordKeyFields();
+ return getRecordKeyFields().size() == 1
+ ? new SimpleKeyGenerator(config).getRecordKey(record)
+ : new ComplexKeyGenerator(config).getRecordKey(record);
}
@Override
- public List<String> getRecordKeyFields() {
- return recordKeyFields;
+ public String getRecordKey(Row row) {
+ validateRecordKeyFields();
+ return getRecordKeyFields().size() == 1
+ ? new SimpleKeyGenerator(config).getRecordKey(row)
+ : new ComplexKeyGenerator(config).getRecordKey(row);
}
- @Override
- public List<String> getPartitionPathFields() {
- return partitionPathFields;
+ private void validateRecordKeyFields() {
+ if (getRecordKeyFields() == null || getRecordKeyFields().isEmpty()) {
+ throw new HoodieKeyException("Unable to find field names for record key in cfg");
+ }
}
}
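
As a hedged configuration sketch for the format described in the class javadoc: each partition path entry carries a PartitionKeyType suffix, and the switch above accepts SIMPLE and TIMESTAMP (case-insensitive). The field names below are made up, and a timestamp-typed field would additionally need the TimestampBasedKeyGenerator properties.

import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.keygen.CustomKeyGenerator;

public class CustomKeyGenConfigSketch {
  public static void main(String[] args) {
    TypedProperties props = new TypedProperties();
    props.setProperty("hoodie.datasource.write.recordkey.field", "driver_id");
    // Two simple partition fields, joined with "/" in the resulting partition path.
    props.setProperty("hoodie.datasource.write.partitionpath.field", "country:SIMPLE,city:SIMPLE");

    CustomKeyGenerator keyGen = new CustomKeyGenerator(props);
    // For a record with country=US and city=SF this yields the partition path "US/SF";
    // with hive-style partitioning enabled it would be "country=US/city=SF".
  }
}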
diff --git a/hudi-spark/src/main/java/org/apache/hudi/keygen/GlobalDeleteKeyGenerator.java b/hudi-spark/src/main/java/org/apache/hudi/keygen/GlobalDeleteKeyGenerator.java
index 5851a9d..243493b 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/keygen/GlobalDeleteKeyGenerator.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/keygen/GlobalDeleteKeyGenerator.java
@@ -22,30 +22,28 @@ import org.apache.hudi.DataSourceWriteOptions;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.avro.generic.GenericRecord;
+import org.apache.spark.sql.Row;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
-import java.util.stream.Collectors;
/**
- * Key generator for deletes using global indices. Global index deletes do not require partition value
- * so this key generator avoids using partition value for generating HoodieKey.
+ * Key generator for deletes using global indices. Global index deletes do not require partition value so this key generator
+ * avoids using partition value for generating HoodieKey.
*/
public class GlobalDeleteKeyGenerator extends BuiltinKeyGenerator {
private static final String EMPTY_PARTITION = "";
- protected final List<String> recordKeyFields;
-
public GlobalDeleteKeyGenerator(TypedProperties config) {
super(config);
- this.recordKeyFields = Arrays.stream(config.getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY()).split(",")).map(String::trim).collect(Collectors.toList());
+ this.recordKeyFields = Arrays.asList(config.getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY()).split(","));
}
@Override
public String getRecordKey(GenericRecord record) {
- return KeyGenUtils.getRecordKey(record, recordKeyFields);
+ return KeyGenUtils.getRecordKey(record, getRecordKeyFields());
}
@Override
@@ -54,12 +52,18 @@ public class GlobalDeleteKeyGenerator extends BuiltinKeyGenerator {
}
@Override
- public List<String> getRecordKeyFields() {
- return recordKeyFields;
+ public List<String> getPartitionPathFields() {
+ return new ArrayList<>();
}
@Override
- public List<String> getPartitionPathFields() {
- return new ArrayList<>();
+ public String getRecordKey(Row row) {
+ buildFieldPositionMapIfNeeded(row.schema());
+ return RowKeyGeneratorHelper.getRecordKeyFromRow(row, getRecordKeyFields(), recordKeyPositions, true);
+ }
+
+ @Override
+ public String getPartitionPath(Row row) {
+ return EMPTY_PARTITION;
}
-}
\ No newline at end of file
+}
diff --git a/hudi-client/src/main/java/org/apache/hudi/keygen/KeyGenUtils.java b/hudi-spark/src/main/java/org/apache/hudi/keygen/KeyGenUtils.java
similarity index 100%
rename from hudi-client/src/main/java/org/apache/hudi/keygen/KeyGenUtils.java
rename to hudi-spark/src/main/java/org/apache/hudi/keygen/KeyGenUtils.java
diff --git a/hudi-client/src/main/java/org/apache/hudi/keygen/KeyGenerator.java b/hudi-spark/src/main/java/org/apache/hudi/keygen/KeyGenerator.java
similarity index 56%
rename from hudi-client/src/main/java/org/apache/hudi/keygen/KeyGenerator.java
rename to hudi-spark/src/main/java/org/apache/hudi/keygen/KeyGenerator.java
index 1a798af..a11bc84 100644
--- a/hudi-client/src/main/java/org/apache/hudi/keygen/KeyGenerator.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/keygen/KeyGenerator.java
@@ -18,10 +18,13 @@
package org.apache.hudi.keygen;
+import org.apache.hudi.AvroConversionHelper;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.avro.generic.GenericRecord;
+import org.apache.spark.sql.Row;
+import scala.Function1;
import java.io.Serializable;
import java.util.List;
@@ -29,9 +32,13 @@ import java.util.List;
/**
* Abstract class to extend for plugging in extraction of {@link HoodieKey} from an Avro record.
*/
-public abstract class KeyGenerator implements Serializable {
+public abstract class KeyGenerator implements Serializable, KeyGeneratorInterface {
+
+ private static final String STRUCT_NAME = "hoodieRowTopLevelField";
+ private static final String NAMESPACE = "hoodieRow";
protected transient TypedProperties config;
+ private transient Function1<Object, Object> converterFn = null;
protected KeyGenerator(TypedProperties config) {
this.config = config;
@@ -51,4 +58,30 @@ public abstract class KeyGenerator implements Serializable {
throw new UnsupportedOperationException("Bootstrap not supported for key generator. "
+ "Please override this method in your custom key generator.");
}
+
+ /**
+ * Fetch record key from {@link Row}.
+ * @param row instance of {@link Row} from which record key is requested.
+ * @return the record key of interest from {@link Row}.
+ */
+ public String getRecordKey(Row row) {
+ if (null == converterFn) {
+ converterFn = AvroConversionHelper.createConverterToAvro(row.schema(), STRUCT_NAME, NAMESPACE);
+ }
+ GenericRecord genericRecord = (GenericRecord) converterFn.apply(row);
+ return getKey(genericRecord).getRecordKey();
+ }
+
+ /**
+ * Fetch partition path from {@link Row}.
+ * @param row instance of {@link Row} from which partition path is requested
+ * @return the partition path of interest from {@link Row}.
+ */
+ public String getPartitionPath(Row row) {
+ if (null == converterFn) {
+ converterFn = AvroConversionHelper.createConverterToAvro(row.schema(), STRUCT_NAME, NAMESPACE);
+ }
+ GenericRecord genericRecord = (GenericRecord) converterFn.apply(row);
+ return getKey(genericRecord).getPartitionPath();
+ }
}
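
The two Row methods above are fallbacks: a custom generator that only knows how to work on Avro records still gets Row support through the converter, while the built-in generators override them to avoid that conversion. A hypothetical user-defined generator relying on the fallback might look like the sketch below (the "id" field name and the fixed partition are made up, and this assumes getKey(GenericRecord) remains the only abstract method to implement).

import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.keygen.KeyGenerator;

public class UppercaseIdKeyGenerator extends KeyGenerator {

  public UppercaseIdKeyGenerator(TypedProperties props) {
    super(props);
  }

  @Override
  public HoodieKey getKey(GenericRecord record) {
    // Only the Avro path is implemented; getRecordKey(Row)/getPartitionPath(Row)
    // inherited from KeyGenerator convert the Row to a GenericRecord and delegate here.
    return new HoodieKey(String.valueOf(record.get("id")).toUpperCase(), "default");
  }
}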
diff --git a/hudi-spark/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java b/hudi-spark/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java
index de4f50b..db51024 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java
@@ -18,10 +18,10 @@
package org.apache.hudi.keygen;
-import org.apache.hudi.DataSourceWriteOptions;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.avro.generic.GenericRecord;
+import org.apache.spark.sql.Row;
import java.util.ArrayList;
import java.util.List;
@@ -32,12 +32,10 @@ import java.util.List;
public class NonpartitionedKeyGenerator extends SimpleKeyGenerator {
private static final String EMPTY_PARTITION = "";
-
- protected final String recordKeyField;
+ private static final List<String> EMPTY_PARTITION_FIELD_LIST = new ArrayList<>();
public NonpartitionedKeyGenerator(TypedProperties props) {
super(props);
- this.recordKeyField = props.getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY());
}
@Override
@@ -47,6 +45,11 @@ public class NonpartitionedKeyGenerator extends SimpleKeyGenerator {
@Override
public List<String> getPartitionPathFields() {
- return new ArrayList<>();
+ return EMPTY_PARTITION_FIELD_LIST;
+ }
+
+ @Override
+ public String getPartitionPath(Row row) {
+ return EMPTY_PARTITION;
}
-}
\ No newline at end of file
+}
diff --git a/hudi-spark/src/main/java/org/apache/hudi/keygen/RowKeyGeneratorHelper.java b/hudi-spark/src/main/java/org/apache/hudi/keygen/RowKeyGeneratorHelper.java
new file mode 100644
index 0000000..02b8492
--- /dev/null
+++ b/hudi-spark/src/main/java/org/apache/hudi/keygen/RowKeyGeneratorHelper.java
@@ -0,0 +1,202 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.keygen;
+
+import org.apache.hudi.exception.HoodieKeyException;
+
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import scala.Option;
+
+import static org.apache.hudi.keygen.KeyGenUtils.DEFAULT_PARTITION_PATH;
+import static org.apache.hudi.keygen.KeyGenUtils.DEFAULT_PARTITION_PATH_SEPARATOR;
+import static org.apache.hudi.keygen.KeyGenUtils.EMPTY_RECORDKEY_PLACEHOLDER;
+import static org.apache.hudi.keygen.KeyGenUtils.NULL_RECORDKEY_PLACEHOLDER;
+
+/**
+ * Helper class to fetch fields from Row.
+ */
+public class RowKeyGeneratorHelper {
+
+ /**
+ * Generates record key for the corresponding {@link Row}.
+ * @param row instance of {@link Row} of interest
+ * @param recordKeyFields record key fields as a list
+ * @param recordKeyPositions record key positions for the corresponding record keys in {@code recordKeyFields}
+ * @param prefixFieldName {@code true} if the field name needs to be prefixed in the returned result, {@code false} otherwise.
+ * @return the record key thus generated
+ */
+ public static String getRecordKeyFromRow(Row row, List<String> recordKeyFields, Map<String, List<Integer>> recordKeyPositions, boolean prefixFieldName) {
+ AtomicBoolean keyIsNullOrEmpty = new AtomicBoolean(true);
+ String toReturn = recordKeyFields.stream().map(field -> {
+ String val = null;
+ List<Integer> fieldPositions = recordKeyPositions.get(field);
+ if (fieldPositions.size() == 1) { // simple field
+ Integer fieldPos = fieldPositions.get(0);
+ if (row.isNullAt(fieldPos)) {
+ val = NULL_RECORDKEY_PLACEHOLDER;
+ } else {
+ val = row.getAs(field).toString();
+ if (val.isEmpty()) {
+ val = EMPTY_RECORDKEY_PLACEHOLDER;
+ } else {
+ keyIsNullOrEmpty.set(false);
+ }
+ }
+ } else { // nested fields
+ val = getNestedFieldVal(row, recordKeyPositions.get(field)).toString();
+ if (!val.contains(NULL_RECORDKEY_PLACEHOLDER) && !val.contains(EMPTY_RECORDKEY_PLACEHOLDER)) {
+ keyIsNullOrEmpty.set(false);
+ }
+ }
+ return prefixFieldName ? (field + ":" + val) : val;
+ }).collect(Collectors.joining(","));
+ if (keyIsNullOrEmpty.get()) {
+ throw new HoodieKeyException("recordKey value: \"" + toReturn + "\" for fields: \"" + Arrays.toString(recordKeyFields.toArray()) + "\" cannot be null or empty.");
+ }
+ return toReturn;
+ }
+
+ /**
+ * Generates partition path for the corresponding {@link Row}.
+ * @param row instance of {@link Row} of interest
+ * @param partitionPathFields partition path fields as a list
+ * @param hiveStylePartitioning {@code true} if hive style partitioning is set. {@code false} otherwise
+ * @param partitionPathPositions partition path positions for the corresponding fields in {@code partitionPathFields}
+ * @return the generated partition path for the row
+ */
+ public static String getPartitionPathFromRow(Row row, List<String> partitionPathFields, boolean hiveStylePartitioning, Map<String, List<Integer>> partitionPathPositions) {
+ return IntStream.range(0, partitionPathFields.size()).mapToObj(idx -> {
+ String field = partitionPathFields.get(idx);
+ String val = null;
+ List<Integer> fieldPositions = partitionPathPositions.get(field);
+ if (fieldPositions.size() == 1) { // simple
+ Integer fieldPos = fieldPositions.get(0);
+ // for partition path, if field is not found, index will be set to -1
+ if (fieldPos == -1 || row.isNullAt(fieldPos)) {
+ val = DEFAULT_PARTITION_PATH;
+ } else {
+ val = row.getAs(field).toString();
+ if (val.isEmpty()) {
+ val = DEFAULT_PARTITION_PATH;
+ }
+ }
+ if (hiveStylePartitioning) {
+ val = field + "=" + val;
+ }
+ } else { // nested
+ Object nestedVal = getNestedFieldVal(row, partitionPathPositions.get(field));
+ if (nestedVal.toString().contains(NULL_RECORDKEY_PLACEHOLDER) || nestedVal.toString().contains(EMPTY_RECORDKEY_PLACEHOLDER)) {
+ val = hiveStylePartitioning ? field + "=" + DEFAULT_PARTITION_PATH : DEFAULT_PARTITION_PATH;
+ } else {
+ val = hiveStylePartitioning ? field + "=" + nestedVal.toString() : nestedVal.toString();
+ }
+ }
+ return val;
+ }).collect(Collectors.joining(DEFAULT_PARTITION_PATH_SEPARATOR));
+ }
+
+ /**
+ * Fetch the field value located at the requested positions.
+ * @param row instance of {@link Row} of interest
+ * @param positions tree-style positions whose leaf node value needs to be fetched and returned
+ * @return the field value at the requested positions.
+ */
+ public static Object getNestedFieldVal(Row row, List<Integer> positions) {
+ if (positions.size() == 1 && positions.get(0) == -1) {
+ return DEFAULT_PARTITION_PATH;
+ }
+ int index = 0;
+ int totalCount = positions.size();
+ Row valueToProcess = row;
+ Object toReturn = null;
+
+ while (index < totalCount) {
+ if (index < totalCount - 1) {
+ if (valueToProcess.isNullAt(positions.get(index))) {
+ toReturn = NULL_RECORDKEY_PLACEHOLDER;
+ break;
+ }
+ valueToProcess = (Row) valueToProcess.get(positions.get(index));
+ } else { // last index
+ if (valueToProcess.getAs(positions.get(index)).toString().isEmpty()) {
+ toReturn = EMPTY_RECORDKEY_PLACEHOLDER;
+ break;
+ }
+ toReturn = valueToProcess.getAs(positions.get(index));
+ }
+ index++;
+ }
+ return toReturn;
+ }
+
+ /**
+ * Generate the tree-style positions for the requested field as per the given struct type.
+ * @param structType schema of interest
+ * @param field field of interest for which the positions are requested
+ * @param isRecordKey {@code true} if the requested field is a record key, {@code false} in case of a partition path.
+ * @return the positions of the field as per the struct type.
+ */
+ public static List<Integer> getNestedFieldIndices(StructType structType, String field, boolean isRecordKey) {
+ String[] slices = field.split("\\.");
+ List<Integer> positions = new ArrayList<>();
+ int index = 0;
+ int totalCount = slices.length;
+ while (index < totalCount) {
+ String slice = slices[index];
+ Option<Object> curIndexOpt = structType.getFieldIndex(slice);
+ if (curIndexOpt.isDefined()) {
+ int curIndex = (int) curIndexOpt.get();
+ positions.add(curIndex);
+ final StructField nestedField = structType.fields()[curIndex];
+ if (index < totalCount - 1) {
+ if (!(nestedField.dataType() instanceof StructType)) {
+ if (isRecordKey) {
+ throw new HoodieKeyException("Nested field should be of type StructType " + nestedField);
+ } else {
+ positions = Collections.singletonList(-1);
+ break;
+ }
+ }
+ structType = (StructType) nestedField.dataType();
+ }
+ } else {
+ if (isRecordKey) {
+ throw new HoodieKeyException("Can't find " + slice + " in StructType for the field " + field);
+ } else {
+ positions = Collections.singletonList(-1);
+ break;
+ }
+ }
+ index++;
+ }
+ return positions;
+ }
+}
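
To illustrate the position encoding that the key generators cache via buildFieldPositionMapIfNeeded(): a top-level field maps to a single index, while a nested field maps to the index path from the root struct down to the leaf. A small sketch against a hypothetical schema:

import java.util.List;

import org.apache.hudi.keygen.RowKeyGeneratorHelper;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

public class FieldPositionSketch {
  public static void main(String[] args) {
    StructType schema = new StructType()
        .add("driver", DataTypes.StringType)
        .add("location", new StructType().add("city", DataTypes.StringType));

    // Top-level field -> single position: [0]
    List<Integer> driverPos = RowKeyGeneratorHelper.getNestedFieldIndices(schema, "driver", true);
    // Nested field -> path of positions from root to leaf: [1, 0]
    List<Integer> cityPos = RowKeyGeneratorHelper.getNestedFieldIndices(schema, "location.city", true);

    System.out.println(driverPos + " " + cityPos); // [0] [1, 0]
  }
}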
diff --git a/hudi-spark/src/main/java/org/apache/hudi/keygen/SimpleKeyGenerator.java b/hudi-spark/src/main/java/org/apache/hudi/keygen/SimpleKeyGenerator.java
index ea460b5..c2b8b12 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/keygen/SimpleKeyGenerator.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/keygen/SimpleKeyGenerator.java
@@ -22,54 +22,52 @@ import org.apache.hudi.DataSourceWriteOptions;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.avro.generic.GenericRecord;
+import org.apache.spark.sql.Row;
-import java.util.Arrays;
-import java.util.List;
+import java.util.Collections;
/**
* Simple key generator, which takes names of fields to be used for recordKey and partitionPath as configs.
*/
public class SimpleKeyGenerator extends BuiltinKeyGenerator {
- protected final String recordKeyField;
-
- protected final String partitionPathField;
-
- protected final boolean hiveStylePartitioning;
-
- protected final boolean encodePartitionPath;
-
public SimpleKeyGenerator(TypedProperties props) {
- this(props, props.getString(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY()));
+ this(props, props.getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY()),
+ props.getString(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY()));
+ }
+
+ SimpleKeyGenerator(TypedProperties props, String partitionPathField) {
+ this(props, null, partitionPathField);
}
- public SimpleKeyGenerator(TypedProperties props, String partitionPathField) {
+ SimpleKeyGenerator(TypedProperties props, String recordKeyField, String partitionPathField) {
super(props);
- this.recordKeyField = props.getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY());
- this.partitionPathField = partitionPathField;
- this.hiveStylePartitioning = props.getBoolean(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING_OPT_KEY(),
- Boolean.parseBoolean(DataSourceWriteOptions.DEFAULT_HIVE_STYLE_PARTITIONING_OPT_VAL()));
- this.encodePartitionPath = props.getBoolean(DataSourceWriteOptions.URL_ENCODE_PARTITIONING_OPT_KEY(),
- Boolean.parseBoolean(DataSourceWriteOptions.DEFAULT_URL_ENCODE_PARTITIONING_OPT_VAL()));
+ this.recordKeyFields = recordKeyField == null
+ ? Collections.emptyList()
+ : Collections.singletonList(recordKeyField);
+ this.partitionPathFields = Collections.singletonList(partitionPathField);
}
@Override
public String getRecordKey(GenericRecord record) {
- return KeyGenUtils.getRecordKey(record, recordKeyField);
+ return KeyGenUtils.getRecordKey(record, getRecordKeyFields().get(0));
}
@Override
public String getPartitionPath(GenericRecord record) {
- return KeyGenUtils.getPartitionPath(record, partitionPathField, hiveStylePartitioning, encodePartitionPath);
+ return KeyGenUtils.getPartitionPath(record, getPartitionPathFields().get(0), hiveStylePartitioning, encodePartitionPath);
}
@Override
- public List<String> getRecordKeyFields() {
- return Arrays.asList(recordKeyField);
+ public String getRecordKey(Row row) {
+ buildFieldPositionMapIfNeeded(row.schema());
+ return RowKeyGeneratorHelper.getRecordKeyFromRow(row, getRecordKeyFields(), recordKeyPositions, false);
}
@Override
- public List<String> getPartitionPathFields() {
- return Arrays.asList(partitionPathField);
+ public String getPartitionPath(Row row) {
+ buildFieldPositionMapIfNeeded(row.schema());
+ return RowKeyGeneratorHelper.getPartitionPathFromRow(row, getPartitionPathFields(),
+ hiveStylePartitioning, partitionPathPositions);
}
-}
\ No newline at end of file
+}
diff --git a/hudi-spark/src/main/java/org/apache/hudi/keygen/TimestampBasedKeyGenerator.java b/hudi-spark/src/main/java/org/apache/hudi/keygen/TimestampBasedKeyGenerator.java
index b5e6fe8..0209fe8 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/keygen/TimestampBasedKeyGenerator.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/keygen/TimestampBasedKeyGenerator.java
@@ -25,10 +25,11 @@ import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.exception.HoodieDeltaStreamerException;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieNotSupportedException;
-
-import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.keygen.parser.HoodieDateTimeParser;
import org.apache.hudi.keygen.parser.HoodieDateTimeParserImpl;
+
+import org.apache.avro.generic.GenericRecord;
+import org.apache.spark.sql.Row;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
@@ -39,10 +40,14 @@ import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
+import java.text.ParseException;
import java.util.concurrent.TimeUnit;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static java.util.concurrent.TimeUnit.SECONDS;
+import static org.apache.hudi.keygen.KeyGenUtils.DEFAULT_PARTITION_PATH;
+import static org.apache.hudi.keygen.KeyGenUtils.EMPTY_RECORDKEY_PLACEHOLDER;
+import static org.apache.hudi.keygen.KeyGenUtils.NULL_RECORDKEY_PLACEHOLDER;
/**
* Key generator, that relies on timestamps for partitioning field. Still picks record key by name.
@@ -89,11 +94,16 @@ public class TimestampBasedKeyGenerator extends SimpleKeyGenerator {
}
public TimestampBasedKeyGenerator(TypedProperties config) throws IOException {
- this(config, config.getString(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY()));
+ this(config, config.getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY()),
+ config.getString(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY()));
}
- public TimestampBasedKeyGenerator(TypedProperties config, String partitionPathField) throws IOException {
- super(config, partitionPathField);
+ TimestampBasedKeyGenerator(TypedProperties config, String partitionPathField) throws IOException {
+ this(config, null, partitionPathField);
+ }
+
+ TimestampBasedKeyGenerator(TypedProperties config, String recordKeyField, String partitionPathField) throws IOException {
+ super(config, recordKeyField, partitionPathField);
String dateTimeParserClass = config.getString(Config.DATE_TIME_PARSER_PROP, HoodieDateTimeParserImpl.class.getName());
this.parser = DataSourceUtils.createDateTimeParser(config, dateTimeParserClass);
this.outputDateTimeZone = parser.getOutputDateTimeZone();
@@ -125,49 +135,58 @@ public class TimestampBasedKeyGenerator extends SimpleKeyGenerator {
@Override
public String getPartitionPath(GenericRecord record) {
- Object partitionVal = HoodieAvroUtils.getNestedFieldVal(record, partitionPathField, true);
+ Object partitionVal = HoodieAvroUtils.getNestedFieldVal(record, getPartitionPathFields().get(0), true);
if (partitionVal == null) {
partitionVal = 1L;
}
+ try {
+ return getPartitionPath(partitionVal);
+ } catch (Exception e) {
+ throw new HoodieDeltaStreamerException("Unable to parse input partition field :" + partitionVal, e);
+ }
+ }
+ /**
+ * Parse and fetch partition path based on data type.
+ *
+ * @param partitionVal partition path object value fetched from record/row
+ * @return the parsed partition path based on data type
+ * @throws ParseException on any parse exception
+ */
+ private String getPartitionPath(Object partitionVal) throws ParseException {
DateTimeFormatter partitionFormatter = DateTimeFormat.forPattern(outputDateFormat);
if (this.outputDateTimeZone != null) {
partitionFormatter = partitionFormatter.withZone(outputDateTimeZone);
}
-
- try {
- long timeMs;
- if (partitionVal instanceof Double) {
- timeMs = convertLongTimeToMillis(((Double) partitionVal).longValue());
- } else if (partitionVal instanceof Float) {
- timeMs = convertLongTimeToMillis(((Float) partitionVal).longValue());
- } else if (partitionVal instanceof Long) {
- timeMs = convertLongTimeToMillis((Long) partitionVal);
- } else if (partitionVal instanceof CharSequence) {
- DateTime parsedDateTime = inputFormatter.parseDateTime(partitionVal.toString());
- if (this.outputDateTimeZone == null) {
- // Use the timezone that came off the date that was passed in, if it had one
- partitionFormatter = partitionFormatter.withZone(parsedDateTime.getZone());
- }
-
- timeMs = inputFormatter.parseDateTime(partitionVal.toString()).getMillis();
- } else {
- throw new HoodieNotSupportedException(
- "Unexpected type for partition field: " + partitionVal.getClass().getName());
+ long timeMs;
+ if (partitionVal instanceof Double) {
+ timeMs = convertLongTimeToMillis(((Double) partitionVal).longValue());
+ } else if (partitionVal instanceof Float) {
+ timeMs = convertLongTimeToMillis(((Float) partitionVal).longValue());
+ } else if (partitionVal instanceof Long) {
+ timeMs = convertLongTimeToMillis((Long) partitionVal);
+ } else if (partitionVal instanceof CharSequence) {
+ DateTime parsedDateTime = inputFormatter.parseDateTime(partitionVal.toString());
+ if (this.outputDateTimeZone == null) {
+ // Use the timezone that came off the date that was passed in, if it had one
+ partitionFormatter = partitionFormatter.withZone(parsedDateTime.getZone());
}
- DateTime timestamp = new DateTime(timeMs, outputDateTimeZone);
- String partitionPath = timestamp.toString(partitionFormatter);
- if (encodePartitionPath) {
- try {
- partitionPath = URLEncoder.encode(partitionPath, StandardCharsets.UTF_8.toString());
- } catch (UnsupportedEncodingException uoe) {
- throw new HoodieException(uoe.getMessage(), uoe);
- }
+
+ timeMs = inputFormatter.parseDateTime(partitionVal.toString()).getMillis();
+ } else {
+ throw new HoodieNotSupportedException(
+ "Unexpected type for partition field: " + partitionVal.getClass().getName());
+ }
+ DateTime timestamp = new DateTime(timeMs, outputDateTimeZone);
+ String partitionPath = timestamp.toString(partitionFormatter);
+ if (encodePartitionPath) {
+ try {
+ partitionPath = URLEncoder.encode(partitionPath, StandardCharsets.UTF_8.toString());
+ } catch (UnsupportedEncodingException uoe) {
+ throw new HoodieException(uoe.getMessage(), uoe);
}
- return hiveStylePartitioning ? partitionPathField + "=" + partitionPath : partitionPath;
- } catch (Exception e) {
- throw new HoodieDeltaStreamerException("Unable to parse input partition field :" + partitionVal, e);
}
+ return hiveStylePartitioning ? getPartitionPathFields().get(0) + "=" + partitionPath : partitionPath;
}
private long convertLongTimeToMillis(Long partitionVal) {
@@ -177,4 +196,28 @@ public class TimestampBasedKeyGenerator extends SimpleKeyGenerator {
}
return MILLISECONDS.convert(partitionVal, timeUnit);
}
+
+ @Override
+ public String getRecordKey(Row row) {
+ buildFieldPositionMapIfNeeded(row.schema());
+ return RowKeyGeneratorHelper.getRecordKeyFromRow(row, getRecordKeyFields(), recordKeyPositions, false);
+ }
+
+ @Override
+ public String getPartitionPath(Row row) {
+ Object fieldVal = null;
+ buildFieldPositionMapIfNeeded(row.schema());
+ Object partitionPathFieldVal = RowKeyGeneratorHelper.getNestedFieldVal(row, partitionPathPositions.get(getPartitionPathFields().get(0)));
+ try {
+ if (partitionPathFieldVal.toString().contains(DEFAULT_PARTITION_PATH) || partitionPathFieldVal.toString().contains(NULL_RECORDKEY_PLACEHOLDER)
+ || partitionPathFieldVal.toString().contains(EMPTY_RECORDKEY_PLACEHOLDER)) {
+ fieldVal = 1L;
+ } else {
+ fieldVal = partitionPathFieldVal;
+ }
+ return getPartitionPath(fieldVal);
+ } catch (Exception e) {
+ throw new HoodieDeltaStreamerException("Unable to parse input partition field :" + fieldVal, e);
+ }
+ }
}
diff --git a/hudi-spark/src/main/scala/org/apache/hudi/DataSourceOptions.scala b/hudi-spark/src/main/scala/org/apache/hudi/DataSourceOptions.scala
index ec51cb6..ba6bc87 100644
--- a/hudi-spark/src/main/scala/org/apache/hudi/DataSourceOptions.scala
+++ b/hudi-spark/src/main/scala/org/apache/hudi/DataSourceOptions.scala
@@ -229,6 +229,13 @@ object DataSourceWriteOptions {
val DEFAULT_KEYGENERATOR_CLASS_OPT_VAL = classOf[SimpleKeyGenerator].getName
/**
+ * When set to true, write operations are performed directly using the Spark-native `Row` representation.
+ * Defaults to false (this will become the default in a future release).
+ */
+ val ENABLE_ROW_WRITER_OPT_KEY = "hoodie.datasource.write.row.writer.enable"
+ val DEFAULT_ENABLE_ROW_WRITER_OPT_VAL = "false"
+
+ /**
* Option keys beginning with this prefix, are automatically added to the commit/deltacommit metadata.
* This is useful to store checkpointing information, in a consistent way with the hoodie timeline
*/
@@ -299,6 +306,6 @@ object DataSourceWriteOptions {
val DEFAULT_HIVE_USE_JDBC_OPT_VAL = "true"
// Async Compaction - Enabled by default for MOR
- val ASYNC_COMPACT_ENABLE_KEY = "hoodie.datasource.compaction.async.enable"
- val DEFAULT_ASYNC_COMPACT_ENABLE_VAL = "true"
+ val ASYNC_COMPACT_ENABLE_OPT_KEY = "hoodie.datasource.compaction.async.enable"
+ val DEFAULT_ASYNC_COMPACT_ENABLE_OPT_VAL = "true"
}
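A minimal sketch of how a user might turn on the new row writer for a bulk insert through the datasource (not part of this commit; `df`, `basePath`, the table name and field names are placeholders):
import org.apache.hudi.DataSourceWriteOptions
import org.apache.hudi.config.HoodieWriteConfig
import org.apache.spark.sql.SaveMode
df.write.format("org.apache.hudi")
  .option(DataSourceWriteOptions.OPERATION_OPT_KEY, DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL)
  .option(DataSourceWriteOptions.ENABLE_ROW_WRITER_OPT_KEY, "true")
  .option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "_row_key")
  .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "partition")
  .option(HoodieWriteConfig.TABLE_NAME, "hudi_trips")
  .mode(SaveMode.Append)
  .save(basePath)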
diff --git a/hudi-spark/src/main/scala/org/apache/hudi/DefaultSource.scala b/hudi-spark/src/main/scala/org/apache/hudi/DefaultSource.scala
index 6f42169..1cf9bdb 100644
--- a/hudi-spark/src/main/scala/org/apache/hudi/DefaultSource.scala
+++ b/hudi-spark/src/main/scala/org/apache/hudi/DefaultSource.scala
@@ -118,14 +118,12 @@ class DefaultSource extends RelationProvider
mode: SaveMode,
optParams: Map[String, String],
df: DataFrame): BaseRelation = {
- val parameters = HoodieSparkSqlWriter.parametersWithWriteDefaults(optParams)
-
+ val parameters = HoodieWriterUtils.parametersWithWriteDefaults(optParams)
if (parameters(OPERATION_OPT_KEY).equals(BOOTSTRAP_OPERATION_OPT_VAL)) {
HoodieSparkSqlWriter.bootstrap(sqlContext, mode, parameters, df)
} else {
HoodieSparkSqlWriter.write(sqlContext, mode, parameters, df)
}
-
new HoodieEmptyRelation(sqlContext, df.schema)
}
@@ -133,7 +131,7 @@ class DefaultSource extends RelationProvider
optParams: Map[String, String],
partitionColumns: Seq[String],
outputMode: OutputMode): Sink = {
- val parameters = HoodieSparkSqlWriter.parametersWithWriteDefaults(optParams)
+ val parameters = HoodieWriterUtils.parametersWithWriteDefaults(optParams)
new HoodieStreamingSink(
sqlContext,
parameters,
diff --git a/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala b/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
index d66981e..36886cd 100644
--- a/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
+++ b/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
@@ -29,7 +29,6 @@ import org.apache.hudi.DataSourceWriteOptions._
import org.apache.hudi.avro.HoodieAvroUtils
import org.apache.hudi.client.{HoodieWriteClient, WriteStatus}
import org.apache.hudi.common.config.TypedProperties
-import org.apache.hudi.common.fs.FSUtils
import org.apache.hudi.common.model.{HoodieRecordPayload, HoodieTableType}
import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient}
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline
@@ -38,6 +37,7 @@ import org.apache.hudi.config.HoodieBootstrapConfig.{BOOTSTRAP_BASE_PATH_PROP, B
import org.apache.hudi.config.HoodieWriteConfig
import org.apache.hudi.exception.HoodieException
import org.apache.hudi.hive.{HiveSyncConfig, HiveSyncTool}
+import org.apache.hudi.internal.HoodieDataSourceInternalWriter
import org.apache.hudi.sync.common.AbstractSyncTool
import org.apache.log4j.LogManager
import org.apache.spark.SparkContext
@@ -62,7 +62,7 @@ private[hudi] object HoodieSparkSqlWriter {
asyncCompactionTriggerFn: Option[Function1[HoodieWriteClient[HoodieRecordPayload[Nothing]], Unit]] = Option.empty
)
: (Boolean, common.util.Option[String], common.util.Option[String],
- HoodieWriteClient[HoodieRecordPayload[Nothing]], HoodieTableConfig) = {
+ HoodieWriteClient[HoodieRecordPayload[Nothing]], HoodieTableConfig) = {
val sparkContext = sqlContext.sparkContext
val path = parameters.get("path")
@@ -105,6 +105,22 @@ private[hudi] object HoodieSparkSqlWriter {
} else {
// Handle various save modes
handleSaveModes(mode, basePath, tableConfig, tblName, operation, fs)
+ // Create the table if not present
+ if (!tableExists) {
+ val tableMetaClient = HoodieTableMetaClient.initTableType(sparkContext.hadoopConfiguration, path.get,
+ HoodieTableType.valueOf(tableType), tblName, "archived", parameters(PAYLOAD_CLASS_OPT_KEY),
+ null.asInstanceOf[String])
+ tableConfig = tableMetaClient.getTableConfig
+ }
+
+ // short-circuit if bulk_insert via row is enabled.
+ // scalastyle:off
+ if (parameters(ENABLE_ROW_WRITER_OPT_KEY).toBoolean) {
+ val (success, commitTime: common.util.Option[String]) = bulkInsertAsRow(sqlContext, parameters, df, tblName,
+ basePath, path, instantTime)
+ return (success, commitTime, common.util.Option.empty(), hoodieWriteClient.orNull, tableConfig)
+ }
+ // scalastyle:on
val (writeStatuses, writeClient: HoodieWriteClient[HoodieRecordPayload[Nothing]]) =
if (!operation.equalsIgnoreCase(DELETE_OPERATION_OPT_VAL)) {
@@ -128,14 +144,6 @@ private[hudi] object HoodieSparkSqlWriter {
parameters(PAYLOAD_CLASS_OPT_KEY))
}).toJavaRDD()
- // Create the table if not present
- if (!tableExists) {
- val tableMetaClient = HoodieTableMetaClient.initTableType(sparkContext.hadoopConfiguration, path.get,
- HoodieTableType.valueOf(tableType), tblName, "archived", parameters(PAYLOAD_CLASS_OPT_KEY),
- null.asInstanceOf[String])
- tableConfig = tableMetaClient.getTableConfig
- }
-
// Create a HoodieWriteClient & issue the write.
val client = hoodieWriteClient.getOrElse(DataSourceUtils.createHoodieClient(jsc, schema.toString, path.get,
tblName, mapAsJavaMap(parameters)
@@ -250,41 +258,29 @@ private[hudi] object HoodieSparkSqlWriter {
metaSyncSuccess
}
- /**
- * Add default options for unspecified write options keys.
- *
- * @param parameters
- * @return
- */
- def parametersWithWriteDefaults(parameters: Map[String, String]): Map[String, String] = {
- Map(OPERATION_OPT_KEY -> DEFAULT_OPERATION_OPT_VAL,
- TABLE_TYPE_OPT_KEY -> DEFAULT_TABLE_TYPE_OPT_VAL,
- PRECOMBINE_FIELD_OPT_KEY -> DEFAULT_PRECOMBINE_FIELD_OPT_VAL,
- PAYLOAD_CLASS_OPT_KEY -> DEFAULT_PAYLOAD_OPT_VAL,
- RECORDKEY_FIELD_OPT_KEY -> DEFAULT_RECORDKEY_FIELD_OPT_VAL,
- PARTITIONPATH_FIELD_OPT_KEY -> DEFAULT_PARTITIONPATH_FIELD_OPT_VAL,
- KEYGENERATOR_CLASS_OPT_KEY -> DEFAULT_KEYGENERATOR_CLASS_OPT_VAL,
- COMMIT_METADATA_KEYPREFIX_OPT_KEY -> DEFAULT_COMMIT_METADATA_KEYPREFIX_OPT_VAL,
- INSERT_DROP_DUPS_OPT_KEY -> DEFAULT_INSERT_DROP_DUPS_OPT_VAL,
- STREAMING_RETRY_CNT_OPT_KEY -> DEFAULT_STREAMING_RETRY_CNT_OPT_VAL,
- STREAMING_RETRY_INTERVAL_MS_OPT_KEY -> DEFAULT_STREAMING_RETRY_INTERVAL_MS_OPT_VAL,
- STREAMING_IGNORE_FAILED_BATCH_OPT_KEY -> DEFAULT_STREAMING_IGNORE_FAILED_BATCH_OPT_VAL,
- META_SYNC_CLIENT_TOOL_CLASS -> DEFAULT_META_SYNC_CLIENT_TOOL_CLASS,
- //just for backwards compatiblity
- HIVE_SYNC_ENABLED_OPT_KEY -> DEFAULT_HIVE_SYNC_ENABLED_OPT_VAL,
- META_SYNC_ENABLED_OPT_KEY -> DEFAULT_META_SYNC_ENABLED_OPT_VAL,
- HIVE_DATABASE_OPT_KEY -> DEFAULT_HIVE_DATABASE_OPT_VAL,
- HIVE_TABLE_OPT_KEY -> DEFAULT_HIVE_TABLE_OPT_VAL,
- HIVE_BASE_FILE_FORMAT_OPT_KEY -> DEFAULT_HIVE_BASE_FILE_FORMAT_OPT_VAL,
- HIVE_USER_OPT_KEY -> DEFAULT_HIVE_USER_OPT_VAL,
- HIVE_PASS_OPT_KEY -> DEFAULT_HIVE_PASS_OPT_VAL,
- HIVE_URL_OPT_KEY -> DEFAULT_HIVE_URL_OPT_VAL,
- HIVE_PARTITION_FIELDS_OPT_KEY -> DEFAULT_HIVE_PARTITION_FIELDS_OPT_VAL,
- HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY -> DEFAULT_HIVE_PARTITION_EXTRACTOR_CLASS_OPT_VAL,
- HIVE_STYLE_PARTITIONING_OPT_KEY -> DEFAULT_HIVE_STYLE_PARTITIONING_OPT_VAL,
- HIVE_USE_JDBC_OPT_KEY -> DEFAULT_HIVE_USE_JDBC_OPT_VAL,
- ASYNC_COMPACT_ENABLE_KEY -> DEFAULT_ASYNC_COMPACT_ENABLE_VAL
- ) ++ translateStorageTypeToTableType(parameters)
+ def bulkInsertAsRow(sqlContext: SQLContext,
+ parameters: Map[String, String],
+ df: DataFrame,
+ tblName: String,
+ basePath: Path,
+ path: Option[String],
+ instantTime: String): (Boolean, common.util.Option[String]) = {
+ val structName = s"${tblName}_record"
+ val nameSpace = s"hoodie.${tblName}"
+ val writeConfig = DataSourceUtils.createHoodieConfig(null, path.get, tblName, mapAsJavaMap(parameters))
+ val hoodieDF = HoodieDatasetBulkInsertHelper.prepareHoodieDatasetForBulkInsert(sqlContext, writeConfig, df, structName, nameSpace)
+ hoodieDF.write.format("org.apache.hudi.internal")
+ .option(HoodieDataSourceInternalWriter.INSTANT_TIME_OPT_KEY, instantTime)
+ .options(parameters)
+ .save()
+ val hiveSyncEnabled = parameters.get(HIVE_SYNC_ENABLED_OPT_KEY).exists(r => r.toBoolean)
+ val metaSyncEnabled = parameters.get(META_SYNC_ENABLED_OPT_KEY).exists(r => r.toBoolean)
+ val syncHiveSuccess = if (hiveSyncEnabled || metaSyncEnabled) {
+ metaSync(parameters, basePath, sqlContext.sparkContext.hadoopConfiguration)
+ } else {
+ true
+ }
+ (syncHiveSuccess, common.util.Option.ofNullable(instantTime))
}
def toProperties(params: Map[String, String]): TypedProperties = {
@@ -298,7 +294,7 @@ private[hudi] object HoodieSparkSqlWriter {
if (mode == SaveMode.Append && tableExists) {
val existingTableName = tableConfig.getTableName
if (!existingTableName.equals(tableName)) {
- throw new HoodieException(s"hoodie table with name $existingTableName already exist at $tablePath")
+ throw new HoodieException(s"hoodie table with name $existingTableName already exists at $tablePath")
}
}
@@ -411,11 +407,11 @@ private[hudi] object HoodieSparkSqlWriter {
val asyncCompactionEnabled = isAsyncCompactionEnabled(client, tableConfig, parameters, jsc.hadoopConfiguration())
val compactionInstant : common.util.Option[java.lang.String] =
- if (asyncCompactionEnabled) {
- client.scheduleCompaction(common.util.Option.of(new util.HashMap[String, String](mapAsJavaMap(metaMap))))
- } else {
- common.util.Option.empty()
- }
+ if (asyncCompactionEnabled) {
+ client.scheduleCompaction(common.util.Option.of(new util.HashMap[String, String](mapAsJavaMap(metaMap))))
+ } else {
+ common.util.Option.empty()
+ }
log.info(s"Compaction Scheduled is $compactionInstant")
val metaSyncSuccess = metaSync(parameters, basePath, jsc.hadoopConfiguration())
@@ -448,7 +444,7 @@ private[hudi] object HoodieSparkSqlWriter {
parameters: Map[String, String], configuration: Configuration) : Boolean = {
log.info(s"Config.isInlineCompaction ? ${client.getConfig.isInlineCompaction}")
if (!client.getConfig.isInlineCompaction
- && parameters.get(ASYNC_COMPACT_ENABLE_KEY).exists(r => r.toBoolean)) {
+ && parameters.get(ASYNC_COMPACT_ENABLE_OPT_KEY).exists(r => r.toBoolean)) {
tableConfig.getTableType == HoodieTableType.MERGE_ON_READ
} else {
false
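To summarize the new write path wired up above, a condensed, annotated restatement of what bulkInsertAsRow does (illustrative only; `sqlContext`, `df`, `parameters`, `tableName`, `path` and `instantTime` are assumed in scope, along with the JavaConversions import used by the writer):
// 1. Build a HoodieWriteConfig from the resolved datasource parameters.
val writeConfig = DataSourceUtils.createHoodieConfig(null, path, tableName, mapAsJavaMap(parameters))
// 2. Prepend the Hudi meta columns (_hoodie_record_key, _hoodie_partition_path, ...) to the
//    incoming Dataset, computing keys directly on Rows via the configured key generator.
val preparedDf = HoodieDatasetBulkInsertHelper.prepareHoodieDatasetForBulkInsert(
  sqlContext, writeConfig, df, s"${tableName}_record", s"hoodie.${tableName}")
// 3. Hand the prepared Dataset to the DataSourceV2 internal writer, which opens
//    HoodieRowCreateHandles per partition path and commits the instant on success.
preparedDf.write.format("org.apache.hudi.internal")
  .option(HoodieDataSourceInternalWriter.INSTANT_TIME_OPT_KEY, instantTime)
  .options(parameters)
  .save()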
diff --git a/hudi-spark/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala b/hudi-spark/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala
new file mode 100644
index 0000000..484605c
--- /dev/null
+++ b/hudi-spark/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi
+
+import org.apache.hudi.DataSourceWriteOptions._
+import org.apache.hudi.common.config.TypedProperties
+
+import scala.collection.JavaConversions._
+import scala.collection.JavaConverters._
+
+/**
+ * Utility methods to assist the datasource write path and tests.
+ */
+object HoodieWriterUtils {
+
+ def javaParametersWithWriteDefaults(parameters: java.util.Map[String, String]): java.util.Map[String, String] = {
+ mapAsJavaMap(parametersWithWriteDefaults(parameters.asScala.toMap))
+ }
+
+ /**
+ * Add default options for unspecified write options keys.
+ *
+ * @param parameters
+ * @return
+ */
+ def parametersWithWriteDefaults(parameters: Map[String, String]): Map[String, String] = {
+ Map(OPERATION_OPT_KEY -> DEFAULT_OPERATION_OPT_VAL,
+ TABLE_TYPE_OPT_KEY -> DEFAULT_TABLE_TYPE_OPT_VAL,
+ PRECOMBINE_FIELD_OPT_KEY -> DEFAULT_PRECOMBINE_FIELD_OPT_VAL,
+ PAYLOAD_CLASS_OPT_KEY -> DEFAULT_PAYLOAD_OPT_VAL,
+ RECORDKEY_FIELD_OPT_KEY -> DEFAULT_RECORDKEY_FIELD_OPT_VAL,
+ PARTITIONPATH_FIELD_OPT_KEY -> DEFAULT_PARTITIONPATH_FIELD_OPT_VAL,
+ KEYGENERATOR_CLASS_OPT_KEY -> DEFAULT_KEYGENERATOR_CLASS_OPT_VAL,
+ COMMIT_METADATA_KEYPREFIX_OPT_KEY -> DEFAULT_COMMIT_METADATA_KEYPREFIX_OPT_VAL,
+ INSERT_DROP_DUPS_OPT_KEY -> DEFAULT_INSERT_DROP_DUPS_OPT_VAL,
+ STREAMING_RETRY_CNT_OPT_KEY -> DEFAULT_STREAMING_RETRY_CNT_OPT_VAL,
+ STREAMING_RETRY_INTERVAL_MS_OPT_KEY -> DEFAULT_STREAMING_RETRY_INTERVAL_MS_OPT_VAL,
+ STREAMING_IGNORE_FAILED_BATCH_OPT_KEY -> DEFAULT_STREAMING_IGNORE_FAILED_BATCH_OPT_VAL,
+ META_SYNC_CLIENT_TOOL_CLASS -> DEFAULT_META_SYNC_CLIENT_TOOL_CLASS,
+ HIVE_SYNC_ENABLED_OPT_KEY -> DEFAULT_HIVE_SYNC_ENABLED_OPT_VAL,
+ META_SYNC_ENABLED_OPT_KEY -> DEFAULT_META_SYNC_ENABLED_OPT_VAL,
+ HIVE_DATABASE_OPT_KEY -> DEFAULT_HIVE_DATABASE_OPT_VAL,
+ HIVE_TABLE_OPT_KEY -> DEFAULT_HIVE_TABLE_OPT_VAL,
+ HIVE_BASE_FILE_FORMAT_OPT_KEY -> DEFAULT_HIVE_BASE_FILE_FORMAT_OPT_VAL,
+ HIVE_USER_OPT_KEY -> DEFAULT_HIVE_USER_OPT_VAL,
+ HIVE_PASS_OPT_KEY -> DEFAULT_HIVE_PASS_OPT_VAL,
+ HIVE_URL_OPT_KEY -> DEFAULT_HIVE_URL_OPT_VAL,
+ HIVE_PARTITION_FIELDS_OPT_KEY -> DEFAULT_HIVE_PARTITION_FIELDS_OPT_VAL,
+ HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY -> DEFAULT_HIVE_PARTITION_EXTRACTOR_CLASS_OPT_VAL,
+ HIVE_STYLE_PARTITIONING_OPT_KEY -> DEFAULT_HIVE_STYLE_PARTITIONING_OPT_VAL,
+ HIVE_USE_JDBC_OPT_KEY -> DEFAULT_HIVE_USE_JDBC_OPT_VAL,
+ ASYNC_COMPACT_ENABLE_OPT_KEY -> DEFAULT_ASYNC_COMPACT_ENABLE_OPT_VAL,
+ ENABLE_ROW_WRITER_OPT_KEY -> DEFAULT_ENABLE_ROW_WRITER_OPT_VAL
+ ) ++ translateStorageTypeToTableType(parameters)
+ }
+
+ def toProperties(params: Map[String, String]): TypedProperties = {
+ val props = new TypedProperties()
+ params.foreach(kv => props.setProperty(kv._1, kv._2))
+ props
+ }
+}
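A small sketch (illustrative only; the option map below is hypothetical) of how the extracted helper fills in defaults for unspecified write options:
import org.apache.hudi.HoodieWriterUtils
import org.apache.hudi.DataSourceWriteOptions._
val userOpts = Map(
  RECORDKEY_FIELD_OPT_KEY -> "_row_key",
  PARTITIONPATH_FIELD_OPT_KEY -> "partition",
  ENABLE_ROW_WRITER_OPT_KEY -> "true")
val withDefaults = HoodieWriterUtils.parametersWithWriteDefaults(userOpts)
// keys the caller did not set, e.g. OPERATION_OPT_KEY, now carry their defaults ("upsert"),
// and the merged map can be converted for property-based consumers:
val props = HoodieWriterUtils.toProperties(withDefaults)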
diff --git a/hudi-spark/src/test/java/HoodieJavaApp.java b/hudi-spark/src/test/java/HoodieJavaApp.java
index 6eda051..594d813 100644
--- a/hudi-spark/src/test/java/HoodieJavaApp.java
+++ b/hudi-spark/src/test/java/HoodieJavaApp.java
@@ -151,7 +151,7 @@ public class HoodieJavaApp {
.option(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY(),
nonPartitionedTable ? NonpartitionedKeyGenerator.class.getCanonicalName()
: SimpleKeyGenerator.class.getCanonicalName())
- .option(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE_KEY(), "false")
+ .option(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE_OPT_KEY(), "false")
// This will remove any existing data at path below, and create a
.mode(SaveMode.Overwrite);
@@ -178,7 +178,7 @@ public class HoodieJavaApp {
nonPartitionedTable ? NonpartitionedKeyGenerator.class.getCanonicalName()
: SimpleKeyGenerator.class.getCanonicalName()) // Add Key Extractor
.option(HoodieCompactionConfig.INLINE_COMPACT_NUM_DELTA_COMMITS_PROP, "1")
- .option(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE_KEY(), "false")
+ .option(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE_OPT_KEY(), "false")
.option(HoodieWriteConfig.TABLE_NAME, tableName).mode(SaveMode.Append);
updateHiveSyncConfig(writer);
@@ -204,7 +204,7 @@ public class HoodieJavaApp {
nonPartitionedTable ? NonpartitionedKeyGenerator.class.getCanonicalName()
: SimpleKeyGenerator.class.getCanonicalName()) // Add Key Extractor
.option(HoodieCompactionConfig.INLINE_COMPACT_NUM_DELTA_COMMITS_PROP, "1")
- .option(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE_KEY(), "false")
+ .option(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE_OPT_KEY(), "false")
.option(HoodieWriteConfig.TABLE_NAME, tableName).mode(SaveMode.Append);
updateHiveSyncConfig(writer);
diff --git a/hudi-spark/src/test/java/HoodieJavaStreamingApp.java b/hudi-spark/src/test/java/HoodieJavaStreamingApp.java
index 500189d..e93784e 100644
--- a/hudi-spark/src/test/java/HoodieJavaStreamingApp.java
+++ b/hudi-spark/src/test/java/HoodieJavaStreamingApp.java
@@ -358,7 +358,7 @@ public class HoodieJavaStreamingApp {
.option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "partition")
.option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY(), "timestamp")
.option(HoodieCompactionConfig.INLINE_COMPACT_NUM_DELTA_COMMITS_PROP, "1")
- .option(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE_KEY(), "true")
+ .option(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE_OPT_KEY(), "true")
.option(HoodieWriteConfig.TABLE_NAME, tableName).option("checkpointLocation", checkpointLocation)
.outputMode(OutputMode.Append());
diff --git a/hudi-spark/src/test/java/org/apache/hudi/TestHoodieDatasetBulkInsertHelper.java b/hudi-spark/src/test/java/org/apache/hudi/TestHoodieDatasetBulkInsertHelper.java
new file mode 100644
index 0000000..12a7d20
--- /dev/null
+++ b/hudi-spark/src/test/java/org/apache/hudi/TestHoodieDatasetBulkInsertHelper.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi;
+
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.util.FileIOUtils;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.testutils.DataSourceTestUtils;
+import org.apache.hudi.testutils.HoodieClientTestBase;
+
+import org.apache.avro.Schema;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.types.StructType;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.fail;
+
+/**
+ * Tests {@link HoodieDatasetBulkInsertHelper}.
+ */
+public class TestHoodieDatasetBulkInsertHelper extends HoodieClientTestBase {
+
+ private String schemaStr;
+ private Schema schema;
+ private StructType structType;
+
+ public TestHoodieDatasetBulkInsertHelper() throws IOException {
+ init();
+ }
+
+ private void init() throws IOException {
+ schemaStr = FileIOUtils.readAsUTFString(getClass().getResourceAsStream("/exampleSchema.txt"));
+ schema = DataSourceTestUtils.getStructTypeExampleSchema();
+ structType = AvroConversionUtils.convertAvroSchemaToStructType(schema);
+ }
+
+ @Test
+ public void testBulkInsertHelper() throws IOException {
+ HoodieWriteConfig config = getConfigBuilder(schemaStr).withProps(getPropsAllSet()).build();
+ List<Row> rows = DataSourceTestUtils.generateRandomRows(10);
+ Dataset<Row> dataset = sqlContext.createDataFrame(rows, structType);
+ Dataset<Row> result = HoodieDatasetBulkInsertHelper.prepareHoodieDatasetForBulkInsert(sqlContext, config, dataset, "testStructName", "testNamespace");
+ StructType resultSchema = result.schema();
+
+ assertEquals(result.count(), 10);
+ assertEquals(resultSchema.fieldNames().length, structType.fieldNames().length + HoodieRecord.HOODIE_META_COLUMNS.size());
+
+ for (Map.Entry<String, Integer> entry : HoodieRecord.HOODIE_META_COLUMNS_NAME_TO_POS.entrySet()) {
+ assertTrue(resultSchema.fieldIndex(entry.getKey()) == entry.getValue());
+ }
+
+ int metadataRecordKeyIndex = resultSchema.fieldIndex(HoodieRecord.RECORD_KEY_METADATA_FIELD);
+ int metadataPartitionPathIndex = resultSchema.fieldIndex(HoodieRecord.PARTITION_PATH_METADATA_FIELD);
+ int metadataCommitTimeIndex = resultSchema.fieldIndex(HoodieRecord.COMMIT_TIME_METADATA_FIELD);
+ int metadataCommitSeqNoIndex = resultSchema.fieldIndex(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD);
+ int metadataFilenameIndex = resultSchema.fieldIndex(HoodieRecord.FILENAME_METADATA_FIELD);
+
+ result.toJavaRDD().foreach(entry -> {
+ assertTrue(entry.get(metadataRecordKeyIndex).equals(entry.getAs("_row_key")));
+ assertTrue(entry.get(metadataPartitionPathIndex).equals(entry.getAs("partition")));
+ assertTrue(entry.get(metadataCommitSeqNoIndex).equals(""));
+ assertTrue(entry.get(metadataCommitTimeIndex).equals(""));
+ assertTrue(entry.get(metadataFilenameIndex).equals(""));
+ });
+ }
+
+ private Map<String, String> getPropsAllSet() {
+ return getProps(true, true, true, true);
+ }
+
+ private Map<String, String> getProps(boolean setAll, boolean setKeyGen, boolean setRecordKey, boolean setPartitionPath) {
+ Map<String, String> props = new HashMap<>();
+ if (setAll) {
+ props.put(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY(), "org.apache.hudi.keygen.SimpleKeyGenerator");
+ props.put(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key");
+ props.put(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "partition");
+ } else {
+ if (setKeyGen) {
+ props.put(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY(), "org.apache.hudi.keygen.SimpleKeyGenerator");
+ }
+ if (setRecordKey) {
+ props.put(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key");
+ }
+ if (setPartitionPath) {
+ props.put(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "partition");
+ }
+ }
+ return props;
+ }
+
+ @Test
+ public void testNoPropsSet() {
+ HoodieWriteConfig config = getConfigBuilder(schemaStr).build();
+ List<Row> rows = DataSourceTestUtils.generateRandomRows(10);
+ Dataset<Row> dataset = sqlContext.createDataFrame(rows, structType);
+ try {
+ HoodieDatasetBulkInsertHelper.prepareHoodieDatasetForBulkInsert(sqlContext, config, dataset, "testStructName", "testNamespace");
+ fail("Should have thrown exception");
+ } catch (Exception e) {
+ // ignore
+ }
+
+ config = getConfigBuilder(schemaStr).withProps(getProps(false, false, true, true)).build();
+ rows = DataSourceTestUtils.generateRandomRows(10);
+ dataset = sqlContext.createDataFrame(rows, structType);
+ try {
+ HoodieDatasetBulkInsertHelper.prepareHoodieDatasetForBulkInsert(sqlContext, config, dataset, "testStructName", "testNamespace");
+ fail("Should have thrown exception");
+ } catch (Exception e) {
+ // ignore
+ }
+
+ config = getConfigBuilder(schemaStr).withProps(getProps(false, true, false, true)).build();
+ rows = DataSourceTestUtils.generateRandomRows(10);
+ dataset = sqlContext.createDataFrame(rows, structType);
+ try {
+ HoodieDatasetBulkInsertHelper.prepareHoodieDatasetForBulkInsert(sqlContext, config, dataset, "testStructName", "testNamespace");
+ fail("Should have thrown exception");
+ } catch (Exception e) {
+ // ignore
+ }
+
+ config = getConfigBuilder(schemaStr).withProps(getProps(false, true, true, false)).build();
+ rows = DataSourceTestUtils.generateRandomRows(10);
+ dataset = sqlContext.createDataFrame(rows, structType);
+ try {
+ HoodieDatasetBulkInsertHelper.prepareHoodieDatasetForBulkInsert(sqlContext, config, dataset, "testStructName", "testNamespace");
+ fail("Should have thrown exception");
+ } catch (Exception e) {
+ // ignore
+ }
+ }
+}
diff --git a/hudi-spark/src/test/java/org/apache/hudi/internal/TestHoodieBulkInsertDataInternalWriter.java b/hudi-spark/src/test/java/org/apache/hudi/internal/TestHoodieBulkInsertDataInternalWriter.java
new file mode 100644
index 0000000..884e11c
--- /dev/null
+++ b/hudi-spark/src/test/java/org/apache/hudi/internal/TestHoodieBulkInsertDataInternalWriter.java
@@ -0,0 +1,213 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.internal;
+
+import org.apache.hudi.client.HoodieInternalWriteStatus;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.model.HoodieWriteStat;
+import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.table.HoodieTable;
+import org.apache.hudi.testutils.HoodieClientTestHarness;
+
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import static org.apache.hudi.testutils.SparkDatasetTestUtils.ENCODER;
+import static org.apache.hudi.testutils.SparkDatasetTestUtils.STRUCT_TYPE;
+import static org.apache.hudi.testutils.SparkDatasetTestUtils.getConfigBuilder;
+import static org.apache.hudi.testutils.SparkDatasetTestUtils.getInternalRowWithError;
+import static org.apache.hudi.testutils.SparkDatasetTestUtils.getRandomRows;
+import static org.apache.hudi.testutils.SparkDatasetTestUtils.toInternalRows;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.fail;
+
+/**
+ * Unit tests {@link HoodieBulkInsertDataInternalWriter}.
+ */
+public class TestHoodieBulkInsertDataInternalWriter extends HoodieClientTestHarness {
+
+ private static final Random RANDOM = new Random();
+
+ @BeforeEach
+ public void setUp() throws Exception {
+ initSparkContexts("TestHoodieBulkInsertDataInternalWriter");
+ initPath();
+ initFileSystem();
+ initTestDataGenerator();
+ initMetaClient();
+ }
+
+ @AfterEach
+ public void tearDown() throws Exception {
+ cleanupResources();
+ }
+
+ @Test
+ public void testDataInternalWriter() throws IOException {
+ // init config and table
+ HoodieWriteConfig cfg = getConfigBuilder(basePath).build();
+ HoodieTable table = HoodieTable.create(metaClient, cfg, hadoopConf);
+ // execute N rounds
+ for (int i = 0; i < 5; i++) {
+ String instantTime = "00" + i;
+ // init writer
+ HoodieBulkInsertDataInternalWriter writer = new HoodieBulkInsertDataInternalWriter(table, cfg, instantTime, RANDOM.nextInt(100000), RANDOM.nextLong(), RANDOM.nextLong(), STRUCT_TYPE);
+
+ int size = 10 + RANDOM.nextInt(1000);
+ // write N rows to partition1, N rows to partition2 and N rows to partition3 ... Each batch should create a new RowCreateHandle and a new file
+ int batches = 5;
+ Dataset<Row> totalInputRows = null;
+
+ for (int j = 0; j < batches; j++) {
+ String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[j % 3];
+ Dataset<Row> inputRows = getRandomRows(sqlContext, size, partitionPath, false);
+ writeRows(inputRows, writer);
+ if (totalInputRows == null) {
+ totalInputRows = inputRows;
+ } else {
+ totalInputRows = totalInputRows.union(inputRows);
+ }
+ }
+
+ HoodieWriterCommitMessage commitMetadata = (HoodieWriterCommitMessage) writer.commit();
+ List<String> fileAbsPaths = new ArrayList<>();
+ List<String> fileNames = new ArrayList<>();
+
+ // verify write statuses
+ assertWriteStatuses(commitMetadata.getWriteStatuses(), batches, size, fileAbsPaths, fileNames);
+
+ // verify rows
+ Dataset<Row> result = sqlContext.read().parquet(fileAbsPaths.toArray(new String[0]));
+ assertOutput(totalInputRows, result, instantTime, fileNames);
+ }
+ }
+
+
+ /**
+ * Issues some corrupted or wrongly schematized InternalRows after a few valid InternalRows so that a global error is thrown: write batch 1 of valid records, then batch 2 of invalid records,
+ * which is expected to raise the global error. Verifies that the global error is set appropriately and that only the first batch of records is written to disk.
+ */
+ @Test
+ public void testGlobalFailure() throws IOException {
+ // init config and table
+ HoodieWriteConfig cfg = getConfigBuilder(basePath).build();
+ HoodieTable table = HoodieTable.create(metaClient, cfg, hadoopConf);
+ String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[0];
+
+ String instantTime = "001";
+ HoodieBulkInsertDataInternalWriter writer = new HoodieBulkInsertDataInternalWriter(table, cfg, instantTime, RANDOM.nextInt(100000), RANDOM.nextLong(), RANDOM.nextLong(), STRUCT_TYPE);
+
+ int size = 10 + RANDOM.nextInt(100);
+ int totalFailures = 5;
+ // Generate first batch of valid rows
+ Dataset<Row> inputRows = getRandomRows(sqlContext, size / 2, partitionPath, false);
+ List<InternalRow> internalRows = toInternalRows(inputRows, ENCODER);
+
+ // generate some failure rows
+ for (int i = 0; i < totalFailures; i++) {
+ internalRows.add(getInternalRowWithError(partitionPath));
+ }
+
+ // generate 2nd batch of valid rows
+ Dataset<Row> inputRows2 = getRandomRows(sqlContext, size / 2, partitionPath, false);
+ internalRows.addAll(toInternalRows(inputRows2, ENCODER));
+
+ // issue writes
+ try {
+ for (InternalRow internalRow : internalRows) {
+ writer.write(internalRow);
+ }
+ fail("Should have failed");
+ } catch (Throwable e) {
+ // expected
+ }
+
+ HoodieWriterCommitMessage commitMetadata = (HoodieWriterCommitMessage) writer.commit();
+
+ List<String> fileAbsPaths = new ArrayList<>();
+ List<String> fileNames = new ArrayList<>();
+ // verify write statuses
+ assertWriteStatuses(commitMetadata.getWriteStatuses(), 1, size / 2, fileAbsPaths, fileNames);
+
+ // verify rows
+ Dataset<Row> result = sqlContext.read().parquet(fileAbsPaths.toArray(new String[0]));
+ assertOutput(inputRows, result, instantTime, fileNames);
+ }
+
+ private void writeRows(Dataset<Row> inputRows, HoodieBulkInsertDataInternalWriter writer) throws IOException {
+ List<InternalRow> internalRows = toInternalRows(inputRows, ENCODER);
+ // issue writes
+ for (InternalRow internalRow : internalRows) {
+ writer.write(internalRow);
+ }
+ }
+
+ private void assertWriteStatuses(List<HoodieInternalWriteStatus> writeStatuses, int batches, int size, List<String> fileAbsPaths, List<String> fileNames) {
+ assertEquals(batches, writeStatuses.size());
+ int counter = 0;
+ for (HoodieInternalWriteStatus writeStatus : writeStatuses) {
+ // verify write status
+ assertEquals(writeStatus.getTotalRecords(), size);
+ assertNull(writeStatus.getGlobalError());
+ assertEquals(writeStatus.getFailedRowsSize(), 0);
+ assertNotNull(writeStatus.getFileId());
+ String fileId = writeStatus.getFileId();
+ assertEquals(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3], writeStatus.getPartitionPath());
+ fileAbsPaths.add(basePath + "/" + writeStatus.getStat().getPath());
+ fileNames.add(writeStatus.getStat().getPath().substring(writeStatus.getStat().getPath().lastIndexOf('/') + 1));
+ HoodieWriteStat writeStat = writeStatus.getStat();
+ assertEquals(size, writeStat.getNumInserts());
+ assertEquals(size, writeStat.getNumWrites());
+ assertEquals(fileId, writeStat.getFileId());
+ assertEquals(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter++ % 3], writeStat.getPartitionPath());
+ assertEquals(0, writeStat.getNumDeletes());
+ assertEquals(0, writeStat.getNumUpdateWrites());
+ assertEquals(0, writeStat.getTotalWriteErrors());
+ }
+ }
+
+ private void assertOutput(Dataset<Row> expectedRows, Dataset<Row> actualRows, String instantTime, List<String> fileNames) {
+ // verify 3 meta fields that are filled in within create handle
+ actualRows.collectAsList().forEach(entry -> {
+ assertEquals(entry.get(HoodieRecord.HOODIE_META_COLUMNS_NAME_TO_POS.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD)).toString(), instantTime);
+ assertFalse(entry.isNullAt(HoodieRecord.HOODIE_META_COLUMNS_NAME_TO_POS.get(HoodieRecord.FILENAME_METADATA_FIELD)));
+ assertTrue(fileNames.contains(entry.get(HoodieRecord.HOODIE_META_COLUMNS_NAME_TO_POS.get(HoodieRecord.FILENAME_METADATA_FIELD))));
+ assertFalse(entry.isNullAt(HoodieRecord.HOODIE_META_COLUMNS_NAME_TO_POS.get(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD)));
+ });
+
+ // after trimming 2 of the meta fields, rest of the fields should match
+ Dataset<Row> trimmedExpected = expectedRows.drop(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, HoodieRecord.COMMIT_TIME_METADATA_FIELD, HoodieRecord.FILENAME_METADATA_FIELD);
+ Dataset<Row> trimmedActual = actualRows.drop(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, HoodieRecord.COMMIT_TIME_METADATA_FIELD, HoodieRecord.FILENAME_METADATA_FIELD);
+ assertEquals(0, trimmedActual.except(trimmedExpected).count());
+ }
+}
diff --git a/hudi-spark/src/test/java/org/apache/hudi/internal/TestHoodieDataSourceInternalWriter.java b/hudi-spark/src/test/java/org/apache/hudi/internal/TestHoodieDataSourceInternalWriter.java
new file mode 100644
index 0000000..89d748f
--- /dev/null
+++ b/hudi-spark/src/test/java/org/apache/hudi/internal/TestHoodieDataSourceInternalWriter.java
@@ -0,0 +1,321 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.internal;
+
+import org.apache.hudi.client.HoodieInternalWriteStatus;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.model.HoodieWriteStat;
+import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.testutils.HoodieClientTestHarness;
+import org.apache.hudi.testutils.HoodieClientTestUtils;
+
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.sources.v2.writer.DataWriter;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Random;
+
+import static org.apache.hudi.testutils.SparkDatasetTestUtils.ENCODER;
+import static org.apache.hudi.testutils.SparkDatasetTestUtils.STRUCT_TYPE;
+import static org.apache.hudi.testutils.SparkDatasetTestUtils.getConfigBuilder;
+import static org.apache.hudi.testutils.SparkDatasetTestUtils.getRandomRows;
+import static org.apache.hudi.testutils.SparkDatasetTestUtils.toInternalRows;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+
+/**
+ * Unit tests {@link HoodieDataSourceInternalWriter}.
+ */
+public class TestHoodieDataSourceInternalWriter extends HoodieClientTestHarness {
+
+ private static final Random RANDOM = new Random();
+
+ @BeforeEach
+ public void setUp() throws Exception {
+ initSparkContexts("TestHoodieDataSourceInternalWriter");
+ initPath();
+ initFileSystem();
+ initTestDataGenerator();
+ initMetaClient();
+ }
+
+ @AfterEach
+ public void tearDown() throws Exception {
+ cleanupResources();
+ }
+
+ @Test
+ public void testDataSourceWriter() throws IOException {
+ // init config and table
+ HoodieWriteConfig cfg = getConfigBuilder(basePath).build();
+ String instantTime = "001";
+ // init writer
+ HoodieDataSourceInternalWriter dataSourceInternalWriter =
+ new HoodieDataSourceInternalWriter(instantTime, cfg, STRUCT_TYPE, sqlContext.sparkSession(), hadoopConf);
+ DataWriter<InternalRow> writer = dataSourceInternalWriter.createWriterFactory().createDataWriter(0, RANDOM.nextLong(), RANDOM.nextLong());
+
+ List<String> partitionPaths = Arrays.asList(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS);
+ List<String> partitionPathsAbs = new ArrayList<>();
+ for (String partitionPath : partitionPaths) {
+ partitionPathsAbs.add(basePath + "/" + partitionPath + "/*");
+ }
+
+ int size = 10 + RANDOM.nextInt(1000);
+ int batches = 5;
+ Dataset<Row> totalInputRows = null;
+
+ for (int j = 0; j < batches; j++) {
+ String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[j % 3];
+ Dataset<Row> inputRows = getRandomRows(sqlContext, size, partitionPath, false);
+ writeRows(inputRows, writer);
+ if (totalInputRows == null) {
+ totalInputRows = inputRows;
+ } else {
+ totalInputRows = totalInputRows.union(inputRows);
+ }
+ }
+
+ HoodieWriterCommitMessage commitMetadata = (HoodieWriterCommitMessage) writer.commit();
+ List<HoodieWriterCommitMessage> commitMessages = new ArrayList<>();
+ commitMessages.add(commitMetadata);
+ dataSourceInternalWriter.commit(commitMessages.toArray(new HoodieWriterCommitMessage[0]));
+ metaClient.reloadActiveTimeline();
+ Dataset<Row> result = HoodieClientTestUtils.read(jsc, basePath, sqlContext, metaClient.getFs(), partitionPathsAbs.toArray(new String[0]));
+ // verify output
+ assertOutput(totalInputRows, result, instantTime);
+ assertWriteStatuses(commitMessages.get(0).getWriteStatuses(), batches, size);
+ }
+
+ @Test
+ public void testMultipleDataSourceWrites() throws IOException {
+ // init config and table
+ HoodieWriteConfig cfg = getConfigBuilder(basePath).build();
+ int partitionCounter = 0;
+
+ // execute N rounds
+ for (int i = 0; i < 5; i++) {
+ String instantTime = "00" + i;
+ // init writer
+ HoodieDataSourceInternalWriter dataSourceInternalWriter =
+ new HoodieDataSourceInternalWriter(instantTime, cfg, STRUCT_TYPE, sqlContext.sparkSession(), hadoopConf);
+
+ List<HoodieWriterCommitMessage> commitMessages = new ArrayList<>();
+ Dataset<Row> totalInputRows = null;
+ DataWriter<InternalRow> writer = dataSourceInternalWriter.createWriterFactory().createDataWriter(partitionCounter++, RANDOM.nextLong(), RANDOM.nextLong());
+
+ int size = 10 + RANDOM.nextInt(1000);
+ int batches = 5; // batches cycle through the 3 default partitions
+
+ for (int j = 0; j < batches; j++) {
+ String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[j % 3];
+ Dataset<Row> inputRows = getRandomRows(sqlContext, size, partitionPath, false);
+ writeRows(inputRows, writer);
+ if (totalInputRows == null) {
+ totalInputRows = inputRows;
+ } else {
+ totalInputRows = totalInputRows.union(inputRows);
+ }
+ }
+
+ HoodieWriterCommitMessage commitMetadata = (HoodieWriterCommitMessage) writer.commit();
+ commitMessages.add(commitMetadata);
+ dataSourceInternalWriter.commit(commitMessages.toArray(new HoodieWriterCommitMessage[0]));
+ metaClient.reloadActiveTimeline();
+
+ Dataset<Row> result = HoodieClientTestUtils.readCommit(basePath, sqlContext, metaClient.getCommitTimeline(), instantTime);
+
+ // verify output
+ assertOutput(totalInputRows, result, instantTime);
+ assertWriteStatuses(commitMessages.get(0).getWriteStatuses(), batches, size);
+ }
+ }
+
+ @Test
+ public void testLargeWrites() throws IOException {
+ // init config and table
+ HoodieWriteConfig cfg = getConfigBuilder(basePath).build();
+ int partitionCounter = 0;
+
+ // execute N rounds
+ for (int i = 0; i < 3; i++) {
+ String instantTime = "00" + i;
+ // init writer
+ HoodieDataSourceInternalWriter dataSourceInternalWriter =
+ new HoodieDataSourceInternalWriter(instantTime, cfg, STRUCT_TYPE, sqlContext.sparkSession(), hadoopConf);
+
+ List<HoodieWriterCommitMessage> commitMessages = new ArrayList<>();
+ Dataset<Row> totalInputRows = null;
+ DataWriter<InternalRow> writer = dataSourceInternalWriter.createWriterFactory().createDataWriter(partitionCounter++, RANDOM.nextLong(), RANDOM.nextLong());
+
+ int size = 10000 + RANDOM.nextInt(10000);
+ int batches = 3; // one batch per partition
+
+ for (int j = 0; j < batches; j++) {
+ String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[j % 3];
+ Dataset<Row> inputRows = getRandomRows(sqlContext, size, partitionPath, false);
+ writeRows(inputRows, writer);
+ if (totalInputRows == null) {
+ totalInputRows = inputRows;
+ } else {
+ totalInputRows = totalInputRows.union(inputRows);
+ }
+ }
+
+ HoodieWriterCommitMessage commitMetadata = (HoodieWriterCommitMessage) writer.commit();
+ commitMessages.add(commitMetadata);
+ dataSourceInternalWriter.commit(commitMessages.toArray(new HoodieWriterCommitMessage[0]));
+ metaClient.reloadActiveTimeline();
+
+ Dataset<Row> result = HoodieClientTestUtils.readCommit(basePath, sqlContext, metaClient.getCommitTimeline(), instantTime);
+
+ // verify output
+ assertOutput(totalInputRows, result, instantTime);
+ assertWriteStatuses(commitMessages.get(0).getWriteStatuses(), batches, size);
+ }
+ }
+
+ /**
+ * Tests that DataSourceWriter.abort() discards the records written in the aborted batch:
+ * write and commit batch1,
+ * write and abort batch2,
+ * then verify that only records from batch1 are available when reading the entire dataset.
+ */
+ @Test
+ public void testAbort() throws IOException {
+ // init config and table
+ HoodieWriteConfig cfg = getConfigBuilder(basePath).build();
+
+ String instantTime0 = "00" + 0;
+ // init writer
+ HoodieDataSourceInternalWriter dataSourceInternalWriter =
+ new HoodieDataSourceInternalWriter(instantTime0, cfg, STRUCT_TYPE, sqlContext.sparkSession(), hadoopConf);
+ DataWriter<InternalRow> writer = dataSourceInternalWriter.createWriterFactory().createDataWriter(0, RANDOM.nextLong(), RANDOM.nextLong());
+
+ List<String> partitionPaths = Arrays.asList(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS);
+ List<String> partitionPathsAbs = new ArrayList<>();
+ for (String partitionPath : partitionPaths) {
+ partitionPathsAbs.add(basePath + "/" + partitionPath + "/*");
+ }
+
+ int size = 10 + RANDOM.nextInt(100);
+ int batches = 1;
+ Dataset<Row> totalInputRows = null;
+
+ for (int j = 0; j < batches; j++) {
+ String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[j % 3];
+ Dataset<Row> inputRows = getRandomRows(sqlContext, size, partitionPath, false);
+ writeRows(inputRows, writer);
+ if (totalInputRows == null) {
+ totalInputRows = inputRows;
+ } else {
+ totalInputRows = totalInputRows.union(inputRows);
+ }
+ }
+
+ HoodieWriterCommitMessage commitMetadata = (HoodieWriterCommitMessage) writer.commit();
+ List<HoodieWriterCommitMessage> commitMessages = new ArrayList<>();
+ commitMessages.add(commitMetadata);
+ // commit 1st batch
+ dataSourceInternalWriter.commit(commitMessages.toArray(new HoodieWriterCommitMessage[0]));
+ metaClient.reloadActiveTimeline();
+ Dataset<Row> result = HoodieClientTestUtils.read(jsc, basePath, sqlContext, metaClient.getFs(), partitionPathsAbs.toArray(new String[0]));
+ // verify rows
+ assertOutput(totalInputRows, result, instantTime0);
+ assertWriteStatuses(commitMessages.get(0).getWriteStatuses(), batches, size);
+
+ // 2nd batch. abort in the end
+ String instantTime1 = "00" + 1;
+ dataSourceInternalWriter =
+ new HoodieDataSourceInternalWriter(instantTime1, cfg, STRUCT_TYPE, sqlContext.sparkSession(), hadoopConf);
+ writer = dataSourceInternalWriter.createWriterFactory().createDataWriter(1, RANDOM.nextLong(), RANDOM.nextLong());
+
+ for (int j = 0; j < batches; j++) {
+ String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[j % 3];
+ Dataset<Row> inputRows = getRandomRows(sqlContext, size, partitionPath, false);
+ writeRows(inputRows, writer);
+ }
+
+ commitMetadata = (HoodieWriterCommitMessage) writer.commit();
+ commitMessages = new ArrayList<>();
+ commitMessages.add(commitMetadata);
+ // abort 2nd batch
+ dataSourceInternalWriter.abort(commitMessages.toArray(new HoodieWriterCommitMessage[0]));
+ metaClient.reloadActiveTimeline();
+ result = HoodieClientTestUtils.read(jsc, basePath, sqlContext, metaClient.getFs(), partitionPathsAbs.toArray(new String[0]));
+ // verify rows
+ // only rows from first batch should be present
+ assertOutput(totalInputRows, result, instantTime0);
+ }
+
+ private void writeRows(Dataset<Row> inputRows, DataWriter<InternalRow> writer) throws IOException {
+ List<InternalRow> internalRows = toInternalRows(inputRows, ENCODER);
+ // issue writes
+ for (InternalRow internalRow : internalRows) {
+ writer.write(internalRow);
+ }
+ }
+
+ private void assertWriteStatuses(List<HoodieInternalWriteStatus> writeStatuses, int batches, int size) {
+ assertEquals(batches, writeStatuses.size());
+ int counter = 0;
+ for (HoodieInternalWriteStatus writeStatus : writeStatuses) {
+ assertEquals(writeStatus.getPartitionPath(), HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]);
+ assertEquals(writeStatus.getTotalRecords(), size);
+ assertEquals(writeStatus.getFailedRowsSize(), 0);
+ assertEquals(writeStatus.getTotalErrorRecords(), 0);
+ assertFalse(writeStatus.hasErrors());
+ assertNull(writeStatus.getGlobalError());
+ assertNotNull(writeStatus.getFileId());
+ String fileId = writeStatus.getFileId();
+ HoodieWriteStat writeStat = writeStatus.getStat();
+ assertEquals(size, writeStat.getNumInserts());
+ assertEquals(size, writeStat.getNumWrites());
+ assertEquals(fileId, writeStat.getFileId());
+ assertEquals(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter++ % 3], writeStat.getPartitionPath());
+ assertEquals(0, writeStat.getNumDeletes());
+ assertEquals(0, writeStat.getNumUpdateWrites());
+ assertEquals(0, writeStat.getTotalWriteErrors());
+ }
+ }
+
+ private void assertOutput(Dataset<Row> expectedRows, Dataset<Row> actualRows, String instantTime) {
+ // verify 3 meta fields that are filled in within create handle
+ actualRows.collectAsList().forEach(entry -> {
+ assertEquals(entry.get(HoodieRecord.HOODIE_META_COLUMNS_NAME_TO_POS.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD)).toString(), instantTime);
+ assertFalse(entry.isNullAt(HoodieRecord.HOODIE_META_COLUMNS_NAME_TO_POS.get(HoodieRecord.FILENAME_METADATA_FIELD)));
+ assertFalse(entry.isNullAt(HoodieRecord.HOODIE_META_COLUMNS_NAME_TO_POS.get(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD)));
+ });
+
+ // after trimming 2 of the meta fields, rest of the fields should match
+ Dataset<Row> trimmedExpected = expectedRows.drop(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, HoodieRecord.COMMIT_TIME_METADATA_FIELD, HoodieRecord.FILENAME_METADATA_FIELD);
+ Dataset<Row> trimmedActual = actualRows.drop(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, HoodieRecord.COMMIT_TIME_METADATA_FIELD, HoodieRecord.FILENAME_METADATA_FIELD);
+ assertEquals(0, trimmedActual.except(trimmedExpected).count());
+ }
+}
diff --git a/hudi-spark/src/test/java/org/apache/hudi/keygen/TestComplexKeyGenerator.java b/hudi-spark/src/test/java/org/apache/hudi/keygen/TestComplexKeyGenerator.java
index bb94c25..4c5ded3 100644
--- a/hudi-spark/src/test/java/org/apache/hudi/keygen/TestComplexKeyGenerator.java
+++ b/hudi-spark/src/test/java/org/apache/hudi/keygen/TestComplexKeyGenerator.java
@@ -23,10 +23,13 @@ import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.exception.HoodieKeyException;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hudi.testutils.KeyGeneratorTestUtilities;
+import org.apache.spark.sql.Row;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
-public class TestComplexKeyGenerator extends TestKeyGeneratorUtilities {
+public class TestComplexKeyGenerator extends KeyGeneratorTestUtilities {
private TypedProperties getCommonProps(boolean getComplexRecordKey) {
TypedProperties properties = new TypedProperties();
@@ -76,13 +79,18 @@ public class TestComplexKeyGenerator extends TestKeyGeneratorUtilities {
public void testWrongRecordKeyField() {
ComplexKeyGenerator keyGenerator = new ComplexKeyGenerator(getWrongRecordKeyFieldProps());
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.getRecordKey(getRecord()));
+ Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.buildFieldPositionMapIfNeeded(KeyGeneratorTestUtilities.structType));
}
@Test
public void testHappyFlow() {
ComplexKeyGenerator keyGenerator = new ComplexKeyGenerator(getProps());
- HoodieKey key = keyGenerator.getKey(getRecord());
+ GenericRecord record = getRecord();
+ HoodieKey key = keyGenerator.getKey(record);
Assertions.assertEquals(key.getRecordKey(), "_row_key:key1,pii_col:pi");
Assertions.assertEquals(key.getPartitionPath(), "timestamp=4357686/ts_ms=2020-03-21");
+ Row row = KeyGeneratorTestUtilities.getRow(record);
+ Assertions.assertEquals(keyGenerator.getRecordKey(row), "_row_key:key1,pii_col:pi");
+ Assertions.assertEquals(keyGenerator.getPartitionPath(row), "timestamp=4357686/ts_ms=2020-03-21");
}
}
diff --git a/hudi-spark/src/test/java/org/apache/hudi/keygen/TestCustomKeyGenerator.java b/hudi-spark/src/test/java/org/apache/hudi/keygen/TestCustomKeyGenerator.java
index 699bf43..add2547 100644
--- a/hudi-spark/src/test/java/org/apache/hudi/keygen/TestCustomKeyGenerator.java
+++ b/hudi-spark/src/test/java/org/apache/hudi/keygen/TestCustomKeyGenerator.java
@@ -22,10 +22,13 @@ import org.apache.hudi.DataSourceWriteOptions;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.config.TypedProperties;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hudi.testutils.KeyGeneratorTestUtilities;
+import org.apache.spark.sql.Row;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
-public class TestCustomKeyGenerator extends TestKeyGeneratorUtilities {
+public class TestCustomKeyGenerator extends KeyGeneratorTestUtilities {
private TypedProperties getCommonProps(boolean getComplexRecordKey) {
TypedProperties properties = new TypedProperties();
@@ -97,25 +100,37 @@ public class TestCustomKeyGenerator extends TestKeyGeneratorUtilities {
@Test
public void testSimpleKeyGenerator() {
KeyGenerator keyGenerator = new CustomKeyGenerator(getPropertiesForSimpleKeyGen());
- HoodieKey key = keyGenerator.getKey(getRecord());
+ GenericRecord record = getRecord();
+ HoodieKey key = keyGenerator.getKey(record);
Assertions.assertEquals(key.getRecordKey(), "key1");
Assertions.assertEquals(key.getPartitionPath(), "timestamp=4357686");
+ Row row = KeyGeneratorTestUtilities.getRow(record);
+ Assertions.assertEquals(keyGenerator.getRecordKey(row), "key1");
+ Assertions.assertEquals(keyGenerator.getPartitionPath(row), "timestamp=4357686");
}
@Test
public void testTimestampBasedKeyGenerator() {
KeyGenerator keyGenerator = new CustomKeyGenerator(getPropertiesForTimestampBasedKeyGen());
- HoodieKey key = keyGenerator.getKey(getRecord());
+ GenericRecord record = getRecord();
+ HoodieKey key = keyGenerator.getKey(record);
Assertions.assertEquals(key.getRecordKey(), "key1");
Assertions.assertEquals(key.getPartitionPath(), "ts_ms=20200321");
+ Row row = KeyGeneratorTestUtilities.getRow(record);
+ Assertions.assertEquals(keyGenerator.getRecordKey(row), "key1");
+ Assertions.assertEquals(keyGenerator.getPartitionPath(row), "ts_ms=20200321");
}
@Test
public void testNonPartitionedKeyGenerator() {
KeyGenerator keyGenerator = new CustomKeyGenerator(getPropertiesForNonPartitionedKeyGen());
- HoodieKey key = keyGenerator.getKey(getRecord());
+ GenericRecord record = getRecord();
+ HoodieKey key = keyGenerator.getKey(record);
Assertions.assertEquals(key.getRecordKey(), "key1");
Assertions.assertTrue(key.getPartitionPath().isEmpty());
+ Row row = KeyGeneratorTestUtilities.getRow(record);
+ Assertions.assertEquals(keyGenerator.getRecordKey(row), "key1");
+ Assertions.assertTrue(keyGenerator.getPartitionPath(row).isEmpty());
}
@Test
@@ -127,6 +142,16 @@ public class TestCustomKeyGenerator extends TestKeyGeneratorUtilities {
} catch (Exception e) {
Assertions.assertTrue(e.getMessage().contains("No enum constant org.apache.hudi.keygen.CustomKeyGenerator.PartitionKeyType.DUMMY"));
}
+
+ try {
+ KeyGenerator keyGenerator = new CustomKeyGenerator(getInvalidPartitionKeyTypeProps());
+ GenericRecord record = getRecord();
+ Row row = KeyGeneratorTestUtilities.getRow(record);
+ keyGenerator.getPartitionPath(row);
+ Assertions.fail("should fail when invalid PartitionKeyType is provided!");
+ } catch (Exception e) {
+ Assertions.assertTrue(e.getMessage().contains("No enum constant org.apache.hudi.keygen.CustomKeyGenerator.PartitionKeyType.DUMMY"));
+ }
}
@Test
@@ -138,6 +163,16 @@ public class TestCustomKeyGenerator extends TestKeyGeneratorUtilities {
} catch (Exception e) {
Assertions.assertTrue(e.getMessage().contains("Property hoodie.datasource.write.recordkey.field not found"));
}
+
+ try {
+ KeyGenerator keyGenerator = new CustomKeyGenerator(getPropsWithoutRecordKeyFieldProps());
+ GenericRecord record = getRecord();
+ Row row = KeyGeneratorTestUtilities.getRow(record);
+ keyGenerator.getRecordKey(row);
+ Assertions.fail("should fail when record key field is not provided!");
+ } catch (Exception e) {
+ Assertions.assertTrue(e.getMessage().contains("Property hoodie.datasource.write.recordkey.field not found"));
+ }
}
@Test
@@ -149,21 +184,41 @@ public class TestCustomKeyGenerator extends TestKeyGeneratorUtilities {
} catch (Exception e) {
Assertions.assertTrue(e.getMessage().contains("Unable to find field names for partition path in proper format"));
}
+
+ try {
+ KeyGenerator keyGenerator = new CustomKeyGenerator(getImproperPartitionFieldFormatProp());
+ GenericRecord record = getRecord();
+ Row row = KeyGeneratorTestUtilities.getRow(record);
+ keyGenerator.getPartitionPath(row);
+ Assertions.fail("should fail when partition key field is provided in improper format!");
+ } catch (Exception e) {
+ Assertions.assertTrue(e.getMessage().contains("Unable to find field names for partition path in proper format"));
+ }
}
@Test
public void testComplexRecordKeyWithSimplePartitionPath() {
KeyGenerator keyGenerator = new CustomKeyGenerator(getComplexRecordKeyWithSimplePartitionProps());
- HoodieKey key = keyGenerator.getKey(getRecord());
+ GenericRecord record = getRecord();
+ HoodieKey key = keyGenerator.getKey(record);
Assertions.assertEquals(key.getRecordKey(), "_row_key:key1,pii_col:pi");
Assertions.assertEquals(key.getPartitionPath(), "timestamp=4357686");
+
+ Row row = KeyGeneratorTestUtilities.getRow(record);
+ Assertions.assertEquals(keyGenerator.getRecordKey(row), "_row_key:key1,pii_col:pi");
+ Assertions.assertEquals(keyGenerator.getPartitionPath(row), "timestamp=4357686");
}
@Test
public void testComplexRecordKeysWithComplexPartitionPath() {
KeyGenerator keyGenerator = new CustomKeyGenerator(getComplexRecordKeyAndPartitionPathProps());
- HoodieKey key = keyGenerator.getKey(getRecord());
+ GenericRecord record = getRecord();
+ HoodieKey key = keyGenerator.getKey(record);
Assertions.assertEquals(key.getRecordKey(), "_row_key:key1,pii_col:pi");
Assertions.assertEquals(key.getPartitionPath(), "timestamp=4357686/ts_ms=20200321");
+
+ Row row = KeyGeneratorTestUtilities.getRow(record);
+ Assertions.assertEquals(keyGenerator.getRecordKey(row), "_row_key:key1,pii_col:pi");
+ Assertions.assertEquals(keyGenerator.getPartitionPath(row), "timestamp=4357686/ts_ms=20200321");
}
}
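For reference, the new Row-based assertions in this test file all follow one pattern: build the same key generator, convert the Avro record to a Row via the test utilities, and expect both APIs to agree. A minimal sketch in Scala; the "timestamp:SIMPLE" partition spec value is an assumption about CustomKeyGenerator's field:type syntax, everything else reuses names from the diff above:

    import org.apache.hudi.DataSourceWriteOptions
    import org.apache.hudi.common.config.TypedProperties
    import org.apache.hudi.keygen.CustomKeyGenerator
    import org.apache.hudi.testutils.KeyGeneratorTestUtilities

    val props = new TypedProperties()
    props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "_row_key,pii_col")
    props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "timestamp:SIMPLE") // assumed field:type spec
    val keyGen = new CustomKeyGenerator(props)

    val record = new KeyGeneratorTestUtilities().getRecord()
    val row = KeyGeneratorTestUtilities.getRow(record)

    // the Avro-based and Row-based APIs are expected to produce identical output
    assert(keyGen.getKey(record).getRecordKey == keyGen.getRecordKey(row))
    assert(keyGen.getKey(record).getPartitionPath == keyGen.getPartitionPath(row))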
diff --git a/hudi-spark/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteKeyGenerator.java b/hudi-spark/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteKeyGenerator.java
index e46c783..96d607a 100644
--- a/hudi-spark/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteKeyGenerator.java
+++ b/hudi-spark/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteKeyGenerator.java
@@ -23,10 +23,13 @@ import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.exception.HoodieKeyException;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hudi.testutils.KeyGeneratorTestUtilities;
+import org.apache.spark.sql.Row;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
-public class TestGlobalDeleteKeyGenerator extends TestKeyGeneratorUtilities {
+public class TestGlobalDeleteKeyGenerator extends KeyGeneratorTestUtilities {
private TypedProperties getCommonProps(boolean getComplexRecordKey) {
TypedProperties properties = new TypedProperties();
@@ -66,13 +69,19 @@ public class TestGlobalDeleteKeyGenerator extends TestKeyGeneratorUtilities {
public void testWrongRecordKeyField() {
GlobalDeleteKeyGenerator keyGenerator = new GlobalDeleteKeyGenerator(getWrongRecordKeyFieldProps());
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.getRecordKey(getRecord()));
+ Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.buildFieldPositionMapIfNeeded(KeyGeneratorTestUtilities.structType));
}
@Test
public void testHappyFlow() {
GlobalDeleteKeyGenerator keyGenerator = new GlobalDeleteKeyGenerator(getProps());
- HoodieKey key = keyGenerator.getKey(getRecord());
+ GenericRecord record = getRecord();
+ HoodieKey key = keyGenerator.getKey(record);
Assertions.assertEquals(key.getRecordKey(), "_row_key:key1,pii_col:pi");
Assertions.assertEquals(key.getPartitionPath(), "");
+ keyGenerator.buildFieldPositionMapIfNeeded(KeyGeneratorTestUtilities.structType);
+ Row row = KeyGeneratorTestUtilities.getRow(record);
+ Assertions.assertEquals(keyGenerator.getRecordKey(row), "_row_key:key1,pii_col:pi");
+ Assertions.assertEquals(keyGenerator.getPartitionPath(row), "");
}
}
diff --git a/hudi-spark/src/test/java/org/apache/hudi/keygen/TestSimpleKeyGenerator.java b/hudi-spark/src/test/java/org/apache/hudi/keygen/TestSimpleKeyGenerator.java
index f36331a..4eb184e 100644
--- a/hudi-spark/src/test/java/org/apache/hudi/keygen/TestSimpleKeyGenerator.java
+++ b/hudi-spark/src/test/java/org/apache/hudi/keygen/TestSimpleKeyGenerator.java
@@ -23,10 +23,13 @@ import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.exception.HoodieKeyException;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hudi.testutils.KeyGeneratorTestUtilities;
+import org.apache.spark.sql.Row;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
-public class TestSimpleKeyGenerator extends TestKeyGeneratorUtilities {
+public class TestSimpleKeyGenerator extends KeyGeneratorTestUtilities {
private TypedProperties getCommonProps() {
TypedProperties properties = new TypedProperties();
@@ -52,6 +55,13 @@ public class TestSimpleKeyGenerator extends TestKeyGeneratorUtilities {
return properties;
}
+ private TypedProperties getWrongPartitionPathFieldProps() {
+ TypedProperties properties = new TypedProperties();
+ properties.put(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "_wrong_partition_path");
+ properties.put(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key");
+ return properties;
+ }
+
private TypedProperties getComplexRecordKeyProp() {
TypedProperties properties = new TypedProperties();
properties.put(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "timestamp");
@@ -79,19 +89,36 @@ public class TestSimpleKeyGenerator extends TestKeyGeneratorUtilities {
public void testWrongRecordKeyField() {
SimpleKeyGenerator keyGenerator = new SimpleKeyGenerator(getWrongRecordKeyFieldProps());
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.getRecordKey(getRecord()));
+ Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.buildFieldPositionMapIfNeeded(KeyGeneratorTestUtilities.structType));
+ }
+
+ @Test
+ public void testWrongPartitionPathField() {
+ SimpleKeyGenerator keyGenerator = new SimpleKeyGenerator(getWrongPartitionPathFieldProps());
+ GenericRecord record = getRecord();
+ Assertions.assertEquals(keyGenerator.getPartitionPath(record), KeyGenUtils.DEFAULT_PARTITION_PATH);
+ Assertions.assertEquals(keyGenerator.getPartitionPath(KeyGeneratorTestUtilities.getRow(record)),
+ KeyGenUtils.DEFAULT_PARTITION_PATH);
}
@Test
public void testComplexRecordKeyField() {
SimpleKeyGenerator keyGenerator = new SimpleKeyGenerator(getComplexRecordKeyProp());
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.getRecordKey(getRecord()));
+ Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.buildFieldPositionMapIfNeeded(KeyGeneratorTestUtilities.structType));
}
@Test
public void testHappyFlow() {
SimpleKeyGenerator keyGenerator = new SimpleKeyGenerator(getProps());
+ GenericRecord record = getRecord();
HoodieKey key = keyGenerator.getKey(getRecord());
Assertions.assertEquals(key.getRecordKey(), "key1");
Assertions.assertEquals(key.getPartitionPath(), "timestamp=4357686");
+
+ Row row = KeyGeneratorTestUtilities.getRow(record);
+ Assertions.assertEquals(keyGenerator.getRecordKey(row), "key1");
+ Assertions.assertEquals(keyGenerator.getPartitionPath(row), "timestamp=4357686");
}
+
}
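The buildFieldPositionMapIfNeeded calls above also give callers a way to fail fast on a bad schema before any rows are processed. A small sketch, assuming the same test schema and utilities used in this file:

    import org.apache.hudi.DataSourceWriteOptions
    import org.apache.hudi.common.config.TypedProperties
    import org.apache.hudi.keygen.SimpleKeyGenerator
    import org.apache.hudi.testutils.KeyGeneratorTestUtilities

    val props = new TypedProperties()
    props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "_row_key")
    props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "timestamp")
    val keyGen = new SimpleKeyGenerator(props)

    // Per the tests above: throws HoodieKeyException if the configured record key
    // field is missing from the StructType; a missing partition path field instead
    // falls back to the default partition path at key-generation time.
    keyGen.buildFieldPositionMapIfNeeded(KeyGeneratorTestUtilities.structType)

    val row = KeyGeneratorTestUtilities.getRow(new KeyGeneratorTestUtilities().getRecord())
    val recordKey = keyGen.getRecordKey(row)
    val partitionPath = keyGen.getPartitionPath(row)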
diff --git a/hudi-spark/src/test/java/org/apache/hudi/keygen/TestTimestampBasedKeyGenerator.java b/hudi-spark/src/test/java/org/apache/hudi/keygen/TestTimestampBasedKeyGenerator.java
index b8e0d29..6afc6eb 100644
--- a/hudi-spark/src/test/java/org/apache/hudi/keygen/TestTimestampBasedKeyGenerator.java
+++ b/hudi-spark/src/test/java/org/apache/hudi/keygen/TestTimestampBasedKeyGenerator.java
@@ -18,31 +18,45 @@
package org.apache.hudi.keygen;
+import org.apache.hudi.AvroConversionHelper;
+import org.apache.hudi.AvroConversionUtils;
import org.apache.hudi.DataSourceWriteOptions;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.testutils.SchemaTestUtil;
+import org.apache.hudi.exception.HoodieDeltaStreamerException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
-import org.apache.hudi.exception.HoodieDeltaStreamerException;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema;
+import org.apache.spark.sql.types.StructType;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
+import scala.Function1;
+
import static org.junit.jupiter.api.Assertions.assertEquals;
public class TestTimestampBasedKeyGenerator {
+
private GenericRecord baseRecord;
private TypedProperties properties = new TypedProperties();
+ private Schema schema;
+ private StructType structType;
+ private Row baseRow;
+
@BeforeEach
public void initialize() throws IOException {
- Schema schema = SchemaTestUtil.getTimestampEvolvedSchema();
+ schema = SchemaTestUtil.getTimestampEvolvedSchema();
+ structType = AvroConversionUtils.convertAvroSchemaToStructType(schema);
baseRecord = SchemaTestUtil
- .generateAvroRecordFromJson(schema, 1, "001", "f1");
+ .generateAvroRecordFromJson(schema, 1, "001", "f1");
+ baseRow = genericRecordToRow(baseRecord);
properties.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "field1");
properties.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "createTime");
@@ -61,6 +75,17 @@ public class TestTimestampBasedKeyGenerator {
return properties;
}
+ private Row genericRecordToRow(GenericRecord baseRecord) {
+ Function1<Object, Object> convertor = AvroConversionHelper.createConverterToRow(schema, structType);
+ Row row = (Row) convertor.apply(baseRecord);
+ int fieldCount = structType.fieldNames().length;
+ Object[] values = new Object[fieldCount];
+ for (int i = 0; i < fieldCount; i++) {
+ values[i] = row.get(i);
+ }
+ return new GenericRowWithSchema(values, structType);
+ }
+
private TypedProperties getBaseKeyConfig(String timestampType, String inputFormatList, String inputFormatDelimiterRegex, String inputTimezone, String outputFormat, String outputTimezone) {
if (timestampType != null) {
properties.setProperty(TimestampBasedKeyGenerator.Config.TIMESTAMP_TYPE_FIELD_PROP, timestampType);
@@ -88,25 +113,43 @@ public class TestTimestampBasedKeyGenerator {
// timezone is GMT+8:00
baseRecord.put("createTime", 1578283932000L);
properties = getBaseKeyConfig("EPOCHMILLISECONDS", "yyyy-MM-dd hh", "GMT+8:00", null);
- HoodieKey hk1 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord);
+ TimestampBasedKeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
+ HoodieKey hk1 = keyGen.getKey(baseRecord);
assertEquals("2020-01-06 12", hk1.getPartitionPath());
+ // test w/ Row
+ baseRow = genericRecordToRow(baseRecord);
+ assertEquals("2020-01-06 12", keyGen.getPartitionPath(baseRow));
+
// timezone is GMT
properties = getBaseKeyConfig("EPOCHMILLISECONDS", "yyyy-MM-dd hh", "GMT", null);
- HoodieKey hk2 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord);
+ keyGen = new TimestampBasedKeyGenerator(properties);
+ HoodieKey hk2 = keyGen.getKey(baseRecord);
assertEquals("2020-01-06 04", hk2.getPartitionPath());
+ // test w/ Row
+ assertEquals("2020-01-06 04", keyGen.getPartitionPath(baseRow));
+
// timestamp is DATE_STRING, timezone is GMT+8:00
baseRecord.put("createTime", "2020-01-06 12:12:12");
properties = getBaseKeyConfig("DATE_STRING", "yyyy-MM-dd hh", "GMT+8:00", null);
properties.setProperty("hoodie.deltastreamer.keygen.timebased.input.dateformat", "yyyy-MM-dd hh:mm:ss");
- HoodieKey hk3 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord);
+ keyGen = new TimestampBasedKeyGenerator(properties);
+ HoodieKey hk3 = keyGen.getKey(baseRecord);
assertEquals("2020-01-06 12", hk3.getPartitionPath());
+ // test w/ Row
+ baseRow = genericRecordToRow(baseRecord);
+ assertEquals("2020-01-06 12", keyGen.getPartitionPath(baseRow));
+
// timezone is GMT
properties = getBaseKeyConfig("DATE_STRING", "yyyy-MM-dd hh", "GMT", null);
- HoodieKey hk4 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord);
+ keyGen = new TimestampBasedKeyGenerator(properties);
+ HoodieKey hk4 = keyGen.getKey(baseRecord);
assertEquals("2020-01-06 12", hk4.getPartitionPath());
+
+ // test w/ Row
+ assertEquals("2020-01-06 12", keyGen.getPartitionPath(baseRow));
}
@Test
@@ -116,140 +159,173 @@ public class TestTimestampBasedKeyGenerator {
// timezone is GMT
properties = getBaseKeyConfig("SCALAR", "yyyy-MM-dd hh", "GMT", "days");
- HoodieKey hk5 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord);
+ TimestampBasedKeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
+ HoodieKey hk5 = keyGen.getKey(baseRecord);
assertEquals(hk5.getPartitionPath(), "2024-10-04 12");
+
+ // test w/ Row
+ baseRow = genericRecordToRow(baseRecord);
+ assertEquals("2024-10-04 12", keyGen.getPartitionPath(baseRow));
}
@Test
public void test_ExpectsMatch_SingleInputFormat_ISO8601WithMsZ_OutputTimezoneAsUTC() throws IOException {
baseRecord.put("createTime", "2020-04-01T13:01:33.428Z");
properties = this.getBaseKeyConfig(
- "DATE_STRING",
- "yyyy-MM-dd'T'HH:mm:ss.SSSZ",
- "",
- "",
- "yyyyMMddHH",
- "GMT");
- HoodieKey hk1 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord);
-
+ "DATE_STRING",
+ "yyyy-MM-dd'T'HH:mm:ss.SSSZ",
+ "",
+ "",
+ "yyyyMMddHH",
+ "GMT");
+ KeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
+ HoodieKey hk1 = keyGen.getKey(baseRecord);
Assertions.assertEquals("2020040113", hk1.getPartitionPath());
+
+ baseRow = genericRecordToRow(baseRecord);
+ assertEquals("2020040113", keyGen.getPartitionPath(baseRow));
}
@Test
public void test_ExpectsMatch_SingleInputFormats_ISO8601WithMsZ_OutputTimezoneAsInputDateTimeZone() throws IOException {
baseRecord.put("createTime", "2020-04-01T13:01:33.428Z");
properties = this.getBaseKeyConfig(
- "DATE_STRING",
- "yyyy-MM-dd'T'HH:mm:ss.SSSZ",
- "",
- "",
- "yyyyMMddHH",
- "");
- HoodieKey hk1 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord);
-
+ "DATE_STRING",
+ "yyyy-MM-dd'T'HH:mm:ss.SSSZ",
+ "",
+ "",
+ "yyyyMMddHH",
+ "");
+ KeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
+ HoodieKey hk1 = keyGen.getKey(baseRecord);
Assertions.assertEquals("2020040113", hk1.getPartitionPath());
+
+ baseRow = genericRecordToRow(baseRecord);
+ assertEquals("2020040113", keyGen.getPartitionPath(baseRow));
}
@Test
public void test_ExpectsMatch_MultipleInputFormats_ISO8601WithMsZ_OutputTimezoneAsUTC() throws IOException {
baseRecord.put("createTime", "2020-04-01T13:01:33.428Z");
properties = this.getBaseKeyConfig(
- "DATE_STRING",
- "yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ",
- "",
- "",
- "yyyyMMddHH",
- "UTC");
- HoodieKey hk1 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord);
-
+ "DATE_STRING",
+ "yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ",
+ "",
+ "",
+ "yyyyMMddHH",
+ "UTC");
+ KeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
+ HoodieKey hk1 = keyGen.getKey(baseRecord);
Assertions.assertEquals("2020040113", hk1.getPartitionPath());
+
+ baseRow = genericRecordToRow(baseRecord);
+ assertEquals("2020040113", keyGen.getPartitionPath(baseRow));
}
@Test
public void test_ExpectsMatch_MultipleInputFormats_ISO8601NoMsZ_OutputTimezoneAsUTC() throws IOException {
baseRecord.put("createTime", "2020-04-01T13:01:33Z");
properties = this.getBaseKeyConfig(
- "DATE_STRING",
- "yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ",
- "",
- "",
- "yyyyMMddHH",
- "UTC");
- HoodieKey hk1 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord);
-
+ "DATE_STRING",
+ "yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ",
+ "",
+ "",
+ "yyyyMMddHH",
+ "UTC");
+ KeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
+ HoodieKey hk1 = keyGen.getKey(baseRecord);
Assertions.assertEquals("2020040113", hk1.getPartitionPath());
+
+ baseRow = genericRecordToRow(baseRecord);
+ assertEquals("2020040113", keyGen.getPartitionPath(baseRow));
}
@Test
public void test_ExpectsMatch_MultipleInputFormats_ISO8601NoMsWithOffset_OutputTimezoneAsUTC() throws IOException {
baseRecord.put("createTime", "2020-04-01T13:01:33-05:00");
properties = this.getBaseKeyConfig(
- "DATE_STRING",
- "yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ",
- "",
- "",
- "yyyyMMddHH",
- "UTC");
- HoodieKey hk1 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord);
-
+ "DATE_STRING",
+ "yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ",
+ "",
+ "",
+ "yyyyMMddHH",
+ "UTC");
+ KeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
+ HoodieKey hk1 = keyGen.getKey(baseRecord);
Assertions.assertEquals("2020040118", hk1.getPartitionPath());
+
+ baseRow = genericRecordToRow(baseRecord);
+ assertEquals("2020040118", keyGen.getPartitionPath(baseRow));
}
@Test
public void test_ExpectsMatch_MultipleInputFormats_ISO8601WithMsWithOffset_OutputTimezoneAsUTC() throws IOException {
baseRecord.put("createTime", "2020-04-01T13:01:33.123-05:00");
properties = this.getBaseKeyConfig(
- "DATE_STRING",
- "yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ",
- "",
- "",
- "yyyyMMddHH",
- "UTC");
- HoodieKey hk1 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord);
-
+ "DATE_STRING",
+ "yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ",
+ "",
+ "",
+ "yyyyMMddHH",
+ "UTC");
+ KeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
+ HoodieKey hk1 = keyGen.getKey(baseRecord);
Assertions.assertEquals("2020040118", hk1.getPartitionPath());
+
+ baseRow = genericRecordToRow(baseRecord);
+ assertEquals("2020040118", keyGen.getPartitionPath(baseRow));
}
@Test
public void test_ExpectsMatch_MultipleInputFormats_ISO8601WithMsZ_OutputTimezoneAsEST() throws IOException {
baseRecord.put("createTime", "2020-04-01T13:01:33.123Z");
properties = this.getBaseKeyConfig(
- "DATE_STRING",
- "yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ",
- "",
- "",
- "yyyyMMddHH",
- "EST");
- HoodieKey hk1 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord);
-
+ "DATE_STRING",
+ "yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ",
+ "",
+ "",
+ "yyyyMMddHH",
+ "EST");
+ KeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
+ HoodieKey hk1 = keyGen.getKey(baseRecord);
Assertions.assertEquals("2020040109", hk1.getPartitionPath());
+
+ baseRow = genericRecordToRow(baseRecord);
+ assertEquals("2020040109", keyGen.getPartitionPath(baseRow));
}
@Test
- public void test_Throws_MultipleInputFormats_InputDateNotMatchingFormats() {
+ public void test_Throws_MultipleInputFormats_InputDateNotMatchingFormats() throws IOException {
baseRecord.put("createTime", "2020-04-01 13:01:33.123-05:00");
properties = this.getBaseKeyConfig(
- "DATE_STRING",
- "yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ",
- "",
- "",
- "yyyyMMddHH",
- "UTC");
- Assertions.assertThrows(HoodieDeltaStreamerException.class, () -> new TimestampBasedKeyGenerator(properties).getKey(baseRecord));
+ "DATE_STRING",
+ "yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ",
+ "",
+ "",
+ "yyyyMMddHH",
+ "UTC");
+ KeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
+ Assertions.assertThrows(HoodieDeltaStreamerException.class, () -> keyGen.getKey(baseRecord));
+
+ baseRow = genericRecordToRow(baseRecord);
+ Assertions.assertThrows(HoodieDeltaStreamerException.class, () -> keyGen.getPartitionPath(baseRow));
}
@Test
public void test_ExpectsMatch_MultipleInputFormats_ShortDate_OutputCustomDate() throws IOException {
baseRecord.put("createTime", "20200401");
properties = this.getBaseKeyConfig(
- "DATE_STRING",
- "yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ,yyyyMMdd",
- "",
- "UTC",
- "MM/dd/yyyy",
- "UTC");
- HoodieKey hk1 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord);
-
+ "DATE_STRING",
+ "yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ,yyyyMMdd",
+ "",
+ "UTC",
+ "MM/dd/yyyy",
+ "UTC");
+ KeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
+ HoodieKey hk1 = keyGen.getKey(baseRecord);
Assertions.assertEquals("04/01/2020", hk1.getPartitionPath());
+
+ baseRow = genericRecordToRow(baseRecord);
+ assertEquals("04/01/2020", keyGen.getPartitionPath(baseRow));
}
}
\ No newline at end of file
diff --git a/hudi-spark/src/test/java/org/apache/hudi/testutils/DataSourceTestUtils.java b/hudi-spark/src/test/java/org/apache/hudi/testutils/DataSourceTestUtils.java
new file mode 100644
index 0000000..4c707f5
--- /dev/null
+++ b/hudi-spark/src/test/java/org/apache/hudi/testutils/DataSourceTestUtils.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.testutils;
+
+import org.apache.hudi.common.model.HoodieKey;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.model.HoodieRecordPayload;
+import org.apache.hudi.common.testutils.RawTripTestPayload;
+import org.apache.hudi.common.util.FileIOUtils;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.table.BulkInsertPartitioner;
+
+import org.apache.avro.Schema;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.List;
+import java.util.Random;
+import java.util.UUID;
+import java.util.stream.Collectors;
+
+import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH;
+import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH;
+import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH;
+
+/**
+ * Test utils for data source tests.
+ */
+public class DataSourceTestUtils {
+
+ public static Option<String> convertToString(HoodieRecord record) {
+ try {
+ String str = ((RawTripTestPayload) record.getData()).getJsonData();
+ str = "{" + str.substring(str.indexOf("\"timestamp\":"));
+ // Remove the last } bracket
+ str = str.substring(0, str.length() - 1);
+ return Option.of(str + ", \"partition\": \"" + record.getPartitionPath() + "\"}");
+ } catch (IOException e) {
+ return Option.empty();
+ }
+ }
+
+ public static List<String> convertToStringList(List<HoodieRecord> records) {
+ return records.stream().map(DataSourceTestUtils::convertToString).filter(Option::isPresent).map(Option::get)
+ .collect(Collectors.toList());
+ }
+
+ public static List<String> convertKeysToStringList(List<HoodieKey> keys) {
+ return keys.stream()
+ .map(hr -> "{\"_row_key\":\"" + hr.getRecordKey() + "\",\"partition\":\"" + hr.getPartitionPath() + "\"}")
+ .collect(Collectors.toList());
+ }
+
+ public static class NoOpBulkInsertPartitioner<T extends HoodieRecordPayload>
+ implements BulkInsertPartitioner<T> {
+
+ @Override
+ public JavaRDD<HoodieRecord<T>> repartitionRecords(JavaRDD<HoodieRecord<T>> records, int outputSparkPartitions) {
+ return records;
+ }
+
+ @Override
+ public boolean arePartitionRecordsSorted() {
+ return false;
+ }
+ }
+
+ public static Schema getStructTypeExampleSchema() throws IOException {
+ return new Schema.Parser().parse(FileIOUtils.readAsUTFString(DataSourceTestUtils.class.getResourceAsStream("/exampleSchema.txt")));
+ }
+
+ public static List<Row> generateRandomRows(int count) {
+ Random random = new Random();
+ List<Row> toReturn = new ArrayList<>();
+ List<String> partitions = Arrays.asList(new String[] {DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH, DEFAULT_THIRD_PARTITION_PATH});
+ for (int i = 0; i < count; i++) {
+ Object[] values = new Object[3];
+ values[0] = UUID.randomUUID().toString();
+ values[1] = partitions.get(random.nextInt(3));
+ values[2] = new Date().getTime();
+ toReturn.add(RowFactory.create(values));
+ }
+ return toReturn;
+ }
+}
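A brief usage sketch of how these helpers feed the bulk-insert suites further below, assuming an active SparkSession named spark:

    import scala.collection.JavaConverters._
    import org.apache.hudi.AvroConversionUtils
    import org.apache.hudi.testutils.DataSourceTestUtils

    // build a DataFrame of random trip rows matching the example schema
    val schema = DataSourceTestUtils.getStructTypeExampleSchema
    val structType = AvroConversionUtils.convertAvroSchemaToStructType(schema)
    val rows = DataSourceTestUtils.generateRandomRows(100)
    val df = spark.createDataFrame(spark.sparkContext.parallelize(rows.asScala), structType)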
diff --git a/hudi-spark/src/test/java/org/apache/hudi/keygen/TestKeyGeneratorUtilities.java b/hudi-spark/src/test/java/org/apache/hudi/testutils/KeyGeneratorTestUtilities.java
similarity index 51%
rename from hudi-spark/src/test/java/org/apache/hudi/keygen/TestKeyGeneratorUtilities.java
rename to hudi-spark/src/test/java/org/apache/hudi/testutils/KeyGeneratorTestUtilities.java
index c0d027e..53b2abf 100644
--- a/hudi-spark/src/test/java/org/apache/hudi/keygen/TestKeyGeneratorUtilities.java
+++ b/hudi-spark/src/test/java/org/apache/hudi/testutils/KeyGeneratorTestUtilities.java
@@ -16,19 +16,32 @@
* limitations under the License.
*/
-package org.apache.hudi.keygen;
+package org.apache.hudi.testutils;
+
+import org.apache.hudi.AvroConversionHelper;
+import org.apache.hudi.AvroConversionUtils;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema;
+import org.apache.spark.sql.types.StructType;
+
+import scala.Function1;
-public class TestKeyGeneratorUtilities {
+public class KeyGeneratorTestUtilities {
- public String exampleSchema = "{\"type\": \"record\",\"name\": \"testrec\",\"fields\": [ "
+ public static String exampleSchema = "{\"type\": \"record\",\"name\": \"testrec\",\"fields\": [ "
+ "{\"name\": \"timestamp\",\"type\": \"long\"},{\"name\": \"_row_key\", \"type\": \"string\"},"
+ "{\"name\": \"ts_ms\", \"type\": \"string\"},"
+ "{\"name\": \"pii_col\", \"type\": \"string\"}]}";
+ public static final String TEST_STRUCTNAME = "test_struct_name";
+ public static final String TEST_RECORD_NAMESPACE = "test_record_namespace";
+ public static Schema schema = new Schema.Parser().parse(exampleSchema);
+ public static StructType structType = AvroConversionUtils.convertAvroSchemaToStructType(schema);
+
public GenericRecord getRecord() {
GenericRecord record = new GenericData.Record(new Schema.Parser().parse(exampleSchema));
record.put("timestamp", 4357686);
@@ -37,4 +50,19 @@ public class TestKeyGeneratorUtilities {
record.put("pii_col", "pi");
return record;
}
+
+ public static Row getRow(GenericRecord record) {
+ return getRow(record, schema, structType);
+ }
+
+ public static Row getRow(GenericRecord record, Schema schema, StructType structType) {
+ Function1<Object, Object> converterFn = AvroConversionHelper.createConverterToRow(schema, structType);
+ Row row = (Row) converterFn.apply(record);
+ int fieldCount = structType.fieldNames().length;
+ Object[] values = new Object[fieldCount];
+ for (int i = 0; i < fieldCount; i++) {
+ values[i] = row.get(i);
+ }
+ return new GenericRowWithSchema(values, structType);
+ }
}
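Note that getRow repacks the converted values into a GenericRowWithSchema, so the returned Row carries the StructType alongside its values. A one-line check, assuming the utilities above:

    val row = KeyGeneratorTestUtilities.getRow(new KeyGeneratorTestUtilities().getRecord())
    assert(row.schema == KeyGeneratorTestUtilities.structType)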
diff --git a/hudi-common/src/test/resources/timestamp-test-evolved.avsc b/hudi-spark/src/test/resources/exampleSchema.txt
similarity index 69%
copy from hudi-common/src/test/resources/timestamp-test-evolved.avsc
copy to hudi-spark/src/test/resources/exampleSchema.txt
index 421c672..c7c0ff7 100644
--- a/hudi-common/src/test/resources/timestamp-test-evolved.avsc
+++ b/hudi-spark/src/test/resources/exampleSchema.txt
@@ -16,11 +16,21 @@
* limitations under the License.
*/
{
- "namespace": "example.avro",
- "type": "record",
- "name": "User",
- "fields": [
- {"name": "field1", "type": ["null", "string"], "default": null},
- {"name": "createTime", "type": ["null", "string"], "default": null}
- ]
-}
\ No newline at end of file
+ "namespace": "example.schema",
+ "type": "record",
+ "name": "trip",
+ "fields": [
+ {
+ "name": "_row_key",
+ "type": "string"
+ },
+ {
+ "name": "partition",
+ "type": "string"
+ },
+ {
+ "name": "ts",
+ "type": ["long", "null"]
+ }
+ ]
+}
diff --git a/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceDefaults.scala b/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceDefaults.scala
index a1229fb..73e1f5d 100644
--- a/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceDefaults.scala
+++ b/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceDefaults.scala
@@ -17,13 +17,18 @@
package org.apache.hudi
+import java.util
+
import org.apache.avro.generic.GenericRecord
+import org.apache.hudi.avro.HoodieAvroUtils
import org.apache.hudi.common.config.TypedProperties
-import org.apache.hudi.common.model.{EmptyHoodieRecordPayload, OverwriteWithLatestAvroPayload}
+import org.apache.hudi.common.model.{EmptyHoodieRecordPayload, HoodieKey, OverwriteWithLatestAvroPayload}
import org.apache.hudi.common.testutils.SchemaTestUtil
import org.apache.hudi.common.util.Option
import org.apache.hudi.exception.{HoodieException, HoodieKeyException}
-import org.apache.hudi.keygen.{ComplexKeyGenerator, GlobalDeleteKeyGenerator, SimpleKeyGenerator}
+import org.apache.hudi.keygen._
+import org.apache.hudi.testutils.KeyGeneratorTestUtilities
+import org.apache.spark.sql.Row
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.{BeforeEach, Test}
import org.scalatest.Assertions.fail
@@ -34,14 +39,18 @@ import org.scalatest.Assertions.fail
class TestDataSourceDefaults {
val schema = SchemaTestUtil.getComplexEvolvedSchema
+ val structType = AvroConversionUtils.convertAvroSchemaToStructType(schema)
var baseRecord: GenericRecord = _
+ var baseRow: Row = _
+ val testStructName = "testStructName"
+ val testNamespace = "testNamespace"
@BeforeEach def initialize(): Unit = {
baseRecord = SchemaTestUtil
.generateAvroRecordFromJson(schema, 1, "001", "f1")
+ baseRow = KeyGeneratorTestUtilities.getRow(baseRecord, schema, structType)
}
-
private def getKeyConfig(recordKeyFieldName: String, partitionPathField: String, hiveStylePartitioning: String): TypedProperties = {
val props = new TypedProperties()
props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, recordKeyFieldName)
@@ -51,11 +60,16 @@ class TestDataSourceDefaults {
}
@Test def testSimpleKeyGenerator() = {
+
// top level, valid fields
- val hk1 = new SimpleKeyGenerator(getKeyConfig("field1", "name", "false")).getKey(baseRecord)
+ var keyGen = new SimpleKeyGenerator(getKeyConfig("field1", "name", "false"))
+ val hk1 = keyGen.getKey(baseRecord)
assertEquals("field1", hk1.getRecordKey)
assertEquals("name1", hk1.getPartitionPath)
+ assertEquals("field1", keyGen.getRecordKey(baseRow))
+ assertEquals("name1", keyGen.getPartitionPath(baseRow))
+
// partition path field not specified
try {
val props = new TypedProperties()
@@ -64,7 +78,19 @@ class TestDataSourceDefaults {
fail("Should have errored out")
} catch {
case e: IllegalArgumentException =>
- // do nothing
+ // do nothing
+ }
+
+ // partition path field not specified using Row
+ try {
+ val props = new TypedProperties()
+ props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "field1")
+ val keyGen = new SimpleKeyGenerator(props)
+ keyGen.getRecordKey(baseRow)
+ fail("Should have errored out")
+ } catch {
+ case e: IllegalArgumentException =>
+ // do nothing
}
// recordkey field not specified
@@ -75,7 +101,19 @@ class TestDataSourceDefaults {
fail("Should have errored out")
} catch {
case e: IllegalArgumentException =>
- // do nothing
+ // do nothing
+ }
+
+ // recordkey field not specified using Row
+ try {
+ val props = new TypedProperties()
+ props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "partitionField")
+ val keyGen = new SimpleKeyGenerator(props)
+ keyGen.getPartitionPath(baseRow)
+ fail("Should have errored out")
+ } catch {
+ case e: IllegalArgumentException =>
+ // do nothing
}
// nested field as record key and partition path
@@ -91,7 +129,7 @@ class TestDataSourceDefaults {
fail("Should have errored out")
} catch {
case e: HoodieException =>
- // do nothing
+ // do nothing
}
// if partition path can't be found, return default partition path
@@ -99,22 +137,44 @@ class TestDataSourceDefaults {
.getKey(baseRecord)
assertEquals("default", hk3.getPartitionPath)
+ // if partition path can't be found, return default partition path using row
+ keyGen = new SimpleKeyGenerator(getKeyConfig("testNestedRecord.userId", "testNestedRecord.notThere", "false"))
+ val hk3_row = keyGen.getPartitionPath(baseRow)
+ assertEquals("default", hk3_row)
+
// if enable hive style partitioning
val hk4 = new SimpleKeyGenerator(getKeyConfig("field1", "name", "true")).getKey(baseRecord)
assertEquals("name=name1", hk4.getPartitionPath)
+ // if enable hive style partitioning using row
+ keyGen = new SimpleKeyGenerator(getKeyConfig("field1", "name", "true"))
+ val hk4_row = keyGen.getPartitionPath(baseRow)
+ assertEquals("name=name1", hk4_row)
+
// if partition is null, return default partition path
baseRecord.put("name", "")
val hk5 = new SimpleKeyGenerator(getKeyConfig("field1", "name", "false"))
.getKey(baseRecord)
assertEquals("default", hk5.getPartitionPath)
+ // if partition is null, return default partition path using Row
+ keyGen = new SimpleKeyGenerator(getKeyConfig("field1", "name", "false"))
+ baseRow = KeyGeneratorTestUtilities.getRow(baseRecord, schema, structType)
+ val hk5_row = keyGen.getPartitionPath(baseRow)
+ assertEquals("default", hk5_row)
+
// if partition is empty, return default partition path
baseRecord.put("name", null)
val hk6 = new SimpleKeyGenerator(getKeyConfig("field1", "name", "false"))
.getKey(baseRecord)
assertEquals("default", hk6.getPartitionPath)
+ // if partition is empty, return default partition path using Row
+ keyGen = new SimpleKeyGenerator(getKeyConfig("field1", "name", "false"))
+ baseRow = KeyGeneratorTestUtilities.getRow(baseRecord, schema, structType)
+ val hk6_row = keyGen.getPartitionPath(baseRow)
+ assertEquals("default", hk6_row)
+
// if record key is empty, throw error
try {
baseRecord.put("field1", "")
@@ -125,7 +185,21 @@ class TestDataSourceDefaults {
fail("Should have errored out")
} catch {
case e: HoodieKeyException =>
- // do nothing
+ // do nothing
+ }
+
+ // if record key is empty, throw error. Using Row
+ try {
+ val props = new TypedProperties()
+ props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "field1")
+ props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "name")
+ keyGen = new SimpleKeyGenerator(props)
+ baseRow = KeyGeneratorTestUtilities.getRow(baseRecord, schema, structType)
+ keyGen.getRecordKey(baseRow)
+ fail("Should have errored out")
+ } catch {
+ case e: HoodieKeyException =>
+ // do nothing
}
// if record key is null, throw error
@@ -138,16 +212,51 @@ class TestDataSourceDefaults {
fail("Should have errored out")
} catch {
case e: HoodieKeyException =>
- // do nothing
+ // do nothing
+ }
+
+ // if record key is null, throw error. Using Row
+ try {
+ val props = new TypedProperties()
+ props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "field1")
+ props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "name")
+ keyGen = new SimpleKeyGenerator(props)
+ baseRow = KeyGeneratorTestUtilities.getRow(baseRecord, schema, structType)
+ keyGen.getRecordKey(baseRow)
+ fail("Should have errored out")
+ } catch {
+ case e: HoodieKeyException =>
+ // do nothing
+ }
+ }
+
+ @Test def testUserDefinedKeyGeneratorWorksWithRows(): Unit = {
+ val keyGen = new UserDefinedKeyGenerator(getKeyConfig("field1", "name", "false"))
+ assertEquals("field1", keyGen.getRecordKey(baseRow))
+ assertEquals("name1", keyGen.getPartitionPath(baseRow))
+ }
+
+ class UserDefinedKeyGenerator(props: TypedProperties) extends KeyGenerator(props) {
+ val recordKeyProp: String = props.getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY)
+ val partitionPathProp: String = props.getString(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY)
+
+ override def getKey(record: GenericRecord): HoodieKey = {
+ new HoodieKey(HoodieAvroUtils.getNestedFieldValAsString(record, recordKeyProp, true),
+ HoodieAvroUtils.getNestedFieldValAsString(record, partitionPathProp, true))
}
}
@Test def testComplexKeyGenerator() = {
// top level, valid fields
- val hk1 = new ComplexKeyGenerator(getKeyConfig("field1,name", "field1,name", "false")).getKey(baseRecord)
+ var keyGen = new ComplexKeyGenerator(getKeyConfig("field1,name", "field1,name", "false"))
+ val hk1 = keyGen.getKey(baseRecord)
assertEquals("field1:field1,name:name1", hk1.getRecordKey)
assertEquals("field1/name1", hk1.getPartitionPath)
+ // top level, valid fields with Row
+ assertEquals("field1:field1,name:name1", keyGen.getRecordKey(baseRow))
+ assertEquals("field1/name1", keyGen.getPartitionPath(baseRow))
+
// partition path field not specified
try {
val props = new TypedProperties()
@@ -156,7 +265,19 @@ class TestDataSourceDefaults {
fail("Should have errored out")
} catch {
case e: IllegalArgumentException =>
- // do nothing
+ // do nothing
+ }
+
+ // partition path field not specified using Row
+ try {
+ val props = new TypedProperties()
+ props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "field1")
+ val keyGen = new ComplexKeyGenerator(props)
+ keyGen.getRecordKey(baseRow)
+ fail("Should have errored out")
+ } catch {
+ case e: IllegalArgumentException =>
+ // do nothing
}
// recordkey field not specified
@@ -167,15 +288,31 @@ class TestDataSourceDefaults {
fail("Should have errored out")
} catch {
case e: IllegalArgumentException =>
- // do nothing
+ // do nothing
+ }
+
+ // recordkey field not specified
+ try {
+ val props = new TypedProperties()
+ props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "partitionField")
+ val keyGen = new ComplexKeyGenerator(props)
+ keyGen.getPartitionPath(baseRow)
+ fail("Should have errored out")
+ } catch {
+ case e: IllegalArgumentException =>
+ // do nothing
}
// nested field as record key and partition path
- val hk2 = new ComplexKeyGenerator(getKeyConfig("testNestedRecord.userId,testNestedRecord.isAdmin", "testNestedRecord.userId,testNestedRecord.isAdmin", "false"))
- .getKey(baseRecord)
+ keyGen = new ComplexKeyGenerator(getKeyConfig("testNestedRecord.userId,testNestedRecord.isAdmin", "testNestedRecord.userId,testNestedRecord.isAdmin", "false"))
+ val hk2 = keyGen.getKey(baseRecord)
assertEquals("testNestedRecord.userId:UserId1@001,testNestedRecord.isAdmin:false", hk2.getRecordKey)
assertEquals("UserId1@001/false", hk2.getPartitionPath)
+ // nested field as record key and partition path
+ assertEquals("testNestedRecord.userId:UserId1@001,testNestedRecord.isAdmin:false", keyGen.getRecordKey(baseRow))
+ assertEquals("UserId1@001/false", keyGen.getPartitionPath(baseRow))
+
// Nested record key not found
try {
new ComplexKeyGenerator(getKeyConfig("testNestedRecord.NotThere", "testNestedRecord.isAdmin", "false"))
@@ -183,31 +320,57 @@ class TestDataSourceDefaults {
fail("Should have errored out")
} catch {
case e: HoodieException =>
- // do nothing
+ // do nothing
+ }
+
+ // Nested record key not found
+ try {
+ val keyGen = new ComplexKeyGenerator(getKeyConfig("testNestedRecord.NotThere", "testNestedRecord.isAdmin", "false"))
+ keyGen.getRecordKey(baseRow)
+ fail("Should have errored out")
+ } catch {
+ case e: HoodieException =>
+ // do nothing
}
// if partition path can't be found, return default partition path
- val hk3 = new ComplexKeyGenerator(getKeyConfig("testNestedRecord.userId", "testNestedRecord.notThere", "false"))
- .getKey(baseRecord)
+ keyGen = new ComplexKeyGenerator(getKeyConfig("testNestedRecord.userId", "testNestedRecord.notThere", "false"))
+ val hk3 = keyGen.getKey(baseRecord)
assertEquals("default", hk3.getPartitionPath)
+ assertEquals("default", keyGen.getPartitionPath(baseRow))
+
// if enable hive style partitioning
- val hk4 = new ComplexKeyGenerator(getKeyConfig("field1,name", "field1,name", "true")).getKey(baseRecord)
+ keyGen = new ComplexKeyGenerator(getKeyConfig("field1,name", "field1,name", "true"))
+ val hk4 = keyGen.getKey(baseRecord)
assertEquals("field1:field1,name:name1", hk4.getRecordKey)
assertEquals("field1=field1/name=name1", hk4.getPartitionPath)
+ assertEquals("field1:field1,name:name1", keyGen.getRecordKey(baseRow))
+ assertEquals("field1=field1/name=name1", keyGen.getPartitionPath(baseRow))
+
// if one part of the record key is empty, replace with "__empty__"
baseRecord.put("name", "")
- val hk5 = new ComplexKeyGenerator(getKeyConfig("field1,name", "field1,name", "false")).getKey(baseRecord)
+ keyGen = new ComplexKeyGenerator(getKeyConfig("field1,name", "field1,name", "false"))
+ val hk5 = keyGen.getKey(baseRecord)
assertEquals("field1:field1,name:__empty__", hk5.getRecordKey)
assertEquals("field1/default", hk5.getPartitionPath)
+ baseRow = KeyGeneratorTestUtilities.getRow(baseRecord, schema, structType)
+ assertEquals("field1:field1,name:__empty__", keyGen.getRecordKey(baseRow))
+ assertEquals("field1/default", keyGen.getPartitionPath(baseRow))
+
// if one part of the record key is null, replace with "__null__"
baseRecord.put("name", null)
- val hk6 = new ComplexKeyGenerator(getKeyConfig("field1,name", "field1,name", "false")).getKey(baseRecord)
+ keyGen = new ComplexKeyGenerator(getKeyConfig("field1,name", "field1,name", "false"))
+ val hk6 = keyGen.getKey(baseRecord)
assertEquals("field1:field1,name:__null__", hk6.getRecordKey)
assertEquals("field1/default", hk6.getPartitionPath)
+ baseRow = KeyGeneratorTestUtilities.getRow(baseRecord, schema, structType)
+ assertEquals("field1:field1,name:__null__", keyGen.getRecordKey(baseRow))
+ assertEquals("field1/default", keyGen.getPartitionPath(baseRow))
+
// if all parts of the composite record key are null/empty, throw error
try {
baseRecord.put("name", "")
@@ -219,46 +382,89 @@ class TestDataSourceDefaults {
fail("Should have errored out")
} catch {
case e: HoodieKeyException =>
- // do nothing
+ // do nothing
+ }
+
+ // if all parts of the composite record key are null/empty, throw error
+ try {
+ baseRecord.put("name", "")
+ baseRecord.put("field1", null)
+ val props = new TypedProperties()
+ props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "field1,name")
+ props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "field1,name")
+ keyGen = new ComplexKeyGenerator(props)
+ baseRow = KeyGeneratorTestUtilities.getRow(baseRecord, schema, structType)
+ keyGen.getRecordKey(baseRow)
+ fail("Should have errored out")
+ } catch {
+ case e: HoodieKeyException =>
+ // do nothing
}
// reset name and field1 values.
baseRecord.put("name", "name1")
baseRecord.put("field1", "field1")
- val hk7 = new ComplexKeyGenerator(getKeyConfig("field1, name", "field1, name", "false")).getKey(baseRecord)
+ keyGen = new ComplexKeyGenerator(getKeyConfig("field1, name", "field1, name", "false"))
+ val hk7 = keyGen.getKey(baseRecord)
assertEquals("field1:field1,name:name1", hk7.getRecordKey)
assertEquals("field1/name1", hk7.getPartitionPath)
- val hk8 = new ComplexKeyGenerator(getKeyConfig("field1,", "field1,", "false")).getKey(baseRecord)
+ baseRow = KeyGeneratorTestUtilities.getRow(baseRecord, schema, structType)
+ assertEquals("field1:field1,name:name1", keyGen.getRecordKey(baseRow))
+ assertEquals("field1/name1", keyGen.getPartitionPath(baseRow))
+
+ keyGen = new ComplexKeyGenerator(getKeyConfig("field1,", "field1,", "false"))
+ val hk8 = keyGen.getKey(baseRecord)
assertEquals("field1:field1", hk8.getRecordKey)
assertEquals("field1", hk8.getPartitionPath)
+
+ assertEquals("field1:field1", keyGen.getRecordKey(baseRow))
+ assertEquals("field1", keyGen.getPartitionPath(baseRow))
}
@Test def testGlobalDeleteKeyGenerator() = {
// top level, partition value included but not actually used
- val hk1 = new GlobalDeleteKeyGenerator(getKeyConfig("field1,name", "field1,name", "false")).getKey(baseRecord)
+ var keyGen = new GlobalDeleteKeyGenerator(getKeyConfig("field1,name", "field1,name", "false"))
+ val hk1 = keyGen.getKey(baseRecord)
assertEquals("field1:field1,name:name1", hk1.getRecordKey)
assertEquals("", hk1.getPartitionPath)
+ assertEquals("field1:field1,name:name1", keyGen.getRecordKey(baseRow))
+ assertEquals("", keyGen.getPartitionPath(baseRow))
+
// top level, partition value not included
val props = new TypedProperties()
props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "field1,name")
- val hk2 = new GlobalDeleteKeyGenerator(props).getKey(baseRecord)
+ keyGen = new GlobalDeleteKeyGenerator(props)
+ val hk2 = keyGen.getKey(baseRecord)
assertEquals("field1:field1,name:name1", hk2.getRecordKey)
assertEquals("", hk2.getPartitionPath)
+ assertEquals("field1:field1,name:name1", keyGen.getRecordKey(baseRow))
+ assertEquals("", keyGen.getPartitionPath(baseRow))
+
// if one part of the record key is empty, replace with "__empty__"
baseRecord.put("name", "")
- val hk3 = new GlobalDeleteKeyGenerator(getKeyConfig("field1,name", "field1,name", "false")).getKey(baseRecord)
+ keyGen = new GlobalDeleteKeyGenerator(getKeyConfig("field1,name", "field1,name", "false"))
+ val hk3 = keyGen.getKey(baseRecord)
assertEquals("field1:field1,name:__empty__", hk3.getRecordKey)
assertEquals("", hk3.getPartitionPath)
+ baseRow = KeyGeneratorTestUtilities.getRow(baseRecord, schema, structType)
+ assertEquals("field1:field1,name:__empty__", keyGen.getRecordKey(baseRow))
+ assertEquals("", keyGen.getPartitionPath(baseRow))
+
// if one part of the record key is null, replace with "__null__"
baseRecord.put("name", null)
- val hk4 = new GlobalDeleteKeyGenerator(getKeyConfig("field1,name", "field1,name", "false")).getKey(baseRecord)
+ keyGen = new GlobalDeleteKeyGenerator(getKeyConfig("field1,name", "field1,name", "false"))
+ val hk4 = keyGen.getKey(baseRecord)
assertEquals("field1:field1,name:__null__", hk4.getRecordKey)
assertEquals("", hk4.getPartitionPath)
+ baseRow = KeyGeneratorTestUtilities.getRow(baseRecord, schema, structType)
+ assertEquals("field1:field1,name:__null__", keyGen.getRecordKey(baseRow))
+ assertEquals("", keyGen.getPartitionPath(baseRow))
+
// recordkey field not specified
try {
val props = new TypedProperties()
@@ -267,7 +473,19 @@ class TestDataSourceDefaults {
fail("Should have errored out")
} catch {
case e: IllegalArgumentException =>
- // do nothing
+ // do nothing
+ }
+
+ // recordkey field not specified
+ try {
+ val props = new TypedProperties()
+ props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "partitionField")
+ val keyGen = new GlobalDeleteKeyGenerator(props)
+ keyGen.getRecordKey(baseRow)
+ fail("Should have errored out")
+ } catch {
+ case e: IllegalArgumentException =>
+ // do nothing
}
// Nested record key not found
@@ -277,7 +495,17 @@ class TestDataSourceDefaults {
fail("Should have errored out")
} catch {
case e: HoodieException =>
- // do nothing
+ // do nothing
+ }
+
+ // Nested record key not found
+ try {
+ val keyGen = new GlobalDeleteKeyGenerator(getKeyConfig("testNestedRecord.NotThere", "testNestedRecord.isAdmin", "false"))
+ keyGen.getRecordKey(baseRow)
+ fail("Should have errored out")
+ } catch {
+ case e: HoodieException =>
+ // do nothing
}
// if all parts of the composite record key are null/empty, throw error
@@ -292,6 +520,21 @@ class TestDataSourceDefaults {
case e: HoodieKeyException =>
// do nothing
}
+
+ // if all parts of the composite record key are null/empty, throw error
+ try {
+ baseRecord.put("name", "")
+ baseRecord.put("field1", null)
+ baseRow = KeyGeneratorTestUtilities.getRow(baseRecord, schema, structType)
+ val props = new TypedProperties()
+ props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "field1,name")
+ val keyGen = new GlobalDeleteKeyGenerator(props)
+ keyGen.getRecordKey(baseRow)
+ fail("Should have errored out")
+ } catch {
+ case e: HoodieKeyException =>
+ // do nothing
+ }
}
@Test def testOverwriteWithLatestAvroPayload() = {
diff --git a/hudi-spark/src/test/scala/org/apache/hudi/functional/HoodieSparkSqlWriterSuite.scala b/hudi-spark/src/test/scala/org/apache/hudi/functional/HoodieSparkSqlWriterSuite.scala
index feeec96..8995b7c 100644
--- a/hudi-spark/src/test/scala/org/apache/hudi/functional/HoodieSparkSqlWriterSuite.scala
+++ b/hudi-spark/src/test/scala/org/apache/hudi/functional/HoodieSparkSqlWriterSuite.scala
@@ -17,24 +17,28 @@
package org.apache.hudi.functional
+import java.util
import java.util.{Date, UUID}
import org.apache.commons.io.FileUtils
import org.apache.hudi.DataSourceWriteOptions._
-import org.apache.hudi.HoodieSparkSqlWriter
+import org.apache.hudi.common.model.HoodieRecord
+import org.apache.hudi.common.testutils.HoodieTestDataGenerator
import org.apache.hudi.config.HoodieWriteConfig
import org.apache.hudi.exception.HoodieException
-import org.apache.spark.sql.{SaveMode, SparkSession}
+import org.apache.hudi.testutils.DataSourceTestUtils
+import org.apache.hudi.{AvroConversionUtils, DataSourceWriteOptions, HoodieSparkSqlWriter, HoodieWriterUtils}
+import org.apache.spark.sql.{Row, SaveMode, SparkSession}
import org.scalatest.{FunSuite, Matchers}
class HoodieSparkSqlWriterSuite extends FunSuite with Matchers {
test("Parameters With Write Defaults") {
- val originals = HoodieSparkSqlWriter.parametersWithWriteDefaults(Map.empty)
+ val originals = HoodieWriterUtils.parametersWithWriteDefaults(Map.empty)
val rhsKey = "hoodie.right.hand.side.key"
val rhsVal = "hoodie.right.hand.side.val"
val modifier = Map(OPERATION_OPT_KEY -> INSERT_OPERATION_OPT_VAL, TABLE_TYPE_OPT_KEY -> MOR_TABLE_TYPE_OPT_VAL, rhsKey -> rhsVal)
- val modified = HoodieSparkSqlWriter.parametersWithWriteDefaults(modifier)
+ val modified = HoodieWriterUtils.parametersWithWriteDefaults(modifier)
val matcher = (k: String, v: String) => modified(k) should be(v)
originals foreach {
@@ -77,7 +81,7 @@ class HoodieSparkSqlWriterSuite extends FunSuite with Matchers {
HoodieWriteConfig.TABLE_NAME -> hoodieFooTableName,
"hoodie.insert.shuffle.parallelism" -> "4",
"hoodie.upsert.shuffle.parallelism" -> "4")
- val fooTableParams = HoodieSparkSqlWriter.parametersWithWriteDefaults(fooTableModifier)
+ val fooTableParams = HoodieWriterUtils.parametersWithWriteDefaults(fooTableModifier)
val dataFrame = session.createDataFrame(Seq(Test(UUID.randomUUID().toString, new Date().getTime)))
HoodieSparkSqlWriter.write(sqlContext, SaveMode.Append, fooTableParams, dataFrame)
@@ -86,7 +90,7 @@ class HoodieSparkSqlWriterSuite extends FunSuite with Matchers {
HoodieWriteConfig.TABLE_NAME -> "hoodie_bar_tbl",
"hoodie.insert.shuffle.parallelism" -> "4",
"hoodie.upsert.shuffle.parallelism" -> "4")
- val barTableParams = HoodieSparkSqlWriter.parametersWithWriteDefaults(barTableModifier)
+ val barTableParams = HoodieWriterUtils.parametersWithWriteDefaults(barTableModifier)
val dataFrame2 = session.createDataFrame(Seq(Test(UUID.randomUUID().toString, new Date().getTime)))
val tableAlreadyExistException = intercept[HoodieException](HoodieSparkSqlWriter.write(sqlContext, SaveMode.Append, barTableParams, dataFrame2))
assert(tableAlreadyExistException.getMessage.contains("hoodie table with name " + hoodieFooTableName + " already exist"))
@@ -101,6 +105,129 @@ class HoodieSparkSqlWriterSuite extends FunSuite with Matchers {
}
}
+ test("test bulk insert dataset with datasource impl") {
+ val session = SparkSession.builder()
+ .appName("test_bulk_insert_datasource")
+ .master("local[2]")
+ .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
+ .getOrCreate()
+ val path = java.nio.file.Files.createTempDirectory("hoodie_test_path")
+ try {
+
+ val sqlContext = session.sqlContext
+ val sc = session.sparkContext
+ val hoodieFooTableName = "hoodie_foo_tbl"
+
+ //create a new table
+ val fooTableModifier = Map("path" -> path.toAbsolutePath.toString,
+ HoodieWriteConfig.TABLE_NAME -> hoodieFooTableName,
+ "hoodie.bulkinsert.shuffle.parallelism" -> "4",
+ DataSourceWriteOptions.OPERATION_OPT_KEY -> DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL,
+ DataSourceWriteOptions.ENABLE_ROW_WRITER_OPT_KEY -> "true",
+ DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY -> "_row_key",
+ DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY -> "partition",
+ DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY -> "org.apache.hudi.keygen.SimpleKeyGenerator")
+ val fooTableParams = HoodieWriterUtils.parametersWithWriteDefaults(fooTableModifier)
+
+ // generate the inserts
+ val schema = DataSourceTestUtils.getStructTypeExampleSchema
+ val structType = AvroConversionUtils.convertAvroSchemaToStructType(schema)
+ val records = DataSourceTestUtils.generateRandomRows(100)
+ val recordsSeq = convertRowListToSeq(records)
+ val df = session.createDataFrame(sc.parallelize(recordsSeq), structType)
+ // write to Hudi
+ HoodieSparkSqlWriter.write(sqlContext, SaveMode.Append, fooTableParams, df)
+
+ // collect all partition paths to issue read of parquet files
+ val partitions = Seq(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH,
+ HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH)
+ // Check the entire dataset has all records still
+ val fullPartitionPaths = new Array[String](3)
+ for (i <- 0 until fullPartitionPaths.length) {
+ fullPartitionPaths(i) = String.format("%s/%s/*", path.toAbsolutePath.toString, partitions(i))
+ }
+
+ // fetch all records from parquet files generated from write to hudi
+ val actualDf = session.sqlContext.read.parquet(fullPartitionPaths(0), fullPartitionPaths(1), fullPartitionPaths(2))
+
+ // remove metadata columns so that expected and actual DFs can be compared as is
+ val trimmedDf = actualDf.drop(HoodieRecord.HOODIE_META_COLUMNS.get(0)).drop(HoodieRecord.HOODIE_META_COLUMNS.get(1))
+ .drop(HoodieRecord.HOODIE_META_COLUMNS.get(2)).drop(HoodieRecord.HOODIE_META_COLUMNS.get(3))
+ .drop(HoodieRecord.HOODIE_META_COLUMNS.get(4))
+
+ assert(df.except(trimmedDf).count() == 0)
+ } finally {
+ session.stop()
+ FileUtils.deleteDirectory(path.toFile)
+ }
+ }
+
+ test("test bulk insert dataset with datasource impl multiple rounds") {
+ val session = SparkSession.builder()
+ .appName("test_bulk_insert_datasource")
+ .master("local[2]")
+ .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
+ .getOrCreate()
+ val path = java.nio.file.Files.createTempDirectory("hoodie_test_path")
+ try {
+
+ val sqlContext = session.sqlContext
+ val sc = session.sparkContext
+ val hoodieFooTableName = "hoodie_foo_tbl"
+
+ //create a new table
+ val fooTableModifier = Map("path" -> path.toAbsolutePath.toString,
+ HoodieWriteConfig.TABLE_NAME -> hoodieFooTableName,
+ "hoodie.bulkinsert.shuffle.parallelism" -> "4",
+ DataSourceWriteOptions.OPERATION_OPT_KEY -> DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL,
+ DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY -> "_row_key",
+ DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY -> "partition",
+ DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY -> "org.apache.hudi.keygen.SimpleKeyGenerator")
+ val fooTableParams = HoodieWriterUtils.parametersWithWriteDefaults(fooTableModifier)
+
+ val partitions = Seq(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH,
+ HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH)
+ val fullPartitionPaths = new Array[String](3)
+ for (i <- 0 to 2) {
+ fullPartitionPaths(i) = String.format("%s/%s/*", path.toAbsolutePath.toString, partitions(i))
+ }
+
+ val schema = DataSourceTestUtils.getStructTypeExampleSchema
+ val structType = AvroConversionUtils.convertAvroSchemaToStructType(schema)
+ var totalExpectedDf = session.createDataFrame(sc.emptyRDD[Row], structType)
+
+ for (_ <- 0 to 2) {
+ // generate the inserts
+ val records = DataSourceTestUtils.generateRandomRows(200)
+ val recordsSeq = convertRowListToSeq(records)
+ val df = session.createDataFrame(sc.parallelize(recordsSeq), structType)
+ // write to Hudi
+ HoodieSparkSqlWriter.write(sqlContext, SaveMode.Append, fooTableParams, df)
+
+ // Fetch records from entire dataset
+ val actualDf = session.sqlContext.read.parquet(fullPartitionPaths(0), fullPartitionPaths(1), fullPartitionPaths(2))
+
+ // remove metadata columns so that expected and actual DFs can be compared as is
+ val trimmedDf = actualDf.drop(HoodieRecord.HOODIE_META_COLUMNS.get(0)).drop(HoodieRecord.HOODIE_META_COLUMNS.get(1))
+ .drop(HoodieRecord.HOODIE_META_COLUMNS.get(2)).drop(HoodieRecord.HOODIE_META_COLUMNS.get(3))
+ .drop(HoodieRecord.HOODIE_META_COLUMNS.get(4))
+
+ // find total df (union from multiple rounds)
+ totalExpectedDf = totalExpectedDf.union(df)
+ // find mismatch between actual and expected df
+ assert(totalExpectedDf.except(trimmedDf).count() == 0)
+ }
+ } finally {
+ session.stop()
+ FileUtils.deleteDirectory(path.toFile)
+ }
+ }
+
case class Test(uuid: String, ts: Long)
+ import scala.collection.JavaConverters
+
+ def convertRowListToSeq(inputList: util.List[Row]): Seq[Row] =
+ JavaConverters.asScalaIteratorConverter(inputList.iterator).asScala.toSeq
+
}
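For context, a minimal sketch of the options a caller passes to route bulk_insert through the new row writer, reusing only keys and helpers that appear in the suite above; basePath, sqlContext and df are placeholders for the caller's own values:

    import org.apache.hudi.{DataSourceWriteOptions, HoodieSparkSqlWriter, HoodieWriterUtils}
    import org.apache.hudi.config.HoodieWriteConfig
    import org.apache.spark.sql.SaveMode

    val opts = Map(
      "path" -> basePath,                                               // placeholder base path
      HoodieWriteConfig.TABLE_NAME -> "hoodie_tbl",
      DataSourceWriteOptions.OPERATION_OPT_KEY -> DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL,
      DataSourceWriteOptions.ENABLE_ROW_WRITER_OPT_KEY -> "true",       // switch bulk_insert to native Row writing
      DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY -> "_row_key",
      DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY -> "partition",
      DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY -> "org.apache.hudi.keygen.SimpleKeyGenerator")
    HoodieSparkSqlWriter.write(sqlContext, SaveMode.Append,
      HoodieWriterUtils.parametersWithWriteDefaults(opts), df)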
diff --git a/style/checkstyle-suppressions.xml b/style/checkstyle-suppressions.xml
index 30dc512..a0a7680 100644
--- a/style/checkstyle-suppressions.xml
+++ b/style/checkstyle-suppressions.xml
@@ -26,4 +26,6 @@
<!-- Member Names expected to start with "_" -->
<suppress checks="naming" files="TestRecord.java" lines="1-9999"/>
<suppress checks="IllegalImport" files="Option.java" />
+ <!-- java.util.Optional part of DataSource V2 API signature -->
+ <suppress checks="IllegalImport" files="DefaultSource.java" />
</suppressions>