Posted to commits@hudi.apache.org by vi...@apache.org on 2021/11/25 01:57:16 UTC

[hudi] branch master updated: [HUDI-1290] Add Debezium Source for deltastreamer (#4063)

This is an automated email from the ASF dual-hosted git repository.

vinoth pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 83f8ed2  [HUDI-1290] Add Debezium Source for deltastreamer (#4063)
83f8ed2 is described below

commit 83f8ed2ae3ba7fb20813cbb8768deae6244b020c
Author: rmahindra123 <76...@users.noreply.github.com>
AuthorDate: Wed Nov 24 17:57:02 2021 -0800

    [HUDI-1290] Add Debezium Source for deltastreamer (#4063)
    
    * add source for postgres debezium
    
    * Add tests for debezium payload
    
    * Fix test
    
    * Fix test
    
    * Add tests for debezium source
    
    * Add tests for debezium source
    
    * Fix schema for debezium
    
    * Fix checkstyle issues
    
    * Fix config issue for schema registry
    
    * Add mysql source for debezium
    
    * Fix checkstyle issues and tests
    
    * Improve code for merging toasted values
    
    * Improve code for merging toasted values
    
    Co-authored-by: Rajesh Mahindra <rm...@Rajeshs-MacBook-Pro.local>
---
 .../debezium/AbstractDebeziumAvroPayload.java      |  89 ++++++++
 .../common/model/debezium/DebeziumConstants.java   |  81 +++++++
 .../model/debezium/MySqlDebeziumAvroPayload.java   |  67 ++++++
 .../debezium/PostgresDebeziumAvroPayload.java      | 130 +++++++++++
 .../debezium/TestMySqlDebeziumAvroPayload.java     | 131 +++++++++++
 .../debezium/TestPostgresDebeziumAvroPayload.java  | 181 +++++++++++++++
 .../utilities/schema/SchemaRegistryProvider.java   |   2 +-
 .../utilities/sources/debezium/DebeziumSource.java | 243 +++++++++++++++++++++
 .../sources/debezium/MysqlDebeziumSource.java      | 104 +++++++++
 .../sources/debezium/PostgresDebeziumSource.java   |  87 ++++++++
 .../debezium/TestAbstractDebeziumSource.java       | 209 ++++++++++++++++++
 .../sources/debezium/TestMysqlDebeziumSource.java  | 100 +++++++++
 .../debezium/TestPostgresDebeziumSource.java       |  97 ++++++++
 13 files changed, 1520 insertions(+), 1 deletion(-)

diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/debezium/AbstractDebeziumAvroPayload.java b/hudi-common/src/main/java/org/apache/hudi/common/model/debezium/AbstractDebeziumAvroPayload.java
new file mode 100644
index 0000000..33f1d9f
--- /dev/null
+++ b/hudi-common/src/main/java/org/apache/hudi/common/model/debezium/AbstractDebeziumAvroPayload.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.common.model.debezium;
+
+import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload;
+import org.apache.hudi.common.util.Option;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+
+import java.io.IOException;
+
+/**
+ * Base class that provides support for seamlessly applying changes captured via Debezium.
+ * <p>
+ * Debezium change event types are determined from the op field in the payload:
+ * <p>
+ * - For inserts, op=i
+ * - For deletes, op=d
+ * - For updates, op=u
+ * - For snapshot inserts, op=r
+ * <p>
+ * This payload implementation issues matching insert, update, and delete operations against the Hudi table.
+ */
+public abstract class AbstractDebeziumAvroPayload extends OverwriteWithLatestAvroPayload {
+
+  private static final Logger LOG = LogManager.getLogger(AbstractDebeziumAvroPayload.class);
+
+  public AbstractDebeziumAvroPayload(GenericRecord record, Comparable orderingVal) {
+    super(record, orderingVal);
+  }
+
+  public AbstractDebeziumAvroPayload(Option<GenericRecord> record) {
+    super(record);
+  }
+
+  @Override
+  public Option<IndexedRecord> getInsertValue(Schema schema) throws IOException {
+    IndexedRecord insertRecord = getInsertRecord(schema);
+    return handleDeleteOperation(insertRecord);
+  }
+
+  @Override
+  public Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) throws IOException {
+    // Step 1: If the ordering value (time of occurrence) of the current record in storage is higher than
+    // that of the incoming record (including a delete record), pick the current record.
+    if (shouldPickCurrentRecord(currentValue, getInsertRecord(schema), schema)) {
+      return Option.of(currentValue);
+    }
+    // Step 2: Pick the insert record (treated as a delete if it is a delete event)
+    return getInsertValue(schema);
+  }
+
+  protected abstract boolean shouldPickCurrentRecord(IndexedRecord currentRecord, IndexedRecord insertRecord, Schema schema) throws IOException;
+
+  private Option<IndexedRecord> handleDeleteOperation(IndexedRecord insertRecord) {
+    boolean delete = false;
+    if (insertRecord instanceof GenericRecord) {
+      GenericRecord record = (GenericRecord) insertRecord;
+      Object value = record.get(DebeziumConstants.FLATTENED_OP_COL_NAME);
+      delete = value != null && value.toString().equalsIgnoreCase(DebeziumConstants.DELETE_OP);
+    }
+
+    return delete ? Option.empty() : Option.of(insertRecord);
+  }
+
+  private IndexedRecord getInsertRecord(Schema schema) throws IOException {
+    return super.getInsertValue(schema).get();
+  }
+}
\ No newline at end of file
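
For illustration, a minimal sketch (not part of this commit) of how the op=d handling above behaves. It assumes the PostgresDebeziumAvroPayload subclass added later in this patch, and mirrors the schema style used in its tests:

    import java.util.Arrays;

    import org.apache.avro.Schema;
    import org.apache.avro.generic.GenericData;
    import org.apache.avro.generic.GenericRecord;
    import org.apache.hudi.common.model.debezium.DebeziumConstants;
    import org.apache.hudi.common.model.debezium.PostgresDebeziumAvroPayload;

    public class DeleteHandlingSketch {
      public static void main(String[] args) throws Exception {
        // Schema with just the flattened op and LSN meta columns.
        Schema schema = Schema.createRecord(Arrays.asList(
            new Schema.Field(DebeziumConstants.FLATTENED_OP_COL_NAME, Schema.create(Schema.Type.STRING), "", null),
            new Schema.Field(DebeziumConstants.FLATTENED_LSN_COL_NAME, Schema.create(Schema.Type.LONG), "", null)));

        GenericRecord deleteEvent = new GenericData.Record(schema);
        deleteEvent.put(DebeziumConstants.FLATTENED_OP_COL_NAME, DebeziumConstants.DELETE_OP);
        deleteEvent.put(DebeziumConstants.FLATTENED_LSN_COL_NAME, 100L);

        // op=d resolves to Option.empty(), which Hudi interprets as a delete of the key.
        System.out.println(new PostgresDebeziumAvroPayload(deleteEvent, 100L)
            .getInsertValue(schema).isPresent()); // prints: false
      }
    }
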
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/debezium/DebeziumConstants.java b/hudi-common/src/main/java/org/apache/hudi/common/model/debezium/DebeziumConstants.java
new file mode 100644
index 0000000..d3e115e
--- /dev/null
+++ b/hudi-common/src/main/java/org/apache/hudi/common/model/debezium/DebeziumConstants.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.common.model.debezium;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Constants used by the Debezium sources and {@link AbstractDebeziumAvroPayload} implementations.
+ */
+public class DebeziumConstants {
+
+  // INPUT COLUMNS
+  public static final String INCOMING_BEFORE_FIELD = "before";
+  public static final String INCOMING_AFTER_FIELD = "after";
+  public static final String INCOMING_SOURCE_FIELD = "source";
+  public static final String INCOMING_OP_FIELD = "op";
+  public static final String INCOMING_TS_MS_FIELD = "ts_ms";
+
+  public static final String INCOMING_SOURCE_NAME_FIELD = "source.name";
+  public static final String INCOMING_SOURCE_TS_MS_FIELD = "source.ts_ms";
+  public static final String INCOMING_SOURCE_TXID_FIELD = "source.txId";
+
+  // INPUT COLUMNS SPECIFIC TO MYSQL
+  public static final String INCOMING_SOURCE_FILE_FIELD = "source.file";
+  public static final String INCOMING_SOURCE_POS_FIELD = "source.pos";
+  public static final String INCOMING_SOURCE_ROW_FIELD = "source.row";
+
+  // INPUT COLUMNS SPECIFIC TO POSTGRES
+  public static final String INCOMING_SOURCE_LSN_FIELD = "source.lsn";
+  public static final String INCOMING_SOURCE_XMIN_FIELD = "source.xmin";
+
+  // OUTPUT COLUMNS
+  public static final String FLATTENED_OP_COL_NAME = "_change_operation_type";
+  public static final String UPSTREAM_PROCESSING_TS_COL_NAME = "_upstream_event_processed_ts_ms";
+  public static final String FLATTENED_SHARD_NAME = "db_shard_source_partition";
+  public static final String FLATTENED_TS_COL_NAME = "_event_origin_ts_ms";
+  public static final String FLATTENED_TX_ID_COL_NAME = "_event_tx_id";
+
+  // OUTPUT COLUMNS SPECIFIC TO MYSQL
+  public static final String FLATTENED_FILE_COL_NAME = "_event_bin_file";
+  public static final String FLATTENED_POS_COL_NAME = "_event_pos";
+  public static final String FLATTENED_ROW_COL_NAME = "_event_row";
+  public static final String ADDED_SEQ_COL_NAME = "_event_seq";
+
+  // OUTPUT COLUMNS SPECIFIC TO POSTGRES
+  public static final String FLATTENED_LSN_COL_NAME = "_event_lsn";
+  public static final String FLATTENED_XMIN_COL_NAME = "_event_xmin";
+
+  // Other Constants
+  public static final String DELETE_OP = "d";
+
+  // List of metadata columns
+  public static List<String> META_COLUMNS = Collections.unmodifiableList(Arrays.asList(
+      FLATTENED_OP_COL_NAME,
+      UPSTREAM_PROCESSING_TS_COL_NAME,
+      FLATTENED_TS_COL_NAME,
+      FLATTENED_TX_ID_COL_NAME,
+      FLATTENED_LSN_COL_NAME,
+      FLATTENED_XMIN_COL_NAME,
+      FLATTENED_SHARD_NAME
+  ));
+}
+
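
As a hedged usage sketch (hypothetical, not part of this commit): downstream Spark code can use META_COLUMNS to separate this Debezium metadata from the table's own columns in a flattened dataset. Note that META_COLUMNS covers the common and Postgres-specific columns; the MySQL-specific columns above are not included.

    import java.util.Arrays;

    import org.apache.hudi.common.model.debezium.DebeziumConstants;
    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Row;

    public class MetaColumnsSketch {
      // Hypothetical helper: keep only the table's own columns, dropping Debezium metadata.
      public static Dataset<Row> dropDebeziumMetaColumns(Dataset<Row> flattened) {
        String[] dataColumns = Arrays.stream(flattened.columns())
            .filter(c -> !DebeziumConstants.META_COLUMNS.contains(c))
            .toArray(String[]::new);
        return flattened.selectExpr(dataColumns);
      }
    }
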
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/debezium/MySqlDebeziumAvroPayload.java b/hudi-common/src/main/java/org/apache/hudi/common/model/debezium/MySqlDebeziumAvroPayload.java
new file mode 100644
index 0000000..ea6165d
--- /dev/null
+++ b/hudi-common/src/main/java/org/apache/hudi/common/model/debezium/MySqlDebeziumAvroPayload.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.common.model.debezium;
+
+import org.apache.hudi.common.util.Option;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+
+import java.io.IOException;
+
+/**
+ * Provides support for seamlessly applying changes captured via Debezium for MySQL.
+ * <p>
+ * Debezium change event types are determined from the op field in the payload:
+ * <p>
+ * - For inserts, op=i
+ * - For deletes, op=d
+ * - For updates, op=u
+ * - For snapshot inserts, op=r
+ * <p>
+ * This payload implementation issues matching insert, update, and delete operations against the Hudi table.
+ */
+public class MySqlDebeziumAvroPayload extends AbstractDebeziumAvroPayload {
+
+  private static final Logger LOG = LogManager.getLogger(MySqlDebeziumAvroPayload.class);
+
+  public MySqlDebeziumAvroPayload(GenericRecord record, Comparable orderingVal) {
+    super(record, orderingVal);
+  }
+
+  public MySqlDebeziumAvroPayload(Option<GenericRecord> record) {
+    super(record);
+  }
+
+  private String extractSeq(IndexedRecord record) {
+    return ((CharSequence) ((GenericRecord) record).get(DebeziumConstants.ADDED_SEQ_COL_NAME)).toString();
+  }
+
+  @Override
+  protected boolean shouldPickCurrentRecord(IndexedRecord currentRecord, IndexedRecord insertRecord, Schema schema) throws IOException {
+    String currentSourceSeq = extractSeq(currentRecord);
+    String insertSourceSeq = extractSeq(insertRecord);
+    // Pick the current value in storage only if its seq (file+pos) is greater than
+    // the seq (file+pos) of the insert value
+    return insertSourceSeq.compareTo(currentSourceSeq) < 0;
+  }
+}
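
A quick worked example of the ordering above. The seq values are produced by MysqlDebeziumSource later in this commit as binlog-file suffix + "." + position, and are compared lexicographically, so a later binlog file wins regardless of position (the values below are hypothetical):

    public class MysqlSeqOrderingSketch {
      public static void main(String[] args) {
        String older = "00001.111"; // binlog file 00001, position 111
        String newer = "00002.11";  // binlog file 00002, position 11
        // Lexicographic comparison: the newer binlog file wins even at a smaller position.
        System.out.println(newer.compareTo(older) > 0); // prints: true
      }
    }
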
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/debezium/PostgresDebeziumAvroPayload.java b/hudi-common/src/main/java/org/apache/hudi/common/model/debezium/PostgresDebeziumAvroPayload.java
new file mode 100644
index 0000000..448627d
--- /dev/null
+++ b/hudi-common/src/main/java/org/apache/hudi/common/model/debezium/PostgresDebeziumAvroPayload.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.common.model.debezium;
+
+import org.apache.hudi.common.util.Option;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+
+/**
+ * Provides support for seamlessly applying changes captured via Debezium for PostgreSQL.
+ * <p>
+ * Debezium change event types are determined from the op field in the payload:
+ * <p>
+ * - For inserts, op=i
+ * - For deletes, op=d
+ * - For updates, op=u
+ * - For snapshot inserts, op=r
+ * <p>
+ * This payload implementation issues matching insert, update, and delete operations against the Hudi table.
+ */
+public class PostgresDebeziumAvroPayload extends AbstractDebeziumAvroPayload {
+
+  private static final Logger LOG = LogManager.getLogger(PostgresDebeziumAvroPayload.class);
+  public static final String DEBEZIUM_TOASTED_VALUE = "__debezium_unavailable_value";
+
+  public PostgresDebeziumAvroPayload(GenericRecord record, Comparable orderingVal) {
+    super(record, orderingVal);
+  }
+
+  public PostgresDebeziumAvroPayload(Option<GenericRecord> record) {
+    super(record);
+  }
+
+  private Long extractLSN(IndexedRecord record) {
+    GenericRecord genericRecord = (GenericRecord) record;
+    return (Long) genericRecord.get(DebeziumConstants.FLATTENED_LSN_COL_NAME);
+  }
+
+  @Override
+  protected boolean shouldPickCurrentRecord(IndexedRecord currentRecord, IndexedRecord insertRecord, Schema schema) throws IOException {
+    Long currentSourceLSN = extractLSN(currentRecord);
+    Long insertSourceLSN = extractLSN(insertRecord);
+
+    // Pick the current value in storage only if its LSN is greater than the LSN of the insert value
+    return insertSourceLSN < currentSourceLSN;
+  }
+
+  @Override
+  public Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) throws IOException {
+    // Specific to Postgres: If the updated record has TOASTED columns,
+    // we will need to keep the previous value for those columns
+    // see https://debezium.io/documentation/reference/connectors/postgresql.html#postgresql-toasted-values
+    Option<IndexedRecord> insertOrDeleteRecord = super.combineAndGetUpdateValue(currentValue, schema);
+
+    if (insertOrDeleteRecord.isPresent()) {
+      mergeToastedValuesIfPresent(insertOrDeleteRecord.get(), currentValue);
+    }
+    return insertOrDeleteRecord;
+  }
+
+  private void mergeToastedValuesIfPresent(IndexedRecord incomingRecord, IndexedRecord currentRecord) {
+    List<Schema.Field> fields = incomingRecord.getSchema().getFields();
+
+    fields.forEach(field -> {
+      // Only four Avro field shapes can hold unconstrained (toast-able) sizes:
+      // non-nullable string, nullable string, non-nullable bytes, nullable bytes
+      if (((GenericData.Record) incomingRecord).get(field.name()) != null
+          && (containsStringToastedValues(incomingRecord, field) || containsBytesToastedValues(incomingRecord, field))) {
+        ((GenericData.Record) incomingRecord).put(field.name(), ((GenericData.Record) currentRecord).get(field.name()));
+      }
+    });
+  }
+
+  /**
+   * Returns true if the field is of type string (or a union that includes string) and its value is the Debezium toasted-value placeholder.
+   *
+   * @param incomingRecord The incoming avro record
+   * @param field          the column of interest
+   * @return true if the field contains a toasted string value
+   */
+  private boolean containsStringToastedValues(IndexedRecord incomingRecord, Schema.Field field) {
+    return ((field.schema().getType() == Schema.Type.STRING
+        || (field.schema().getType() == Schema.Type.UNION && field.schema().getTypes().stream().anyMatch(s -> s.getType() == Schema.Type.STRING)))
+        // Check length first as an optimization
+        && ((CharSequence) ((GenericData.Record) incomingRecord).get(field.name())).length() == DEBEZIUM_TOASTED_VALUE.length()
+        && DEBEZIUM_TOASTED_VALUE.equals(((CharSequence) ((GenericData.Record) incomingRecord).get(field.name())).toString()));
+  }
+
+  /**
+   * Returns true if the field is of type bytes (or a union that includes bytes) and its value is the Debezium toasted-value placeholder.
+   *
+   * @param incomingRecord The incoming avro record
+   * @param field          the column of interest
+   * @return true if the field contains a toasted bytes value
+   */
+  private boolean containsBytesToastedValues(IndexedRecord incomingRecord, Schema.Field field) {
+    return ((field.schema().getType() == Schema.Type.BYTES
+        || (field.schema().getType() == Schema.Type.UNION && field.schema().getTypes().stream().anyMatch(s -> s.getType() == Schema.Type.BYTES)))
+        // Check length first as an optimization
+        && ((ByteBuffer) ((GenericData.Record) incomingRecord).get(field.name())).array().length == DEBEZIUM_TOASTED_VALUE.length()
+        && DEBEZIUM_TOASTED_VALUE.equals(new String(((ByteBuffer) ((GenericData.Record) incomingRecord).get(field.name())).array(), StandardCharsets.UTF_8)));
+  }
+}
+
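
A condensed sketch of the toast handling above (the TestPostgresDebeziumAvroPayload file later in this commit exercises it end to end; the large_text_col column name here is hypothetical):

    import org.apache.avro.Schema;
    import org.apache.avro.SchemaBuilder;
    import org.apache.avro.generic.GenericData;
    import org.apache.avro.generic.GenericRecord;
    import org.apache.hudi.common.model.debezium.DebeziumConstants;
    import org.apache.hudi.common.model.debezium.PostgresDebeziumAvroPayload;
    import org.apache.hudi.common.util.Option;

    public class ToastMergeSketch {
      public static void main(String[] args) throws Exception {
        Schema schema = SchemaBuilder.record("row").fields()
            .name(DebeziumConstants.FLATTENED_LSN_COL_NAME).type().longType().noDefault()
            .name("large_text_col").type().stringType().noDefault()
            .endRecord();

        GenericRecord oldRow = new GenericData.Record(schema);
        oldRow.put(DebeziumConstants.FLATTENED_LSN_COL_NAME, 100L);
        oldRow.put("large_text_col", "original value");

        // The update did not touch large_text_col, so Debezium ships the toast placeholder.
        GenericRecord newRow = new GenericData.Record(schema);
        newRow.put(DebeziumConstants.FLATTENED_LSN_COL_NAME, 105L);
        newRow.put("large_text_col", PostgresDebeziumAvroPayload.DEBEZIUM_TOASTED_VALUE);

        // The merge keeps the previous value for the toasted column.
        GenericRecord merged = (GenericRecord) new PostgresDebeziumAvroPayload(Option.of(newRow))
            .combineAndGetUpdateValue(oldRow, schema).get();
        System.out.println(merged.get("large_text_col")); // prints: original value
      }
    }
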
diff --git a/hudi-common/src/test/java/org/apache/hudi/common/model/debezium/TestMySqlDebeziumAvroPayload.java b/hudi-common/src/test/java/org/apache/hudi/common/model/debezium/TestMySqlDebeziumAvroPayload.java
new file mode 100644
index 0000000..6163c0a
--- /dev/null
+++ b/hudi-common/src/test/java/org/apache/hudi/common/model/debezium/TestMySqlDebeziumAvroPayload.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.common.model.debezium;
+
+import org.apache.hudi.common.util.Option;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+
+public class TestMySqlDebeziumAvroPayload {
+
+  private static final String KEY_FIELD_NAME = "Key";
+
+  private Schema avroSchema;
+
+  @BeforeEach
+  void setUp() {
+    this.avroSchema = Schema.createRecord(Arrays.asList(
+        new Schema.Field(KEY_FIELD_NAME, Schema.create(Schema.Type.INT), "", 0),
+        new Schema.Field(DebeziumConstants.FLATTENED_OP_COL_NAME, Schema.create(Schema.Type.STRING), "", null),
+        new Schema.Field(DebeziumConstants.ADDED_SEQ_COL_NAME, Schema.create(Schema.Type.STRING), "", null)
+    ));
+  }
+
+  @Test
+  public void testInsert() throws IOException {
+    GenericRecord insertRecord = createRecord(0, Operation.INSERT, "00001.111");
+    MySqlDebeziumAvroPayload payload = new MySqlDebeziumAvroPayload(insertRecord, "00001.111");
+    validateRecord(payload.getInsertValue(avroSchema), 0, Operation.INSERT, "00001.111");
+  }
+
+  @Test
+  public void testPreCombine() {
+    GenericRecord insertRecord = createRecord(0, Operation.INSERT, "00002.111");
+    MySqlDebeziumAvroPayload insertPayload = new MySqlDebeziumAvroPayload(insertRecord, "00002.111");
+
+    GenericRecord updateRecord = createRecord(0, Operation.UPDATE, "00001.111");
+    MySqlDebeziumAvroPayload updatePayload = new MySqlDebeziumAvroPayload(updateRecord, "00001.111");
+
+    GenericRecord deleteRecord = createRecord(0, Operation.DELETE, "00002.11");
+    MySqlDebeziumAvroPayload deletePayload = new MySqlDebeziumAvroPayload(deleteRecord, "00002.11");
+
+    assertEquals(insertPayload, insertPayload.preCombine(updatePayload));
+    assertEquals(deletePayload, deletePayload.preCombine(updatePayload));
+    assertEquals(insertPayload, deletePayload.preCombine(insertPayload));
+  }
+
+  @Test
+  public void testMergeWithUpdate() throws IOException {
+    GenericRecord updateRecord = createRecord(1, Operation.UPDATE, "00002.11");
+    MySqlDebeziumAvroPayload payload = new MySqlDebeziumAvroPayload(updateRecord, "00002.11");
+
+    GenericRecord existingRecord = createRecord(1, Operation.INSERT, "00001.111");
+    Option<IndexedRecord> mergedRecord = payload.combineAndGetUpdateValue(existingRecord, avroSchema);
+    validateRecord(mergedRecord, 1, Operation.UPDATE, "00002.11");
+
+    GenericRecord lateRecord = createRecord(1, Operation.UPDATE, "00000.222");
+    payload = new MySqlDebeziumAvroPayload(lateRecord, "00000.222");
+    mergedRecord = payload.combineAndGetUpdateValue(existingRecord, avroSchema);
+    validateRecord(mergedRecord, 1, Operation.INSERT, "00001.111");
+  }
+
+  @Test
+  public void testMergeWithDelete() throws IOException {
+    GenericRecord deleteRecord = createRecord(2, Operation.DELETE, "00002.11");
+    MySqlDebeziumAvroPayload payload = new MySqlDebeziumAvroPayload(deleteRecord, "00002.11");
+
+    GenericRecord existingRecord = createRecord(2, Operation.UPDATE, "00001.111");
+    Option<IndexedRecord> mergedRecord = payload.combineAndGetUpdateValue(existingRecord, avroSchema);
+    // expect nothing to be committed to table
+    assertFalse(mergedRecord.isPresent());
+
+    GenericRecord lateRecord = createRecord(2, Operation.DELETE, "00000.222");
+    payload = new MySqlDebeziumAvroPayload(lateRecord, "00000.222");
+    mergedRecord = payload.combineAndGetUpdateValue(existingRecord, avroSchema);
+    validateRecord(mergedRecord, 2, Operation.UPDATE, "00001.111");
+  }
+
+  private GenericRecord createRecord(int primaryKeyValue, Operation op, String seqValue) {
+    GenericRecord record = new GenericData.Record(avroSchema);
+    record.put(KEY_FIELD_NAME, primaryKeyValue);
+    record.put(DebeziumConstants.FLATTENED_OP_COL_NAME, op.op);
+    record.put(DebeziumConstants.ADDED_SEQ_COL_NAME, seqValue);
+    return record;
+  }
+
+  private void validateRecord(Option<IndexedRecord> iRecord, int primaryKeyValue, Operation op, String seqValue) {
+    IndexedRecord record = iRecord.get();
+    assertEquals(primaryKeyValue, (int) record.get(0));
+    assertEquals(op.op, record.get(1).toString());
+    assertEquals(seqValue, record.get(2).toString());
+  }
+
+  private enum Operation {
+    INSERT("c"),
+    UPDATE("u"),
+    DELETE("d");
+
+    public final String op;
+
+    Operation(String op) {
+      this.op = op;
+    }
+  }
+}
diff --git a/hudi-common/src/test/java/org/apache/hudi/common/model/debezium/TestPostgresDebeziumAvroPayload.java b/hudi-common/src/test/java/org/apache/hudi/common/model/debezium/TestPostgresDebeziumAvroPayload.java
new file mode 100644
index 0000000..07512b1
--- /dev/null
+++ b/hudi-common/src/test/java/org/apache/hudi/common/model/debezium/TestPostgresDebeziumAvroPayload.java
@@ -0,0 +1,181 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.common.model.debezium;
+
+import org.apache.hudi.common.util.Option;
+
+import org.apache.avro.Schema;
+import org.apache.avro.SchemaBuilder;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
+import org.apache.avro.util.Utf8;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNull;
+
+public class TestPostgresDebeziumAvroPayload {
+
+  private static final String KEY_FIELD_NAME = "Key";
+  private Schema avroSchema;
+
+  @BeforeEach
+  void setUp() {
+    this.avroSchema = Schema.createRecord(Arrays.asList(
+        new Schema.Field(KEY_FIELD_NAME, Schema.create(Schema.Type.INT), "", 0),
+        new Schema.Field(DebeziumConstants.FLATTENED_OP_COL_NAME, Schema.create(Schema.Type.STRING), "", null),
+        new Schema.Field(DebeziumConstants.FLATTENED_LSN_COL_NAME, Schema.create(Schema.Type.LONG), "", null)
+    ));
+  }
+
+  @Test
+  public void testInsert() throws IOException {
+    GenericRecord insertRecord = createRecord(0, Operation.INSERT, 100L);
+    PostgresDebeziumAvroPayload payload = new PostgresDebeziumAvroPayload(insertRecord, 100L);
+    validateRecord(payload.getInsertValue(avroSchema), 0, Operation.INSERT, 100L);
+  }
+
+  @Test
+  public void testPreCombine() {
+    GenericRecord insertRecord = createRecord(0, Operation.INSERT, 120L);
+    PostgresDebeziumAvroPayload insertPayload = new PostgresDebeziumAvroPayload(insertRecord, 120L);
+
+    GenericRecord updateRecord = createRecord(0, Operation.UPDATE, 99L);
+    PostgresDebeziumAvroPayload updatePayload = new PostgresDebeziumAvroPayload(updateRecord, 99L);
+
+    GenericRecord deleteRecord = createRecord(0, Operation.DELETE, 111L);
+    PostgresDebeziumAvroPayload deletePayload = new PostgresDebeziumAvroPayload(deleteRecord, 111L);
+
+    assertEquals(insertPayload, insertPayload.preCombine(updatePayload));
+    assertEquals(deletePayload, deletePayload.preCombine(updatePayload));
+    assertEquals(insertPayload, deletePayload.preCombine(insertPayload));
+  }
+
+  @Test
+  public void testMergeWithUpdate() throws IOException {
+    GenericRecord updateRecord = createRecord(1, Operation.UPDATE, 100L);
+    PostgresDebeziumAvroPayload payload = new PostgresDebeziumAvroPayload(updateRecord, 100L);
+
+    GenericRecord existingRecord = createRecord(1, Operation.INSERT, 99L);
+    Option<IndexedRecord> mergedRecord = payload.combineAndGetUpdateValue(existingRecord, avroSchema);
+    validateRecord(mergedRecord, 1, Operation.UPDATE, 100L);
+
+    GenericRecord lateRecord = createRecord(1, Operation.UPDATE, 98L);
+    payload = new PostgresDebeziumAvroPayload(lateRecord, 98L);
+    mergedRecord = payload.combineAndGetUpdateValue(existingRecord, avroSchema);
+    validateRecord(mergedRecord, 1, Operation.INSERT, 99L);
+  }
+
+  @Test
+  public void testMergeWithDelete() throws IOException {
+    GenericRecord deleteRecord = createRecord(2, Operation.DELETE, 100L);
+    PostgresDebeziumAvroPayload payload = new PostgresDebeziumAvroPayload(deleteRecord, 100L);
+
+    GenericRecord existingRecord = createRecord(2, Operation.UPDATE, 99L);
+    Option<IndexedRecord> mergedRecord = payload.combineAndGetUpdateValue(existingRecord, avroSchema);
+    // expect nothing to be committed to table
+    assertFalse(mergedRecord.isPresent());
+
+    GenericRecord lateRecord = createRecord(2, Operation.DELETE, 98L);
+    payload = new PostgresDebeziumAvroPayload(lateRecord, 98L);
+    mergedRecord = payload.combineAndGetUpdateValue(existingRecord, avroSchema);
+    validateRecord(mergedRecord, 2, Operation.UPDATE, 99L);
+  }
+
+  @Test
+  public void testMergeWithToastedValues() throws IOException {
+    Schema avroSchema = SchemaBuilder.builder()
+        .record("test_schema")
+        .namespace("test_namespace")
+        .fields()
+        .name(DebeziumConstants.FLATTENED_LSN_COL_NAME).type().longType().noDefault()
+        .name("string_col").type().stringType().noDefault()
+        .name("byte_col").type().bytesType().noDefault()
+        .name("string_null_col_1").type().nullable().stringType().noDefault()
+        .name("byte_null_col_1").type().nullable().bytesType().noDefault()
+        .name("string_null_col_2").type().nullable().stringType().noDefault()
+        .name("byte_null_col_2").type().nullable().bytesType().noDefault()
+        .endRecord();
+
+    GenericRecord oldVal = new GenericData.Record(avroSchema);
+    oldVal.put(DebeziumConstants.FLATTENED_LSN_COL_NAME, 100L);
+    oldVal.put("string_col", "valid string value");
+    oldVal.put("byte_col", ByteBuffer.wrap("valid byte value".getBytes()));
+    oldVal.put("string_null_col_1", "valid string value");
+    oldVal.put("byte_null_col_1", ByteBuffer.wrap("valid byte value".getBytes()));
+    oldVal.put("string_null_col_2", null);
+    oldVal.put("byte_null_col_2", null);
+
+    GenericRecord newVal = new GenericData.Record(avroSchema);
+    newVal.put(DebeziumConstants.FLATTENED_LSN_COL_NAME, 105L);
+    newVal.put("string_col", PostgresDebeziumAvroPayload.DEBEZIUM_TOASTED_VALUE);
+    newVal.put("byte_col", ByteBuffer.wrap(PostgresDebeziumAvroPayload.DEBEZIUM_TOASTED_VALUE.getBytes()));
+    newVal.put("string_null_col_1", null);
+    newVal.put("byte_null_col_1", null);
+    newVal.put("string_null_col_2", "valid string value");
+    newVal.put("byte_null_col_2", ByteBuffer.wrap("valid byte value".getBytes()));
+
+    PostgresDebeziumAvroPayload payload = new PostgresDebeziumAvroPayload(Option.of(newVal));
+
+    GenericRecord outputRecord = (GenericRecord) payload
+        .combineAndGetUpdateValue(oldVal, avroSchema).get();
+
+    assertEquals("valid string value", outputRecord.get("string_col"));
+    assertEquals("valid byte value", new String(((ByteBuffer) outputRecord.get("byte_col")).array(), StandardCharsets.UTF_8));
+    assertNull(outputRecord.get("string_null_col_1"));
+    assertNull(outputRecord.get("byte_null_col_1"));
+    assertEquals("valid string value", ((Utf8) outputRecord.get("string_null_col_2")).toString());
+    assertEquals("valid byte value", new String(((ByteBuffer) outputRecord.get("byte_null_col_2")).array(), StandardCharsets.UTF_8));
+  }
+
+  private GenericRecord createRecord(int primaryKeyValue, Operation op, long lsnValue) {
+    GenericRecord record = new GenericData.Record(avroSchema);
+    record.put(KEY_FIELD_NAME, primaryKeyValue);
+    record.put(DebeziumConstants.FLATTENED_OP_COL_NAME, op.op);
+    record.put(DebeziumConstants.FLATTENED_LSN_COL_NAME, lsnValue);
+    return record;
+  }
+
+  private void validateRecord(Option<IndexedRecord> iRecord, int primaryKeyValue, Operation op, long lsnValue) {
+    IndexedRecord record = iRecord.get();
+    assertEquals(primaryKeyValue, (int) record.get(0));
+    assertEquals(op.op, record.get(1).toString());
+    assertEquals(lsnValue, (long) record.get(2));
+  }
+
+  private enum Operation {
+    INSERT("c"),
+    UPDATE("u"),
+    DELETE("d");
+
+    public final String op;
+
+    Operation(String op) {
+      this.op = op;
+    }
+  }
+}
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaRegistryProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaRegistryProvider.java
index 32ef609..2163692 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaRegistryProvider.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaRegistryProvider.java
@@ -49,7 +49,7 @@ public class SchemaRegistryProvider extends SchemaProvider {
    */
   public static class Config {
 
-    private static final String SRC_SCHEMA_REGISTRY_URL_PROP = "hoodie.deltastreamer.schemaprovider.registry.url";
+    public static final String SRC_SCHEMA_REGISTRY_URL_PROP = "hoodie.deltastreamer.schemaprovider.registry.url";
     private static final String TARGET_SCHEMA_REGISTRY_URL_PROP =
         "hoodie.deltastreamer.schemaprovider.registry.targetUrl";
   }
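
The visibility change above lets the new Debezium source read the source registry URL key directly. A hypothetical fragment mirroring that usage (props and jssc are assumed to be in scope):

    // Fetch the latest source schema the same way DebeziumSource.fetchNextBatch does below.
    SchemaRegistryProvider provider = new SchemaRegistryProvider(props, jssc);
    String schemaStr = provider.fetchSchemaFromRegistry(
        props.getString(SchemaRegistryProvider.Config.SRC_SCHEMA_REGISTRY_URL_PROP));
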
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/debezium/DebeziumSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/debezium/DebeziumSource.java
new file mode 100644
index 0000000..7018419
--- /dev/null
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/debezium/DebeziumSource.java
@@ -0,0 +1,243 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.utilities.sources.debezium;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import org.apache.avro.Schema;
+import org.apache.avro.Schema.Field;
+import org.apache.avro.Schema.Type;
+import org.apache.avro.generic.GenericRecord;
+
+import org.apache.hudi.AvroConversionUtils;
+import org.apache.hudi.DataSourceWriteOptions;
+import org.apache.hudi.common.config.TypedProperties;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.common.util.collection.Pair;
+import org.apache.hudi.exception.HoodieException;
+import org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamerMetrics;
+import org.apache.hudi.utilities.schema.SchemaProvider;
+import org.apache.hudi.utilities.schema.SchemaRegistryProvider;
+import org.apache.hudi.utilities.sources.RowSource;
+import org.apache.hudi.utilities.sources.helpers.AvroConvertor;
+import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen;
+import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.CheckpointUtils;
+
+import org.apache.kafka.common.serialization.StringDeserializer;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.functions;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+import org.apache.spark.streaming.kafka010.KafkaUtils;
+import org.apache.spark.streaming.kafka010.LocationStrategies;
+import org.apache.spark.streaming.kafka010.OffsetRange;
+
+/**
+ * Base class for a Debezium streaming source that expects change events as Kafka Avro records.
+ * Obtains the schema from the Confluent schema registry.
+ */
+public abstract class DebeziumSource extends RowSource {
+
+  private static final Logger LOG = LogManager.getLogger(DebeziumSource.class);
+  // These are native Kafka configs; do not change the config names.
+  private static final String NATIVE_KAFKA_KEY_DESERIALIZER_PROP = "key.deserializer";
+  private static final String NATIVE_KAFKA_VALUE_DESERIALIZER_PROP = "value.deserializer";
+  private static final String OVERRIDE_CHECKPOINT_STRING = "hoodie.debezium.override.initial.checkpoint.key";
+  private static final String CONNECT_NAME_KEY = "connect.name";
+  private static final String DATE_CONNECT_NAME = "custom.debezium.DateString";
+
+  private final KafkaOffsetGen offsetGen;
+  private final HoodieDeltaStreamerMetrics metrics;
+  private final SchemaRegistryProvider schemaRegistryProvider;
+  private final String deserializerClassName;
+
+  public DebeziumSource(TypedProperties props, JavaSparkContext sparkContext,
+                        SparkSession sparkSession,
+                        SchemaProvider schemaProvider,
+                        HoodieDeltaStreamerMetrics metrics) {
+    super(props, sparkContext, sparkSession, schemaProvider);
+
+    props.put(NATIVE_KAFKA_KEY_DESERIALIZER_PROP, StringDeserializer.class);
+    deserializerClassName = props.getString(DataSourceWriteOptions.KAFKA_AVRO_VALUE_DESERIALIZER_CLASS().key(),
+        DataSourceWriteOptions.KAFKA_AVRO_VALUE_DESERIALIZER_CLASS().defaultValue());
+
+    try {
+      props.put(NATIVE_KAFKA_VALUE_DESERIALIZER_PROP, Class.forName(deserializerClassName));
+    } catch (ClassNotFoundException e) {
+      String error = "Could not load custom avro kafka deserializer: " + deserializerClassName;
+      LOG.error(error);
+      throw new HoodieException(error, e);
+    }
+
+    // Currently, the Debezium source requires a Confluent/Kafka schema registry to fetch the latest schema.
+    if (schemaProvider == null || !(schemaProvider instanceof SchemaRegistryProvider)) {
+      schemaRegistryProvider = new SchemaRegistryProvider(props, sparkContext);
+    } else {
+      schemaRegistryProvider = (SchemaRegistryProvider) schemaProvider;
+    }
+
+    offsetGen = new KafkaOffsetGen(props);
+    this.metrics = metrics;
+  }
+
+  @Override
+  protected Pair<Option<Dataset<Row>>, String> fetchNextBatch(Option<String> lastCkptStr, long sourceLimit) {
+    String overrideCheckpointStr = props.getString(OVERRIDE_CHECKPOINT_STRING, "");
+
+    OffsetRange[] offsetRanges = offsetGen.getNextOffsetRanges(lastCkptStr, sourceLimit, metrics);
+    long totalNewMsgs = CheckpointUtils.totalNewMessages(offsetRanges);
+    LOG.info("About to read " + totalNewMsgs + " from Kafka for topic :" + offsetGen.getTopicName());
+
+    if (totalNewMsgs == 0) {
+      // If there are no new messages, return an empty dataframe with no schema, since the schema from
+      // the schema registry can only be considered up to date once a change event has occurred.
+      return Pair.of(Option.of(sparkSession.emptyDataFrame()), overrideCheckpointStr.isEmpty() ? CheckpointUtils.offsetsToStr(offsetRanges) : overrideCheckpointStr);
+    } else {
+      try {
+        String schemaStr = schemaRegistryProvider.fetchSchemaFromRegistry(props.getString(SchemaRegistryProvider.Config.SRC_SCHEMA_REGISTRY_URL_PROP));
+        Dataset<Row> dataset = toDataset(offsetRanges, offsetGen, schemaStr);
+        LOG.info(String.format("Spark schema of Kafka Payload for topic %s:\n%s", offsetGen.getTopicName(), dataset.schema().treeString()));
+        LOG.info(String.format("New checkpoint string: %s", CheckpointUtils.offsetsToStr(offsetRanges)));
+        return Pair.of(Option.of(dataset), overrideCheckpointStr.isEmpty() ? CheckpointUtils.offsetsToStr(offsetRanges) : overrideCheckpointStr);
+      } catch (IOException exc) {
+        LOG.error("Fatal error reading and parsing incoming debezium event", exc);
+        throw new HoodieException("Fatal error reading and parsing incoming debezium event", exc);
+      }
+    }
+  }
+
+  /**
+   * Debezium Kafka payloads have a nested structure; implementations flatten it specific to the database type.
+   * @param rawKafkaData Dataset of Debezium CDC events read from Kafka
+   * @return A flattened dataset.
+   */
+  protected abstract Dataset<Row> processDataset(Dataset<Row> rawKafkaData);
+
+  /**
+   * Reads the given Kafka offset ranges and converts the records into a Spark dataset.
+   *
+   * @param offsetRanges Offset ranges to read
+   * @param offsetGen    KafkaOffsetGen providing the Kafka params
+   * @param schemaStr    Avro schema string fetched from the schema registry
+   * @return Spark dataset
+   */
+  private Dataset<Row> toDataset(OffsetRange[] offsetRanges, KafkaOffsetGen offsetGen, String schemaStr) {
+    AvroConvertor convertor = new AvroConvertor(schemaStr);
+    Dataset<Row> kafkaData;
+    if (deserializerClassName.equals(StringDeserializer.class.getName())) {
+      kafkaData = AvroConversionUtils.createDataFrame(
+          KafkaUtils.<String, String>createRDD(sparkContext, offsetGen.getKafkaParams(), offsetRanges, LocationStrategies.PreferConsistent())
+              .map(obj -> convertor.fromJson(obj.value()))
+              .rdd(), schemaStr, sparkSession);
+    } else {
+      kafkaData = AvroConversionUtils.createDataFrame(
+          KafkaUtils.createRDD(sparkContext, offsetGen.getKafkaParams(), offsetRanges, LocationStrategies.PreferConsistent())
+              .map(obj -> (GenericRecord) obj.value())
+              .rdd(), schemaStr, sparkSession);
+    }
+
+    // Flatten the Debezium payload, specific to each database type (Postgres, MySQL, etc.)
+    Dataset<Row> debeziumDataset = processDataset(kafkaData);
+
+    // Required transformations to convert Debezium data types into Spark-supported types.
+    return convertArrayColumnsToString(convertColumnToNullable(sparkSession,
+        convertDateColumns(debeziumDataset, new Schema.Parser().parse(schemaStr))));
+  }
+
+  /**
+   * Converts string formatted date columns into Spark date columns.
+   *
+   * @param dataset Spark dataset
+   * @param schema  Avro schema from Debezium
+   * @return Converted dataset
+   */
+  public static Dataset<Row> convertDateColumns(Dataset<Row> dataset, Schema schema) {
+    if (schema.getField("before") != null) {
+      List<String> dateFields = schema.getField("before")
+          .schema()
+          .getTypes()
+          .get(1)
+          .getFields()
+          .stream()
+          .filter(field -> {
+            if (field.schema().getType() == Type.UNION) {
+              return field.schema().getTypes().stream().anyMatch(
+                  schemaInUnion -> DATE_CONNECT_NAME.equals(schemaInUnion.getProp(CONNECT_NAME_KEY))
+              );
+            } else {
+              return DATE_CONNECT_NAME.equals(field.schema().getProp(CONNECT_NAME_KEY));
+            }
+          }).map(Field::name).collect(Collectors.toList());
+
+      LOG.info("Date fields: " + dateFields.toString());
+
+      for (String dateCol : dateFields) {
+        dataset = dataset.withColumn(dateCol, functions.col(dateCol).cast(DataTypes.DateType));
+      }
+    }
+
+    return dataset;
+  }
+
+  /**
+   * Utility function for marking columns as nullable, so the dataset schema matches the nullable
+   * columns in Debezium change events.
+   *
+   * @param sparkSession SparkSession object
+   * @param dataset      Dataframe to modify
+   * @return Modified dataframe
+   */
+  private static Dataset<Row> convertColumnToNullable(SparkSession sparkSession, Dataset<Row> dataset) {
+    List<String> columns = Arrays.asList(dataset.columns());
+    StructField[] modifiedStructFields = Arrays.stream(dataset.schema().fields()).map(field -> columns
+        .contains(field.name()) ? new StructField(field.name(), field.dataType(), true, field.metadata()) : field)
+        .toArray(StructField[]::new);
+
+    return sparkSession.createDataFrame(dataset.rdd(), new StructType(modifiedStructFields));
+  }
+
+  /**
+   * Casts array columns to string columns, since not all Debezium array columns can be
+   * converted to Spark array columns.
+   *
+   * @param dataset Dataframe to modify
+   * @return Modified dataframe
+   */
+  private static Dataset<Row> convertArrayColumnsToString(Dataset<Row> dataset) {
+    List<String> arrayColumns = Arrays.stream(dataset.schema().fields())
+        .filter(field -> field.dataType().typeName().toLowerCase().startsWith("array"))
+        .map(StructField::name)
+        .collect(Collectors.toList());
+
+    for (String colName : arrayColumns) {
+      dataset = dataset.withColumn(colName, functions.col(colName).cast(DataTypes.StringType));
+    }
+
+    return dataset;
+  }
+}
+
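
For context, a hedged sketch of the minimal properties such a source might need. The key names are the Kafka topic key used by KafkaOffsetGen, the native Kafka bootstrap key, the registry URL key made public above, and the override key defined in this class; all values are placeholders:

    import org.apache.hudi.common.config.TypedProperties;

    public class DebeziumSourcePropsSketch {
      public static TypedProperties minimalProps() {
        TypedProperties props = new TypedProperties();
        props.put("hoodie.deltastreamer.source.kafka.topic", "postgres.public.users");
        props.put("bootstrap.servers", "localhost:9092");
        props.put("hoodie.deltastreamer.schemaprovider.registry.url",
            "http://localhost:8081/subjects/postgres.public.users-value/versions/latest");
        // Optional: pin the first committed checkpoint (format: topic,partition:offset).
        // props.put("hoodie.debezium.override.initial.checkpoint.key", "postgres.public.users,0:0");
        return props;
      }
    }
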
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/debezium/MysqlDebeziumSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/debezium/MysqlDebeziumSource.java
new file mode 100644
index 0000000..cc8f645
--- /dev/null
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/debezium/MysqlDebeziumSource.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.utilities.sources.debezium;
+
+import org.apache.hudi.common.config.TypedProperties;
+import org.apache.hudi.common.model.debezium.DebeziumConstants;
+import org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamerMetrics;
+import org.apache.hudi.utilities.schema.SchemaProvider;
+
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.api.java.UDF2;
+import org.apache.spark.sql.types.DataTypes;
+
+import static org.apache.spark.sql.functions.callUDF;
+
+/**
+ * Source for incrementally ingesting Debezium-generated change logs from MySQL.
+ */
+public class MysqlDebeziumSource extends DebeziumSource {
+
+  private final SQLContext sqlContext;
+  private final String generateUniqueSeqUdfFn = "mysql_generate_order_key";
+
+  public MysqlDebeziumSource(TypedProperties props, JavaSparkContext sparkContext,
+                             SparkSession sparkSession,
+                             SchemaProvider schemaProvider,
+                             HoodieDeltaStreamerMetrics metrics) {
+    super(props, sparkContext, sparkSession, schemaProvider, metrics);
+    this.sqlContext = sparkSession.sqlContext();
+    sqlContext.udf().register(generateUniqueSeqUdfFn, (UDF2<String, Long, String>) MysqlDebeziumSource::generateUniqueSequence, DataTypes.StringType);
+  }
+
+  /**
+   * Debezium Kafka Payload has a nested structure (see https://debezium.io/documentation/reference/1.4/connectors/mysql.html).
+   * This function flattens the nested structure for the MySQL data and extracts a subset of the Debezium metadata fields.
+   *
+   * @param rowDataset Dataset containing Debezium Payloads
+   * @return New dataset with flattened columns
+   */
+  @Override
+  protected Dataset<Row> processDataset(Dataset<Row> rowDataset) {
+    Dataset<Row> flattenedDataset = rowDataset;
+    if (rowDataset.columns().length > 0) {
+      // Only flatten for non-empty schemas
+      Dataset<Row> insertedOrUpdatedData = rowDataset
+          .selectExpr(
+              String.format("%s as %s", DebeziumConstants.INCOMING_OP_FIELD, DebeziumConstants.FLATTENED_OP_COL_NAME),
+              String.format("%s as %s", DebeziumConstants.INCOMING_TS_MS_FIELD, DebeziumConstants.UPSTREAM_PROCESSING_TS_COL_NAME),
+              String.format("%s as %s", DebeziumConstants.INCOMING_SOURCE_NAME_FIELD, DebeziumConstants.FLATTENED_SHARD_NAME),
+              String.format("%s as %s", DebeziumConstants.INCOMING_SOURCE_TS_MS_FIELD, DebeziumConstants.FLATTENED_TS_COL_NAME),
+              String.format("%s as %s", DebeziumConstants.INCOMING_SOURCE_TXID_FIELD, DebeziumConstants.FLATTENED_TX_ID_COL_NAME),
+              String.format("%s as %s", DebeziumConstants.INCOMING_SOURCE_FILE_FIELD, DebeziumConstants.FLATTENED_FILE_COL_NAME),
+              String.format("%s as %s", DebeziumConstants.INCOMING_SOURCE_POS_FIELD, DebeziumConstants.FLATTENED_POS_COL_NAME),
+              String.format("%s as %s", DebeziumConstants.INCOMING_SOURCE_ROW_FIELD, DebeziumConstants.FLATTENED_ROW_COL_NAME),
+              String.format("%s.*", DebeziumConstants.INCOMING_AFTER_FIELD)
+          )
+          .filter(rowDataset.col(DebeziumConstants.INCOMING_OP_FIELD).notEqual(DebeziumConstants.DELETE_OP));
+
+      Dataset<Row> deletedData = rowDataset
+          .selectExpr(
+              String.format("%s as %s", DebeziumConstants.INCOMING_OP_FIELD, DebeziumConstants.FLATTENED_OP_COL_NAME),
+              String.format("%s as %s", DebeziumConstants.INCOMING_TS_MS_FIELD, DebeziumConstants.UPSTREAM_PROCESSING_TS_COL_NAME),
+              String.format("%s as %s", DebeziumConstants.INCOMING_SOURCE_NAME_FIELD, DebeziumConstants.FLATTENED_SHARD_NAME),
+              String.format("%s as %s", DebeziumConstants.INCOMING_SOURCE_TS_MS_FIELD, DebeziumConstants.FLATTENED_TS_COL_NAME),
+              String.format("%s as %s", DebeziumConstants.INCOMING_SOURCE_TXID_FIELD, DebeziumConstants.FLATTENED_TX_ID_COL_NAME),
+              String.format("%s as %s", DebeziumConstants.INCOMING_SOURCE_FILE_FIELD, DebeziumConstants.FLATTENED_FILE_COL_NAME),
+              String.format("%s as %s", DebeziumConstants.INCOMING_SOURCE_POS_FIELD, DebeziumConstants.FLATTENED_POS_COL_NAME),
+              String.format("%s as %s", DebeziumConstants.INCOMING_SOURCE_ROW_FIELD, DebeziumConstants.FLATTENED_ROW_COL_NAME),
+              String.format("%s.*", DebeziumConstants.INCOMING_BEFORE_FIELD)
+          )
+          .filter(rowDataset.col(DebeziumConstants.INCOMING_OP_FIELD).equalTo(DebeziumConstants.DELETE_OP));
+
+      flattenedDataset = insertedOrUpdatedData.union(deletedData);
+    }
+
+    return flattenedDataset.withColumn(DebeziumConstants.ADDED_SEQ_COL_NAME,
+            callUDF(generateUniqueSeqUdfFn, flattenedDataset.col(DebeziumConstants.FLATTENED_FILE_COL_NAME),
+                flattenedDataset.col(DebeziumConstants.FLATTENED_POS_COL_NAME)));
+  }
+
+  private static String generateUniqueSequence(String fileId, Long pos) {
+    return fileId.substring(fileId.lastIndexOf('.') + 1).concat("." + pos);
+  }
+}
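
A worked example of generateUniqueSequence above, using a hypothetical binlog file name:

    public class MysqlSeqSketch {
      public static void main(String[] args) {
        String fileId = "mysql-bin.000007"; // hypothetical binlog file name
        Long pos = 1932L;
        // Keep the numeric suffix of the binlog file and append the event position.
        String seq = fileId.substring(fileId.lastIndexOf('.') + 1).concat("." + pos);
        System.out.println(seq); // prints: 000007.1932
      }
    }
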
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/debezium/PostgresDebeziumSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/debezium/PostgresDebeziumSource.java
new file mode 100644
index 0000000..bf43817
--- /dev/null
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/debezium/PostgresDebeziumSource.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.utilities.sources.debezium;
+
+import org.apache.hudi.common.config.TypedProperties;
+import org.apache.hudi.common.model.debezium.DebeziumConstants;
+import org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamerMetrics;
+import org.apache.hudi.utilities.schema.SchemaProvider;
+
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+
+/**
+ * Source for incrementally ingesting Debezium-generated change logs from PostgreSQL.
+ */
+public class PostgresDebeziumSource extends DebeziumSource {
+
+  public PostgresDebeziumSource(TypedProperties props, JavaSparkContext sparkContext,
+                                SparkSession sparkSession,
+                                SchemaProvider schemaProvider,
+                                HoodieDeltaStreamerMetrics metrics) {
+    super(props, sparkContext, sparkSession, schemaProvider, metrics);
+  }
+
+  /**
+   * Debezium Kafka Payload has a nested structure (see https://debezium.io/documentation/reference/1.4/connectors/postgresql.html#postgresql-create-events).
+   * This function flattens the nested structure for the Postgres data and extracts a subset of the Debezium metadata fields.
+   *
+   * @param rowDataset Dataset containing Debezium Payloads
+   * @return New dataset with flattened columns
+   */
+  @Override
+  protected Dataset<Row> processDataset(Dataset<Row> rowDataset) {
+    if (rowDataset.columns().length > 0) {
+      // Pick select Debezium and Postgres meta fields: row values come from the before field
+      // for delete records and from the after field for insert or update records.
+      Dataset<Row> insertedOrUpdatedData = rowDataset
+          .selectExpr(
+              String.format("%s as %s", DebeziumConstants.INCOMING_OP_FIELD, DebeziumConstants.FLATTENED_OP_COL_NAME),
+              String.format("%s as %s", DebeziumConstants.INCOMING_TS_MS_FIELD, DebeziumConstants.UPSTREAM_PROCESSING_TS_COL_NAME),
+              String.format("%s as %s", DebeziumConstants.INCOMING_SOURCE_NAME_FIELD, DebeziumConstants.FLATTENED_SHARD_NAME),
+              String.format("%s as %s", DebeziumConstants.INCOMING_SOURCE_TS_MS_FIELD, DebeziumConstants.FLATTENED_TS_COL_NAME),
+              String.format("%s as %s", DebeziumConstants.INCOMING_SOURCE_TXID_FIELD, DebeziumConstants.FLATTENED_TX_ID_COL_NAME),
+              String.format("%s as %s", DebeziumConstants.INCOMING_SOURCE_LSN_FIELD, DebeziumConstants.FLATTENED_LSN_COL_NAME),
+              String.format("%s as %s", DebeziumConstants.INCOMING_SOURCE_XMIN_FIELD, DebeziumConstants.FLATTENED_XMIN_COL_NAME),
+              String.format("%s.*", DebeziumConstants.INCOMING_AFTER_FIELD)
+          )
+          .filter(rowDataset.col(DebeziumConstants.INCOMING_OP_FIELD).notEqual(DebeziumConstants.DELETE_OP));
+
+      Dataset<Row> deletedData = rowDataset
+          .selectExpr(
+              String.format("%s as %s", DebeziumConstants.INCOMING_OP_FIELD, DebeziumConstants.FLATTENED_OP_COL_NAME),
+              String.format("%s as %s", DebeziumConstants.INCOMING_TS_MS_FIELD, DebeziumConstants.UPSTREAM_PROCESSING_TS_COL_NAME),
+              String.format("%s as %s", DebeziumConstants.INCOMING_SOURCE_NAME_FIELD, DebeziumConstants.FLATTENED_SHARD_NAME),
+              String.format("%s as %s", DebeziumConstants.INCOMING_SOURCE_TS_MS_FIELD, DebeziumConstants.FLATTENED_TS_COL_NAME),
+              String.format("%s as %s", DebeziumConstants.INCOMING_SOURCE_TXID_FIELD, DebeziumConstants.FLATTENED_TX_ID_COL_NAME),
+              String.format("%s as %s", DebeziumConstants.INCOMING_SOURCE_LSN_FIELD, DebeziumConstants.FLATTENED_LSN_COL_NAME),
+              String.format("%s as %s", DebeziumConstants.INCOMING_SOURCE_XMIN_FIELD, DebeziumConstants.FLATTENED_XMIN_COL_NAME),
+              String.format("%s.*", DebeziumConstants.INCOMING_BEFORE_FIELD)
+          )
+          .filter(rowDataset.col(DebeziumConstants.INCOMING_OP_FIELD).equalTo(DebeziumConstants.DELETE_OP));
+
+      return insertedOrUpdatedData.union(deletedData);
+    } else {
+      return rowDataset;
+    }
+  }
+}
+
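
To see the flattening in isolation, the sketch below pushes one Debezium-style envelope through the same selectExpr shape as processDataset above. This is a hypothetical, self-contained example rather than part of the commit: the literal column aliases (_change_operation_type, _event_lsn, and so on) are assumed values of the DebeziumConstants FLATTENED_* fields, and only Spark is needed on the classpath.

    import java.util.Collections;

    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Encoders;
    import org.apache.spark.sql.Row;
    import org.apache.spark.sql.SparkSession;

    public class FlattenSketch {
      public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
            .master("local[1]").appName("debezium-flatten-sketch").getOrCreate();

        // One update ("u") envelope: op, ts_ms, source metadata and the after row image.
        String event = "{\"op\":\"u\",\"ts_ms\":100,"
            + "\"source\":{\"name\":\"postgres\",\"ts_ms\":12345,\"txId\":543,\"lsn\":98765,\"xmin\":null},"
            + "\"before\":null,"
            + "\"after\":{\"id\":\"1\",\"type\":\"after_type\"}}";
        Dataset<Row> rowDataset = spark.read().json(
            spark.createDataset(Collections.singletonList(event), Encoders.STRING()));

        // Same projection shape as processDataset: promote the meta fields under their
        // flattened aliases, then splat the row image with after.* (before.* would be
        // used for deletes). The alias strings here are assumptions, not the real constants.
        rowDataset.selectExpr(
            "op as _change_operation_type",
            "ts_ms as _upstream_event_processed_ts_ms",
            "source.name as db_shard_source_partition",
            "source.ts_ms as _event_origin_ts_ms",
            "source.txId as _event_tx_id",
            "source.lsn as _event_lsn",
            "source.xmin as _event_xmin",
            "after.*"
        ).show(false);

        spark.stop();
      }
    }
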
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/debezium/TestAbstractDebeziumSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/debezium/TestAbstractDebeziumSource.java
new file mode 100644
index 0000000..f7dc6b9
--- /dev/null
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/debezium/TestAbstractDebeziumSource.java
@@ -0,0 +1,209 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.utilities.sources.debezium;
+
+import org.apache.hudi.common.config.TypedProperties;
+import org.apache.hudi.common.model.debezium.DebeziumConstants;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.utilities.UtilHelpers;
+import org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamerMetrics;
+import org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter;
+import org.apache.hudi.utilities.schema.SchemaProvider;
+import org.apache.hudi.utilities.schema.SchemaRegistryProvider;
+import org.apache.hudi.utilities.sources.InputBatch;
+import org.apache.hudi.utilities.testutils.UtilitiesTestBase;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.common.serialization.StringDeserializer;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.streaming.kafka010.KafkaTestUtils;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import java.io.IOException;
+import java.util.UUID;
+import java.util.stream.Stream;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.params.provider.Arguments.arguments;
+import static org.mockito.Mockito.mock;
+
+public abstract class TestAbstractDebeziumSource extends UtilitiesTestBase {
+
+  private static final String TEST_TOPIC_NAME = "hoodie_test";
+
+  private final HoodieDeltaStreamerMetrics metrics = mock(HoodieDeltaStreamerMetrics.class);
+  private KafkaTestUtils testUtils;
+
+  @BeforeAll
+  public static void initClass() throws Exception {
+    UtilitiesTestBase.initClass(false);
+  }
+
+  @AfterAll
+  public static void cleanupClass() {
+    UtilitiesTestBase.cleanupClass();
+  }
+
+  @BeforeEach
+  public void setup() throws Exception {
+    super.setup();
+    testUtils = new KafkaTestUtils();
+    testUtils.setup();
+  }
+
+  @AfterEach
+  public void teardown() throws Exception {
+    super.teardown();
+    testUtils.teardown();
+  }
+
+  private TypedProperties createPropsForJsonSource() {
+    TypedProperties props = new TypedProperties();
+    props.setProperty("hoodie.deltastreamer.source.kafka.topic", TEST_TOPIC_NAME);
+    props.setProperty("bootstrap.servers", testUtils.brokerAddress());
+    props.setProperty("auto.offset.reset", "earliest");
+    props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
+    props.setProperty("hoodie.deltastreamer.schemaprovider.registry.url", "localhost");
+    props.setProperty("hoodie.deltastreamer.source.kafka.value.deserializer.class", StringDeserializer.class.getName());
+    props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, UUID.randomUUID().toString());
+
+    return props;
+  }
+
+  protected abstract String getIndexName();
+
+  protected abstract String getSourceClass();
+
+  protected abstract String getSchema();
+
+  protected abstract GenericRecord generateMetaFields(GenericRecord record);
+
+  protected abstract void validateMetaFields(Dataset<Row> records);
+
+  @ParameterizedTest
+  @MethodSource("testArguments")
+  public void testDebeziumEvents(Operation operation) throws Exception {
+
+    String sourceClass = getSourceClass();
+
+    // topic setup.
+    testUtils.createTopic(TEST_TOPIC_NAME, 2);
+    TypedProperties props = createPropsForJsonSource();
+
+    SchemaProvider schemaProvider = new MockSchemaRegistryProvider(props, jsc, this);
+    SourceFormatAdapter debeziumSource = new SourceFormatAdapter(UtilHelpers.createSource(sourceClass, props, jsc, sparkSession, schemaProvider, metrics));
+
+    testUtils.sendMessages(TEST_TOPIC_NAME, new String[] {generateDebeziumEvent(operation).toString()});
+
+    InputBatch<Dataset<Row>> fetch = debeziumSource.fetchNewDataInRowFormat(Option.empty(), 10);
+    assertEquals(1, fetch.getBatch().get().count());
+
+    // Ensure the before fields are picked for DELETE CDC events,
+    // and the after fields are picked for INSERT and UPDATE CDC events.
+    final String fieldPrefix = (operation.equals(Operation.DELETE)) ? "before_" : "after_";
+    assertTrue(fetch.getBatch().get().select("type").collectAsList().stream()
+        .allMatch(r -> r.getString(0).startsWith(fieldPrefix)));
+
+    // Validate DB specific meta fields
+    validateMetaFields(fetch.getBatch().get());
+  }
+
+  private GenericRecord generateDebeziumEvent(Operation op) {
+    Schema schema = new Schema.Parser().parse(getSchema());
+    String indexName = getIndexName().concat(".ghschema.gharchive.Value");
+    GenericRecord rec = new GenericData.Record(schema);
+    rec.put(DebeziumConstants.INCOMING_OP_FIELD, op.op);
+    rec.put(DebeziumConstants.INCOMING_TS_MS_FIELD, 100L);
+
+    // Before
+    Schema.Field beforeField = schema.getField(DebeziumConstants.INCOMING_BEFORE_FIELD);
+    Schema beforeSchema = beforeField.schema().getTypes().get(beforeField.schema().getIndexNamed(indexName));
+    GenericRecord beforeRecord = new GenericData.Record(beforeSchema);
+
+    beforeRecord.put("id", 1);
+    beforeRecord.put("date", "1/1/2020");
+    beforeRecord.put("type", "before_type");
+    beforeRecord.put("payload", "before_payload");
+    beforeRecord.put("timestamp", 1000L);
+    rec.put(DebeziumConstants.INCOMING_BEFORE_FIELD, beforeRecord);
+
+    // After
+    Schema.Field afterField = schema.getField(DebeziumConstants.INCOMING_AFTER_FIELD);
+    Schema afterSchema = afterField.schema().getTypes().get(afterField.schema().getIndexNamed(indexName));
+    GenericRecord afterRecord = new GenericData.Record(afterSchema);
+
+    afterRecord.put("id", 1);
+    afterRecord.put("date", "1/1/2021");
+    afterRecord.put("type", "after_type");
+    afterRecord.put("payload", "after_payload");
+    afterRecord.put("timestamp", 3000L);
+    rec.put(DebeziumConstants.INCOMING_AFTER_FIELD, afterRecord);
+
+    return generateMetaFields(rec);
+  }
+
+  private static class MockSchemaRegistryProvider extends SchemaRegistryProvider {
+
+    private final String schema;
+
+    public MockSchemaRegistryProvider(TypedProperties props, JavaSparkContext jssc, TestAbstractDebeziumSource source) {
+      super(props, jssc);
+      schema = source.getSchema();
+    }
+
+    @Override
+    public String fetchSchemaFromRegistry(String registryUrl) throws IOException {
+      return schema;
+    }
+  }
+
+  private static Stream<Arguments> testArguments() {
+    return Stream.of(
+        arguments(Operation.INSERT),
+        arguments(Operation.UPDATE),
+        arguments(Operation.DELETE)
+    );
+  }
+
+  private enum Operation {
+    INSERT("c"),
+    UPDATE("u"),
+    DELETE("d");
+
+    public final String op;
+
+    Operation(String op) {
+      this.op = op;
+    }
+  }
+}
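
The properties assembled in createPropsForJsonSource mirror what a real deployment would hand to HoodieDeltaStreamer via --props, together with --source-class set to one of the Debezium sources above. A hedged sketch of an equivalent .properties file follows; the topic, broker address, group id and registry URL are placeholders, and in production the registry URL must point at a reachable schema registry serving the Debezium Avro schema rather than the mock provider used by this test.

    # Kafka source settings (same keys as createPropsForJsonSource above)
    hoodie.deltastreamer.source.kafka.topic=hoodie_test
    bootstrap.servers=localhost:9092
    auto.offset.reset=earliest
    enable.auto.commit=false
    group.id=debezium-ingest-demo
    # Schema provider: the URL format is an assumption based on the Confluent registry convention
    hoodie.deltastreamer.schemaprovider.registry.url=http://localhost:8081/subjects/hoodie_test-value/versions/latest
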
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/debezium/TestMysqlDebeziumSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/debezium/TestMysqlDebeziumSource.java
new file mode 100644
index 0000000..0886314
--- /dev/null
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/debezium/TestMysqlDebeziumSource.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.utilities.sources.debezium;
+
+import org.apache.hudi.common.model.debezium.DebeziumConstants;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class TestMysqlDebeziumSource extends TestAbstractDebeziumSource {
+
+  private static final String MYSQL_GITHUB_SCHEMA = "{\"connect.name\": \"mysql.ghschema.gharchive.Envelope\",\n"
+      + "  \"fields\": [{\"default\": null,\"name\": \"before\",\"type\": [\"null\",{\"connect.name\": \"mysql.ghschema.gharchive.Value\",\n"
+      + "  \"fields\": [{\"name\": \"id\",\"type\": \"string\"},{\"name\": \"date\",\"type\": \"string\"},{\"default\": null,\"name\": \"timestamp\",\n"
+      + "  \"type\": [\"null\",\"long\"]},{\"default\": null,\"name\": \"type\",\"type\": [\"null\",\"string\"]},{\"default\": null,\"name\": \"payload\",\n"
+      + "  \"type\": [\"null\",\"string\"]},{\"default\": null,\"name\": \"org\",\"type\": [\"null\",\"string\"]},{\"default\": null,\"name\": \"created_at\",\n"
+      + "  \"type\": [\"null\",\"long\"]},{\"default\": null,\"name\": \"public\",\"type\": [\"null\",\"boolean\"]}],\"name\": \"Value\",\"type\": \"record\"\n"
+      + "  }]},{\"default\": null,\"name\": \"after\",\"type\": [\"null\",\"Value\"]},{\"name\": \"source\",\"type\": {\"connect.name\": \"io.debezium.connector.mysql.Source\",\n"
+      + "  \"fields\": [{\"name\": \"connector\",\"type\": \"string\"},{\"name\": \"name\",\"type\": \"string\"},{\"name\": \"ts_ms\",\"type\": \"long\"},\n"
+      + "  {\"name\": \"db\",\"type\": \"string\"},{\"name\": \"table\",\"type\": \"string\"},{\"default\": null,\n"
+      + "  \"name\": \"txId\",\"type\": [\"null\",\"long\"]},{\"name\": \"file\",\"type\": \"string\"},{\"default\": null,\"name\": \"pos\",\"type\": [\"null\",\"long\"]},{\"default\": null,\n"
+      + "  \"name\": \"row\",\"type\": [\"null\",\"long\"]}],\"name\": \"Source\",\"namespace\": \"io.debezium.connector.mysql\",\"type\": \"record\"\n"
+      + "  }},{\"name\": \"op\",\"type\": \"string\"},{\"default\": null,\"name\": \"ts_ms\",\"type\": [\"null\",\"long\"]},{\"default\": null,\"name\": \"transaction\",\n"
+      + "  \"type\": [\"null\",{\"fields\": [{\"name\": \"id\",\"type\": \"string\"},{\"name\": \"total_order\",\"type\": \"long\"},{\"name\": \"data_collection_order\",\n"
+      + "  \"type\": \"long\"}],\"name\": \"ConnectDefault\",\"namespace\": \"io.confluent.connect.avro\",\"type\": \"record\"}]}],\"name\": \"Envelope\",\n"
+      + "  \"namespace\": \"mysql.ghschema.gharchive\",\"type\": \"record\"}";
+
+  private static final String TEST_DB = "ghschema";
+  private static final String TEST_TABLE = "gharchive";
+  private static final long TEST_TS_MS = 12345L;
+  private static final long TEST_TXID = 543L;
+  private static final String TEST_FILE = "mysql-bin.00007";
+  private static final long TEST_POS = 98765L;
+  private static final String EXPECTED_TEST_SEQ = "00007.98765";
+
+  @Override
+  protected String getIndexName() {
+    return "mysql";
+  }
+
+  @Override
+  protected String getSourceClass() {
+    return MysqlDebeziumSource.class.getName();
+  }
+
+  @Override
+  protected String getSchema() {
+    return MYSQL_GITHUB_SCHEMA;
+  }
+
+  @Override
+  protected GenericRecord generateMetaFields(GenericRecord rec) {
+    Schema schema = new Schema.Parser().parse(getSchema());
+    // Source fields specific to MySQL
+    GenericRecord sourceRecord = new GenericData.Record(schema.getField(DebeziumConstants.INCOMING_SOURCE_FIELD).schema());
+    sourceRecord.put("name", getIndexName());
+    sourceRecord.put("connector", getIndexName());
+    sourceRecord.put("db", TEST_DB);
+    sourceRecord.put("table", TEST_TABLE);
+    sourceRecord.put("ts_ms", TEST_TS_MS);
+    sourceRecord.put("txId", TEST_TXID);
+    sourceRecord.put("file", TEST_FILE);
+    sourceRecord.put("pos", TEST_POS);
+    rec.put(DebeziumConstants.INCOMING_SOURCE_FIELD, sourceRecord);
+    return rec;
+  }
+
+  @Override
+  protected void validateMetaFields(Dataset<Row> records) {
+    assertTrue(records.select(DebeziumConstants.FLATTENED_SHARD_NAME).collectAsList().stream()
+        .allMatch(r -> r.getString(0).equals(getIndexName())));
+    assertTrue(records.select(DebeziumConstants.FLATTENED_TS_COL_NAME).collectAsList().stream()
+        .allMatch(r -> r.getLong(0) == TEST_TS_MS));
+    assertTrue(records.select(DebeziumConstants.FLATTENED_TX_ID_COL_NAME).collectAsList().stream()
+        .allMatch(r -> r.getLong(0) == TEST_TXID));
+    assertTrue(records.select(DebeziumConstants.ADDED_SEQ_COL_NAME).collectAsList().stream()
+        .allMatch(r -> r.getString(0).equals(EXPECTED_TEST_SEQ)));
+  }
+}
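
EXPECTED_TEST_SEQ above documents the ordering key the MySQL source is expected to add under ADDED_SEQ_COL_NAME: MySQL has no LSN equivalent, so events are ordered by binlog file and position. A minimal sketch of that derivation, assuming the seq is simply the binlog file's numeric suffix joined to the offset (the actual construction lives in MysqlDebeziumSource):

    // Hypothetical helper reproducing the mapping the test asserts:
    // ("mysql-bin.00007", 98765L) -> "00007.98765"
    static String toSeq(String binlogFile, long pos) {
      String fileSuffix = binlogFile.substring(binlogFile.lastIndexOf('.') + 1); // "00007"
      return fileSuffix + "." + pos; // "00007.98765"
    }
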
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/debezium/TestPostgresDebeziumSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/debezium/TestPostgresDebeziumSource.java
new file mode 100644
index 0000000..ef75fc6
--- /dev/null
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/debezium/TestPostgresDebeziumSource.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.utilities.sources.debezium;
+
+import org.apache.hudi.common.model.debezium.DebeziumConstants;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class TestPostgresDebeziumSource extends TestAbstractDebeziumSource {
+
+  private static final String POSTGRES_GITHUB_SCHEMA = "{\"connect.name\": \"postgres.ghschema.gharchive.Envelope\",\n"
+      + "  \"fields\": [{\"default\": null,\"name\": \"before\",\"type\": [\"null\",{\"connect.name\": \"postgres.ghschema.gharchive.Value\",\n"
+      + "  \"fields\": [{\"name\": \"id\",\"type\": \"string\"},{\"name\": \"date\",\"type\": \"string\"},{\"default\": null,\"name\": \"timestamp\",\n"
+      + "  \"type\": [\"null\",\"long\"]},{\"default\": null,\"name\": \"type\",\"type\": [\"null\",\"string\"]},{\"default\": null,\"name\": \"payload\",\n"
+      + "  \"type\": [\"null\",\"string\"]},{\"default\": null,\"name\": \"org\",\"type\": [\"null\",\"string\"]},{\"default\": null,\"name\": \"created_at\",\n"
+      + "  \"type\": [\"null\",\"long\"]},{\"default\": null,\"name\": \"public\",\"type\": [\"null\",\"boolean\"]}],\"name\": \"Value\",\"type\": \"record\"\n"
+      + "  }]},{\"default\": null,\"name\": \"after\",\"type\": [\"null\",\"Value\"]},{\"name\": \"source\",\"type\": {\"connect.name\": \"io.debezium.connector.postgresql.Source\",\n"
+      + "  \"fields\": [{\"name\": \"connector\",\"type\": \"string\"},{\"name\": \"name\",\"type\": \"string\"},{\"name\": \"ts_ms\",\"type\": \"long\"},\n"
+      + "  {\"name\": \"db\",\"type\": \"string\"},{\"name\": \"schema\",\"type\": \"string\"},{\"name\": \"table\",\"type\": \"string\"},{\"default\": null,\n"
+      + "  \"name\": \"txId\",\"type\": [\"null\",\"long\"]},{\"default\": null,\"name\": \"lsn\",\"type\": [\"null\",\"long\"]},{\"default\": null,\n"
+      + "  \"name\": \"xmin\",\"type\": [\"null\",\"long\"]}],\"name\": \"Source\",\"namespace\": \"io.debezium.connector.postgresql\",\"type\": \"record\"\n"
+      + "  }},{\"name\": \"op\",\"type\": \"string\"},{\"default\": null,\"name\": \"ts_ms\",\"type\": [\"null\",\"long\"]},{\"default\": null,\"name\": \"transaction\",\n"
+      + "  \"type\": [\"null\",{\"fields\": [{\"name\": \"id\",\"type\": \"string\"},{\"name\": \"total_order\",\"type\": \"long\"},{\"name\": \"data_collection_order\",\n"
+      + "  \"type\": \"long\"}],\"name\": \"ConnectDefault\",\"namespace\": \"io.confluent.connect.avro\",\"type\": \"record\"}]}],\"name\": \"Envelope\",\n"
+      + "  \"namespace\": \"postgres.ghschema.gharchive\",\"type\": \"record\"}";
+
+  private static final String TEST_DB = "postgres";
+  private static final String TEST_SCHEMA = "ghschema";
+  private static final String TEST_TABLE = "gharchive";
+  private static final long TEST_TS_MS = 12345L;
+  private static final long TEST_TXID = 543L;
+  private static final long TEST_LSN = 98765L;
+
+  @Override
+  protected String getIndexName() {
+    return "postgres";
+  }
+
+  @Override
+  protected String getSourceClass() {
+    return PostgresDebeziumSource.class.getName();
+  }
+
+  @Override
+  protected String getSchema() {
+    return POSTGRES_GITHUB_SCHEMA;
+  }
+
+  @Override
+  protected GenericRecord generateMetaFields(GenericRecord rec) {
+    Schema schema = new Schema.Parser().parse(getSchema());
+    // Source fields specific to PostgreSQL
+    GenericRecord sourceRecord = new GenericData.Record(schema.getField(DebeziumConstants.INCOMING_SOURCE_FIELD).schema());
+    sourceRecord.put("name", getIndexName());
+    sourceRecord.put("connector", getIndexName());
+    sourceRecord.put("db", TEST_DB);
+    sourceRecord.put("schema", TEST_SCHEMA);
+    sourceRecord.put("table", TEST_TABLE);
+    sourceRecord.put("ts_ms", TEST_TS_MS);
+    sourceRecord.put("txId", TEST_TXID);
+    sourceRecord.put("lsn", TEST_LSN);
+    rec.put(DebeziumConstants.INCOMING_SOURCE_FIELD, sourceRecord);
+    return rec;
+  }
+
+  @Override
+  protected void validateMetaFields(Dataset<Row> records) {
+    assertTrue(records.select(DebeziumConstants.FLATTENED_TS_COL_NAME).collectAsList().stream()
+        .allMatch(r -> r.getLong(0) == TEST_TS_MS));
+    assertTrue(records.select(DebeziumConstants.FLATTENED_TX_ID_COL_NAME).collectAsList().stream()
+        .allMatch(r -> r.getLong(0) == TEST_TXID));
+    assertTrue(records.select(DebeziumConstants.FLATTENED_LSN_COL_NAME).collectAsList().stream()
+        .allMatch(r -> r.getLong(0) == TEST_LSN));
+  }
+}