Posted to commits@hudi.apache.org by yi...@apache.org on 2022/07/23 00:26:24 UTC

[hudi] branch master updated: [HUDI-4435] Fix Avro field not found issue introduced by Avro 1.10 (#6155)

This is an automated email from the ASF dual-hosted git repository.

yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new af10a97e7a [HUDI-4435] Fix Avro field not found issue introduced by Avro 1.10 (#6155)
af10a97e7a is described below

commit af10a97e7ac00271f05964818cc2d3311707e81d
Author: Rahil C <32...@users.noreply.github.com>
AuthorDate: Fri Jul 22 17:26:16 2022 -0700

    [HUDI-4435] Fix Avro field not found issue introduced by Avro 1.10 (#6155)
    
    Co-authored-by: Wenning Ding <we...@amazon.com>
---
 .../hadoop/utils/HoodieRealtimeRecordReaderUtils.java   | 10 +++++++++-
 .../hadoop/realtime/TestHoodieRealtimeRecordReader.java | 17 +++++++++++++++++
 2 files changed, 26 insertions(+), 1 deletion(-)
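
For context: Avro 1.10 changed GenericData.Record#get(String) to throw an
AvroRuntimeException ("Not a valid schema field: ...") when the name is not a
field of the record's schema, where earlier Avro versions quietly returned
null. A minimal sketch of the behavior change this patch guards against (the
"Row" schema and field values below are illustrative, not taken from the Hudi
code base):

import org.apache.avro.AvroRuntimeException;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

public class AvroFieldLookupSketch {
  public static void main(String[] args) {
    // A record whose schema has a single field "id"; the Hudi meta column
    // "_hoodie_commit_time" is deliberately absent.
    Schema schema = SchemaBuilder.record("Row").fields()
        .requiredString("id")
        .endRecord();
    GenericRecord record = new GenericData.Record(schema);
    record.put("id", "100");

    try {
      // Avro <= 1.9: returns null. Avro 1.10+: throws AvroRuntimeException.
      Object value = record.get("_hoodie_commit_time");
      System.out.println("Missing field resolved to: " + value);
    } catch (AvroRuntimeException e) {
      // This is the path HoodieRealtimeRecordReaderUtils now handles by
      // treating the missing field as null.
      System.out.println("Missing field threw: " + e.getMessage());
    }
  }
}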

diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeRecordReaderUtils.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeRecordReaderUtils.java
index 0e4f9c304c..bf4cbff666 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeRecordReaderUtils.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeRecordReaderUtils.java
@@ -18,6 +18,7 @@
 
 package org.apache.hudi.hadoop.utils;
 
+import org.apache.avro.AvroRuntimeException;
 import org.apache.avro.JsonProperties;
 import org.apache.avro.LogicalTypes;
 import org.apache.avro.Schema;
@@ -189,7 +190,14 @@ public class HoodieRealtimeRecordReaderUtils {
         Writable[] recordValues = new Writable[schema.getFields().size()];
         int recordValueIndex = 0;
         for (Schema.Field field : schema.getFields()) {
-          recordValues[recordValueIndex++] = avroToArrayWritable(record.get(field.name()), field.schema());
+          // TODO Revisit Avro exception handling in future
+          Object fieldValue = null;
+          try {
+            fieldValue = record.get(field.name());
+          } catch (AvroRuntimeException e) {
+            LOG.debug("Field:" + field.name() + "not found in Schema:" + schema.toString());
+          }
+          recordValues[recordValueIndex++] = avroToArrayWritable(fieldValue, field.schema());
         }
         return new ArrayWritable(Writable.class, recordValues);
       case ENUM:
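
Catching AvroRuntimeException per field keeps the conversion going when the
reader schema carries columns the record was never written with. A sketch of
an exception-free alternative (hypothetical, not what this patch does; the
TODO above suggests the handling may be revisited) would probe the record's
own schema before the lookup:

// Hypothetical alternative: consult the record's writer schema first
// instead of catching AvroRuntimeException on every missing field.
Schema.Field writerField = record.getSchema().getField(field.name());
Object fieldValue = (writerField == null) ? null : record.get(field.name());
recordValues[recordValueIndex++] = avroToArrayWritable(fieldValue, field.schema());
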
diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java
index 74b7120fd0..f334bbf3bc 100644
--- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java
+++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java
@@ -44,8 +44,10 @@ import org.apache.hudi.common.util.collection.Pair;
 import org.apache.hudi.exception.HoodieException;
 import org.apache.hudi.hadoop.RealtimeFileStatus;
 import org.apache.hudi.hadoop.config.HoodieRealtimeConfig;
+import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils;
 import org.apache.hudi.hadoop.testutils.InputFormatTestUtil;
 
+import org.apache.avro.generic.GenericRecord;
 import org.apache.avro.Schema;
 import org.apache.avro.Schema.Field;
 import org.apache.hadoop.conf.Configuration;
@@ -69,6 +71,7 @@ import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.Reporter;
+
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.io.TempDir;
@@ -897,6 +900,20 @@ public class TestHoodieRealtimeRecordReader {
     assertTrue(splits.length == 0);
   }
 
+  @Test
+  public void testAvroToArrayWritable() throws IOException {
+    Schema schema = SchemaTestUtil.getEvolvedSchema();
+    GenericRecord record = SchemaTestUtil.generateAvroRecordFromJson(schema, 1, "100", "100", false);
+    ArrayWritable aWritable = (ArrayWritable) HoodieRealtimeRecordReaderUtils.avroToArrayWritable(record, schema);
+    assertEquals(schema.getFields().size(), aWritable.get().length);
+
+    // In some queries, the generic records Hudi receives contain only a subset
+    // of the full schema's fields. Test the case where fields are missing from the record.
+    Schema schemaWithMetaFields = HoodieAvroUtils.addMetadataFields(schema);
+    ArrayWritable aWritable2 = (ArrayWritable) HoodieRealtimeRecordReaderUtils.avroToArrayWritable(record, schemaWithMetaFields);
+    assertEquals(schemaWithMetaFields.getFields().size(), aWritable2.get().length);
+  }
+
   private File createCompactionFile(java.nio.file.Path basePath, String commitTime)
       throws IOException {
     File file = basePath.resolve(".hoodie")
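
Note on the test above: HoodieAvroUtils.addMetadataFields prepends the Hudi
meta columns (_hoodie_commit_time and the other _hoodie_* fields) to the
schema, so the second avroToArrayWritable call iterates fields that the
generated record does not contain. Without the guard added in this patch,
record.get(...) on those names would throw AvroRuntimeException under
Avro 1.10; with it, those slots come back as null and the ArrayWritable
still has one entry per schema field.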