You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by ak...@apache.org on 2022/10/20 20:01:41 UTC

[hudi] 02/02: Rebased `ByteBuffer` cloning onto the new utility

This is an automated email from the ASF dual-hosted git repository.

akudinkin pushed a commit to branch HUDI-4971-cancel-relocation
in repository https://gitbox.apache.org/repos/asf/hudi.git

commit a3017d2773c14e1400380e92d415a183518604a0
Author: Alexey Kudinkin <al...@infinilake.com>
AuthorDate: Thu Oct 20 13:01:13 2022 -0700

    Rebased `ByteBuffer` cloning onto the new utility
---
 .../src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java     | 4 ++--
 hudi-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java | 5 +++--
 .../src/main/scala/org/apache/hudi/ColumnStatsIndexSupport.scala    | 6 ++----
 .../src/main/scala/org/apache/spark/sql/hudi/SerDeUtils.scala       | 6 ++----
 4 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java
index ca59c301c8..c83ec68976 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java
@@ -54,6 +54,7 @@ import org.apache.hudi.exception.HoodieIOException;
 import org.apache.orc.TypeDescription;
 
 import static org.apache.avro.JsonProperties.NULL_VALUE;
+import static org.apache.hudi.common.util.BinaryUtils.toBytes;
 
 /**
  * Methods including addToVector, addUnionValue, createOrcSchema are originally from
@@ -221,8 +222,7 @@ public class AvroOrcUtils {
           binaryBytes = ((GenericData.Fixed)value).bytes();
         } else if (value instanceof ByteBuffer) {
           final ByteBuffer byteBuffer = (ByteBuffer) value;
-          binaryBytes = new byte[byteBuffer.remaining()];
-          byteBuffer.get(binaryBytes);
+          binaryBytes = toBytes(byteBuffer);
         } else if (value instanceof byte[]) {
           binaryBytes = (byte[]) value;
         } else {
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java
index 0cc4059197..4cb55f3790 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java
@@ -52,6 +52,8 @@ import java.util.Map;
 import java.util.Set;
 import java.util.stream.Collectors;
 
+import static org.apache.hudi.common.util.BinaryUtils.toBytes;
+
 /**
  * Utility functions for ORC files.
  */
@@ -238,8 +240,7 @@ public class OrcUtils extends BaseFileUtils {
     try (Reader reader = OrcFile.createReader(orcFilePath, OrcFile.readerOptions(conf))) {
       if (reader.hasMetadataValue("orc.avro.schema")) {
         ByteBuffer metadataValue = reader.getMetadataValue("orc.avro.schema");
-        byte[] bytes = new byte[metadataValue.remaining()];
-        metadataValue.get(bytes);
+        byte[] bytes = toBytes(metadataValue);
         return new Schema.Parser().parse(new String(bytes));
       } else {
         TypeDescription orcSchema = reader.getSchema();
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/ColumnStatsIndexSupport.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/ColumnStatsIndexSupport.scala
index 58511f791e..dc413afff1 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/ColumnStatsIndexSupport.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/ColumnStatsIndexSupport.scala
@@ -29,6 +29,7 @@ import org.apache.hudi.common.data.HoodieData
 import org.apache.hudi.common.model.HoodieRecord
 import org.apache.hudi.common.table.HoodieTableMetaClient
 import org.apache.hudi.common.table.view.FileSystemViewStorageConfig
+import org.apache.hudi.common.util.BinaryUtils.toBytes
 import org.apache.hudi.common.util.ValidationUtils.checkState
 import org.apache.hudi.common.util.collection
 import org.apache.hudi.common.util.hash.ColumnIndexID
@@ -469,10 +470,7 @@ object ColumnStatsIndexSupport {
         }
       case BinaryType =>
         value match {
-          case b: ByteBuffer =>
-            val bytes = new Array[Byte](b.remaining)
-            b.get(bytes)
-            bytes
+          case b: ByteBuffer => toBytes(b)
           case other => other
         }
 
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/SerDeUtils.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/SerDeUtils.scala
index 19d0a0a98b..294d282e3d 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/SerDeUtils.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/SerDeUtils.scala
@@ -33,10 +33,8 @@ object SerDeUtils {
   }
 
   def toBytes(o: Any): Array[Byte] = {
-    val bb: ByteBuffer = SERIALIZER_THREAD_LOCAL.get.serialize(o)
-    val bytes = new Array[Byte](bb.capacity())
-    bb.get(bytes)
-    bytes
+    val buf = SERIALIZER_THREAD_LOCAL.get.serialize(o)
+    org.apache.hudi.common.util.BinaryUtils.toBytes(buf) // qualified on purpose: a bare toBytes(buf) binds to this method and recurses forever
   }
 
   def toObject(bytes: Array[Byte]): Any = {