You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by ao...@apache.org on 2021/03/24 18:41:16 UTC

[iceberg] 01/18: Hive: Fix writing of Date, Decimal, Time and UUID types. (#2126)

This is an automated email from the ASF dual-hosted git repository.

aokolnychyi pushed a commit to branch 0.11.x
in repository https://gitbox.apache.org/repos/asf/iceberg.git

commit e12d468f59993f3ed5dbd00b0d2721c4851cbba6
Author: László Pintér <47...@users.noreply.github.com>
AuthorDate: Mon Jan 25 15:33:15 2021 +0100

    Hive: Fix writing of Date, Decimal, Time and UUID types. (#2126)
---
 .../IcebergDateObjectInspectorHive3.java           | 11 ++++-
 .../IcebergDecimalObjectInspector.java             |  9 ++++-
 .../IcebergTimeObjectInspector.java                |  5 ++-
 .../IcebergUUIDObjectInspector.java                |  5 ++-
 .../iceberg/mr/hive/HiveIcebergTestUtils.java      | 10 +++--
 .../TestHiveIcebergStorageHandlerWithEngine.java   | 47 ++++++++++++++++++++++
 .../TestIcebergTimeObjectInspector.java            |  5 ++-
 .../TestIcebergUUIDObjectInspector.java            | 11 ++---
 8 files changed, 86 insertions(+), 17 deletions(-)

diff --git a/hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspectorHive3.java b/hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspectorHive3.java
index 6ffa600..ab64b36 100644
--- a/hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspectorHive3.java
+++ b/hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspectorHive3.java
@@ -28,7 +28,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.iceberg.util.DateTimeUtil;
 
 public final class IcebergDateObjectInspectorHive3 extends AbstractPrimitiveJavaObjectInspector
-    implements DateObjectInspector {
+    implements DateObjectInspector, WriteObjectInspector {
 
   private static final IcebergDateObjectInspectorHive3 INSTANCE = new IcebergDateObjectInspectorHive3();
 
@@ -69,4 +69,13 @@ public final class IcebergDateObjectInspectorHive3 extends AbstractPrimitiveJava
     }
   }
 
+  @Override
+  public LocalDate convert(Object o) {
+    if (o == null) {
+      return null;
+    }
+
+    Date date = (Date) o;
+    return LocalDate.of(date.getYear(), date.getMonth(), date.getDay());
+  }
 }
diff --git a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDecimalObjectInspector.java b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDecimalObjectInspector.java
index 3a16833..b30a3fa 100644
--- a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDecimalObjectInspector.java
+++ b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDecimalObjectInspector.java
@@ -80,6 +80,13 @@ public final class IcebergDecimalObjectInspector extends AbstractPrimitiveJavaOb
 
   @Override
   public BigDecimal convert(Object o) {
-    return o == null ? null : ((HiveDecimal) o).bigDecimalValue();
+    if (o == null) {
+      return null;
+    }
+
+    BigDecimal result = ((HiveDecimal) o).bigDecimalValue();
+    // during the HiveDecimal to BigDecimal conversion the scale is lost, when the value is 0
+    result = result.setScale(scale());
+    return result;
   }
 }
diff --git a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimeObjectInspector.java b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimeObjectInspector.java
index f5be1fe..f38ec5b 100644
--- a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimeObjectInspector.java
+++ b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimeObjectInspector.java
@@ -19,6 +19,7 @@
 
 package org.apache.iceberg.mr.hive.serde.objectinspector;
 
+import java.time.LocalTime;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
@@ -49,8 +50,8 @@ public class IcebergTimeObjectInspector extends AbstractPrimitiveJavaObjectInspe
   }
 
   @Override
-  public Object convert(Object o) {
-    return o == null ? null : o.toString();
+  public LocalTime convert(Object o) {
+    return o == null ? null : LocalTime.parse((String) o);
   }
 
   @Override
diff --git a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergUUIDObjectInspector.java b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergUUIDObjectInspector.java
index 0cb8464..e5c44f3 100644
--- a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergUUIDObjectInspector.java
+++ b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergUUIDObjectInspector.java
@@ -19,6 +19,7 @@
 
 package org.apache.iceberg.mr.hive.serde.objectinspector;
 
+import java.util.UUID;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
@@ -49,8 +50,8 @@ public class IcebergUUIDObjectInspector extends AbstractPrimitiveJavaObjectInspe
   }
 
   @Override
-  public String convert(Object o) {
-    return o == null ? null : o.toString();
+  public UUID convert(Object o) {
+    return o == null ? null : UUID.fromString(o.toString());
   }
 
   @Override
diff --git a/mr/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergTestUtils.java b/mr/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergTestUtils.java
index 7f497cd..0bb7a3a 100644
--- a/mr/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergTestUtils.java
+++ b/mr/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergTestUtils.java
@@ -29,12 +29,14 @@ import java.nio.file.Paths;
 import java.sql.Timestamp;
 import java.time.LocalDate;
 import java.time.LocalDateTime;
+import java.time.LocalTime;
 import java.time.OffsetDateTime;
 import java.time.ZoneOffset;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.List;
+import java.util.UUID;
 import java.util.stream.Collectors;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -128,8 +130,8 @@ public class HiveIcebergTestUtils {
     record.set(9, new byte[]{0, 1, 2});
     record.set(10, ByteBuffer.wrap(new byte[]{0, 1, 2, 3}));
     record.set(11, new BigDecimal("0.0000000013"));
-    record.set(12, "11:33");
-    record.set(13, "73689599-d7fc-4dfb-b94e-106ff20284a5");
+    record.set(12, LocalTime.of(11, 33));
+    record.set(13, UUID.fromString("73689599-d7fc-4dfb-b94e-106ff20284a5"));
 
     return record;
   }
@@ -167,8 +169,8 @@ public class HiveIcebergTestUtils {
         new BytesWritable(record.get(9, byte[].class)),
         new BytesWritable(ByteBuffers.toByteArray(record.get(10, ByteBuffer.class))),
         new HiveDecimalWritable(HiveDecimal.create(record.get(11, BigDecimal.class))),
-        new Text(record.get(12, String.class)),
-        new Text(record.get(13, String.class))
+        new Text(record.get(12, LocalTime.class).toString()),
+        new Text(record.get(13, UUID.class).toString())
     );
   }
 
diff --git a/mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerWithEngine.java b/mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerWithEngine.java
index 6a27d62..78f65d6 100644
--- a/mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerWithEngine.java
+++ b/mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerWithEngine.java
@@ -20,6 +20,9 @@
 package org.apache.iceberg.mr.hive;
 
 import java.io.IOException;
+import java.sql.Timestamp;
+import java.time.LocalDateTime;
+import java.time.OffsetDateTime;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
@@ -310,6 +313,36 @@ public class TestHiveIcebergStorageHandlerWithEngine {
     HiveIcebergTestUtils.validateData(table, new ArrayList<>(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS), 0);
   }
 
+  @Test
+  public void testInsertSupportedTypes() throws IOException {
+    Assume.assumeTrue("Tez write is not implemented yet", executionEngine.equals("mr"));
+    for (int i = 0; i < SUPPORTED_TYPES.size(); i++) {
+      Type type = SUPPORTED_TYPES.get(i);
+      // TODO: remove this filter when issue #1881 is resolved
+      if (type == Types.UUIDType.get() && fileFormat == FileFormat.PARQUET) {
+        continue;
+      }
+      // TODO: remove this filter when we figure out how we could test binary types
+      if (type.equals(Types.BinaryType.get()) || type.equals(Types.FixedType.ofLength(5))) {
+        continue;
+      }
+      String tableName = type.typeId().toString().toLowerCase() + "_table_" + i;
+      String columnName = type.typeId().toString().toLowerCase() + "_column";
+
+      Schema schema = new Schema(required(1, "id", Types.LongType.get()), required(2, columnName, type));
+      List<Record> expected = TestHelper.generateRandomRecords(schema, 5, 0L);
+
+      Table table = testTables.createTable(shell, tableName, schema, fileFormat, ImmutableList.of());
+      StringBuilder query = new StringBuilder("INSERT INTO ").append(tableName).append(" VALUES")
+              .append(expected.stream()
+                      .map(r -> String.format("(%s,%s)", r.get(0),
+                              getStringValueForInsert(r.get(1), type)))
+                      .collect(Collectors.joining(",")));
+      shell.executeStatement(query.toString());
+      HiveIcebergTestUtils.validateData(table, expected, 0);
+    }
+  }
+
   /**
    * Testing map only inserts.
    * @throws IOException If there is an underlying IOException
@@ -579,4 +612,18 @@ public class TestHiveIcebergStorageHandlerWithEngine {
     }
     return query;
   }
+
+  private String getStringValueForInsert(Object value, Type type) {
+    String template = "\'%s\'";
+    if (type.equals(Types.TimestampType.withoutZone())) {
+      return String.format(template, Timestamp.valueOf((LocalDateTime) value).toString());
+    } else if (type.equals(Types.TimestampType.withZone())) {
+      return String.format(template, Timestamp.from(((OffsetDateTime) value).toInstant()).toString());
+    } else if (type.equals(Types.BooleanType.get())) {
+      // in hive2 boolean type values must not be surrounded in apostrophes. Otherwise the value is translated to true.
+      return value.toString();
+    } else {
+      return String.format(template, value.toString());
+    }
+  }
 }
diff --git a/mr/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimeObjectInspector.java b/mr/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimeObjectInspector.java
index 5ca5fe6..c635920 100644
--- a/mr/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimeObjectInspector.java
+++ b/mr/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimeObjectInspector.java
@@ -48,12 +48,13 @@ public class TestIcebergTimeObjectInspector {
     Assert.assertNull(oi.getPrimitiveWritableObject(null));
     Assert.assertNull(oi.convert(null));
 
-    String time = LocalTime.now().toString();
+    LocalTime localTime = LocalTime.now();
+    String time = localTime.toString();
     Text text = new Text(time);
 
     Assert.assertEquals(time, oi.getPrimitiveJavaObject(text));
     Assert.assertEquals(text, oi.getPrimitiveWritableObject(time));
-    Assert.assertEquals(time, oi.convert(text));
+    Assert.assertEquals(localTime, oi.convert(time));
 
     Text copy = (Text) oi.copyObject(text);
 
diff --git a/mr/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergUUIDObjectInspector.java b/mr/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergUUIDObjectInspector.java
index da13b32..cbf55e3 100644
--- a/mr/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergUUIDObjectInspector.java
+++ b/mr/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergUUIDObjectInspector.java
@@ -47,12 +47,13 @@ public class TestIcebergUUIDObjectInspector {
     Assert.assertNull(oi.getPrimitiveWritableObject(null));
     Assert.assertNull(oi.convert(null));
 
-    String uuid = UUID.randomUUID().toString();
-    Text text = new Text(uuid);
+    UUID uuid = UUID.randomUUID();
+    String uuidStr = uuid.toString();
+    Text text = new Text(uuidStr);
 
-    Assert.assertEquals(uuid, oi.getPrimitiveJavaObject(text));
-    Assert.assertEquals(text, oi.getPrimitiveWritableObject(uuid));
-    Assert.assertEquals(uuid, oi.convert(text));
+    Assert.assertEquals(uuidStr, oi.getPrimitiveJavaObject(text));
+    Assert.assertEquals(text, oi.getPrimitiveWritableObject(uuidStr));
+    Assert.assertEquals(uuid, oi.convert(uuidStr));
 
     Text copy = (Text) oi.copyObject(text);