You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by za...@apache.org on 2022/07/08 10:40:23 UTC

[hive] branch master updated: HIVE-26373: ClassCastException when reading timestamps from HBase table with Avro data (Soumyakanti Das reviewed by Stamatis Zampetakis)

This is an automated email from the ASF dual-hosted git repository.

zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 97d7630bca1 HIVE-26373: ClassCastException when reading timestamps from HBase table with Avro data (Soumyakanti Das reviewed by Stamatis Zampetakis)
97d7630bca1 is described below

commit 97d7630bca10e96229519ab397f5cf122e5622e3
Author: Soumyakanti Das <so...@cloudera.com>
AuthorDate: Tue Jul 5 15:32:53 2022 -0700

    HIVE-26373: ClassCastException when reading timestamps from HBase table with Avro data (Soumyakanti Das reviewed by Stamatis Zampetakis)
    
    Closes #3418
---
 data/files/nested_ts.avsc                          | 27 ++++++++++++
 .../queries/positive/hbase_avro_nested_timestamp.q | 22 ++++++++++
 .../positive/hbase_avro_nested_timestamp.q.out     | 45 +++++++++++++++++++
 .../apache/hadoop/hive/hbase/HBaseTestSetup.java   | 51 ++++++++++++++++++++++
 .../hive/serde2/avro/AvroLazyObjectInspector.java  |  3 +-
 5 files changed, 147 insertions(+), 1 deletion(-)

diff --git a/data/files/nested_ts.avsc b/data/files/nested_ts.avsc
new file mode 100644
index 00000000000..eac0ad29475
--- /dev/null
+++ b/data/files/nested_ts.avsc
@@ -0,0 +1,27 @@
+{
+  "type": "record",
+  "name": "TableRecord",
+  "namespace": "org.apache.hive",
+  "fields": [
+    {
+      "name": "id",
+      "type": "string"
+    },
+    {
+      "name": "dischargedate",
+      "type": {
+        "name": "DateRecord",
+        "type": "record",
+        "fields": [
+          {
+            "name": "value",
+            "type": {
+              "type": "long",
+              "logicalType": "timestamp-millis"
+            }
+          }
+        ]
+      }
+    }
+  ]
+}
diff --git a/hbase-handler/src/test/queries/positive/hbase_avro_nested_timestamp.q b/hbase-handler/src/test/queries/positive/hbase_avro_nested_timestamp.q
new file mode 100644
index 00000000000..5f3a22cc51a
--- /dev/null
+++ b/hbase-handler/src/test/queries/positive/hbase_avro_nested_timestamp.q
@@ -0,0 +1,22 @@
+dfs -cp ${system:hive.root}data/files/nested_ts.avsc ${system:test.tmp.dir}/nested_ts.avsc;
+
+CREATE EXTERNAL TABLE hbase_avro_table(
+`key` string COMMENT '',
+`data_frv4` struct<`id`:string, `dischargedate`:struct<`value`:timestamp>>)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.hbase.HBaseSerDe'
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES (
+'serialization.format'='1',
+'hbase.columns.mapping' = ':key,data:frV4',
+'data.frV4.serialization.type'='avro',
+'data.frV4.avro.schema.url'='${system:test.tmp.dir}/nested_ts.avsc'
+)
+TBLPROPERTIES (
+'hbase.table.name' = 'HiveAvroTable',
+'hbase.struct.autogenerate'='true');
+
+set hive.vectorized.execution.enabled=false;
+set hive.fetch.task.conversion=none;
+
+select data_frV4.dischargedate.value from hbase_avro_table;
diff --git a/hbase-handler/src/test/results/positive/hbase_avro_nested_timestamp.q.out b/hbase-handler/src/test/results/positive/hbase_avro_nested_timestamp.q.out
new file mode 100644
index 00000000000..6f08b83e3cf
--- /dev/null
+++ b/hbase-handler/src/test/results/positive/hbase_avro_nested_timestamp.q.out
@@ -0,0 +1,45 @@
+PREHOOK: query: CREATE EXTERNAL TABLE hbase_avro_table(
+`key` string COMMENT '',
+`data_frv4` struct<`id`:string, `dischargedate`:struct<`value`:timestamp>>)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.hbase.HBaseSerDe'
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES (
+'serialization.format'='1',
+'hbase.columns.mapping' = ':key,data:frV4',
+'data.frV4.serialization.type'='avro',
+#### A masked pattern was here ####
+)
+TBLPROPERTIES (
+'hbase.table.name' = 'HiveAvroTable',
+'hbase.struct.autogenerate'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@hbase_avro_table
+POSTHOOK: query: CREATE EXTERNAL TABLE hbase_avro_table(
+`key` string COMMENT '',
+`data_frv4` struct<`id`:string, `dischargedate`:struct<`value`:timestamp>>)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.hbase.HBaseSerDe'
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES (
+'serialization.format'='1',
+'hbase.columns.mapping' = ':key,data:frV4',
+'data.frV4.serialization.type'='avro',
+#### A masked pattern was here ####
+)
+TBLPROPERTIES (
+'hbase.table.name' = 'HiveAvroTable',
+'hbase.struct.autogenerate'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@hbase_avro_table
+PREHOOK: query: select data_frV4.dischargedate.value from hbase_avro_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_avro_table
+#### A masked pattern was here ####
+POSTHOOK: query: select data_frV4.dischargedate.value from hbase_avro_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_avro_table
+#### A masked pattern was here ####
+2022-07-05 00:00:00
diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/hbase/HBaseTestSetup.java b/itests/util/src/main/java/org/apache/hadoop/hive/hbase/HBaseTestSetup.java
index 8baf1464b9a..202420854df 100644
--- a/itests/util/src/main/java/org/apache/hadoop/hive/hbase/HBaseTestSetup.java
+++ b/itests/util/src/main/java/org/apache/hadoop/hive/hbase/HBaseTestSetup.java
@@ -18,9 +18,21 @@
 
 package org.apache.hadoop.hive.hbase;
 
+import java.io.ByteArrayOutputStream;
+import java.io.File;
 import java.io.IOException;
+import java.nio.file.Paths;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.ZoneId;
 import java.util.Arrays;
 
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
@@ -29,6 +41,7 @@ import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.MiniHBaseCluster;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.Admin;
@@ -107,6 +120,7 @@ public class HBaseTestSetup extends QTestSetup {
       if (meta != null) meta.close();
     }
     createHBaseTable();
+    createAvroTable();
   }
 
   private void createHBaseTable() throws IOException {
@@ -158,6 +172,43 @@ public class HBaseTestSetup extends QTestSetup {
     }
   }
 
+  /** Builds an Avro data file holding one nested_ts.avsc record whose timestamp is 2022-07-05 00:00 UTC. */
+  private static byte[] createAvroRecordWithNestedTimestamp() throws IOException {
+    File schemaFile = Paths.get(System.getProperty("test.data.dir"), "nested_ts.avsc").toFile();
+    Schema schema = new Schema.Parser().parse(schemaFile);
+    GenericData.Record rootRecord = new GenericData.Record(schema);
+    rootRecord.put("id", "X338092");
+    GenericData.Record dateRecord = new GenericData.Record(schema.getField("dischargedate").schema());
+    final LocalDateTime _2022_07_05 = LocalDate.of(2022, 7, 5).atStartOfDay();
+    // Store in UTC (never the JVM default zone) as required per Avro specification and as done elsewhere in Hive
+    dateRecord.put("value", _2022_07_05.atZone(ZoneId.of("UTC")).toInstant().toEpochMilli());
+    rootRecord.put("dischargedate", dateRecord);
+    try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
+      try (DataFileWriter<GenericRecord> dataFileWriter
+             = new DataFileWriter<GenericRecord>(new GenericDatumWriter<>(schema))) {
+        dataFileWriter.create(schema, out);
+        dataFileWriter.append(rootRecord);
+      }
+      return out.toByteArray();
+    }
+  }
+
+  /** Creates HBase table "HiveAvroTable" (family "data") and puts one Avro record at row "1", data:frV4. */
+  private void createAvroTable() throws IOException {
+    final byte[] family = "data".getBytes();
+    final TableName avroTableName = TableName.valueOf("HiveAvroTable");
+    HTableDescriptor descriptor = new HTableDescriptor(avroTableName);
+    descriptor.addFamily(new HColumnDescriptor(family));
+    try (Admin admin = hbaseConn.getAdmin()) {
+      admin.createTable(descriptor);
+      try (Table avroTable = hbaseConn.getTable(avroTableName)) {
+        Put row = new Put("1".getBytes());
+        row.add(new KeyValue(row.getRow(), family, "frV4".getBytes(), createAvroRecordWithNestedTimestamp()));
+        avroTable.put(row);
+      }
+    }
+  }
+
   @Override
   public void tearDown() throws Exception {
     if (hbaseCluster != null) {
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java
index 5a857f2be65..d0956bde549 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.serde2.avro;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
@@ -498,6 +499,6 @@ public class AvroLazyObjectInspector extends LazySimpleStructObjectInspector {
    * */
   private boolean isPrimitive(Class<?> clazz) {
     return clazz.isPrimitive() || ClassUtils.wrapperToPrimitive(clazz) != null
-        || clazz.getSimpleName().equals("String");
+      || Arrays.asList("String", "Timestamp").contains(clazz.getSimpleName());
   }
 }