You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by za...@apache.org on 2022/07/08 10:40:23 UTC
[hive] branch master updated: HIVE-26373: ClassCastException when reading timestamps from HBase table with Avro data (Soumyakanti Das reviewed by Stamatis Zampetakis)
This is an automated email from the ASF dual-hosted git repository.
zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 97d7630bca1 HIVE-26373: ClassCastException when reading timestamps from HBase table with Avro data (Soumyakanti Das reviewed by Stamatis Zampetakis)
97d7630bca1 is described below
commit 97d7630bca10e96229519ab397f5cf122e5622e3
Author: Soumyakanti Das <so...@cloudera.com>
AuthorDate: Tue Jul 5 15:32:53 2022 -0700
HIVE-26373: ClassCastException when reading timestamps from HBase table with Avro data (Soumyakanti Das reviewed by Stamatis Zampetakis)
Closes #3418
---
data/files/nested_ts.avsc | 27 ++++++++++++
.../queries/positive/hbase_avro_nested_timestamp.q | 22 ++++++++++
.../positive/hbase_avro_nested_timestamp.q.out | 45 +++++++++++++++++++
.../apache/hadoop/hive/hbase/HBaseTestSetup.java | 51 ++++++++++++++++++++++
.../hive/serde2/avro/AvroLazyObjectInspector.java | 3 +-
5 files changed, 147 insertions(+), 1 deletion(-)
diff --git a/data/files/nested_ts.avsc b/data/files/nested_ts.avsc
new file mode 100644
index 00000000000..eac0ad29475
--- /dev/null
+++ b/data/files/nested_ts.avsc
@@ -0,0 +1,27 @@
+{
+ "type": "record",
+ "name": "TableRecord",
+ "namespace": "org.apache.hive",
+ "fields": [
+ {
+ "name": "id",
+ "type": "string"
+ },
+ {
+ "name": "dischargedate",
+ "type": {
+ "name": "DateRecord",
+ "type": "record",
+ "fields": [
+ {
+ "name": "value",
+ "type": {
+ "type": "long",
+ "logicalType": "timestamp-millis"
+ }
+ }
+ ]
+ }
+ }
+ ]
+}
diff --git a/hbase-handler/src/test/queries/positive/hbase_avro_nested_timestamp.q b/hbase-handler/src/test/queries/positive/hbase_avro_nested_timestamp.q
new file mode 100644
index 00000000000..5f3a22cc51a
--- /dev/null
+++ b/hbase-handler/src/test/queries/positive/hbase_avro_nested_timestamp.q
@@ -0,0 +1,22 @@
+dfs -cp ${system:hive.root}data/files/nested_ts.avsc ${system:test.tmp.dir}/nested_ts.avsc;
+
+CREATE EXTERNAL TABLE hbase_avro_table(
+`key` string COMMENT '',
+`data_frv4` struct<`id`:string, `dischargedate`:struct<`value`:timestamp>>)
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.hbase.HBaseSerDe'
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES (
+'serialization.format'='1',
+'hbase.columns.mapping' = ':key,data:frV4',
+'data.frV4.serialization.type'='avro',
+'data.frV4.avro.schema.url'='${system:test.tmp.dir}/nested_ts.avsc'
+)
+TBLPROPERTIES (
+'hbase.table.name' = 'HiveAvroTable',
+'hbase.struct.autogenerate'='true');
+
+set hive.vectorized.execution.enabled=false;
+set hive.fetch.task.conversion=none;
+
+select data_frV4.dischargedate.value from hbase_avro_table;
diff --git a/hbase-handler/src/test/results/positive/hbase_avro_nested_timestamp.q.out b/hbase-handler/src/test/results/positive/hbase_avro_nested_timestamp.q.out
new file mode 100644
index 00000000000..6f08b83e3cf
--- /dev/null
+++ b/hbase-handler/src/test/results/positive/hbase_avro_nested_timestamp.q.out
@@ -0,0 +1,45 @@
+PREHOOK: query: CREATE EXTERNAL TABLE hbase_avro_table(
+`key` string COMMENT '',
+`data_frv4` struct<`id`:string, `dischargedate`:struct<`value`:timestamp>>)
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.hbase.HBaseSerDe'
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES (
+'serialization.format'='1',
+'hbase.columns.mapping' = ':key,data:frV4',
+'data.frV4.serialization.type'='avro',
+#### A masked pattern was here ####
+)
+TBLPROPERTIES (
+'hbase.table.name' = 'HiveAvroTable',
+'hbase.struct.autogenerate'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@hbase_avro_table
+POSTHOOK: query: CREATE EXTERNAL TABLE hbase_avro_table(
+`key` string COMMENT '',
+`data_frv4` struct<`id`:string, `dischargedate`:struct<`value`:timestamp>>)
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.hbase.HBaseSerDe'
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES (
+'serialization.format'='1',
+'hbase.columns.mapping' = ':key,data:frV4',
+'data.frV4.serialization.type'='avro',
+#### A masked pattern was here ####
+)
+TBLPROPERTIES (
+'hbase.table.name' = 'HiveAvroTable',
+'hbase.struct.autogenerate'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@hbase_avro_table
+PREHOOK: query: select data_frV4.dischargedate.value from hbase_avro_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_avro_table
+#### A masked pattern was here ####
+POSTHOOK: query: select data_frV4.dischargedate.value from hbase_avro_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_avro_table
+#### A masked pattern was here ####
+2022-07-05 00:00:00
diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/hbase/HBaseTestSetup.java b/itests/util/src/main/java/org/apache/hadoop/hive/hbase/HBaseTestSetup.java
index 8baf1464b9a..202420854df 100644
--- a/itests/util/src/main/java/org/apache/hadoop/hive/hbase/HBaseTestSetup.java
+++ b/itests/util/src/main/java/org/apache/hadoop/hive/hbase/HBaseTestSetup.java
@@ -18,9 +18,21 @@
package org.apache.hadoop.hive.hbase;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
import java.io.IOException;
+import java.nio.file.Paths;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.ZoneId;
import java.util.Arrays;
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
@@ -29,6 +41,7 @@ import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
@@ -107,6 +120,7 @@ public class HBaseTestSetup extends QTestSetup {
if (meta != null) meta.close();
}
createHBaseTable();
+ createAvroTable();
}
private void createHBaseTable() throws IOException {
@@ -158,6 +172,43 @@ public class HBaseTestSetup extends QTestSetup {
}
}
+ /**
+  * Builds the Avro payload used by the nested-timestamp HBase test: one record of the schema in
+  * {@code nested_ts.avsc} with {@code id = "X338092"} and a nested {@code dischargedate.value}
+  * holding 2022-07-05 00:00:00 UTC as epoch milliseconds.
+  *
+  * @return the bytes produced by an Avro {@link DataFileWriter} containing that single record
+  * @throws IOException propagated from parsing the schema file or writing the record
+  */
+ private static byte[] createAvroRecordWithNestedTimestamp() throws IOException {
+   File schemaFile = Paths.get(System.getProperty("test.data.dir"), "nested_ts.avsc").toFile();
+   Schema schema = new Schema.Parser().parse(schemaFile);
+   GenericData.Record rootRecord = new GenericData.Record(schema);
+   rootRecord.put("id", "X338092");
+   GenericData.Record dateRecord = new GenericData.Record(schema.getField("dischargedate").schema());
+   final LocalDateTime _2022_07_05 = LocalDate.of(2022, 7, 5).atStartOfDay();
+   // Store in UTC as required per Avro specification and as done by Hive in other parts of the system.
+   // The zone must be fixed to UTC (not the JVM default): otherwise the stored instant shifts with the
+   // host time zone and the expected query result "2022-07-05 00:00:00" is no longer reproducible.
+   dateRecord.put("value", _2022_07_05.atZone(ZoneId.of("UTC")).toInstant().toEpochMilli());
+   rootRecord.put("dischargedate", dateRecord);
+
+   try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
+     // Close the writer before reading the buffer so all buffered data is flushed into 'out'.
+     try (DataFileWriter<GenericRecord> dataFileWriter
+         = new DataFileWriter<GenericRecord>(new GenericDatumWriter<>(schema))) {
+       dataFileWriter.create(schema, out);
+       dataFileWriter.append(rootRecord);
+     }
+     return out.toByteArray();
+   }
+ }
+
+ /**
+  * Creates the HBase table {@code HiveAvroTable} with a single column family {@code data} and
+  * writes one row (key {@code 1}) whose {@code data:frV4} cell contains the bytes returned by
+  * {@link #createAvroRecordWithNestedTimestamp()}.
+  *
+  * @throws IOException propagated from the HBase admin/table calls or from building the payload
+  */
+ private void createAvroTable() throws IOException {
+   final TableName avroTableName = TableName.valueOf("HiveAvroTable");
+   final HTableDescriptor descriptor = new HTableDescriptor(avroTableName);
+   final HColumnDescriptor dataFamily = new HColumnDescriptor("data".getBytes());
+   descriptor.addFamily(dataFamily);
+
+   try (Admin admin = hbaseConn.getAdmin()) {
+     admin.createTable(descriptor);
+     // The table handle is opened only after the table exists; the payload is built inside this scope.
+     try (Table avroTable = hbaseConn.getTable(avroTableName)) {
+       final Put row = new Put("1".getBytes());
+       final KeyValue avroCell = new KeyValue("1".getBytes(), "data".getBytes(), "frV4".getBytes(),
+           createAvroRecordWithNestedTimestamp());
+       row.add(avroCell);
+       avroTable.put(row);
+     }
+   }
+ }
+
@Override
public void tearDown() throws Exception {
if (hbaseCluster != null) {
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java
index 5a857f2be65..d0956bde549 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.serde2.avro;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
@@ -498,6 +499,6 @@ public class AvroLazyObjectInspector extends LazySimpleStructObjectInspector {
* */
private boolean isPrimitive(Class<?> clazz) {
return clazz.isPrimitive() || ClassUtils.wrapperToPrimitive(clazz) != null
- || clazz.getSimpleName().equals("String");
+ || Arrays.asList("String", "Timestamp").contains(clazz.getSimpleName());
}
}