You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by bo...@apache.org on 2022/02/28 09:21:34 UTC

[impala] 02/02: IMPALA-11112: Impala can't resolve json tables created by Hive

This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 7942a8ca2316357296fe351a40b6a87b9de769c8
Author: pranav.lodha <pr...@cloudera.com>
AuthorDate: Tue Feb 22 12:32:05 2022 +0530

    IMPALA-11112: Impala can't resolve json tables created by Hive
    
    Impala was using wrong json serde string which was causing the
    subsequent errors. The error got resolved on correcting the json
    serde string. Now impala can resolve json tables created by hive
    and also hive can resolve json tables created in impala.
    End-to-end tests are also included in
    tests/metadata/test_hms_integration.py.
    
    Change-Id: I9cac55b58dca88d900db3256ceaa25c17d7864d5
    Reviewed-on: http://gerrit.cloudera.org:8080/18263
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../org/apache/impala/catalog/HdfsFileFormat.java     |  4 ++--
 .../apache/impala/catalog/HdfsStorageDescriptor.java  |  2 +-
 tests/metadata/test_hms_integration.py                | 19 +++++++++++++++++++
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsFileFormat.java b/fe/src/main/java/org/apache/impala/catalog/HdfsFileFormat.java
index 192bca5..332ddc5 100644
--- a/fe/src/main/java/org/apache/impala/catalog/HdfsFileFormat.java
+++ b/fe/src/main/java/org/apache/impala/catalog/HdfsFileFormat.java
@@ -51,7 +51,7 @@ public enum HdfsFileFormat {
       false, false, true),
   JSON("org.apache.hadoop.mapred.TextInputFormat",
       "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
-      "org.apache.hadoop.hive.serde2.lazy.JsonSerDe", false, false, true),
+      "org.apache.hadoop.hive.serde2.JsonSerDe", false, false, true),
   // LZO_TEXT is never used as an actual HdfsFileFormat. It is used only to store the
   // input format class and match against it (e.g. in HdfsCompression). Outside of this
   // file, tables that use the LZO input format class use HdfsFileFormat.TEXT.
@@ -122,7 +122,7 @@ public enum HdfsFileFormat {
       "parquet.hive.MapredParquetInputFormat"
   };
 
-  private static final String JSON_SERDE = "org.apache.hadoop.hive.serde2.lazy.JsonSerDe";
+  private static final String JSON_SERDE = "org.apache.hadoop.hive.serde2.JsonSerDe";
 
   private static Map<String, HdfsFileFormat> VALID_INPUT_FORMATS =
       ImmutableMap.<String, HdfsFileFormat>builder()
diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsStorageDescriptor.java b/fe/src/main/java/org/apache/impala/catalog/HdfsStorageDescriptor.java
index 34c5d0d..5402fae 100644
--- a/fe/src/main/java/org/apache/impala/catalog/HdfsStorageDescriptor.java
+++ b/fe/src/main/java/org/apache/impala/catalog/HdfsStorageDescriptor.java
@@ -74,7 +74,7 @@ public class HdfsStorageDescriptor {
       // support for the new input/output format classes. See IMPALA-4214.
       "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe", // (parquet)
       "org.apache.iceberg.mr.hive.HiveIcebergSerDe", // (iceberg)
-      "org.apache.hadoop.hive.serde2.lazy.JsonSerDe"); // (json)
+      "org.apache.hadoop.hive.serde2.JsonSerDe");// (json)
 
   private final static Logger LOG = LoggerFactory.getLogger(HdfsStorageDescriptor.class);
 
diff --git a/tests/metadata/test_hms_integration.py b/tests/metadata/test_hms_integration.py
index a4c023b..a8b6bf7 100644
--- a/tests/metadata/test_hms_integration.py
+++ b/tests/metadata/test_hms_integration.py
@@ -672,6 +672,25 @@ class TestHmsIntegration(ImpalaTestSuite):
         assert expected == self.hive_columns(table_name)
         assert expected == self.impala_columns(table_name)
 
+  def test_desc_json_table(self, vector):
+    """
+    This is to test whether json tables created in impala can be
+     described in hive and vice versa.
+    """
+
+    with self.ImpalaDbWrapper(self, self.unique_string()) as db_name:
+      with self.ImpalaTableWrapper(self, db_name + '.' + self.unique_string(),
+                                   '(x int) stored as jsonfile') as table_name:
+        expected = self.client.execute('DESC %s' % table_name)
+        assert expected == self.run_stmt_in_hive('DESC %s' % table_name)
+
+    with self.HiveDbWrapper(self, self.unique_string()) as db_name:
+      with self.HiveTableWrapper(self, db_name + '.' + self.unique_string(),
+                                 '(x int) stored as jsonfile') as table_name:
+        expected = self.run_stmt_in_hive('DESC %s' % table_name)
+        self.client.execute('INVALIDATE METADATA %s' % table_name)
+        assert expected == self.client.execute('DESC %s' % table_name)
+
   @pytest.mark.execute_serially
   def test_drop_database(self, vector):
     """