You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by dk...@apache.org on 2022/07/22 07:43:16 UTC

[hive] branch master updated: HIVE-26416: AcidUtils.isRawFormatFile() throws InvalidProtocolBufferException for non-ORC file (Denys Kuzmenko, reviewed by Ayush Saxena)

This is an automated email from the ASF dual-hosted git repository.

dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 6ed41d6e7f2 HIVE-26416: AcidUtils.isRawFormatFile() throws InvalidProtocolBufferException for non-ORC file (Denys Kuzmenko, reviewed by Ayush Saxena)
6ed41d6e7f2 is described below

commit 6ed41d6e7f24aa4af558c23b5fa483983761e95c
Author: Denys Kuzmenko <dk...@cloudera.com>
AuthorDate: Fri Jul 22 09:43:09 2022 +0200

    HIVE-26416: AcidUtils.isRawFormatFile() throws InvalidProtocolBufferException for non-ORC file (Denys Kuzmenko, reviewed by Ayush Saxena)
    
    Closes #3460
---
 .../org/apache/hadoop/hive/ql/io/AcidUtils.java    |  3 ++-
 .../org/apache/hadoop/hive/ql/TestTxnCommands.java | 25 ++++++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
index 5dd271c8caa..79fc6743384 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
@@ -52,6 +52,7 @@ import com.google.common.base.Preconditions;
 import com.google.common.cache.Cache;
 import com.google.common.cache.CacheBuilder;
 import com.google.common.collect.Lists;
+import com.google.protobuf.InvalidProtocolBufferException;
 import org.apache.commons.lang3.tuple.Pair;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
@@ -2569,7 +2570,7 @@ public class AcidUtils {
           List<String> columns = schema.getFieldNames();
          */
         return OrcInputFormat.isOriginal(reader);
-      } catch (FileFormatException ex) {
+      } catch (FileFormatException | InvalidProtocolBufferException ex) {
         //We may be parsing a delta for Insert-only table which may not even be an ORC file so
         //cannot have ROW_IDs in it.
         LOG.debug("isRawFormat() called on " + dataFile + " which is not an ORC file: " +
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
index f5928f932d3..13d660f8fda 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
@@ -2092,4 +2092,29 @@ public class TestTxnCommands extends TxnCommandsBaseForTests {
       Assert.fail("Expecting partition data to be removed from FS");
     }
   }
+
+  @Test
+  public void testIsRawFormatFile() throws Exception {
+    dropTable(new String[]{"file_formats"});
+    // HIVE-26416 regression: insert-only transactional table stored as '|'-delimited text, not ORC.
+    runStatementOnDriver("CREATE TABLE `file_formats`(`id` int, `name` string) " +
+      " ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' " +
+      "WITH SERDEPROPERTIES ( " +
+      " 'field.delim'='|', " +
+      " 'line.delim'='\n'," +
+      " 'serialization.format'='|')  " +
+      "STORED AS " +
+      " INPUTFORMAT " +
+      "   'org.apache.hadoop.mapred.TextInputFormat' " +
+      " OUTPUTFORMAT " +
+      "   'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' " +
+      "TBLPROPERTIES ( " +
+      " 'transactional'='true'," +
+      " 'transactional_properties'='insert_only')");
+    // Insert three rows through the driver.
+    runStatementOnDriver("insert into file_formats (id, name) values (1, 'Avro'),(2, 'Parquet'),(3, 'ORC')");
+    // Read back: expect all 3 rows (presumably via AcidUtils.isRawFormatFile() - TODO confirm).
+    List<String> res = runStatementOnDriver("select * from file_formats");
+    Assert.assertEquals(3, res.size());
+  }
 }