You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2021/01/18 21:39:40 UTC

[iceberg] branch master updated: Hive: Fix ORC projection with orc.force.positional.evolution=true (#2111)

This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new e0de218  Hive: Fix ORC projection with orc.force.positional.evolution=true (#2111)
e0de218 is described below

commit e0de218380c1470d6ee1e5f415a3b8ac66249e9d
Author: pvary <pv...@cloudera.com>
AuthorDate: Mon Jan 18 22:39:33 2021 +0100

    Hive: Fix ORC projection with orc.force.positional.evolution=true (#2111)
---
 .../org/apache/iceberg/hive/TestHiveMetastore.java | 35 ++++++++++++++--------
 .../hive/HiveIcebergStorageHandlerTestUtils.java   |  4 +++
 .../org/apache/iceberg/mr/hive/TestHiveShell.java  |  3 +-
 orc/src/main/java/org/apache/iceberg/orc/ORC.java  |  3 ++
 4 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java
index 83d3508..1381b54 100644
--- a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java
+++ b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java
@@ -87,17 +87,26 @@ public class TestHiveMetastore {
   private HiveClientPool clientPool;
 
   /**
-   * Starts a TestHiveMetastore with the default connection pool size (5).
+   * Starts a TestHiveMetastore with the default connection pool size (5) and the default HiveConf.
    */
   public void start() {
-    start(DEFAULT_POOL_SIZE);
+    start(new HiveConf(new Configuration(), TestHiveMetastore.class), DEFAULT_POOL_SIZE);
   }
 
   /**
-   * Starts a TestHiveMetastore with a provided connection pool size.
+   * Starts a TestHiveMetastore with the default connection pool size (5) and the provided HiveConf.
+   * @param conf The hive configuration to use
+   */
+  public void start(HiveConf conf) {
+    start(conf, DEFAULT_POOL_SIZE);
+  }
+
+  /**
+   * Starts a TestHiveMetastore with a provided connection pool size and HiveConf.
+   * @param conf The hive configuration to use
    * @param poolSize The number of threads in the executor pool
    */
-  public void start(int poolSize) {
+  public void start(HiveConf conf, int poolSize) {
     try {
       this.hiveLocalDir = createTempDirectory("hive", asFileAttribute(fromString("rwxrwxrwx"))).toFile();
       File derbyLogFile = new File(hiveLocalDir, "derby.log");
@@ -106,7 +115,9 @@ public class TestHiveMetastore {
 
       TServerSocket socket = new TServerSocket(0);
       int port = socket.getServerSocket().getLocalPort();
-      this.hiveConf = newHiveConf(port);
+      initConf(conf, port);
+
+      this.hiveConf = conf;
       this.server = newThriftServer(socket, poolSize, hiveConf);
       this.executorService = Executors.newSingleThreadExecutor();
       this.executorService.submit(() -> server.serve());
@@ -196,14 +207,12 @@ public class TestHiveMetastore {
     return new TThreadPoolServer(args);
   }
 
-  private HiveConf newHiveConf(int port) {
-    HiveConf newHiveConf = new HiveConf(new Configuration(), TestHiveMetastore.class);
-    newHiveConf.set(HiveConf.ConfVars.METASTOREURIS.varname, "thrift://localhost:" + port);
-    newHiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, "file:" + hiveLocalDir.getAbsolutePath());
-    newHiveConf.set(HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL.varname, "false");
-    newHiveConf.set(HiveConf.ConfVars.METASTORE_DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES.varname, "false");
-    newHiveConf.set("iceberg.hive.client-pool-size", "2");
-    return newHiveConf;
+  private void initConf(HiveConf conf, int port) {
+    conf.set(HiveConf.ConfVars.METASTOREURIS.varname, "thrift://localhost:" + port);
+    conf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, "file:" + hiveLocalDir.getAbsolutePath());
+    conf.set(HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL.varname, "false");
+    conf.set(HiveConf.ConfVars.METASTORE_DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES.varname, "false");
+    conf.set("iceberg.hive.client-pool-size", "2");
   }
 
   private void setupMetastoreDB(String dbURL) throws SQLException, IOException {
diff --git a/mr/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerTestUtils.java b/mr/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerTestUtils.java
index a02a997..1b01c1b 100644
--- a/mr/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerTestUtils.java
+++ b/mr/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerTestUtils.java
@@ -27,6 +27,7 @@ import org.apache.iceberg.Schema;
 import org.apache.iceberg.data.Record;
 import org.apache.iceberg.mr.TestHelper;
 import org.apache.iceberg.types.Types;
+import org.apache.orc.OrcConf;
 import org.junit.rules.TemporaryFolder;
 
 import static org.apache.iceberg.types.Types.NestedField.optional;
@@ -61,6 +62,9 @@ public class HiveIcebergStorageHandlerTestUtils {
     TestHiveShell shell = new TestHiveShell();
     shell.setHiveConfValue("hive.notification.event.poll.interval", "-1");
     shell.setHiveConfValue("hive.tez.exec.print.summary", "true");
+    // We would like to make sure that ORC reading overrides this config, so reading Iceberg tables could work in
+    // systems (like Hive 3.2 and higher) where this value is set to true explicitly.
+    shell.setHiveConfValue(OrcConf.FORCE_POSITIONAL_EVOLUTION.getHiveConfName(), "true");
     shell.start();
     return shell;
   }
diff --git a/mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java b/mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java
index e474b65..3fcf307 100644
--- a/mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java
+++ b/mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java
@@ -78,7 +78,8 @@ public class TestHiveShell {
   }
 
   public void start() {
-    metastore.start();
+    // Create a copy of the HiveConf for the metastore
+    metastore.start(new HiveConf(hs2Conf));
     hs2Conf.setVar(HiveConf.ConfVars.METASTOREURIS, metastore.hiveConf().getVar(HiveConf.ConfVars.METASTOREURIS));
     hs2Conf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE,
         metastore.hiveConf().getVar(HiveConf.ConfVars.METASTOREWAREHOUSE));
diff --git a/orc/src/main/java/org/apache/iceberg/orc/ORC.java b/orc/src/main/java/org/apache/iceberg/orc/ORC.java
index 05b968d..aff66e6 100644
--- a/orc/src/main/java/org/apache/iceberg/orc/ORC.java
+++ b/orc/src/main/java/org/apache/iceberg/orc/ORC.java
@@ -148,6 +148,9 @@ public class ORC {
       } else {
         this.conf = new Configuration();
       }
+
+      // We need to turn positional schema evolution off since we use column name based schema evolution for projection
+      this.conf.setBoolean(OrcConf.FORCE_POSITIONAL_EVOLUTION.getHiveConfName(), false);
     }
 
     /**