You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2021/01/18 21:39:40 UTC
[iceberg] branch master updated: Hive: Fix ORC projection with orc.force.positional.evolution=true (#2111)
This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new e0de218 Hive: Fix ORC projection with orc.force.positional.evolution=true (#2111)
e0de218 is described below
commit e0de218380c1470d6ee1e5f415a3b8ac66249e9d
Author: pvary <pv...@cloudera.com>
AuthorDate: Mon Jan 18 22:39:33 2021 +0100
Hive: Fix ORC projection with orc.force.positional.evolution=true (#2111)
---
.../org/apache/iceberg/hive/TestHiveMetastore.java | 35 ++++++++++++++--------
.../hive/HiveIcebergStorageHandlerTestUtils.java | 4 +++
.../org/apache/iceberg/mr/hive/TestHiveShell.java | 3 +-
orc/src/main/java/org/apache/iceberg/orc/ORC.java | 3 ++
4 files changed, 31 insertions(+), 14 deletions(-)
diff --git a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java
index 83d3508..1381b54 100644
--- a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java
+++ b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java
@@ -87,17 +87,26 @@ public class TestHiveMetastore {
private HiveClientPool clientPool;
/**
- * Starts a TestHiveMetastore with the default connection pool size (5).
+ * Starts a TestHiveMetastore with the default connection pool size (5) and the default HiveConf.
*/
public void start() {
- start(DEFAULT_POOL_SIZE);
+ start(new HiveConf(new Configuration(), TestHiveMetastore.class), DEFAULT_POOL_SIZE);
}
/**
- * Starts a TestHiveMetastore with a provided connection pool size.
+ * Starts a TestHiveMetastore with the default connection pool size (5) and the provided HiveConf.
+ * @param conf The hive configuration to use
+ */
+ public void start(HiveConf conf) {
+ start(conf, DEFAULT_POOL_SIZE);
+ }
+
+ /**
+ * Starts a TestHiveMetastore with a provided connection pool size and HiveConf.
+ * @param conf The hive configuration to use
* @param poolSize The number of threads in the executor pool
*/
- public void start(int poolSize) {
+ public void start(HiveConf conf, int poolSize) {
try {
this.hiveLocalDir = createTempDirectory("hive", asFileAttribute(fromString("rwxrwxrwx"))).toFile();
File derbyLogFile = new File(hiveLocalDir, "derby.log");
@@ -106,7 +115,9 @@ public class TestHiveMetastore {
TServerSocket socket = new TServerSocket(0);
int port = socket.getServerSocket().getLocalPort();
- this.hiveConf = newHiveConf(port);
+ initConf(conf, port);
+
+ this.hiveConf = conf;
this.server = newThriftServer(socket, poolSize, hiveConf);
this.executorService = Executors.newSingleThreadExecutor();
this.executorService.submit(() -> server.serve());
@@ -196,14 +207,12 @@ public class TestHiveMetastore {
return new TThreadPoolServer(args);
}
- private HiveConf newHiveConf(int port) {
- HiveConf newHiveConf = new HiveConf(new Configuration(), TestHiveMetastore.class);
- newHiveConf.set(HiveConf.ConfVars.METASTOREURIS.varname, "thrift://localhost:" + port);
- newHiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, "file:" + hiveLocalDir.getAbsolutePath());
- newHiveConf.set(HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL.varname, "false");
- newHiveConf.set(HiveConf.ConfVars.METASTORE_DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES.varname, "false");
- newHiveConf.set("iceberg.hive.client-pool-size", "2");
- return newHiveConf;
+ private void initConf(HiveConf conf, int port) {
+ conf.set(HiveConf.ConfVars.METASTOREURIS.varname, "thrift://localhost:" + port);
+ conf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, "file:" + hiveLocalDir.getAbsolutePath());
+ conf.set(HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL.varname, "false");
+ conf.set(HiveConf.ConfVars.METASTORE_DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES.varname, "false");
+ conf.set("iceberg.hive.client-pool-size", "2");
}
private void setupMetastoreDB(String dbURL) throws SQLException, IOException {
diff --git a/mr/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerTestUtils.java b/mr/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerTestUtils.java
index a02a997..1b01c1b 100644
--- a/mr/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerTestUtils.java
+++ b/mr/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerTestUtils.java
@@ -27,6 +27,7 @@ import org.apache.iceberg.Schema;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.mr.TestHelper;
import org.apache.iceberg.types.Types;
+import org.apache.orc.OrcConf;
import org.junit.rules.TemporaryFolder;
import static org.apache.iceberg.types.Types.NestedField.optional;
@@ -61,6 +62,9 @@ public class HiveIcebergStorageHandlerTestUtils {
TestHiveShell shell = new TestHiveShell();
shell.setHiveConfValue("hive.notification.event.poll.interval", "-1");
shell.setHiveConfValue("hive.tez.exec.print.summary", "true");
+ // We would like to make sure that ORC reading overrides this config, so reading Iceberg tables could work in
+ // systems (like Hive 3.2 and higher) where this value is set to true explicitly.
+ shell.setHiveConfValue(OrcConf.FORCE_POSITIONAL_EVOLUTION.getHiveConfName(), "true");
shell.start();
return shell;
}
diff --git a/mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java b/mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java
index e474b65..3fcf307 100644
--- a/mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java
+++ b/mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java
@@ -78,7 +78,8 @@ public class TestHiveShell {
}
public void start() {
- metastore.start();
+ // Create a copy of the HiveConf for the metastore
+ metastore.start(new HiveConf(hs2Conf));
hs2Conf.setVar(HiveConf.ConfVars.METASTOREURIS, metastore.hiveConf().getVar(HiveConf.ConfVars.METASTOREURIS));
hs2Conf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE,
metastore.hiveConf().getVar(HiveConf.ConfVars.METASTOREWAREHOUSE));
diff --git a/orc/src/main/java/org/apache/iceberg/orc/ORC.java b/orc/src/main/java/org/apache/iceberg/orc/ORC.java
index 05b968d..aff66e6 100644
--- a/orc/src/main/java/org/apache/iceberg/orc/ORC.java
+++ b/orc/src/main/java/org/apache/iceberg/orc/ORC.java
@@ -148,6 +148,9 @@ public class ORC {
} else {
this.conf = new Configuration();
}
+
+ // We need to turn positional schema evolution off since we use column name based schema evolution for projection
+ this.conf.setBoolean(OrcConf.FORCE_POSITIONAL_EVOLUTION.getHiveConfName(), false);
}
/**