You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by tm...@apache.org on 2020/01/14 18:04:52 UTC
[impala] 02/02: IMPALA-8046: Support CREATE TABLE from an ORC file
This is an automated email from the ASF dual-hosted git repository.
tmarshall pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 0511b44f9236c655e695185b33f412ec65a80a07
Author: norbert.luksa <no...@cloudera.com>
AuthorDate: Thu Dec 19 09:32:34 2019 +0100
IMPALA-8046: Support CREATE TABLE from an ORC file
Impala supports creating a table using the schema of a file.
However, only Parquet is supported currently. This commit adds
support for creating tables from ORC files.
The change relies on the ORC Java API with version 1.5 or
greater, because of a bug in earlier versions. Therefore, ORC is
listed as an external dependency, instead of relying on Hive's
ORC version (from Hive3, Hive also lists it as a dependency).
Also, the commit performs a little clean-up on the ParquetHelper
class, renaming it to ParquetSchemaExtractor and removing outdated
comments.
To create a table from an ORC file, run:
CREATE TABLE tablename LIKE ORC '/path/to/file'
Tests:
* Added analysis tests for primitive and complex types.
* Added e2e tests for creating tables from ORC files.
Change-Id: I77cd84cda2ed86516937a67eb320fd41e3f1cf2d
Reviewed-on: http://gerrit.cloudera.org:8080/14811
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
bin/impala-config.sh | 1 +
fe/pom.xml | 26 +++
.../impala/analysis/CreateTableLikeFileStmt.java | 14 +-
.../apache/impala/analysis/OrcSchemaExtractor.java | 200 +++++++++++++++++++++
...quetHelper.java => ParquetSchemaExtractor.java} | 38 ++--
.../org/apache/impala/common/FileSystemUtil.java | 8 +
.../org/apache/impala/util/FileAnalysisUtil.java | 50 ++++++
.../org/apache/impala/analysis/AnalyzeDDLTest.java | 74 ++++++--
impala-parent/pom.xml | 1 +
shaded-deps/pom.xml | 1 +
.../QueryTest/create-table-like-file-orc.test | 89 +++++++++
.../queries/QueryTest/create-table-like-file.test | 37 ----
.../queries/QueryTest/create-table-like-table.test | 27 +++
tests/common/skip.py | 3 +-
tests/metadata/test_ddl.py | 7 +
15 files changed, 495 insertions(+), 81 deletions(-)
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 9848505..4758da4 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -190,6 +190,7 @@ export IMPALA_AVRO_JAVA_VERSION=1.8.2-cdh6.x-SNAPSHOT
export IMPALA_LLAMA_MINIKDC_VERSION=1.0.0
export IMPALA_KITE_VERSION=1.0.0-cdh6.x-SNAPSHOT
export IMPALA_KUDU_JAVA_VERSION=1.11.0-cdh6.x-SNAPSHOT
+export IMPALA_ORC_JAVA_VERSION=1.6.2
# When IMPALA_(CDH_COMPONENT)_URL are overridden, they may contain '$(platform_label)'
# which will be substituted for the CDH platform label in bootstrap_toolchain.py
diff --git a/fe/pom.xml b/fe/pom.xml
index 046851d..d75d1c2 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -306,6 +306,22 @@ under the License.
</dependency>
<dependency>
+ <groupId>org.apache.orc</groupId>
+ <artifactId>orc-core</artifactId>
+ <version>${orc.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>${hbase.version}</version>
@@ -713,6 +729,7 @@ under the License.
<exclude>org.apache.kudu:*</exclude>
<exclude>org.apache.sentry:*</exclude>
<exclude>org.apache.parquet:*</exclude>
+ <exclude>org.apache.orc:*</exclude>
</excludes>
<includes>
<!-- hadoop-yarn-common depends on some Jetty utilities. -->
@@ -725,6 +742,7 @@ under the License.
<include>org.apache.kudu:*:${kudu.version}</include>
<include>org.apache.sentry:*:${sentry.version}</include>
<include>org.apache.parquet:*:${parquet.version}</include>
+ <include>org.apache.orc:*:${orc.version}</include>
</includes>
</bannedDependencies>
</rules>
@@ -946,6 +964,14 @@ under the License.
<groupId>org.apache.ant</groupId>
<artifactId>*</artifactId>
</exclusion>
+ <exclusion>
+ <groupId>orc</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.orc</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
</exclusions>
</dependency>
diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeFileStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeFileStmt.java
index 5053572..2d034b6 100644
--- a/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeFileStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeFileStmt.java
@@ -26,6 +26,7 @@ import org.apache.impala.catalog.HdfsCompression;
import org.apache.impala.catalog.HdfsFileFormat;
import org.apache.impala.common.AnalysisException;
import org.apache.impala.common.Pair;
+import org.apache.impala.compat.MetastoreShim;
import org.apache.impala.thrift.THdfsFileFormat;
@@ -71,11 +72,18 @@ public class CreateTableLikeFileStmt extends CreateTableStmt {
schemaLocation_.analyze(analyzer, Privilege.ALL, FsAction.READ);
switch (schemaFileFormat_) {
case PARQUET:
- getColumnDefs().addAll(ParquetHelper.extractParquetSchema(schemaLocation_));
+ getColumnDefs().addAll(ParquetSchemaExtractor.extract(schemaLocation_));
+ break;
+ case ORC:
+ if (MetastoreShim.getMajorVersion() < 3) {
+ throw new AnalysisException("Creating table like ORC file is unsupported for " +
+ "Hive with version < 3");
+ }
+ getColumnDefs().addAll(OrcSchemaExtractor.extract(schemaLocation_));
break;
default:
- throw new AnalysisException("Unsupported file type for schema inference: "
- + schemaFileFormat_);
+ throw new AnalysisException("Unsupported file type for schema inference: " +
+ schemaFileFormat_);
}
super.analyze(analyzer);
}
diff --git a/fe/src/main/java/org/apache/impala/analysis/OrcSchemaExtractor.java b/fe/src/main/java/org/apache/impala/analysis/OrcSchemaExtractor.java
new file mode 100644
index 0000000..9515ff4
--- /dev/null
+++ b/fe/src/main/java/org/apache/impala/analysis/OrcSchemaExtractor.java
@@ -0,0 +1,200 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.analysis;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.impala.catalog.MapType;
+import org.apache.impala.catalog.ScalarType;
+import org.apache.impala.catalog.StructField;
+import org.apache.impala.catalog.ArrayType;
+import org.apache.impala.catalog.StructType;
+import org.apache.impala.catalog.Type;
+import org.apache.impala.common.AnalysisException;
+import org.apache.impala.common.FileSystemUtil;
+import org.apache.impala.util.FileAnalysisUtil;
+import org.apache.orc.OrcFile;
+import org.apache.orc.OrcFile.ReaderOptions;
+import org.apache.orc.Reader;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.TypeDescription.Category;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Provides a helper function (extract()) which extracts the Impala schema from a given
+ * ORC file. Details of the ORC types:
+ * https://orc.apache.org/docs/types.html
+ */
+public class OrcSchemaExtractor {
+ private final static String ERROR_MSG =
+ "Failed to convert ORC type\n%s\nto an Impala %s type:\n%s\n";
+
+  /**
+   * Opens 'pathToFile' with the ORC reader and returns the schema stored in the file.
+   * The returned schema is itself an ORC type (TypeDescription), represented as a
+   * struct.
+   *
+   * Throws an AnalysisException if FileAnalysisUtil.CheckIfFile() rejects the path or
+   * if the ORC reader cannot be created for it.
+   */
+  private static TypeDescription loadOrcSchema(Path pathToFile) throws AnalysisException {
+    FileAnalysisUtil.CheckIfFile(pathToFile);
+    try {
+      ReaderOptions readerOptions = new ReaderOptions(FileSystemUtil.getConfiguration());
+      // NOTE(review): the Reader is never closed here; confirm whether the ORC version
+      // in use expects callers to release it.
+      return OrcFile.createReader(pathToFile, readerOptions).getSchema();
+    } catch (IOException e) {
+      // createReader() reports every failure as an IOException, including the case
+      // where the file is simply not an ORC file.
+      throw new AnalysisException("Failed to open file as an ORC file: " + e);
+    }
+  }
+
+  /**
+   * Converts a primitive ORC type to the corresponding Impala Type.
+   *
+   * CHAR and VARCHAR keep the maximum length recorded in the ORC type and DECIMAL keeps
+   * its precision and scale. BINARY has no dedicated mapping and is returned as STRING.
+   *
+   * 'type' must be primitive (checked with a precondition). Throws an AnalysisException
+   * for a primitive category that has no case below.
+   */
+  static private Type convertPrimitiveOrcType(TypeDescription type)
+      throws AnalysisException {
+    Category category = type.getCategory();
+    Preconditions.checkState(category.isPrimitive());
+    switch (category) {
+      case BINARY: return Type.STRING;
+      case BOOLEAN: return Type.BOOLEAN;
+      case BYTE: return Type.TINYINT;
+      case CHAR: return ScalarType.createCharType(type.getMaxLength());
+      case DATE: return Type.DATE;
+      case DECIMAL:
+        return ScalarType.createDecimalType(type.getPrecision(), type.getScale());
+      case DOUBLE: return Type.DOUBLE;
+      case FLOAT: return Type.FLOAT;
+      case INT: return Type.INT;
+      case LONG: return Type.BIGINT;
+      case SHORT: return Type.SMALLINT;
+      case STRING: return Type.STRING;
+      case TIMESTAMP: return Type.TIMESTAMP;
+      case VARCHAR: return ScalarType.createVarcharType(type.getMaxLength());
+      default:
+        // The category comes from a user-supplied file, so a value without a case
+        // above (for example one added by a newer ORC release) is a user-facing
+        // analysis error, not an internal invariant violation.
+        throw new AnalysisException(
+            "Unsupported ORC primitive type: " + category.getName());
+    }
+  }
+
+  /**
+   * Builds an Impala ArrayType from an ORC list type. The list is expected to have
+   * exactly one child, which describes the element type.
+   */
+  private static ArrayType convertArray(TypeDescription listType)
+      throws AnalysisException {
+    List<TypeDescription> children = listType.getChildren();
+    Preconditions.checkState(children.size() == 1);
+    TypeDescription elementType = children.get(0);
+    return new ArrayType(convertOrcType(elementType));
+  }
+
+  /**
+   * Builds an Impala MapType from an ORC map type. The map is expected to have exactly
+   * two children: the key type followed by the value type.
+   *
+   * Throws an AnalysisException if the key type is not primitive, or if converting the
+   * key or value type fails.
+   */
+  private static MapType convertMap(TypeDescription mapType) throws AnalysisException {
+    List<TypeDescription> children = mapType.getChildren();
+    Preconditions.checkState(children.size() == 2);
+    TypeDescription keyType = children.get(0);
+    TypeDescription valueType = children.get(1);
+
+    if (keyType.getCategory().isPrimitive()) {
+      return new MapType(convertOrcType(keyType), convertOrcType(valueType));
+    }
+    throw new AnalysisException(String.format(ERROR_MSG, mapType.toString(), "MAP",
+        "The key type of the MAP type must be primitive."));
+  }
+
+  /**
+   * Builds an Impala StructType from an ORC struct type. Each ORC child becomes one
+   * StructField, named after the field name at the same position.
+   *
+   * Throws an AnalysisException if converting any child type fails.
+   */
+  private static StructType convertStruct(TypeDescription structType)
+      throws AnalysisException {
+    List<StructField> fields = new ArrayList<>();
+    List<String> names = structType.getFieldNames();
+    List<TypeDescription> children = structType.getChildren();
+    // Names and child types are matched by position, so the lists must line up.
+    Preconditions.checkState(children.size() == names.size());
+    int idx = 0;
+    while (idx < children.size()) {
+      fields.add(new StructField(names.get(idx), convertOrcType(children.get(idx))));
+      idx++;
+    }
+    return new StructType(fields);
+  }
+
+  /**
+   * Converts a non-primitive ORC type (LIST, MAP or STRUCT) to an Impala Type.
+   *
+   * 'type' must not be primitive (checked with a precondition). Throws an
+   * AnalysisException for UNION, which has no conversion here, and whenever converting
+   * a nested type fails.
+   */
+  static private Type convertComplexOrcType(TypeDescription type)
+      throws AnalysisException {
+    Category category = type.getCategory();
+    Preconditions.checkState(!category.isPrimitive());
+
+    switch (category) {
+      case LIST: return convertArray(type);
+      case MAP: return convertMap(type);
+      case STRUCT: return convertStruct(type);
+      case UNION:
+        // NOTE(review): the text after "for field" is the category name, not a column
+        // name; no field name is available in this method.
+        throw new AnalysisException(
+            "Unsupported ORC type UNION for field " + category.getName());
+      default:
+        // Only non-primitive categories can reach this point, so report the category
+        // as an unexpected complex type.
+        Preconditions.checkState(false,
+            "Unexpected ORC complex type: " + category.getName());
+        return null;
+    }
+  }
+
+  /**
+   * Converts any ORC type to an Impala Type by dispatching on whether its category is
+   * primitive or not.
+   */
+  static private Type convertOrcType(TypeDescription type) throws AnalysisException {
+    return type.getCategory().isPrimitive()
+        ? convertPrimitiveOrcType(type)
+        : convertComplexOrcType(type);
+  }
+
+ /**
+ * Parses an ORC file stored in HDFS and returns the corresponding Impala schema.
+ * This fails with an analysis exception if any errors occur reading the file,
+ * parsing the ORC schema, or if the ORC types cannot be represented in Impala.
+ */
+ static public List<ColumnDef> extract(HdfsUri location) throws AnalysisException {
+ List<ColumnDef> schema = new ArrayList<>();
+ TypeDescription orcSchema = loadOrcSchema(location.getPath()); // Returns a STRUCT.
+ List<TypeDescription> subTypes = orcSchema.getChildren();
+ List<String> fieldNames = orcSchema.getFieldNames();
+ Preconditions.checkState(subTypes.size() == fieldNames.size());
+ for (int i = 0; i < subTypes.size(); i++) {
+ TypeDescription orcType = subTypes.get(i);
+ Type type = convertOrcType(orcType);
+ Preconditions.checkNotNull(type);
+ String colName = fieldNames.get(i);
+ Map<ColumnDef.Option, Object> option = new HashMap<>();
+ option.put(ColumnDef.Option.COMMENT, "Inferred from ORC file.");
+ schema.add(new ColumnDef(colName, new TypeDef(type), option));
+ }
+ return schema;
+ }
+}
diff --git a/fe/src/main/java/org/apache/impala/analysis/ParquetHelper.java b/fe/src/main/java/org/apache/impala/analysis/ParquetSchemaExtractor.java
similarity index 92%
rename from fe/src/main/java/org/apache/impala/analysis/ParquetHelper.java
rename to fe/src/main/java/org/apache/impala/analysis/ParquetSchemaExtractor.java
index dc55a34..0e2d15a 100644
--- a/fe/src/main/java/org/apache/impala/analysis/ParquetHelper.java
+++ b/fe/src/main/java/org/apache/impala/analysis/ParquetSchemaExtractor.java
@@ -25,14 +25,14 @@ import java.util.List;
import java.util.Map;
import com.google.common.base.Preconditions;
-import org.apache.hadoop.fs.FileSystem;
+
import org.apache.hadoop.fs.Path;
+import org.apache.parquet.format.converter.ParquetMetadataConverter;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.LogicalTypeAnnotation.*;
-
import org.apache.impala.catalog.ArrayType;
import org.apache.impala.catalog.MapType;
import org.apache.impala.catalog.ScalarType;
@@ -41,16 +41,13 @@ import org.apache.impala.catalog.StructType;
import org.apache.impala.catalog.Type;
import org.apache.impala.common.AnalysisException;
import org.apache.impala.common.FileSystemUtil;
+import org.apache.impala.util.FileAnalysisUtil;
/**
- * Provides extractParquetSchema() to extract a schema
- * from a parquet file.
- *
- * Because Parquet's Java package changed between Parquet 1.5
- * and 1.9, a second copy of this file, with "org.apache.parquet." replaced
- * with "org.apache.org.apache.parquet." is generated by the build system.
+ * Provides a helper function (extract()) which extracts the Impala schema from a given
+ * Parquet file.
*/
-class ParquetHelper {
+class ParquetSchemaExtractor {
private final static String ERROR_MSG =
"Failed to convert Parquet type\n%s\nto an Impala %s type:\n%s\n";
@@ -61,21 +58,11 @@ class ParquetHelper {
*/
private static org.apache.parquet.schema.MessageType loadParquetSchema(Path pathToFile)
throws AnalysisException {
- try {
- FileSystem fs = pathToFile.getFileSystem(FileSystemUtil.getConfiguration());
- if (!fs.isFile(pathToFile)) {
- throw new AnalysisException("Cannot infer schema, path is not a file: " +
- pathToFile);
- }
- } catch (IOException e) {
- throw new AnalysisException("Failed to connect to filesystem:" + e);
- } catch (IllegalArgumentException e) {
- throw new AnalysisException(e.getMessage());
- }
+ FileAnalysisUtil.CheckIfFile(pathToFile);
ParquetMetadata readFooter = null;
try {
readFooter = ParquetFileReader.readFooter(FileSystemUtil.getConfiguration(),
- pathToFile);
+ pathToFile, ParquetMetadataConverter.NO_FILTER);
} catch (FileNotFoundException e) {
throw new AnalysisException("File not found: " + e);
} catch (IOException e) {
@@ -95,7 +82,8 @@ class ParquetHelper {
* Converts a "primitive" Parquet type to an Impala type.
* A primitive type is a non-nested type with no annotations.
*/
- private static Type convertPrimitiveParquetType(org.apache.parquet.schema.Type parquetType)
+ private static Type convertPrimitiveParquetType(
+ org.apache.parquet.schema.Type parquetType)
throws AnalysisException {
Preconditions.checkState(parquetType.isPrimitive());
PrimitiveType prim = parquetType.asPrimitiveType();
@@ -339,9 +327,9 @@ class ParquetHelper {
* This fails with an analysis exception if any errors occur reading the file,
* parsing the Parquet schema, or if the Parquet types cannot be represented in Impala.
*/
- static List<ColumnDef> extractParquetSchema(HdfsUri location)
- throws AnalysisException {
- org.apache.parquet.schema.MessageType parquetSchema = loadParquetSchema(location.getPath());
+ static List<ColumnDef> extract(HdfsUri location) throws AnalysisException {
+ org.apache.parquet.schema.MessageType parquetSchema =
+ loadParquetSchema(location.getPath());
List<org.apache.parquet.schema.Type> fields = parquetSchema.getFields();
List<ColumnDef> schema = new ArrayList<>();
diff --git a/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java b/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java
index 7eccd13..f77fd55 100644
--- a/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java
+++ b/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java
@@ -599,6 +599,14 @@ public class FileSystemUtil {
}
/**
+ * Returns true if the path 'p' is a file, false if not. Throws if path does not exist.
+ */
+  public static boolean isFile(Path p) throws IOException, FileNotFoundException {
+    // Look up the status of 'p' on the filesystem that owns it and report whether
+    // that status describes a file.
+    return getFileSystemForPath(p).getFileStatus(p).isFile();
+  }
+
+ /**
* Return the path of 'path' relative to the startPath. This may
* differ from simply the file name in the case of recursive listings.
*/
diff --git a/fe/src/main/java/org/apache/impala/util/FileAnalysisUtil.java b/fe/src/main/java/org/apache/impala/util/FileAnalysisUtil.java
new file mode 100644
index 0000000..d4abb5e
--- /dev/null
+++ b/fe/src/main/java/org/apache/impala/util/FileAnalysisUtil.java
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.util;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.impala.common.AnalysisException;
+import org.apache.impala.common.FileSystemUtil;
+
+/**
+ * Provides common utilities for ORCSchemeExtractor and ParquetSchemeExtractor.
+ */
+public class FileAnalysisUtil {
+
+ /**
+ * Throws if the given path is not a file.
+ */
+ public static void CheckIfFile(Path pathToFile) throws AnalysisException {
+ try {
+ if (!FileSystemUtil.isFile(pathToFile)) {
+ throw new AnalysisException("Cannot infer schema, path is not a file: " +
+ pathToFile);
+ }
+ } catch (FileNotFoundException e) {
+ throw new AnalysisException("Cannot infer schema, path does not exist: " +
+ pathToFile);
+ } catch (IOException e) {
+ throw new AnalysisException("Failed to connect to filesystem:" + e);
+ } catch (IllegalArgumentException e) {
+ throw new AnalysisException(e.getMessage());
+ }
+ }
+}
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
index dff57a2..8a680b8 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
@@ -54,9 +54,8 @@ import org.apache.impala.thrift.TBackendGflags;
import org.apache.impala.thrift.TDescribeTableParams;
import org.apache.impala.thrift.TQueryOptions;
import org.apache.impala.util.MetaStoreUtil;
-import org.junit.AfterClass;
import org.junit.Assert;
-import org.junit.BeforeClass;
+import org.junit.Assume;
import org.junit.Test;
import com.google.common.base.Joiner;
@@ -1994,27 +1993,27 @@ public class AnalyzeDDLTest extends FrontendTestBase {
"Database does not exist: database_DNE");
// check invalid paths
- AnalysisError("create table if not exists functional.zipcode_incomes like parquet "
- + "'/test-warehouse'",
+ AnalysisError("create table if not exists functional.zipcode_incomes like parquet " +
+ "'/test-warehouse'",
"Cannot infer schema, path is not a file: hdfs://localhost:20500/test-warehouse");
AnalysisError("create table newtbl_DNE like parquet 'foobar'",
"URI path must be absolute: foobar");
AnalysisError("create table newtbl_DNE like parquet '/not/a/file/path'",
- "Cannot infer schema, path is not a file: "
- + "hdfs://localhost:20500/not/a/file/path");
- AnalysisError("create table if not exists functional.zipcode_incomes like parquet "
- + "'file:///tmp/foobar'",
- "Cannot infer schema, path is not a file: file:/tmp/foobar");
+ "Cannot infer schema, path does not exist: " +
+ "hdfs://localhost:20500/not/a/file/path");
+ AnalysisError("create table if not exists functional.zipcode_incomes like parquet " +
+ "'file:///tmp/foobar'",
+ "Cannot infer schema, path does not exist: file:/tmp/foobar");
// check valid paths with bad file contents
- AnalysisError("create table database_DNE.newtbl_DNE like parquet "
- + "'/test-warehouse/zipcode_incomes_rc/000000_0'",
- "File is not a parquet file: "
- + "hdfs://localhost:20500/test-warehouse/zipcode_incomes_rc/000000_0");
+ AnalysisError("create table database_DNE.newtbl_DNE like parquet " +
+ "'/test-warehouse/zipcode_incomes_rc/000000_0'",
+ "File is not a parquet file: " +
+ "hdfs://localhost:20500/test-warehouse/zipcode_incomes_rc/000000_0");
// this is a decimal file without annotations
- AnalysisError("create table if not exists functional.zipcode_incomes like parquet "
- + "'/test-warehouse/schemas/malformed_decimal_tiny.parquet'",
+ AnalysisError("create table if not exists functional.zipcode_incomes like parquet " +
+ "'/test-warehouse/schemas/malformed_decimal_tiny.parquet'",
"Unsupported parquet type FIXED_LEN_BYTE_ARRAY for field c1");
// Invalid file format
@@ -2024,6 +2023,51 @@ public class AnalyzeDDLTest extends FrontendTestBase {
BackendConfig.INSTANCE.setZOrderSortUnlocked(false);
+
+ }
+
+ @Test
+ public void TestCreateTableLikeFileOrc() throws AnalysisException {
+ Assume.assumeTrue(
+ "Skipping this test; CREATE TABLE LIKE ORC is only supported when running " +
+ "against Hive-3 or greater", TestUtils.getHiveMajorVersion() >= 3);
+
+ AnalysisError("create table database_DNE.newtbl_DNE like ORC " +
+ "'/test-warehouse/schemas/alltypestiny.parquet'",
+ "Failed to open file as an ORC file: org.apache.orc.FileFormatException: " +
+ "Malformed ORC file " +
+ "hdfs://localhost:20500/test-warehouse/schemas/alltypestiny.parquet" +
+ ". Invalid postscript.");
+
+ // Inferring primitive and complex types
+ AnalyzesOk("create table if not exists newtbl_DNE like orc " +
+ "'/test-warehouse/alltypestiny_orc_def/year=2009/month=1/000000_0'");
+ AnalyzesOk("create table if not exists newtbl_DNE like orc " +
+ "'/test-warehouse/functional_orc_def.db/complextypes_fileformat/000000_0'");
+
+ // check invalid paths
+ AnalysisError("create table if not exists functional.zipcode_incomes like ORC " +
+ "'/test-warehouse'",
+ "Cannot infer schema, path is not a file: hdfs://localhost:20500/test-warehouse");
+ AnalysisError("create table newtbl_DNE like ORC 'foobar'",
+ "URI path must be absolute: foobar");
+ AnalysisError("create table newtbl_DNE like ORC '/not/a/file/path'",
+ "Cannot infer schema, path does not exist: " +
+ "hdfs://localhost:20500/not/a/file/path");
+ AnalysisError("create table if not exists functional.zipcode_incomes like ORC " +
+ "'file:///tmp/foobar'",
+ "Cannot infer schema, path does not exist: file:/tmp/foobar");
+ }
+
+ @Test
+ public void TestCreateTableLikeFileOrcWithHive2() throws AnalysisException {
+ // Verifies that CREATE TABLE LIKE ORC is rejected during analysis with Hive 2.
+ // The test is skipped when the Hive major version is 3 or greater.
+ Assume.assumeTrue(TestUtils.getHiveMajorVersion() < 3);
+
+ // The statement is expected to fail with the Hive version error.
+ AnalysisError("create table if not exists newtbl_DNE like orc " +
+ "'/test-warehouse/alltypestiny_orc_def/year=2009/month=1/000000_0'",
+ "Creating table like ORC file is unsupported for Hive with version < 3");
}
@Test
diff --git a/impala-parent/pom.xml b/impala-parent/pom.xml
index cbb6c7b..f8cc667 100644
--- a/impala-parent/pom.xml
+++ b/impala-parent/pom.xml
@@ -38,6 +38,7 @@ under the License.
<postgres.jdbc.version>${env.IMPALA_POSTGRES_JDBC_DRIVER_VERSION}</postgres.jdbc.version>
<sentry.version>${env.IMPALA_SENTRY_VERSION}</sentry.version>
<hbase.version>${env.IMPALA_HBASE_VERSION}</hbase.version>
+ <orc.version>${env.IMPALA_ORC_JAVA_VERSION}</orc.version>
<parquet.version>${env.IMPALA_PARQUET_VERSION}</parquet.version>
<kite.version>${env.IMPALA_KITE_VERSION}</kite.version>
<knox.version>${env.IMPALA_KNOX_VERSION}</knox.version>
diff --git a/shaded-deps/pom.xml b/shaded-deps/pom.xml
index 5870894..eefd73b 100644
--- a/shaded-deps/pom.xml
+++ b/shaded-deps/pom.xml
@@ -94,6 +94,7 @@ the same dependencies
<include>org/apache/hadoop/hive/serde2/**</include>
<include>org/apache/hive/service/rpc/thrift/**</include>
<include>org/apache/hive/common/HiveVersionAnnotation.class</include>
+ <include>org/apache/orc/**</include>
<include>com/google/**</include>
</includes>
</filter>
diff --git a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file-orc.test b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file-orc.test
new file mode 100644
index 0000000..71901ca
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file-orc.test
@@ -0,0 +1,89 @@
+====
+---- QUERY
+create table $DATABASE.temp_decimal_table_orc like ORC
+'$FILESYSTEM_PREFIX/test-warehouse/decimal_tiny_orc_def/000000_0'
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+describe $DATABASE.temp_decimal_table_orc
+---- RESULTS
+'c1','decimal(10,4)','Inferred from ORC file.'
+'c2','decimal(15,5)','Inferred from ORC file.'
+'c3','decimal(1,1)','Inferred from ORC file.'
+---- TYPES
+STRING, STRING, STRING
+====
+---- QUERY
+create table $DATABASE.temp_chars_table like ORC
+'$FILESYSTEM_PREFIX/test-warehouse/chars_tiny_orc_def/000000_0'
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+describe $DATABASE.temp_chars_table
+---- RESULTS
+'cs','char(5)','Inferred from ORC file.'
+'cl','char(140)','Inferred from ORC file.'
+'vc','varchar(32)','Inferred from ORC file.'
+---- TYPES
+STRING, STRING, STRING
+====
+---- QUERY
+create table $DATABASE.like_zipcodes_file_orc like ORC
+'$FILESYSTEM_PREFIX/test-warehouse/zipcode_incomes_orc_def/000000_0'
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+describe $DATABASE.like_zipcodes_file_orc
+---- RESULTS
+'id','string','Inferred from ORC file.'
+'zip','string','Inferred from ORC file.'
+'description1','string','Inferred from ORC file.'
+'description2','string','Inferred from ORC file.'
+'income','int','Inferred from ORC file.'
+---- TYPES
+STRING, STRING, STRING
+====
+---- QUERY
+create table $DATABASE.like_alltypestiny_file_orc like ORC
+'$FILESYSTEM_PREFIX/test-warehouse/alltypestiny_orc_def/year=2009/month=1/000000_0'
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+describe $DATABASE.like_alltypestiny_file_orc
+---- RESULTS
+'id','int','Inferred from ORC file.'
+'bool_col','boolean','Inferred from ORC file.'
+'tinyint_col','tinyint','Inferred from ORC file.'
+'smallint_col','smallint','Inferred from ORC file.'
+'int_col','int','Inferred from ORC file.'
+'bigint_col','bigint','Inferred from ORC file.'
+'float_col','float','Inferred from ORC file.'
+'double_col','double','Inferred from ORC file.'
+'date_string_col','string','Inferred from ORC file.'
+'string_col','string','Inferred from ORC file.'
+'timestamp_col','timestamp','Inferred from ORC file.'
+---- TYPES
+STRING, STRING, STRING
+====
+---- QUERY
+create table allcomplextypes_clone_orc like ORC
+'$FILESYSTEM_PREFIX/test-warehouse/complextypestbl_orc_def/nullable.orc'
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+describe allcomplextypes_clone_orc
+---- RESULTS
+'id','bigint','Inferred from ORC file.'
+'int_array','array<int>','Inferred from ORC file.'
+'int_array_array','array<array<int>>','Inferred from ORC file.'
+'int_map','map<string,int>','Inferred from ORC file.'
+'int_map_array','array<map<string,int>>','Inferred from ORC file.'
+'nested_struct','struct<\n a:int,\n b:array<int>,\n c:struct<\n d:array<array<struct<\n e:int,\n f:string\n >>>\n >,\n g:map<string,struct<\n h:struct<\n i:array<double>\n >\n >>\n>','Inferred from ORC file.'
+---- TYPES
+STRING, STRING, STRING
+====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test
index 7a80602..fd81aee 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test
@@ -106,43 +106,6 @@ describe $DATABASE.like_alltypestiny_file
STRING, STRING, STRING
====
---- QUERY
-drop table if exists allcomplextypes_clone
----- RESULTS
-'Table does not exist.'
-====
----- QUERY
-create table allcomplextypes_clone like functional.allcomplextypes
-stored as parquet
----- RESULTS
-'Table has been created.'
-====
----- QUERY
-describe allcomplextypes_clone
----- RESULTS
-'id','int',''
-'int_array_col','array<int>',''
-'array_array_col','array<array<int>>',''
-'map_array_col','array<map<string,int>>',''
-'struct_array_col','array<struct<\n f1:bigint,\n f2:string\n>>',''
-'int_map_col','map<string,int>',''
-'array_map_col','map<string,array<int>>',''
-'map_map_col','map<string,map<string,int>>',''
-'struct_map_col','map<string,struct<\n f1:bigint,\n f2:string\n>>',''
-'int_struct_col','struct<\n f1:int,\n f2:int\n>',''
-'complex_struct_col','struct<\n f1:int,\n f2:array<int>,\n f3:map<string,int>\n>',''
-'nested_struct_col','struct<\n f1:int,\n f2:struct<\n f11:bigint,\n f12:struct<\n f21:bigint\n >\n >\n>',''
-'complex_nested_struct_col','struct<\n f1:int,\n f2:array<struct<\n f11:bigint,\n f12:map<string,struct<\n f21:bigint\n >>\n >>\n>',''
-'year','int',''
-'month','int',''
----- TYPES
-STRING, STRING, STRING
-====
----- QUERY
-drop table allcomplextypes_clone
----- RESULTS
-'Table has been dropped.'
-====
----- QUERY
drop table if exists $DATABASE.temp_legacy_table
---- RESULTS
'Table does not exist.'
diff --git a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-table.test b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-table.test
index ee16c37..456f499 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-table.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-table.test
@@ -251,3 +251,30 @@ describe formatted sortbytest_override;
---- TYPES
STRING,STRING,STRING
====
+---- QUERY
+create table allcomplextypes_clone like functional.allcomplextypes
+stored as parquet
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+describe allcomplextypes_clone
+---- RESULTS
+'id','int',''
+'int_array_col','array<int>',''
+'array_array_col','array<array<int>>',''
+'map_array_col','array<map<string,int>>',''
+'struct_array_col','array<struct<\n f1:bigint,\n f2:string\n>>',''
+'int_map_col','map<string,int>',''
+'array_map_col','map<string,array<int>>',''
+'map_map_col','map<string,map<string,int>>',''
+'struct_map_col','map<string,struct<\n f1:bigint,\n f2:string\n>>',''
+'int_struct_col','struct<\n f1:int,\n f2:int\n>',''
+'complex_struct_col','struct<\n f1:int,\n f2:array<int>,\n f3:map<string,int>\n>',''
+'nested_struct_col','struct<\n f1:int,\n f2:struct<\n f11:bigint,\n f12:struct<\n f21:bigint\n >\n >\n>',''
+'complex_nested_struct_col','struct<\n f1:int,\n f2:array<struct<\n f11:bigint,\n f12:map<string,struct<\n f21:bigint\n >>\n >>\n>',''
+'year','int',''
+'month','int',''
+---- TYPES
+STRING, STRING, STRING
+====
\ No newline at end of file
diff --git a/tests/common/skip.py b/tests/common/skip.py
index 2ab4250..3729649 100644
--- a/tests/common/skip.py
+++ b/tests/common/skip.py
@@ -227,7 +227,8 @@ class SkipIfHive2:
create_external_kudu_table = pytest.mark.skipif(HIVE_MAJOR_VERSION == 2,
reason="Hive 2 does not support creating external.table.purge Kudu tables."
" See IMPALA-9092 for details.")
-
+ orc = pytest.mark.skipif(HIVE_MAJOR_VERSION <= 2,
+ reason="CREATE TABLE LIKE ORC is only supported with Hive version >= 3")
class SkipIfCatalogV2:
"""Expose decorators as methods so that is_catalog_v2_cluster() can be evaluated lazily
diff --git a/tests/metadata/test_ddl.py b/tests/metadata/test_ddl.py
index dbb82f9..25f7032 100644
--- a/tests/metadata/test_ddl.py
+++ b/tests/metadata/test_ddl.py
@@ -294,6 +294,13 @@ class TestDdlStatements(TestDdlBase):
self.run_test_case('QueryTest/create-table-like-file', vector,
use_db=unique_database, multiple_impalad=self._use_multiple_impalad(vector))
+ @SkipIfHive2.orc
+ @UniqueDatabase.parametrize(sync_ddl=True)
+ def test_create_table_like_file_orc(self, vector, unique_database):
+ vector.get_value('exec_option')['abort_on_error'] = False
+ self.run_test_case('QueryTest/create-table-like-file-orc', vector,
+ use_db=unique_database, multiple_impalad=self._use_multiple_impalad(vector))
+
@UniqueDatabase.parametrize(sync_ddl=True)
def test_create_table_as_select(self, vector, unique_database):
vector.get_value('exec_option')['abort_on_error'] = False