Posted to commits@impala.apache.org by tm...@apache.org on 2020/01/14 18:04:52 UTC

[impala] 02/02: IMPALA-8046: Support CREATE TABLE from an ORC file

This is an automated email from the ASF dual-hosted git repository.

tmarshall pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 0511b44f9236c655e695185b33f412ec65a80a07
Author: norbert.luksa <no...@cloudera.com>
AuthorDate: Thu Dec 19 09:32:34 2019 +0100

    IMPALA-8046: Support CREATE TABLE from an ORC file
    
    Impala supports creating a table using the schema of a file.
    However, only Parquet is currently supported. This commit adds
    support for creating tables from ORC files.
    
    The change relies on the ORC Java API at version 1.5 or greater,
    because of a bug in earlier versions. Therefore, ORC is listed as
    an external dependency instead of relying on Hive's ORC version
    (starting with Hive 3, Hive also lists ORC as a dependency).
    
    Also, the commit performs minor clean-up on the ParquetHelper
    class, renaming it to ParquetSchemaExtractor and removing outdated
    comments.
    
    To create a table from an ORC file, run:
    CREATE TABLE tablename LIKE ORC '/path/to/file'
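    
    For example (hypothetical database and file names, for
    illustration only), the inferred schema can then be inspected
    with DESCRIBE; each inferred column carries the comment
    "Inferred from ORC file.":
    
    CREATE TABLE mydb.orc_clone LIKE ORC '/warehouse/files/000000_0';
    DESCRIBE mydb.orc_clone;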
    
    Tests:
     * Added analysis tests for primitive and complex types.
     * Added e2e tests for creating tables from ORC files.
    
    Change-Id: I77cd84cda2ed86516937a67eb320fd41e3f1cf2d
    Reviewed-on: http://gerrit.cloudera.org:8080/14811
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 bin/impala-config.sh                               |   1 +
 fe/pom.xml                                         |  26 +++
 .../impala/analysis/CreateTableLikeFileStmt.java   |  14 +-
 .../apache/impala/analysis/OrcSchemaExtractor.java | 200 +++++++++++++++++++++
 ...quetHelper.java => ParquetSchemaExtractor.java} |  38 ++--
 .../org/apache/impala/common/FileSystemUtil.java   |   8 +
 .../org/apache/impala/util/FileAnalysisUtil.java   |  50 ++++++
 .../org/apache/impala/analysis/AnalyzeDDLTest.java |  74 ++++++--
 impala-parent/pom.xml                              |   1 +
 shaded-deps/pom.xml                                |   1 +
 .../QueryTest/create-table-like-file-orc.test      |  89 +++++++++
 .../queries/QueryTest/create-table-like-file.test  |  37 ----
 .../queries/QueryTest/create-table-like-table.test |  27 +++
 tests/common/skip.py                               |   3 +-
 tests/metadata/test_ddl.py                         |   7 +
 15 files changed, 495 insertions(+), 81 deletions(-)

diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 9848505..4758da4 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -190,6 +190,7 @@ export IMPALA_AVRO_JAVA_VERSION=1.8.2-cdh6.x-SNAPSHOT
 export IMPALA_LLAMA_MINIKDC_VERSION=1.0.0
 export IMPALA_KITE_VERSION=1.0.0-cdh6.x-SNAPSHOT
 export IMPALA_KUDU_JAVA_VERSION=1.11.0-cdh6.x-SNAPSHOT
+export IMPALA_ORC_JAVA_VERSION=1.6.2
 
 # When IMPALA_(CDH_COMPONENT)_URL are overridden, they may contain '$(platform_label)'
 # which will be substituted for the CDH platform label in bootstrap_toolchain.py
diff --git a/fe/pom.xml b/fe/pom.xml
index 046851d..d75d1c2 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -306,6 +306,22 @@ under the License.
     </dependency>
 
     <dependency>
+      <groupId>org.apache.orc</groupId>
+      <artifactId>orc-core</artifactId>
+      <version>${orc.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-common</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.hive</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
       <groupId>org.apache.hbase</groupId>
       <artifactId>hbase-client</artifactId>
       <version>${hbase.version}</version>
@@ -713,6 +729,7 @@ under the License.
                     <exclude>org.apache.kudu:*</exclude>
                     <exclude>org.apache.sentry:*</exclude>
                     <exclude>org.apache.parquet:*</exclude>
+                    <exclude>org.apache.orc:*</exclude>
                   </excludes>
                   <includes>
                     <!-- hadoop-yarn-common depends on some Jetty utilities. -->
@@ -725,6 +742,7 @@ under the License.
                     <include>org.apache.kudu:*:${kudu.version}</include>
                     <include>org.apache.sentry:*:${sentry.version}</include>
                     <include>org.apache.parquet:*:${parquet.version}</include>
+                    <include>org.apache.orc:*:${orc.version}</include>
                   </includes>
                 </bannedDependencies>
               </rules>
@@ -946,6 +964,14 @@ under the License.
               <groupId>org.apache.ant</groupId>
               <artifactId>*</artifactId>
             </exclusion>
+            <exclusion>
+              <groupId>orc</groupId>
+              <artifactId>*</artifactId>
+            </exclusion>
+            <exclusion>
+              <groupId>org.apache.orc</groupId>
+              <artifactId>*</artifactId>
+            </exclusion>
           </exclusions>
         </dependency>
 
diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeFileStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeFileStmt.java
index 5053572..2d034b6 100644
--- a/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeFileStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeFileStmt.java
@@ -26,6 +26,7 @@ import org.apache.impala.catalog.HdfsCompression;
 import org.apache.impala.catalog.HdfsFileFormat;
 import org.apache.impala.common.AnalysisException;
 import org.apache.impala.common.Pair;
+import org.apache.impala.compat.MetastoreShim;
 import org.apache.impala.thrift.THdfsFileFormat;
 
 
@@ -71,11 +72,18 @@ public class CreateTableLikeFileStmt extends CreateTableStmt {
     schemaLocation_.analyze(analyzer, Privilege.ALL, FsAction.READ);
     switch (schemaFileFormat_) {
       case PARQUET:
-        getColumnDefs().addAll(ParquetHelper.extractParquetSchema(schemaLocation_));
+        getColumnDefs().addAll(ParquetSchemaExtractor.extract(schemaLocation_));
+        break;
+      case ORC:
+        if (MetastoreShim.getMajorVersion() < 3) {
+          throw new AnalysisException("Creating table like ORC file is unsupported for " +
+              "Hive with version < 3");
+        }
+        getColumnDefs().addAll(OrcSchemaExtractor.extract(schemaLocation_));
         break;
       default:
-        throw new AnalysisException("Unsupported file type for schema inference: "
-            + schemaFileFormat_);
+        throw new AnalysisException("Unsupported file type for schema inference: " +
+            schemaFileFormat_);
     }
     super.analyze(analyzer);
   }
diff --git a/fe/src/main/java/org/apache/impala/analysis/OrcSchemaExtractor.java b/fe/src/main/java/org/apache/impala/analysis/OrcSchemaExtractor.java
new file mode 100644
index 0000000..9515ff4
--- /dev/null
+++ b/fe/src/main/java/org/apache/impala/analysis/OrcSchemaExtractor.java
@@ -0,0 +1,200 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.analysis;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.impala.catalog.MapType;
+import org.apache.impala.catalog.ScalarType;
+import org.apache.impala.catalog.StructField;
+import org.apache.impala.catalog.ArrayType;
+import org.apache.impala.catalog.StructType;
+import org.apache.impala.catalog.Type;
+import org.apache.impala.common.AnalysisException;
+import org.apache.impala.common.FileSystemUtil;
+import org.apache.impala.util.FileAnalysisUtil;
+import org.apache.orc.OrcFile;
+import org.apache.orc.OrcFile.ReaderOptions;
+import org.apache.orc.Reader;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.TypeDescription.Category;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Provides a helper function (extract()) which extracts the Impala schema from a given
+ * ORC file. Details of the ORC types:
+ * https://orc.apache.org/docs/types.html
+ */
+public class OrcSchemaExtractor {
+  private static final String ERROR_MSG =
+      "Failed to convert ORC type\n%s\nto an Impala %s type:\n%s\n";
+
+  /**
+   * Validates the path and loads the ORC schema of the file. The ORC schema is also an
+   * ORC type (TypeDescription), represented as a struct.
+   */
+  private static TypeDescription loadOrcSchema(Path pathToFile) throws AnalysisException {
+    FileAnalysisUtil.CheckIfFile(pathToFile);
+    Reader reader = null;
+    try {
+      reader = OrcFile.createReader(pathToFile,
+          new ReaderOptions(FileSystemUtil.getConfiguration()));
+    } catch (IOException e) {
+      // OrcFile.createReader throws IOException in case of any failure, including trying
+      // to open a non-ORC file.
+      throw new AnalysisException("Failed to open file as an ORC file: " + e);
+    }
+    return reader.getSchema();
+  }
+
+  /**
+   * Converts a primitive ORC type to an Impala Type.
+   */
+  private static Type convertPrimitiveOrcType(TypeDescription type) {
+    Category category = type.getCategory();
+    Preconditions.checkState(category.isPrimitive());
+    switch (category) {
+      case BINARY: return Type.STRING;
+      case BOOLEAN: return Type.BOOLEAN;
+      case BYTE: return Type.TINYINT;
+      case CHAR: return ScalarType.createCharType(type.getMaxLength());
+      case DATE: return Type.DATE;
+      case DECIMAL:
+        return ScalarType.createDecimalType(type.getPrecision(), type.getScale());
+      case DOUBLE: return Type.DOUBLE;
+      case FLOAT: return Type.FLOAT;
+      case INT: return Type.INT;
+      case LONG: return Type.BIGINT;
+      case SHORT: return Type.SMALLINT;
+      case STRING: return Type.STRING;
+      case TIMESTAMP: return Type.TIMESTAMP;
+      case VARCHAR: return ScalarType.createVarcharType(type.getMaxLength());
+      default:
+        Preconditions.checkState(false,
+            "Unexpected ORC primitive type: " + category.getName());
+        return null;
+    }
+  }
+
+  /**
+   * Converts an ORC list type to an Impala array Type. An ORC list contains one child,
+   * the TypeDescription of the elements.
+   */
+  private static ArrayType convertArray(TypeDescription listType)
+      throws AnalysisException {
+    Preconditions.checkState(listType.getChildren().size() == 1);
+    return new ArrayType(convertOrcType(listType.getChildren().get(0)));
+  }
+
+  /**
+   * Converts an ORC map type to an Impala map Type. An ORC map contains two children,
+   * the TypeDescriptions for the keys and values.
+   */
+  private static MapType convertMap(TypeDescription mapType) throws AnalysisException {
+    // ORC maps have two children, one for the keys, one for the values.
+    Preconditions.checkState(mapType.getChildren().size() == 2);
+
+    TypeDescription key = mapType.getChildren().get(0);
+    TypeDescription value = mapType.getChildren().get(1);
+
+    if (!key.getCategory().isPrimitive()) {
+      throw new AnalysisException(String.format(ERROR_MSG, mapType.toString(), "MAP",
+          "The key type of the MAP type must be primitive."));
+    }
+
+    return new MapType(convertOrcType(key), convertOrcType(value));
+  }
+
+  /**
+   * Converts an ORC struct type to an Impala struct Type.
+   */
+  private static StructType convertStruct(TypeDescription structType)
+      throws AnalysisException {
+    List<StructField> structFields = new ArrayList<>();
+    List<String> fieldNames = structType.getFieldNames();
+    List<TypeDescription> subTypes = structType.getChildren();
+    Preconditions.checkState(subTypes.size() == fieldNames.size());
+    for (int i = 0; i < subTypes.size(); i++) {
+      StructField f = new StructField(fieldNames.get(i), convertOrcType(subTypes.get(i)));
+      structFields.add(f);
+    }
+    return new StructType(structFields);
+  }
+
+  /**
+   * Converts a non-primitive ORC type to an Impala Type.
+   */
+  private static Type convertComplexOrcType(TypeDescription type)
+      throws AnalysisException {
+    Category category = type.getCategory();
+    Preconditions.checkState(!category.isPrimitive());
+
+    switch (category) {
+      case LIST: return convertArray(type);
+      case MAP: return convertMap(type);
+      case STRUCT: return convertStruct(type);
+      case UNION:
+        throw new AnalysisException(
+            "Unsupported ORC type UNION for field " + category.getName());
+      default:
+        Preconditions.checkState(false,
+            "Unexpected ORC primitive type: " + category.getName());
+        return null;
+    }
+  }
+
+  /**
+   * Converts an ORC type to an Impala Type.
+   */
+  private static Type convertOrcType(TypeDescription type) throws AnalysisException {
+    if (type.getCategory().isPrimitive()) {
+      return convertPrimitiveOrcType(type);
+    } else {
+      return convertComplexOrcType(type);
+    }
+  }
+
+  /**
+   * Parses an ORC file stored in HDFS and returns the corresponding Impala schema.
+   * This fails with an analysis exception if any errors occur reading the file,
+   * parsing the ORC schema, or if the ORC types cannot be represented in Impala.
+   */
+  public static List<ColumnDef> extract(HdfsUri location) throws AnalysisException {
+    List<ColumnDef> schema = new ArrayList<>();
+    TypeDescription orcSchema = loadOrcSchema(location.getPath()); // Returns a STRUCT.
+    List<TypeDescription> subTypes = orcSchema.getChildren();
+    List<String> fieldNames = orcSchema.getFieldNames();
+    Preconditions.checkState(subTypes.size() == fieldNames.size());
+    for (int i = 0; i < subTypes.size(); i++) {
+      TypeDescription orcType = subTypes.get(i);
+      Type type = convertOrcType(orcType);
+      Preconditions.checkNotNull(type);
+      String colName = fieldNames.get(i);
+      Map<ColumnDef.Option, Object> option = new HashMap<>();
+      option.put(ColumnDef.Option.COMMENT, "Inferred from ORC file.");
+      schema.add(new ColumnDef(colName, new TypeDef(type), option));
+    }
+    return schema;
+  }
+}
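
For readers unfamiliar with the ORC Java API, the schema-reading pattern used by
OrcSchemaExtractor above boils down to the following minimal sketch (a standalone
illustration with a hypothetical input path, not part of the patch; error
handling omitted):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.TypeDescription;

public class OrcSchemaDemo {
  public static void main(String[] args) throws java.io.IOException {
    // Read the file footer; OrcFile.createReader() throws IOException on any
    // failure, including attempting to open a non-ORC file.
    Reader reader = OrcFile.createReader(new Path("/tmp/example.orc"),
        OrcFile.readerOptions(new Configuration()));
    // The file schema is itself a TypeDescription of category STRUCT:
    // getFieldNames() lists the column names, getChildren() their types.
    TypeDescription schema = reader.getSchema();
    for (int i = 0; i < schema.getChildren().size(); i++) {
      System.out.println(
          schema.getFieldNames().get(i) + ": " + schema.getChildren().get(i));
    }
  }
}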
diff --git a/fe/src/main/java/org/apache/impala/analysis/ParquetHelper.java b/fe/src/main/java/org/apache/impala/analysis/ParquetSchemaExtractor.java
similarity index 92%
rename from fe/src/main/java/org/apache/impala/analysis/ParquetHelper.java
rename to fe/src/main/java/org/apache/impala/analysis/ParquetSchemaExtractor.java
index dc55a34..0e2d15a 100644
--- a/fe/src/main/java/org/apache/impala/analysis/ParquetHelper.java
+++ b/fe/src/main/java/org/apache/impala/analysis/ParquetSchemaExtractor.java
@@ -25,14 +25,14 @@ import java.util.List;
 import java.util.Map;
 
 import com.google.common.base.Preconditions;
-import org.apache.hadoop.fs.FileSystem;
+
 import org.apache.hadoop.fs.Path;
+import org.apache.parquet.format.converter.ParquetMetadataConverter;
 import org.apache.parquet.hadoop.metadata.ParquetMetadata;
 import org.apache.parquet.hadoop.ParquetFileReader;
 import org.apache.parquet.schema.PrimitiveType;
 import org.apache.parquet.schema.LogicalTypeAnnotation;
 import org.apache.parquet.schema.LogicalTypeAnnotation.*;
-
 import org.apache.impala.catalog.ArrayType;
 import org.apache.impala.catalog.MapType;
 import org.apache.impala.catalog.ScalarType;
@@ -41,16 +41,13 @@ import org.apache.impala.catalog.StructType;
 import org.apache.impala.catalog.Type;
 import org.apache.impala.common.AnalysisException;
 import org.apache.impala.common.FileSystemUtil;
+import org.apache.impala.util.FileAnalysisUtil;
 
 /**
- * Provides extractParquetSchema() to extract a schema
- * from a parquet file.
- *
- * Because Parquet's Java package changed between Parquet 1.5
- * and 1.9, a second copy of this file, with "org.apache.parquet." replaced
- * with "org.apache.org.apache.parquet." is generated by the build system.
+ * Provides a helper function (extract()) which extracts the Impala schema from a given
+ * Parquet file.
  */
-class ParquetHelper {
+class ParquetSchemaExtractor {
   private final static String ERROR_MSG =
       "Failed to convert Parquet type\n%s\nto an Impala %s type:\n%s\n";
 
@@ -61,21 +58,11 @@ class ParquetHelper {
    */
   private static org.apache.parquet.schema.MessageType loadParquetSchema(Path pathToFile)
       throws AnalysisException {
-    try {
-      FileSystem fs = pathToFile.getFileSystem(FileSystemUtil.getConfiguration());
-      if (!fs.isFile(pathToFile)) {
-        throw new AnalysisException("Cannot infer schema, path is not a file: " +
-                                    pathToFile);
-      }
-    } catch (IOException e) {
-      throw new AnalysisException("Failed to connect to filesystem:" + e);
-    } catch (IllegalArgumentException e) {
-      throw new AnalysisException(e.getMessage());
-    }
+    FileAnalysisUtil.CheckIfFile(pathToFile);
     ParquetMetadata readFooter = null;
     try {
       readFooter = ParquetFileReader.readFooter(FileSystemUtil.getConfiguration(),
-          pathToFile);
+          pathToFile, ParquetMetadataConverter.NO_FILTER);
     } catch (FileNotFoundException e) {
       throw new AnalysisException("File not found: " + e);
     } catch (IOException e) {
@@ -95,7 +82,8 @@ class ParquetHelper {
    * Converts a "primitive" Parquet type to an Impala type.
    * A primitive type is a non-nested type with no annotations.
    */
-  private static Type convertPrimitiveParquetType(org.apache.parquet.schema.Type parquetType)
+  private static Type convertPrimitiveParquetType(
+      org.apache.parquet.schema.Type parquetType)
       throws AnalysisException {
     Preconditions.checkState(parquetType.isPrimitive());
     PrimitiveType prim = parquetType.asPrimitiveType();
@@ -339,9 +327,9 @@ class ParquetHelper {
    * This fails with an analysis exception if any errors occur reading the file,
    * parsing the Parquet schema, or if the Parquet types cannot be represented in Impala.
    */
-  static List<ColumnDef> extractParquetSchema(HdfsUri location)
-      throws AnalysisException {
-    org.apache.parquet.schema.MessageType parquetSchema = loadParquetSchema(location.getPath());
+  static List<ColumnDef> extract(HdfsUri location) throws AnalysisException {
+    org.apache.parquet.schema.MessageType parquetSchema =
+        loadParquetSchema(location.getPath());
     List<org.apache.parquet.schema.Type> fields = parquetSchema.getFields();
     List<ColumnDef> schema = new ArrayList<>();
 
diff --git a/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java b/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java
index 7eccd13..f77fd55 100644
--- a/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java
+++ b/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java
@@ -599,6 +599,14 @@ public class FileSystemUtil {
   }
 
   /**
+   * Returns true if the path 'p' is a file, false if not. Throws if path does not exist.
+   */
+  public static boolean isFile(Path p) throws IOException, FileNotFoundException {
+    FileSystem fs = getFileSystemForPath(p);
+    return fs.getFileStatus(p).isFile();
+  }
+
+  /**
    * Return the path of 'path' relative to the startPath. This may
    * differ from simply the file name in the case of recursive listings.
    */
diff --git a/fe/src/main/java/org/apache/impala/util/FileAnalysisUtil.java b/fe/src/main/java/org/apache/impala/util/FileAnalysisUtil.java
new file mode 100644
index 0000000..d4abb5e
--- /dev/null
+++ b/fe/src/main/java/org/apache/impala/util/FileAnalysisUtil.java
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.util;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.impala.common.AnalysisException;
+import org.apache.impala.common.FileSystemUtil;
+
+/**
+ * Provides common utilities for OrcSchemaExtractor and ParquetSchemaExtractor.
+ */
+public class FileAnalysisUtil {
+
+  /**
+   * Throws if the given path is not a file.
+   */
+  public static void CheckIfFile(Path pathToFile) throws AnalysisException {
+    try {
+      if (!FileSystemUtil.isFile(pathToFile)) {
+        throw new AnalysisException("Cannot infer schema, path is not a file: " +
+            pathToFile);
+      }
+    } catch (FileNotFoundException e) {
+      throw new AnalysisException("Cannot infer schema, path does not exist: " +
+          pathToFile);
+    } catch (IOException e) {
+      throw new AnalysisException("Failed to connect to filesystem:" + e);
+    } catch (IllegalArgumentException e) {
+      throw new AnalysisException(e.getMessage());
+    }
+  }
+}
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
index dff57a2..8a680b8 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
@@ -54,9 +54,8 @@ import org.apache.impala.thrift.TBackendGflags;
 import org.apache.impala.thrift.TDescribeTableParams;
 import org.apache.impala.thrift.TQueryOptions;
 import org.apache.impala.util.MetaStoreUtil;
-import org.junit.AfterClass;
 import org.junit.Assert;
-import org.junit.BeforeClass;
+import org.junit.Assume;
 import org.junit.Test;
 
 import com.google.common.base.Joiner;
@@ -1994,27 +1993,27 @@ public class AnalyzeDDLTest extends FrontendTestBase {
         "Database does not exist: database_DNE");
 
     // check invalid paths
-    AnalysisError("create table if not exists functional.zipcode_incomes like parquet "
-        + "'/test-warehouse'",
+    AnalysisError("create table if not exists functional.zipcode_incomes like parquet " +
+        "'/test-warehouse'",
         "Cannot infer schema, path is not a file: hdfs://localhost:20500/test-warehouse");
     AnalysisError("create table newtbl_DNE like parquet 'foobar'",
         "URI path must be absolute: foobar");
     AnalysisError("create table newtbl_DNE like parquet '/not/a/file/path'",
-        "Cannot infer schema, path is not a file: "
-        + "hdfs://localhost:20500/not/a/file/path");
-    AnalysisError("create table if not exists functional.zipcode_incomes like parquet "
-        + "'file:///tmp/foobar'",
-        "Cannot infer schema, path is not a file: file:/tmp/foobar");
+        "Cannot infer schema, path does not exist: " +
+        "hdfs://localhost:20500/not/a/file/path");
+    AnalysisError("create table if not exists functional.zipcode_incomes like parquet " +
+        "'file:///tmp/foobar'",
+        "Cannot infer schema, path does not exist: file:/tmp/foobar");
 
     // check valid paths with bad file contents
-    AnalysisError("create table database_DNE.newtbl_DNE like parquet "
-        + "'/test-warehouse/zipcode_incomes_rc/000000_0'",
-        "File is not a parquet file: "
-        + "hdfs://localhost:20500/test-warehouse/zipcode_incomes_rc/000000_0");
+    AnalysisError("create table database_DNE.newtbl_DNE like parquet " +
+        "'/test-warehouse/zipcode_incomes_rc/000000_0'",
+        "File is not a parquet file: " +
+        "hdfs://localhost:20500/test-warehouse/zipcode_incomes_rc/000000_0");
 
     // this is a decimal file without annotations
-    AnalysisError("create table if not exists functional.zipcode_incomes like parquet "
-        + "'/test-warehouse/schemas/malformed_decimal_tiny.parquet'",
+    AnalysisError("create table if not exists functional.zipcode_incomes like parquet " +
+        "'/test-warehouse/schemas/malformed_decimal_tiny.parquet'",
         "Unsupported parquet type FIXED_LEN_BYTE_ARRAY for field c1");
 
     // Invalid file format
@@ -2024,6 +2023,51 @@ public class AnalyzeDDLTest extends FrontendTestBase {
 
 
     BackendConfig.INSTANCE.setZOrderSortUnlocked(false);
+
+  }
+
+  @Test
+  public void TestCreateTableLikeFileOrc() throws AnalysisException {
+    Assume.assumeTrue(
+        "Skipping this test; CREATE TABLE LIKE ORC is only supported when running " +
+            "against Hive-3 or greater", TestUtils.getHiveMajorVersion() >= 3);
+
+    AnalysisError("create table database_DNE.newtbl_DNE like ORC " +
+        "'/test-warehouse/schemas/alltypestiny.parquet'",
+        "Failed to open file as an ORC file: org.apache.orc.FileFormatException: " +
+        "Malformed ORC file " +
+        "hdfs://localhost:20500/test-warehouse/schemas/alltypestiny.parquet" +
+        ". Invalid postscript.");
+
+    // Inferring primitive and complex types
+    AnalyzesOk("create table if not exists newtbl_DNE like orc " +
+        "'/test-warehouse/alltypestiny_orc_def/year=2009/month=1/000000_0'");
+    AnalyzesOk("create table if not exists newtbl_DNE like orc " +
+        "'/test-warehouse/functional_orc_def.db/complextypes_fileformat/000000_0'");
+
+    // check invalid paths
+    AnalysisError("create table if not exists functional.zipcode_incomes like ORC " +
+        "'/test-warehouse'",
+        "Cannot infer schema, path is not a file: hdfs://localhost:20500/test-warehouse");
+    AnalysisError("create table newtbl_DNE like ORC 'foobar'",
+        "URI path must be absolute: foobar");
+    AnalysisError("create table newtbl_DNE like ORC '/not/a/file/path'",
+        "Cannot infer schema, path does not exist: " +
+        "hdfs://localhost:20500/not/a/file/path");
+    AnalysisError("create table if not exists functional.zipcode_incomes like ORC " +
+        "'file:///tmp/foobar'",
+        "Cannot infer schema, path does not exist: file:/tmp/foobar");
+  }
+
+  @Test
+  public void TestCreateTableLikeFileOrcWithHive2() throws AnalysisException {
+    // Tests that an error is thrown when creating a table like an ORC file on Hive-2.
+    Assume.assumeTrue(TestUtils.getHiveMajorVersion() < 3);
+
+    // Inferring primitive and complex types
+    AnalysisError("create table if not exists newtbl_DNE like orc " +
+        "'/test-warehouse/alltypestiny_orc_def/year=2009/month=1/000000_0'",
+        "Creating table like ORC file is unsupported for Hive with version < 3");
   }
 
   @Test
diff --git a/impala-parent/pom.xml b/impala-parent/pom.xml
index cbb6c7b..f8cc667 100644
--- a/impala-parent/pom.xml
+++ b/impala-parent/pom.xml
@@ -38,6 +38,7 @@ under the License.
     <postgres.jdbc.version>${env.IMPALA_POSTGRES_JDBC_DRIVER_VERSION}</postgres.jdbc.version>
     <sentry.version>${env.IMPALA_SENTRY_VERSION}</sentry.version>
     <hbase.version>${env.IMPALA_HBASE_VERSION}</hbase.version>
+    <orc.version>${env.IMPALA_ORC_JAVA_VERSION}</orc.version>
     <parquet.version>${env.IMPALA_PARQUET_VERSION}</parquet.version>
     <kite.version>${env.IMPALA_KITE_VERSION}</kite.version>
     <knox.version>${env.IMPALA_KNOX_VERSION}</knox.version>
diff --git a/shaded-deps/pom.xml b/shaded-deps/pom.xml
index 5870894..eefd73b 100644
--- a/shaded-deps/pom.xml
+++ b/shaded-deps/pom.xml
@@ -94,6 +94,7 @@ the same dependencies
                 <include>org/apache/hadoop/hive/serde2/**</include>
                 <include>org/apache/hive/service/rpc/thrift/**</include>
                 <include>org/apache/hive/common/HiveVersionAnnotation.class</include>
+                <include>org/apache/orc/**</include>
                 <include>com/google/**</include>
               </includes>
             </filter>
diff --git a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file-orc.test b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file-orc.test
new file mode 100644
index 0000000..71901ca
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file-orc.test
@@ -0,0 +1,89 @@
+====
+---- QUERY
+create table $DATABASE.temp_decimal_table_orc like ORC
+'$FILESYSTEM_PREFIX/test-warehouse/decimal_tiny_orc_def/000000_0'
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+describe $DATABASE.temp_decimal_table_orc
+---- RESULTS
+'c1','decimal(10,4)','Inferred from ORC file.'
+'c2','decimal(15,5)','Inferred from ORC file.'
+'c3','decimal(1,1)','Inferred from ORC file.'
+---- TYPES
+STRING, STRING, STRING
+====
+---- QUERY
+create table $DATABASE.temp_chars_table like ORC
+'$FILESYSTEM_PREFIX/test-warehouse/chars_tiny_orc_def/000000_0'
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+describe $DATABASE.temp_chars_table
+---- RESULTS
+'cs','char(5)','Inferred from ORC file.'
+'cl','char(140)','Inferred from ORC file.'
+'vc','varchar(32)','Inferred from ORC file.'
+---- TYPES
+STRING, STRING, STRING
+====
+---- QUERY
+create table $DATABASE.like_zipcodes_file_orc like ORC
+'$FILESYSTEM_PREFIX/test-warehouse/zipcode_incomes_orc_def/000000_0'
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+describe $DATABASE.like_zipcodes_file_orc
+---- RESULTS
+'id','string','Inferred from ORC file.'
+'zip','string','Inferred from ORC file.'
+'description1','string','Inferred from ORC file.'
+'description2','string','Inferred from ORC file.'
+'income','int','Inferred from ORC file.'
+---- TYPES
+STRING, STRING, STRING
+====
+---- QUERY
+create table $DATABASE.like_alltypestiny_file_orc like ORC
+'$FILESYSTEM_PREFIX/test-warehouse/alltypestiny_orc_def/year=2009/month=1/000000_0'
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+describe $DATABASE.like_alltypestiny_file_orc
+---- RESULTS
+'id','int','Inferred from ORC file.'
+'bool_col','boolean','Inferred from ORC file.'
+'tinyint_col','tinyint','Inferred from ORC file.'
+'smallint_col','smallint','Inferred from ORC file.'
+'int_col','int','Inferred from ORC file.'
+'bigint_col','bigint','Inferred from ORC file.'
+'float_col','float','Inferred from ORC file.'
+'double_col','double','Inferred from ORC file.'
+'date_string_col','string','Inferred from ORC file.'
+'string_col','string','Inferred from ORC file.'
+'timestamp_col','timestamp','Inferred from ORC file.'
+---- TYPES
+STRING, STRING, STRING
+====
+---- QUERY
+create table allcomplextypes_clone_orc like ORC
+'$FILESYSTEM_PREFIX/test-warehouse/complextypestbl_orc_def/nullable.orc'
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+describe allcomplextypes_clone_orc
+---- RESULTS
+'id','bigint','Inferred from ORC file.'
+'int_array','array<int>','Inferred from ORC file.'
+'int_array_array','array<array<int>>','Inferred from ORC file.'
+'int_map','map<string,int>','Inferred from ORC file.'
+'int_map_array','array<map<string,int>>','Inferred from ORC file.'
+'nested_struct','struct<\n  a:int,\n  b:array<int>,\n  c:struct<\n    d:array<array<struct<\n      e:int,\n      f:string\n    >>>\n  >,\n  g:map<string,struct<\n    h:struct<\n      i:array<double>\n    >\n  >>\n>','Inferred from ORC file.'
+---- TYPES
+STRING, STRING, STRING
+====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test
index 7a80602..fd81aee 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test
@@ -106,43 +106,6 @@ describe $DATABASE.like_alltypestiny_file
 STRING, STRING, STRING
 ====
 ---- QUERY
-drop table if exists allcomplextypes_clone
----- RESULTS
-'Table does not exist.'
-====
----- QUERY
-create table allcomplextypes_clone like functional.allcomplextypes
-stored as parquet
----- RESULTS
-'Table has been created.'
-====
----- QUERY
-describe allcomplextypes_clone
----- RESULTS
-'id','int',''
-'int_array_col','array<int>',''
-'array_array_col','array<array<int>>',''
-'map_array_col','array<map<string,int>>',''
-'struct_array_col','array<struct<\n  f1:bigint,\n  f2:string\n>>',''
-'int_map_col','map<string,int>',''
-'array_map_col','map<string,array<int>>',''
-'map_map_col','map<string,map<string,int>>',''
-'struct_map_col','map<string,struct<\n  f1:bigint,\n  f2:string\n>>',''
-'int_struct_col','struct<\n  f1:int,\n  f2:int\n>',''
-'complex_struct_col','struct<\n  f1:int,\n  f2:array<int>,\n  f3:map<string,int>\n>',''
-'nested_struct_col','struct<\n  f1:int,\n  f2:struct<\n    f11:bigint,\n    f12:struct<\n      f21:bigint\n    >\n  >\n>',''
-'complex_nested_struct_col','struct<\n  f1:int,\n  f2:array<struct<\n    f11:bigint,\n    f12:map<string,struct<\n      f21:bigint\n    >>\n  >>\n>',''
-'year','int',''
-'month','int',''
----- TYPES
-STRING, STRING, STRING
-====
----- QUERY
-drop table allcomplextypes_clone
----- RESULTS
-'Table has been dropped.'
-====
----- QUERY
 drop table if exists $DATABASE.temp_legacy_table
 ---- RESULTS
 'Table does not exist.'
diff --git a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-table.test b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-table.test
index ee16c37..456f499 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-table.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-table.test
@@ -251,3 +251,30 @@ describe formatted sortbytest_override;
 ---- TYPES
 STRING,STRING,STRING
 ====
+---- QUERY
+create table allcomplextypes_clone like functional.allcomplextypes
+stored as parquet
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+describe allcomplextypes_clone
+---- RESULTS
+'id','int',''
+'int_array_col','array<int>',''
+'array_array_col','array<array<int>>',''
+'map_array_col','array<map<string,int>>',''
+'struct_array_col','array<struct<\n  f1:bigint,\n  f2:string\n>>',''
+'int_map_col','map<string,int>',''
+'array_map_col','map<string,array<int>>',''
+'map_map_col','map<string,map<string,int>>',''
+'struct_map_col','map<string,struct<\n  f1:bigint,\n  f2:string\n>>',''
+'int_struct_col','struct<\n  f1:int,\n  f2:int\n>',''
+'complex_struct_col','struct<\n  f1:int,\n  f2:array<int>,\n  f3:map<string,int>\n>',''
+'nested_struct_col','struct<\n  f1:int,\n  f2:struct<\n    f11:bigint,\n    f12:struct<\n      f21:bigint\n    >\n  >\n>',''
+'complex_nested_struct_col','struct<\n  f1:int,\n  f2:array<struct<\n    f11:bigint,\n    f12:map<string,struct<\n      f21:bigint\n    >>\n  >>\n>',''
+'year','int',''
+'month','int',''
+---- TYPES
+STRING, STRING, STRING
+====
\ No newline at end of file
diff --git a/tests/common/skip.py b/tests/common/skip.py
index 2ab4250..3729649 100644
--- a/tests/common/skip.py
+++ b/tests/common/skip.py
@@ -227,7 +227,8 @@ class SkipIfHive2:
   create_external_kudu_table = pytest.mark.skipif(HIVE_MAJOR_VERSION == 2,
       reason="Hive 2 does not support creating external.table.purge Kudu tables."
              " See IMPALA-9092 for details.")
-
+  orc = pytest.mark.skipif(HIVE_MAJOR_VERSION <= 2,
+      reason="CREATE TABLE LIKE ORC is only supported with Hive version >= 3")
 
 class SkipIfCatalogV2:
   """Expose decorators as methods so that is_catalog_v2_cluster() can be evaluated lazily
diff --git a/tests/metadata/test_ddl.py b/tests/metadata/test_ddl.py
index dbb82f9..25f7032 100644
--- a/tests/metadata/test_ddl.py
+++ b/tests/metadata/test_ddl.py
@@ -294,6 +294,13 @@ class TestDdlStatements(TestDdlBase):
     self.run_test_case('QueryTest/create-table-like-file', vector,
         use_db=unique_database, multiple_impalad=self._use_multiple_impalad(vector))
 
+  @SkipIfHive2.orc
+  @UniqueDatabase.parametrize(sync_ddl=True)
+  def test_create_table_like_file_orc(self, vector, unique_database):
+    vector.get_value('exec_option')['abort_on_error'] = False
+    self.run_test_case('QueryTest/create-table-like-file-orc', vector,
+        use_db=unique_database, multiple_impalad=self._use_multiple_impalad(vector))
+
   @UniqueDatabase.parametrize(sync_ddl=True)
   def test_create_table_as_select(self, vector, unique_database):
     vector.get_value('exec_option')['abort_on_error'] = False