You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by tm...@apache.org on 2020/01/14 18:04:50 UTC

[impala] branch master updated (915e811 -> 0511b44)

This is an automated email from the ASF dual-hosted git repository.

tmarshall pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from 915e811  IMPALA-9262: De-flake TestBlacklist::test_kill_impalad_with_running_queries
     new c359c55  IMPALA-8501: Fix race condition on port in RpcMgrTest
     new 0511b44  IMPALA-8046: Support CREATE TABLE from an ORC file

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/rpc/rpc-mgr-test.h                          |   4 +-
 bin/impala-config.sh                               |   1 +
 fe/pom.xml                                         |  26 +++
 .../impala/analysis/CreateTableLikeFileStmt.java   |  14 +-
 .../apache/impala/analysis/OrcSchemaExtractor.java | 200 +++++++++++++++++++++
 ...quetHelper.java => ParquetSchemaExtractor.java} |  38 ++--
 .../org/apache/impala/common/FileSystemUtil.java   |   8 +
 .../org/apache/impala/util/FileAnalysisUtil.java   |  50 ++++++
 .../org/apache/impala/analysis/AnalyzeDDLTest.java |  74 ++++++--
 impala-parent/pom.xml                              |   1 +
 shaded-deps/pom.xml                                |   1 +
 .../QueryTest/create-table-like-file-orc.test      |  89 +++++++++
 .../queries/QueryTest/create-table-like-file.test  |  37 ----
 .../queries/QueryTest/create-table-like-table.test |  27 +++
 tests/common/skip.py                               |   3 +-
 tests/metadata/test_ddl.py                         |   7 +
 16 files changed, 496 insertions(+), 84 deletions(-)
 create mode 100644 fe/src/main/java/org/apache/impala/analysis/OrcSchemaExtractor.java
 rename fe/src/main/java/org/apache/impala/analysis/{ParquetHelper.java => ParquetSchemaExtractor.java} (92%)
 create mode 100644 fe/src/main/java/org/apache/impala/util/FileAnalysisUtil.java
 create mode 100644 testdata/workloads/functional-query/queries/QueryTest/create-table-like-file-orc.test


[impala] 01/02: IMPALA-8501: Fix race condition on port in RpcMgrTest

Posted by tm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tmarshall pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit c359c557eb299174c43ed0d27ab1c2247fc94f74
Author: Thomas Tauber-Marshall <tm...@cloudera.com>
AuthorDate: Thu Jan 9 16:18:24 2020 -0800

    IMPALA-8501: Fix race condition on port in RpcMgrTest
    
    RpcMgrTest previously would choose a random, unused port when starting
    and then eventually start a krpc service on the port. This resulted in
    a race condition where the port could be taken by another process
    between when it was chosen and when RpcMgrTest actually used it,
    causing RpcMgrTest to fail.
    
    This patch significantly reduces the probability of such an error
    happening by choosing the port immediately before it is used.
    
    Change-Id: I841196f8557f29816f5f2960f36fc7772233b975
    Reviewed-on: http://gerrit.cloudera.org:8080/14997
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/rpc/rpc-mgr-test.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/be/src/rpc/rpc-mgr-test.h b/be/src/rpc/rpc-mgr-test.h
index f559409..8f4871a 100644
--- a/be/src/rpc/rpc-mgr-test.h
+++ b/be/src/rpc/rpc-mgr-test.h
@@ -67,8 +67,6 @@ static string CURRENT_EXECUTABLE_PATH;
 
 namespace impala {
 
-static int32_t SERVICE_PORT = FindUnusedEphemeralPort();
-
 const static string IMPALA_HOME(getenv("IMPALA_HOME"));
 const string& SERVER_CERT =
     Substitute("$0/be/src/testutil/server-cert.pem", IMPALA_HOME);
@@ -137,7 +135,7 @@ class RpcMgrTest : public testing::Test {
   virtual void SetUp() {
     IpAddr ip;
     ASSERT_OK(HostnameToIpAddr(FLAGS_hostname, &ip));
-    krpc_address_ = MakeNetworkAddress(ip, SERVICE_PORT);
+    krpc_address_ = MakeNetworkAddress(ip, FindUnusedEphemeralPort());
     exec_env_.reset(new ExecEnv());
     ASSERT_OK(rpc_mgr_.Init(krpc_address_));
   }


[impala] 02/02: IMPALA-8046: Support CREATE TABLE from an ORC file

Posted by tm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tmarshall pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 0511b44f9236c655e695185b33f412ec65a80a07
Author: norbert.luksa <no...@cloudera.com>
AuthorDate: Thu Dec 19 09:32:34 2019 +0100

    IMPALA-8046: Support CREATE TABLE from an ORC file
    
    Impala supports creating a table using the schema of a file.
    However, only Parquet is supported currently. This commit adds
    support for creating tables from ORC files.
    
    The change relies on the ORC Java API with version 1.5 or
    greater, because of a bug in earlier versions. Therefore, ORC is
    listed as an external dependency, instead of relying on Hive's
    ORC version (from Hive3, Hive also lists it as a dependency).
    
    Also, the commit performs a little clean-up on the ParquetHelper
    class, renaming it to ParquetSchemaExtractor and removing outdated
    comments.
    
    To create a table from an ORC file, run:
    CREATE TABLE tablename LIKE ORC '/path/to/file'
    
    Tests:
     * Added analysis tests for primitive and complex types.
     * Added e2e tests for creating tables from ORC files.
    
    Change-Id: I77cd84cda2ed86516937a67eb320fd41e3f1cf2d
    Reviewed-on: http://gerrit.cloudera.org:8080/14811
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 bin/impala-config.sh                               |   1 +
 fe/pom.xml                                         |  26 +++
 .../impala/analysis/CreateTableLikeFileStmt.java   |  14 +-
 .../apache/impala/analysis/OrcSchemaExtractor.java | 200 +++++++++++++++++++++
 ...quetHelper.java => ParquetSchemaExtractor.java} |  38 ++--
 .../org/apache/impala/common/FileSystemUtil.java   |   8 +
 .../org/apache/impala/util/FileAnalysisUtil.java   |  50 ++++++
 .../org/apache/impala/analysis/AnalyzeDDLTest.java |  74 ++++++--
 impala-parent/pom.xml                              |   1 +
 shaded-deps/pom.xml                                |   1 +
 .../QueryTest/create-table-like-file-orc.test      |  89 +++++++++
 .../queries/QueryTest/create-table-like-file.test  |  37 ----
 .../queries/QueryTest/create-table-like-table.test |  27 +++
 tests/common/skip.py                               |   3 +-
 tests/metadata/test_ddl.py                         |   7 +
 15 files changed, 495 insertions(+), 81 deletions(-)

diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 9848505..4758da4 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -190,6 +190,7 @@ export IMPALA_AVRO_JAVA_VERSION=1.8.2-cdh6.x-SNAPSHOT
 export IMPALA_LLAMA_MINIKDC_VERSION=1.0.0
 export IMPALA_KITE_VERSION=1.0.0-cdh6.x-SNAPSHOT
 export IMPALA_KUDU_JAVA_VERSION=1.11.0-cdh6.x-SNAPSHOT
+export IMPALA_ORC_JAVA_VERSION=1.6.2
 
 # When IMPALA_(CDH_COMPONENT)_URL are overridden, they may contain '$(platform_label)'
 # which will be substituted for the CDH platform label in bootstrap_toolchain.py
diff --git a/fe/pom.xml b/fe/pom.xml
index 046851d..d75d1c2 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -306,6 +306,22 @@ under the License.
     </dependency>
 
     <dependency>
+      <groupId>org.apache.orc</groupId>
+      <artifactId>orc-core</artifactId>
+      <version>${orc.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-common</artifactId>
+        </exclusion>
+       <exclusion>
+          <groupId>org.apache.hive</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
       <groupId>org.apache.hbase</groupId>
       <artifactId>hbase-client</artifactId>
       <version>${hbase.version}</version>
@@ -713,6 +729,7 @@ under the License.
                     <exclude>org.apache.kudu:*</exclude>
                     <exclude>org.apache.sentry:*</exclude>
                     <exclude>org.apache.parquet:*</exclude>
+                    <exclude>org.apache.orc:*</exclude>
                   </excludes>
                   <includes>
                     <!-- hadoop-yarn-common depends on some Jetty utilities. -->
@@ -725,6 +742,7 @@ under the License.
                     <include>org.apache.kudu:*:${kudu.version}</include>
                     <include>org.apache.sentry:*:${sentry.version}</include>
                     <include>org.apache.parquet:*:${parquet.version}</include>
+                    <include>org.apache.orc:*:${orc.version}</include>
                   </includes>
                 </bannedDependencies>
               </rules>
@@ -946,6 +964,14 @@ under the License.
               <groupId>org.apache.ant</groupId>
               <artifactId>*</artifactId>
             </exclusion>
+            <exclusion>
+              <groupId>orc</groupId>
+              <artifactId>*</artifactId>
+            </exclusion>
+            <exclusion>
+              <groupId>org.apache.orc</groupId>
+              <artifactId>*</artifactId>
+            </exclusion>
           </exclusions>
         </dependency>
 
diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeFileStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeFileStmt.java
index 5053572..2d034b6 100644
--- a/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeFileStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeFileStmt.java
@@ -26,6 +26,7 @@ import org.apache.impala.catalog.HdfsCompression;
 import org.apache.impala.catalog.HdfsFileFormat;
 import org.apache.impala.common.AnalysisException;
 import org.apache.impala.common.Pair;
+import org.apache.impala.compat.MetastoreShim;
 import org.apache.impala.thrift.THdfsFileFormat;
 
 
@@ -71,11 +72,18 @@ public class CreateTableLikeFileStmt extends CreateTableStmt {
     schemaLocation_.analyze(analyzer, Privilege.ALL, FsAction.READ);
     switch (schemaFileFormat_) {
       case PARQUET:
-        getColumnDefs().addAll(ParquetHelper.extractParquetSchema(schemaLocation_));
+        getColumnDefs().addAll(ParquetSchemaExtractor.extract(schemaLocation_));
+        break;
+      case ORC:
+        if (MetastoreShim.getMajorVersion() < 3) {
+          throw new AnalysisException("Creating table like ORC file is unsupported for " +
+              "Hive with version < 3");
+        }
+        getColumnDefs().addAll(OrcSchemaExtractor.extract(schemaLocation_));
         break;
       default:
-        throw new AnalysisException("Unsupported file type for schema inference: "
-            + schemaFileFormat_);
+        throw new AnalysisException("Unsupported file type for schema inference: " +
+            schemaFileFormat_);
     }
     super.analyze(analyzer);
   }
diff --git a/fe/src/main/java/org/apache/impala/analysis/OrcSchemaExtractor.java b/fe/src/main/java/org/apache/impala/analysis/OrcSchemaExtractor.java
new file mode 100644
index 0000000..9515ff4
--- /dev/null
+++ b/fe/src/main/java/org/apache/impala/analysis/OrcSchemaExtractor.java
@@ -0,0 +1,200 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.analysis;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.impala.catalog.MapType;
+import org.apache.impala.catalog.ScalarType;
+import org.apache.impala.catalog.StructField;
+import org.apache.impala.catalog.ArrayType;
+import org.apache.impala.catalog.StructType;
+import org.apache.impala.catalog.Type;
+import org.apache.impala.common.AnalysisException;
+import org.apache.impala.common.FileSystemUtil;
+import org.apache.impala.util.FileAnalysisUtil;
+import org.apache.orc.OrcFile;
+import org.apache.orc.OrcFile.ReaderOptions;
+import org.apache.orc.Reader;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.TypeDescription.Category;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Provides a helper function (extract()) which extracts the Impala schema from a given
+ * ORC file. Details of the ORC types:
+ * https://orc.apache.org/docs/types.html
+ */
+public class OrcSchemaExtractor {
+  private final static String ERROR_MSG =
+      "Failed to convert ORC type\n%s\nto an Impala %s type:\n%s\n";
+
+  /**
+   * Validates the path and loads the ORC schema of the file. The ORC schema is also an
+   * ORC type (TypeDescription), represented as a struct.
+   */
+  private static TypeDescription loadOrcSchema(Path pathToFile) throws AnalysisException {
+    FileAnalysisUtil.CheckIfFile(pathToFile);
+    Reader reader = null;
+    try {
+      reader = OrcFile.createReader(pathToFile,
+          new ReaderOptions(FileSystemUtil.getConfiguration()));
+    } catch (IOException e) {
+      // OrcFile.createReader throws IOException in case of any failure, including trying
+      // to open a non-ORC file.
+      throw new AnalysisException("Failed to open file as an ORC file: " + e);
+    }
+    return reader.getSchema();
+  }
+
+  /**
+   * Converts a primitive ORC type to an Impala Type.
+   */
+  static private Type convertPrimitiveOrcType(TypeDescription type) {
+    Category category = type.getCategory();
+    Preconditions.checkState(category.isPrimitive());
+    switch (category) {
+      case BINARY: return Type.STRING;
+      case BOOLEAN: return Type.BOOLEAN;
+      case BYTE: return Type.TINYINT;
+      case CHAR: return ScalarType.createCharType(type.getMaxLength());
+      case DATE: return Type.DATE;
+      case DECIMAL:
+        return ScalarType.createDecimalType(type.getPrecision(), type.getScale());
+      case DOUBLE: return Type.DOUBLE;
+      case FLOAT: return Type.FLOAT;
+      case INT: return Type.INT;
+      case LONG: return Type.BIGINT;
+      case SHORT: return Type.SMALLINT;
+      case STRING: return Type.STRING;
+      case TIMESTAMP: return Type.TIMESTAMP;
+      case VARCHAR: return ScalarType.createVarcharType(type.getMaxLength());
+      default:
+        Preconditions.checkState(false,
+            "Unexpected ORC primitive type: " + category.getName());
+        return null;
+    }
+  }
+
+  /**
+   * Converts an ORC list type to an Impala array Type. An ORC list contains one child,
+   * the TypeDescription of the elements.
+   */
+  private static ArrayType convertArray(TypeDescription listType)
+      throws AnalysisException {
+    Preconditions.checkState(listType.getChildren().size() == 1);
+    return new ArrayType(convertOrcType(listType.getChildren().get(0)));
+  }
+
+  /**
+   * Converts an ORC map type to an Impala map Type. An ORC map contains two children,
+   * the TypeDescriptions for the keys and values.
+   */
+  private static MapType convertMap(TypeDescription mapType) throws AnalysisException {
+    // ORC maps have two children, one for the keys, one for the values.
+    Preconditions.checkState(mapType.getChildren().size() == 2);
+
+    TypeDescription key = mapType.getChildren().get(0);
+    TypeDescription value = mapType.getChildren().get(1);
+
+    if (!key.getCategory().isPrimitive()) {
+      throw new AnalysisException(String.format(ERROR_MSG, mapType.toString(), "MAP",
+          "The key type of the MAP type must be primitive."));
+    }
+
+    return new MapType(convertOrcType(key), convertOrcType(value));
+  }
+
+  /**
+   * Converts an ORC struct type to an Impala struct Type.
+   */
+  private static StructType convertStruct(TypeDescription structType)
+      throws AnalysisException {
+    List<StructField> structFields = new ArrayList<>();
+    List<String> fieldNames = structType.getFieldNames();
+    List<TypeDescription> subTypes = structType.getChildren();
+    Preconditions.checkState(subTypes.size() == fieldNames.size());
+    for (int i = 0; i < subTypes.size(); i++) {
+      StructField f = new StructField(fieldNames.get(i), convertOrcType(subTypes.get(i)));
+      structFields.add(f);
+    }
+    return new StructType(structFields);
+  }
+
+  /**
+   * Converts a non-primitive ORC type (LIST, MAP or STRUCT) to an Impala Type.
+   * Throws AnalysisException for UNION, which is rejected as unsupported.
+   */
+  static private Type convertComplexOrcType(TypeDescription type)
+      throws AnalysisException {
+    Category category = type.getCategory();
+    Preconditions.checkState(!category.isPrimitive());
+    switch (category) {
+      case LIST: return convertArray(type);
+      case MAP: return convertMap(type);
+      case STRUCT: return convertStruct(type);
+      case UNION:
+        throw new AnalysisException(
+            "Unsupported ORC type UNION for field " + category.getName());
+      default:
+        Preconditions.checkState(false,
+            "Unexpected ORC complex type: " + category.getName());
+        return null;
+    }
+  }
+
+  /**
+   * Converts an ORC type to an Impala Type.
+   */
+  static private Type convertOrcType(TypeDescription type) throws AnalysisException {
+    if (type.getCategory().isPrimitive()) {
+      return convertPrimitiveOrcType(type);
+    } else {
+      return convertComplexOrcType(type);
+    }
+  }
+
+  /**
+   * Parses an ORC file stored in HDFS and returns the corresponding Impala schema.
+   * This fails with an analysis exception if any errors occur reading the file,
+   * parsing the ORC schema, or if the ORC types cannot be represented in Impala.
+   */
+  static public List<ColumnDef> extract(HdfsUri location) throws AnalysisException {
+    List<ColumnDef> schema = new ArrayList<>();
+    TypeDescription orcSchema = loadOrcSchema(location.getPath()); // Returns a STRUCT.
+    List<TypeDescription> subTypes = orcSchema.getChildren();
+    List<String> fieldNames = orcSchema.getFieldNames();
+    Preconditions.checkState(subTypes.size() == fieldNames.size());
+    for (int i = 0; i < subTypes.size(); i++) {
+      TypeDescription orcType = subTypes.get(i);
+      Type type = convertOrcType(orcType);
+      Preconditions.checkNotNull(type);
+      String colName = fieldNames.get(i);
+      Map<ColumnDef.Option, Object> option = new HashMap<>();
+      option.put(ColumnDef.Option.COMMENT, "Inferred from ORC file.");
+      schema.add(new ColumnDef(colName, new TypeDef(type), option));
+    }
+    return schema;
+  }
+}
diff --git a/fe/src/main/java/org/apache/impala/analysis/ParquetHelper.java b/fe/src/main/java/org/apache/impala/analysis/ParquetSchemaExtractor.java
similarity index 92%
rename from fe/src/main/java/org/apache/impala/analysis/ParquetHelper.java
rename to fe/src/main/java/org/apache/impala/analysis/ParquetSchemaExtractor.java
index dc55a34..0e2d15a 100644
--- a/fe/src/main/java/org/apache/impala/analysis/ParquetHelper.java
+++ b/fe/src/main/java/org/apache/impala/analysis/ParquetSchemaExtractor.java
@@ -25,14 +25,14 @@ import java.util.List;
 import java.util.Map;
 
 import com.google.common.base.Preconditions;
-import org.apache.hadoop.fs.FileSystem;
+
 import org.apache.hadoop.fs.Path;
+import org.apache.parquet.format.converter.ParquetMetadataConverter;
 import org.apache.parquet.hadoop.metadata.ParquetMetadata;
 import org.apache.parquet.hadoop.ParquetFileReader;
 import org.apache.parquet.schema.PrimitiveType;
 import org.apache.parquet.schema.LogicalTypeAnnotation;
 import org.apache.parquet.schema.LogicalTypeAnnotation.*;
-
 import org.apache.impala.catalog.ArrayType;
 import org.apache.impala.catalog.MapType;
 import org.apache.impala.catalog.ScalarType;
@@ -41,16 +41,13 @@ import org.apache.impala.catalog.StructType;
 import org.apache.impala.catalog.Type;
 import org.apache.impala.common.AnalysisException;
 import org.apache.impala.common.FileSystemUtil;
+import org.apache.impala.util.FileAnalysisUtil;
 
 /**
- * Provides extractParquetSchema() to extract a schema
- * from a parquet file.
- *
- * Because Parquet's Java package changed between Parquet 1.5
- * and 1.9, a second copy of this file, with "org.apache.parquet." replaced
- * with "org.apache.org.apache.parquet." is generated by the build system.
+ * Provides a helper function (extract()) which extracts the Impala schema from a given
+ * Parquet file.
  */
-class ParquetHelper {
+class ParquetSchemaExtractor {
   private final static String ERROR_MSG =
       "Failed to convert Parquet type\n%s\nto an Impala %s type:\n%s\n";
 
@@ -61,21 +58,11 @@ class ParquetHelper {
    */
   private static org.apache.parquet.schema.MessageType loadParquetSchema(Path pathToFile)
       throws AnalysisException {
-    try {
-      FileSystem fs = pathToFile.getFileSystem(FileSystemUtil.getConfiguration());
-      if (!fs.isFile(pathToFile)) {
-        throw new AnalysisException("Cannot infer schema, path is not a file: " +
-                                    pathToFile);
-      }
-    } catch (IOException e) {
-      throw new AnalysisException("Failed to connect to filesystem:" + e);
-    } catch (IllegalArgumentException e) {
-      throw new AnalysisException(e.getMessage());
-    }
+    FileAnalysisUtil.CheckIfFile(pathToFile);
     ParquetMetadata readFooter = null;
     try {
       readFooter = ParquetFileReader.readFooter(FileSystemUtil.getConfiguration(),
-          pathToFile);
+          pathToFile, ParquetMetadataConverter.NO_FILTER);
     } catch (FileNotFoundException e) {
       throw new AnalysisException("File not found: " + e);
     } catch (IOException e) {
@@ -95,7 +82,8 @@ class ParquetHelper {
    * Converts a "primitive" Parquet type to an Impala type.
    * A primitive type is a non-nested type with no annotations.
    */
-  private static Type convertPrimitiveParquetType(org.apache.parquet.schema.Type parquetType)
+  private static Type convertPrimitiveParquetType(
+      org.apache.parquet.schema.Type parquetType)
       throws AnalysisException {
     Preconditions.checkState(parquetType.isPrimitive());
     PrimitiveType prim = parquetType.asPrimitiveType();
@@ -339,9 +327,9 @@ class ParquetHelper {
    * This fails with an analysis exception if any errors occur reading the file,
    * parsing the Parquet schema, or if the Parquet types cannot be represented in Impala.
    */
-  static List<ColumnDef> extractParquetSchema(HdfsUri location)
-      throws AnalysisException {
-    org.apache.parquet.schema.MessageType parquetSchema = loadParquetSchema(location.getPath());
+  static List<ColumnDef> extract(HdfsUri location) throws AnalysisException {
+    org.apache.parquet.schema.MessageType parquetSchema =
+        loadParquetSchema(location.getPath());
     List<org.apache.parquet.schema.Type> fields = parquetSchema.getFields();
     List<ColumnDef> schema = new ArrayList<>();
 
diff --git a/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java b/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java
index 7eccd13..f77fd55 100644
--- a/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java
+++ b/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java
@@ -599,6 +599,14 @@ public class FileSystemUtil {
   }
 
   /**
+   * Returns true if the path 'p' is a file, false if not. Throws if path does not exist.
+   */
+  public static boolean isFile(Path p) throws IOException, FileNotFoundException {
+    FileSystem fs = getFileSystemForPath(p);
+    return fs.getFileStatus(p).isFile();
+  }
+
+  /**
    * Return the path of 'path' relative to the startPath. This may
    * differ from simply the file name in the case of recursive listings.
    */
diff --git a/fe/src/main/java/org/apache/impala/util/FileAnalysisUtil.java b/fe/src/main/java/org/apache/impala/util/FileAnalysisUtil.java
new file mode 100644
index 0000000..d4abb5e
--- /dev/null
+++ b/fe/src/main/java/org/apache/impala/util/FileAnalysisUtil.java
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.util;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.impala.common.AnalysisException;
+import org.apache.impala.common.FileSystemUtil;
+
+/**
+ * Provides common utilities for OrcSchemaExtractor and ParquetSchemaExtractor.
+ */
+public class FileAnalysisUtil {
+
+  /**
+   * Throws if the given path is not a file.
+   */
+  public static void CheckIfFile(Path pathToFile) throws AnalysisException {
+    try {
+      if (!FileSystemUtil.isFile(pathToFile)) {
+        throw new AnalysisException("Cannot infer schema, path is not a file: " +
+            pathToFile);
+      }
+    } catch (FileNotFoundException e) {
+      throw new AnalysisException("Cannot infer schema, path does not exist: " +
+          pathToFile);
+    } catch (IOException e) {
+      throw new AnalysisException("Failed to connect to filesystem:" + e);
+    } catch (IllegalArgumentException e) {
+      throw new AnalysisException(e.getMessage());
+    }
+  }
+}
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
index dff57a2..8a680b8 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
@@ -54,9 +54,8 @@ import org.apache.impala.thrift.TBackendGflags;
 import org.apache.impala.thrift.TDescribeTableParams;
 import org.apache.impala.thrift.TQueryOptions;
 import org.apache.impala.util.MetaStoreUtil;
-import org.junit.AfterClass;
 import org.junit.Assert;
-import org.junit.BeforeClass;
+import org.junit.Assume;
 import org.junit.Test;
 
 import com.google.common.base.Joiner;
@@ -1994,27 +1993,27 @@ public class AnalyzeDDLTest extends FrontendTestBase {
         "Database does not exist: database_DNE");
 
     // check invalid paths
-    AnalysisError("create table if not exists functional.zipcode_incomes like parquet "
-        + "'/test-warehouse'",
+    AnalysisError("create table if not exists functional.zipcode_incomes like parquet " +
+        "'/test-warehouse'",
         "Cannot infer schema, path is not a file: hdfs://localhost:20500/test-warehouse");
     AnalysisError("create table newtbl_DNE like parquet 'foobar'",
         "URI path must be absolute: foobar");
     AnalysisError("create table newtbl_DNE like parquet '/not/a/file/path'",
-        "Cannot infer schema, path is not a file: "
-        + "hdfs://localhost:20500/not/a/file/path");
-    AnalysisError("create table if not exists functional.zipcode_incomes like parquet "
-        + "'file:///tmp/foobar'",
-        "Cannot infer schema, path is not a file: file:/tmp/foobar");
+        "Cannot infer schema, path does not exist: " +
+        "hdfs://localhost:20500/not/a/file/path");
+    AnalysisError("create table if not exists functional.zipcode_incomes like parquet " +
+        "'file:///tmp/foobar'",
+        "Cannot infer schema, path does not exist: file:/tmp/foobar");
 
     // check valid paths with bad file contents
-    AnalysisError("create table database_DNE.newtbl_DNE like parquet "
-        + "'/test-warehouse/zipcode_incomes_rc/000000_0'",
-        "File is not a parquet file: "
-        + "hdfs://localhost:20500/test-warehouse/zipcode_incomes_rc/000000_0");
+    AnalysisError("create table database_DNE.newtbl_DNE like parquet " +
+        "'/test-warehouse/zipcode_incomes_rc/000000_0'",
+        "File is not a parquet file: " +
+        "hdfs://localhost:20500/test-warehouse/zipcode_incomes_rc/000000_0");
 
     // this is a decimal file without annotations
-    AnalysisError("create table if not exists functional.zipcode_incomes like parquet "
-        + "'/test-warehouse/schemas/malformed_decimal_tiny.parquet'",
+    AnalysisError("create table if not exists functional.zipcode_incomes like parquet " +
+        "'/test-warehouse/schemas/malformed_decimal_tiny.parquet'",
         "Unsupported parquet type FIXED_LEN_BYTE_ARRAY for field c1");
 
     // Invalid file format
@@ -2024,6 +2023,51 @@ public class AnalyzeDDLTest extends FrontendTestBase {
 
 
     BackendConfig.INSTANCE.setZOrderSortUnlocked(false);
+
+  }
+
+  @Test
+  public void TestCreateTableLikeFileOrc() throws AnalysisException {
+    Assume.assumeTrue(
+        "Skipping this test; CREATE TABLE LIKE ORC is only supported when running " +
+            "against Hive-3 or greater", TestUtils.getHiveMajorVersion() >= 3);
+
+    AnalysisError("create table database_DNE.newtbl_DNE like ORC " +
+        "'/test-warehouse/schemas/alltypestiny.parquet'",
+        "Failed to open file as an ORC file: org.apache.orc.FileFormatException: " +
+        "Malformed ORC file " +
+        "hdfs://localhost:20500/test-warehouse/schemas/alltypestiny.parquet" +
+        ". Invalid postscript.");
+
+    // Inferring primitive and complex types
+    AnalyzesOk("create table if not exists newtbl_DNE like orc " +
+        "'/test-warehouse/alltypestiny_orc_def/year=2009/month=1/000000_0'");
+    AnalyzesOk("create table if not exists newtbl_DNE like orc " +
+        "'/test-warehouse/functional_orc_def.db/complextypes_fileformat/000000_0'");
+
+    // check invalid paths
+    AnalysisError("create table if not exists functional.zipcode_incomes like ORC " +
+        "'/test-warehouse'",
+        "Cannot infer schema, path is not a file: hdfs://localhost:20500/test-warehouse");
+    AnalysisError("create table newtbl_DNE like ORC 'foobar'",
+        "URI path must be absolute: foobar");
+    AnalysisError("create table newtbl_DNE like ORC '/not/a/file/path'",
+        "Cannot infer schema, path does not exist: " +
+        "hdfs://localhost:20500/not/a/file/path");
+    AnalysisError("create table if not exists functional.zipcode_incomes like ORC " +
+        "'file:///tmp/foobar'",
+        "Cannot infer schema, path does not exist: file:/tmp/foobar");
+  }
+
+  @Test
+  public void TestCreateTableLikeFileOrcWithHive2() throws AnalysisException {
+    // Testing if error is thrown when trying to create table like orc file with Hive-2.
+    Assume.assumeTrue(TestUtils.getHiveMajorVersion() < 3);
+
+    // Even a valid ORC file must be rejected when running against Hive-2.
+    AnalysisError("create table if not exists newtbl_DNE like orc " +
+        "'/test-warehouse/alltypestiny_orc_def/year=2009/month=1/000000_0'",
+        "Creating table like ORC file is unsupported for Hive with version < 3");
   }
 
   @Test
diff --git a/impala-parent/pom.xml b/impala-parent/pom.xml
index cbb6c7b..f8cc667 100644
--- a/impala-parent/pom.xml
+++ b/impala-parent/pom.xml
@@ -38,6 +38,7 @@ under the License.
     <postgres.jdbc.version>${env.IMPALA_POSTGRES_JDBC_DRIVER_VERSION}</postgres.jdbc.version>
     <sentry.version>${env.IMPALA_SENTRY_VERSION}</sentry.version>
     <hbase.version>${env.IMPALA_HBASE_VERSION}</hbase.version>
+    <orc.version>${env.IMPALA_ORC_JAVA_VERSION}</orc.version>
     <parquet.version>${env.IMPALA_PARQUET_VERSION}</parquet.version>
     <kite.version>${env.IMPALA_KITE_VERSION}</kite.version>
     <knox.version>${env.IMPALA_KNOX_VERSION}</knox.version>
diff --git a/shaded-deps/pom.xml b/shaded-deps/pom.xml
index 5870894..eefd73b 100644
--- a/shaded-deps/pom.xml
+++ b/shaded-deps/pom.xml
@@ -94,6 +94,7 @@ the same dependencies
                 <include>org/apache/hadoop/hive/serde2/**</include>
                 <include>org/apache/hive/service/rpc/thrift/**</include>
                 <include>org/apache/hive/common/HiveVersionAnnotation.class</include>
+                <include>org/apache/orc/**</include>
                 <include>com/google/**</include>
               </includes>
             </filter>
diff --git a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file-orc.test b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file-orc.test
new file mode 100644
index 0000000..71901ca
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file-orc.test
@@ -0,0 +1,89 @@
+====
+---- QUERY
+create table $DATABASE.temp_decimal_table_orc like ORC
+'$FILESYSTEM_PREFIX/test-warehouse/decimal_tiny_orc_def/000000_0'
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+describe $DATABASE.temp_decimal_table_orc
+---- RESULTS
+'c1','decimal(10,4)','Inferred from ORC file.'
+'c2','decimal(15,5)','Inferred from ORC file.'
+'c3','decimal(1,1)','Inferred from ORC file.'
+---- TYPES
+STRING, STRING, STRING
+====
+---- QUERY
+create table $DATABASE.temp_chars_table like ORC
+'$FILESYSTEM_PREFIX/test-warehouse/chars_tiny_orc_def/000000_0'
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+describe $DATABASE.temp_chars_table
+---- RESULTS
+'cs','char(5)','Inferred from ORC file.'
+'cl','char(140)','Inferred from ORC file.'
+'vc','varchar(32)','Inferred from ORC file.'
+---- TYPES
+STRING, STRING, STRING
+====
+---- QUERY
+create table $DATABASE.like_zipcodes_file_orc like ORC
+'$FILESYSTEM_PREFIX/test-warehouse/zipcode_incomes_orc_def/000000_0'
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+describe $DATABASE.like_zipcodes_file_orc
+---- RESULTS
+'id','string','Inferred from ORC file.'
+'zip','string','Inferred from ORC file.'
+'description1','string','Inferred from ORC file.'
+'description2','string','Inferred from ORC file.'
+'income','int','Inferred from ORC file.'
+---- TYPES
+STRING, STRING, STRING
+====
+---- QUERY
+create table $DATABASE.like_alltypestiny_file_orc like ORC
+'$FILESYSTEM_PREFIX/test-warehouse/alltypestiny_orc_def/year=2009/month=1/000000_0'
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+describe $DATABASE.like_alltypestiny_file_orc
+---- RESULTS
+'id','int','Inferred from ORC file.'
+'bool_col','boolean','Inferred from ORC file.'
+'tinyint_col','tinyint','Inferred from ORC file.'
+'smallint_col','smallint','Inferred from ORC file.'
+'int_col','int','Inferred from ORC file.'
+'bigint_col','bigint','Inferred from ORC file.'
+'float_col','float','Inferred from ORC file.'
+'double_col','double','Inferred from ORC file.'
+'date_string_col','string','Inferred from ORC file.'
+'string_col','string','Inferred from ORC file.'
+'timestamp_col','timestamp','Inferred from ORC file.'
+---- TYPES
+STRING, STRING, STRING
+====
+---- QUERY
+create table allcomplextypes_clone_orc like ORC
+'$FILESYSTEM_PREFIX/test-warehouse/complextypestbl_orc_def/nullable.orc'
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+describe allcomplextypes_clone_orc
+---- RESULTS
+'id','bigint','Inferred from ORC file.'
+'int_array','array<int>','Inferred from ORC file.'
+'int_array_array','array<array<int>>','Inferred from ORC file.'
+'int_map','map<string,int>','Inferred from ORC file.'
+'int_map_array','array<map<string,int>>','Inferred from ORC file.'
+'nested_struct','struct<\n  a:int,\n  b:array<int>,\n  c:struct<\n    d:array<array<struct<\n      e:int,\n      f:string\n    >>>\n  >,\n  g:map<string,struct<\n    h:struct<\n      i:array<double>\n    >\n  >>\n>','Inferred from ORC file.'
+---- TYPES
+STRING, STRING, STRING
+====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test
index 7a80602..fd81aee 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file.test
@@ -106,43 +106,6 @@ describe $DATABASE.like_alltypestiny_file
 STRING, STRING, STRING
 ====
 ---- QUERY
-drop table if exists allcomplextypes_clone
----- RESULTS
-'Table does not exist.'
-====
----- QUERY
-create table allcomplextypes_clone like functional.allcomplextypes
-stored as parquet
----- RESULTS
-'Table has been created.'
-====
----- QUERY
-describe allcomplextypes_clone
----- RESULTS
-'id','int',''
-'int_array_col','array<int>',''
-'array_array_col','array<array<int>>',''
-'map_array_col','array<map<string,int>>',''
-'struct_array_col','array<struct<\n  f1:bigint,\n  f2:string\n>>',''
-'int_map_col','map<string,int>',''
-'array_map_col','map<string,array<int>>',''
-'map_map_col','map<string,map<string,int>>',''
-'struct_map_col','map<string,struct<\n  f1:bigint,\n  f2:string\n>>',''
-'int_struct_col','struct<\n  f1:int,\n  f2:int\n>',''
-'complex_struct_col','struct<\n  f1:int,\n  f2:array<int>,\n  f3:map<string,int>\n>',''
-'nested_struct_col','struct<\n  f1:int,\n  f2:struct<\n    f11:bigint,\n    f12:struct<\n      f21:bigint\n    >\n  >\n>',''
-'complex_nested_struct_col','struct<\n  f1:int,\n  f2:array<struct<\n    f11:bigint,\n    f12:map<string,struct<\n      f21:bigint\n    >>\n  >>\n>',''
-'year','int',''
-'month','int',''
----- TYPES
-STRING, STRING, STRING
-====
----- QUERY
-drop table allcomplextypes_clone
----- RESULTS
-'Table has been dropped.'
-====
----- QUERY
 drop table if exists $DATABASE.temp_legacy_table
 ---- RESULTS
 'Table does not exist.'
diff --git a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-table.test b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-table.test
index ee16c37..456f499 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-table.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-table.test
@@ -251,3 +251,30 @@ describe formatted sortbytest_override;
 ---- TYPES
 STRING,STRING,STRING
 ====
+---- QUERY
+create table allcomplextypes_clone like functional.allcomplextypes
+stored as parquet
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+describe allcomplextypes_clone
+---- RESULTS
+'id','int',''
+'int_array_col','array<int>',''
+'array_array_col','array<array<int>>',''
+'map_array_col','array<map<string,int>>',''
+'struct_array_col','array<struct<\n  f1:bigint,\n  f2:string\n>>',''
+'int_map_col','map<string,int>',''
+'array_map_col','map<string,array<int>>',''
+'map_map_col','map<string,map<string,int>>',''
+'struct_map_col','map<string,struct<\n  f1:bigint,\n  f2:string\n>>',''
+'int_struct_col','struct<\n  f1:int,\n  f2:int\n>',''
+'complex_struct_col','struct<\n  f1:int,\n  f2:array<int>,\n  f3:map<string,int>\n>',''
+'nested_struct_col','struct<\n  f1:int,\n  f2:struct<\n    f11:bigint,\n    f12:struct<\n      f21:bigint\n    >\n  >\n>',''
+'complex_nested_struct_col','struct<\n  f1:int,\n  f2:array<struct<\n    f11:bigint,\n    f12:map<string,struct<\n      f21:bigint\n    >>\n  >>\n>',''
+'year','int',''
+'month','int',''
+---- TYPES
+STRING, STRING, STRING
+====
\ No newline at end of file
diff --git a/tests/common/skip.py b/tests/common/skip.py
index 2ab4250..3729649 100644
--- a/tests/common/skip.py
+++ b/tests/common/skip.py
@@ -227,7 +227,8 @@ class SkipIfHive2:
   create_external_kudu_table = pytest.mark.skipif(HIVE_MAJOR_VERSION == 2,
       reason="Hive 2 does not support creating external.table.purge Kudu tables."
              " See IMPALA-9092 for details.")
-
+  orc = pytest.mark.skipif(HIVE_MAJOR_VERSION <= 2,
+      reason="CREATE TABLE LIKE ORC is only supported with Hive version >= 3")
 
 class SkipIfCatalogV2:
   """Expose decorators as methods so that is_catalog_v2_cluster() can be evaluated lazily
diff --git a/tests/metadata/test_ddl.py b/tests/metadata/test_ddl.py
index dbb82f9..25f7032 100644
--- a/tests/metadata/test_ddl.py
+++ b/tests/metadata/test_ddl.py
@@ -294,6 +294,13 @@ class TestDdlStatements(TestDdlBase):
     self.run_test_case('QueryTest/create-table-like-file', vector,
         use_db=unique_database, multiple_impalad=self._use_multiple_impalad(vector))
 
+  @SkipIfHive2.orc  # skipped when HIVE_MAJOR_VERSION <= 2 (see tests/common/skip.py)
+  @UniqueDatabase.parametrize(sync_ddl=True)
+  def test_create_table_like_file_orc(self, vector, unique_database):
+    vector.get_value('exec_option')['abort_on_error'] = False
+    self.run_test_case('QueryTest/create-table-like-file-orc', vector,
+        use_db=unique_database, multiple_impalad=self._use_multiple_impalad(vector))
+
   @UniqueDatabase.parametrize(sync_ddl=True)
   def test_create_table_as_select(self, vector, unique_database):
     vector.get_value('exec_option')['abort_on_error'] = False