Posted to commits@hive.apache.org by kr...@apache.org on 2022/07/19 04:48:07 UTC

[hive] branch master updated: HIVE-26395: Add support for CREATE TABLE LIKE FILE PARQUET (John Sherman, reviewed by Krisztian Kasa, Aman Sinha)

This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 855c0642342 HIVE-26395: Add support for CREATE TABLE LIKE FILE PARQUET (John Sherman, reviewed by Krisztian Kasa, Aman Sinha)
855c0642342 is described below

commit 855c0642342f83a80f30ecf41f1f7f08048d6f80
Author: John Sherman <jf...@cloudera.com>
AuthorDate: Mon Jul 18 21:47:57 2022 -0700

    HIVE-26395: Add support for CREATE TABLE LIKE FILE PARQUET (John Sherman, reviewed by Krisztian Kasa, Aman Sinha)
    
    - Add support for CREATE TABLE LIKE FILE PARQUET
      - Attempts to derive the schema for a new table from an existing parquet file
      - Example:
        CREATE TABLE ctlf_table LIKE FILE PARQUET 's3a://testbucket/files/schema.parq';
    - Add hive.parquet.infer.binary.as configuration option
      - Determines what the unannotated Parquet binary type gets interpreted as
        - either binary or string
        - default is binary
      - This configuration option is helpful since some systems expect binary to be
        interpreted as string.
    - This patch also modifies the HCatalog code path and removes a section of code
      that appeared incorrect.
      - The removed code attempted to force a STORED AS clause but never worked correctly.
      - The check could never fail, because every CREATE TABLE AST included
        TOK_LIKETABLE.
      - The code may also have been a remnant of a time when "STORED AS" was required
        for CREATE TABLE statements (before there was a default value).
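
    A minimal usage sketch (the file path is hypothetical); DESCRIBE then lists
    the columns inferred from the file's Parquet footer:

        CREATE TABLE ctlf_sketch LIKE FILE PARQUET '/tmp/example.parq';
        DESCRIBE ctlf_sketch;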
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |   3 +
 .../java/org/apache/hadoop/hive/ql/ErrorMsg.java   |   8 +-
 .../cli/SemanticAnalysis/CreateTableHook.java      |  16 +-
 .../apache/hadoop/hive/ql/parse/CreateDDLParser.g  |  21 +-
 .../org/apache/hadoop/hive/ql/parse/HiveParser.g   |   1 +
 .../hive/ql/ddl/table/create/CreateTableDesc.java  |  30 +-
 .../ql/ddl/table/create/CreateTableOperation.java  |  17 +
 .../hadoop/hive/ql/io/SchemaInferenceUtils.java    |  74 +++
 .../hive/ql/io/parquet/serde/ParquetHiveSerDe.java | 202 ++++++-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java     |  34 +-
 .../clientnegative/create_table_like_invalid.q     |   1 +
 .../clientpositive/create_table_like_file.q        |  90 +++
 .../clientnegative/create_table_like_invalid.q.out |   1 +
 .../llap/create_table_like_file.q.out              | 611 +++++++++++++++++++++
 .../apache/hadoop/hive/serde2/SchemaInference.java |  35 ++
 15 files changed, 1108 insertions(+), 36 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 50cfb85ba99..044040f8f11 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2237,6 +2237,9 @@ public class HiveConf extends Configuration {
         "Whether to use former Java date/time APIs to convert between timezones when writing timestamps in " +
         "Parquet files. Once data are written to the file the effect is permanent (also reflected in the metadata)." +
         "Changing the value of this property affects only new data written to the file."),
+    HIVE_PARQUET_INFER_BINARY_AS("hive.parquet.infer.binary.as", "binary", new StringSet("binary", "string"),
+        "This setting controls what the parquet binary type gets inferred as by CREATE TABLE LIKE FILE. This is helpful " +
+        "since some systems specify the parquet schema for strings as binary."),
     HIVE_AVRO_TIMESTAMP_SKIP_CONVERSION("hive.avro.timestamp.skip.conversion", false,
         "Some older Hive implementations (pre-3.1) wrote Avro timestamps in a UTC-normalized" +
         "manner, while from version 3.1 until now Hive wrote time zone agnostic timestamps. " +
diff --git a/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index d22cc7288de..8f7887d73a9 100644
--- a/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -482,7 +482,7 @@ public enum ErrorMsg {
   COMPACTION_REFUSED(10432, "Compaction request for {0}.{1}{2} is refused, details: {3}.", true),
   CBO_IS_REQUIRED(10433,
           "The following functionality requires CBO (" + HiveConf.ConfVars.HIVE_CBO_ENABLED.varname + "): {0}", true),
-
+  CTLF_UNSUPPORTED_FORMAT(10434, "CREATE TABLE LIKE FILE is not supported by the ''{0}'' file format", true),
 
   //========================== 20000 range starts here ========================//
 
@@ -517,6 +517,12 @@ public enum ErrorMsg {
   REPL_EXTERNAL_SERVICE_CONNECTION_ERROR(20017, "Failed to connect to {0} service. Error code {1}.",true),
   CLIENT_POLLING_OPSTATUS_INTERRUPTED(20018, "Interrupted while polling on the operation status", "70100"),
 
+  CTLF_FAILED_INFERENCE(20019, "Failed to infer schema:"),
+  CTLF_CLASS_NOT_FOUND(20020, "Failed to find SerDe class ({0}) for ''{1}''", true),
+  CTLF_MISSING_STORAGE_FORMAT_DESCRIPTOR(20021, "Failed to find StorageFormatDescriptor for file format ''{0}''", true),
+  PARQUET_FOOTER_ERROR(20022, "Failed to read parquet footer:"),
+  PARQUET_UNHANDLED_TYPE(20023, "Unhandled type {0}", true),
+
   // An exception from runtime that will show the full stack to client
   UNRESOLVED_RT_EXCEPTION(29999, "Runtime Error: {0}", "58004", true),
 
diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/CreateTableHook.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/CreateTableHook.java
index 9b66e6be74a..041a4d48458 100644
--- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/CreateTableHook.java
+++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/CreateTableHook.java
@@ -64,16 +64,13 @@ final class CreateTableHook extends HCatSemanticAnalyzerBase {
     // Analyze and create tbl properties object
     int numCh = ast.getChildCount();
 
-    tableName = BaseSemanticAnalyzer.getUnescapedName((ASTNode) ast
-      .getChild(0));
-    boolean likeTable = false;
+    tableName = BaseSemanticAnalyzer.getUnescapedName((ASTNode) ast.getChild(0));
     StorageFormat format = new StorageFormat(context.getConf());
 
     for (int num = 1; num < numCh; num++) {
       ASTNode child = (ASTNode) ast.getChild(num);
       if (format.fillStorageFormat(child)) {
-        if (org.apache.commons.lang3.StringUtils
-            .isNotEmpty(format.getStorageHandler())) {
+        if (StringUtils.isNotEmpty(format.getStorageHandler())) {
             return ast;
         }
         continue;
@@ -88,10 +85,6 @@ final class CreateTableHook extends HCatSemanticAnalyzerBase {
       case HiveParser.TOK_ALTERTABLE_BUCKETS:
         break;
 
-      case HiveParser.TOK_LIKETABLE:
-        likeTable = true;
-        break;
-
       case HiveParser.TOK_IFNOTEXISTS:
         try {
           List<String> tables = db.getTablesByPattern(tableName);
@@ -121,11 +114,6 @@ final class CreateTableHook extends HCatSemanticAnalyzerBase {
       }
     }
 
-    if (!likeTable && (format.getInputFormat() == null || format.getOutputFormat() == null)) {
-      throw new SemanticException(
-        "STORED AS specification is either incomplete or incorrect.");
-    }
-
     return ast;
   }
 
diff --git a/parser/src/java/org/apache/hadoop/hive/ql/parse/CreateDDLParser.g b/parser/src/java/org/apache/hadoop/hive/ql/parse/CreateDDLParser.g
index 69da7c78ae7..97f04f8dc1f 100644
--- a/parser/src/java/org/apache/hadoop/hive/ql/parse/CreateDDLParser.g
+++ b/parser/src/java/org/apache/hadoop/hive/ql/parse/CreateDDLParser.g
@@ -1,9 +1,9 @@
 /**
-   Licensed to the Apache Software Foundation (ASF) under one or more 
-   contributor license agreements.  See the NOTICE file distributed with 
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
    this work for additional information regarding copyright ownership.
    The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with 
+   (the "License"); you may not use this file except in compliance with
    the License.  You may obtain a copy of the License at
 
        http://www.apache.org/licenses/LICENSE-2.0
@@ -43,12 +43,19 @@ catch (RecognitionException e) {
 }
 }
 
+likeTableOrFile
+    : (KW_LIKE KW_FILE) => KW_LIKE KW_FILE
+    | (KW_LIKE KW_FILE format=identifier uri=StringLiteral) -> ^(TOK_LIKEFILE $format $uri)
+    | (KW_LIKE likeName=tableName) -> ^(TOK_LIKETABLE $likeName)
+    ;
+
 //----------------------- Rules for parsing createtable -----------------------------
 createTableStatement
 @init { gParent.pushMsg("create table statement", state); }
 @after { gParent.popMsg(state); }
     : KW_CREATE (temp=KW_TEMPORARY)? (trans=KW_TRANSACTIONAL)? (ext=KW_EXTERNAL)? KW_TABLE ifNotExists? name=tableName
-      (  like=KW_LIKE likeName=tableName
+      (  likeTableOrFile
+         createTablePartitionSpec?
          tableRowFormat?
          tableFileFormat?
          tableLocation?
@@ -65,7 +72,7 @@ createTableStatement
          (KW_AS selectStatementWithCTE)?
       )
     -> ^(TOK_CREATETABLE $name $temp? $trans? $ext? ifNotExists?
-         ^(TOK_LIKETABLE $likeName?)
+         likeTableOrFile?
          columnNameTypeOrConstraintList?
          tableComment?
          createTablePartitionSpec?
@@ -78,7 +85,7 @@ createTableStatement
          selectStatementWithCTE?
         )
     | KW_CREATE mgd=KW_MANAGED KW_TABLE ifNotExists? name=tableName
-      (  like=KW_LIKE likeName=tableName
+      (  likeTableOrFile
          tableRowFormat?
          tableFileFormat?
          tableLocation?
@@ -95,7 +102,7 @@ createTableStatement
          (KW_AS selectStatementWithCTE)?
       )
     -> ^(TOK_CREATETABLE $name $mgd ifNotExists?
-         ^(TOK_LIKETABLE $likeName?)
+         likeTableOrFile?
          columnNameTypeOrConstraintList?
          tableComment?
          createTablePartitionSpec?
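
A few statements the revised rule accepts, as a sketch (paths and table names are
hypothetical); note that createTablePartitionSpec? in the LIKE branch permits
PARTITIONED BY together with LIKE FILE:

    CREATE TABLE t1 LIKE FILE PARQUET '/tmp/example.parq';
    CREATE TABLE t2 LIKE FILE PARQUET '/tmp/example.parq' PARTITIONED BY (year STRING);
    CREATE TABLE t3 LIKE existing_table;  -- the pre-existing LIKE <table> path, unchanged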
diff --git a/parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index 25bd5a259f4..3efadee97df 100644
--- a/parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ b/parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -161,6 +161,7 @@ TOK_CREATEDATABASE;
 TOK_CREATEDATACONNECTOR;
 TOK_CREATETABLE;
 TOK_TRUNCATETABLE;
+TOK_LIKEFILE;
 TOK_LIKETABLE;
 TOK_DATACONNECTOR;
 TOK_DATACONNECTORCOMMENT;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableDesc.java
index 297b0857a0b..b484428cc07 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableDesc.java
@@ -127,6 +127,8 @@ public class CreateTableDesc implements DDLDesc, Serializable {
   private transient FileSinkDesc writer;
   private Long replWriteId; // to be used by repl task to get the txn and valid write id list
   private String ownerName = null;
+  private String likeFile = null;
+  private String likeFileFormat = null;
 
   public CreateTableDesc() {
   }
@@ -230,6 +232,22 @@ public class CreateTableDesc implements DDLDesc, Serializable {
     return copy == null ? null : new ArrayList<T>(copy);
   }
 
+  public void setLikeFile(String likeFile) {
+    this.likeFile = likeFile;
+  }
+
+  public void setLikeFileFormat(String likeFileFormat) {
+    this.likeFileFormat = likeFileFormat;
+  }
+
+  public String getLikeFile() {
+    return likeFile;
+  }
+
+  public String getLikeFileFormat() {
+    return likeFileFormat;
+  }
+
   @Explain(displayName = "columns")
   public List<String> getColsString() {
     return Utilities.getFieldSchemaString(getCols());
@@ -268,7 +286,7 @@ public class CreateTableDesc implements DDLDesc, Serializable {
     return cols;
   }
 
-  public void setCols(ArrayList<FieldSchema> cols) {
+  public void setCols(List<FieldSchema> cols) {
     this.cols = cols;
   }
 
@@ -544,13 +562,13 @@ public class CreateTableDesc implements DDLDesc, Serializable {
     this.skewedColValues = skewedColValues;
   }
 
-  public void validate(HiveConf conf)
-      throws SemanticException {
+  public void validate(HiveConf conf) throws SemanticException {
 
     if ((this.getCols() == null) || (this.getCols().size() == 0)) {
-      // for now make sure that serde exists
-      if (Table.hasMetastoreBasedSchema(conf, serName) &&
-              StringUtils.isEmpty(getStorageHandler())) {
+      // If the table has no columns and uses an HMS-backed SerDe, it must either have a storage
+      // handler or be a CREATE TABLE LIKE FILE statement.
+      if (Table.hasMetastoreBasedSchema(conf, serName) && StringUtils.isEmpty(getStorageHandler())
+          && this.getLikeFile() == null) {
         throw new SemanticException(ErrorMsg.INVALID_TBL_DDL_SERDE.getMsg());
       }
       return;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableOperation.java
index 6dacbb0b7ec..c3d0a320942 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableOperation.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableOperation.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.conf.Constants;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 import org.apache.hadoop.hive.ql.ddl.DDLOperation;
 import org.apache.hadoop.hive.ql.ddl.DDLOperationContext;
@@ -37,6 +38,7 @@ import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.hooks.LineageInfo.DataContainer;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.HdfsUtils;
+import org.apache.hadoop.hive.ql.io.SchemaInferenceUtils;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.parse.ReplicationSpec;
@@ -53,8 +55,23 @@ public class CreateTableOperation extends DDLOperation<CreateTableDesc> {
     super(context, desc);
   }
 
+  // Sets the table's columns using the FieldSchema list inferred by the SerDe's SchemaInference
+  // implementation. This is used by CREATE TABLE LIKE FILE.
+  private void readSchemaFromFile() throws HiveException {
+    String fileFormat = desc.getLikeFileFormat();
+    String filePath = desc.getLikeFile();
+    List<FieldSchema> fieldSchema = SchemaInferenceUtils.readSchemaFromFile(context.getConf(), fileFormat, filePath);
+    LOG.debug("Inferred field schema for {} file {} was {}", fileFormat, filePath, fieldSchema);
+    desc.setCols(fieldSchema);
+  }
+
   @Override
   public int execute() throws HiveException {
+    // check if schema is being inferred via LIKE FILE
+    if (desc.getLikeFile() != null) {
+      readSchemaFromFile();
+    }
+
     // create the table
     Table tbl = desc.toTable(context.getConf());
     LOG.debug("creating table {} on {}", tbl.getFullyQualifiedName(), tbl.getDataLocation());
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/SchemaInferenceUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/SchemaInferenceUtils.java
new file mode 100644
index 00000000000..5f555b7f63c
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/SchemaInferenceUtils.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.ErrorMsg;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.AbstractSerDe;
+import org.apache.hadoop.hive.serde2.SchemaInference;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.util.ReflectionUtils;
+
+import java.util.List;
+
+public class SchemaInferenceUtils {
+  private static Class<AbstractSerDe> getSerde(Configuration conf, String fileFormat) throws HiveException {
+    StorageFormatFactory storageFormatFactory = new StorageFormatFactory();
+    StorageFormatDescriptor descriptor = storageFormatFactory.get(fileFormat);
+    if (descriptor == null) {
+      throw new HiveException(ErrorMsg.CTLF_MISSING_STORAGE_FORMAT_DESCRIPTOR.getErrorCodedMsg(fileFormat));
+    }
+    String serde = descriptor.getSerde();
+    try {
+      return (Class<AbstractSerDe>) conf.getClassByName(serde);
+    } catch (ClassNotFoundException e) {
+      throw new HiveException(ErrorMsg.CTLF_CLASS_NOT_FOUND.getErrorCodedMsg(serde, fileFormat), e);
+    }
+  }
+
+  /**
+   * Determines if a supplied fileFormat supports Schema Inference for CREATE TABLE LIKE FILE.
+   *
+   * @param conf Configuration object used to get class.
+   * @param fileFormat File format to check for Schema Inference support.
+   * @throws HiveException if unable to get SerDe class for fileFormat
+   */
+  public static boolean doesSupportSchemaInference(Configuration conf, String fileFormat) throws HiveException {
+    return SchemaInference.class.isAssignableFrom(getSerde(conf, fileFormat));
+  }
+
+  /**
+   * Returns a List of FieldSchema as determined by the readSchema method of the provided file format's SerDe.
+   *
+   * @param conf Hadoop Configuration object used to look up the SerDe class and passed to the readSchema method.
+   * @param fileFormat File format whose SerDe is used to read the schema.
+   * @param filePath Path to the file to read.
+   * @throws HiveException if unable to read the schema
+   */
+  public static List<FieldSchema> readSchemaFromFile(Configuration conf, String fileFormat, String filePath)
+          throws HiveException {
+    Class<AbstractSerDe> asClass = getSerde(conf, fileFormat);
+    SchemaInference sd = (SchemaInference) ReflectionUtils.newInstance(asClass, conf);
+    try {
+      return sd.readSchema(conf, filePath);
+    } catch (SerDeException e) {
+      throw new HiveException(ErrorMsg.CTLF_FAILED_INFERENCE.getErrorCodedMsg(), e);
+    }
+  }
+}
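
A format whose SerDe does not implement SchemaInference is rejected during analysis
with CTLF_UNSUPPORTED_FORMAT (10434); a sketch of the failure mode (the path is
hypothetical):

    CREATE TABLE t_avro LIKE FILE AVRO '/tmp/example.avro';
    -- FAILED: SemanticException [Error 10434]: CREATE TABLE LIKE FILE is not
    -- supported by the 'AVRO' file format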
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
index ce35b885cc5..881d56cd31f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
@@ -22,11 +22,16 @@ import java.util.Properties;
 import com.google.common.base.Preconditions;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.optimizer.FieldNode;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.AbstractSerDe;
 import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
+import org.apache.hadoop.hive.serde2.SchemaInference;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.SerDeSpec;
 import org.apache.hadoop.hive.serde2.io.ParquetHiveRecord;
@@ -39,7 +44,23 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
+import org.apache.parquet.hadoop.ParquetFileReader;
 import org.apache.parquet.hadoop.ParquetOutputFormat;
+import org.apache.parquet.hadoop.metadata.FileMetaData;
+import org.apache.parquet.hadoop.util.HadoopInputFile;
+import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.DateLogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.DecimalLogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.IntLogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.ListLogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.MapLogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.StringLogicalTypeAnnotation;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.Type;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * A ParquetHiveSerDe for Hive (with the deprecated package mapred). Parquet
@@ -48,7 +69,9 @@ import org.apache.parquet.hadoop.ParquetOutputFormat;
  */
 @SerDeSpec(schemaProps = {serdeConstants.LIST_COLUMNS, serdeConstants.LIST_COLUMN_TYPES,
         ParquetOutputFormat.COMPRESSION})
-public class ParquetHiveSerDe extends AbstractSerDe {
+public class ParquetHiveSerDe extends AbstractSerDe implements SchemaInference {
+  private static final Logger LOG = LoggerFactory.getLogger(ParquetHiveSerDe.class);
+
   public static final Text MAP_KEY = new Text("key");
   public static final Text MAP_VALUE = new Text("value");
   public static final Text MAP = new Text("map");
@@ -57,6 +80,7 @@ public class ParquetHiveSerDe extends AbstractSerDe {
 
   // Map precision to the number bytes needed for binary conversion.
   public static final int PRECISION_TO_BYTE_COUNT[] = new int[38];
+
   static {
     for (int prec = 1; prec <= 38; prec++) {
       // Estimated number of bytes needed.
@@ -82,8 +106,8 @@ public class ParquetHiveSerDe extends AbstractSerDe {
         (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(getColumnNames(), getColumnTypes());
     StructTypeInfo prunedTypeInfo = null;
     if (this.configuration.isPresent()) {
-      String rawPrunedColumnPaths =
-          this.configuration.get().get(ColumnProjectionUtils.READ_NESTED_COLUMN_PATH_CONF_STR);
+      Configuration conf = this.configuration.get();
+      String rawPrunedColumnPaths = conf.get(ColumnProjectionUtils.READ_NESTED_COLUMN_PATH_CONF_STR);
       if (rawPrunedColumnPaths != null) {
         List<String> prunedColumnPaths = processRawPrunedPaths(rawPrunedColumnPaths);
         prunedTypeInfo = pruneFromPaths(completeTypeInfo, prunedColumnPaths);
@@ -234,4 +258,176 @@ public class ParquetHiveSerDe extends AbstractSerDe {
       return (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(newNames, newTypes);
     }
   }
+
+  // SchemaInference interface implementation
+  private String convertGroupType(GroupType group, boolean inferBinaryAsString) throws SerDeException {
+    boolean first = true;
+    StringBuilder sb = new StringBuilder(serdeConstants.STRUCT_TYPE_NAME + "<");
+    for (Type field: group.getFields()) {
+      if (first) {
+        first = false;
+      } else {
+        sb.append(",");
+      }
+      // fieldName:typeName
+      sb.append(field.getName()).append(":").append(convertParquetTypeToFieldType(field, inferBinaryAsString));
+    }
+    sb.append(">");
+    // e.g. struct<fieldName1:int,fieldName2:map<string,int>>
+    return sb.toString();
+  }
+
+  private String convertPrimitiveType(PrimitiveType primitive, boolean inferBinaryAsString) throws SerDeException {
+    switch (primitive.getPrimitiveTypeName()) {
+      case INT96:
+        return serdeConstants.TIMESTAMP_TYPE_NAME;
+      case INT32:
+        return serdeConstants.INT_TYPE_NAME;
+      case INT64:
+        return serdeConstants.BIGINT_TYPE_NAME;
+      case BOOLEAN:
+        return serdeConstants.BOOLEAN_TYPE_NAME;
+      case FLOAT:
+        return serdeConstants.FLOAT_TYPE_NAME;
+      case DOUBLE:
+        return serdeConstants.DOUBLE_TYPE_NAME;
+      case BINARY:
+        if (inferBinaryAsString) {
+          return serdeConstants.STRING_TYPE_NAME;
+        } else {
+          return serdeConstants.BINARY_TYPE_NAME;
+        }
+      default:
+        throw new SerDeException(ErrorMsg.PARQUET_UNHANDLED_TYPE.getErrorCodedMsg(primitive.getPrimitiveTypeName().name()));
+    }
+  }
+
+  private String convertParquetIntLogicalType(Type parquetType) throws SerDeException {
+    IntLogicalTypeAnnotation intLogicalType = (IntLogicalTypeAnnotation) parquetType.getLogicalTypeAnnotation();
+    PrimitiveType primitiveType = parquetType.asPrimitiveType();
+    // check to see if primitive type handling is implemented
+    switch (primitiveType.getPrimitiveTypeName()) {
+      case INT32:
+      case INT64:
+        break;
+      default:
+        throw new SerDeException(ErrorMsg.PARQUET_UNHANDLED_TYPE.getErrorCodedMsg(intLogicalType.toString()));
+    }
+
+    if (!intLogicalType.isSigned()) {
+      // unsigned types are not supported
+      throw new SerDeException(ErrorMsg.PARQUET_UNHANDLED_TYPE.getErrorCodedMsg(intLogicalType.toString()));
+    }
+
+    switch (intLogicalType.getBitWidth()) {
+      case 8: return serdeConstants.TINYINT_TYPE_NAME;
+      case 16: return serdeConstants.SMALLINT_TYPE_NAME;
+      case 32: return serdeConstants.INT_TYPE_NAME;
+      case 64: return serdeConstants.BIGINT_TYPE_NAME;
+    }
+
+    throw new SerDeException(ErrorMsg.PARQUET_UNHANDLED_TYPE.getErrorCodedMsg(intLogicalType.toString()));
+  }
+
+  private String createMapType(String keyType, String valueType) {
+    // examples: map<string,int>, map<string,struct<i:int>>
+    return serdeConstants.MAP_TYPE_NAME + "<" + keyType + "," + valueType + ">";
+  }
+
+  private String convertParquetMapLogicalTypeAnnotation(Type parquetType, boolean inferBinaryAsString)
+          throws SerDeException {
+    MapLogicalTypeAnnotation mType = (MapLogicalTypeAnnotation) parquetType.getLogicalTypeAnnotation();
+    GroupType gType = parquetType.asGroupType();
+    Type innerField = gType.getType(0);
+    GroupType innerGroup = innerField.asGroupType();
+    Type key = innerGroup.getType(0);
+    Type value = innerGroup.getType(1);
+    return createMapType(convertParquetTypeToFieldType(key, inferBinaryAsString),
+            convertParquetTypeToFieldType(value, inferBinaryAsString));
+  }
+
+  private String createArrayType(String fieldType) {
+    // examples: array<int>, array<struct<i:int>>, array<map<string,int>>
+    return serdeConstants.LIST_TYPE_NAME + "<" + fieldType + ">";
+  }
+
+  private String convertParquetListLogicalTypeAnnotation(Type parquetType, boolean inferBinaryAsString)
+          throws SerDeException {
+    ListLogicalTypeAnnotation mType = (ListLogicalTypeAnnotation) parquetType.getLogicalTypeAnnotation();
+    GroupType gType = parquetType.asGroupType();
+    Type innerField = gType.getType(0);
+    if (innerField.isPrimitive() || innerField.getOriginalType() != null) {
+      return createArrayType(convertParquetTypeToFieldType(innerField, inferBinaryAsString));
+    }
+
+    GroupType innerGroup = innerField.asGroupType();
+    if (innerGroup.getFieldCount() != 1) {
+      return createArrayType(convertGroupType(innerGroup, inferBinaryAsString));
+    }
+
+    return createArrayType(convertParquetTypeToFieldType(innerGroup.getType(0), inferBinaryAsString));
+  }
+
+  private String createDecimalType(int precision, int scale) {
+    // example: decimal(10, 4)
+    return serdeConstants.DECIMAL_TYPE_NAME + "(" + precision + "," + scale + ")";
+  }
+
+  private String convertLogicalType(Type type, boolean inferBinaryAsString) throws SerDeException {
+    LogicalTypeAnnotation lType = type.getLogicalTypeAnnotation();
+    if (lType instanceof IntLogicalTypeAnnotation) {
+      return convertParquetIntLogicalType(type);
+    } else if (lType instanceof StringLogicalTypeAnnotation) {
+      return serdeConstants.STRING_TYPE_NAME;
+    } else if (lType instanceof DecimalLogicalTypeAnnotation) {
+      DecimalLogicalTypeAnnotation dType = (DecimalLogicalTypeAnnotation) lType;
+      return createDecimalType(dType.getPrecision(), dType.getScale());
+    } else if (lType instanceof MapLogicalTypeAnnotation) {
+      return convertParquetMapLogicalTypeAnnotation(type, inferBinaryAsString);
+    } else if (lType instanceof ListLogicalTypeAnnotation) {
+      return convertParquetListLogicalTypeAnnotation(type, inferBinaryAsString);
+    } else if (lType instanceof DateLogicalTypeAnnotation) {
+      // assuming 32 bit int
+      return serdeConstants.DATE_TYPE_NAME;
+    }
+    throw new SerDeException(ErrorMsg.PARQUET_UNHANDLED_TYPE.getErrorCodedMsg(lType.toString()));
+  }
+
+  private String convertParquetTypeToFieldType(Type type, boolean inferBinaryAsString) throws SerDeException {
+    if (type.getLogicalTypeAnnotation() != null) {
+      return convertLogicalType(type, inferBinaryAsString);
+    } else if (type.isPrimitive()) {
+      return convertPrimitiveType(type.asPrimitiveType(), inferBinaryAsString);
+    }
+    return convertGroupType(type.asGroupType(), inferBinaryAsString);
+  }
+
+  private FieldSchema convertParquetTypeToFieldSchema(Type type, boolean inferBinaryAsString) throws SerDeException {
+    String columnName = type.getName();
+    String typeName = convertParquetTypeToFieldType(type, inferBinaryAsString);
+    return new FieldSchema(columnName, typeName, "Inferred from Parquet file.");
+  }
+
+  @Override
+  public List<FieldSchema> readSchema(Configuration conf, String file) throws SerDeException {
+      FileMetaData metadata;
+      // try-with-resources ensures the footer reader is closed on both success and failure
+      try (ParquetFileReader reader = ParquetFileReader.open(HadoopInputFile.fromPath(new Path(file), conf))) {
+        metadata = reader.getFileMetaData();
+      } catch (Exception e) {
+        throw new SerDeException(ErrorMsg.PARQUET_FOOTER_ERROR.getErrorCodedMsg(), e);
+      }
+
+      MessageType msg = metadata.getSchema();
+      List<FieldSchema> schema = new ArrayList<>();
+      String inferBinaryAsStringValue = conf.get(HiveConf.ConfVars.HIVE_PARQUET_INFER_BINARY_AS.varname);
+      boolean inferBinaryAsString = "string".equalsIgnoreCase(inferBinaryAsStringValue);
+
+      for (Type field: msg.getFields()) {
+        FieldSchema fieldSchema = convertParquetTypeToFieldSchema(field, inferBinaryAsString);
+        schema.add(fieldSchema);
+        LOG.debug("Inferred field schema {}", fieldSchema);
+      }
+      return schema;
+  }
 }
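
For reference, the Parquet-to-Hive type mapping the code above applies when inferring
a schema (mirroring convertPrimitiveType and convertLogicalType; the path below is
hypothetical):

    -- INT96 -> timestamp, INT32 -> int, INT64 -> bigint, BOOLEAN -> boolean,
    -- FLOAT -> float, DOUBLE -> double,
    -- BINARY -> binary (or string when hive.parquet.infer.binary.as=string),
    -- signed int(8/16/32/64) -> tinyint/smallint/int/bigint,
    -- STRING -> string, DECIMAL(p,s) -> decimal(p,s), DATE -> date,
    -- LIST -> array<...>, MAP -> map<...>, nested group -> struct<...>
    CREATE TABLE t_inferred LIKE FILE PARQUET '/tmp/example.parq';
    DESCRIBE t_inferred;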
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 48e0cf9b2ed..485089e4ad3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -164,6 +164,7 @@ import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
 import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
 import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
 import org.apache.hadoop.hive.ql.io.NullRowsInputFormat;
+import org.apache.hadoop.hive.ql.io.SchemaInferenceUtils;
 import org.apache.hadoop.hive.ql.io.arrow.ArrowColumnarBatchSerDe;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.Node;
@@ -13477,6 +13478,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     }
     return false;
   }
+
   /**
    * Analyze the create table command. If it is a regular create-table or
    * create-table-like statements, we create a DDLWork and return true. If it is
@@ -13516,7 +13518,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     final int CREATE_TABLE = 0; // regular CREATE TABLE
     final int CTLT = 1; // CREATE TABLE LIKE ... (CTLT)
     final int CTAS = 2; // CREATE TABLE AS SELECT ... (CTAS)
-    final int ctt = 3; // CREATE TRANSACTIONAL TABLE
+    final int CTT = 3; // CREATE TRANSACTIONAL TABLE
+    final int CTLF = 4; // CREATE TABLE LIKE FILE
     int command_type = CREATE_TABLE;
     List<String> skewedColNames = new ArrayList<String>();
     List<List<String>> skewedValues = new ArrayList<List<String>>();
@@ -13524,6 +13527,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     boolean storedAsDirs = false;
     boolean isUserStorageFormat = false;
     boolean partitionTransformSpecExists = false;
+    String likeFile = null;
+    String likeFileFormat = null;
 
     RowFormatParams rowFormatParams = new RowFormatParams();
     StorageFormat storageFormat = new StorageFormat(conf);
@@ -13569,7 +13574,16 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
         break;
       case HiveParser.KW_TRANSACTIONAL:
         isTransactional = true;
-        command_type = ctt;
+        command_type = CTT;
+        break;
+      case HiveParser.TOK_LIKEFILE:
+        if (cols.size() != 0) {
+          throw new SemanticException(ErrorMsg.CTLT_COLLST_COEXISTENCE.getMsg());
+        }
+        likeFileFormat = getUnescapedName((ASTNode) child.getChild(0));
+        likeFile = getUnescapedName((ASTNode) child.getChild(1));
+        command_type = CTLF;
         break;
       case HiveParser.TOK_LIKETABLE:
         if (child.getChildCount() > 0) {
@@ -13719,7 +13733,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       }
     }
 
-    if (command_type == CREATE_TABLE || command_type == CTLT || command_type == ctt) {
+    if (command_type == CREATE_TABLE || command_type == CTLT || command_type == CTT || command_type == CTLF) {
       queryState.setCommandType(HiveOperation.CREATETABLE);
     } else if (command_type == CTAS) {
       queryState.setCommandType(HiveOperation.CREATETABLE_AS_SELECT);
@@ -13785,7 +13799,15 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       }
     }
     switch (command_type) {
-
+    case CTLF:
+      try {
+        if (!SchemaInferenceUtils.doesSupportSchemaInference(conf, likeFileFormat)) {
+          throw new SemanticException(ErrorMsg.CTLF_UNSUPPORTED_FORMAT.getErrorCodedMsg(likeFileFormat));
+        }
+      } catch (HiveException e) {
+        throw new SemanticException(e.getMessage(), e);
+      }
+    // fall through
     case CREATE_TABLE: // REGULAR CREATE TABLE DDL
       if (!CollectionUtils.isEmpty(partColNames)) {
         throw new SemanticException(
@@ -13810,6 +13832,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
                                                        checkConstraints);
       crtTblDesc.setStoredAsSubDirectories(storedAsDirs);
       crtTblDesc.setNullFormat(rowFormatParams.nullFormat);
+      crtTblDesc.setLikeFile(likeFile);
+      crtTblDesc.setLikeFileFormat(likeFileFormat);
 
       crtTblDesc.validate(conf);
       // outputs is empty, which means this create table happens in the current
@@ -13832,7 +13856,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
             "Query state attached to Session state must be not null. Table location cannot be saved.");
       }
       break;
-    case ctt: // CREATE TRANSACTIONAL TABLE
+    case CTT: // CREATE TRANSACTIONAL TABLE
       if (isExt && !isDefaultTableTypeChanged) {
         throw new SemanticException(
             qualifiedTabName.getTable() + " cannot be declared transactional because it's an external table");
diff --git a/ql/src/test/queries/clientnegative/create_table_like_invalid.q b/ql/src/test/queries/clientnegative/create_table_like_invalid.q
new file mode 100644
index 00000000000..ac91a8cbaaf
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/create_table_like_invalid.q
@@ -0,0 +1 @@
+CREATE TABLE test LIKE FILE AVRO 'hdfs://madeuppath';
diff --git a/ql/src/test/queries/clientpositive/create_table_like_file.q b/ql/src/test/queries/clientpositive/create_table_like_file.q
new file mode 100644
index 00000000000..980c7bb2a2a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/create_table_like_file.q
@@ -0,0 +1,90 @@
+-- all primitive types
+-- timestamp_w_tz TIMESTAMP WITH LOCAL TIME ZONE is not supported by Hive's Parquet implementation
+CREATE EXTERNAL TABLE test_all_types(tinyint_type TINYINT, smallint_type SMALLINT, bigint_type BIGINT, int_type INT, float_type FLOAT, double_type double, decimal_type DECIMAL(4,2), timestamp_type TIMESTAMP, date_type DATE, string_type STRING, varchar_type VARCHAR(100), char_type CHAR(34), boolean_type BOOLEAN, binary_type BINARY) STORED AS PARQUET LOCATION '${system:test.tmp.dir}/test_all_types';
+-- insert two rows (the other tables only have 1 row)
+INSERT INTO test_all_types VALUES (1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe'),
+       (1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe');
+SELECT * FROM test_all_types;
+DESCRIBE test_all_types;
+-- CREATE A LIKE table
+CREATE TABLE like_test_all_types LIKE FILE PARQUET '${system:test.tmp.dir}/test_all_types/000000_0';
+INSERT INTO like_test_all_types VALUES (1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe'),
+       (1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe');
+SELECT * FROM like_test_all_types;
+DESCRIBE like_test_all_types;
+DROP TABLE test_all_types;
+DROP TABLE like_test_all_types;
+
+-- test hive.parquet.infer.binary.as string
+SET hive.parquet.infer.binary.as = String;
+CREATE TABLE like_test_all_types LIKE FILE PARQUET '${system:test.tmp.dir}/test_all_types/000000_0';
+INSERT INTO like_test_all_types VALUES (1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe'),
+       (1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe');
+SELECT * FROM like_test_all_types;
+DESCRIBE like_test_all_types;
+DROP TABLE test_all_types;
+DROP TABLE like_test_all_types;
+SET hive.parquet.infer.binary.as = binary;
+
+-- complex types (struct, array, map, union)
+-- union type is not supported by PARQUET in hive
+-- array
+CREATE EXTERNAL TABLE test_array(str_array array<string>) STORED AS PARQUET LOCATION '${system:test.tmp.dir}/test_array';
+DESCRIBE test_array;
+INSERT INTO test_array SELECT array("bob", "sue");
+SELECT * FROM test_array;
+CREATE TABLE like_test_array LIKE FILE PARQUET '${system:test.tmp.dir}/test_array/000000_0';
+DESCRIBE like_test_array;
+INSERT INTO like_test_array SELECT array("bob", "sue");
+SELECT * FROM like_test_array;
+DROP TABLE like_test_array;
+
+-- map
+CREATE EXTERNAL TABLE test_map(simple_map map<int, string>, map_to_struct map<string, struct<i : int>>, map_to_map map<date,map<int, string>>, map_to_array map<binary, array<array<int>>>) STORED AS PARQUET LOCATION '${system:test.tmp.dir}/test_map';
+DESCRIBE test_map;
+INSERT INTO test_map SELECT map(10, "foo"), map("bar", named_struct("i", 99)), map(cast('1984-01-01' as date), map(10, "goodbye")), map(cast("binary" as binary), array(array(1,2,3)));
+SELECT * FROM test_map;
+CREATE TABLE like_test_map LIKE FILE PARQUET '${system:test.tmp.dir}/test_map/000000_0';
+DESCRIBE like_test_map;
+INSERT INTO like_test_map SELECT map(10, "foo"), map("bar", named_struct("i", 99)), map(cast('1984-01-01' as date), map(10, "goodbye")), map(cast("binary" as binary), array(array(1,2,3)));
+SELECT * FROM like_test_map;
+DROP TABLE like_test_map;
+
+-- struct
+CREATE EXTERNAL TABLE test_complex_struct(struct_type struct<tinyint_type : tinyint, smallint_type : smallint, bigint_type : bigint, int_type : int, float_type : float, double_type : double, decimal_type : DECIMAL(4,2), timestamp_type : TIMESTAMP, date_type : DATE, string_type : STRING, varchar_type : VARCHAR(100), char_type : CHAR(34), boolean_type : boolean, binary_type : binary>) STORED AS PARQUET LOCATION '${system:test.tmp.dir}/test_complex_struct';
+DESCRIBE test_complex_struct;
+-- disable CBO because type conversion triggers a CBO failure that makes the test fail
+-- the non-CBO path works (HIVE-26398)
+SET hive.cbo.enable=false;
+INSERT INTO test_complex_struct SELECT named_struct("tinyint_type", cast(1 as tinyint), "smallint_type", cast(2 as smallint), "bigint_type", cast(3 as bigint), "int_type", 4, "float_type", cast(2.2 as float), "double_type", cast(2.2 as double), "decimal_type", cast(20.22 as decimal(4,2)), "timestamp_type", cast('2022-06-30 10:20:30' as timestamp), "date_type", cast('2020-04-23' as date), "string_type", 'str1', "varchar_type", cast('varchar1' as varchar(100)), "char_type", cast('char' as  [...]
+SET hive.cbo.enable=true;
+SELECT * FROM test_complex_struct;
+-- varchar/char are created as string because the parquet file has no information to derive these types; they are stored as string
+CREATE TABLE like_test_complex_struct LIKE FILE PARQUET '${system:test.tmp.dir}/test_complex_struct/000000_0';
+DESCRIBE like_test_complex_struct;
+-- disable CBO because type conversion triggers a CBO failure that makes the test fail
+-- the non-CBO path works (HIVE-26398)
+SET hive.cbo.enable=false;
+INSERT INTO like_test_complex_struct SELECT named_struct("tinyint_type", cast(1 as tinyint), "smallint_type", cast(2 as smallint), "bigint_type", cast(3 as bigint), "int_type", 4, "float_type", cast(2.2 as float), "double_type", cast(2.2 as double), "decimal_type", cast(20.22 as decimal(4,2)), "timestamp_type", cast('2022-06-30 10:20:30' as timestamp), "date_type", cast('2020-04-23' as date), "string_type", 'str1', "varchar_type", 'varchar1', "char_type", 'char', "boolean_type", true, "b [...]
+SET hive.cbo.enable=true;
+SELECT * FROM like_test_complex_struct;
+DROP TABLE like_test_complex_struct;
+
+-- test complex types that contain other complex types
+CREATE EXTERNAL TABLE test_complex_complex(struct_type struct<i : int, s : string, m : map<string, array<int>>, struct_i : struct<str : string>>) STORED AS PARQUET LOCATION '${system:test.tmp.dir}/test_complex_complex';
+DESCRIBE test_complex_complex;
+INSERT INTO test_complex_complex SELECT named_struct("i", 10, "s", "hello, world", "m", map("arr", array(1,2,3,4)), "struct_i", named_struct("str", "test_str"));
+SELECT * FROM test_complex_complex;
+CREATE TABLE like_test_complex_complex LIKE FILE PARQUET '${system:test.tmp.dir}/test_complex_complex/000000_0';
+DESCRIBE like_test_complex_complex;
+INSERT INTO like_test_complex_complex SELECT named_struct("i", 10, "s", "hello, world", "m", map("arr", array(1,2,3,4)), "struct_i", named_struct("str", "test_str"));
+SELECT * FROM like_test_complex_complex;
+DROP TABLE like_test_complex_complex;
+
+-- test adding partitioning to the destination table
+CREATE TABLE like_test_partitioning LIKE FILE PARQUET '${system:test.tmp.dir}/test_all_types/000000_0' PARTITIONED BY (year STRING, month STRING);
+DESCRIBE like_test_partitioning;
+INSERT INTO like_test_partitioning PARTITION (year='1984', month='1') VALUES (1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe'),
+       (1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe');
+SELECT * FROM like_test_partitioning;
+DROP TABLE like_test_partitioning;
diff --git a/ql/src/test/results/clientnegative/create_table_like_invalid.q.out b/ql/src/test/results/clientnegative/create_table_like_invalid.q.out
new file mode 100644
index 00000000000..987da391e7c
--- /dev/null
+++ b/ql/src/test/results/clientnegative/create_table_like_invalid.q.out
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10434]: CREATE TABLE LIKE FILE is not supported by the 'AVRO' file format
diff --git a/ql/src/test/results/clientpositive/llap/create_table_like_file.q.out b/ql/src/test/results/clientpositive/llap/create_table_like_file.q.out
new file mode 100644
index 00000000000..5899d8fda4b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/create_table_like_file.q.out
@@ -0,0 +1,611 @@
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_all_types
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_all_types
+PREHOOK: query: INSERT INTO test_all_types VALUES (1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe'),
+       (1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_all_types
+POSTHOOK: query: INSERT INTO test_all_types VALUES (1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe'),
+       (1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_all_types
+POSTHOOK: Lineage: test_all_types.bigint_type SCRIPT []
+POSTHOOK: Lineage: test_all_types.binary_type SCRIPT []
+POSTHOOK: Lineage: test_all_types.boolean_type SCRIPT []
+POSTHOOK: Lineage: test_all_types.char_type SCRIPT []
+POSTHOOK: Lineage: test_all_types.date_type SCRIPT []
+POSTHOOK: Lineage: test_all_types.decimal_type SCRIPT []
+POSTHOOK: Lineage: test_all_types.double_type SCRIPT []
+POSTHOOK: Lineage: test_all_types.float_type SCRIPT []
+POSTHOOK: Lineage: test_all_types.int_type SCRIPT []
+POSTHOOK: Lineage: test_all_types.smallint_type SCRIPT []
+POSTHOOK: Lineage: test_all_types.string_type SCRIPT []
+POSTHOOK: Lineage: test_all_types.timestamp_type SCRIPT []
+POSTHOOK: Lineage: test_all_types.tinyint_type SCRIPT []
+POSTHOOK: Lineage: test_all_types.varchar_type SCRIPT []
+PREHOOK: query: SELECT * FROM test_all_types
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_all_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM test_all_types
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_all_types
+#### A masked pattern was here ####
+1	2	3	4	2.2	2.2	20.20	2022-06-30 10:20:30	2020-04-23	str1	varchar1	char                              	true	binary_maybe
+1	2	3	4	2.2	2.2	20.20	2022-06-30 10:20:30	2020-04-23	str1	varchar1	char                              	true	binary_maybe
+PREHOOK: query: DESCRIBE test_all_types
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@test_all_types
+POSTHOOK: query: DESCRIBE test_all_types
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@test_all_types
+tinyint_type        	tinyint             	                    
+smallint_type       	smallint            	                    
+bigint_type         	bigint              	                    
+int_type            	int                 	                    
+float_type          	float               	                    
+double_type         	double              	                    
+decimal_type        	decimal(4,2)        	                    
+timestamp_type      	timestamp           	                    
+date_type           	date                	                    
+string_type         	string              	                    
+varchar_type        	varchar(100)        	                    
+char_type           	char(34)            	                    
+boolean_type        	boolean             	                    
+binary_type         	binary              	                    
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@like_test_all_types
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@like_test_all_types
+PREHOOK: query: INSERT INTO like_test_all_types VALUES (1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe'),
+       (1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@like_test_all_types
+POSTHOOK: query: INSERT INTO like_test_all_types VALUES (1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe'),
+       (1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@like_test_all_types
+POSTHOOK: Lineage: like_test_all_types.bigint_type SCRIPT []
+POSTHOOK: Lineage: like_test_all_types.binary_type SCRIPT []
+POSTHOOK: Lineage: like_test_all_types.boolean_type SCRIPT []
+POSTHOOK: Lineage: like_test_all_types.char_type SCRIPT []
+POSTHOOK: Lineage: like_test_all_types.date_type SCRIPT []
+POSTHOOK: Lineage: like_test_all_types.decimal_type SCRIPT []
+POSTHOOK: Lineage: like_test_all_types.double_type SCRIPT []
+POSTHOOK: Lineage: like_test_all_types.float_type SCRIPT []
+POSTHOOK: Lineage: like_test_all_types.int_type SCRIPT []
+POSTHOOK: Lineage: like_test_all_types.smallint_type SCRIPT []
+POSTHOOK: Lineage: like_test_all_types.string_type SCRIPT []
+POSTHOOK: Lineage: like_test_all_types.timestamp_type SCRIPT []
+POSTHOOK: Lineage: like_test_all_types.tinyint_type SCRIPT []
+POSTHOOK: Lineage: like_test_all_types.varchar_type SCRIPT []
+PREHOOK: query: SELECT * FROM like_test_all_types
+PREHOOK: type: QUERY
+PREHOOK: Input: default@like_test_all_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM like_test_all_types
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@like_test_all_types
+#### A masked pattern was here ####
+1	2	3	4	2.2	2.2	20.20	2022-06-30 10:20:30	2020-04-23	str1	varchar1	char	true	binary_maybe
+1	2	3	4	2.2	2.2	20.20	2022-06-30 10:20:30	2020-04-23	str1	varchar1	char	true	binary_maybe
+PREHOOK: query: DESCRIBE like_test_all_types
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@like_test_all_types
+POSTHOOK: query: DESCRIBE like_test_all_types
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@like_test_all_types
+tinyint_type        	tinyint             	Inferred from Parquet file.
+smallint_type       	smallint            	Inferred from Parquet file.
+bigint_type         	bigint              	Inferred from Parquet file.
+int_type            	int                 	Inferred from Parquet file.
+float_type          	float               	Inferred from Parquet file.
+double_type         	double              	Inferred from Parquet file.
+decimal_type        	decimal(4,2)        	Inferred from Parquet file.
+timestamp_type      	timestamp           	Inferred from Parquet file.
+date_type           	date                	Inferred from Parquet file.
+string_type         	string              	Inferred from Parquet file.
+varchar_type        	string              	Inferred from Parquet file.
+char_type           	string              	Inferred from Parquet file.
+boolean_type        	boolean             	Inferred from Parquet file.
+binary_type         	binary              	Inferred from Parquet file.
+PREHOOK: query: DROP TABLE test_all_types
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@test_all_types
+PREHOOK: Output: default@test_all_types
+POSTHOOK: query: DROP TABLE test_all_types
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@test_all_types
+POSTHOOK: Output: default@test_all_types
+PREHOOK: query: DROP TABLE like_test_all_types
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@like_test_all_types
+PREHOOK: Output: default@like_test_all_types
+POSTHOOK: query: DROP TABLE like_test_all_types
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@like_test_all_types
+POSTHOOK: Output: default@like_test_all_types
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@like_test_all_types
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@like_test_all_types
+PREHOOK: query: INSERT INTO like_test_all_types VALUES (1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe'),
+       (1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@like_test_all_types
+POSTHOOK: query: INSERT INTO like_test_all_types VALUES (1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe'),
+       (1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@like_test_all_types
+POSTHOOK: Lineage: like_test_all_types.bigint_type SCRIPT []
+POSTHOOK: Lineage: like_test_all_types.binary_type SCRIPT []
+POSTHOOK: Lineage: like_test_all_types.boolean_type SCRIPT []
+POSTHOOK: Lineage: like_test_all_types.char_type SCRIPT []
+POSTHOOK: Lineage: like_test_all_types.date_type SCRIPT []
+POSTHOOK: Lineage: like_test_all_types.decimal_type SCRIPT []
+POSTHOOK: Lineage: like_test_all_types.double_type SCRIPT []
+POSTHOOK: Lineage: like_test_all_types.float_type SCRIPT []
+POSTHOOK: Lineage: like_test_all_types.int_type SCRIPT []
+POSTHOOK: Lineage: like_test_all_types.smallint_type SCRIPT []
+POSTHOOK: Lineage: like_test_all_types.string_type SCRIPT []
+POSTHOOK: Lineage: like_test_all_types.timestamp_type SCRIPT []
+POSTHOOK: Lineage: like_test_all_types.tinyint_type SCRIPT []
+POSTHOOK: Lineage: like_test_all_types.varchar_type SCRIPT []
+PREHOOK: query: SELECT * FROM like_test_all_types
+PREHOOK: type: QUERY
+PREHOOK: Input: default@like_test_all_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM like_test_all_types
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@like_test_all_types
+#### A masked pattern was here ####
+1	2	3	4	2.2	2.2	20.20	2022-06-30 10:20:30	2020-04-23	str1	varchar1	char	true	binary_maybe
+1	2	3	4	2.2	2.2	20.20	2022-06-30 10:20:30	2020-04-23	str1	varchar1	char	true	binary_maybe
+PREHOOK: query: DESCRIBE like_test_all_types
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@like_test_all_types
+POSTHOOK: query: DESCRIBE like_test_all_types
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@like_test_all_types
+tinyint_type        	tinyint             	Inferred from Parquet file.
+smallint_type       	smallint            	Inferred from Parquet file.
+bigint_type         	bigint              	Inferred from Parquet file.
+int_type            	int                 	Inferred from Parquet file.
+float_type          	float               	Inferred from Parquet file.
+double_type         	double              	Inferred from Parquet file.
+decimal_type        	decimal(4,2)        	Inferred from Parquet file.
+timestamp_type      	timestamp           	Inferred from Parquet file.
+date_type           	date                	Inferred from Parquet file.
+string_type         	string              	Inferred from Parquet file.
+varchar_type        	string              	Inferred from Parquet file.
+char_type           	string              	Inferred from Parquet file.
+boolean_type        	boolean             	Inferred from Parquet file.
+binary_type         	string              	Inferred from Parquet file.
+PREHOOK: query: DROP TABLE test_all_types
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE test_all_types
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE like_test_all_types
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@like_test_all_types
+PREHOOK: Output: default@like_test_all_types
+POSTHOOK: query: DROP TABLE like_test_all_types
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@like_test_all_types
+POSTHOOK: Output: default@like_test_all_types
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_array
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_array
+PREHOOK: query: DESCRIBE test_array
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@test_array
+POSTHOOK: query: DESCRIBE test_array
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@test_array
+str_array           	array<string>       	                    
+PREHOOK: query: INSERT INTO test_array SELECT array("bob", "sue")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_array
+POSTHOOK: query: INSERT INTO test_array SELECT array("bob", "sue")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_array
+POSTHOOK: Lineage: test_array.str_array EXPRESSION []
+PREHOOK: query: SELECT * FROM test_array
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_array
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM test_array
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_array
+#### A masked pattern was here ####
+["bob","sue"]
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@like_test_array
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@like_test_array
+PREHOOK: query: DESCRIBE like_test_array
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@like_test_array
+POSTHOOK: query: DESCRIBE like_test_array
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@like_test_array
+str_array           	array<string>       	Inferred from Parquet file.
+PREHOOK: query: INSERT INTO like_test_array SELECT array("bob", "sue")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@like_test_array
+POSTHOOK: query: INSERT INTO like_test_array SELECT array("bob", "sue")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@like_test_array
+POSTHOOK: Lineage: like_test_array.str_array EXPRESSION []
+PREHOOK: query: SELECT * FROM like_test_array
+PREHOOK: type: QUERY
+PREHOOK: Input: default@like_test_array
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM like_test_array
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@like_test_array
+#### A masked pattern was here ####
+["bob","sue"]
+PREHOOK: query: DROP TABLE like_test_array
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@like_test_array
+PREHOOK: Output: default@like_test_array
+POSTHOOK: query: DROP TABLE like_test_array
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@like_test_array
+POSTHOOK: Output: default@like_test_array
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_map
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_map
+PREHOOK: query: DESCRIBE test_map
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@test_map
+POSTHOOK: query: DESCRIBE test_map
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@test_map
+simple_map          	map<int,string>     	                    
+map_to_struct       	map<string,struct<i:int>>	                    
+map_to_map          	map<date,map<int,string>>	                    
+map_to_array        	map<binary,array<array<int>>>	                    
+PREHOOK: query: INSERT INTO test_map SELECT map(10, "foo"), map("bar", named_struct("i", 99)), map(cast('1984-01-01' as date), map(10, "goodbye")), map(cast("binary" as binary), array(array(1,2,3)))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_map
+POSTHOOK: query: INSERT INTO test_map SELECT map(10, "foo"), map("bar", named_struct("i", 99)), map(cast('1984-01-01' as date), map(10, "goodbye")), map(cast("binary" as binary), array(array(1,2,3)))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_map
+POSTHOOK: Lineage: test_map.map_to_array EXPRESSION []
+POSTHOOK: Lineage: test_map.map_to_map EXPRESSION []
+POSTHOOK: Lineage: test_map.map_to_struct EXPRESSION []
+POSTHOOK: Lineage: test_map.simple_map EXPRESSION []
+PREHOOK: query: SELECT * FROM test_map
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_map
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM test_map
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_map
+#### A masked pattern was here ####
+{10:"foo"}	{"bar":{"i":99}}	{"1984-01-01":{10:"goodbye"}}	{binary:[[1,2,3]]}
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@like_test_map
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@like_test_map
+PREHOOK: query: DESCRIBE like_test_map
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@like_test_map
+POSTHOOK: query: DESCRIBE like_test_map
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@like_test_map
+simple_map          	map<int,string>     	Inferred from Parquet file.
+map_to_struct       	map<string,struct<i:int>>	Inferred from Parquet file.
+map_to_map          	map<date,map<int,string>>	Inferred from Parquet file.
+map_to_array        	map<binary,array<array<int>>>	Inferred from Parquet file.
+PREHOOK: query: INSERT INTO like_test_map SELECT map(10, "foo"), map("bar", named_struct("i", 99)), map(cast('1984-01-01' as date), map(10, "goodbye")), map(cast("binary" as binary), array(array(1,2,3)))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@like_test_map
+POSTHOOK: query: INSERT INTO like_test_map SELECT map(10, "foo"), map("bar", named_struct("i", 99)), map(cast('1984-01-01' as date), map(10, "goodbye")), map(cast("binary" as binary), array(array(1,2,3)))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@like_test_map
+POSTHOOK: Lineage: like_test_map.map_to_array EXPRESSION []
+POSTHOOK: Lineage: like_test_map.map_to_map EXPRESSION []
+POSTHOOK: Lineage: like_test_map.map_to_struct EXPRESSION []
+POSTHOOK: Lineage: like_test_map.simple_map EXPRESSION []
+PREHOOK: query: SELECT * FROM like_test_map
+PREHOOK: type: QUERY
+PREHOOK: Input: default@like_test_map
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM like_test_map
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@like_test_map
+#### A masked pattern was here ####
+{10:"foo"}	{"bar":{"i":99}}	{"1984-01-01":{10:"goodbye"}}	{binary:[[1,2,3]]}
+PREHOOK: query: DROP TABLE like_test_map
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@like_test_map
+PREHOOK: Output: default@like_test_map
+POSTHOOK: query: DROP TABLE like_test_map
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@like_test_map
+POSTHOOK: Output: default@like_test_map
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_complex_struct
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_complex_struct
+PREHOOK: query: DESCRIBE test_complex_struct
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@test_complex_struct
+POSTHOOK: query: DESCRIBE test_complex_struct
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@test_complex_struct
+struct_type         	struct<tinyint_type:tinyint,smallint_type:smallint,bigint_type:bigint,int_type:int,float_type:float,double_type:double,decimal_type:decimal(4,2),timestamp_type:timestamp,date_type:date,string_type:string,varchar_type:varchar(100),char_type:char(34),boolean_type:boolean,binary_type:binary>	                    
+PREHOOK: query: INSERT INTO test_complex_struct SELECT named_struct("tinyint_type", cast(1 as tinyint), "smallint_type", cast(2 as smallint), "bigint_type", cast(3 as bigint), "int_type", 4, "float_type", cast(2.2 as float), "double_type", cast(2.2 as double), "decimal_type", cast(20.22 as decimal(4,2)), "timestamp_type", cast('2022-06-30 10:20:30' as timestamp), "date_type", cast('2020-04-23' as date), "string_type", 'str1', "varchar_type", cast('varchar1' as varchar(100)), "char_type", [...]
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_complex_struct
+POSTHOOK: query: INSERT INTO test_complex_struct SELECT named_struct("tinyint_type", cast(1 as tinyint), "smallint_type", cast(2 as smallint), "bigint_type", cast(3 as bigint), "int_type", 4, "float_type", cast(2.2 as float), "double_type", cast(2.2 as double), "decimal_type", cast(20.22 as decimal(4,2)), "timestamp_type", cast('2022-06-30 10:20:30' as timestamp), "date_type", cast('2020-04-23' as date), "string_type", 'str1', "varchar_type", cast('varchar1' as varchar(100)), "char_type" [...]
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_complex_struct
+POSTHOOK: Lineage: test_complex_struct.struct_type EXPRESSION []
+PREHOOK: query: SELECT * FROM test_complex_struct
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_complex_struct
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM test_complex_struct
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_complex_struct
+#### A masked pattern was here ####
+{"tinyint_type":1,"smallint_type":2,"bigint_type":3,"int_type":4,"float_type":2.2,"double_type":2.2,"decimal_type":20.22,"timestamp_type":"2022-06-30 10:20:30","date_type":"2020-04-23","string_type":"str1","varchar_type":"varchar1","char_type":"char                              ","boolean_type":true,"binary_type":binary_maybe}
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@like_test_complex_struct
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@like_test_complex_struct
+PREHOOK: query: DESCRIBE like_test_complex_struct
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@like_test_complex_struct
+POSTHOOK: query: DESCRIBE like_test_complex_struct
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@like_test_complex_struct
+struct_type         	struct<tinyint_type:tinyint,smallint_type:smallint,bigint_type:bigint,int_type:int,float_type:float,double_type:double,decimal_type:decimal(4,2),timestamp_type:timestamp,date_type:date,string_type:string,varchar_type:string,char_type:string,boolean_type:boolean,binary_type:binary>	Inferred from Parquet file.
+PREHOOK: query: INSERT INTO like_test_complex_struct SELECT named_struct("tinyint_type", cast(1 as tinyint), "smallint_type", cast(2 as smallint), "bigint_type", cast(3 as bigint), "int_type", 4, "float_type", cast(2.2 as float), "double_type", cast(2.2 as double), "decimal_type", cast(20.22 as decimal(4,2)), "timestamp_type", cast('2022-06-30 10:20:30' as timestamp), "date_type", cast('2020-04-23' as date), "string_type", 'str1', "varchar_type", 'varchar1', "char_type", 'char', "boolean [...]
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@like_test_complex_struct
+POSTHOOK: query: INSERT INTO like_test_complex_struct SELECT named_struct("tinyint_type", cast(1 as tinyint), "smallint_type", cast(2 as smallint), "bigint_type", cast(3 as bigint), "int_type", 4, "float_type", cast(2.2 as float), "double_type", cast(2.2 as double), "decimal_type", cast(20.22 as decimal(4,2)), "timestamp_type", cast('2022-06-30 10:20:30' as timestamp), "date_type", cast('2020-04-23' as date), "string_type", 'str1', "varchar_type", 'varchar1', "char_type", 'char', "boolea [...]
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@like_test_complex_struct
+POSTHOOK: Lineage: like_test_complex_struct.struct_type EXPRESSION []
+PREHOOK: query: SELECT * FROM like_test_complex_struct
+PREHOOK: type: QUERY
+PREHOOK: Input: default@like_test_complex_struct
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM like_test_complex_struct
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@like_test_complex_struct
+#### A masked pattern was here ####
+{"tinyint_type":1,"smallint_type":2,"bigint_type":3,"int_type":4,"float_type":2.2,"double_type":2.2,"decimal_type":20.22,"timestamp_type":"2022-06-30 10:20:30","date_type":"2020-04-23","string_type":"str1","varchar_type":"varchar1","char_type":"char","boolean_type":true,"binary_type":binary_maybe}
+PREHOOK: query: DROP TABLE like_test_complex_struct
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@like_test_complex_struct
+PREHOOK: Output: default@like_test_complex_struct
+POSTHOOK: query: DROP TABLE like_test_complex_struct
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@like_test_complex_struct
+POSTHOOK: Output: default@like_test_complex_struct
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_complex_complex
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_complex_complex
+PREHOOK: query: DESCRIBE test_complex_complex
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@test_complex_complex
+POSTHOOK: query: DESCRIBE test_complex_complex
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@test_complex_complex
+struct_type         	struct<i:int,s:string,m:map<string,array<int>>,struct_i:struct<str:string>>	                    
+PREHOOK: query: INSERT INTO test_complex_complex SELECT named_struct("i", 10, "s", "hello, world", "m", map("arr", array(1,2,3,4)), "struct_i", named_struct("str", "test_str"))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_complex_complex
+POSTHOOK: query: INSERT INTO test_complex_complex SELECT named_struct("i", 10, "s", "hello, world", "m", map("arr", array(1,2,3,4)), "struct_i", named_struct("str", "test_str"))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_complex_complex
+POSTHOOK: Lineage: test_complex_complex.struct_type EXPRESSION []
+PREHOOK: query: SELECT * FROM test_complex_complex
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_complex_complex
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM test_complex_complex
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_complex_complex
+#### A masked pattern was here ####
+{"i":10,"s":"hello, world","m":{"arr":[1,2,3,4]},"struct_i":{"str":"test_str"}}
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@like_test_complex_complex
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@like_test_complex_complex
+PREHOOK: query: DESCRIBE like_test_complex_complex
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@like_test_complex_complex
+POSTHOOK: query: DESCRIBE like_test_complex_complex
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@like_test_complex_complex
+struct_type         	struct<i:int,s:string,m:map<string,array<int>>,struct_i:struct<str:string>>	Inferred from Parquet file.
+PREHOOK: query: INSERT INTO like_test_complex_complex SELECT named_struct("i", 10, "s", "hello, world", "m", map("arr", array(1,2,3,4)), "struct_i", named_struct("str", "test_str"))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@like_test_complex_complex
+POSTHOOK: query: INSERT INTO like_test_complex_complex SELECT named_struct("i", 10, "s", "hello, world", "m", map("arr", array(1,2,3,4)), "struct_i", named_struct("str", "test_str"))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@like_test_complex_complex
+POSTHOOK: Lineage: like_test_complex_complex.struct_type EXPRESSION []
+PREHOOK: query: SELECT * FROM like_test_complex_complex
+PREHOOK: type: QUERY
+PREHOOK: Input: default@like_test_complex_complex
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM like_test_complex_complex
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@like_test_complex_complex
+#### A masked pattern was here ####
+{"i":10,"s":"hello, world","m":{"arr":[1,2,3,4]},"struct_i":{"str":"test_str"}}
+PREHOOK: query: DROP TABLE like_test_complex_complex
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@like_test_complex_complex
+PREHOOK: Output: default@like_test_complex_complex
+POSTHOOK: query: DROP TABLE like_test_complex_complex
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@like_test_complex_complex
+POSTHOOK: Output: default@like_test_complex_complex
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@like_test_partitioning
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@like_test_partitioning
+PREHOOK: query: DESCRIBE like_test_partitioning
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@like_test_partitioning
+POSTHOOK: query: DESCRIBE like_test_partitioning
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@like_test_partitioning
+tinyint_type        	tinyint             	Inferred from Parquet file.
+smallint_type       	smallint            	Inferred from Parquet file.
+bigint_type         	bigint              	Inferred from Parquet file.
+int_type            	int                 	Inferred from Parquet file.
+float_type          	float               	Inferred from Parquet file.
+double_type         	double              	Inferred from Parquet file.
+decimal_type        	decimal(4,2)        	Inferred from Parquet file.
+timestamp_type      	timestamp           	Inferred from Parquet file.
+date_type           	date                	Inferred from Parquet file.
+string_type         	string              	Inferred from Parquet file.
+varchar_type        	string              	Inferred from Parquet file.
+char_type           	string              	Inferred from Parquet file.
+boolean_type        	boolean             	Inferred from Parquet file.
+binary_type         	binary              	Inferred from Parquet file.
+year                	string              	                    
+month               	string              	                    
+	 	 
+# Partition Information	 	 
+# col_name            	data_type           	comment             
+year                	string              	                    
+month               	string              	                    
+PREHOOK: query: INSERT INTO like_test_partitioning PARTITION (year='1984', month='1') VALUES (1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe'),
+       (1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@like_test_partitioning@year=1984/month=1
+POSTHOOK: query: INSERT INTO like_test_partitioning PARTITION (year='1984', month='1') VALUES (1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe'),
+       (1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@like_test_partitioning@year=1984/month=1
+POSTHOOK: Lineage: like_test_partitioning PARTITION(year=1984,month=1).bigint_type SCRIPT []
+POSTHOOK: Lineage: like_test_partitioning PARTITION(year=1984,month=1).binary_type SCRIPT []
+POSTHOOK: Lineage: like_test_partitioning PARTITION(year=1984,month=1).boolean_type SCRIPT []
+POSTHOOK: Lineage: like_test_partitioning PARTITION(year=1984,month=1).char_type SCRIPT []
+POSTHOOK: Lineage: like_test_partitioning PARTITION(year=1984,month=1).date_type SCRIPT []
+POSTHOOK: Lineage: like_test_partitioning PARTITION(year=1984,month=1).decimal_type SCRIPT []
+POSTHOOK: Lineage: like_test_partitioning PARTITION(year=1984,month=1).double_type SCRIPT []
+POSTHOOK: Lineage: like_test_partitioning PARTITION(year=1984,month=1).float_type SCRIPT []
+POSTHOOK: Lineage: like_test_partitioning PARTITION(year=1984,month=1).int_type SCRIPT []
+POSTHOOK: Lineage: like_test_partitioning PARTITION(year=1984,month=1).smallint_type SCRIPT []
+POSTHOOK: Lineage: like_test_partitioning PARTITION(year=1984,month=1).string_type SCRIPT []
+POSTHOOK: Lineage: like_test_partitioning PARTITION(year=1984,month=1).timestamp_type SCRIPT []
+POSTHOOK: Lineage: like_test_partitioning PARTITION(year=1984,month=1).tinyint_type SCRIPT []
+POSTHOOK: Lineage: like_test_partitioning PARTITION(year=1984,month=1).varchar_type SCRIPT []
+PREHOOK: query: SELECT * FROM like_test_partitioning
+PREHOOK: type: QUERY
+PREHOOK: Input: default@like_test_partitioning
+PREHOOK: Input: default@like_test_partitioning@year=1984/month=1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM like_test_partitioning
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@like_test_partitioning
+POSTHOOK: Input: default@like_test_partitioning@year=1984/month=1
+#### A masked pattern was here ####
+1	2	3	4	2.2	2.2	20.20	2022-06-30 10:20:30	2020-04-23	str1	varchar1	char	true	binary_maybe	1984	1
+1	2	3	4	2.2	2.2	20.20	2022-06-30 10:20:30	2020-04-23	str1	varchar1	char	true	binary_maybe	1984	1
+PREHOOK: query: DROP TABLE like_test_partitioning
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@like_test_partitioning
+PREHOOK: Output: default@like_test_partitioning
+POSTHOOK: query: DROP TABLE like_test_partitioning
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@like_test_partitioning
+POSTHOOK: Output: default@like_test_partitioning
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/SchemaInference.java b/serde/src/java/org/apache/hadoop/hive/serde2/SchemaInference.java
new file mode 100644
index 00000000000..4b41cc1204e
--- /dev/null
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/SchemaInference.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2;
+
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+
+public interface SchemaInference {
+  /**
+   * Infer a Hive-compatible schema from the provided file. This method allows SerDes to
+   * optionally implement schema inference for CREATE TABLE LIKE FILE support.
+   *
+   * @param conf Configuration to use while reading the file
+   * @param file Fully qualified path to the file to infer the schema from (a Hadoop-compatible URI plus file name)
+   * @return List of FieldSchema derived from the provided file
+   * @throws SerDeException if the schema cannot be inferred from the file
+   */
+  List<FieldSchema> readSchema(Configuration conf, String file) throws SerDeException;
+}
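
For reference, a minimal sketch of what a class satisfying the SchemaInference contract
looks like. The class and package names below are hypothetical and purely illustrative;
in this patch the real implementation is the ParquetHiveSerDe, which reads the Parquet
footer to derive the column list.

    package org.apache.hadoop.hive.serde2.example; // hypothetical package, not part of this patch

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.metastore.api.FieldSchema;
    import org.apache.hadoop.hive.serde2.SchemaInference;
    import org.apache.hadoop.hive.serde2.SerDeException;

    public class ExampleSchemaInference implements SchemaInference {
      @Override
      public List<FieldSchema> readSchema(Configuration conf, String file) throws SerDeException {
        try {
          Path path = new Path(file);
          FileSystem fs = path.getFileSystem(conf);
          if (!fs.exists(path)) {
            throw new SerDeException("File does not exist: " + file);
          }
          // A real SerDe would read the file's metadata here (e.g. a Parquet footer)
          // and translate each field to the matching Hive type name.
          List<FieldSchema> cols = new ArrayList<>();
          cols.add(new FieldSchema("example_col", "string", "Inferred from file."));
          return cols;
        } catch (IOException e) {
          throw new SerDeException("Failed to infer schema from " + file, e);
        }
      }
    }

The comment string placed in each FieldSchema is what surfaces in DESCRIBE output, which
is why the golden files above show "Inferred from Parquet file." next to every derived column.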