You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2016/05/23 23:51:47 UTC

hive git commit: HIVE-12643 : For self describing InputFormat don't replicate schema information in partitions (Ashutosh Chauhan via Matt McCline)

Repository: hive
Updated Branches:
  refs/heads/master 7fb4b1fed -> 2ed47838d


HIVE-12643 : For self describing InputFormat don't replicate schema information in partitions (Ashutosh Chauhan via Matt McCline)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2ed47838
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2ed47838
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2ed47838

Branch: refs/heads/master
Commit: 2ed47838dc6cfee3fb6f4470427e50a4495b2eba
Parents: 7fb4b1f
Author: Ashutosh Chauhan <ha...@apache.org>
Authored: Wed Dec 9 17:26:00 2015 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Mon May 23 16:51:12 2016 -0700

----------------------------------------------------------------------
 .../hadoop/hive/metastore/MetaStoreUtils.java   | 68 ++++++++++++--------
 .../apache/hadoop/hive/ql/exec/Utilities.java   |  2 +-
 .../hive/ql/optimizer/GenMapRedUtils.java       |  6 --
 .../hive/ql/optimizer/physical/Vectorizer.java  |  6 +-
 .../hadoop/hive/ql/plan/PartitionDesc.java      | 14 +++-
 .../clientpositive/quotedid_tblproperty.q.out   |  4 +-
 .../tez/vector_partition_diff_num_cols.q.out    |  2 +
 .../vector_partition_diff_num_cols.q.out        |  2 +
 8 files changed, 63 insertions(+), 41 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
index 6bc882a..84b24ab 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
@@ -1012,8 +1012,38 @@ public class MetaStoreUtils {
     return schema;
   }
 
-  public static Properties getSchema(
-      org.apache.hadoop.hive.metastore.api.StorageDescriptor sd,
+  public static Properties addCols(Properties schema, List<FieldSchema> cols) {
+
+    StringBuilder colNameBuf = new StringBuilder();
+    StringBuilder colTypeBuf = new StringBuilder();
+    StringBuilder colComment = new StringBuilder();
+
+    boolean first = true;
+    for (FieldSchema col : cols) {
+      if (!first) {
+        colNameBuf.append(",");
+        colTypeBuf.append(":");
+        colComment.append('\0');
+      }
+      colNameBuf.append(col.getName());
+      colTypeBuf.append(col.getType());
+      colComment.append((null != col.getComment()) ? col.getComment() : "");
+      first = false;
+    }
+    schema.setProperty(
+        org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS,
+        colNameBuf.toString());
+    String colTypes = colTypeBuf.toString();
+    schema.setProperty(
+        org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES,
+        colTypes);
+    schema.setProperty("columns.comments", colComment.toString());
+
+    return schema;
+
+  }
+
+  public static Properties getSchemaWithoutCols(org.apache.hadoop.hive.metastore.api.StorageDescriptor sd,
       org.apache.hadoop.hive.metastore.api.StorageDescriptor tblsd,
       Map<String, String> parameters, String databaseName, String tableName,
       List<FieldSchema> partitionKeys) {
@@ -1063,30 +1093,7 @@ public class MetaStoreUtils {
                 .getSerdeInfo().getSerializationLib());
       }
     }
-    StringBuilder colNameBuf = new StringBuilder();
-    StringBuilder colTypeBuf = new StringBuilder();
-    StringBuilder colComment = new StringBuilder();
-    boolean first = true;
-    for (FieldSchema col : tblsd.getCols()) {
-      if (!first) {
-        colNameBuf.append(",");
-        colTypeBuf.append(":");
-        colComment.append('\0');
-      }
-      colNameBuf.append(col.getName());
-      colTypeBuf.append(col.getType());
-      colComment.append((null != col.getComment()) ? col.getComment() : "");
-      first = false;
-    }
-    String colNames = colNameBuf.toString();
-    String colTypes = colTypeBuf.toString();
-    schema.setProperty(
-        org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS,
-        colNames);
-    schema.setProperty(
-        org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES,
-        colTypes);
-    schema.setProperty("columns.comments", colComment.toString());
+
     if (sd.getCols() != null) {
       schema.setProperty(
           org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_DDL,
@@ -1130,6 +1137,15 @@ public class MetaStoreUtils {
     return schema;
   }
 
+  public static Properties getSchema(
+      org.apache.hadoop.hive.metastore.api.StorageDescriptor sd,
+      org.apache.hadoop.hive.metastore.api.StorageDescriptor tblsd,
+      Map<String, String> parameters, String databaseName, String tableName,
+      List<FieldSchema> partitionKeys) {
+
+    return addCols(getSchemaWithoutCols(sd, tblsd, parameters, databaseName, tableName, partitionKeys), tblsd.getCols());
+  }
+
   /**
    * Convert FieldSchemas to columnNames.
    */

http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 2ab9ed2..8144c3b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -732,7 +732,7 @@ public final class Utilities {
   }
 
   public static PartitionDesc getPartitionDesc(Partition part) throws HiveException {
-    return (new PartitionDesc(part));
+    return new PartitionDesc(part);
   }
 
   public static PartitionDesc getPartitionDescFromTableDesc(TableDesc tblDesc, Partition part,

http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
index 812af9a..7595065 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
@@ -527,9 +527,6 @@ public final class GenMapRedUtils {
     Map<String, String> props = tsOp.getConf().getOpProps();
     if (props != null) {
       Properties target = aliasPartnDesc.getProperties();
-      if (target == null) {
-        aliasPartnDesc.setProperties(target = new Properties());
-      }
       target.putAll(props);
     }
 
@@ -668,9 +665,6 @@ public final class GenMapRedUtils {
 
       if (props != null) {
         Properties target = tblDesc.getProperties();
-        if (target == null) {
-          tblDesc.setProperties(target = new Properties());
-        }
         target.putAll(props);
       }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 51e7a17..c1d6582 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -1147,7 +1147,7 @@ public class Vectorizer implements PhysicalPlanResolver {
   class MapWorkVectorizationNodeProcessor extends VectorizationNodeProcessor {
 
     private final MapWork mWork;
-    private VectorTaskColumnInfo vectorTaskColumnInfo;
+    private final VectorTaskColumnInfo vectorTaskColumnInfo;
     private final boolean isTez;
 
     public MapWorkVectorizationNodeProcessor(MapWork mWork, boolean isTez,
@@ -1205,9 +1205,9 @@ public class Vectorizer implements PhysicalPlanResolver {
 
   class ReduceWorkVectorizationNodeProcessor extends VectorizationNodeProcessor {
 
-    private VectorTaskColumnInfo vectorTaskColumnInfo;
+    private final VectorTaskColumnInfo vectorTaskColumnInfo;
 
-    private boolean isTez;
+    private final boolean isTez;
 
     private Operator<? extends OperatorDesc> rootVectorOp;
 

http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
index 4d627ef..fe09bdf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
@@ -29,6 +29,7 @@ import java.util.Properties;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
@@ -85,10 +86,17 @@ public class PartitionDesc implements Serializable, Cloneable {
 
   public PartitionDesc(final Partition part) throws HiveException {
     PartitionDescConstructorHelper(part, getTableDesc(part.getTable()), true);
-    setProperties(part.getMetadataFromPartitionSchema());
+    if(Utilities.isInputFileFormatSelfDescribing(this)) {
+      // if IF is self describing no need to send column info per partition, since its not used anyway.
+      Table tbl = part.getTable();
+      setProperties(MetaStoreUtils.getSchemaWithoutCols(part.getTPartition().getSd(), part.getTPartition().getSd(),
+          part.getParameters(), tbl.getDbName(), tbl.getTableName(), tbl.getPartitionKeys()));
+    } else {
+      setProperties(part.getMetadataFromPartitionSchema());
+    }
   }
 
-  /** 
+  /**
    * @param part Partition
    * @param tblDesc Table Descriptor
    * @param usePartSchemaProperties Use Partition Schema Properties to set the
@@ -190,7 +198,7 @@ public class PartitionDesc implements Serializable, Cloneable {
     Class<? extends OutputFormat> outputClass = outputFileFormatClass == null ? null :
       HiveFileFormatUtils.getOutputFormatSubstitute(outputFileFormatClass);
     if (outputClass != null) {
-      this.outputFileFormatClass = (Class<? extends HiveOutputFormat>) 
+      this.outputFileFormatClass = (Class<? extends HiveOutputFormat>)
         CLASS_INTERNER.intern(outputClass);
     } else {
       this.outputFileFormatClass = outputClass;

http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/ql/src/test/results/clientpositive/quotedid_tblproperty.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/quotedid_tblproperty.q.out b/ql/src/test/results/clientpositive/quotedid_tblproperty.q.out
index ca1dbe6..3204c7d 100644
--- a/ql/src/test/results/clientpositive/quotedid_tblproperty.q.out
+++ b/ql/src/test/results/clientpositive/quotedid_tblproperty.q.out
@@ -16,5 +16,5 @@ PREHOOK: Input: default@xyz
 POSTHOOK: query: describe xyz
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@xyz
-valid_colname       	string              	                    
-invalid.colname     	string              	                    
+key                 	string              	                    
+value               	string              	                    

http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out b/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out
index f23a359..9b75892 100644
--- a/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out
@@ -368,6 +368,7 @@ STAGE PLANS:
                         sort order: 
                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col0 (type: bigint)
+            Execution mode: vectorized
         Reducer 2 
             Execution mode: vectorized
             Reduce Operator Tree:
@@ -477,6 +478,7 @@ STAGE PLANS:
                         sort order: 
                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col0 (type: bigint)
+            Execution mode: vectorized
         Reducer 2 
             Execution mode: vectorized
             Reduce Operator Tree:

http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out b/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out
index ef92b89..b224da8 100644
--- a/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out
+++ b/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out
@@ -346,6 +346,7 @@ STAGE PLANS:
                   sort order: 
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: bigint)
+      Execution mode: vectorized
       Reduce Operator Tree:
         Group By Operator
           aggregations: sum(VALUE._col0)
@@ -447,6 +448,7 @@ STAGE PLANS:
                   sort order: 
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: bigint)
+      Execution mode: vectorized
       Reduce Operator Tree:
         Group By Operator
           aggregations: sum(VALUE._col0)