You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2016/05/23 23:51:47 UTC
hive git commit: HIVE-12643 : For self describing InputFormat don't
replicate schema information in partitions (Ashutosh Chauhan via Matt
McCline)
Repository: hive
Updated Branches:
refs/heads/master 7fb4b1fed -> 2ed47838d
HIVE-12643 : For self describing InputFormat don't replicate schema information in partitions (Ashutosh Chauhan via Matt McCline)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2ed47838
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2ed47838
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2ed47838
Branch: refs/heads/master
Commit: 2ed47838dc6cfee3fb6f4470427e50a4495b2eba
Parents: 7fb4b1f
Author: Ashutosh Chauhan <ha...@apache.org>
Authored: Wed Dec 9 17:26:00 2015 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Mon May 23 16:51:12 2016 -0700
----------------------------------------------------------------------
.../hadoop/hive/metastore/MetaStoreUtils.java | 68 ++++++++++++--------
.../apache/hadoop/hive/ql/exec/Utilities.java | 2 +-
.../hive/ql/optimizer/GenMapRedUtils.java | 6 --
.../hive/ql/optimizer/physical/Vectorizer.java | 6 +-
.../hadoop/hive/ql/plan/PartitionDesc.java | 14 +++-
.../clientpositive/quotedid_tblproperty.q.out | 4 +-
.../tez/vector_partition_diff_num_cols.q.out | 2 +
.../vector_partition_diff_num_cols.q.out | 2 +
8 files changed, 63 insertions(+), 41 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
index 6bc882a..84b24ab 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
@@ -1012,8 +1012,38 @@ public class MetaStoreUtils {
return schema;
}
- public static Properties getSchema(
- org.apache.hadoop.hive.metastore.api.StorageDescriptor sd,
+ public static Properties addCols(Properties schema, List<FieldSchema> cols) {
+
+ StringBuilder colNameBuf = new StringBuilder();
+ StringBuilder colTypeBuf = new StringBuilder();
+ StringBuilder colComment = new StringBuilder();
+
+ boolean first = true;
+ for (FieldSchema col : cols) {
+ if (!first) {
+ colNameBuf.append(",");
+ colTypeBuf.append(":");
+ colComment.append('\0');
+ }
+ colNameBuf.append(col.getName());
+ colTypeBuf.append(col.getType());
+ colComment.append((null != col.getComment()) ? col.getComment() : "");
+ first = false;
+ }
+ schema.setProperty(
+ org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS,
+ colNameBuf.toString());
+ String colTypes = colTypeBuf.toString();
+ schema.setProperty(
+ org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES,
+ colTypes);
+ schema.setProperty("columns.comments", colComment.toString());
+
+ return schema;
+
+ }
+
+ public static Properties getSchemaWithoutCols(org.apache.hadoop.hive.metastore.api.StorageDescriptor sd,
org.apache.hadoop.hive.metastore.api.StorageDescriptor tblsd,
Map<String, String> parameters, String databaseName, String tableName,
List<FieldSchema> partitionKeys) {
@@ -1063,30 +1093,7 @@ public class MetaStoreUtils {
.getSerdeInfo().getSerializationLib());
}
}
- StringBuilder colNameBuf = new StringBuilder();
- StringBuilder colTypeBuf = new StringBuilder();
- StringBuilder colComment = new StringBuilder();
- boolean first = true;
- for (FieldSchema col : tblsd.getCols()) {
- if (!first) {
- colNameBuf.append(",");
- colTypeBuf.append(":");
- colComment.append('\0');
- }
- colNameBuf.append(col.getName());
- colTypeBuf.append(col.getType());
- colComment.append((null != col.getComment()) ? col.getComment() : "");
- first = false;
- }
- String colNames = colNameBuf.toString();
- String colTypes = colTypeBuf.toString();
- schema.setProperty(
- org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS,
- colNames);
- schema.setProperty(
- org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES,
- colTypes);
- schema.setProperty("columns.comments", colComment.toString());
+
if (sd.getCols() != null) {
schema.setProperty(
org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_DDL,
@@ -1130,6 +1137,15 @@ public class MetaStoreUtils {
return schema;
}
+ public static Properties getSchema(
+ org.apache.hadoop.hive.metastore.api.StorageDescriptor sd,
+ org.apache.hadoop.hive.metastore.api.StorageDescriptor tblsd,
+ Map<String, String> parameters, String databaseName, String tableName,
+ List<FieldSchema> partitionKeys) {
+
+ return addCols(getSchemaWithoutCols(sd, tblsd, parameters, databaseName, tableName, partitionKeys), tblsd.getCols());
+ }
+
/**
* Convert FieldSchemas to columnNames.
*/
http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 2ab9ed2..8144c3b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -732,7 +732,7 @@ public final class Utilities {
}
public static PartitionDesc getPartitionDesc(Partition part) throws HiveException {
- return (new PartitionDesc(part));
+ return new PartitionDesc(part);
}
public static PartitionDesc getPartitionDescFromTableDesc(TableDesc tblDesc, Partition part,
http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
index 812af9a..7595065 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
@@ -527,9 +527,6 @@ public final class GenMapRedUtils {
Map<String, String> props = tsOp.getConf().getOpProps();
if (props != null) {
Properties target = aliasPartnDesc.getProperties();
- if (target == null) {
- aliasPartnDesc.setProperties(target = new Properties());
- }
target.putAll(props);
}
@@ -668,9 +665,6 @@ public final class GenMapRedUtils {
if (props != null) {
Properties target = tblDesc.getProperties();
- if (target == null) {
- tblDesc.setProperties(target = new Properties());
- }
target.putAll(props);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 51e7a17..c1d6582 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -1147,7 +1147,7 @@ public class Vectorizer implements PhysicalPlanResolver {
class MapWorkVectorizationNodeProcessor extends VectorizationNodeProcessor {
private final MapWork mWork;
- private VectorTaskColumnInfo vectorTaskColumnInfo;
+ private final VectorTaskColumnInfo vectorTaskColumnInfo;
private final boolean isTez;
public MapWorkVectorizationNodeProcessor(MapWork mWork, boolean isTez,
@@ -1205,9 +1205,9 @@ public class Vectorizer implements PhysicalPlanResolver {
class ReduceWorkVectorizationNodeProcessor extends VectorizationNodeProcessor {
- private VectorTaskColumnInfo vectorTaskColumnInfo;
+ private final VectorTaskColumnInfo vectorTaskColumnInfo;
- private boolean isTez;
+ private final boolean isTez;
private Operator<? extends OperatorDesc> rootVectorOp;
http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
index 4d627ef..fe09bdf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
@@ -29,6 +29,7 @@ import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
@@ -85,10 +86,17 @@ public class PartitionDesc implements Serializable, Cloneable {
public PartitionDesc(final Partition part) throws HiveException {
PartitionDescConstructorHelper(part, getTableDesc(part.getTable()), true);
- setProperties(part.getMetadataFromPartitionSchema());
+ if(Utilities.isInputFileFormatSelfDescribing(this)) {
+ // If the InputFormat is self-describing, there is no need to send column info per partition, since it's not used anyway.
+ Table tbl = part.getTable();
+ setProperties(MetaStoreUtils.getSchemaWithoutCols(part.getTPartition().getSd(), part.getTPartition().getSd(),
+ part.getParameters(), tbl.getDbName(), tbl.getTableName(), tbl.getPartitionKeys()));
+ } else {
+ setProperties(part.getMetadataFromPartitionSchema());
+ }
}
- /**
+ /**
* @param part Partition
* @param tblDesc Table Descriptor
* @param usePartSchemaProperties Use Partition Schema Properties to set the
@@ -190,7 +198,7 @@ public class PartitionDesc implements Serializable, Cloneable {
Class<? extends OutputFormat> outputClass = outputFileFormatClass == null ? null :
HiveFileFormatUtils.getOutputFormatSubstitute(outputFileFormatClass);
if (outputClass != null) {
- this.outputFileFormatClass = (Class<? extends HiveOutputFormat>)
+ this.outputFileFormatClass = (Class<? extends HiveOutputFormat>)
CLASS_INTERNER.intern(outputClass);
} else {
this.outputFileFormatClass = outputClass;
http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/ql/src/test/results/clientpositive/quotedid_tblproperty.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/quotedid_tblproperty.q.out b/ql/src/test/results/clientpositive/quotedid_tblproperty.q.out
index ca1dbe6..3204c7d 100644
--- a/ql/src/test/results/clientpositive/quotedid_tblproperty.q.out
+++ b/ql/src/test/results/clientpositive/quotedid_tblproperty.q.out
@@ -16,5 +16,5 @@ PREHOOK: Input: default@xyz
POSTHOOK: query: describe xyz
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@xyz
-valid_colname string
-invalid.colname string
+key string
+value string
http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out b/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out
index f23a359..9b75892 100644
--- a/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out
@@ -368,6 +368,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
Reducer 2
Execution mode: vectorized
Reduce Operator Tree:
@@ -477,6 +478,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
Reducer 2
Execution mode: vectorized
Reduce Operator Tree:
http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out b/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out
index ef92b89..b224da8 100644
--- a/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out
+++ b/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out
@@ -346,6 +346,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
@@ -447,6 +448,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)