You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ai...@apache.org on 2018/03/12 21:24:56 UTC
hive git commit: HIVE-14792: AvroSerde reads the remote schema-file
at least once per mapper, per table reference. (Addendum)
Repository: hive
Updated Branches:
refs/heads/master 190c72e77 -> 1629ec058
HIVE-14792: AvroSerde reads the remote schema-file at least once per mapper, per table reference. (Addendum)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1629ec05
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1629ec05
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1629ec05
Branch: refs/heads/master
Commit: 1629ec058faf2dce581de4f393f0c6485c7425d7
Parents: 190c72e
Author: Aihua Xu <ai...@apache.org>
Authored: Thu Mar 8 11:33:37 2018 -0800
Committer: Aihua Xu <ai...@apache.org>
Committed: Mon Mar 12 14:16:38 2018 -0700
----------------------------------------------------------------------
.../TablePropertyEnrichmentOptimizer.java | 45 +++-
.../avro_tableproperty_optimize.q | 63 ++++++
.../avro_tableproperty_optimize.q.out | 226 +++++++++++++++++++
3 files changed, 324 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/1629ec05/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java
index d806775..bc17691 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java
@@ -26,6 +26,7 @@ import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
@@ -40,8 +41,10 @@ import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.serde2.Deserializer;
+import org.apache.hive.common.util.ReflectionUtil;
import java.util.Arrays;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
@@ -78,29 +81,51 @@ class TablePropertyEnrichmentOptimizer extends Transform {
}
}
+ /**
+ * Retrieves the table properties as well as the properties from Serde.
+ */
+ private static Map<String, String> getTableParameters(Table table) {
+ Map<String, String> originalTableParameters = new HashMap<>(table.getParameters());
+ Properties tableMetadata = MetaStoreUtils.getTableMetadata(table);
+ for (String property : tableMetadata.stringPropertyNames()) {
+ if (!originalTableParameters.containsKey(property)) {
+ originalTableParameters.put(property, tableMetadata.getProperty(property));
+ }
+ }
+ return originalTableParameters;
+ }
+
private static class Processor implements NodeProcessor {
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
TableScanOperator tsOp = (TableScanOperator) nd;
WalkerCtx context = (WalkerCtx)procCtx;
-
TableScanDesc tableScanDesc = tsOp.getConf();
Table table = tsOp.getConf().getTableMetadata().getTTable();
- Map<String, String> tableParameters = table.getParameters();
- Properties tableProperties = new Properties();
- tableProperties.putAll(tableParameters);
- Deserializer deserializer = tableScanDesc.getTableMetadata().getDeserializer();
- String deserializerClassName = deserializer.getClass().getName();
+ Map<String, String> originalTableParameters = getTableParameters(table);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Original Table parameters: " + originalTableParameters);
+ }
+ Properties clonedTableParameters = new Properties();
+ clonedTableParameters.putAll(originalTableParameters);
+
+ String deserializerClassName = null;
try {
+ deserializerClassName = tableScanDesc.getTableMetadata().getSd().getSerdeInfo().getSerializationLib();
+ Deserializer deserializer = ReflectionUtil.newInstance(
+ context.conf.getClassByName(deserializerClassName)
+ .asSubclass(Deserializer.class),
+ context.conf);
+
if (context.serdeClassesUnderConsideration.contains(deserializerClassName)) {
- deserializer.initialize(context.conf, tableProperties);
+ deserializer.initialize(context.conf, clonedTableParameters);
LOG.debug("SerDe init succeeded for class: " + deserializerClassName);
- for (Map.Entry property : tableProperties.entrySet()) {
- if (!property.getValue().equals(tableParameters.get(property.getKey()))) {
+ for (Map.Entry property : clonedTableParameters.entrySet()) {
+ if (!property.getValue().equals(originalTableParameters.get(property.getKey()))) {
LOG.debug("Resolving changed parameters! key=" + property.getKey() + ", value=" + property.getValue());
- tableParameters.put((String) property.getKey(), (String) property.getValue());
+ table.getParameters().put((String) property.getKey(), (String) property.getValue());
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/1629ec05/ql/src/test/queries/clientpositive/avro_tableproperty_optimize.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/avro_tableproperty_optimize.q b/ql/src/test/queries/clientpositive/avro_tableproperty_optimize.q
new file mode 100644
index 0000000..e6b75c6
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/avro_tableproperty_optimize.q
@@ -0,0 +1,63 @@
+-- Check the queries work fine with the following property set to true
+SET hive.optimize.update.table.properties.from.serde=true;
+
+dfs -cp ${system:hive.root}data/files/table1.avsc ${system:test.tmp.dir}/;
+
+CREATE TABLE avro_extschema_literal
+STORED AS AVRO
+TBLPROPERTIES ('avro.schema.literal'='{
+ "namespace": "org.apache.hive",
+ "name": "ext_schema",
+ "type": "record",
+ "fields": [
+ { "name":"col1", "type":"string" },
+ { "name":"col2", "type":"long" },
+ { "name":"col3", "type":"string" }
+ ] }');
+INSERT INTO TABLE avro_extschema_literal VALUES('s1', 1, 's2');
+
+DESCRIBE EXTENDED avro_extschema_literal;
+SELECT * FROM avro_extschema_literal;
+
+CREATE TABLE avro_extschema_url
+STORED AS AVRO
+TBLPROPERTIES ('avro.schema.url'='${system:test.tmp.dir}/table1.avsc');
+INSERT INTO TABLE avro_extschema_url VALUES('s1', 1, 's2');
+
+DESCRIBE EXTENDED avro_extschema_url;
+SELECT * FROM avro_extschema_url;
+
+CREATE TABLE avro_extschema_literal1
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+WITH SERDEPROPERTIES (
+'avro.schema.literal'='{
+ "namespace": "org.apache.hive",
+ "name": "ext_schema",
+ "type": "record",
+ "fields": [
+ { "name":"col1", "type":"string" },
+ { "name":"col2", "type":"long" },
+ { "name":"col3", "type":"string" }
+ ] }')
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat';
+INSERT INTO TABLE avro_extschema_literal1 VALUES('s1', 1, 's2');
+
+DESCRIBE EXTENDED avro_extschema_literal1;
+SELECT * FROM avro_extschema_literal1;
+
+CREATE TABLE avro_extschema_url1
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+WITH SERDEPROPERTIES ('avro.schema.url'='${system:test.tmp.dir}/table1.avsc')
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat';
+INSERT INTO TABLE avro_extschema_url1 VALUES('s1', 1, 's2');
+
+DESCRIBE EXTENDED avro_extschema_url1;
+SELECT * FROM avro_extschema_url1;
http://git-wip-us.apache.org/repos/asf/hive/blob/1629ec05/ql/src/test/results/clientpositive/avro_tableproperty_optimize.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/avro_tableproperty_optimize.q.out b/ql/src/test/results/clientpositive/avro_tableproperty_optimize.q.out
new file mode 100644
index 0000000..8660c44
--- /dev/null
+++ b/ql/src/test/results/clientpositive/avro_tableproperty_optimize.q.out
@@ -0,0 +1,226 @@
+PREHOOK: query: CREATE TABLE avro_extschema_literal
+STORED AS AVRO
+TBLPROPERTIES ('avro.schema.literal'='{
+ "namespace": "org.apache.hive",
+ "name": "ext_schema",
+ "type": "record",
+ "fields": [
+ { "name":"col1", "type":"string" },
+ { "name":"col2", "type":"long" },
+ { "name":"col3", "type":"string" }
+ ] }')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@avro_extschema_literal
+POSTHOOK: query: CREATE TABLE avro_extschema_literal
+STORED AS AVRO
+TBLPROPERTIES ('avro.schema.literal'='{
+ "namespace": "org.apache.hive",
+ "name": "ext_schema",
+ "type": "record",
+ "fields": [
+ { "name":"col1", "type":"string" },
+ { "name":"col2", "type":"long" },
+ { "name":"col3", "type":"string" }
+ ] }')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@avro_extschema_literal
+PREHOOK: query: INSERT INTO TABLE avro_extschema_literal VALUES('s1', 1, 's2')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@avro_extschema_literal
+POSTHOOK: query: INSERT INTO TABLE avro_extschema_literal VALUES('s1', 1, 's2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@avro_extschema_literal
+POSTHOOK: Lineage: avro_extschema_literal.col1 SCRIPT []
+POSTHOOK: Lineage: avro_extschema_literal.col2 SCRIPT []
+POSTHOOK: Lineage: avro_extschema_literal.col3 SCRIPT []
+PREHOOK: query: DESCRIBE EXTENDED avro_extschema_literal
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@avro_extschema_literal
+POSTHOOK: query: DESCRIBE EXTENDED avro_extschema_literal
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@avro_extschema_literal
+col1 string
+col2 bigint
+col3 string
+
+#### A masked pattern was here ####
+PREHOOK: query: SELECT * FROM avro_extschema_literal
+PREHOOK: type: QUERY
+PREHOOK: Input: default@avro_extschema_literal
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM avro_extschema_literal
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@avro_extschema_literal
+#### A masked pattern was here ####
+s1 1 s2
+PREHOOK: query: CREATE TABLE avro_extschema_url
+STORED AS AVRO
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@avro_extschema_url
+POSTHOOK: query: CREATE TABLE avro_extschema_url
+STORED AS AVRO
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@avro_extschema_url
+PREHOOK: query: INSERT INTO TABLE avro_extschema_url VALUES('s1', 1, 's2')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@avro_extschema_url
+POSTHOOK: query: INSERT INTO TABLE avro_extschema_url VALUES('s1', 1, 's2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@avro_extschema_url
+POSTHOOK: Lineage: avro_extschema_url.col1 SCRIPT []
+POSTHOOK: Lineage: avro_extschema_url.col2 SCRIPT []
+POSTHOOK: Lineage: avro_extschema_url.col3 SCRIPT []
+PREHOOK: query: DESCRIBE EXTENDED avro_extschema_url
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@avro_extschema_url
+POSTHOOK: query: DESCRIBE EXTENDED avro_extschema_url
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@avro_extschema_url
+col1 string
+col2 bigint
+col3 string
+
+#### A masked pattern was here ####
+PREHOOK: query: SELECT * FROM avro_extschema_url
+PREHOOK: type: QUERY
+PREHOOK: Input: default@avro_extschema_url
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM avro_extschema_url
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@avro_extschema_url
+#### A masked pattern was here ####
+s1 1 s2
+PREHOOK: query: CREATE TABLE avro_extschema_literal1
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+WITH SERDEPROPERTIES (
+'avro.schema.literal'='{
+ "namespace": "org.apache.hive",
+ "name": "ext_schema",
+ "type": "record",
+ "fields": [
+ { "name":"col1", "type":"string" },
+ { "name":"col2", "type":"long" },
+ { "name":"col3", "type":"string" }
+ ] }')
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@avro_extschema_literal1
+POSTHOOK: query: CREATE TABLE avro_extschema_literal1
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+WITH SERDEPROPERTIES (
+'avro.schema.literal'='{
+ "namespace": "org.apache.hive",
+ "name": "ext_schema",
+ "type": "record",
+ "fields": [
+ { "name":"col1", "type":"string" },
+ { "name":"col2", "type":"long" },
+ { "name":"col3", "type":"string" }
+ ] }')
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@avro_extschema_literal1
+PREHOOK: query: INSERT INTO TABLE avro_extschema_literal1 VALUES('s1', 1, 's2')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@avro_extschema_literal1
+POSTHOOK: query: INSERT INTO TABLE avro_extschema_literal1 VALUES('s1', 1, 's2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@avro_extschema_literal1
+POSTHOOK: Lineage: avro_extschema_literal1.col1 SCRIPT []
+POSTHOOK: Lineage: avro_extschema_literal1.col2 SCRIPT []
+POSTHOOK: Lineage: avro_extschema_literal1.col3 SCRIPT []
+PREHOOK: query: DESCRIBE EXTENDED avro_extschema_literal1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@avro_extschema_literal1
+POSTHOOK: query: DESCRIBE EXTENDED avro_extschema_literal1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@avro_extschema_literal1
+col1 string
+col2 bigint
+col3 string
+
+#### A masked pattern was here ####
+PREHOOK: query: SELECT * FROM avro_extschema_literal1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@avro_extschema_literal1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM avro_extschema_literal1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@avro_extschema_literal1
+#### A masked pattern was here ####
+s1 1 s2
+PREHOOK: query: CREATE TABLE avro_extschema_url1
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+#### A masked pattern was here ####
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@avro_extschema_url1
+POSTHOOK: query: CREATE TABLE avro_extschema_url1
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+#### A masked pattern was here ####
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@avro_extschema_url1
+PREHOOK: query: INSERT INTO TABLE avro_extschema_url1 VALUES('s1', 1, 's2')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@avro_extschema_url1
+POSTHOOK: query: INSERT INTO TABLE avro_extschema_url1 VALUES('s1', 1, 's2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@avro_extschema_url1
+POSTHOOK: Lineage: avro_extschema_url1.col1 SCRIPT []
+POSTHOOK: Lineage: avro_extschema_url1.col2 SCRIPT []
+POSTHOOK: Lineage: avro_extschema_url1.col3 SCRIPT []
+PREHOOK: query: DESCRIBE EXTENDED avro_extschema_url1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@avro_extschema_url1
+POSTHOOK: query: DESCRIBE EXTENDED avro_extschema_url1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@avro_extschema_url1
+col1 string
+col2 bigint
+col3 string
+
+#### A masked pattern was here ####
+PREHOOK: query: SELECT * FROM avro_extschema_url1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@avro_extschema_url1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM avro_extschema_url1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@avro_extschema_url1
+#### A masked pattern was here ####
+s1 1 s2