You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ai...@apache.org on 2018/03/12 21:24:56 UTC

hive git commit: HIVE-14792: AvroSerde reads the remote schema-file at least once per mapper, per table reference. (Addendum)

Repository: hive
Updated Branches:
  refs/heads/master 190c72e77 -> 1629ec058


HIVE-14792: AvroSerde reads the remote schema-file at least once per mapper, per table reference. (Addendum)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1629ec05
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1629ec05
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1629ec05

Branch: refs/heads/master
Commit: 1629ec058faf2dce581de4f393f0c6485c7425d7
Parents: 190c72e
Author: Aihua Xu <ai...@apache.org>
Authored: Thu Mar 8 11:33:37 2018 -0800
Committer: Aihua Xu <ai...@apache.org>
Committed: Mon Mar 12 14:16:38 2018 -0700

----------------------------------------------------------------------
 .../TablePropertyEnrichmentOptimizer.java       |  45 +++-
 .../avro_tableproperty_optimize.q               |  63 ++++++
 .../avro_tableproperty_optimize.q.out           | 226 +++++++++++++++++++
 3 files changed, 324 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/1629ec05/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java
index d806775..bc17691 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java
@@ -26,6 +26,7 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
@@ -40,8 +41,10 @@ import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.serde2.Deserializer;
+import org.apache.hive.common.util.ReflectionUtil;
 
 import java.util.Arrays;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
@@ -78,29 +81,51 @@ class TablePropertyEnrichmentOptimizer extends Transform {
     }
   }
 
+  /**
+   * Returns a copy of the table parameters plus any table/SerDe metadata properties not already set.
+   */
+  private static Map<String, String> getTableParameters(Table table) {
+    Map<String, String> originalTableParameters = new HashMap<>(table.getParameters());
+    Properties tableMetadata = MetaStoreUtils.getTableMetadata(table);
+    for (String property : tableMetadata.stringPropertyNames()) {
+      if (!originalTableParameters.containsKey(property)) {
+        originalTableParameters.put(property, tableMetadata.getProperty(property));
+      }
+    }
+    return originalTableParameters;
+  }
+
   private static class Processor implements NodeProcessor {
 
     @Override
     public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
       TableScanOperator tsOp = (TableScanOperator) nd;
       WalkerCtx context = (WalkerCtx)procCtx;
-
       TableScanDesc tableScanDesc = tsOp.getConf();
       Table table = tsOp.getConf().getTableMetadata().getTTable();
-      Map<String, String> tableParameters = table.getParameters();
-      Properties tableProperties = new Properties();
-      tableProperties.putAll(tableParameters);
 
-      Deserializer deserializer = tableScanDesc.getTableMetadata().getDeserializer();
-      String deserializerClassName = deserializer.getClass().getName();
+      Map<String, String> originalTableParameters = getTableParameters(table);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Original Table parameters: " + originalTableParameters);
+      }
+      Properties clonedTableParameters = new Properties();
+      clonedTableParameters.putAll(originalTableParameters);
+
+      String deserializerClassName = null;
       try {
+        deserializerClassName = tableScanDesc.getTableMetadata().getSd().getSerdeInfo().getSerializationLib();
+        Deserializer deserializer = ReflectionUtil.newInstance(
+            context.conf.getClassByName(deserializerClassName)
+                .asSubclass(Deserializer.class),
+            context.conf);
+
         if (context.serdeClassesUnderConsideration.contains(deserializerClassName)) {
-          deserializer.initialize(context.conf, tableProperties);
+          deserializer.initialize(context.conf, clonedTableParameters);
           LOG.debug("SerDe init succeeded for class: " + deserializerClassName);
-          for (Map.Entry property : tableProperties.entrySet()) {
-            if (!property.getValue().equals(tableParameters.get(property.getKey()))) {
+          for (Map.Entry property : clonedTableParameters.entrySet()) {
+            if (!property.getValue().equals(originalTableParameters.get(property.getKey()))) {
               LOG.debug("Resolving changed parameters! key=" + property.getKey() + ", value=" + property.getValue());
-              tableParameters.put((String) property.getKey(), (String) property.getValue());
+              table.getParameters().put((String) property.getKey(), (String) property.getValue());
             }
           }
         }

http://git-wip-us.apache.org/repos/asf/hive/blob/1629ec05/ql/src/test/queries/clientpositive/avro_tableproperty_optimize.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/avro_tableproperty_optimize.q b/ql/src/test/queries/clientpositive/avro_tableproperty_optimize.q
new file mode 100644
index 0000000..e6b75c6
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/avro_tableproperty_optimize.q
@@ -0,0 +1,63 @@
+-- Check that the queries work correctly with the following property set to true
+SET hive.optimize.update.table.properties.from.serde=true;
+
+dfs -cp ${system:hive.root}data/files/table1.avsc ${system:test.tmp.dir}/;
+
+CREATE TABLE avro_extschema_literal
+STORED AS AVRO
+TBLPROPERTIES ('avro.schema.literal'='{
+  "namespace": "org.apache.hive",
+  "name": "ext_schema",
+  "type": "record",
+  "fields": [
+    { "name":"col1", "type":"string" },
+    { "name":"col2", "type":"long" },
+    { "name":"col3", "type":"string" }
+  ] }');
+INSERT INTO TABLE avro_extschema_literal VALUES('s1', 1, 's2');
+
+DESCRIBE EXTENDED avro_extschema_literal;
+SELECT * FROM avro_extschema_literal;
+
+CREATE TABLE avro_extschema_url
+STORED AS AVRO
+TBLPROPERTIES ('avro.schema.url'='${system:test.tmp.dir}/table1.avsc');
+INSERT INTO TABLE avro_extschema_url VALUES('s1', 1, 's2');
+
+DESCRIBE EXTENDED avro_extschema_url;
+SELECT * FROM avro_extschema_url;
+
+CREATE TABLE avro_extschema_literal1
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+WITH SERDEPROPERTIES (
+'avro.schema.literal'='{
+  "namespace": "org.apache.hive",
+  "name": "ext_schema",
+  "type": "record",
+  "fields": [
+    { "name":"col1", "type":"string" },
+    { "name":"col2", "type":"long" },
+    { "name":"col3", "type":"string" }
+  ] }')
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat';
+INSERT INTO TABLE avro_extschema_literal1 VALUES('s1', 1, 's2');
+
+DESCRIBE EXTENDED avro_extschema_literal1;
+SELECT * FROM avro_extschema_literal1;
+
+CREATE TABLE avro_extschema_url1
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+WITH SERDEPROPERTIES ('avro.schema.url'='${system:test.tmp.dir}/table1.avsc')
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat';
+INSERT INTO TABLE avro_extschema_url1 VALUES('s1', 1, 's2');
+
+DESCRIBE EXTENDED avro_extschema_url1;
+SELECT * FROM avro_extschema_url1;

http://git-wip-us.apache.org/repos/asf/hive/blob/1629ec05/ql/src/test/results/clientpositive/avro_tableproperty_optimize.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/avro_tableproperty_optimize.q.out b/ql/src/test/results/clientpositive/avro_tableproperty_optimize.q.out
new file mode 100644
index 0000000..8660c44
--- /dev/null
+++ b/ql/src/test/results/clientpositive/avro_tableproperty_optimize.q.out
@@ -0,0 +1,226 @@
+PREHOOK: query: CREATE TABLE avro_extschema_literal
+STORED AS AVRO
+TBLPROPERTIES ('avro.schema.literal'='{
+  "namespace": "org.apache.hive",
+  "name": "ext_schema",
+  "type": "record",
+  "fields": [
+    { "name":"col1", "type":"string" },
+    { "name":"col2", "type":"long" },
+    { "name":"col3", "type":"string" }
+  ] }')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@avro_extschema_literal
+POSTHOOK: query: CREATE TABLE avro_extschema_literal
+STORED AS AVRO
+TBLPROPERTIES ('avro.schema.literal'='{
+  "namespace": "org.apache.hive",
+  "name": "ext_schema",
+  "type": "record",
+  "fields": [
+    { "name":"col1", "type":"string" },
+    { "name":"col2", "type":"long" },
+    { "name":"col3", "type":"string" }
+  ] }')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@avro_extschema_literal
+PREHOOK: query: INSERT INTO TABLE avro_extschema_literal VALUES('s1', 1, 's2')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@avro_extschema_literal
+POSTHOOK: query: INSERT INTO TABLE avro_extschema_literal VALUES('s1', 1, 's2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@avro_extschema_literal
+POSTHOOK: Lineage: avro_extschema_literal.col1 SCRIPT []
+POSTHOOK: Lineage: avro_extschema_literal.col2 SCRIPT []
+POSTHOOK: Lineage: avro_extschema_literal.col3 SCRIPT []
+PREHOOK: query: DESCRIBE EXTENDED avro_extschema_literal
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@avro_extschema_literal
+POSTHOOK: query: DESCRIBE EXTENDED avro_extschema_literal
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@avro_extschema_literal
+col1                	string              	                    
+col2                	bigint              	                    
+col3                	string              	                    
+	 	 
+#### A masked pattern was here ####
+PREHOOK: query: SELECT * FROM avro_extschema_literal
+PREHOOK: type: QUERY
+PREHOOK: Input: default@avro_extschema_literal
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM avro_extschema_literal
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@avro_extschema_literal
+#### A masked pattern was here ####
+s1	1	s2
+PREHOOK: query: CREATE TABLE avro_extschema_url
+STORED AS AVRO
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@avro_extschema_url
+POSTHOOK: query: CREATE TABLE avro_extschema_url
+STORED AS AVRO
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@avro_extschema_url
+PREHOOK: query: INSERT INTO TABLE avro_extschema_url VALUES('s1', 1, 's2')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@avro_extschema_url
+POSTHOOK: query: INSERT INTO TABLE avro_extschema_url VALUES('s1', 1, 's2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@avro_extschema_url
+POSTHOOK: Lineage: avro_extschema_url.col1 SCRIPT []
+POSTHOOK: Lineage: avro_extschema_url.col2 SCRIPT []
+POSTHOOK: Lineage: avro_extschema_url.col3 SCRIPT []
+PREHOOK: query: DESCRIBE EXTENDED avro_extschema_url
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@avro_extschema_url
+POSTHOOK: query: DESCRIBE EXTENDED avro_extschema_url
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@avro_extschema_url
+col1                	string              	                    
+col2                	bigint              	                    
+col3                	string              	                    
+	 	 
+#### A masked pattern was here ####
+PREHOOK: query: SELECT * FROM avro_extschema_url
+PREHOOK: type: QUERY
+PREHOOK: Input: default@avro_extschema_url
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM avro_extschema_url
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@avro_extschema_url
+#### A masked pattern was here ####
+s1	1	s2
+PREHOOK: query: CREATE TABLE avro_extschema_literal1
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+WITH SERDEPROPERTIES (
+'avro.schema.literal'='{
+  "namespace": "org.apache.hive",
+  "name": "ext_schema",
+  "type": "record",
+  "fields": [
+    { "name":"col1", "type":"string" },
+    { "name":"col2", "type":"long" },
+    { "name":"col3", "type":"string" }
+  ] }')
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@avro_extschema_literal1
+POSTHOOK: query: CREATE TABLE avro_extschema_literal1
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+WITH SERDEPROPERTIES (
+'avro.schema.literal'='{
+  "namespace": "org.apache.hive",
+  "name": "ext_schema",
+  "type": "record",
+  "fields": [
+    { "name":"col1", "type":"string" },
+    { "name":"col2", "type":"long" },
+    { "name":"col3", "type":"string" }
+  ] }')
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@avro_extschema_literal1
+PREHOOK: query: INSERT INTO TABLE avro_extschema_literal1 VALUES('s1', 1, 's2')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@avro_extschema_literal1
+POSTHOOK: query: INSERT INTO TABLE avro_extschema_literal1 VALUES('s1', 1, 's2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@avro_extschema_literal1
+POSTHOOK: Lineage: avro_extschema_literal1.col1 SCRIPT []
+POSTHOOK: Lineage: avro_extschema_literal1.col2 SCRIPT []
+POSTHOOK: Lineage: avro_extschema_literal1.col3 SCRIPT []
+PREHOOK: query: DESCRIBE EXTENDED avro_extschema_literal1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@avro_extschema_literal1
+POSTHOOK: query: DESCRIBE EXTENDED avro_extschema_literal1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@avro_extschema_literal1
+col1                	string              	                    
+col2                	bigint              	                    
+col3                	string              	                    
+	 	 
+#### A masked pattern was here ####
+PREHOOK: query: SELECT * FROM avro_extschema_literal1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@avro_extschema_literal1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM avro_extschema_literal1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@avro_extschema_literal1
+#### A masked pattern was here ####
+s1	1	s2
+PREHOOK: query: CREATE TABLE avro_extschema_url1
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+#### A masked pattern was here ####
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@avro_extschema_url1
+POSTHOOK: query: CREATE TABLE avro_extschema_url1
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+#### A masked pattern was here ####
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@avro_extschema_url1
+PREHOOK: query: INSERT INTO TABLE avro_extschema_url1 VALUES('s1', 1, 's2')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@avro_extschema_url1
+POSTHOOK: query: INSERT INTO TABLE avro_extschema_url1 VALUES('s1', 1, 's2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@avro_extschema_url1
+POSTHOOK: Lineage: avro_extschema_url1.col1 SCRIPT []
+POSTHOOK: Lineage: avro_extschema_url1.col2 SCRIPT []
+POSTHOOK: Lineage: avro_extschema_url1.col3 SCRIPT []
+PREHOOK: query: DESCRIBE EXTENDED avro_extschema_url1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@avro_extschema_url1
+POSTHOOK: query: DESCRIBE EXTENDED avro_extschema_url1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@avro_extschema_url1
+col1                	string              	                    
+col2                	bigint              	                    
+col3                	string              	                    
+	 	 
+#### A masked pattern was here ####
+PREHOOK: query: SELECT * FROM avro_extschema_url1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@avro_extschema_url1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM avro_extschema_url1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@avro_extschema_url1
+#### A masked pattern was here ####
+s1	1	s2