Posted to commits@hive.apache.org by ha...@apache.org on 2013/11/10 17:43:54 UTC

svn commit: r1540492 - in /hive/trunk: metastore/scripts/upgrade/mysql/ metastore/scripts/upgrade/oracle/ metastore/scripts/upgrade/postgres/ ql/src/java/org/apache/hadoop/hive/ql/parse/ ql/src/test/org/apache/hadoop/hive/ql/parse/

Author: hashutosh
Date: Sun Nov 10 16:43:53 2013
New Revision: 1540492

URL: http://svn.apache.org/r1540492
Log:
HIVE-5700 : enforce single date format for partition column storage (Sergey Shelukhin via Ashutosh Chauhan)

Added:
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java
Modified:
    hive/trunk/metastore/scripts/upgrade/mysql/upgrade-0.12.0-to-0.13.0.mysql.sql
    hive/trunk/metastore/scripts/upgrade/oracle/upgrade-0.12.0-to-0.13.0.oracle.sql
    hive/trunk/metastore/scripts/upgrade/postgres/upgrade-0.12.0-to-0.13.0.postgres.sql
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java

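The change pins date partition values to a single yyyy-MM-dd spelling: new
specs are normalized at analysis time (normalizeDateCol, below), and rows
already in the metastore are rewritten by the three upgrade scripts. A
minimal standalone sketch of the canonical formatting, using the same
SimpleDateFormat pattern the diff introduces (class and method names here
are illustrative, not part of the commit):

import java.sql.Date;
import java.text.SimpleDateFormat;

public class PartitionDateFormatSketch {
  // Same pattern as the partitionDateFormat field added to BaseSemanticAnalyzer.
  private static final SimpleDateFormat PARTITION_DATE_FORMAT =
      new SimpleDateFormat("yyyy-MM-dd");

  public static void main(String[] args) {
    // Date.valueOf accepts yyyy-[m]m-[d]d on recent JDKs (leniency varies by
    // version; see the comment in TestSemanticAnalyzer below).
    Date d = Date.valueOf("2010-1-1");
    System.out.println(PARTITION_DATE_FORMAT.format(d)); // prints 2010-01-01
  }
}
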
Modified: hive/trunk/metastore/scripts/upgrade/mysql/upgrade-0.12.0-to-0.13.0.mysql.sql
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/scripts/upgrade/mysql/upgrade-0.12.0-to-0.13.0.mysql.sql?rev=1540492&r1=1540491&r2=1540492&view=diff
==============================================================================
--- hive/trunk/metastore/scripts/upgrade/mysql/upgrade-0.12.0-to-0.13.0.mysql.sql (original)
+++ hive/trunk/metastore/scripts/upgrade/mysql/upgrade-0.12.0-to-0.13.0.mysql.sql Sun Nov 10 16:43:53 2013
@@ -1,3 +1,11 @@
-SELECT 'Upgrading MetaStore schema from 0.11.0 to 0.12.0' AS ' ';
+SELECT 'Upgrading MetaStore schema from 0.12.0 to 0.13.0' AS ' ';
+
+UPDATE PARTITION_KEY_VALS
+  INNER JOIN PARTITIONS ON PARTITION_KEY_VALS.PART_ID = PARTITIONS.PART_ID
+  INNER JOIN PARTITION_KEYS ON PARTITION_KEYS.TBL_ID = PARTITIONS.TBL_ID
+    AND PARTITION_KEYS.INTEGER_IDX = PARTITION_KEY_VALS.INTEGER_IDX
+    AND PARTITION_KEYS.PKEY_TYPE = 'date'
+SET PART_KEY_VAL = IFNULL(DATE_FORMAT(cast(PART_KEY_VAL as date),'%Y-%m-%d'), PART_KEY_VAL);
+
 UPDATE VERSION SET SCHEMA_VERSION='0.13.0', VERSION_COMMENT='Hive release version 0.13.0' where VER_ID=1;
-SELECT 'Finished upgrading MetaStore schema from 0.11.0 to 0.12.0' AS ' ';
+SELECT 'Finished upgrading MetaStore schema from 0.12.0 to 0.13.0' AS ' ';

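MySQL's CAST to DATE yields NULL for values it cannot parse, so the
IFNULL(..., PART_KEY_VAL) above rewrites only well-formed dates and leaves
everything else stored as-is. The same parse-or-keep contract in Java, as a
hedged sketch (helper name is hypothetical):

import java.sql.Date;
import java.text.SimpleDateFormat;

public class ParseOrKeepSketch {
  private static final SimpleDateFormat FORMAT = new SimpleDateFormat("yyyy-MM-dd");

  // Mirrors IFNULL(DATE_FORMAT(CAST(v AS DATE), '%Y-%m-%d'), v).
  static String normalizeOrKeep(String partKeyVal) {
    try {
      return FORMAT.format(Date.valueOf(partKeyVal));
    } catch (IllegalArgumentException e) {
      return partKeyVal; // unparseable: keep the stored value untouched
    }
  }

  public static void main(String[] args) {
    System.out.println(normalizeOrKeep("2010-1-1"));   // 2010-01-01
    System.out.println(normalizeOrKeep("not-a-date")); // not-a-date
  }
}
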
Modified: hive/trunk/metastore/scripts/upgrade/oracle/upgrade-0.12.0-to-0.13.0.oracle.sql
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/scripts/upgrade/oracle/upgrade-0.12.0-to-0.13.0.oracle.sql?rev=1540492&r1=1540491&r2=1540492&view=diff
==============================================================================
--- hive/trunk/metastore/scripts/upgrade/oracle/upgrade-0.12.0-to-0.13.0.oracle.sql (original)
+++ hive/trunk/metastore/scripts/upgrade/oracle/upgrade-0.12.0-to-0.13.0.oracle.sql Sun Nov 10 16:43:53 2013
@@ -1,3 +1,28 @@
-SELECT 'Upgrading MetaStore schema from 0.11.0 to 0.12.0' AS Status from dual;
+SELECT 'Upgrading MetaStore schema from 0.12.0 to 0.13.0' AS Status from dual;
+
+CREATE FUNCTION hive13_to_date(date_str IN VARCHAR2)
+RETURN DATE
+IS dt DATE;
+BEGIN
+  dt := TO_DATE(date_str, 'YYYY-MM-DD');
+  RETURN dt;
+EXCEPTION
+  WHEN others THEN RETURN null;
+END;
+/
+
+MERGE INTO PARTITION_KEY_VALS
+USING (
+  SELECT SRC.PART_ID as IPART_ID, SRC.INTEGER_IDX as IINTEGER_IDX, 
+     NVL(TO_CHAR(hive13_to_date(PART_KEY_VAL),'YYYY-MM-DD'), PART_KEY_VAL) as NORM
+  FROM PARTITION_KEY_VALS SRC
+    INNER JOIN PARTITIONS ON SRC.PART_ID = PARTITIONS.PART_ID
+    INNER JOIN PARTITION_KEYS ON PARTITION_KEYS.TBL_ID = PARTITIONS.TBL_ID
+      AND PARTITION_KEYS.INTEGER_IDX = SRC.INTEGER_IDX AND PARTITION_KEYS.PKEY_TYPE = 'date'
+) ON (IPART_ID = PARTITION_KEY_VALS.PART_ID AND IINTEGER_IDX = PARTITION_KEY_VALS.INTEGER_IDX)
+WHEN MATCHED THEN UPDATE SET PART_KEY_VAL = NORM;
+
+DROP FUNCTION hive13_to_date;
+
 UPDATE VERSION SET SCHEMA_VERSION='0.13.0', VERSION_COMMENT='Hive release version 0.13.0' where VER_ID=1;
-SELECT 'Finished upgrading MetaStore schema from 0.11.0 to 0.12.0' AS Status from dual;
+SELECT 'Finished upgrading MetaStore schema from 0.12.0 to 0.13.0' AS Status from dual;

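Oracle's TO_DATE raises on malformed input instead of returning NULL, hence
the temporary hive13_to_date wrapper that swallows the exception so NVL in
the MERGE can fall back to the original string, matching the MySQL IFNULL
above; the Postgres script below repeats the same trick in plpgsql. The
wrapper's contract expressed in Java, as a sketch:

import java.sql.Date;

public class ToDateOrNullSketch {
  // Analogue of hive13_to_date: turn a parse failure into null so the
  // caller (NVL/COALESCE) can keep the original value.
  static Date toDateOrNull(String dateStr) {
    try {
      return Date.valueOf(dateStr);
    } catch (IllegalArgumentException e) {
      return null;
    }
  }

  public static void main(String[] args) {
    System.out.println(toDateOrNull("2010-01-01")); // 2010-01-01
    System.out.println(toDateOrNull("foo"));        // null
  }
}
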
Modified: hive/trunk/metastore/scripts/upgrade/postgres/upgrade-0.12.0-to-0.13.0.postgres.sql
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/scripts/upgrade/postgres/upgrade-0.12.0-to-0.13.0.postgres.sql?rev=1540492&r1=1540491&r2=1540492&view=diff
==============================================================================
--- hive/trunk/metastore/scripts/upgrade/postgres/upgrade-0.12.0-to-0.13.0.postgres.sql (original)
+++ hive/trunk/metastore/scripts/upgrade/postgres/upgrade-0.12.0-to-0.13.0.postgres.sql Sun Nov 10 16:43:53 2013
@@ -1,3 +1,26 @@
-SELECT 'Upgrading MetaStore schema from 0.11.0 to 0.12.0';
+SELECT 'Upgrading MetaStore schema from 0.12.0 to 0.13.0';
+
+CREATE FUNCTION hive13_to_date(date_str text) RETURNS DATE AS $$
+DECLARE dt DATE;
+BEGIN
+  dt := date_str::DATE;
+  RETURN dt;
+EXCEPTION
+  WHEN others THEN RETURN null;
+END;
+$$ LANGUAGE plpgsql;
+
+UPDATE "PARTITION_KEY_VALS"
+SET "PART_KEY_VAL" = COALESCE(TO_CHAR(hive13_to_date(src."PART_KEY_VAL"),'YYYY-MM-DD'), src."PART_KEY_VAL")
+FROM "PARTITION_KEY_VALS" src
+  INNER JOIN "PARTITIONS" ON src."PART_ID" = "PARTITIONS"."PART_ID"
+  INNER JOIN "PARTITION_KEYS" ON "PARTITION_KEYS"."TBL_ID" = "PARTITIONS"."TBL_ID"
+    AND "PARTITION_KEYS"."INTEGER_IDX" = src."INTEGER_IDX"
+    AND "PARTITION_KEYS"."PKEY_TYPE" = 'date';
+
+DROP FUNCTION hive13_to_date(date_str text);
+
 UPDATE "VERSION" SET "SCHEMA_VERSION"='0.13.0', "VERSION_COMMENT"='Hive release version 0.13.0' where "VER_ID"=1;
-SELECT 'Finished upgrading MetaStore schema from 0.11.0 to 0.12.0';
+SELECT 'Finished upgrading MetaStore schema from 0.12.0 to 0.13.0';
+
+

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java?rev=1540492&r1=1540491&r2=1540492&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java Sun Nov 10 16:43:53 2013
@@ -20,6 +20,9 @@ package org.apache.hadoop.hive.ql.parse;
 
 import java.io.Serializable;
 import java.io.UnsupportedEncodingException;
+import java.sql.Date;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -66,6 +69,7 @@ import org.apache.hadoop.hive.ql.plan.Li
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
 import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
 import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
@@ -73,6 +77,9 @@ import org.apache.hadoop.hive.serde2.typ
 import org.apache.hadoop.mapred.SequenceFileInputFormat;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
 import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.util.StringUtils;
+
+import com.google.common.annotations.VisibleForTesting;
 
 /**
  * BaseSemanticAnalyzer.
@@ -80,6 +87,7 @@ import org.apache.hadoop.mapred.TextInpu
  */
 @SuppressWarnings("deprecation")
 public abstract class BaseSemanticAnalyzer {
+  private static final Log STATIC_LOG = LogFactory.getLog(BaseSemanticAnalyzer.class.getName());
   protected final Hive db;
   protected final HiveConf conf;
   protected List<Task<? extends Serializable>> rootTasks;
@@ -707,10 +715,8 @@ public abstract class BaseSemanticAnalyz
       this(db, conf, ast, true, false);
     }
 
-    public tableSpec(Hive db, HiveConf conf, ASTNode ast,
-        boolean allowDynamicPartitionsSpec, boolean allowPartialPartitionsSpec)
-        throws SemanticException {
-
+    public tableSpec(Hive db, HiveConf conf, ASTNode ast, boolean allowDynamicPartitionsSpec,
+        boolean allowPartialPartitionsSpec) throws SemanticException {
       assert (ast.getToken().getType() == HiveParser.TOK_TAB
           || ast.getToken().getType() == HiveParser.TOK_TABLE_PARTITION
           || ast.getToken().getType() == HiveParser.TOK_TABTYPE
@@ -761,7 +767,7 @@ public abstract class BaseSemanticAnalyz
           partSpec.put(colName, val);
         }
 
-        // check if the columns specified in the partition() clause are actually partition columns
+        // check if the columns, as well as value types in the partition() clause are valid
         validatePartSpec(tableHandle, partSpec, ast, conf);
 
         // check if the partition spec is valid
@@ -840,7 +846,6 @@ public abstract class BaseSemanticAnalyz
         return tableHandle.toString();
       }
     }
-
   }
 
   /**
@@ -1156,52 +1161,86 @@ public abstract class BaseSemanticAnalyz
     }
   }
 
-  public static void validatePartSpec(Table tbl,
-      Map<String, String> partSpec, ASTNode astNode, HiveConf conf) throws SemanticException {
-
+  public static void validatePartSpec(Table tbl, Map<String, String> partSpec,
+      ASTNode astNode, HiveConf conf) throws SemanticException {
     Map<ASTNode, ExprNodeDesc> astExprNodeMap = new HashMap<ASTNode, ExprNodeDesc>();
 
     Utilities.validatePartSpec(tbl, partSpec);
 
-    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_TYPE_CHECK_ON_INSERT)) {
+    if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_TYPE_CHECK_ON_INSERT)) {
+      return;
+    }
+
+    try {
+      getPartExprNodeDesc(astNode, astExprNodeMap);
+    } catch (HiveException e) {
+      return;
+    }
+    List<FieldSchema> parts = tbl.getPartitionKeys();
+    Map<String, String> partCols = new HashMap<String, String>(parts.size());
+    for (FieldSchema col : parts) {
+      partCols.put(col.getName(), col.getType().toLowerCase());
+    }
+    for (Entry<ASTNode, ExprNodeDesc> astExprNodePair : astExprNodeMap.entrySet()) {
+      String astKeyName = astExprNodePair.getKey().toString().toLowerCase();
+      if (astExprNodePair.getKey().getType() == HiveParser.Identifier) {
+        astKeyName = stripIdentifierQuotes(astKeyName);
+      }
+      String colType = partCols.get(astKeyName);
+      ObjectInspector inputOI = astExprNodePair.getValue().getWritableObjectInspector();
+
+      TypeInfo expectedType =
+          TypeInfoUtils.getTypeInfoFromTypeString(colType);
+      ObjectInspector outputOI =
+          TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(expectedType);
+      Object value = null;
+      String colSpec = partSpec.get(astKeyName);
       try {
-        getPartExprNodeDesc(astNode, astExprNodeMap);
+        value =
+            ExprNodeEvaluatorFactory.get(astExprNodePair.getValue()).
+            evaluate(colSpec);
       } catch (HiveException e) {
-        return;
+        throw new SemanticException(e);
       }
-      List<FieldSchema> parts = tbl.getPartitionKeys();
-      Map<String, String> partCols = new HashMap<String, String>(parts.size());
-      for (FieldSchema col : parts) {
-        partCols.put(col.getName(), col.getType().toLowerCase());
-      }
-      for (Entry<ASTNode, ExprNodeDesc> astExprNodePair : astExprNodeMap.entrySet()) {
-
-        String astKeyName = astExprNodePair.getKey().toString().toLowerCase();
-        if (astExprNodePair.getKey().getType() == HiveParser.Identifier) {
-          astKeyName = stripIdentifierQuotes(astKeyName);
-        }
-        String colType = partCols.get(astKeyName);
-        ObjectInspector inputOI = astExprNodePair.getValue().getWritableObjectInspector();
-
-        TypeInfo expectedType =
-            TypeInfoUtils.getTypeInfoFromTypeString(colType);
-        ObjectInspector outputOI =
-            TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(expectedType);
-        Object value = null;
-        try {
-          value =
-              ExprNodeEvaluatorFactory.get(astExprNodePair.getValue()).
-              evaluate(partSpec.get(astKeyName));
-        } catch (HiveException e) {
-          throw new SemanticException(e);
-        }
-        Object convertedValue =
-          ObjectInspectorConverters.getConverter(inputOI, outputOI).convert(value);
-        if (convertedValue == null) {
-          throw new SemanticException(ErrorMsg.PARTITION_SPEC_TYPE_MISMATCH.format(astKeyName,
-              inputOI.getTypeName(), outputOI.getTypeName()));
-        }
+      Object convertedValue =
+        ObjectInspectorConverters.getConverter(inputOI, outputOI).convert(value);
+      if (convertedValue == null) {
+        throw new SemanticException(ErrorMsg.PARTITION_SPEC_TYPE_MISMATCH.format(astKeyName,
+            inputOI.getTypeName(), outputOI.getTypeName()));
       }
+
+      normalizeColSpec(partSpec, astKeyName, colType, colSpec, convertedValue);
+    }
+  }
+
+  @VisibleForTesting
+  static void normalizeColSpec(Map<String, String> partSpec, String colName,
+      String colType, String originalColSpec, Object colValue) throws SemanticException {
+    if (colValue == null) return; // nothing to do with nulls
+    String normalizedColSpec = originalColSpec;
+    if (colType.equals(serdeConstants.DATE_TYPE_NAME)) {
+      normalizedColSpec = normalizeDateCol(colValue, originalColSpec);
+    }
+    if (!normalizedColSpec.equals(originalColSpec)) {
+      STATIC_LOG.warn("Normalizing partition spec - " + colName + " from "
+          + originalColSpec + " to " + normalizedColSpec);
+      partSpec.put(colName, normalizedColSpec);
+    }
+  }
+
+  /** A fixed date format to be used for hive partition column values. */
+  private static final DateFormat partitionDateFormat = new SimpleDateFormat("yyyy-MM-dd");
+
+  private static String normalizeDateCol(
+      Object colValue, String originalColSpec) throws SemanticException {
+    Date value;
+    if (colValue instanceof DateWritable) {
+      value = ((DateWritable) colValue).get();
+    } else if (colValue instanceof Date) {
+      value = (Date) colValue;
+    } else {
+      throw new SemanticException("Unexpected date type " + colValue.getClass());
     }
+    return partitionDateFormat.format(value);
   }
 }

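Note that normalizeColSpec is package-private (exposed via
@VisibleForTesting), so callers outside BaseSemanticAnalyzer must sit in
org.apache.hadoop.hive.ql.parse, as the new test does. A minimal usage
sketch under that assumption (class name hypothetical):

package org.apache.hadoop.hive.ql.parse;

import java.sql.Date;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.hive.serde2.io.DateWritable;

public class NormalizeColSpecSketch {
  public static void main(String[] args) throws SemanticException {
    Map<String, String> partSpec = new HashMap<String, String>();
    partSpec.put("dt", "2010-1-1");
    // colValue is the converted value the type check produced; for a date
    // column the standard writable converter yields a DateWritable.
    BaseSemanticAnalyzer.normalizeColSpec(partSpec, "dt", "date", "2010-1-1",
        new DateWritable(Date.valueOf("2010-01-01")));
    System.out.println(partSpec.get("dt")); // 2010-01-01, rewritten in place
  }
}
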
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1540492&r1=1540491&r2=1540492&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Sun Nov 10 16:43:53 2013
@@ -977,13 +977,11 @@ public class SemanticAnalyzer extends Ba
       case HiveParser.TOK_INSERT:
         ASTNode destination = (ASTNode) ast.getChild(0);
         Tree tab = destination.getChild(0);
-
         // Proceed if AST contains partition & If Not Exists
         if (destination.getChildCount() == 2 &&
             tab.getChildCount() == 2 &&
             destination.getChild(1).getType() == HiveParser.TOK_IFNOTEXISTS) {
           String tableName = tab.getChild(0).getChild(0).getText();
-
           Tree partitions = tab.getChild(1);
           int childCount = partitions.getChildCount();
           HashMap<String, String> partition = new HashMap<String, String>();
@@ -997,25 +995,30 @@ public class SemanticAnalyzer extends Ba
             partition.put(partitionName, partitionVal);
           }
           // if it is a dynamic partition throw the exception
-          if (childCount == partition.size()) {
-            try {
-              Table table = db.getTable(tableName);
-              Partition parMetaData = db.getPartition(table, partition, false);
-              // Check partition exists if it exists skip the overwrite
-              if (parMetaData != null) {
-                phase1Result = false;
-                skipRecursion = true;
-                LOG.info("Partition already exists so insert into overwrite " +
-                    "skipped for partition : " + parMetaData.toString());
-                break;
-              }
-            } catch (HiveException e) {
-              LOG.info("Error while getting metadata : ", e);
-            }
-          } else {
+          if (childCount != partition.size()) {
             throw new SemanticException(ErrorMsg.INSERT_INTO_DYNAMICPARTITION_IFNOTEXISTS
                 .getMsg(partition.toString()));
           }
+          Table table = null;
+          try {
+            table = db.getTable(tableName);
+          } catch (HiveException ex) {
+            throw new SemanticException(ex);
+          }
+          try {
+            Partition parMetaData = db.getPartition(table, partition, false);
+            // Check partition exists if it exists skip the overwrite
+            if (parMetaData != null) {
+              phase1Result = false;
+              skipRecursion = true;
+              LOG.info("Partition already exists so insert into overwrite " +
+                  "skipped for partition : " + parMetaData.toString());
+              break;
+            }
+          } catch (HiveException e) {
+            LOG.info("Error while getting metadata : ", e);
+          }
+          validatePartSpec(table, partition, (ASTNode)tab, conf);
         }
         skipRecursion = false;
         break;

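The validatePartSpec call added at the end is what makes IF NOT EXISTS
reliable for date partitions: the user's spelling is normalized before the
existence check, so it matches the canonical value the metastore now
stores. A standalone illustration of the mismatch this prevents (the map
stands in for the stored partition spec; data is hypothetical):

import java.util.Collections;
import java.util.Map;

public class IfNotExistsSketch {
  public static void main(String[] args) {
    // Canonical form as stored in the metastore after this commit.
    Map<String, String> stored = Collections.singletonMap("dt", "2010-01-01");
    String asTyped = "2010-1-1";      // PARTITION (dt='2010-1-1') as written
    String normalized = "2010-01-01"; // after validatePartSpec runs
    System.out.println(stored.containsValue(asTyped));    // false: check would miss
    System.out.println(stored.containsValue(normalized)); // true: overwrite skipped
  }
}
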
Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java?rev=1540492&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java Sun Nov 10 16:43:53 2013
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.parse;
+
+import static org.junit.Assert.*;
+
+import java.sql.Date;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.junit.Test;
+
+public class TestSemanticAnalyzer {
+
+  @Test
+  public void testNormalizeColSpec() throws Exception {
+    // Hive normalizes partition spec for dates to yyyy-mm-dd format. Some versions of Java will
+    // accept other formats for Date.valueOf, e.g. yyyy-m-d, and who knows what else in the future;
+    // some will not accept other formats, so we cannot test normalization with them - type check
+    // will fail before it can ever happen. Thus, test in isolation.
+    checkNormalization("date", "2010-01-01", "2010-01-01", Date.valueOf("2010-01-01"));
+    checkNormalization("date", "2010-1-01", "2010-01-01", Date.valueOf("2010-01-01"));
+    checkNormalization("date", "2010-1-1", "2010-01-01", Date.valueOf("2010-01-01"));
+    checkNormalization("string", "2010-1-1", "2010-1-1", "2010-1-1");
+
+    try {
+      checkNormalization("date", "foo", "", "foo"); // Bad format.
+      fail("should throw");
+    } catch (SemanticException ex) {
+    }
+
+    try {
+      checkNormalization("date", "2010-01-01", "2010-01-01", "2010-01-01"); // Bad value type.
+      fail("should throw");
+    } catch (SemanticException ex) {
+    }
+  }
+
+
+  public void checkNormalization(String colType, String originalColSpec,
+      String result, Object colValue) throws SemanticException {
+    final String colName = "col";
+    Map<String, String> partSpec = new HashMap<String, String>();
+    partSpec.put(colName, originalColSpec);
+    BaseSemanticAnalyzer.normalizeColSpec(partSpec, colName, colType, originalColSpec, colValue);
+    assertEquals(result, partSpec.get(colName));
+    if (colValue instanceof Date) {
+      DateWritable dw = new DateWritable((Date)colValue);
+      BaseSemanticAnalyzer.normalizeColSpec(partSpec, colName, colType, originalColSpec, dw);
+      assertEquals(result, partSpec.get(colName));
+    }
+  }
+}