Posted to commits@hive.apache.org by ha...@apache.org on 2013/11/10 17:43:54 UTC
svn commit: r1540492 - in /hive/trunk: metastore/scripts/upgrade/mysql/
metastore/scripts/upgrade/oracle/ metastore/scripts/upgrade/postgres/
ql/src/java/org/apache/hadoop/hive/ql/parse/
ql/src/test/org/apache/hadoop/hive/ql/parse/
Author: hashutosh
Date: Sun Nov 10 16:43:53 2013
New Revision: 1540492
URL: http://svn.apache.org/r1540492
Log:
HIVE-5700 : enforce single date format for partition column storage (Sergey Shelukhin via Ashutosh Chauhan)
Added:
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java
Modified:
hive/trunk/metastore/scripts/upgrade/mysql/upgrade-0.12.0-to-0.13.0.mysql.sql
hive/trunk/metastore/scripts/upgrade/oracle/upgrade-0.12.0-to-0.13.0.oracle.sql
hive/trunk/metastore/scripts/upgrade/postgres/upgrade-0.12.0-to-0.13.0.postgres.sql
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
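For context on the fix itself: java.sql.Date.valueOf() is lenient on some JDKs and accepts
variants such as yyyy-m-d (the new test below notes this), so the same logical date could end
up stored under several different partition value strings. A minimal sketch of the
canonicalization this patch enforces; the class name is illustrative, not part of the patch:

    import java.sql.Date;
    import java.text.SimpleDateFormat;

    public class DatePartitionValueDemo {
        public static void main(String[] args) {
            // On JDKs where Date.valueOf is lenient, both strings parse to the same date...
            Date d1 = Date.valueOf("2010-01-01");
            Date d2 = Date.valueOf("2010-1-1");
            // ...and the patch stores both under the single canonical yyyy-MM-dd form.
            SimpleDateFormat canonical = new SimpleDateFormat("yyyy-MM-dd");
            System.out.println(canonical.format(d1)); // 2010-01-01
            System.out.println(canonical.format(d2)); // 2010-01-01
        }
    }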
Modified: hive/trunk/metastore/scripts/upgrade/mysql/upgrade-0.12.0-to-0.13.0.mysql.sql
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/scripts/upgrade/mysql/upgrade-0.12.0-to-0.13.0.mysql.sql?rev=1540492&r1=1540491&r2=1540492&view=diff
==============================================================================
--- hive/trunk/metastore/scripts/upgrade/mysql/upgrade-0.12.0-to-0.13.0.mysql.sql (original)
+++ hive/trunk/metastore/scripts/upgrade/mysql/upgrade-0.12.0-to-0.13.0.mysql.sql Sun Nov 10 16:43:53 2013
@@ -1,3 +1,11 @@
-SELECT 'Upgrading MetaStore schema from 0.11.0 to 0.12.0' AS ' ';
+SELECT 'Upgrading MetaStore schema from 0.12.0 to 0.13.0' AS ' ';
+
+UPDATE PARTITION_KEY_VALS
+ INNER JOIN PARTITIONS ON PARTITION_KEY_VALS.PART_ID = PARTITIONS.PART_ID
+ INNER JOIN PARTITION_KEYS ON PARTITION_KEYS.TBL_ID = PARTITIONS.TBL_ID
+ AND PARTITION_KEYS.INTEGER_IDX = PARTITION_KEY_VALS.INTEGER_IDX
+ AND PARTITION_KEYS.PKEY_TYPE = 'date'
+SET PART_KEY_VAL = IFNULL(DATE_FORMAT(cast(PART_KEY_VAL as date),'%Y-%m-%d'), PART_KEY_VAL);
+
UPDATE VERSION SET SCHEMA_VERSION='0.13.0', VERSION_COMMENT='Hive release version 0.13.0' where VER_ID=1;
-SELECT 'Finished upgrading MetaStore schema from 0.11.0 to 0.12.0' AS ' ';
+SELECT 'Finished upgrading MetaStore schema from 0.12.0 to 0.13.0' AS ' ';
Modified: hive/trunk/metastore/scripts/upgrade/oracle/upgrade-0.12.0-to-0.13.0.oracle.sql
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/scripts/upgrade/oracle/upgrade-0.12.0-to-0.13.0.oracle.sql?rev=1540492&r1=1540491&r2=1540492&view=diff
==============================================================================
--- hive/trunk/metastore/scripts/upgrade/oracle/upgrade-0.12.0-to-0.13.0.oracle.sql (original)
+++ hive/trunk/metastore/scripts/upgrade/oracle/upgrade-0.12.0-to-0.13.0.oracle.sql Sun Nov 10 16:43:53 2013
@@ -1,3 +1,28 @@
-SELECT 'Upgrading MetaStore schema from 0.11.0 to 0.12.0' AS Status from dual;
+SELECT 'Upgrading MetaStore schema from 0.12.0 to 0.13.0' AS Status from dual;
+
+CREATE FUNCTION hive13_to_date(date_str IN VARCHAR2)
+RETURN DATE
+IS dt DATE;
+BEGIN
+ dt := TO_DATE(date_str, 'YYYY-MM-DD');
+ RETURN dt;
+EXCEPTION
+ WHEN others THEN RETURN null;
+END;
+/
+
+MERGE INTO PARTITION_KEY_VALS
+USING (
+ SELECT SRC.PART_ID as IPART_ID, SRC.INTEGER_IDX as IINTEGER_IDX,
+ NVL(TO_CHAR(hive13_to_date(PART_KEY_VAL),'YYYY-MM-DD'), PART_KEY_VAL) as NORM
+ FROM PARTITION_KEY_VALS SRC
+ INNER JOIN PARTITIONS ON SRC.PART_ID = PARTITIONS.PART_ID
+ INNER JOIN PARTITION_KEYS ON PARTITION_KEYS.TBL_ID = PARTITIONS.TBL_ID
+ AND PARTITION_KEYS.INTEGER_IDX = SRC.INTEGER_IDX AND PARTITION_KEYS.PKEY_TYPE = 'date'
+) ON (IPART_ID = PARTITION_KEY_VALS.PART_ID AND IINTEGER_IDX = PARTITION_KEY_VALS.INTEGER_IDX)
+WHEN MATCHED THEN UPDATE SET PART_KEY_VAL = NORM;
+
+DROP FUNCTION hive13_to_date;
+
UPDATE VERSION SET SCHEMA_VERSION='0.13.0', VERSION_COMMENT='Hive release version 0.13.0' where VER_ID=1;
-SELECT 'Finished upgrading MetaStore schema from 0.11.0 to 0.12.0' AS Status from dual;
+SELECT 'Finished upgrading MetaStore schema from 0.12.0 to 0.13.0' AS Status from dual;
Modified: hive/trunk/metastore/scripts/upgrade/postgres/upgrade-0.12.0-to-0.13.0.postgres.sql
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/scripts/upgrade/postgres/upgrade-0.12.0-to-0.13.0.postgres.sql?rev=1540492&r1=1540491&r2=1540492&view=diff
==============================================================================
--- hive/trunk/metastore/scripts/upgrade/postgres/upgrade-0.12.0-to-0.13.0.postgres.sql (original)
+++ hive/trunk/metastore/scripts/upgrade/postgres/upgrade-0.12.0-to-0.13.0.postgres.sql Sun Nov 10 16:43:53 2013
@@ -1,3 +1,26 @@
-SELECT 'Upgrading MetaStore schema from 0.11.0 to 0.12.0';
+SELECT 'Upgrading MetaStore schema from 0.12.0 to 0.13.0';
+
+CREATE FUNCTION hive13_to_date(date_str text) RETURNS DATE AS $$
+DECLARE dt DATE;
+BEGIN
+ dt := date_str::DATE;
+ RETURN dt;
+EXCEPTION
+ WHEN others THEN RETURN null;
+END;
+$$ LANGUAGE plpgsql;
+
+UPDATE "PARTITION_KEY_VALS"
+SET "PART_KEY_VAL" = COALESCE(TO_CHAR(hive13_to_date(src."PART_KEY_VAL"),'YYYY-MM-DD'), src."PART_KEY_VAL")
+FROM "PARTITION_KEY_VALS" src
+ INNER JOIN "PARTITIONS" ON src."PART_ID" = "PARTITIONS"."PART_ID"
+ INNER JOIN "PARTITION_KEYS" ON "PARTITION_KEYS"."TBL_ID" = "PARTITIONS"."TBL_ID"
+ AND "PARTITION_KEYS"."INTEGER_IDX" = src."INTEGER_IDX"
+ AND "PARTITION_KEYS"."PKEY_TYPE" = 'date';
+
+DROP FUNCTION hive13_to_date(date_str text);
+
UPDATE "VERSION" SET "SCHEMA_VERSION"='0.13.0', "VERSION_COMMENT"='Hive release version 0.13.0' where "VER_ID"=1;
-SELECT 'Finished upgrading MetaStore schema from 0.11.0 to 0.12.0';
+SELECT 'Finished upgrading MetaStore schema from 0.12.0 to 0.13.0';
+
+
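All three upgrade scripts above apply the same per-row rewrite to existing metastore data:
parse each date-typed partition value, write it back as yyyy-MM-dd when parsing succeeds,
and leave it untouched otherwise (the IFNULL/NVL/COALESCE fallback). A rough Java equivalent
of that per-value rule, assuming the same lenient parsing; names here are illustrative:

    import java.sql.Date;
    import java.text.SimpleDateFormat;

    public class UpgradeRewriteDemo {
        // Mirrors IFNULL(DATE_FORMAT(CAST(v AS DATE), '%Y-%m-%d'), v) and its NVL/COALESCE
        // counterparts: emit the canonical form when the value parses, else keep the original.
        static String rewriteDateValue(String raw) {
            try {
                return new SimpleDateFormat("yyyy-MM-dd").format(Date.valueOf(raw));
            } catch (IllegalArgumentException e) {
                return raw; // unparseable values are left as-is
            }
        }

        public static void main(String[] args) {
            System.out.println(rewriteDateValue("2010-1-1")); // 2010-01-01 (lenient valueOf)
            System.out.println(rewriteDateValue("foo"));      // foo
        }
    }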
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java?rev=1540492&r1=1540491&r2=1540492&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java Sun Nov 10 16:43:53 2013
@@ -20,6 +20,9 @@ package org.apache.hadoop.hive.ql.parse;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
+import java.sql.Date;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
@@ -66,6 +69,7 @@ import org.apache.hadoop.hive.ql.plan.Li
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
@@ -73,6 +77,9 @@ import org.apache.hadoop.hive.serde2.typ
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.util.StringUtils;
+
+import com.google.common.annotations.VisibleForTesting;
/**
* BaseSemanticAnalyzer.
@@ -80,6 +87,7 @@ import org.apache.hadoop.mapred.TextInpu
*/
@SuppressWarnings("deprecation")
public abstract class BaseSemanticAnalyzer {
+ private static final Log STATIC_LOG = LogFactory.getLog(BaseSemanticAnalyzer.class.getName());
protected final Hive db;
protected final HiveConf conf;
protected List<Task<? extends Serializable>> rootTasks;
@@ -707,10 +715,8 @@ public abstract class BaseSemanticAnalyz
this(db, conf, ast, true, false);
}
- public tableSpec(Hive db, HiveConf conf, ASTNode ast,
- boolean allowDynamicPartitionsSpec, boolean allowPartialPartitionsSpec)
- throws SemanticException {
-
+ public tableSpec(Hive db, HiveConf conf, ASTNode ast, boolean allowDynamicPartitionsSpec,
+ boolean allowPartialPartitionsSpec) throws SemanticException {
assert (ast.getToken().getType() == HiveParser.TOK_TAB
|| ast.getToken().getType() == HiveParser.TOK_TABLE_PARTITION
|| ast.getToken().getType() == HiveParser.TOK_TABTYPE
@@ -761,7 +767,7 @@ public abstract class BaseSemanticAnalyz
partSpec.put(colName, val);
}
- // check if the columns specified in the partition() clause are actually partition columns
+ // check if the columns, as well as value types in the partition() clause are valid
validatePartSpec(tableHandle, partSpec, ast, conf);
// check if the partition spec is valid
@@ -840,7 +846,6 @@ public abstract class BaseSemanticAnalyz
return tableHandle.toString();
}
}
-
}
/**
@@ -1156,52 +1161,86 @@ public abstract class BaseSemanticAnalyz
}
}
- public static void validatePartSpec(Table tbl,
- Map<String, String> partSpec, ASTNode astNode, HiveConf conf) throws SemanticException {
-
+ public static void validatePartSpec(Table tbl, Map<String, String> partSpec,
+ ASTNode astNode, HiveConf conf) throws SemanticException {
Map<ASTNode, ExprNodeDesc> astExprNodeMap = new HashMap<ASTNode, ExprNodeDesc>();
Utilities.validatePartSpec(tbl, partSpec);
- if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_TYPE_CHECK_ON_INSERT)) {
+ if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_TYPE_CHECK_ON_INSERT)) {
+ return;
+ }
+
+ try {
+ getPartExprNodeDesc(astNode, astExprNodeMap);
+ } catch (HiveException e) {
+ return;
+ }
+ List<FieldSchema> parts = tbl.getPartitionKeys();
+ Map<String, String> partCols = new HashMap<String, String>(parts.size());
+ for (FieldSchema col : parts) {
+ partCols.put(col.getName(), col.getType().toLowerCase());
+ }
+ for (Entry<ASTNode, ExprNodeDesc> astExprNodePair : astExprNodeMap.entrySet()) {
+ String astKeyName = astExprNodePair.getKey().toString().toLowerCase();
+ if (astExprNodePair.getKey().getType() == HiveParser.Identifier) {
+ astKeyName = stripIdentifierQuotes(astKeyName);
+ }
+ String colType = partCols.get(astKeyName);
+ ObjectInspector inputOI = astExprNodePair.getValue().getWritableObjectInspector();
+
+ TypeInfo expectedType =
+ TypeInfoUtils.getTypeInfoFromTypeString(colType);
+ ObjectInspector outputOI =
+ TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(expectedType);
+ Object value = null;
+ String colSpec = partSpec.get(astKeyName);
try {
- getPartExprNodeDesc(astNode, astExprNodeMap);
+ value =
+ ExprNodeEvaluatorFactory.get(astExprNodePair.getValue()).
+ evaluate(colSpec);
} catch (HiveException e) {
- return;
+ throw new SemanticException(e);
}
- List<FieldSchema> parts = tbl.getPartitionKeys();
- Map<String, String> partCols = new HashMap<String, String>(parts.size());
- for (FieldSchema col : parts) {
- partCols.put(col.getName(), col.getType().toLowerCase());
- }
- for (Entry<ASTNode, ExprNodeDesc> astExprNodePair : astExprNodeMap.entrySet()) {
-
- String astKeyName = astExprNodePair.getKey().toString().toLowerCase();
- if (astExprNodePair.getKey().getType() == HiveParser.Identifier) {
- astKeyName = stripIdentifierQuotes(astKeyName);
- }
- String colType = partCols.get(astKeyName);
- ObjectInspector inputOI = astExprNodePair.getValue().getWritableObjectInspector();
-
- TypeInfo expectedType =
- TypeInfoUtils.getTypeInfoFromTypeString(colType);
- ObjectInspector outputOI =
- TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(expectedType);
- Object value = null;
- try {
- value =
- ExprNodeEvaluatorFactory.get(astExprNodePair.getValue()).
- evaluate(partSpec.get(astKeyName));
- } catch (HiveException e) {
- throw new SemanticException(e);
- }
- Object convertedValue =
- ObjectInspectorConverters.getConverter(inputOI, outputOI).convert(value);
- if (convertedValue == null) {
- throw new SemanticException(ErrorMsg.PARTITION_SPEC_TYPE_MISMATCH.format(astKeyName,
- inputOI.getTypeName(), outputOI.getTypeName()));
- }
+ Object convertedValue =
+ ObjectInspectorConverters.getConverter(inputOI, outputOI).convert(value);
+ if (convertedValue == null) {
+ throw new SemanticException(ErrorMsg.PARTITION_SPEC_TYPE_MISMATCH.format(astKeyName,
+ inputOI.getTypeName(), outputOI.getTypeName()));
}
+
+ normalizeColSpec(partSpec, astKeyName, colType, colSpec, convertedValue);
+ }
+ }
+
+ @VisibleForTesting
+ static void normalizeColSpec(Map<String, String> partSpec, String colName,
+ String colType, String originalColSpec, Object colValue) throws SemanticException {
+ if (colValue == null) return; // nothing to do with nulls
+ String normalizedColSpec = originalColSpec;
+ if (colType.equals(serdeConstants.DATE_TYPE_NAME)) {
+ normalizedColSpec = normalizeDateCol(colValue, originalColSpec);
+ }
+ if (!normalizedColSpec.equals(originalColSpec)) {
+ STATIC_LOG.warn("Normalizing partition spec - " + colName + " from "
+ + originalColSpec + " to " + normalizedColSpec);
+ partSpec.put(colName, normalizedColSpec);
+ }
+ }
+
+ /** A fixed date format to be used for hive partition column values. */
+ private static final DateFormat partitionDateFormat = new SimpleDateFormat("yyyy-MM-dd");
+
+ private static String normalizeDateCol(
+ Object colValue, String originalColSpec) throws SemanticException {
+ Date value;
+ if (colValue instanceof DateWritable) {
+ value = ((DateWritable) colValue).get();
+ } else if (colValue instanceof Date) {
+ value = (Date) colValue;
+ } else {
+ throw new SemanticException("Unexpected date type " + colValue.getClass());
}
+ return partitionDateFormat.format(value);
}
}
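The new BaseSemanticAnalyzer.normalizeColSpec() rewrites the partition spec map in place and
warns whenever a value changes. A usage sketch matching what the test added at the end of this
commit exercises; normalizeColSpec is package-private, so this assumes a caller in the same
package, and the demo class name is made up:

    package org.apache.hadoop.hive.ql.parse;

    import java.sql.Date;
    import java.util.HashMap;
    import java.util.Map;

    public class NormalizeColSpecDemo {
        public static void main(String[] args) throws SemanticException {
            Map<String, String> partSpec = new HashMap<String, String>();
            partSpec.put("dt", "2010-1-1");
            // For a date-typed column, the spec value is replaced with the canonical form.
            BaseSemanticAnalyzer.normalizeColSpec(partSpec, "dt", "date", "2010-1-1",
                Date.valueOf("2010-1-1"));
            System.out.println(partSpec.get("dt")); // prints 2010-01-01
        }
    }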
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1540492&r1=1540491&r2=1540492&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Sun Nov 10 16:43:53 2013
@@ -977,13 +977,11 @@ public class SemanticAnalyzer extends Ba
case HiveParser.TOK_INSERT:
ASTNode destination = (ASTNode) ast.getChild(0);
Tree tab = destination.getChild(0);
-
// Proceed if AST contains partition & If Not Exists
if (destination.getChildCount() == 2 &&
tab.getChildCount() == 2 &&
destination.getChild(1).getType() == HiveParser.TOK_IFNOTEXISTS) {
String tableName = tab.getChild(0).getChild(0).getText();
-
Tree partitions = tab.getChild(1);
int childCount = partitions.getChildCount();
HashMap<String, String> partition = new HashMap<String, String>();
@@ -997,25 +995,30 @@ public class SemanticAnalyzer extends Ba
partition.put(partitionName, partitionVal);
}
// if it is a dynamic partition throw the exception
- if (childCount == partition.size()) {
- try {
- Table table = db.getTable(tableName);
- Partition parMetaData = db.getPartition(table, partition, false);
- // Check partition exists if it exists skip the overwrite
- if (parMetaData != null) {
- phase1Result = false;
- skipRecursion = true;
- LOG.info("Partition already exists so insert into overwrite " +
- "skipped for partition : " + parMetaData.toString());
- break;
- }
- } catch (HiveException e) {
- LOG.info("Error while getting metadata : ", e);
- }
- } else {
+ if (childCount != partition.size()) {
throw new SemanticException(ErrorMsg.INSERT_INTO_DYNAMICPARTITION_IFNOTEXISTS
.getMsg(partition.toString()));
}
+ Table table = null;
+ try {
+ table = db.getTable(tableName);
+ } catch (HiveException ex) {
+ throw new SemanticException(ex);
+ }
+ try {
+ Partition parMetaData = db.getPartition(table, partition, false);
+ // Check partition exists if it exists skip the overwrite
+ if (parMetaData != null) {
+ phase1Result = false;
+ skipRecursion = true;
+ LOG.info("Partition already exists so insert into overwrite " +
+ "skipped for partition : " + parMetaData.toString());
+ break;
+ }
+ } catch (HiveException e) {
+ LOG.info("Error while getting metadata : ", e);
+ }
+ validatePartSpec(table, partition, (ASTNode)tab, conf);
}
skipRecursion = false;
break;
Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java?rev=1540492&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java Sun Nov 10 16:43:53 2013
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.parse;
+
+import static org.junit.Assert.*;
+
+import java.sql.Date;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.junit.Test;
+
+public class TestSemanticAnalyzer {
+
+ @Test
+ public void testNormalizeColSpec() throws Exception {
+ // Hive normalizes partition spec for dates to yyyy-mm-dd format. Some versions of Java will
+ // accept other formats for Date.valueOf, e.g. yyyy-m-d, and who knows what else in the future;
+ // some will not accept other formats, so we cannot test normalization with them - type check
+ // will fail before it can ever happen. Thus, test in isolation.
+ checkNormalization("date", "2010-01-01", "2010-01-01", Date.valueOf("2010-01-01"));
+ checkNormalization("date", "2010-1-01", "2010-01-01", Date.valueOf("2010-01-01"));
+ checkNormalization("date", "2010-1-1", "2010-01-01", Date.valueOf("2010-01-01"));
+ checkNormalization("string", "2010-1-1", "2010-1-1", "2010-1-1");
+
+ try {
+ checkNormalization("date", "foo", "", "foo"); // Bad format.
+ fail("should throw");
+ } catch (SemanticException ex) {
+ }
+
+ try {
+ checkNormalization("date", "2010-01-01", "2010-01-01", "2010-01-01"); // Bad value type.
+ fail("should throw");
+ } catch (SemanticException ex) {
+ }
+ }
+
+
+ public void checkNormalization(String colType, String originalColSpec,
+ String result, Object colValue) throws SemanticException {
+ final String colName = "col";
+ Map<String, String> partSpec = new HashMap<String, String>();
+ partSpec.put(colName, originalColSpec);
+ BaseSemanticAnalyzer.normalizeColSpec(partSpec, colName, colType, originalColSpec, colValue);
+ assertEquals(result, partSpec.get(colName));
+ if (colValue instanceof Date) {
+ DateWritable dw = new DateWritable((Date)colValue);
+ BaseSemanticAnalyzer.normalizeColSpec(partSpec, colName, colType, originalColSpec, dw);
+ assertEquals(result, partSpec.get(colName));
+ }
+ }
+}