You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by px...@apache.org on 2016/05/09 05:41:33 UTC
hive git commit: HIVE-13618: Trailing spaces in partition column will
be treated differently (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 48be04b2c -> 2a8e38814
HIVE-13618: Trailing spaces in partition column will be treated differently (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2a8e3881
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2a8e3881
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2a8e3881
Branch: refs/heads/master
Commit: 2a8e38814f6dedae4c3b5270540d7ca6ef4b941e
Parents: 48be04b
Author: Pengcheng Xiong <px...@apache.org>
Authored: Sun May 8 22:41:07 2016 -0700
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Sun May 8 22:41:07 2016 -0700
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 +
.../hadoop/hive/metastore/ObjectStore.java | 61 ++++++++----------
...umn_names_with_leading_and_trailing_spaces.q | 15 +++++
...umn_names_with_leading_and_trailing_spaces.q | 15 +++++
...names_with_leading_and_trailing_spaces.q.out | 35 +++++++++++
...names_with_leading_and_trailing_spaces.q.out | 65 ++++++++++++++++++++
6 files changed, 157 insertions(+), 35 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2a8e3881/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 88381aa..5aedd10 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -427,6 +427,7 @@ minitez.query.files=bucket_map_join_tez1.q,\
mrr.q,\
orc_ppd_basic.q,\
orc_merge_diff_fs.q,\
+ partition_column_names_with_leading_and_trailing_spaces.q,\
stats_filemetadata.q,\
tez_aggr_part_stats.q,\
tez_bmj_schema_evolution.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/2a8e3881/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index 5c49be9..550db83 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -1805,7 +1805,8 @@ public class ObjectStore implements RawStore, Configurable {
private MPartition getMPartition(String dbName, String tableName, List<String> part_vals)
throws MetaException {
- MPartition mpart = null;
+ List<MPartition> mparts = null;
+ MPartition ret = null;
boolean commited = false;
Query query = null;
try {
@@ -1825,10 +1826,27 @@ public class ObjectStore implements RawStore, Configurable {
pm.newQuery(MPartition.class,
"table.tableName == t1 && table.database.name == t2 && partitionName == t3");
query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3");
- query.setUnique(true);
- mpart = (MPartition) query.execute(tableName, dbName, name);
- pm.retrieve(mpart);
+ mparts = (List<MPartition>) query.execute(tableName, dbName, name);
+ pm.retrieveAll(mparts);
commited = commitTransaction();
+ // We need to compare partition name with requested name since some DBs
+ // (like MySQL, Derby) considers 'a' = 'a ' whereas others like (Postgres,
+ // Oracle) doesn't exhibit this problem.
+ if (mparts != null && mparts.size() > 0) {
+ if (mparts.size() > 1) {
+ throw new MetaException(
+ "Expecting only one partition but more than one partitions are found.");
+ } else {
+ MPartition mpart = mparts.get(0);
+ if (name.equals(mpart.getPartitionName())) {
+ ret = mpart;
+ } else {
+ throw new MetaException("Expecting a partition with name " + name
+ + ", but metastore is returning a partition with name " + mpart.getPartitionName()
+ + ".");
+ }
+ }
+ }
} finally {
if (!commited) {
rollbackTransaction();
@@ -1837,7 +1855,7 @@ public class ObjectStore implements RawStore, Configurable {
query.closeAll();
}
}
- return mpart;
+ return ret;
}
/**
@@ -7641,37 +7659,10 @@ public class ObjectStore implements RawStore, Configurable {
@Override
public boolean doesPartitionExist(String dbName, String tableName, List<String> partVals)
throws MetaException {
- boolean success = false;
- Query query = null;
try {
- openTransaction();
- dbName = HiveStringUtils.normalizeIdentifier(dbName);
- tableName = HiveStringUtils.normalizeIdentifier(tableName);
-
- // TODO: this could also be passed from upper layer; or this method should filter the list.
- MTable mtbl = getMTable(dbName, tableName);
- if (mtbl == null) {
- success = commitTransaction();
- return false;
- }
- query =
- pm.newQuery("select partitionName from org.apache.hadoop.hive.metastore.model.MPartition "
- + "where table.tableName == t1 && table.database.name == t2 && partitionName == t3");
- query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3");
- query.setUnique(true);
- query.setResult("partitionName");
- String name =
- Warehouse.makePartName(convertToFieldSchemas(mtbl.getPartitionKeys()), partVals);
- String result = (String) query.execute(tableName, dbName, name);
- success = commitTransaction();
- return result != null;
- } finally {
- if (!success) {
- rollbackTransaction();
- }
- if (query != null) {
- query.closeAll();
- }
+ return this.getPartition(dbName, tableName, partVals) != null;
+ } catch (NoSuchObjectException e) {
+ return false;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/2a8e3881/ql/src/test/queries/clientnegative/partition_column_names_with_leading_and_trailing_spaces.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/partition_column_names_with_leading_and_trailing_spaces.q b/ql/src/test/queries/clientnegative/partition_column_names_with_leading_and_trailing_spaces.q
new file mode 100644
index 0000000..f087130
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/partition_column_names_with_leading_and_trailing_spaces.q
@@ -0,0 +1,15 @@
+set hive.mapred.mode=nonstrict;
+
+create table foo (d string);
+
+create table foo_p (d string) partitioned by (p string);
+
+insert into foo values ("1");
+
+insert into foo_p partition (p="a ") select foo.d from foo;
+
+insert into foo_p partition (p="a") select foo.d from foo;
+
+select * from foo_p where p="a ";
+
+select * from foo_p where p="a";
http://git-wip-us.apache.org/repos/asf/hive/blob/2a8e3881/ql/src/test/queries/clientpositive/partition_column_names_with_leading_and_trailing_spaces.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/partition_column_names_with_leading_and_trailing_spaces.q b/ql/src/test/queries/clientpositive/partition_column_names_with_leading_and_trailing_spaces.q
new file mode 100644
index 0000000..f087130
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/partition_column_names_with_leading_and_trailing_spaces.q
@@ -0,0 +1,15 @@
+set hive.mapred.mode=nonstrict;
+
+create table foo (d string);
+
+create table foo_p (d string) partitioned by (p string);
+
+insert into foo values ("1");
+
+insert into foo_p partition (p="a ") select foo.d from foo;
+
+insert into foo_p partition (p="a") select foo.d from foo;
+
+select * from foo_p where p="a ";
+
+select * from foo_p where p="a";
http://git-wip-us.apache.org/repos/asf/hive/blob/2a8e3881/ql/src/test/results/clientnegative/partition_column_names_with_leading_and_trailing_spaces.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/partition_column_names_with_leading_and_trailing_spaces.q.out b/ql/src/test/results/clientnegative/partition_column_names_with_leading_and_trailing_spaces.q.out
new file mode 100644
index 0000000..4dcee8c
--- /dev/null
+++ b/ql/src/test/results/clientnegative/partition_column_names_with_leading_and_trailing_spaces.q.out
@@ -0,0 +1,35 @@
+PREHOOK: query: create table foo (d string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@foo
+POSTHOOK: query: create table foo (d string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@foo
+PREHOOK: query: create table foo_p (d string) partitioned by (p string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@foo_p
+POSTHOOK: query: create table foo_p (d string) partitioned by (p string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@foo_p
+PREHOOK: query: insert into foo values ("1")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@foo
+POSTHOOK: query: insert into foo values ("1")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@foo
+POSTHOOK: Lineage: foo.d SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into foo_p partition (p="a ") select foo.d from foo
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo
+PREHOOK: Output: default@foo_p@p=a
+POSTHOOK: query: insert into foo_p partition (p="a ") select foo.d from foo
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo
+POSTHOOK: Output: default@foo_p@p=a
+POSTHOOK: Lineage: foo_p PARTITION(p=a ).d SIMPLE [(foo)foo.FieldSchema(name:d, type:string, comment:null), ]
+FAILED: SemanticException [Error 10006]: Line 3:29 Partition not found '"a"'
http://git-wip-us.apache.org/repos/asf/hive/blob/2a8e3881/ql/src/test/results/clientpositive/tez/partition_column_names_with_leading_and_trailing_spaces.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/partition_column_names_with_leading_and_trailing_spaces.q.out b/ql/src/test/results/clientpositive/tez/partition_column_names_with_leading_and_trailing_spaces.q.out
new file mode 100644
index 0000000..2a35c84
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/partition_column_names_with_leading_and_trailing_spaces.q.out
@@ -0,0 +1,65 @@
+PREHOOK: query: create table foo (d string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@foo
+POSTHOOK: query: create table foo (d string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@foo
+PREHOOK: query: create table foo_p (d string) partitioned by (p string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@foo_p
+POSTHOOK: query: create table foo_p (d string) partitioned by (p string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@foo_p
+PREHOOK: query: insert into foo values ("1")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@foo
+POSTHOOK: query: insert into foo values ("1")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@foo
+POSTHOOK: Lineage: foo.d SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into foo_p partition (p="a ") select foo.d from foo
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo
+PREHOOK: Output: default@foo_p@p=a
+POSTHOOK: query: insert into foo_p partition (p="a ") select foo.d from foo
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo
+POSTHOOK: Output: default@foo_p@p=a
+POSTHOOK: Lineage: foo_p PARTITION(p=a ).d SIMPLE [(foo)foo.FieldSchema(name:d, type:string, comment:null), ]
+PREHOOK: query: insert into foo_p partition (p="a") select foo.d from foo
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo
+PREHOOK: Output: default@foo_p@p=a
+POSTHOOK: query: insert into foo_p partition (p="a") select foo.d from foo
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo
+POSTHOOK: Output: default@foo_p@p=a
+POSTHOOK: Lineage: foo_p PARTITION(p=a).d SIMPLE [(foo)foo.FieldSchema(name:d, type:string, comment:null), ]
+PREHOOK: query: select * from foo_p where p="a "
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo_p
+PREHOOK: Input: default@foo_p@p=a
+#### A masked pattern was here ####
+POSTHOOK: query: select * from foo_p where p="a "
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo_p
+POSTHOOK: Input: default@foo_p@p=a
+#### A masked pattern was here ####
+1 a
+PREHOOK: query: select * from foo_p where p="a"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo_p
+PREHOOK: Input: default@foo_p@p=a
+#### A masked pattern was here ####
+POSTHOOK: query: select * from foo_p where p="a"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo_p
+POSTHOOK: Input: default@foo_p@p=a
+#### A masked pattern was here ####
+1 a