You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by px...@apache.org on 2016/05/09 05:41:33 UTC

hive git commit: HIVE-13618: Trailing spaces in partition column will be treated differently (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/master 48be04b2c -> 2a8e38814


HIVE-13618: Trailing spaces in partition column will be treated differently (Pengcheng Xiong, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2a8e3881
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2a8e3881
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2a8e3881

Branch: refs/heads/master
Commit: 2a8e38814f6dedae4c3b5270540d7ca6ef4b941e
Parents: 48be04b
Author: Pengcheng Xiong <px...@apache.org>
Authored: Sun May 8 22:41:07 2016 -0700
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Sun May 8 22:41:07 2016 -0700

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |  1 +
 .../hadoop/hive/metastore/ObjectStore.java      | 61 ++++++++----------
 ...umn_names_with_leading_and_trailing_spaces.q | 15 +++++
 ...umn_names_with_leading_and_trailing_spaces.q | 15 +++++
 ...names_with_leading_and_trailing_spaces.q.out | 35 +++++++++++
 ...names_with_leading_and_trailing_spaces.q.out | 65 ++++++++++++++++++++
 6 files changed, 157 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/2a8e3881/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 88381aa..5aedd10 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -427,6 +427,7 @@ minitez.query.files=bucket_map_join_tez1.q,\
   mrr.q,\
   orc_ppd_basic.q,\
   orc_merge_diff_fs.q,\
+  partition_column_names_with_leading_and_trailing_spaces.q,\
   stats_filemetadata.q,\
   tez_aggr_part_stats.q,\
   tez_bmj_schema_evolution.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/2a8e3881/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index 5c49be9..550db83 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -1805,7 +1805,8 @@ public class ObjectStore implements RawStore, Configurable {
 
   private MPartition getMPartition(String dbName, String tableName, List<String> part_vals)
       throws MetaException {
-    MPartition mpart = null;
+    List<MPartition> mparts = null;
+    MPartition ret = null;
     boolean commited = false;
     Query query = null;
     try {
@@ -1825,10 +1826,27 @@ public class ObjectStore implements RawStore, Configurable {
           pm.newQuery(MPartition.class,
               "table.tableName == t1 && table.database.name == t2 && partitionName == t3");
       query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3");
-      query.setUnique(true);
-      mpart = (MPartition) query.execute(tableName, dbName, name);
-      pm.retrieve(mpart);
+      mparts = (List<MPartition>) query.execute(tableName, dbName, name);
+      pm.retrieveAll(mparts);
       commited = commitTransaction();
+      // We need to compare partition name with requested name since some DBs
+      // (like MySQL, Derby) considers 'a' = 'a ' whereas others like (Postgres,
+      // Oracle) doesn't exhibit this problem.
+      if (mparts != null && mparts.size() > 0) {
+        if (mparts.size() > 1) {
+          throw new MetaException(
+              "Expecting only one partition but more than one partitions are found.");
+        } else {
+          MPartition mpart = mparts.get(0);
+          if (name.equals(mpart.getPartitionName())) {
+            ret = mpart;
+          } else {
+            throw new MetaException("Expecting a partition with name " + name
+                + ", but metastore is returning a partition with name " + mpart.getPartitionName()
+                + ".");
+          }
+        }
+      }
     } finally {
       if (!commited) {
         rollbackTransaction();
@@ -1837,7 +1855,7 @@ public class ObjectStore implements RawStore, Configurable {
         query.closeAll();
       }
     }
-    return mpart;
+    return ret;
   }
 
   /**
@@ -7641,37 +7659,10 @@ public class ObjectStore implements RawStore, Configurable {
   @Override
   public boolean doesPartitionExist(String dbName, String tableName, List<String> partVals)
       throws MetaException {
-    boolean success = false;
-    Query query = null;
     try {
-      openTransaction();
-      dbName = HiveStringUtils.normalizeIdentifier(dbName);
-      tableName = HiveStringUtils.normalizeIdentifier(tableName);
-
-      // TODO: this could also be passed from upper layer; or this method should filter the list.
-      MTable mtbl = getMTable(dbName, tableName);
-      if (mtbl == null) {
-        success = commitTransaction();
-        return false;
-      }
-      query =
-          pm.newQuery("select partitionName from org.apache.hadoop.hive.metastore.model.MPartition "
-              + "where table.tableName == t1 && table.database.name == t2 && partitionName == t3");
-      query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3");
-      query.setUnique(true);
-      query.setResult("partitionName");
-      String name =
-          Warehouse.makePartName(convertToFieldSchemas(mtbl.getPartitionKeys()), partVals);
-      String result = (String) query.execute(tableName, dbName, name);
-      success = commitTransaction();
-      return result != null;
-    } finally {
-      if (!success) {
-        rollbackTransaction();
-      }
-      if (query != null) {
-        query.closeAll();
-      }
+      return this.getPartition(dbName, tableName, partVals) != null;
+    } catch (NoSuchObjectException e) {
+      return false;
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/2a8e3881/ql/src/test/queries/clientnegative/partition_column_names_with_leading_and_trailing_spaces.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/partition_column_names_with_leading_and_trailing_spaces.q b/ql/src/test/queries/clientnegative/partition_column_names_with_leading_and_trailing_spaces.q
new file mode 100644
index 0000000..f087130
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/partition_column_names_with_leading_and_trailing_spaces.q
@@ -0,0 +1,15 @@
+set hive.mapred.mode=nonstrict;
+
+create table foo (d string);
+
+create table foo_p (d string) partitioned by (p string);
+
+insert into foo values ("1");
+
+insert into foo_p partition (p="a ") select foo.d from foo;
+
+insert into foo_p partition (p="a") select foo.d from foo;
+
+select * from foo_p where p="a ";
+
+select * from foo_p where p="a";

http://git-wip-us.apache.org/repos/asf/hive/blob/2a8e3881/ql/src/test/queries/clientpositive/partition_column_names_with_leading_and_trailing_spaces.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/partition_column_names_with_leading_and_trailing_spaces.q b/ql/src/test/queries/clientpositive/partition_column_names_with_leading_and_trailing_spaces.q
new file mode 100644
index 0000000..f087130
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/partition_column_names_with_leading_and_trailing_spaces.q
@@ -0,0 +1,15 @@
+set hive.mapred.mode=nonstrict;
+
+create table foo (d string);
+
+create table foo_p (d string) partitioned by (p string);
+
+insert into foo values ("1");
+
+insert into foo_p partition (p="a ") select foo.d from foo;
+
+insert into foo_p partition (p="a") select foo.d from foo;
+
+select * from foo_p where p="a ";
+
+select * from foo_p where p="a";

http://git-wip-us.apache.org/repos/asf/hive/blob/2a8e3881/ql/src/test/results/clientnegative/partition_column_names_with_leading_and_trailing_spaces.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/partition_column_names_with_leading_and_trailing_spaces.q.out b/ql/src/test/results/clientnegative/partition_column_names_with_leading_and_trailing_spaces.q.out
new file mode 100644
index 0000000..4dcee8c
--- /dev/null
+++ b/ql/src/test/results/clientnegative/partition_column_names_with_leading_and_trailing_spaces.q.out
@@ -0,0 +1,35 @@
+PREHOOK: query: create table foo (d string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@foo
+POSTHOOK: query: create table foo (d string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@foo
+PREHOOK: query: create table foo_p (d string) partitioned by (p string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@foo_p
+POSTHOOK: query: create table foo_p (d string) partitioned by (p string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@foo_p
+PREHOOK: query: insert into foo values ("1")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@foo
+POSTHOOK: query: insert into foo values ("1")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@foo
+POSTHOOK: Lineage: foo.d SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into foo_p partition (p="a ") select foo.d from foo
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo
+PREHOOK: Output: default@foo_p@p=a 
+POSTHOOK: query: insert into foo_p partition (p="a ") select foo.d from foo
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo
+POSTHOOK: Output: default@foo_p@p=a 
+POSTHOOK: Lineage: foo_p PARTITION(p=a ).d SIMPLE [(foo)foo.FieldSchema(name:d, type:string, comment:null), ]
+FAILED: SemanticException [Error 10006]: Line 3:29 Partition not found '"a"'

http://git-wip-us.apache.org/repos/asf/hive/blob/2a8e3881/ql/src/test/results/clientpositive/tez/partition_column_names_with_leading_and_trailing_spaces.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/partition_column_names_with_leading_and_trailing_spaces.q.out b/ql/src/test/results/clientpositive/tez/partition_column_names_with_leading_and_trailing_spaces.q.out
new file mode 100644
index 0000000..2a35c84
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/partition_column_names_with_leading_and_trailing_spaces.q.out
@@ -0,0 +1,65 @@
+PREHOOK: query: create table foo (d string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@foo
+POSTHOOK: query: create table foo (d string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@foo
+PREHOOK: query: create table foo_p (d string) partitioned by (p string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@foo_p
+POSTHOOK: query: create table foo_p (d string) partitioned by (p string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@foo_p
+PREHOOK: query: insert into foo values ("1")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@foo
+POSTHOOK: query: insert into foo values ("1")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@foo
+POSTHOOK: Lineage: foo.d SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into foo_p partition (p="a ") select foo.d from foo
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo
+PREHOOK: Output: default@foo_p@p=a 
+POSTHOOK: query: insert into foo_p partition (p="a ") select foo.d from foo
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo
+POSTHOOK: Output: default@foo_p@p=a 
+POSTHOOK: Lineage: foo_p PARTITION(p=a ).d SIMPLE [(foo)foo.FieldSchema(name:d, type:string, comment:null), ]
+PREHOOK: query: insert into foo_p partition (p="a") select foo.d from foo
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo
+PREHOOK: Output: default@foo_p@p=a
+POSTHOOK: query: insert into foo_p partition (p="a") select foo.d from foo
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo
+POSTHOOK: Output: default@foo_p@p=a
+POSTHOOK: Lineage: foo_p PARTITION(p=a).d SIMPLE [(foo)foo.FieldSchema(name:d, type:string, comment:null), ]
+PREHOOK: query: select * from foo_p where p="a "
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo_p
+PREHOOK: Input: default@foo_p@p=a 
+#### A masked pattern was here ####
+POSTHOOK: query: select * from foo_p where p="a "
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo_p
+POSTHOOK: Input: default@foo_p@p=a 
+#### A masked pattern was here ####
+1	a 
+PREHOOK: query: select * from foo_p where p="a"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo_p
+PREHOOK: Input: default@foo_p@p=a
+#### A masked pattern was here ####
+POSTHOOK: query: select * from foo_p where p="a"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo_p
+POSTHOOK: Input: default@foo_p@p=a
+#### A masked pattern was here ####
+1	a