You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ct...@apache.org on 2015/12/06 01:53:07 UTC

[1/3] hive git commit: HIVE-12505:Insert overwrite in same encrypted zone silently fails to remove some existing files (Chaoyu Tang, reviewed by Aihua Xu)

Repository: hive
Updated Branches:
  refs/heads/branch-2.0 f40c1502d -> 67d095d89


HIVE-12505:Insert overwrite in same encrypted zone silently fails to remove some existing files (Chaoyu Tang, reviewed by Aihua Xu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f9791beb
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f9791beb
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f9791beb

Branch: refs/heads/branch-2.0
Commit: f9791bebaa9d71ff62cecedb0243d09e99abfb38
Parents: f40c150
Author: ctang <ct...@gmail.com>
Authored: Fri Dec 4 10:46:23 2015 -0500
Committer: ctang <ct...@gmail.com>
Committed: Sat Dec 5 19:50:25 2015 -0500

----------------------------------------------------------------------
 .../apache/hadoop/hive/common/FileUtils.java    |  66 ++++++++--
 .../test/resources/testconfiguration.properties |   3 +-
 .../apache/hadoop/hive/ql/metadata/Hive.java    |  26 +++-
 .../clientpositive/encryption_with_trash.q      |  33 +++++
 .../encrypted/encryption_with_trash.q.out       | 122 +++++++++++++++++++
 5 files changed, 234 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/f9791beb/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
index f943781..5dd9f40 100644
--- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
+++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
@@ -574,7 +574,7 @@ public final class FileUtils {
   }
 
   /**
-   * Deletes all files under a directory, sending them to the trash.  Leaves the directory as is.
+   * Trashes or deletes all files under a directory. Leaves the directory as is.
    * @param fs FileSystem to use
    * @param f path of directory
    * @param conf hive configuration
@@ -582,17 +582,34 @@ public final class FileUtils {
    * @throws FileNotFoundException
    * @throws IOException
    */
-  public static boolean trashFilesUnderDir(FileSystem fs, Path f, Configuration conf) throws FileNotFoundException, IOException {
+  public static boolean trashFilesUnderDir(FileSystem fs, Path f, Configuration conf)
+      throws FileNotFoundException, IOException {
+    return trashFilesUnderDir(fs, f, conf, true);
+  }
+
+  /**
+   * Trashes or deletes all files under a directory. Leaves the directory as is.
+   * @param fs FileSystem to use
+   * @param f path of directory
+   * @param conf hive configuration
+   * @param forceDelete whether to force delete files if trashing does not succeed
+   * @return true if deletion successful
+   * @throws FileNotFoundException
+   * @throws IOException
+   */
+  public static boolean trashFilesUnderDir(FileSystem fs, Path f, Configuration conf,
+      boolean forceDelete) throws FileNotFoundException, IOException {
     FileStatus[] statuses = fs.listStatus(f, HIDDEN_FILES_PATH_FILTER);
     boolean result = true;
     for (FileStatus status : statuses) {
-      result = result & moveToTrash(fs, status.getPath(), conf);
+      result = result & moveToTrash(fs, status.getPath(), conf, forceDelete);
     }
     return result;
   }
 
   /**
-   * Move a particular file or directory to the trash.
+   * Move a particular file or directory to the trash. If for a certain reason the trashing fails
+   * it will force deletes the file or directory
    * @param fs FileSystem to use
    * @param f path of file or directory to move to trash.
    * @param conf
@@ -600,18 +617,47 @@ public final class FileUtils {
    * @throws IOException
    */
   public static boolean moveToTrash(FileSystem fs, Path f, Configuration conf) throws IOException {
+    return moveToTrash(fs, f, conf, true);
+  }
+
+  /**
+   * Move a particular file or directory to the trash.
+   * @param fs FileSystem to use
+   * @param f path of file or directory to move to trash.
+   * @param conf
+   * @param forceDelete whether force delete the file or directory if trashing fails
+   * @return true if move successful
+   * @throws IOException
+   */
+  public static boolean moveToTrash(FileSystem fs, Path f, Configuration conf, boolean forceDelete)
+      throws IOException {
     LOG.info("deleting  " + f);
     HadoopShims hadoopShim = ShimLoader.getHadoopShims();
 
-    if (hadoopShim.moveToAppropriateTrash(fs, f, conf)) {
-      LOG.info("Moved to trash: " + f);
-      return true;
+    boolean result = false;
+    try {
+      result = hadoopShim.moveToAppropriateTrash(fs, f, conf);
+      if (result) {
+        LOG.info("Moved to trash: " + f);
+        return true;
+      }
+    } catch (IOException ioe) {
+      if (forceDelete) {
+        // for whatever failure reason including that trash has lower encryption zone
+        // retry with force delete
+        LOG.warn(ioe.getMessage() + "; Force to delete it.");
+      } else {
+        throw ioe;
+      }
     }
 
-    boolean result = fs.delete(f, true);
-    if (!result) {
-      LOG.error("Failed to delete " + f);
+    if (forceDelete) {
+      result = fs.delete(f, true);
+      if (!result) {
+        LOG.error("Failed to delete " + f);
+      }
     }
+
     return result;
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/f9791beb/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 523f894..f2b4d80 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -474,7 +474,8 @@ encrypted.query.files=encryption_join_unencrypted_tbl.q,\
   encryption_drop_table.q \
   encryption_insert_values.q \
   encryption_drop_view.q \
-  encryption_drop_partition.q
+  encryption_drop_partition.q \
+  encryption_with_trash.q
 
 beeline.positive.exclude=add_part_exist.q,\
   alter1.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/f9791beb/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index 488d923..a4ada24 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -2953,19 +2953,35 @@ private void constructOneLBLocationMap(FileStatus fSta,
       List<List<Path[]>> result = checkPaths(conf, destFs, srcs, srcFs, destf, true);
 
       if (oldPath != null) {
+        boolean oldPathDeleted = false;
+        boolean isOldPathUnderDestf = false;
         try {
           FileSystem fs2 = oldPath.getFileSystem(conf);
           if (fs2.exists(oldPath)) {
             // Do not delete oldPath if:
             //  - destf is subdir of oldPath
             //if ( !(fs2.equals(destf.getFileSystem(conf)) && FileUtils.isSubDir(oldPath, destf, fs2)))
-            if (FileUtils.isSubDir(oldPath, destf, fs2)) {
-              FileUtils.trashFilesUnderDir(fs2, oldPath, conf);
+            isOldPathUnderDestf = FileUtils.isSubDir(oldPath, destf, fs2);
+            if (isOldPathUnderDestf) {
+              // if oldPath is destf or its subdir, its should definitely be deleted, otherwise its
+              // existing content might result in incorrect (extra) data.
+              // But not sure why we changed not to delete the oldPath in HIVE-8750 if it is
+              // not the destf or its subdir?
+              oldPathDeleted = FileUtils.trashFilesUnderDir(fs2, oldPath, conf);
             }
           }
-        } catch (Exception e) {
-          //swallow the exception
-          LOG.warn("Directory " + oldPath.toString() + " cannot be removed: " + e, e);
+        } catch (IOException e) {
+          if (isOldPathUnderDestf) {
+            // if oldPath is a subdir of destf but it could not be cleaned
+            throw new HiveException("Directory " + oldPath.toString()
+                + " could not be cleaned up.", e);
+          } else {
+            //swallow the exception since it won't affect the final result
+            LOG.warn("Directory " + oldPath.toString() + " cannot be cleaned: " + e, e);
+          }
+        }
+        if (isOldPathUnderDestf && !oldPathDeleted) {
+          throw new HiveException("Destination directory " + destf + " has not be cleaned up.");
         }
       }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/f9791beb/ql/src/test/queries/clientpositive/encryption_with_trash.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/encryption_with_trash.q b/ql/src/test/queries/clientpositive/encryption_with_trash.q
new file mode 100644
index 0000000..8f8789a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/encryption_with_trash.q
@@ -0,0 +1,33 @@
+set fs.trash.interval=5
+
+-- SORT_QUERY_RESULTS
+
+-- init
+drop table IF EXISTS encryptedTableSrc PURGE;
+drop table IF EXISTS unencryptedTable PURGE;
+
+create table encryptedTableSrc(key string, value string)
+LOCATION '${hiveconf:hive.metastore.warehouse.dir}/encryptedTableSrc';
+
+create table encryptedTable(key string, value string) partitioned by (ds string)
+    LOCATION '${hiveconf:hive.metastore.warehouse.dir}/encryptedTable';
+CRYPTO CREATE_KEY --keyName key_1 --bitLength 128;
+CRYPTO CREATE_ZONE --keyName key_1 --path ${hiveconf:hive.metastore.warehouse.dir}/encryptedTableSrc;
+CRYPTO CREATE_ZONE --keyName key_1 --path ${hiveconf:hive.metastore.warehouse.dir}/encryptedTable;
+
+-- insert src table from values
+insert into table encryptedTableSrc values ('501', 'val_501'), ('502', 'val_502');
+
+insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc;
+select count(*) from encryptedTable where ds='today';
+insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc;
+select count(*) from encryptedTable where ds='today';
+
+insert overwrite table encryptedTable partition (ds='today') select key, value from encryptedTableSrc;
+select count(*) from encryptedTable where ds='today';
+
+-- clean up
+drop table encryptedTable PURGE;
+drop table unencryptedTable PURGE;
+CRYPTO DELETE_KEY --keyName key_1;
+set fs.trash.interval=0

http://git-wip-us.apache.org/repos/asf/hive/blob/f9791beb/ql/src/test/results/clientpositive/encrypted/encryption_with_trash.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_with_trash.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_with_trash.q.out
new file mode 100644
index 0000000..3d1f75f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/encrypted/encryption_with_trash.q.out
@@ -0,0 +1,122 @@
+Warning: Value had a \n character in it.
+PREHOOK: query: drop table IF EXISTS unencryptedTable PURGE
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table IF EXISTS unencryptedTable PURGE
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table encryptedTableSrc(key string, value string)
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@encryptedTableSrc
+POSTHOOK: query: create table encryptedTableSrc(key string, value string)
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@encryptedTableSrc
+PREHOOK: query: create table encryptedTable(key string, value string) partitioned by (ds string)
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@encryptedTable
+POSTHOOK: query: create table encryptedTable(key string, value string) partitioned by (ds string)
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@encryptedTable
+Encryption key created: 'key_1'
+Encryption zone created: '/build/ql/test/data/warehouse/encryptedTableSrc' using key: 'key_1'
+Encryption zone created: '/build/ql/test/data/warehouse/encryptedTable' using key: 'key_1'
+PREHOOK: query: -- insert src table from values
+insert into table encryptedTableSrc values ('501', 'val_501'), ('502', 'val_502')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@encryptedtablesrc
+POSTHOOK: query: -- insert src table from values
+insert into table encryptedTableSrc values ('501', 'val_501'), ('502', 'val_502')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@encryptedtablesrc
+POSTHOOK: Lineage: encryptedtablesrc.key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: encryptedtablesrc.value SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encryptedtablesrc
+PREHOOK: Output: default@encryptedtable@ds=today
+POSTHOOK: query: insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encryptedtablesrc
+POSTHOOK: Output: default@encryptedtable@ds=today
+POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).key SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).value SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from encryptedTable where ds='today'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encryptedtable
+PREHOOK: Input: default@encryptedtable@ds=today
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
+POSTHOOK: query: select count(*) from encryptedTable where ds='today'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encryptedtable
+POSTHOOK: Input: default@encryptedtable@ds=today
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
+2
+PREHOOK: query: insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encryptedtablesrc
+PREHOOK: Output: default@encryptedtable@ds=today
+POSTHOOK: query: insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encryptedtablesrc
+POSTHOOK: Output: default@encryptedtable@ds=today
+POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).key SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).value SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from encryptedTable where ds='today'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encryptedtable
+PREHOOK: Input: default@encryptedtable@ds=today
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
+POSTHOOK: query: select count(*) from encryptedTable where ds='today'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encryptedtable
+POSTHOOK: Input: default@encryptedtable@ds=today
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
+4
+PREHOOK: query: insert overwrite table encryptedTable partition (ds='today') select key, value from encryptedTableSrc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encryptedtablesrc
+PREHOOK: Output: default@encryptedtable@ds=today
+POSTHOOK: query: insert overwrite table encryptedTable partition (ds='today') select key, value from encryptedTableSrc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encryptedtablesrc
+POSTHOOK: Output: default@encryptedtable@ds=today
+POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).key SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).value SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from encryptedTable where ds='today'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encryptedtable
+PREHOOK: Input: default@encryptedtable@ds=today
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
+POSTHOOK: query: select count(*) from encryptedTable where ds='today'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encryptedtable
+POSTHOOK: Input: default@encryptedtable@ds=today
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
+2
+PREHOOK: query: -- clean up
+drop table encryptedTable PURGE
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@encryptedtable
+PREHOOK: Output: default@encryptedtable
+POSTHOOK: query: -- clean up
+drop table encryptedTable PURGE
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@encryptedtable
+POSTHOOK: Output: default@encryptedtable
+PREHOOK: query: drop table unencryptedTable PURGE
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table unencryptedTable PURGE
+POSTHOOK: type: DROPTABLE
+Encryption key deleted: 'key_1'


[2/3] hive git commit: HIVE-12506:SHOW CREATE TABLE command creates a table that does not work for RCFile format (Chaoyu Tang, reviewed by Jimmy Xiang)

Posted by ct...@apache.org.
HIVE-12506:SHOW CREATE TABLE command creates a table that does not work for RCFile format (Chaoyu Tang, reviewed by Jimmy Xiang)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ed973608
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ed973608
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ed973608

Branch: refs/heads/branch-2.0
Commit: ed9736080aa960ad7d7c2af58d692e6b4f9d6df5
Parents: f9791be
Author: ctang <ct...@gmail.com>
Authored: Fri Dec 4 11:04:14 2015 -0500
Committer: ctang <ct...@gmail.com>
Committed: Sat Dec 5 19:50:42 2015 -0500

----------------------------------------------------------------------
 .../hadoop/hive/metastore/MetaStoreUtils.java   |  5 +-
 .../org/apache/hadoop/hive/ql/exec/DDLTask.java | 51 ++++----------------
 .../results/clientpositive/nullformat.q.out     |  6 ++-
 .../results/clientpositive/nullformatCTAS.q.out |  6 ++-
 .../show_create_table_delimited.q.out           | 13 +++--
 5 files changed, 28 insertions(+), 53 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/ed973608/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
index 02cbd76..23068f8 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
@@ -90,6 +90,7 @@ public class MetaStoreUtils {
 
   public static final String DEFAULT_DATABASE_NAME = "default";
   public static final String DEFAULT_DATABASE_COMMENT = "Default Hive database";
+  public static final String DEFAULT_SERIALIZATION_FORMAT = "1";
 
   public static final String DATABASE_WAREHOUSE_SUFFIX = ".db";
 
@@ -116,8 +117,8 @@ public class MetaStoreUtils {
     SerDeInfo serdeInfo = sd.getSerdeInfo();
     serdeInfo.setSerializationLib(LazySimpleSerDe.class.getName());
     serdeInfo.setParameters(new HashMap<String, String>());
-    serdeInfo.getParameters().put(
-        org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT, "1");
+    serdeInfo.getParameters().put(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT,
+        DEFAULT_SERIALIZATION_FORMAT);
 
     List<FieldSchema> fields = new ArrayList<FieldSchema>();
     sd.setCols(fields);

http://git-wip-us.apache.org/repos/asf/hive/blob/ed973608/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index a210b95..1264421 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -2117,37 +2117,15 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
       StringBuilder tbl_row_format = new StringBuilder();
       StorageDescriptor sd = tbl.getTTable().getSd();
       SerDeInfo serdeInfo = sd.getSerdeInfo();
-      tbl_row_format.append("ROW FORMAT");
+      Map<String, String> serdeParams = serdeInfo.getParameters();
+      tbl_row_format.append("ROW FORMAT SERDE \n");
+      tbl_row_format.append("  '" + escapeHiveCommand(serdeInfo.getSerializationLib()) + "' \n");
       if (tbl.getStorageHandler() == null) {
-        Map<String, String> serdeParams = serdeInfo.getParameters();
-        String[] delimiters = new String[] {
-            serdeParams.remove(serdeConstants.FIELD_DELIM),
-            serdeParams.remove(serdeConstants.COLLECTION_DELIM),
-            serdeParams.remove(serdeConstants.MAPKEY_DELIM),
-            serdeParams.remove(serdeConstants.LINE_DELIM),
-            serdeParams.remove(serdeConstants.SERIALIZATION_NULL_FORMAT)
-        };
-        serdeParams.remove(serdeConstants.SERIALIZATION_FORMAT);
-        if (containsNonNull(delimiters)) {
-          // There is a "serialization.format" property by default,
-          // even with a delimited row format.
-          // But our result will only cover the following four delimiters.
-          tbl_row_format.append(" DELIMITED \n");
-
-          // Warn:
-          // If the four delimiters all exist in a CREATE TABLE query,
-          // this following order needs to be strictly followed,
-          // or the query will fail with a ParseException.
-          for (int i = 0; i < DELIMITER_PREFIXES.length; i++) {
-            if (delimiters[i] != null) {
-              tbl_row_format.append("  ").append(DELIMITER_PREFIXES[i]).append(" '");
-              tbl_row_format.append(escapeHiveCommand(StringEscapeUtils.escapeJava(delimiters[i])));
-              tbl_row_format.append("' \n");
-            }
-          }
-        } else {
-          tbl_row_format.append(" SERDE \n  '" +
-              escapeHiveCommand(serdeInfo.getSerializationLib()) + "' \n");
+        // If serialization.format property has the default value, it will not to be included in
+        // SERDE properties
+        if (MetaStoreUtils.DEFAULT_SERIALIZATION_FORMAT.equals(serdeParams.get(
+            serdeConstants.SERIALIZATION_FORMAT))){
+          serdeParams.remove(serdeConstants.SERIALIZATION_FORMAT);
         }
         if (!serdeParams.isEmpty()) {
           appendSerdeParams(tbl_row_format, serdeParams).append(" \n");
@@ -2158,12 +2136,10 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
             escapeHiveCommand(sd.getOutputFormat()) + "'");
       } else {
         duplicateProps.add(META_TABLE_STORAGE);
-        tbl_row_format.append(" SERDE \n  '" +
-            escapeHiveCommand(serdeInfo.getSerializationLib()) + "' \n");
         tbl_row_format.append("STORED BY \n  '" + escapeHiveCommand(tbl.getParameters().get(
             META_TABLE_STORAGE)) + "' \n");
         // SerDe Properties
-        if (serdeInfo.getParametersSize() > 0) {
+        if (!serdeParams.isEmpty()) {
           appendSerdeParams(tbl_row_format, serdeInfo.getParameters());
         }
       }
@@ -2210,15 +2186,6 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
     return prop_string;
   }
 
-  private boolean containsNonNull(String[] values) {
-    for (String value : values) {
-      if (value != null) {
-        return true;
-      }
-    }
-    return false;
-  }
-
   private StringBuilder appendSerdeParams(StringBuilder builder, Map<String, String> serdeParam) {
     serdeParam = new TreeMap<String, String>(serdeParam);
     builder.append("WITH SERDEPROPERTIES ( \n");

http://git-wip-us.apache.org/repos/asf/hive/blob/ed973608/ql/src/test/results/clientpositive/nullformat.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/nullformat.q.out b/ql/src/test/results/clientpositive/nullformat.q.out
index 6cfc2b8..af91470 100644
--- a/ql/src/test/results/clientpositive/nullformat.q.out
+++ b/ql/src/test/results/clientpositive/nullformat.q.out
@@ -82,8 +82,10 @@ POSTHOOK: Input: default@null_tab1
 CREATE TABLE `null_tab1`(
   `a` string, 
   `b` string)
-ROW FORMAT DELIMITED 
-  NULL DEFINED AS 'fooNull' 
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' 
+WITH SERDEPROPERTIES ( 
+  'serialization.null.format'='fooNull') 
 STORED AS INPUTFORMAT 
   'org.apache.hadoop.mapred.TextInputFormat' 
 OUTPUTFORMAT 

http://git-wip-us.apache.org/repos/asf/hive/blob/ed973608/ql/src/test/results/clientpositive/nullformatCTAS.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/nullformatCTAS.q.out b/ql/src/test/results/clientpositive/nullformatCTAS.q.out
index 7adca68..d6b22b1 100644
--- a/ql/src/test/results/clientpositive/nullformatCTAS.q.out
+++ b/ql/src/test/results/clientpositive/nullformatCTAS.q.out
@@ -163,8 +163,10 @@ POSTHOOK: Input: default@null_tab3
 CREATE TABLE `null_tab3`(
   `a` string, 
   `b` string)
-ROW FORMAT DELIMITED 
-  NULL DEFINED AS 'fooNull' 
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' 
+WITH SERDEPROPERTIES ( 
+  'serialization.null.format'='fooNull') 
 STORED AS INPUTFORMAT 
   'org.apache.hadoop.mapred.TextInputFormat' 
 OUTPUTFORMAT 

http://git-wip-us.apache.org/repos/asf/hive/blob/ed973608/ql/src/test/results/clientpositive/show_create_table_delimited.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/show_create_table_delimited.q.out b/ql/src/test/results/clientpositive/show_create_table_delimited.q.out
index 79f29de..e14f850 100644
--- a/ql/src/test/results/clientpositive/show_create_table_delimited.q.out
+++ b/ql/src/test/results/clientpositive/show_create_table_delimited.q.out
@@ -28,11 +28,14 @@ CREATE TABLE `tmp_showcrt1`(
   `key` int, 
   `value` string, 
   `newvalue` bigint)
-ROW FORMAT DELIMITED 
-  FIELDS TERMINATED BY ',' 
-  COLLECTION ITEMS TERMINATED BY '|' 
-  MAP KEYS TERMINATED BY '%' 
-  LINES TERMINATED BY '\n' 
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' 
+WITH SERDEPROPERTIES ( 
+  'colelction.delim'='|', 
+  'field.delim'=',', 
+  'line.delim'='\n', 
+  'mapkey.delim'='%', 
+  'serialization.format'=',') 
 STORED AS INPUTFORMAT 
   'org.apache.hadoop.mapred.TextInputFormat' 
 OUTPUTFORMAT 


[3/3] hive git commit: HIVE-12566: Incorrect result returns when using COALESCE in WHERE condition with LEFT JOIN (Chaoyu Tang, reviewed by Xuefu Zhang, Jesus Camacho Rodriguez)

Posted by ct...@apache.org.
HIVE-12566: Incorrect result returns when using COALESCE in WHERE condition with LEFT JOIN (Chaoyu Tang, reviewed by Xuefu Zhang, Jesus Camacho Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/67d095d8
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/67d095d8
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/67d095d8

Branch: refs/heads/branch-2.0
Commit: 67d095d898faf1e390f0934027a2e55d6a9e0e95
Parents: ed97360
Author: ctang <ct...@gmail.com>
Authored: Sat Dec 5 11:51:45 2015 -0500
Committer: ctang <ct...@gmail.com>
Committed: Sat Dec 5 19:50:57 2015 -0500

----------------------------------------------------------------------
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |   2 +-
 .../clientpositive/join_cond_pushdown_unqual5.q |  14 ++
 .../join_cond_pushdown_unqual5.q.out            | 186 +++++++++++++++++++
 3 files changed, 201 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/67d095d8/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 4ebdf90..e1a0c4a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -9643,7 +9643,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
          * as Join conditions
          */
         Set<String> dests = qb.getParseInfo().getClauseNames();
-        if ( dests.size() == 1 ) {
+        if ( dests.size() == 1 && joinTree.getNoOuterJoin()) {
           String dest = dests.iterator().next();
           ASTNode whereClause = qb.getParseInfo().getWhrForClause(dest);
           if ( whereClause != null ) {

http://git-wip-us.apache.org/repos/asf/hive/blob/67d095d8/ql/src/test/queries/clientpositive/join_cond_pushdown_unqual5.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/join_cond_pushdown_unqual5.q b/ql/src/test/queries/clientpositive/join_cond_pushdown_unqual5.q
new file mode 100644
index 0000000..7e6d32b
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/join_cond_pushdown_unqual5.q
@@ -0,0 +1,14 @@
+-- outer join is not qualified for pushing down of where to join condition
+CREATE TABLE ltable (index int, la int, lk1 string, lk2 string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
+CREATE TABLE rtable (ra int, rk1 string, rk2 string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
+
+insert into ltable values (1, null, 'CD5415192314304', '00071'), (2, null, 'CD5415192225530', '00071');
+insert into rtable values (1, 'CD5415192314304', '00071'), (45, 'CD5415192314304', '00072');
+
+set hive.auto.convert.join=false;
+EXPLAIN SELECT * FROM ltable l LEFT OUTER JOIN rtable r on (l.lk1 = r.rk1 AND l.lk2 = r.rk2) WHERE COALESCE(l.la,'EMPTY')=COALESCE(r.ra,'EMPTY');
+SELECT * FROM ltable l LEFT OUTER JOIN rtable r on (l.lk1 = r.rk1 AND l.lk2 = r.rk2) WHERE COALESCE(l.la,'EMPTY')=COALESCE(r.ra,'EMPTY');
+
+set hive.auto.convert.join=true;
+EXPLAIN SELECT * FROM ltable l LEFT OUTER JOIN rtable r on (l.lk1 = r.rk1 AND l.lk2 = r.rk2) WHERE COALESCE(l.la,'EMPTY')=COALESCE(r.ra,'EMPTY');
+SELECT * FROM ltable l LEFT OUTER JOIN rtable r on (l.lk1 = r.rk1 AND l.lk2 = r.rk2) WHERE COALESCE(l.la,'EMPTY')=COALESCE(r.ra,'EMPTY');
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/67d095d8/ql/src/test/results/clientpositive/join_cond_pushdown_unqual5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual5.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual5.q.out
new file mode 100644
index 0000000..ab333b0
--- /dev/null
+++ b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual5.q.out
@@ -0,0 +1,186 @@
+PREHOOK: query: -- outer join is not qualified for pushing down of where to join condition
+CREATE TABLE ltable (index int, la int, lk1 string, lk2 string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ltable
+POSTHOOK: query: -- outer join is not qualified for pushing down of where to join condition
+CREATE TABLE ltable (index int, la int, lk1 string, lk2 string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ltable
+PREHOOK: query: CREATE TABLE rtable (ra int, rk1 string, rk2 string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@rtable
+POSTHOOK: query: CREATE TABLE rtable (ra int, rk1 string, rk2 string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@rtable
+PREHOOK: query: insert into ltable values (1, null, 'CD5415192314304', '00071'), (2, null, 'CD5415192225530', '00071')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@ltable
+POSTHOOK: query: insert into ltable values (1, null, 'CD5415192314304', '00071'), (2, null, 'CD5415192225530', '00071')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@ltable
+POSTHOOK: Lineage: ltable.index EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: ltable.la EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: ltable.lk1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: ltable.lk2 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+PREHOOK: query: insert into rtable values (1, 'CD5415192314304', '00071'), (45, 'CD5415192314304', '00072')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@rtable
+POSTHOOK: query: insert into rtable values (1, 'CD5415192314304', '00071'), (45, 'CD5415192314304', '00072')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@rtable
+POSTHOOK: Lineage: rtable.ra EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: rtable.rk1 SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: rtable.rk2 SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+PREHOOK: query: EXPLAIN SELECT * FROM ltable l LEFT OUTER JOIN rtable r on (l.lk1 = r.rk1 AND l.lk2 = r.rk2) WHERE COALESCE(l.la,'EMPTY')=COALESCE(r.ra,'EMPTY')
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT * FROM ltable l LEFT OUTER JOIN rtable r on (l.lk1 = r.rk1 AND l.lk2 = r.rk2) WHERE COALESCE(l.la,'EMPTY')=COALESCE(r.ra,'EMPTY')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: l
+            Statistics: Num rows: 2 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+            Reduce Output Operator
+              key expressions: lk1 (type: string), lk2 (type: string)
+              sort order: ++
+              Map-reduce partition columns: lk1 (type: string), lk2 (type: string)
+              Statistics: Num rows: 2 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+              value expressions: index (type: int), la (type: int)
+          TableScan
+            alias: r
+            Statistics: Num rows: 2 Data size: 47 Basic stats: COMPLETE Column stats: NONE
+            Reduce Output Operator
+              key expressions: rk1 (type: string), rk2 (type: string)
+              sort order: ++
+              Map-reduce partition columns: rk1 (type: string), rk2 (type: string)
+              Statistics: Num rows: 2 Data size: 47 Basic stats: COMPLETE Column stats: NONE
+              value expressions: ra (type: int)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Outer Join0 to 1
+          keys:
+            0 lk1 (type: string), lk2 (type: string)
+            1 rk1 (type: string), rk2 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col7, _col8, _col9
+          Statistics: Num rows: 2 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            predicate: (COALESCE(_col1,'EMPTY') = COALESCE(_col7,'EMPTY')) (type: boolean)
+            Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: string)
+              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+              Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT * FROM ltable l LEFT OUTER JOIN rtable r on (l.lk1 = r.rk1 AND l.lk2 = r.rk2) WHERE COALESCE(l.la,'EMPTY')=COALESCE(r.ra,'EMPTY')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ltable
+PREHOOK: Input: default@rtable
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM ltable l LEFT OUTER JOIN rtable r on (l.lk1 = r.rk1 AND l.lk2 = r.rk2) WHERE COALESCE(l.la,'EMPTY')=COALESCE(r.ra,'EMPTY')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ltable
+POSTHOOK: Input: default@rtable
+#### A masked pattern was here ####
+2	NULL	CD5415192225530	00071	NULL	NULL	NULL
+PREHOOK: query: EXPLAIN SELECT * FROM ltable l LEFT OUTER JOIN rtable r on (l.lk1 = r.rk1 AND l.lk2 = r.rk2) WHERE COALESCE(l.la,'EMPTY')=COALESCE(r.ra,'EMPTY')
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT * FROM ltable l LEFT OUTER JOIN rtable r on (l.lk1 = r.rk1 AND l.lk2 = r.rk2) WHERE COALESCE(l.la,'EMPTY')=COALESCE(r.ra,'EMPTY')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-4 is a root stage
+  Stage-3 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-4
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        r 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        r 
+          TableScan
+            alias: r
+            Statistics: Num rows: 2 Data size: 47 Basic stats: COMPLETE Column stats: NONE
+            HashTable Sink Operator
+              keys:
+                0 lk1 (type: string), lk2 (type: string)
+                1 rk1 (type: string), rk2 (type: string)
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: l
+            Statistics: Num rows: 2 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+            Map Join Operator
+              condition map:
+                   Left Outer Join0 to 1
+              keys:
+                0 lk1 (type: string), lk2 (type: string)
+                1 rk1 (type: string), rk2 (type: string)
+              outputColumnNames: _col0, _col1, _col2, _col3, _col7, _col8, _col9
+              Statistics: Num rows: 2 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+              Filter Operator
+                predicate: (COALESCE(_col1,'EMPTY') = COALESCE(_col7,'EMPTY')) (type: boolean)
+                Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                  Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT * FROM ltable l LEFT OUTER JOIN rtable r on (l.lk1 = r.rk1 AND l.lk2 = r.rk2) WHERE COALESCE(l.la,'EMPTY')=COALESCE(r.ra,'EMPTY')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ltable
+PREHOOK: Input: default@rtable
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM ltable l LEFT OUTER JOIN rtable r on (l.lk1 = r.rk1 AND l.lk2 = r.rk2) WHERE COALESCE(l.la,'EMPTY')=COALESCE(r.ra,'EMPTY')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ltable
+POSTHOOK: Input: default@rtable
+#### A masked pattern was here ####
+2	NULL	CD5415192225530	00071	NULL	NULL	NULL