You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ct...@apache.org on 2015/12/06 01:53:07 UTC
[1/3] hive git commit: HIVE-12505: Insert overwrite in same encrypted
zone silently fails to remove some existing files (Chaoyu Tang,
reviewed by Aihua Xu)
Repository: hive
Updated Branches:
refs/heads/branch-2.0 f40c1502d -> 67d095d89
HIVE-12505: Insert overwrite in same encrypted zone silently fails to remove some existing files (Chaoyu Tang, reviewed by Aihua Xu)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f9791beb
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f9791beb
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f9791beb
Branch: refs/heads/branch-2.0
Commit: f9791bebaa9d71ff62cecedb0243d09e99abfb38
Parents: f40c150
Author: ctang <ct...@gmail.com>
Authored: Fri Dec 4 10:46:23 2015 -0500
Committer: ctang <ct...@gmail.com>
Committed: Sat Dec 5 19:50:25 2015 -0500
----------------------------------------------------------------------
.../apache/hadoop/hive/common/FileUtils.java | 66 ++++++++--
.../test/resources/testconfiguration.properties | 3 +-
.../apache/hadoop/hive/ql/metadata/Hive.java | 26 +++-
.../clientpositive/encryption_with_trash.q | 33 +++++
.../encrypted/encryption_with_trash.q.out | 122 +++++++++++++++++++
5 files changed, 234 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f9791beb/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
index f943781..5dd9f40 100644
--- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
+++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
@@ -574,7 +574,7 @@ public final class FileUtils {
}
/**
- * Deletes all files under a directory, sending them to the trash. Leaves the directory as is.
+ * Trashes or deletes all files under a directory. Leaves the directory as is.
* @param fs FileSystem to use
* @param f path of directory
* @param conf hive configuration
@@ -582,17 +582,34 @@ public final class FileUtils {
* @throws FileNotFoundException
* @throws IOException
*/
- public static boolean trashFilesUnderDir(FileSystem fs, Path f, Configuration conf) throws FileNotFoundException, IOException {
+ public static boolean trashFilesUnderDir(FileSystem fs, Path f, Configuration conf)
+ throws FileNotFoundException, IOException {
+ return trashFilesUnderDir(fs, f, conf, true);
+ }
+
+ /**
+ * Trashes or deletes all files under a directory. Leaves the directory as is.
+ * @param fs FileSystem to use
+ * @param f path of directory
+ * @param conf hive configuration
+ * @param forceDelete whether to force delete files if trashing does not succeed
+ * @return true if deletion successful
+ * @throws FileNotFoundException
+ * @throws IOException
+ */
+ public static boolean trashFilesUnderDir(FileSystem fs, Path f, Configuration conf,
+ boolean forceDelete) throws FileNotFoundException, IOException {
FileStatus[] statuses = fs.listStatus(f, HIDDEN_FILES_PATH_FILTER);
boolean result = true;
for (FileStatus status : statuses) {
- result = result & moveToTrash(fs, status.getPath(), conf);
+ result = result & moveToTrash(fs, status.getPath(), conf, forceDelete);
}
return result;
}
/**
- * Move a particular file or directory to the trash.
+ * Move a particular file or directory to the trash. If for a certain reason the trashing fails
+ * it will force delete the file or directory
* @param fs FileSystem to use
* @param f path of file or directory to move to trash.
* @param conf
@@ -600,18 +617,47 @@ public final class FileUtils {
* @throws IOException
*/
public static boolean moveToTrash(FileSystem fs, Path f, Configuration conf) throws IOException {
+ return moveToTrash(fs, f, conf, true);
+ }
+
+ /**
+ * Move a particular file or directory to the trash.
+ * @param fs FileSystem to use
+ * @param f path of file or directory to move to trash.
+ * @param conf
+ * @param forceDelete whether to force delete the file or directory if trashing fails
+ * @return true if move successful
+ * @throws IOException
+ */
+ public static boolean moveToTrash(FileSystem fs, Path f, Configuration conf, boolean forceDelete)
+ throws IOException {
LOG.info("deleting " + f);
HadoopShims hadoopShim = ShimLoader.getHadoopShims();
- if (hadoopShim.moveToAppropriateTrash(fs, f, conf)) {
- LOG.info("Moved to trash: " + f);
- return true;
+ boolean result = false;
+ try {
+ result = hadoopShim.moveToAppropriateTrash(fs, f, conf);
+ if (result) {
+ LOG.info("Moved to trash: " + f);
+ return true;
+ }
+ } catch (IOException ioe) {
+ if (forceDelete) {
+ // for whatever failure reason including that trash has lower encryption zone
+ // retry with force delete
+ LOG.warn(ioe.getMessage() + "; Force to delete it.");
+ } else {
+ throw ioe;
+ }
}
- boolean result = fs.delete(f, true);
- if (!result) {
- LOG.error("Failed to delete " + f);
+ if (forceDelete) {
+ result = fs.delete(f, true);
+ if (!result) {
+ LOG.error("Failed to delete " + f);
+ }
}
+
return result;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/f9791beb/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 523f894..f2b4d80 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -474,7 +474,8 @@ encrypted.query.files=encryption_join_unencrypted_tbl.q,\
encryption_drop_table.q \
encryption_insert_values.q \
encryption_drop_view.q \
- encryption_drop_partition.q
+ encryption_drop_partition.q \
+ encryption_with_trash.q
beeline.positive.exclude=add_part_exist.q,\
alter1.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/f9791beb/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index 488d923..a4ada24 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -2953,19 +2953,35 @@ private void constructOneLBLocationMap(FileStatus fSta,
List<List<Path[]>> result = checkPaths(conf, destFs, srcs, srcFs, destf, true);
if (oldPath != null) {
+ boolean oldPathDeleted = false;
+ boolean isOldPathUnderDestf = false;
try {
FileSystem fs2 = oldPath.getFileSystem(conf);
if (fs2.exists(oldPath)) {
// Do not delete oldPath if:
// - destf is subdir of oldPath
//if ( !(fs2.equals(destf.getFileSystem(conf)) && FileUtils.isSubDir(oldPath, destf, fs2)))
- if (FileUtils.isSubDir(oldPath, destf, fs2)) {
- FileUtils.trashFilesUnderDir(fs2, oldPath, conf);
+ isOldPathUnderDestf = FileUtils.isSubDir(oldPath, destf, fs2);
+ if (isOldPathUnderDestf) {
+ // if oldPath is destf or its subdir, it should definitely be deleted, otherwise its
+ // existing content might result in incorrect (extra) data.
+ // But not sure why we changed not to delete the oldPath in HIVE-8750 if it is
+ // not the destf or its subdir?
+ oldPathDeleted = FileUtils.trashFilesUnderDir(fs2, oldPath, conf);
}
}
- } catch (Exception e) {
- //swallow the exception
- LOG.warn("Directory " + oldPath.toString() + " cannot be removed: " + e, e);
+ } catch (IOException e) {
+ if (isOldPathUnderDestf) {
+ // if oldPath is a subdir of destf but it could not be cleaned
+ throw new HiveException("Directory " + oldPath.toString()
+ + " could not be cleaned up.", e);
+ } else {
+ //swallow the exception since it won't affect the final result
+ LOG.warn("Directory " + oldPath.toString() + " cannot be cleaned: " + e, e);
+ }
+ }
+ if (isOldPathUnderDestf && !oldPathDeleted) {
+ throw new HiveException("Destination directory " + destf + " has not been cleaned up.");
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/f9791beb/ql/src/test/queries/clientpositive/encryption_with_trash.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/encryption_with_trash.q b/ql/src/test/queries/clientpositive/encryption_with_trash.q
new file mode 100644
index 0000000..8f8789a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/encryption_with_trash.q
@@ -0,0 +1,33 @@
+set fs.trash.interval=5
+
+-- SORT_QUERY_RESULTS
+
+-- init
+drop table IF EXISTS encryptedTableSrc PURGE;
+drop table IF EXISTS unencryptedTable PURGE;
+
+create table encryptedTableSrc(key string, value string)
+LOCATION '${hiveconf:hive.metastore.warehouse.dir}/encryptedTableSrc';
+
+create table encryptedTable(key string, value string) partitioned by (ds string)
+ LOCATION '${hiveconf:hive.metastore.warehouse.dir}/encryptedTable';
+CRYPTO CREATE_KEY --keyName key_1 --bitLength 128;
+CRYPTO CREATE_ZONE --keyName key_1 --path ${hiveconf:hive.metastore.warehouse.dir}/encryptedTableSrc;
+CRYPTO CREATE_ZONE --keyName key_1 --path ${hiveconf:hive.metastore.warehouse.dir}/encryptedTable;
+
+-- insert src table from values
+insert into table encryptedTableSrc values ('501', 'val_501'), ('502', 'val_502');
+
+insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc;
+select count(*) from encryptedTable where ds='today';
+insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc;
+select count(*) from encryptedTable where ds='today';
+
+insert overwrite table encryptedTable partition (ds='today') select key, value from encryptedTableSrc;
+select count(*) from encryptedTable where ds='today';
+
+-- clean up
+drop table encryptedTable PURGE;
+drop table unencryptedTable PURGE;
+CRYPTO DELETE_KEY --keyName key_1;
+set fs.trash.interval=0
http://git-wip-us.apache.org/repos/asf/hive/blob/f9791beb/ql/src/test/results/clientpositive/encrypted/encryption_with_trash.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_with_trash.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_with_trash.q.out
new file mode 100644
index 0000000..3d1f75f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/encrypted/encryption_with_trash.q.out
@@ -0,0 +1,122 @@
+Warning: Value had a \n character in it.
+PREHOOK: query: drop table IF EXISTS unencryptedTable PURGE
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table IF EXISTS unencryptedTable PURGE
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table encryptedTableSrc(key string, value string)
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@encryptedTableSrc
+POSTHOOK: query: create table encryptedTableSrc(key string, value string)
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@encryptedTableSrc
+PREHOOK: query: create table encryptedTable(key string, value string) partitioned by (ds string)
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@encryptedTable
+POSTHOOK: query: create table encryptedTable(key string, value string) partitioned by (ds string)
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@encryptedTable
+Encryption key created: 'key_1'
+Encryption zone created: '/build/ql/test/data/warehouse/encryptedTableSrc' using key: 'key_1'
+Encryption zone created: '/build/ql/test/data/warehouse/encryptedTable' using key: 'key_1'
+PREHOOK: query: -- insert src table from values
+insert into table encryptedTableSrc values ('501', 'val_501'), ('502', 'val_502')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@encryptedtablesrc
+POSTHOOK: query: -- insert src table from values
+insert into table encryptedTableSrc values ('501', 'val_501'), ('502', 'val_502')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@encryptedtablesrc
+POSTHOOK: Lineage: encryptedtablesrc.key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: encryptedtablesrc.value SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encryptedtablesrc
+PREHOOK: Output: default@encryptedtable@ds=today
+POSTHOOK: query: insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encryptedtablesrc
+POSTHOOK: Output: default@encryptedtable@ds=today
+POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).key SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).value SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from encryptedTable where ds='today'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encryptedtable
+PREHOOK: Input: default@encryptedtable@ds=today
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
+POSTHOOK: query: select count(*) from encryptedTable where ds='today'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encryptedtable
+POSTHOOK: Input: default@encryptedtable@ds=today
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
+2
+PREHOOK: query: insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encryptedtablesrc
+PREHOOK: Output: default@encryptedtable@ds=today
+POSTHOOK: query: insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encryptedtablesrc
+POSTHOOK: Output: default@encryptedtable@ds=today
+POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).key SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).value SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from encryptedTable where ds='today'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encryptedtable
+PREHOOK: Input: default@encryptedtable@ds=today
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
+POSTHOOK: query: select count(*) from encryptedTable where ds='today'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encryptedtable
+POSTHOOK: Input: default@encryptedtable@ds=today
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
+4
+PREHOOK: query: insert overwrite table encryptedTable partition (ds='today') select key, value from encryptedTableSrc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encryptedtablesrc
+PREHOOK: Output: default@encryptedtable@ds=today
+POSTHOOK: query: insert overwrite table encryptedTable partition (ds='today') select key, value from encryptedTableSrc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encryptedtablesrc
+POSTHOOK: Output: default@encryptedtable@ds=today
+POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).key SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).value SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from encryptedTable where ds='today'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encryptedtable
+PREHOOK: Input: default@encryptedtable@ds=today
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
+POSTHOOK: query: select count(*) from encryptedTable where ds='today'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encryptedtable
+POSTHOOK: Input: default@encryptedtable@ds=today
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
+2
+PREHOOK: query: -- clean up
+drop table encryptedTable PURGE
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@encryptedtable
+PREHOOK: Output: default@encryptedtable
+POSTHOOK: query: -- clean up
+drop table encryptedTable PURGE
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@encryptedtable
+POSTHOOK: Output: default@encryptedtable
+PREHOOK: query: drop table unencryptedTable PURGE
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table unencryptedTable PURGE
+POSTHOOK: type: DROPTABLE
+Encryption key deleted: 'key_1'
[2/3] hive git commit: HIVE-12506: SHOW CREATE TABLE command creates a
table that does not work for RCFile format (Chaoyu Tang,
reviewed by Jimmy Xiang)
Posted by ct...@apache.org.
HIVE-12506: SHOW CREATE TABLE command creates a table that does not work for RCFile format (Chaoyu Tang, reviewed by Jimmy Xiang)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ed973608
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ed973608
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ed973608
Branch: refs/heads/branch-2.0
Commit: ed9736080aa960ad7d7c2af58d692e6b4f9d6df5
Parents: f9791be
Author: ctang <ct...@gmail.com>
Authored: Fri Dec 4 11:04:14 2015 -0500
Committer: ctang <ct...@gmail.com>
Committed: Sat Dec 5 19:50:42 2015 -0500
----------------------------------------------------------------------
.../hadoop/hive/metastore/MetaStoreUtils.java | 5 +-
.../org/apache/hadoop/hive/ql/exec/DDLTask.java | 51 ++++----------------
.../results/clientpositive/nullformat.q.out | 6 ++-
.../results/clientpositive/nullformatCTAS.q.out | 6 ++-
.../show_create_table_delimited.q.out | 13 +++--
5 files changed, 28 insertions(+), 53 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/ed973608/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
index 02cbd76..23068f8 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
@@ -90,6 +90,7 @@ public class MetaStoreUtils {
public static final String DEFAULT_DATABASE_NAME = "default";
public static final String DEFAULT_DATABASE_COMMENT = "Default Hive database";
+ public static final String DEFAULT_SERIALIZATION_FORMAT = "1";
public static final String DATABASE_WAREHOUSE_SUFFIX = ".db";
@@ -116,8 +117,8 @@ public class MetaStoreUtils {
SerDeInfo serdeInfo = sd.getSerdeInfo();
serdeInfo.setSerializationLib(LazySimpleSerDe.class.getName());
serdeInfo.setParameters(new HashMap<String, String>());
- serdeInfo.getParameters().put(
- org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT, "1");
+ serdeInfo.getParameters().put(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT,
+ DEFAULT_SERIALIZATION_FORMAT);
List<FieldSchema> fields = new ArrayList<FieldSchema>();
sd.setCols(fields);
http://git-wip-us.apache.org/repos/asf/hive/blob/ed973608/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index a210b95..1264421 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -2117,37 +2117,15 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
StringBuilder tbl_row_format = new StringBuilder();
StorageDescriptor sd = tbl.getTTable().getSd();
SerDeInfo serdeInfo = sd.getSerdeInfo();
- tbl_row_format.append("ROW FORMAT");
+ Map<String, String> serdeParams = serdeInfo.getParameters();
+ tbl_row_format.append("ROW FORMAT SERDE \n");
+ tbl_row_format.append(" '" + escapeHiveCommand(serdeInfo.getSerializationLib()) + "' \n");
if (tbl.getStorageHandler() == null) {
- Map<String, String> serdeParams = serdeInfo.getParameters();
- String[] delimiters = new String[] {
- serdeParams.remove(serdeConstants.FIELD_DELIM),
- serdeParams.remove(serdeConstants.COLLECTION_DELIM),
- serdeParams.remove(serdeConstants.MAPKEY_DELIM),
- serdeParams.remove(serdeConstants.LINE_DELIM),
- serdeParams.remove(serdeConstants.SERIALIZATION_NULL_FORMAT)
- };
- serdeParams.remove(serdeConstants.SERIALIZATION_FORMAT);
- if (containsNonNull(delimiters)) {
- // There is a "serialization.format" property by default,
- // even with a delimited row format.
- // But our result will only cover the following four delimiters.
- tbl_row_format.append(" DELIMITED \n");
-
- // Warn:
- // If the four delimiters all exist in a CREATE TABLE query,
- // this following order needs to be strictly followed,
- // or the query will fail with a ParseException.
- for (int i = 0; i < DELIMITER_PREFIXES.length; i++) {
- if (delimiters[i] != null) {
- tbl_row_format.append(" ").append(DELIMITER_PREFIXES[i]).append(" '");
- tbl_row_format.append(escapeHiveCommand(StringEscapeUtils.escapeJava(delimiters[i])));
- tbl_row_format.append("' \n");
- }
- }
- } else {
- tbl_row_format.append(" SERDE \n '" +
- escapeHiveCommand(serdeInfo.getSerializationLib()) + "' \n");
+ // If serialization.format property has the default value, it will not be included in
+ // SERDE properties
+ if (MetaStoreUtils.DEFAULT_SERIALIZATION_FORMAT.equals(serdeParams.get(
+ serdeConstants.SERIALIZATION_FORMAT))){
+ serdeParams.remove(serdeConstants.SERIALIZATION_FORMAT);
}
if (!serdeParams.isEmpty()) {
appendSerdeParams(tbl_row_format, serdeParams).append(" \n");
@@ -2158,12 +2136,10 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
escapeHiveCommand(sd.getOutputFormat()) + "'");
} else {
duplicateProps.add(META_TABLE_STORAGE);
- tbl_row_format.append(" SERDE \n '" +
- escapeHiveCommand(serdeInfo.getSerializationLib()) + "' \n");
tbl_row_format.append("STORED BY \n '" + escapeHiveCommand(tbl.getParameters().get(
META_TABLE_STORAGE)) + "' \n");
// SerDe Properties
- if (serdeInfo.getParametersSize() > 0) {
+ if (!serdeParams.isEmpty()) {
appendSerdeParams(tbl_row_format, serdeInfo.getParameters());
}
}
@@ -2210,15 +2186,6 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
return prop_string;
}
- private boolean containsNonNull(String[] values) {
- for (String value : values) {
- if (value != null) {
- return true;
- }
- }
- return false;
- }
-
private StringBuilder appendSerdeParams(StringBuilder builder, Map<String, String> serdeParam) {
serdeParam = new TreeMap<String, String>(serdeParam);
builder.append("WITH SERDEPROPERTIES ( \n");
http://git-wip-us.apache.org/repos/asf/hive/blob/ed973608/ql/src/test/results/clientpositive/nullformat.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/nullformat.q.out b/ql/src/test/results/clientpositive/nullformat.q.out
index 6cfc2b8..af91470 100644
--- a/ql/src/test/results/clientpositive/nullformat.q.out
+++ b/ql/src/test/results/clientpositive/nullformat.q.out
@@ -82,8 +82,10 @@ POSTHOOK: Input: default@null_tab1
CREATE TABLE `null_tab1`(
`a` string,
`b` string)
-ROW FORMAT DELIMITED
- NULL DEFINED AS 'fooNull'
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+WITH SERDEPROPERTIES (
+ 'serialization.null.format'='fooNull')
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
http://git-wip-us.apache.org/repos/asf/hive/blob/ed973608/ql/src/test/results/clientpositive/nullformatCTAS.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/nullformatCTAS.q.out b/ql/src/test/results/clientpositive/nullformatCTAS.q.out
index 7adca68..d6b22b1 100644
--- a/ql/src/test/results/clientpositive/nullformatCTAS.q.out
+++ b/ql/src/test/results/clientpositive/nullformatCTAS.q.out
@@ -163,8 +163,10 @@ POSTHOOK: Input: default@null_tab3
CREATE TABLE `null_tab3`(
`a` string,
`b` string)
-ROW FORMAT DELIMITED
- NULL DEFINED AS 'fooNull'
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+WITH SERDEPROPERTIES (
+ 'serialization.null.format'='fooNull')
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
http://git-wip-us.apache.org/repos/asf/hive/blob/ed973608/ql/src/test/results/clientpositive/show_create_table_delimited.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/show_create_table_delimited.q.out b/ql/src/test/results/clientpositive/show_create_table_delimited.q.out
index 79f29de..e14f850 100644
--- a/ql/src/test/results/clientpositive/show_create_table_delimited.q.out
+++ b/ql/src/test/results/clientpositive/show_create_table_delimited.q.out
@@ -28,11 +28,14 @@ CREATE TABLE `tmp_showcrt1`(
`key` int,
`value` string,
`newvalue` bigint)
-ROW FORMAT DELIMITED
- FIELDS TERMINATED BY ','
- COLLECTION ITEMS TERMINATED BY '|'
- MAP KEYS TERMINATED BY '%'
- LINES TERMINATED BY '\n'
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+WITH SERDEPROPERTIES (
+ 'colelction.delim'='|',
+ 'field.delim'=',',
+ 'line.delim'='\n',
+ 'mapkey.delim'='%',
+ 'serialization.format'=',')
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
[3/3] hive git commit: HIVE-12566: Incorrect result returns when
using COALESCE in WHERE condition with LEFT JOIN (Chaoyu Tang,
reviewed by Xuefu Zhang, Jesus Camacho Rodriguez)
Posted by ct...@apache.org.
HIVE-12566: Incorrect result returns when using COALESCE in WHERE condition with LEFT JOIN (Chaoyu Tang, reviewed by Xuefu Zhang, Jesus Camacho Rodriguez)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/67d095d8
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/67d095d8
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/67d095d8
Branch: refs/heads/branch-2.0
Commit: 67d095d898faf1e390f0934027a2e55d6a9e0e95
Parents: ed97360
Author: ctang <ct...@gmail.com>
Authored: Sat Dec 5 11:51:45 2015 -0500
Committer: ctang <ct...@gmail.com>
Committed: Sat Dec 5 19:50:57 2015 -0500
----------------------------------------------------------------------
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 2 +-
.../clientpositive/join_cond_pushdown_unqual5.q | 14 ++
.../join_cond_pushdown_unqual5.q.out | 186 +++++++++++++++++++
3 files changed, 201 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/67d095d8/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 4ebdf90..e1a0c4a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -9643,7 +9643,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
* as Join conditions
*/
Set<String> dests = qb.getParseInfo().getClauseNames();
- if ( dests.size() == 1 ) {
+ if ( dests.size() == 1 && joinTree.getNoOuterJoin()) {
String dest = dests.iterator().next();
ASTNode whereClause = qb.getParseInfo().getWhrForClause(dest);
if ( whereClause != null ) {
http://git-wip-us.apache.org/repos/asf/hive/blob/67d095d8/ql/src/test/queries/clientpositive/join_cond_pushdown_unqual5.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/join_cond_pushdown_unqual5.q b/ql/src/test/queries/clientpositive/join_cond_pushdown_unqual5.q
new file mode 100644
index 0000000..7e6d32b
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/join_cond_pushdown_unqual5.q
@@ -0,0 +1,14 @@
+-- outer join is not qualified for pushing down of where to join condition
+CREATE TABLE ltable (index int, la int, lk1 string, lk2 string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
+CREATE TABLE rtable (ra int, rk1 string, rk2 string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
+
+insert into ltable values (1, null, 'CD5415192314304', '00071'), (2, null, 'CD5415192225530', '00071');
+insert into rtable values (1, 'CD5415192314304', '00071'), (45, 'CD5415192314304', '00072');
+
+set hive.auto.convert.join=false;
+EXPLAIN SELECT * FROM ltable l LEFT OUTER JOIN rtable r on (l.lk1 = r.rk1 AND l.lk2 = r.rk2) WHERE COALESCE(l.la,'EMPTY')=COALESCE(r.ra,'EMPTY');
+SELECT * FROM ltable l LEFT OUTER JOIN rtable r on (l.lk1 = r.rk1 AND l.lk2 = r.rk2) WHERE COALESCE(l.la,'EMPTY')=COALESCE(r.ra,'EMPTY');
+
+set hive.auto.convert.join=true;
+EXPLAIN SELECT * FROM ltable l LEFT OUTER JOIN rtable r on (l.lk1 = r.rk1 AND l.lk2 = r.rk2) WHERE COALESCE(l.la,'EMPTY')=COALESCE(r.ra,'EMPTY');
+SELECT * FROM ltable l LEFT OUTER JOIN rtable r on (l.lk1 = r.rk1 AND l.lk2 = r.rk2) WHERE COALESCE(l.la,'EMPTY')=COALESCE(r.ra,'EMPTY');
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/67d095d8/ql/src/test/results/clientpositive/join_cond_pushdown_unqual5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual5.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual5.q.out
new file mode 100644
index 0000000..ab333b0
--- /dev/null
+++ b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual5.q.out
@@ -0,0 +1,186 @@
+PREHOOK: query: -- outer join is not qualified for pushing down of where to join condition
+CREATE TABLE ltable (index int, la int, lk1 string, lk2 string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ltable
+POSTHOOK: query: -- outer join is not qualified for pushing down of where to join condition
+CREATE TABLE ltable (index int, la int, lk1 string, lk2 string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ltable
+PREHOOK: query: CREATE TABLE rtable (ra int, rk1 string, rk2 string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@rtable
+POSTHOOK: query: CREATE TABLE rtable (ra int, rk1 string, rk2 string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@rtable
+PREHOOK: query: insert into ltable values (1, null, 'CD5415192314304', '00071'), (2, null, 'CD5415192225530', '00071')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@ltable
+POSTHOOK: query: insert into ltable values (1, null, 'CD5415192314304', '00071'), (2, null, 'CD5415192225530', '00071')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@ltable
+POSTHOOK: Lineage: ltable.index EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: ltable.la EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: ltable.lk1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: ltable.lk2 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+PREHOOK: query: insert into rtable values (1, 'CD5415192314304', '00071'), (45, 'CD5415192314304', '00072')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@rtable
+POSTHOOK: query: insert into rtable values (1, 'CD5415192314304', '00071'), (45, 'CD5415192314304', '00072')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@rtable
+POSTHOOK: Lineage: rtable.ra EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: rtable.rk1 SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: rtable.rk2 SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+PREHOOK: query: EXPLAIN SELECT * FROM ltable l LEFT OUTER JOIN rtable r on (l.lk1 = r.rk1 AND l.lk2 = r.rk2) WHERE COALESCE(l.la,'EMPTY')=COALESCE(r.ra,'EMPTY')
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT * FROM ltable l LEFT OUTER JOIN rtable r on (l.lk1 = r.rk1 AND l.lk2 = r.rk2) WHERE COALESCE(l.la,'EMPTY')=COALESCE(r.ra,'EMPTY')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: l
+ Statistics: Num rows: 2 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: lk1 (type: string), lk2 (type: string)
+ sort order: ++
+ Map-reduce partition columns: lk1 (type: string), lk2 (type: string)
+ Statistics: Num rows: 2 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+ value expressions: index (type: int), la (type: int)
+ TableScan
+ alias: r
+ Statistics: Num rows: 2 Data size: 47 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: rk1 (type: string), rk2 (type: string)
+ sort order: ++
+ Map-reduce partition columns: rk1 (type: string), rk2 (type: string)
+ Statistics: Num rows: 2 Data size: 47 Basic stats: COMPLETE Column stats: NONE
+ value expressions: ra (type: int)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 lk1 (type: string), lk2 (type: string)
+ 1 rk1 (type: string), rk2 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col7, _col8, _col9
+ Statistics: Num rows: 2 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (COALESCE(_col1,'EMPTY') = COALESCE(_col7,'EMPTY')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT * FROM ltable l LEFT OUTER JOIN rtable r on (l.lk1 = r.rk1 AND l.lk2 = r.rk2) WHERE COALESCE(l.la,'EMPTY')=COALESCE(r.ra,'EMPTY')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ltable
+PREHOOK: Input: default@rtable
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM ltable l LEFT OUTER JOIN rtable r on (l.lk1 = r.rk1 AND l.lk2 = r.rk2) WHERE COALESCE(l.la,'EMPTY')=COALESCE(r.ra,'EMPTY')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ltable
+POSTHOOK: Input: default@rtable
+#### A masked pattern was here ####
+2 NULL CD5415192225530 00071 NULL NULL NULL
+PREHOOK: query: EXPLAIN SELECT * FROM ltable l LEFT OUTER JOIN rtable r on (l.lk1 = r.rk1 AND l.lk2 = r.rk2) WHERE COALESCE(l.la,'EMPTY')=COALESCE(r.ra,'EMPTY')
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT * FROM ltable l LEFT OUTER JOIN rtable r on (l.lk1 = r.rk1 AND l.lk2 = r.rk2) WHERE COALESCE(l.la,'EMPTY')=COALESCE(r.ra,'EMPTY')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-4 is a root stage
+ Stage-3 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-4
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ r
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ r
+ TableScan
+ alias: r
+ Statistics: Num rows: 2 Data size: 47 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 lk1 (type: string), lk2 (type: string)
+ 1 rk1 (type: string), rk2 (type: string)
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: l
+ Statistics: Num rows: 2 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 lk1 (type: string), lk2 (type: string)
+ 1 rk1 (type: string), rk2 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col7, _col8, _col9
+ Statistics: Num rows: 2 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (COALESCE(_col1,'EMPTY') = COALESCE(_col7,'EMPTY')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT * FROM ltable l LEFT OUTER JOIN rtable r on (l.lk1 = r.rk1 AND l.lk2 = r.rk2) WHERE COALESCE(l.la,'EMPTY')=COALESCE(r.ra,'EMPTY')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ltable
+PREHOOK: Input: default@rtable
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM ltable l LEFT OUTER JOIN rtable r on (l.lk1 = r.rk1 AND l.lk2 = r.rk2) WHERE COALESCE(l.la,'EMPTY')=COALESCE(r.ra,'EMPTY')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ltable
+POSTHOOK: Input: default@rtable
+#### A masked pattern was here ####
+2 NULL CD5415192225530 00071 NULL NULL NULL