Posted to commits@hive.apache.org by ek...@apache.org on 2015/05/23 00:05:16 UTC
hive git commit: HIVE-10658 - Insert with values clause may expose data that should be encrypted
Repository: hive
Updated Branches:
refs/heads/branch-1.2 9253f5a0d -> 7b89fad81
HIVE-10658 - Insert with values clause may expose data that should be encrypted
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7b89fad8
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7b89fad8
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7b89fad8
Branch: refs/heads/branch-1.2
Commit: 7b89fad8107b678a27d26931d5d93d91e9544a5a
Parents: 9253f5a
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Fri May 22 15:05:06 2015 -0700
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Fri May 22 15:05:06 2015 -0700
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 3 +-
.../org/apache/hadoop/hive/ql/parse/QB.java | 19 ++++++
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 64 ++++++++++++++++--
.../apache/hadoop/hive/ql/parse/TestIUD.java | 7 ++
.../clientpositive/encryption_insert_values.q | 15 +++++
.../encryption_insert_partition_dynamic.q.out | 6 +-
.../encryption_insert_partition_static.q.out | 6 +-
.../encrypted/encryption_insert_values.q.out | 71 ++++++++++++++++++++
8 files changed, 182 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
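The change works in two steps: preProcessForInsert() walks the INSERT AST and records the paths of any target tables that sit inside an HDFS encryption zone, and genValuesTempTable() then creates the temporary VALUES table under that target's staging directory instead of the session scratch space, so the literal data never lands on an unencrypted path. As a rough illustration of the underlying zone check only (a minimal sketch using the stock HdfsAdmin API; the class and helper names below are hypothetical and this is not the patch's isPathEncrypted() helper):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.client.HdfsAdmin;
import org.apache.hadoop.hdfs.protocol.EncryptionZone;

final class EncryptionZoneCheck {
  /** True if the path sits inside an HDFS encryption zone (illustrative sketch). */
  static boolean isInEncryptionZone(Path tablePath, Configuration conf) throws Exception {
    FileSystem fs = tablePath.getFileSystem(conf);
    if (!(fs instanceof DistributedFileSystem)) {
      return false; // encryption zones only exist on HDFS
    }
    HdfsAdmin admin = new HdfsAdmin(fs.getUri(), conf);
    // getEncryptionZoneForPath() returns null when the path is not under any zone
    EncryptionZone zone = admin.getEncryptionZoneForPath(tablePath);
    return zone != null;
  }
}

A path counts as protected whenever getEncryptionZoneForPath() finds an enclosing zone; the diff below wires an equivalent check into semantic analysis.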
http://git-wip-us.apache.org/repos/asf/hive/blob/7b89fad8/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index b9d85f6..9e95d1b 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -350,7 +350,8 @@ encrypted.query.files=encryption_join_unencrypted_tbl.q,\
encryption_load_data_to_encrypted_tables.q, \
encryption_unencrypted_nonhdfs_external_tables.q \
encryption_move_tbl.q \
- encryption_drop_table.q
+ encryption_drop_table.q \
+ encryption_insert_values.q
beeline.positive.exclude=add_part_exist.q,\
alter1.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/7b89fad8/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
index 7f4d0ff..0ddc221 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.parse;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
@@ -27,6 +28,7 @@ import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.plan.CreateTableDesc;
@@ -55,6 +57,7 @@ public class QB {
private boolean isAnalyzeRewrite;
private CreateTableDesc tblDesc = null; // table descriptor of the final
private CreateTableDesc directoryDesc = null ;
+ private List<Path> encryptedTargetTablePaths;
// used by PTFs
/*
@@ -387,4 +390,20 @@ public class QB {
return havingClauseSubQueryPredicate;
}
+ void addEncryptedTargetTablePath(Path p) {
+ if(encryptedTargetTablePaths == null) {
+ encryptedTargetTablePaths = new ArrayList<>();
+ }
+ encryptedTargetTablePaths.add(p);
+ }
+ /**
+ * List of paths of encrypted target tables of an insert statement.
+ * Used to support Insert ... values(...)
+ */
+ List<Path> getEncryptedTargetTablePaths() {
+ if(encryptedTargetTablePaths == null) {
+ return Collections.emptyList();
+ }
+ return encryptedTargetTablePaths;
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/7b89fad8/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 675ad7a..bf889fc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -206,6 +206,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.hive.shims.HadoopShims;
+import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.hive.shims.Utils;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.mapred.InputFormat;
@@ -718,8 +719,19 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
return this.nameToSplitSample;
}
- // Generate a temp table out of a value clause
- private ASTNode genValuesTempTable(ASTNode originalFrom) throws SemanticException {
+ /**
+ * Generate a temp table out of a values clause
+ * See also {@link #preProcessForInsert(ASTNode, QB)}
+ */
+ private ASTNode genValuesTempTable(ASTNode originalFrom, QB qb) throws SemanticException {
+ Path dataDir = null;
+ if(!qb.getEncryptedTargetTablePaths().isEmpty()) {
+ //currently only Insert into T values(...) is supported thus only 1 values clause
+ //and only 1 target table are possible. If/when support for
+ //select ... from values(...) is added an insert statement may have multiple
+ //encrypted target tables.
+ dataDir = ctx.getMRTmpPath(qb.getEncryptedTargetTablePaths().get(0).toUri());
+ }
// Pick a name for the table
SessionState ss = SessionState.get();
String tableName = VALUES_TMP_TABLE_NAME_PREFIX + ss.getNextValuesTempTableSuffix();
@@ -756,7 +768,14 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
Path tablePath = null;
FileSystem fs = null;
try {
- tablePath = Warehouse.getDnsPath(new Path(ss.getTempTableSpace(), tableName), conf);
+ if(dataDir == null) {
+ tablePath = Warehouse.getDnsPath(new Path(ss.getTempTableSpace(), tableName), conf);
+ }
+ else {
+ //if target table of insert is encrypted, make sure temporary table data is stored
+ //similarly encrypted
+ tablePath = Warehouse.getDnsPath(new Path(dataDir, tableName), conf);
+ }
fs = tablePath.getFileSystem(conf);
fs.mkdirs(tablePath);
Path dataFile = new Path(tablePath, "data_file");
@@ -1200,7 +1219,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
} else if (frm.getToken().getType() == HiveParser.TOK_VIRTUAL_TABLE) {
// Create a temp table with the passed values in it then rewrite this portion of the
// tree to be from that table.
- ASTNode newFrom = genValuesTempTable(frm);
+ ASTNode newFrom = genValuesTempTable(frm, qb);
ast.setChild(0, newFrom);
processTable(qb, newFrom);
} else if (frm.getToken().getType() == HiveParser.TOK_SUBQUERY) {
@@ -10018,6 +10037,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
// 4. continue analyzing from the child ASTNode.
Phase1Ctx ctx_1 = initPhase1Ctx();
+ preProcessForInsert(child, qb);
if (!doPhase1(child, qb, ctx_1, plannerCtx)) {
// if phase1Result false return
return false;
@@ -10033,6 +10053,42 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
return true;
}
+ /**
+ * This walks the AST of an INSERT statement and assembles a list of target tables
+ * which are in an HDFS encryption zone. This is needed to make sure that
+ * the data from the values clause of Insert ... values(...) is stored securely.
+ * See also {@link #genValuesTempTable(ASTNode, QB)}
+ * @throws SemanticException
+ */
+ private void preProcessForInsert(ASTNode node, QB qb) throws SemanticException {
+ try {
+ if(!(node != null && node.getToken() != null && node.getToken().getType() == HiveParser.TOK_QUERY)) {
+ return;
+ }
+ for (Node child : node.getChildren()) {
+ //each insert of multi insert looks like
+ //(TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME T1)))
+ if (((ASTNode) child).getToken().getType() != HiveParser.TOK_INSERT) {
+ continue;
+ }
+ ASTNode n = (ASTNode) ((ASTNode) child).getFirstChildWithType(HiveParser.TOK_INSERT_INTO);
+ if (n == null) continue;
+ n = (ASTNode) n.getFirstChildWithType(HiveParser.TOK_TAB);
+ if (n == null) continue;
+ n = (ASTNode) n.getFirstChildWithType(HiveParser.TOK_TABNAME);
+ if (n == null) continue;
+ String[] dbTab = getQualifiedTableName(n);
+ Table t = db.getTable(dbTab[0], dbTab[1]);
+ Path tablePath = t.getPath();
+ if (isPathEncrypted(tablePath)) {
+ qb.addEncryptedTargetTablePath(tablePath);
+ }
+ }
+ }
+ catch(Exception ex) {
+ throw new SemanticException(ex);
+ }
+ }
Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticException {
return genPlan(qb);
}
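The effect of the dataDir branch added above can be summarized as a placement rule: if preProcessForInsert() recorded an encrypted target table, the Values__Tmp__Table__N directory is created under that target's staging path (ctx.getMRTmpPath(...) resolves to a .hive-staging directory under the table, i.e. inside the same encryption zone); otherwise the session's temp-table space is used as before. A minimal, self-contained sketch of that rule follows; the method and parameter names are hypothetical stand-ins for the Hive internals (ctx.getMRTmpPath(), Warehouse.getDnsPath(), SessionState.getTempTableSpace()):

import java.util.List;

import org.apache.hadoop.fs.Path;

final class ValuesTempTablePlacement {
  /**
   * Sketch of the placement rule: keep the VALUES temp table inside the encrypted
   * target's staging dir so it inherits that encryption zone, otherwise fall back
   * to the session's ordinary temp-table space.
   */
  static Path chooseTempTablePath(List<Path> encryptedTargets, Path sessionTempSpace,
      Path stagingDirForTarget, String tableName) {
    if (encryptedTargets.isEmpty()) {
      // no encrypted target: the plain scratch space is acceptable
      return new Path(sessionTempSpace, tableName);
    }
    // stagingDirForTarget stands in for ctx.getMRTmpPath(target.toUri()), which the
    // patch resolves under the target table and therefore inside its encryption zone
    return new Path(stagingDirForTarget, tableName);
  }
}

The q.out changes below confirm this behaviour: the masked temp-table paths now resolve under data/warehouse/encryptedTable/.hive-staging rather than the default scratch directory.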
http://git-wip-us.apache.org/repos/asf/hive/blob/7b89fad8/ql/src/test/org/apache/hadoop/hive/ql/parse/TestIUD.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestIUD.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestIUD.java
index febf6c5..9d4457c 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestIUD.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestIUD.java
@@ -297,4 +297,11 @@ public class TestIUD {
"(TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))",
ast.toStringTree());
}
+ @Test
+ public void testMultiInsert() throws ParseException {
+ ASTNode ast = parse("from S insert into T1 select a, b insert into T2 select c, d");
+ Assert.assertEquals("AST doesn't match", "(TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME S))) " +
+ "(TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME T1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)))) " +
+ "(TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME T2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL c)) (TOK_SELEXPR (TOK_TABLE_OR_COL d)))))", ast.toStringTree());
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/7b89fad8/ql/src/test/queries/clientpositive/encryption_insert_values.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/encryption_insert_values.q b/ql/src/test/queries/clientpositive/encryption_insert_values.q
new file mode 100644
index 0000000..2dd3e9a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/encryption_insert_values.q
@@ -0,0 +1,15 @@
+-- SORT_QUERY_RESULTS;
+
+DROP TABLE IF EXISTS encrypted_table PURGE;
+CREATE TABLE encrypted_table (key INT, value STRING) LOCATION '${hiveconf:hive.metastore.warehouse.dir}/default/encrypted_table';
+CRYPTO CREATE_KEY --keyName key_128 --bitLength 128;
+CRYPTO CREATE_ZONE --keyName key_128 --path ${hiveconf:hive.metastore.warehouse.dir}/default/encrypted_table;
+
+INSERT INTO encrypted_table values(1,'foo'),(2,'bar');
+
+select * from encrypted_table;
+
+-- this checks that we've actually created temp table data under encrypted_table folder
+describe formatted values__tmp__table__1;
+
+CRYPTO DELETE_KEY --keyName key_128;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/7b89fad8/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_dynamic.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_dynamic.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_dynamic.q.out
index cb6dc5c..31d9a6e 100644
--- a/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_dynamic.q.out
+++ b/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_dynamic.q.out
@@ -93,9 +93,9 @@ STAGE PLANS:
value expressions: _col0 (type: string), _col1 (type: string)
auto parallelism: false
Path -> Alias:
-#### A masked pattern was here ####
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
Path -> Partition:
-#### A masked pattern was here ####
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
Partition
base file name: Values__Tmp__Table__1
input format: org.apache.hadoop.mapred.TextInputFormat
@@ -106,6 +106,7 @@ STAGE PLANS:
columns.comments
columns.types string:string
#### A masked pattern was here ####
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
name default.values__tmp__table__1
serialization.ddl struct values__tmp__table__1 { string tmp_values_col1, string tmp_values_col2}
serialization.format 1
@@ -120,6 +121,7 @@ STAGE PLANS:
columns.comments
columns.types string:string
#### A masked pattern was here ####
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
name default.values__tmp__table__1
serialization.ddl struct values__tmp__table__1 { string tmp_values_col1, string tmp_values_col2}
serialization.format 1
http://git-wip-us.apache.org/repos/asf/hive/blob/7b89fad8/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_static.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_static.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_static.q.out
index 8966608..c6e5ee1 100644
--- a/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_static.q.out
+++ b/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_static.q.out
@@ -96,9 +96,9 @@ STAGE PLANS:
value expressions: _col0 (type: string), _col1 (type: string)
auto parallelism: false
Path -> Alias:
-#### A masked pattern was here ####
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
Path -> Partition:
-#### A masked pattern was here ####
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
Partition
base file name: Values__Tmp__Table__1
input format: org.apache.hadoop.mapred.TextInputFormat
@@ -109,6 +109,7 @@ STAGE PLANS:
columns.comments
columns.types string:string
#### A masked pattern was here ####
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
name default.values__tmp__table__1
serialization.ddl struct values__tmp__table__1 { string tmp_values_col1, string tmp_values_col2}
serialization.format 1
@@ -123,6 +124,7 @@ STAGE PLANS:
columns.comments
columns.types string:string
#### A masked pattern was here ####
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
name default.values__tmp__table__1
serialization.ddl struct values__tmp__table__1 { string tmp_values_col1, string tmp_values_col2}
serialization.format 1
http://git-wip-us.apache.org/repos/asf/hive/blob/7b89fad8/ql/src/test/results/clientpositive/encrypted/encryption_insert_values.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_insert_values.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_insert_values.q.out
new file mode 100644
index 0000000..888a612
--- /dev/null
+++ b/ql/src/test/results/clientpositive/encrypted/encryption_insert_values.q.out
@@ -0,0 +1,71 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS;
+
+DROP TABLE IF EXISTS encrypted_table PURGE
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- SORT_QUERY_RESULTS;
+
+DROP TABLE IF EXISTS encrypted_table PURGE
+POSTHOOK: type: DROPTABLE
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@encrypted_table
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@encrypted_table
+Encryption key created: 'key_128'
+Encryption zone created: '/build/ql/test/data/warehouse/default/encrypted_table' using key: 'key_128'
+PREHOOK: query: INSERT INTO encrypted_table values(1,'foo'),(2,'bar')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@encrypted_table
+POSTHOOK: query: INSERT INTO encrypted_table values(1,'foo'),(2,'bar')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@encrypted_table
+POSTHOOK: Lineage: encrypted_table.key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: encrypted_table.value SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: select * from encrypted_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encrypted_table
+#### A PARTIAL masked pattern was here #### data/warehouse/default/encrypted_table/.hive-staging
+POSTHOOK: query: select * from encrypted_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encrypted_table
+#### A PARTIAL masked pattern was here #### data/warehouse/default/encrypted_table/.hive-staging
+1 foo
+2 bar
+PREHOOK: query: -- this checks that we've actually created temp table data under encrypted_table folder
+describe formatted values__tmp__table__1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@values__tmp__table__1
+POSTHOOK: query: -- this checks that we've actually created temp table data under encrypted_table folder
+describe formatted values__tmp__table__1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@values__tmp__table__1
+# col_name data_type comment
+
+tmp_values_col1 string
+tmp_values_col2 string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A PARTIAL masked pattern was here #### data/warehouse/default/encrypted_table/.hive-staging
+Table Type: MANAGED_TABLE
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1