You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by tc...@apache.org on 2023/01/04 04:36:07 UTC
[hive] branch master updated: HIVE-26555 Read-only mode for Hive database (#3614) (Teddy Choi, reviewed by Laszlo Bodor)
This is an automated email from the ASF dual-hosted git repository.
tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new ac1cfe2294d HIVE-26555 Read-only mode for Hive database (#3614) (Teddy Choi, reviewed by Laszlo Bodor)
ac1cfe2294d is described below
commit ac1cfe2294d1b7b8c02b60899d69868920ede0f5
Author: Teddy Choi <tc...@apache.org>
AuthorDate: Wed Jan 4 13:35:57 2023 +0900
HIVE-26555 Read-only mode for Hive database (#3614) (Teddy Choi, reviewed by Laszlo Bodor)
---
.../java/org/apache/hadoop/hive/ql/ErrorMsg.java | 1 +
.../hive/ql/hooks/EnforceReadOnlyDatabaseHook.java | 196 +++++++++++++++++++++
.../test/queries/clientnegative/read_only_delete.q | 23 +++
.../test/queries/clientnegative/read_only_insert.q | 23 +++
.../test/queries/clientpositive/read_only_hook.q | 27 +++
.../results/clientnegative/read_only_delete.q.out | 34 ++++
.../results/clientnegative/read_only_insert.q.out | 35 ++++
.../clientpositive/llap/read_only_hook.q.out | 71 ++++++++
8 files changed, 410 insertions(+)
diff --git a/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index 1a3ad38acef..5e4fbf8e642 100644
--- a/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -485,6 +485,7 @@ public enum ErrorMsg {
CTLF_UNSUPPORTED_FORMAT(10434, "CREATE TABLE LIKE FILE is not supported by the ''{0}'' file format", true),
NON_NATIVE_ACID_UPDATE(10435, "Update and Merge into non-native ACID table is only supported when " +
HiveConf.ConfVars.SPLIT_UPDATE.varname + " is true."),
+ READ_ONLY_DATABASE(10436, "Database {0} is read-only", true),
//========================== 20000 range starts here ========================//
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/EnforceReadOnlyDatabaseHook.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/EnforceReadOnlyDatabaseHook.java
new file mode 100644
index 00000000000..c343fc7b14f
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/EnforceReadOnlyDatabaseHook.java
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.hooks;
+
+import com.google.common.collect.Sets;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.ql.ErrorMsg;
+import org.apache.hadoop.hive.ql.QueryState;
+import org.apache.hadoop.hive.ql.ddl.DDLDesc;
+import org.apache.hadoop.hive.ql.ddl.DDLWork;
+import org.apache.hadoop.hive.ql.ddl.database.alter.poperties.AlterDatabaseSetPropertiesDesc;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.HiveOperation;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.Serializable;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * EnforceReadOnlyDatabaseHook is a hook that disallows write operations on read-only databases.
+ * It's enforced when "hive.exec.pre.hooks" has "org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyDatabaseHook" and
+ * a database has 'readonly'='true' property.
+ */
+public class EnforceReadOnlyDatabaseHook implements ExecuteWithHookContext {
+ public static final String READONLY = "readonly";
+ private static final Logger LOG = LoggerFactory.getLogger(EnforceReadOnlyDatabaseHook.class);
+
+ @Override
+ public void run(HookContext hookContext) throws Exception {
+ assert(hookContext.getHookType() == HookContext.HookType.PRE_EXEC_HOOK);
+ final QueryState queryState = hookContext.getQueryState();
+ final HiveOperation hiveOperation = queryState.getHiveOperation();
+
+ // Allow read-only type operations, excluding query.
+ // A query can be EXPLAIN, SELECT, INSERT, UPDATE, or DELETE.
+ if (isReadOnlyOperation(hiveOperation)) {
+ return;
+ }
+
+ // Allow EXPLAIN or SELECT query operations, disallow INSERT, UPDATE, and DELETE.
+ if (isExplainOrSelectQuery(hookContext)) {
+ return;
+ }
+
+ // Allow ALTERDATABASE operations to make writable.
+ // It's a special allowed case to get out from readonly mode.
+ if (isAlterDbWritable(hookContext)) {
+ return;
+ }
+ // The second exception is to load a dumped database.
+ if (hiveOperation == HiveOperation.REPLLOAD) {
+ return;
+ }
+
+ // Now the remaining operation is a write operation, as a read operation is already allowed.
+
+ // Disallow write operations on a read-only database.
+ checkReadOnlyDbAsOutput(hookContext);
+
+ // Allow write operations on writable databases.
+ }
+
+ private static void checkReadOnlyDbAsOutput(HookContext hookContext) throws HiveException {
+ LOG.debug(hookContext.getQueryPlan().getOutputs().toString());
+ // If it has a data/metadata change operation on a read-only database, throw an exception.
+ final Set<WriteEntity> outputs = hookContext.getQueryPlan().getOutputs();
+ for (WriteEntity output: outputs) {
+ // Get the database.
+ final Database database;
+ final Hive hive = SessionState.get().getHiveDb();
+ if (output.getDatabase() == null) {
+ // For a table, get its database.
+ database = hive.getDatabase(output.getTable().getDbName());
+ } else {
+ // For a database, allow new one, since it's not read-only yet.
+ database = output.getDatabase();
+ final boolean exists = hive.databaseExists(database.getName());
+ LOG.debug("database exists: {}", exists);
+ if (!exists) {
+ continue;
+ }
+ }
+
+ // Read the database property.
+ final Map<String, String> parameters = database.getParameters();
+ LOG.debug("database name: " + database.getName() + " param: " + database.getParameters());
+
+ // If it's a read-only database, disallow it.
+ if (parameters == null) {
+ continue;
+ }
+ if (parameters.isEmpty()) {
+ continue;
+ }
+ if (!parameters.containsKey(READONLY)) {
+ continue;
+ }
+ if ("true".equalsIgnoreCase(parameters.get(READONLY))) {
+ throw new SemanticException(ErrorMsg.READ_ONLY_DATABASE, database.getName());
+ }
+ }
+ }
+
+ private static boolean isExplainOrSelectQuery(HookContext hookContext) {
+ final QueryState queryState = hookContext.getQueryState();
+ final HiveOperation hiveOperation = queryState.getHiveOperation();
+
+ // Allow EXPLAIN, SELECT queries, not INSERT, UPDATE, DELETE queries.
+ if (hiveOperation == HiveOperation.QUERY) {
+ if (hookContext.getQueryPlan().isExplain()) {
+ return true;
+ }
+ final String upper = queryState.getQueryString().trim().toUpperCase();
+ if (upper.startsWith("SELECT")) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Checks whether it's an ALTERDATABASE statement without {"readonly":"true"} property.
+ * 1. The properties are removed.
+ * 2. "readonly" is removed from the properties.
+ * 3. "readonly" becomes non-true value.
+ */
+ private static boolean isAlterDbWritable(HookContext hookContext) {
+ final HiveOperation hiveOperation = hookContext.getQueryState().getHiveOperation();
+ if (hiveOperation == HiveOperation.ALTERDATABASE) {
+ final List<Task<?>> rootTasks = hookContext.getQueryPlan().getRootTasks();
+ if (rootTasks.size() == 1) {
+ final Serializable rootWork = rootTasks.get(0).getWork();
+ if (rootWork instanceof DDLWork) {
+ final DDLWork ddlWork = (DDLWork) rootWork;
+ final DDLDesc ddlDesc = ddlWork.getDDLDesc();
+ if (ddlDesc instanceof AlterDatabaseSetPropertiesDesc) {
+ final AlterDatabaseSetPropertiesDesc alterDatabaseDesc = (AlterDatabaseSetPropertiesDesc) ddlDesc;
+ final Map<String, String> properties = alterDatabaseDesc.getDatabaseProperties();
+ if (properties == null) {
+ // The properties are removed.
+ return true;
+ }
+ if (!properties.containsKey(READONLY)) {
+ // "readonly" is removed from the properties.
+ return true;
+ }
+ if (!"true".equalsIgnoreCase(properties.get(READONLY))) {
+ // "readonly" becomes non-true value.
+ return true;
+ }
+ }
+ }
+ }
+ }
+ return false;
+ }
+
+ private static boolean isReadOnlyOperation(HiveOperation hiveOperation) {
+ switch (hiveOperation) {
+ case EXPLAIN:
+ case SWITCHDATABASE:
+ case REPLDUMP:
+ case REPLSTATUS:
+ case EXPORT:
+ case KILL_QUERY:
+ return true;
+ }
+ if (Sets.newHashSet("SHOW", "DESC").stream().anyMatch(hiveOperation.name()::startsWith)) {
+ return true;
+ }
+ return false;
+ }
+}
diff --git a/ql/src/test/queries/clientnegative/read_only_delete.q b/ql/src/test/queries/clientnegative/read_only_delete.q
new file mode 100644
index 00000000000..d91f0c0febf
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/read_only_delete.q
@@ -0,0 +1,23 @@
+set hive.vectorized.execution.enabled=false;
+
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+
+set hive.exec.pre.hooks = org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyDatabaseHook;
+
+CREATE DATABASE writable;
+CREATE TABLE writable.src
+ (cint INT)
+ CLUSTERED BY (cint) INTO 1 BUCKETS STORED AS ORC
+ TBLPROPERTIES ('transactional'='true');
+ALTER DATABASE writable SET DBPROPERTIES('readonly' = 'false');
+DELETE FROM writable.src WHERE cint < 0;
+
+CREATE DATABASE readonly;
+CREATE TABLE readonly.src
+ (cint INT)
+ CLUSTERED BY (cint) INTO 1 BUCKETS STORED AS ORC
+ TBLPROPERTIES ('transactional'='true');
+ALTER DATABASE readonly SET DBPROPERTIES('readonly' = 'true');
+DELETE FROM readonly.src WHERE cint < 0;
diff --git a/ql/src/test/queries/clientnegative/read_only_insert.q b/ql/src/test/queries/clientnegative/read_only_insert.q
new file mode 100644
index 00000000000..e3e28943b84
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/read_only_insert.q
@@ -0,0 +1,23 @@
+set hive.vectorized.execution.enabled=false;
+
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+
+set hive.exec.pre.hooks = org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyDatabaseHook;
+
+CREATE DATABASE writable;
+CREATE TABLE writable.src
+ (cint INT)
+ CLUSTERED BY (cint) INTO 1 BUCKETS STORED AS ORC
+ TBLPROPERTIES ('transactional'='true');
+ALTER DATABASE writable SET DBPROPERTIES('readonly' = 'false');
+INSERT INTO writable.src VALUES(1);
+
+CREATE DATABASE readonly;
+CREATE TABLE readonly.src
+ (cint INT)
+ CLUSTERED BY (cint) INTO 1 BUCKETS STORED AS ORC
+ TBLPROPERTIES ('transactional'='true');
+ALTER DATABASE readonly SET DBPROPERTIES('readonly' = 'true');
+INSERT INTO readonly.src VALUES(1);
diff --git a/ql/src/test/queries/clientpositive/read_only_hook.q b/ql/src/test/queries/clientpositive/read_only_hook.q
new file mode 100644
index 00000000000..007af957738
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/read_only_hook.q
@@ -0,0 +1,27 @@
+--! qt:dataset:src
+set hive.exec.pre.hooks = org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyDatabaseHook;
+
+ALTER DATABASE default SET DBPROPERTIES('readonly' = 'true');
+
+-- SET
+set hive.exec.reducers.max = 1;
+
+-- USE
+use default;
+
+-- SHOW
+SHOW DATABASES;
+SHOW CREATE DATABASE default;
+SHOW CREATE TABLE src;
+
+-- DESC
+DESC DATABASE default;
+
+-- DESCRIBE
+DESCRIBE src;
+
+-- EXPLAIN
+EXPLAIN SELECT * FROM src LIMIT 1;
+
+-- SELECT
+SELECT * FROM src LIMIT 1;
diff --git a/ql/src/test/results/clientnegative/read_only_delete.q.out b/ql/src/test/results/clientnegative/read_only_delete.q.out
new file mode 100644
index 00000000000..59e78080e31
--- /dev/null
+++ b/ql/src/test/results/clientnegative/read_only_delete.q.out
@@ -0,0 +1,34 @@
+POSTHOOK: query: CREATE DATABASE writable
+POSTHOOK: type: CREATEDATABASE
+POSTHOOK: Output: database:writable
+POSTHOOK: query: CREATE TABLE writable.src
+ (cint INT)
+ CLUSTERED BY (cint) INTO 1 BUCKETS STORED AS ORC
+ TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:writable
+POSTHOOK: Output: writable@src
+POSTHOOK: query: ALTER DATABASE writable SET DBPROPERTIES('readonly' = 'false')
+POSTHOOK: type: ALTERDATABASE
+POSTHOOK: Output: database:writable
+POSTHOOK: query: DELETE FROM writable.src WHERE cint < 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: writable@src
+POSTHOOK: Output: writable@src
+POSTHOOK: query: CREATE DATABASE readonly
+POSTHOOK: type: CREATEDATABASE
+POSTHOOK: Output: database:readonly
+POSTHOOK: query: CREATE TABLE readonly.src
+ (cint INT)
+ CLUSTERED BY (cint) INTO 1 BUCKETS STORED AS ORC
+ TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:readonly
+POSTHOOK: Output: readonly@src
+POSTHOOK: query: ALTER DATABASE readonly SET DBPROPERTIES('readonly' = 'true')
+POSTHOOK: type: ALTERDATABASE
+POSTHOOK: Output: database:readonly
+FAILED: Hive Internal Error: org.apache.hadoop.hive.ql.parse.SemanticException(Database readonly is read-only)
+org.apache.hadoop.hive.ql.parse.SemanticException: Database readonly is read-only
+#### A masked pattern was here ####
+
diff --git a/ql/src/test/results/clientnegative/read_only_insert.q.out b/ql/src/test/results/clientnegative/read_only_insert.q.out
new file mode 100644
index 00000000000..f78f18a5055
--- /dev/null
+++ b/ql/src/test/results/clientnegative/read_only_insert.q.out
@@ -0,0 +1,35 @@
+POSTHOOK: query: CREATE DATABASE writable
+POSTHOOK: type: CREATEDATABASE
+POSTHOOK: Output: database:writable
+POSTHOOK: query: CREATE TABLE writable.src
+ (cint INT)
+ CLUSTERED BY (cint) INTO 1 BUCKETS STORED AS ORC
+ TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:writable
+POSTHOOK: Output: writable@src
+POSTHOOK: query: ALTER DATABASE writable SET DBPROPERTIES('readonly' = 'false')
+POSTHOOK: type: ALTERDATABASE
+POSTHOOK: Output: database:writable
+POSTHOOK: query: INSERT INTO writable.src VALUES(1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: writable@src
+POSTHOOK: Lineage: src.cint SCRIPT []
+POSTHOOK: query: CREATE DATABASE readonly
+POSTHOOK: type: CREATEDATABASE
+POSTHOOK: Output: database:readonly
+POSTHOOK: query: CREATE TABLE readonly.src
+ (cint INT)
+ CLUSTERED BY (cint) INTO 1 BUCKETS STORED AS ORC
+ TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:readonly
+POSTHOOK: Output: readonly@src
+POSTHOOK: query: ALTER DATABASE readonly SET DBPROPERTIES('readonly' = 'true')
+POSTHOOK: type: ALTERDATABASE
+POSTHOOK: Output: database:readonly
+FAILED: Hive Internal Error: org.apache.hadoop.hive.ql.parse.SemanticException(Database readonly is read-only)
+org.apache.hadoop.hive.ql.parse.SemanticException: Database readonly is read-only
+#### A masked pattern was here ####
+
diff --git a/ql/src/test/results/clientpositive/llap/read_only_hook.q.out b/ql/src/test/results/clientpositive/llap/read_only_hook.q.out
new file mode 100644
index 00000000000..d7a1e983d4c
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/read_only_hook.q.out
@@ -0,0 +1,71 @@
+POSTHOOK: query: ALTER DATABASE default SET DBPROPERTIES('readonly' = 'true')
+POSTHOOK: type: ALTERDATABASE
+POSTHOOK: Output: database:default
+POSTHOOK: query: use default
+POSTHOOK: type: SWITCHDATABASE
+POSTHOOK: Input: database:default
+POSTHOOK: query: SHOW DATABASES
+POSTHOOK: type: SHOWDATABASES
+default
+POSTHOOK: query: SHOW CREATE DATABASE default
+POSTHOOK: type: SHOW_CREATEDATABASE
+POSTHOOK: Input: database:default
+CREATE DATABASE `default`
+COMMENT
+ 'Default Hive database'
+LOCATION
+#### A masked pattern was here ####
+WITH DBPROPERTIES (
+ 'readonly'='true')
+POSTHOOK: query: SHOW CREATE TABLE src
+POSTHOOK: type: SHOW_CREATETABLE
+POSTHOOK: Input: default@src
+CREATE TABLE `src`(
+ `key` string COMMENT 'default',
+ `value` string COMMENT 'default')
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+TBLPROPERTIES (
+ 'bucketing_version'='2',
+#### A masked pattern was here ####
+POSTHOOK: query: DESC DATABASE default
+POSTHOOK: type: DESCDATABASE
+POSTHOOK: Input: database:default
+default Default Hive database location/in/test public ROLE
+POSTHOOK: query: DESCRIBE src
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@src
+key string default
+value string default
+POSTHOOK: query: EXPLAIN SELECT * FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ TableScan
+ alias: src
+ Limit
+ Number of rows: 1
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ ListSink
+
+POSTHOOK: query: SELECT * FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+238 val_238