You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by tc...@apache.org on 2023/01/04 04:36:07 UTC

[hive] branch master updated: HIVE-26555 Read-only mode for Hive database (#3614) (Teddy Choi, reviewed by Laszlo Bodor)

This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new ac1cfe2294d HIVE-26555 Read-only mode for Hive database (#3614) (Teddy Choi, reviewed by Laszlo Bodor)
ac1cfe2294d is described below

commit ac1cfe2294d1b7b8c02b60899d69868920ede0f5
Author: Teddy Choi <tc...@apache.org>
AuthorDate: Wed Jan 4 13:35:57 2023 +0900

    HIVE-26555 Read-only mode for Hive database (#3614) (Teddy Choi, reviewed by Laszlo Bodor)
---
 .../java/org/apache/hadoop/hive/ql/ErrorMsg.java   |   1 +
 .../hive/ql/hooks/EnforceReadOnlyDatabaseHook.java | 196 +++++++++++++++++++++
 .../test/queries/clientnegative/read_only_delete.q |  23 +++
 .../test/queries/clientnegative/read_only_insert.q |  23 +++
 .../test/queries/clientpositive/read_only_hook.q   |  27 +++
 .../results/clientnegative/read_only_delete.q.out  |  34 ++++
 .../results/clientnegative/read_only_insert.q.out  |  35 ++++
 .../clientpositive/llap/read_only_hook.q.out       |  71 ++++++++
 8 files changed, 410 insertions(+)

diff --git a/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index 1a3ad38acef..5e4fbf8e642 100644
--- a/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -485,6 +485,7 @@ public enum ErrorMsg {
   CTLF_UNSUPPORTED_FORMAT(10434, "CREATE TABLE LIKE FILE is not supported by the ''{0}'' file format", true),
   NON_NATIVE_ACID_UPDATE(10435, "Update and Merge into non-native ACID table is only supported when " +
           HiveConf.ConfVars.SPLIT_UPDATE.varname + " is true."),
+  READ_ONLY_DATABASE(10436, "Database {0} is read-only", true),
 
   //========================== 20000 range starts here ========================//
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/EnforceReadOnlyDatabaseHook.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/EnforceReadOnlyDatabaseHook.java
new file mode 100644
index 00000000000..c343fc7b14f
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/EnforceReadOnlyDatabaseHook.java
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.hooks;
+
+import com.google.common.collect.Sets;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.ql.ErrorMsg;
+import org.apache.hadoop.hive.ql.QueryState;
+import org.apache.hadoop.hive.ql.ddl.DDLDesc;
+import org.apache.hadoop.hive.ql.ddl.DDLWork;
+import org.apache.hadoop.hive.ql.ddl.database.alter.poperties.AlterDatabaseSetPropertiesDesc;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.HiveOperation;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.Serializable;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * EnforceReadOnlyDatabaseHook is a hook that disallows write operations on read-only databases.
+ * It's enforced when "hive.exec.pre.hooks" has "org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyDatabaseHook" and
+ * a database has 'readonly'='true' property.
+ */
+public class EnforceReadOnlyDatabaseHook implements ExecuteWithHookContext {
+  public static final String READONLY = "readonly";
+  private static final Logger LOG = LoggerFactory.getLogger(EnforceReadOnlyDatabaseHook.class);
+
+  @Override
+  public void run(HookContext hookContext) throws Exception {
+    assert(hookContext.getHookType() == HookContext.HookType.PRE_EXEC_HOOK);
+    final QueryState queryState = hookContext.getQueryState();
+    final HiveOperation hiveOperation = queryState.getHiveOperation();
+
+    // Allow read-only type operations, excluding query.
+    // A query can be EXPLAIN, SELECT, INSERT, UPDATE, or DELETE.
+    if (isReadOnlyOperation(hiveOperation)) {
+      return;
+    }
+
+    // Allow EXPLAIN or SELECT query operations, disallow INSERT, UPDATE, and DELETE.
+    if (isExplainOrSelectQuery(hookContext)) {
+      return;
+    }
+
+    // Allow ALTERDATABASE operations to make writable.
+    // It's a special allowed case to get out from readonly mode.
+    if (isAlterDbWritable(hookContext)) {
+      return;
+    }
+    // The second exception is to load a dumped database.
+    if (hiveOperation == HiveOperation.REPLLOAD) {
+      return;
+    }
+
+    // Now the remaining operation is a write operation, as a read operation is already allowed.
+
+    // Disallow write operations on a read-only database.
+    checkReadOnlyDbAsOutput(hookContext);
+
+    // Allow write operations on writable databases.
+  }
+
+  private static void checkReadOnlyDbAsOutput(HookContext hookContext) throws HiveException {
+    LOG.debug(hookContext.getQueryPlan().getOutputs().toString());
+    // If it has a data/metadata change operation on a read-only database, throw an exception.
+    final Set<WriteEntity> outputs = hookContext.getQueryPlan().getOutputs();
+    for (WriteEntity output: outputs) {
+      // Get the database.
+      final Database database;
+      final Hive hive = SessionState.get().getHiveDb();
+      if (output.getDatabase() == null) {
+        // For a table, get its database.
+        database = hive.getDatabase(output.getTable().getDbName());
+      } else {
+        // For a database, allow new one, since it's not read-only yet.
+        database = output.getDatabase();
+        final boolean exists = hive.databaseExists(database.getName());
+        LOG.debug("database exists: {}", exists);
+        if (!exists) {
+          continue;
+        }
+      }
+
+      // Read the database property.
+      final Map<String, String> parameters = database.getParameters();
+      LOG.debug("database name: " + database.getName() + " param: " + database.getParameters());
+
+      // If it's a read-only database, disallow it.
+      if (parameters == null) {
+        continue;
+      }
+      if (parameters.isEmpty()) {
+        continue;
+      }
+      if (!parameters.containsKey(READONLY)) {
+        continue;
+      }
+      if ("true".equalsIgnoreCase(parameters.get(READONLY))) {
+        throw new SemanticException(ErrorMsg.READ_ONLY_DATABASE, database.getName());
+      }
+    }
+  }
+
+  private static boolean isExplainOrSelectQuery(HookContext hookContext) {
+    final QueryState queryState = hookContext.getQueryState();
+    final HiveOperation hiveOperation = queryState.getHiveOperation();
+
+    // Allow EXPLAIN, SELECT queries, not INSERT, UPDATE, DELETE queries.
+    if (hiveOperation == HiveOperation.QUERY) {
+      if (hookContext.getQueryPlan().isExplain()) {
+        return true;
+      }
+      final String upper = queryState.getQueryString().trim().toUpperCase();
+      if (upper.startsWith("SELECT")) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Checks whether it's an ALTERDATABASE statement without {"readonly":"true"} property.
+   * 1. The properties are removed.
+   * 2. "readonly" is removed from the properties.
+   * 3. "readonly" becomes non-true value.
+   */
+  private static boolean isAlterDbWritable(HookContext hookContext) {
+    final HiveOperation hiveOperation = hookContext.getQueryState().getHiveOperation();
+    if (hiveOperation == HiveOperation.ALTERDATABASE) {
+      final List<Task<?>> rootTasks = hookContext.getQueryPlan().getRootTasks();
+      if (rootTasks.size() == 1) {
+        final Serializable rootWork = rootTasks.get(0).getWork();
+        if (rootWork instanceof DDLWork) {
+          final DDLWork ddlWork = (DDLWork) rootWork;
+          final DDLDesc ddlDesc = ddlWork.getDDLDesc();
+          if (ddlDesc instanceof AlterDatabaseSetPropertiesDesc) {
+            final AlterDatabaseSetPropertiesDesc alterDatabaseDesc = (AlterDatabaseSetPropertiesDesc) ddlDesc;
+            final Map<String, String> properties = alterDatabaseDesc.getDatabaseProperties();
+            if (properties == null) {
+              // The properties are removed.
+              return true;
+            }
+            if (!properties.containsKey(READONLY)) {
+              // "readonly" is removed from the properties.
+              return true;
+            }
+            if (!"true".equalsIgnoreCase(properties.get(READONLY))) {
+              // "readonly" becomes non-true value.
+              return true;
+            }
+          }
+        }
+      }
+    }
+    return false;
+  }
+
+  private static boolean isReadOnlyOperation(HiveOperation hiveOperation) {
+    switch (hiveOperation) {
+    case EXPLAIN:
+    case SWITCHDATABASE:
+    case REPLDUMP:
+    case REPLSTATUS:
+    case EXPORT:
+    case KILL_QUERY:
+      return true;
+    }
+    if (Sets.newHashSet("SHOW", "DESC").stream().anyMatch(hiveOperation.name()::startsWith)) {
+      return true;
+    }
+    return false;
+  }
+}
diff --git a/ql/src/test/queries/clientnegative/read_only_delete.q b/ql/src/test/queries/clientnegative/read_only_delete.q
new file mode 100644
index 00000000000..d91f0c0febf
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/read_only_delete.q
@@ -0,0 +1,23 @@
+set hive.vectorized.execution.enabled=false;
+
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+
+set hive.exec.pre.hooks = org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyDatabaseHook;
+
+CREATE DATABASE writable;
+CREATE TABLE writable.src
+    (cint INT)
+    CLUSTERED BY (cint) INTO 1 BUCKETS STORED AS ORC
+    TBLPROPERTIES ('transactional'='true');
+ALTER DATABASE writable SET DBPROPERTIES('readonly' = 'false');
+DELETE FROM writable.src WHERE cint < 0;
+
+CREATE DATABASE readonly;
+CREATE TABLE readonly.src
+    (cint INT)
+    CLUSTERED BY (cint) INTO 1 BUCKETS STORED AS ORC
+    TBLPROPERTIES ('transactional'='true');
+ALTER DATABASE readonly SET DBPROPERTIES('readonly' = 'true');
+DELETE FROM readonly.src WHERE cint < 0;
diff --git a/ql/src/test/queries/clientnegative/read_only_insert.q b/ql/src/test/queries/clientnegative/read_only_insert.q
new file mode 100644
index 00000000000..e3e28943b84
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/read_only_insert.q
@@ -0,0 +1,23 @@
+set hive.vectorized.execution.enabled=false;
+
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+
+set hive.exec.pre.hooks = org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyDatabaseHook;
+
+CREATE DATABASE writable;
+CREATE TABLE writable.src
+    (cint INT)
+    CLUSTERED BY (cint) INTO 1 BUCKETS STORED AS ORC
+    TBLPROPERTIES ('transactional'='true');
+ALTER DATABASE writable SET DBPROPERTIES('readonly' = 'false');
+INSERT INTO writable.src VALUES(1);
+
+CREATE DATABASE readonly;
+CREATE TABLE readonly.src
+    (cint INT)
+    CLUSTERED BY (cint) INTO 1 BUCKETS STORED AS ORC
+    TBLPROPERTIES ('transactional'='true');
+ALTER DATABASE readonly SET DBPROPERTIES('readonly' = 'true');
+INSERT INTO readonly.src VALUES(1);
diff --git a/ql/src/test/queries/clientpositive/read_only_hook.q b/ql/src/test/queries/clientpositive/read_only_hook.q
new file mode 100644
index 00000000000..007af957738
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/read_only_hook.q
@@ -0,0 +1,27 @@
+--! qt:dataset:src
+set hive.exec.pre.hooks = org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyDatabaseHook;
+
+ALTER DATABASE default SET DBPROPERTIES('readonly' = 'true');
+
+-- SET
+set hive.exec.reducers.max = 1;
+
+-- USE
+use default;
+
+-- SHOW
+SHOW DATABASES;
+SHOW CREATE DATABASE default;
+SHOW CREATE TABLE src;
+
+-- DESC
+DESC DATABASE default;
+
+-- DESCRIBE
+DESCRIBE src;
+
+-- EXPLAIN
+EXPLAIN SELECT * FROM src LIMIT 1;
+
+-- SELECT
+SELECT * FROM src LIMIT 1;
diff --git a/ql/src/test/results/clientnegative/read_only_delete.q.out b/ql/src/test/results/clientnegative/read_only_delete.q.out
new file mode 100644
index 00000000000..59e78080e31
--- /dev/null
+++ b/ql/src/test/results/clientnegative/read_only_delete.q.out
@@ -0,0 +1,34 @@
+POSTHOOK: query: CREATE DATABASE writable
+POSTHOOK: type: CREATEDATABASE
+POSTHOOK: Output: database:writable
+POSTHOOK: query: CREATE TABLE writable.src
+    (cint INT)
+    CLUSTERED BY (cint) INTO 1 BUCKETS STORED AS ORC
+    TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:writable
+POSTHOOK: Output: writable@src
+POSTHOOK: query: ALTER DATABASE writable SET DBPROPERTIES('readonly' = 'false')
+POSTHOOK: type: ALTERDATABASE
+POSTHOOK: Output: database:writable
+POSTHOOK: query: DELETE FROM writable.src WHERE cint < 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: writable@src
+POSTHOOK: Output: writable@src
+POSTHOOK: query: CREATE DATABASE readonly
+POSTHOOK: type: CREATEDATABASE
+POSTHOOK: Output: database:readonly
+POSTHOOK: query: CREATE TABLE readonly.src
+    (cint INT)
+    CLUSTERED BY (cint) INTO 1 BUCKETS STORED AS ORC
+    TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:readonly
+POSTHOOK: Output: readonly@src
+POSTHOOK: query: ALTER DATABASE readonly SET DBPROPERTIES('readonly' = 'true')
+POSTHOOK: type: ALTERDATABASE
+POSTHOOK: Output: database:readonly
+FAILED: Hive Internal Error: org.apache.hadoop.hive.ql.parse.SemanticException(Database readonly is read-only)
+org.apache.hadoop.hive.ql.parse.SemanticException: Database readonly is read-only
+#### A masked pattern was here ####
+
diff --git a/ql/src/test/results/clientnegative/read_only_insert.q.out b/ql/src/test/results/clientnegative/read_only_insert.q.out
new file mode 100644
index 00000000000..f78f18a5055
--- /dev/null
+++ b/ql/src/test/results/clientnegative/read_only_insert.q.out
@@ -0,0 +1,35 @@
+POSTHOOK: query: CREATE DATABASE writable
+POSTHOOK: type: CREATEDATABASE
+POSTHOOK: Output: database:writable
+POSTHOOK: query: CREATE TABLE writable.src
+    (cint INT)
+    CLUSTERED BY (cint) INTO 1 BUCKETS STORED AS ORC
+    TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:writable
+POSTHOOK: Output: writable@src
+POSTHOOK: query: ALTER DATABASE writable SET DBPROPERTIES('readonly' = 'false')
+POSTHOOK: type: ALTERDATABASE
+POSTHOOK: Output: database:writable
+POSTHOOK: query: INSERT INTO writable.src VALUES(1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: writable@src
+POSTHOOK: Lineage: src.cint SCRIPT []
+POSTHOOK: query: CREATE DATABASE readonly
+POSTHOOK: type: CREATEDATABASE
+POSTHOOK: Output: database:readonly
+POSTHOOK: query: CREATE TABLE readonly.src
+    (cint INT)
+    CLUSTERED BY (cint) INTO 1 BUCKETS STORED AS ORC
+    TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:readonly
+POSTHOOK: Output: readonly@src
+POSTHOOK: query: ALTER DATABASE readonly SET DBPROPERTIES('readonly' = 'true')
+POSTHOOK: type: ALTERDATABASE
+POSTHOOK: Output: database:readonly
+FAILED: Hive Internal Error: org.apache.hadoop.hive.ql.parse.SemanticException(Database readonly is read-only)
+org.apache.hadoop.hive.ql.parse.SemanticException: Database readonly is read-only
+#### A masked pattern was here ####
+
diff --git a/ql/src/test/results/clientpositive/llap/read_only_hook.q.out b/ql/src/test/results/clientpositive/llap/read_only_hook.q.out
new file mode 100644
index 00000000000..d7a1e983d4c
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/read_only_hook.q.out
@@ -0,0 +1,71 @@
+POSTHOOK: query: ALTER DATABASE default SET DBPROPERTIES('readonly' = 'true')
+POSTHOOK: type: ALTERDATABASE
+POSTHOOK: Output: database:default
+POSTHOOK: query: use default
+POSTHOOK: type: SWITCHDATABASE
+POSTHOOK: Input: database:default
+POSTHOOK: query: SHOW DATABASES
+POSTHOOK: type: SHOWDATABASES
+default
+POSTHOOK: query: SHOW CREATE DATABASE default
+POSTHOOK: type: SHOW_CREATEDATABASE
+POSTHOOK: Input: database:default
+CREATE DATABASE `default`
+COMMENT
+  'Default Hive database'
+LOCATION
+#### A masked pattern was here ####
+WITH DBPROPERTIES (
+  'readonly'='true')
+POSTHOOK: query: SHOW CREATE TABLE src
+POSTHOOK: type: SHOW_CREATETABLE
+POSTHOOK: Input: default@src
+CREATE TABLE `src`(
+  `key` string COMMENT 'default', 
+  `value` string COMMENT 'default')
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' 
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.mapred.TextInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+TBLPROPERTIES (
+  'bucketing_version'='2', 
+#### A masked pattern was here ####
+POSTHOOK: query: DESC DATABASE default
+POSTHOOK: type: DESCDATABASE
+POSTHOOK: Input: database:default
+default	Default Hive database	location/in/test		public	ROLE		
+POSTHOOK: query: DESCRIBE src
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@src
+key                 	string              	default             
+value               	string              	default             
+POSTHOOK: query: EXPLAIN SELECT * FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        TableScan
+          alias: src
+          Limit
+            Number of rows: 1
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              ListSink
+
+POSTHOOK: query: SELECT * FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+238	val_238