You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by GitBox <gi...@apache.org> on 2022/09/30 05:53:52 UTC

[GitHub] [hudi] felixYyu commented on a diff in pull request #5064: [HUDI-3654] Add new module `hudi-metaserver`

felixYyu commented on code in PR #5064:
URL: https://github.com/apache/hudi/pull/5064#discussion_r984168332


##########
hudi-metaserver/src/main/resources/mybatis/DDLMapper.xml:
##########
@@ -0,0 +1,127 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
+        "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
+
+<mapper namespace="DDLMapper">
+    <update id="createDBs">
+        CREATE TABLE dbs
+        (
+            db_id BIGINT UNSIGNED PRIMARY KEY AUTO_INCREMENT COMMENT 'uuid',
+            desc VARCHAR(512) COMMENT 'database description',
+            location_uri VARCHAR(512) COMMENT 'database storage path',
+            name VARCHAR(512) UNIQUE COMMENT 'database name',
+            owner_name VARCHAR(512) COMMENT 'database owner',
+            owner_type VARCHAR(512) COMMENT 'database type',
+            create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT 'db created time',
+            update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'update time'
+        ) COMMENT 'databases';
+
+    </update>
+
+    <update id="createTables">
+        CREATE TABLE tbls
+        (
+            tbl_id BIGINT UNSIGNED PRIMARY KEY AUTO_INCREMENT COMMENT 'uuid',
+            db_id BIGINT COMMENT 'database id',
+            name VARCHAR(512) COMMENT 'table name',
+            create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT 'table created time',
+            update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'update time',
+            owner_name VARCHAR(512) COMMENT 'table owner',
+            location VARCHAR(512) COMMENT 'table location',
+            UNIQUE KEY uniq_tb (db_id, name)
+        ) COMMENT 'tables';
+    </update>
+
+    <update id="createTableParams">
+        CREATE TABLE tbl_params
+        (
+            tbl_id BIGINT UNSIGNED COMMENT 'tbl id',
+            param_key VARCHAR(256) COMMENT 'param_key',
+            param_value VARCHAR(2048) COMMENT 'param_value',
+            create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT 'parameter created time',
+            update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'update time',
+            PRIMARY KEY (tbl_id, param_key)
+        ) COMMENT 'tbl params';
+    </update>
+
+    <update id="createPartitions">
+        CREATE TABLE partitions
+        (
+            part_id BIGINT UNSIGNED PRIMARY KEY AUTO_INCREMENT COMMENT 'uuid',
+            tbl_id BIGINT COMMENT 'table id',
+            part_name VARCHAR(256) COMMENT 'partition path',
+            create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT 'create time',
+            update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP  COMMENT 'update time',
+            is_deleted BOOL DEFAULT FALSE COMMENT 'whether the partition is deleted',
+            UNIQUE uniq_partition_version (tbl_id, part_name)
+        ) COMMENT 'partitions';
+    </update>
+
+    <update id="createTableTimestamp">
+        CREATE TABLE tbl_timestamp
+        (
+            tbl_id BIGINT UNSIGNED PRIMARY KEY COMMENT 'uuid',
+            ts VARCHAR(17) COMMENT 'instant timestamp'
+        ) COMMENT 'generate the unique timestamp for a table';
+    </update>
+
+    <update id="createInstant">
+        CREATE TABLE instant
+        (
+            instant_id BIGINT UNSIGNED PRIMARY KEY AUTO_INCREMENT COMMENT 'uuid',
+            tbl_id BIGINT COMMENT 'table id',
+            ts  VARCHAR(17) COMMENT 'instant timestamp',
+            action TINYINT COMMENT 'commit, deltacommit, compaction, replace etc',
+            state    TINYINT COMMENT 'completed, requested, inflight, invalid etc',
+            duration INT  DEFAULT 0 COMMENT 'for heartbeat (s)',
+            start_ts INT  DEFAULT 0 COMMENT 'for heartbeat (s)',
+            UNIQUE KEY uniq_inst1 (tbl_id, state, ts, action),
+            UNIQUE KEY uniq_inst2 (tbl_id, ts)
+        ) COMMENT 'timeline';
+    </update>
+
+    <update id="createInstantMeta">
+        CREATE TABLE instant_meta
+        (
+            commit_id BIGINT UNSIGNED PRIMARY KEY AUTO_INCREMENT COMMENT 'uuid',
+            tbl_id BIGINT COMMENT 'table id',
+            ts VARCHAR(17) COMMENT 'instant timestamp',
+            action TINYINT COMMENT 'commit, deltacommit, compaction, replace etc',
+            state TINYINT COMMENT 'completed, requested, inflight, invalid etc',
+            data LONGBLOB COMMENT 'instant metadate',

Review Comment:
   Typo: 'metadate' should be 'metadata'.



##########
hudi-metaserver/src/main/java/org/apache/hudi/common/table/HoodieTableMetaServerClient.java:
##########
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.common.table;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hudi.common.config.HoodieMetaServerConfig;
+import org.apache.hudi.common.fs.ConsistencyGuardConfig;
+import org.apache.hudi.common.fs.FileSystemRetryConfig;
+import org.apache.hudi.common.model.HoodieTableType;
+import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
+import org.apache.hudi.common.table.timeline.HoodieMetaServerBasedTimeline;
+import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.exception.HoodieException;
+import org.apache.hudi.metaserver.client.HoodieMetaServerClient;
+import org.apache.hudi.metaserver.client.HoodieMetaServerClientProxy;
+import org.apache.hudi.metaserver.thrift.NoSuchObjectException;
+import org.apache.hudi.metaserver.thrift.Table;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * HoodieTableMetaClient implementation for hoodie table whose metadata is stored in the hoodie meta server.
+ */
+public class HoodieTableMetaServerClient extends HoodieTableMetaClient {
+  private static final Logger LOG = LogManager.getLogger(HoodieTableMetaServerClient.class);
+
+  private String databaseName;
+  private String tableName;
+  private Table table;
+  private HoodieMetaServerClient metaServerClient;
+
+  public HoodieTableMetaServerClient(Configuration conf, ConsistencyGuardConfig consistencyGuardConfig, FileSystemRetryConfig fileSystemRetryConfig,
+                                     String databaseName, String tableName, HoodieMetaServerConfig config) {
+    super(conf, config.getString(HoodieWriteConfig.BASE_PATH), false, consistencyGuardConfig, Option.of(TimelineLayoutVersion.CURR_LAYOUT_VERSION),
+        config.getString(HoodieTableConfig.PAYLOAD_CLASS_NAME), fileSystemRetryConfig);
+    if (databaseName == null || tableName == null) {

Review Comment:
   Both `databaseName` and `tableName` should be checked for empty strings as well as null,
   e.g. with `org.apache.hudi.common.util.StringUtils`:
   ```suggestion
      if (StringUtils.isNullOrEmpty(databaseName) || StringUtils.isNullOrEmpty(tableName)) {
   ```
   



##########
hudi-metaserver/src/main/thrift/hudi-metaserver.thrift:
##########
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ namespace java org.apache.hudi.metaserver.thrift
+
+ // table related
+ struct Table {
+   1: string tableName,
+   2: string dbName,
+   3: string owner,
+   4: i32 createTime,
+   5: string location,
+   6: string tableType,
+   7: list<FieldSchema> partitionKeys,
+   8: map<string, string> parameters
+ }
+
+ struct FieldSchema {
+   1: string name,
+   2: string type,
+   3: string comments
+ }
+
+// timeline related
+// align with actions defined in HoodieTimeline
+enum TAction {
+    COMMIT = 1,
+    DELTACOMMIT = 2,
+    CLEAN = 3,
+    ROLLBACK = 4,
+    SAVEPOINT = 5,
+    REPLACECOMMIT = 6,
+    COMPACTION = 7,
+    RESTORE = 8
+}
+
+// align with states defined in HoodieInstant
+enum TState {
+   REQUESTED = 1,
+   INFLIGHT = 2,
+   COMPLETED = 3,
+   INVALID = 4
+}
+
+struct THoodieInstant {
+   1: string timestamp,
+   2: TAction action,
+   3: TState state
+}
+
+struct HoodieInstantChangeResult {
+  1: bool success,
+  2: optional THoodieInstant instant,
+  4: optional string msg
+}
+
+exception MetaStoreException {
+  1: string message
+}
+
+exception MetaException {
+  1: string message
+}
+
+exception NoSuchObjectException {
+  1: string message
+}
+
+exception AlreadyExistException {
+  1: string message
+}
+
+service ThriftHoodieMetaServer {
+  // table related
+  void create_database(1:string db)

Review Comment:
   Function names should follow camelCase naming (e.g. `createDatabase` instead of `create_database`).



##########
hudi-metaserver/src/main/resources/mybatis/DDLMapper.xml:
##########
@@ -0,0 +1,127 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
+        "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
+
+<mapper namespace="DDLMapper">
+    <update id="createDBs">
+        CREATE TABLE dbs
+        (
+            db_id BIGINT UNSIGNED PRIMARY KEY AUTO_INCREMENT COMMENT 'uuid',
+            desc VARCHAR(512) COMMENT 'database description',
+            location_uri VARCHAR(512) COMMENT 'database storage path',
+            name VARCHAR(512) UNIQUE COMMENT 'database name',
+            owner_name VARCHAR(512) COMMENT 'database owner',
+            owner_type VARCHAR(512) COMMENT 'database type',
+            create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT 'db created time',
+            update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'update time'
+        ) COMMENT 'databases';
+
+    </update>
+
+    <update id="createTables">
+        CREATE TABLE tbls
+        (
+            tbl_id BIGINT UNSIGNED PRIMARY KEY AUTO_INCREMENT COMMENT 'uuid',
+            db_id BIGINT COMMENT 'database id',
+            name VARCHAR(512) COMMENT 'table name',
+            create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT 'table created time',
+            update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'update time',
+            owner_name VARCHAR(512) COMMENT 'table owner',
+            location VARCHAR(512) COMMENT 'table location',
+            UNIQUE KEY uniq_tb (db_id, name)
+        ) COMMENT 'tables';
+    </update>
+
+    <update id="createTableParams">
+        CREATE TABLE tbl_params
+        (
+            tbl_id BIGINT UNSIGNED COMMENT 'tbl id',
+            param_key VARCHAR(256) COMMENT 'param_key',
+            param_value VARCHAR(2048) COMMENT 'param_value',
+            create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT 'parameter created time',
+            update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'update time',
+            PRIMARY KEY (tbl_id, param_key)
+        ) COMMENT 'tbl params';
+    </update>
+
+    <update id="createPartitions">
+        CREATE TABLE partitions
+        (
+            part_id BIGINT UNSIGNED PRIMARY KEY AUTO_INCREMENT COMMENT 'uuid',
+            tbl_id BIGINT COMMENT 'table id',
+            part_name VARCHAR(256) COMMENT 'partition path',
+            create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT 'create time',
+            update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP  COMMENT 'update time',
+            is_deleted BOOL DEFAULT FALSE COMMENT 'whether the partition is deleted',
+            UNIQUE uniq_partition_version (tbl_id, part_name)
+        ) COMMENT 'partitions';
+    </update>
+
+    <update id="createTableTimestamp">
+        CREATE TABLE tbl_timestamp
+        (
+            tbl_id BIGINT UNSIGNED PRIMARY KEY COMMENT 'uuid',
+            ts VARCHAR(17) COMMENT 'instant timestamp'
+        ) COMMENT 'generate the unique timestamp for a table';
+    </update>
+
+    <update id="createInstant">
+        CREATE TABLE instant
+        (
+            instant_id BIGINT UNSIGNED PRIMARY KEY AUTO_INCREMENT COMMENT 'uuid',
+            tbl_id BIGINT COMMENT 'table id',
+            ts  VARCHAR(17) COMMENT 'instant timestamp',
+            action TINYINT COMMENT 'commit, deltacommit, compaction, replace etc',
+            state    TINYINT COMMENT 'completed, requested, inflight, invalid etc',
+            duration INT  DEFAULT 0 COMMENT 'for heartbeat (s)',
+            start_ts INT  DEFAULT 0 COMMENT 'for heartbeat (s)',
+            UNIQUE KEY uniq_inst1 (tbl_id, state, ts, action),
+            UNIQUE KEY uniq_inst2 (tbl_id, ts)
+        ) COMMENT 'timeline';
+    </update>
+
+    <update id="createInstantMeta">
+        CREATE TABLE instant_meta
+        (
+            commit_id BIGINT UNSIGNED PRIMARY KEY AUTO_INCREMENT COMMENT 'uuid',
+            tbl_id BIGINT COMMENT 'table id',
+            ts VARCHAR(17) COMMENT 'instant timestamp',
+            action TINYINT COMMENT 'commit, deltacommit, compaction, replace etc',
+            state TINYINT COMMENT 'completed, requested, inflight, invalid etc',
+            data LONGBLOB COMMENT 'instant metadate',
+            create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT 'instant meta created time',
+            UNIQUE KEY uniq_inst3 (tbl_id, state, ts, action)
+        ) COMMENT 'instant meta';
+    </update>
+
+    <update id="createFiles">
+        CREATE TABLE files
+        (
+            id          BIGINT UNSIGNED PRIMARY KEY AUTO_INCREMENT COMMENT 'uuid',
+            tbl_id      BIGINT COMMENT 'table id',
+            part_id     BIGINT COMMENT 'partition id',
+            name        VARCHAR(256) COMMENT 'file name',
+            size        BIGINT COMMENT 'file size',
+            is_deleted  BOOL COMMENT 'whether the file has been deleted',
+            create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT 'create time',
+            update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'update time',
+            UNIQUE KEY uniq_name (part_id, name)
+        ) COMMENT 'snapshot 文件表';

Review Comment:
   It's better to keep the comment language consistent with the rest of the file (English):
   COMMENT 'snapshot 文件表' -> COMMENT 'snapshot files'



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@hudi.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org