You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by GitBox <gi...@apache.org> on 2021/08/06 04:05:49 UTC

[GitHub] [incubator-doris] morningman commented on a change in pull request #6063: [Bug]Fix the bug data balance causes tablet loss

morningman commented on a change in pull request #6063:
URL: https://github.com/apache/incubator-doris/pull/6063#discussion_r667641350



##########
File path: be/src/agent/task_worker_pool.cpp
##########
@@ -900,6 +906,14 @@ void TaskWorkerPool::_clone_worker_thread_callback() {
         DorisMetrics::instance()->clone_requests_total->increment(1);
         LOG(INFO) << "get clone task. signature:" << agent_task_req.signature;
 
+        // check tablet with the same tabletId existance, if exist, set tablet in clone mode
+        string err;
+        TabletSharedPtr exist_tablet = StorageEngine::instance()->tablet_manager()->get_tablet(
+                clone_req.tablet_id, 0 /*replica_id*/, clone_req.schema_hash, &err);
+        if (exist_tablet != nullptr) {
+            exist_tablet->set_clone_mode(true);

Review comment:
       Why not checking replica id here?

##########
File path: be/src/agent/task_worker_pool.cpp
##########
@@ -927,6 +941,14 @@ void TaskWorkerPool::_clone_worker_thread_callback() {
         task_status.__set_error_msgs(error_msgs);
         finish_task_request.__set_task_status(task_status);
 
+        // clone done, set clone mode false
+        // Retrieve once again to prevent tablet from being dropped
+        exist_tablet = StorageEngine::instance()->tablet_manager()->get_tablet(
+                clone_req.tablet_id, 0 /*replica_id*/, clone_req.schema_hash, &err);

Review comment:
       Why not checking replica id here?

##########
File path: be/src/agent/task_worker_pool.cpp
##########
@@ -372,7 +372,7 @@ void TaskWorkerPool::_create_tablet_worker_thread_callback() {
             ++_s_report_version;
             // get path hash of the created tablet
             TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet(
-                    create_tablet_req.tablet_id, create_tablet_req.tablet_schema.schema_hash);
+                    create_tablet_req.tablet_id, create_tablet_req.replica_id, create_tablet_req.tablet_schema.schema_hash);

Review comment:
       Check if `create_tablet_req.replica_id` is set before using it. We can not rely on the default value of it. It depends on thrift's implementation. Or you can set the default value in thrift file.

##########
File path: be/src/olap/task/engine_clone_task.cpp
##########
@@ -157,15 +169,26 @@ OLAPStatus EngineCloneTask::_do_clone() {
             string header_path = TabletMeta::construct_header_file_path(
                     schema_hash_path_stream.str(), _clone_req.tablet_id);
             OLAPStatus reset_id_status = TabletMeta::reset_tablet_uid(header_path);
-            if (reset_id_status != OLAP_SUCCESS) {
-                LOG(WARNING) << "errors while set tablet uid: '" << header_path;
-                _error_msgs->push_back("errors while set tablet uid.");
+            // reset_replica_id here. before load tablet to tablet_manager
+            OLAPStatus reset_replica_id_status = TabletMeta::reset_tablet_replica_id(header_path, _clone_req.replica_id);

Review comment:
       merge `reset_tablet_replica_id()` into `reset_tablet_uid()`? To avoid save meta twice?

##########
File path: be/src/agent/task_worker_pool.cpp
##########
@@ -425,10 +425,16 @@ void TaskWorkerPool::_drop_tablet_worker_thread_callback() {
         TStatus task_status;
         string err;
         TabletSharedPtr dropped_tablet = StorageEngine::instance()->tablet_manager()->get_tablet(
-                drop_tablet_req.tablet_id, drop_tablet_req.schema_hash, &err);
+                drop_tablet_req.tablet_id, drop_tablet_req.replica_id, drop_tablet_req.schema_hash, &err);
         if (dropped_tablet != nullptr) {
+            if (dropped_tablet->clone_mode()) {
+                LOG(WARNING) << "drop table cancelled as tablet is in clone mode! signature: " << agent_task_req.signature;
+                error_msgs.push_back("drop table cancelled!");

Review comment:
       add reason to error_msgs too.

##########
File path: be/src/olap/task/engine_clone_task.cpp
##########
@@ -157,15 +169,26 @@ OLAPStatus EngineCloneTask::_do_clone() {
             string header_path = TabletMeta::construct_header_file_path(
                     schema_hash_path_stream.str(), _clone_req.tablet_id);
             OLAPStatus reset_id_status = TabletMeta::reset_tablet_uid(header_path);
-            if (reset_id_status != OLAP_SUCCESS) {
-                LOG(WARNING) << "errors while set tablet uid: '" << header_path;
-                _error_msgs->push_back("errors while set tablet uid.");
+            // reset_replica_id here. before load tablet to tablet_manager
+            OLAPStatus reset_replica_id_status = TabletMeta::reset_tablet_replica_id(header_path, _clone_req.replica_id);
+            if (reset_id_status != OLAP_SUCCESS || reset_replica_id_status != OLAP_SUCCESS) {
+                LOG(WARNING) << "errors while set tablet uid or replica id: '" << header_path;
+                _error_msgs->push_back("errors while set tablet uid/replica_id.");
                 status = DORIS_ERROR;
             } else {
-                OLAPStatus load_header_status =
-                        StorageEngine::instance()->tablet_manager()->load_tablet_from_dir(
+                OLAPStatus load_header_status;
+                if (old_version_tablet != nullptr) {
+                    // drop old version tablet first, then and new tablet

Review comment:
       ```suggestion
                       // drop old version tablet first, then add new tablet
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org