You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by GitBox <gi...@apache.org> on 2021/12/06 13:57:57 UTC

[GitHub] [incubator-doris] morningman commented on a change in pull request #7098: Support remote storage, step1: use a struct instead of string for parameter path, add basic remote method

morningman commented on a change in pull request #7098:
URL: https://github.com/apache/incubator-doris/pull/7098#discussion_r763022307



##########
File path: be/src/olap/rowset/beta_rowset.cpp
##########
@@ -138,34 +141,69 @@ OLAPStatus BetaRowset::link_files_to(const std::string& dir, RowsetId new_rowset
 
 OLAPStatus BetaRowset::copy_files_to(const std::string& dir) {
     for (int i = 0; i < num_segments(); ++i) {
-        std::string dst_path = segment_file_path(dir, rowset_id(), i);
-        if (FileUtils::check_exist(dst_path)) {
-            LOG(WARNING) << "file already exist: " << dst_path;
+        FilePathDesc dst_path_desc = segment_file_path(dir, rowset_id(), i);
+        Status status = Env::get_env(_rowset_path_desc.storage_medium)->path_exists(dst_path_desc.filepath);

Review comment:
       We can get `env` once, before the loop, instead of getting it for each segment.

##########
File path: be/src/olap/rowset/beta_rowset_writer.cpp
##########
@@ -54,13 +54,13 @@ BetaRowsetWriter::~BetaRowsetWriter() {
         _segment_writer.reset(); // ensure all files are closed
         Status st;
         for (int i = 0; i < _num_segment; ++i) {
-            auto path = BetaRowset::segment_file_path(_context.rowset_path_prefix,
+            auto path_desc = BetaRowset::segment_file_path(_context.path_desc,
                                                       _context.rowset_id, i);
             // Even if an error is encountered, these files that have not been cleaned up
             // will be cleaned up by the GC background. So here we only print the error
             // message when we encounter an error.
-            WARN_IF_ERROR(Env::Default()->delete_file(path),
-                          strings::Substitute("Failed to delete file=$0", path));
+            WARN_IF_ERROR(Env::get_env(_context.path_desc.storage_medium)->delete_file(path_desc.filepath),

Review comment:
       Get `env` once, before the loop, instead of on every iteration.

##########
File path: be/src/util/coding.cpp
##########
@@ -1,11 +1,19 @@
-//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
-//  This source code is licensed under both the GPLv2 (found in the
-//  COPYING file in the root directory) and Apache 2.0 License
-//  (found in the LICENSE.Apache file in the root directory).
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
 //
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.

Review comment:
       Do not modify the license header.

##########
File path: be/src/util/coding.h
##########
@@ -1,11 +1,19 @@
-//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
-//  This source code is licensed under both the GPLv2 (found in the
-//  COPYING file in the root directory) and Apache 2.0 License
-//  (found in the LICENSE.Apache file in the root directory).
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
 //
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.

Review comment:
       Do not modify the license header.

##########
File path: be/src/env/env.h
##########
@@ -1,11 +1,19 @@
-//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
-//  This source code is licensed under both the GPLv2 (found in the
-//  COPYING file in the root directory) and Apache 2.0 License
-//  (found in the LICENSE.Apache file in the root directory).
+// Licensed to the Apache Software Foundation (ASF) under one

Review comment:
       Do not modify the original license header.

##########
File path: be/src/env/env_posix.cpp
##########
@@ -266,13 +277,38 @@ class PosixRandomAccessFile : public RandomAccessFile {
         }
     }
 
-    Status read_at(uint64_t offset, const Slice& result) const override {
-        return do_readv_at(_fd, _filename, offset, &result, 1);
+    Status read_at(uint64_t offset, const Slice* result) const override {
+        return readv_at(offset, result, 1);
+    }
+
+    Status readv_at(uint64_t offset, const Slice* result, size_t res_cnt) const override {
+        return do_readv_at(_fd, _filename, offset, result, res_cnt);
     }
 
-    Status readv_at(uint64_t offset, const Slice* res, size_t res_cnt) const override {
-        return do_readv_at(_fd, _filename, offset, res, res_cnt);
+    Status read_all(std::string* content) const override {
+        std::fstream fs(_filename.c_str(), std::fstream::in);
+        if (!fs.is_open()) {
+            RETURN_NOT_OK_STATUS_WITH_WARN(
+                    Status::IOError(
+                            strings::Substitute("failed to open cluster id file $0", _filename)),
+                    "open file failed");
+        }
+        std::string data;
+        fs >> data;
+        fs.close();
+        if ((fs.rdstate() & std::fstream::eofbit) != 0) {
+            *content = data;

Review comment:
       The memory allocated for the string `data` will be released.

##########
File path: be/src/env/env_posix.cpp
##########
@@ -1,11 +1,19 @@
-//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
-//  This source code is licensed under both the GPLv2 (found in the
-//  COPYING file in the root directory) and Apache 2.0 License
-//  (found in the LICENSE.Apache file in the root directory).
+// Licensed to the Apache Software Foundation (ASF) under one

Review comment:
       Do not modify the original license header.

##########
File path: be/src/olap/rowset/beta_rowset.cpp
##########
@@ -138,34 +141,69 @@ OLAPStatus BetaRowset::link_files_to(const std::string& dir, RowsetId new_rowset
 
 OLAPStatus BetaRowset::copy_files_to(const std::string& dir) {
     for (int i = 0; i < num_segments(); ++i) {
-        std::string dst_path = segment_file_path(dir, rowset_id(), i);
-        if (FileUtils::check_exist(dst_path)) {
-            LOG(WARNING) << "file already exist: " << dst_path;
+        FilePathDesc dst_path_desc = segment_file_path(dir, rowset_id(), i);
+        Status status = Env::get_env(_rowset_path_desc.storage_medium)->path_exists(dst_path_desc.filepath);
+        if (status.ok()) {
+            LOG(WARNING) << "file already exist: " << dst_path_desc.filepath;
             return OLAP_ERR_FILE_ALREADY_EXIST;
         }
-        std::string src_path = segment_file_path(_rowset_path, rowset_id(), i);
-        if (copy_file(src_path, dst_path) != OLAP_SUCCESS) {
-            LOG(WARNING) << "fail to copy file. from=" << src_path << ", to=" << dst_path
-                         << ", errno=" << Errno::no();
+        if (!status.is_not_found()) {
+            LOG(WARNING) << "file check exist error: " << dst_path_desc.filepath;
             return OLAP_ERR_OS_ERROR;
         }
+        FilePathDesc src_path_desc = segment_file_path(_rowset_path_desc, rowset_id(), i);
+        if (!Env::get_env(_rowset_path_desc.storage_medium)->copy_path(
+                src_path_desc.filepath, dst_path_desc.filepath).ok()) {
+            LOG(WARNING) << "fail to copy file. from=" << src_path_desc.filepath << ", to="
+                    << dst_path_desc.filepath << ", errno=" << Errno::no();
+            return OLAP_ERR_OS_ERROR;
+        }
+    }
+    return OLAP_SUCCESS;
+}
+
+OLAPStatus BetaRowset::upload_files_to(const FilePathDesc& dir_desc) {
+    RemoteEnv* dest_env = dynamic_cast<RemoteEnv*>(Env::get_env(_rowset_path_desc.storage_medium));
+    std::shared_ptr<StorageBackend> storage_backend = dest_env->get_storage_backend();
+    for (int i = 0; i < num_segments(); ++i) {
+        FilePathDesc dst_path_desc = segment_file_path(dir_desc, rowset_id(), i);
+        Status status = storage_backend->exist(dst_path_desc.remote_path);
+        if (status.ok()) {
+            LOG(WARNING) << "file already exist: " << dst_path_desc.remote_path;
+            return OLAP_ERR_FILE_ALREADY_EXIST;
+        }
+        if (!status.is_not_found()) {
+            LOG(WARNING) << "file check exist error: " << dst_path_desc.remote_path;
+            return OLAP_ERR_OS_ERROR;
+        }
+        FilePathDesc src_path_desc = segment_file_path(_rowset_path_desc, rowset_id(), i);
+
+        if (!storage_backend->upload(src_path_desc.filepath, dst_path_desc.remote_path).ok()) {
+            LOG(WARNING) << "fail to upload file. from=" << src_path_desc.filepath << ", to="
+                         << dst_path_desc.remote_path << ", errno=" << Errno::no();
+            return OLAP_ERR_OS_ERROR;
+        }
+        LOG(INFO) << "succeed to upload file. from " << src_path_desc.filepath << " to "
+                  << dst_path_desc.remote_path;
     }
     return OLAP_SUCCESS;
 }
 
 bool BetaRowset::check_path(const std::string& path) {
     std::set<std::string> valid_paths;
     for (int i = 0; i < num_segments(); ++i) {
-        valid_paths.insert(segment_file_path(_rowset_path, rowset_id(), i));
+        FilePathDesc path_desc = segment_file_path(_rowset_path_desc, rowset_id(), i);
+        valid_paths.insert(path_desc.filepath);
     }
     return valid_paths.find(path) != valid_paths.end();
 }
 
 bool BetaRowset::check_file_exist() {
     for (int i = 0; i < num_segments(); ++i) {
-        std::string data_file = segment_file_path(_rowset_path, rowset_id(), i);
-        if (!FileUtils::check_exist(data_file)) {
-            LOG(WARNING) << "data file not existed: " << data_file << " for rowset_id: " << rowset_id();
+        FilePathDesc path_desc = segment_file_path(_rowset_path_desc, rowset_id(), i);
+        if (!Env::get_env(_rowset_path_desc.storage_medium)->path_exists(path_desc.filepath).ok()) {

Review comment:
       Get `env` once, before the loop, instead of on every iteration.

##########
File path: be/src/util/coding.cpp
##########
@@ -1,11 +1,19 @@
-//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
-//  This source code is licensed under both the GPLv2 (found in the
-//  COPYING file in the root directory) and Apache 2.0 License
-//  (found in the LICENSE.Apache file in the root directory).
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
 //
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.

Review comment:
       Do not modify the license header.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org