You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@pegasus.apache.org by GitBox <gi...@apache.org> on 2022/08/02 04:21:57 UTC

[GitHub] [incubator-pegasus] empiredan commented on a diff in pull request #1091: feat: improve performance of count_data

empiredan commented on code in PR #1091:
URL: https://github.com/apache/incubator-pegasus/pull/1091#discussion_r935080105


##########
src/shell/commands/data_operations.cpp:
##########
@@ -2397,6 +2401,15 @@ bool count_data(command_executor *e, shell_context *sc, arguments args)
         options.no_value = false;
     else
         options.no_value = true;
+
+    if (diff_hash_key || stat_size || value_filter_type != pegasus::pegasus_client::FT_NO_FILTER ||
+        sort_key_filter_type != pegasus::pegasus_client::FT_NO_FILTER) {
+        options.only_return_count = false;
+    }

Review Comment:
   Otherwise, can `only_return_count` be set true automatically ?



##########
src/server/pegasus_server_impl.cpp:
##########
@@ -1365,7 +1373,8 @@ void pegasus_server_impl::on_scan(scan_rpc rpc)
                                                    epoch_now,
                                                    no_value,
                                                    validate_hash,
-                                                   return_expire_ts);
+                                                   return_expire_ts,
+                                                   request.only_return_count ? false : true);

Review Comment:
   ```c++
   !request.only_return_count);
   ```



##########
src/shell/commands/data_operations.cpp:
##########
@@ -2397,6 +2401,15 @@ bool count_data(command_executor *e, shell_context *sc, arguments args)
         options.no_value = false;
     else
         options.no_value = true;
+
+    if (diff_hash_key || stat_size || value_filter_type != pegasus::pegasus_client::FT_NO_FILTER ||

Review Comment:
   Is it possible to pass `value_filter_type` to the server side to filter value remotely ?



##########
src/server/pegasus_server_impl.cpp:
##########
@@ -1449,7 +1463,10 @@ void pegasus_server_impl::on_scan(scan_rpc rpc)
         resp.error = rocksdb::Status::Code::kNotFound;
     }
 
-    _cu_calculator->add_scan_cu(req, resp.error, resp.kvs);
+    // abandon calculate capacity unit
+    if (request.only_return_count) {

Review Comment:
   ```c++
   if (!request.only_return_count)
   ```
   
   On the other hand, should `add_scan_cu` be ignored ? Since it will be count as 1 even if not found.
   ```c++
       if (status == rocksdb::Status::kNotFound) {
           add_read_cu(1);
           return;
       }
   ```



##########
src/shell/commands/data_operations.cpp:
##########
@@ -2169,6 +2169,7 @@ bool clear_data(command_executor *e, shell_context *sc, arguments args)
 bool count_data(command_executor *e, shell_context *sc, arguments args)
 {
     static struct option long_options[] = {{"precise", no_argument, 0, 'c'},
+                                           {"only_return_data_count", no_argument, 0, 'o'},

Review Comment:
   Do we need the option `only_return_data_count` ? Can it be decided accordingly ? 



##########
src/shell/command_helper.h:
##########
@@ -499,36 +501,40 @@ inline void scan_data_next(scan_data_context *context)
                             context->timeout_ms);
                         break;
                     case SCAN_COUNT:
-                        context->split_rows++;
-                        if (context->stat_size && context->statistics) {
-                            long hash_key_size = hash_key.size();
-                            context->statistics->measureTime(
-                                static_cast<uint32_t>(histogram_type::HASH_KEY_SIZE),
-                                hash_key_size);
-
-                            long sort_key_size = sort_key.size();
-                            context->statistics->measureTime(
-                                static_cast<uint32_t>(histogram_type::SORT_KEY_SIZE),
-                                sort_key_size);
-
-                            long value_size = value.size();
-                            context->statistics->measureTime(
-                                static_cast<uint32_t>(histogram_type::VALUE_SIZE), value_size);
-
-                            long row_size = hash_key_size + sort_key_size + value_size;
-                            context->statistics->measureTime(
-                                static_cast<uint32_t>(histogram_type::ROW_SIZE), row_size);
-
-                            if (context->top_count > 0) {
-                                context->top_rows.push(
-                                    std::move(hash_key), std::move(sort_key), row_size);
+                        if (kv_count == -1) {

Review Comment:
   Does `kv_count == -1` means `only_return_count` ? Can `only_return_count` be used as the condition to be checked ?



##########
src/server/pegasus_server_impl.cpp:
##########
@@ -2316,24 +2332,28 @@ pegasus_server_impl::append_key_value_for_scan(std::vector<::dsn::apps::key_valu
             return range_iteration_state::kFiltered;
         }
     }
-    std::shared_ptr<char> key_buf(::dsn::utils::make_shared_array<char>(raw_key.length()));
-    ::memcpy(key_buf.get(), raw_key.data(), raw_key.length());
-    kv.key.assign(std::move(key_buf), 0, raw_key.length());
+    if (fill_value) {

Review Comment:
   ```c++
   if (!fill_value) {
       return range_iteration_state::kNormal;
   }
   ```



##########
src/server/pegasus_server_impl.cpp:
##########
@@ -1198,7 +1198,8 @@ void pegasus_server_impl::on_get_scanner(get_scanner_rpc rpc)
             epoch_now,
             request.no_value,
             request.__isset.validate_partition_hash ? request.validate_partition_hash : true,
-            return_expire_ts);
+            return_expire_ts,
+            request.only_return_count ? false : true);

Review Comment:
   ```c++
   !request.only_return_count);
   ```



##########
src/shell/command_helper.h:
##########
@@ -499,36 +501,40 @@ inline void scan_data_next(scan_data_context *context)
                             context->timeout_ms);
                         break;
                     case SCAN_COUNT:
-                        context->split_rows++;
-                        if (context->stat_size && context->statistics) {
-                            long hash_key_size = hash_key.size();
-                            context->statistics->measureTime(
-                                static_cast<uint32_t>(histogram_type::HASH_KEY_SIZE),
-                                hash_key_size);
-
-                            long sort_key_size = sort_key.size();
-                            context->statistics->measureTime(
-                                static_cast<uint32_t>(histogram_type::SORT_KEY_SIZE),
-                                sort_key_size);
-
-                            long value_size = value.size();
-                            context->statistics->measureTime(
-                                static_cast<uint32_t>(histogram_type::VALUE_SIZE), value_size);
-
-                            long row_size = hash_key_size + sort_key_size + value_size;
-                            context->statistics->measureTime(
-                                static_cast<uint32_t>(histogram_type::ROW_SIZE), row_size);
-
-                            if (context->top_count > 0) {
-                                context->top_rows.push(
-                                    std::move(hash_key), std::move(sort_key), row_size);
+                        if (kv_count == -1) {
+                            context->split_rows++;
+                            if (context->stat_size && context->statistics) {
+                                long hash_key_size = hash_key.size();
+                                context->statistics->measureTime(
+                                    static_cast<uint32_t>(histogram_type::HASH_KEY_SIZE),
+                                    hash_key_size);
+
+                                long sort_key_size = sort_key.size();
+                                context->statistics->measureTime(
+                                    static_cast<uint32_t>(histogram_type::SORT_KEY_SIZE),
+                                    sort_key_size);
+
+                                long value_size = value.size();
+                                context->statistics->measureTime(
+                                    static_cast<uint32_t>(histogram_type::VALUE_SIZE), value_size);
+
+                                long row_size = hash_key_size + sort_key_size + value_size;
+                                context->statistics->measureTime(
+                                    static_cast<uint32_t>(histogram_type::ROW_SIZE), row_size);
+
+                                if (context->top_count > 0) {
+                                    context->top_rows.push(
+                                        std::move(hash_key), std::move(sort_key), row_size);
+                                }
                             }
-                        }
-                        if (context->count_hash_key) {
-                            if (hash_key != context->last_hash_key) {
-                                context->split_hash_key_count++;
-                                context->last_hash_key = std::move(hash_key);
+                            if (context->count_hash_key) {
+                                if (hash_key != context->last_hash_key) {
+                                    context->split_hash_key_count++;
+                                    context->last_hash_key = std::move(hash_key);
+                                }
                             }
+                        } else {

Review Comment:
   ```c++
   if (...) {
       context->split_rows += kv_count;
       scan_data_next(context);
       break;
   }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org
For additional commands, e-mail: dev-help@pegasus.apache.org