You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by da...@apache.org on 2023/06/06 14:15:57 UTC

[doris] branch master updated: [enhancement](cooldown) use cooldown replica first when generating scan node (#20384)

This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new c991249360 [enhancement](cooldown) use cooldown replica first when generating scan node (#20384)
c991249360 is described below

commit c9912493604e7e9a9e55fe78e51a59b3cd383530
Author: AlexYue <yj...@gmail.com>
AuthorDate: Tue Jun 6 22:15:49 2023 +0800

    [enhancement](cooldown) use cooldown replica first when generating scan node (#20384)
---
 be/src/olap/rowset/beta_rowset_reader.cpp          |  1 +
 .../main/java/org/apache/doris/catalog/Tablet.java |  4 ++++
 .../org/apache/doris/planner/OlapScanNode.java     | 23 ++++++++++++++++++++++
 3 files changed, 28 insertions(+)

diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp
index 2df50d1177..9472948167 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -196,6 +196,7 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context
     _read_options.read_orderby_key_reverse = read_context->read_orderby_key_reverse;
     _read_options.read_orderby_key_columns = read_context->read_orderby_key_columns;
     _read_options.io_ctx.reader_type = read_context->reader_type;
+    _read_options.io_ctx.file_cache_stats = &read_context->stats->file_cache_stats;
     _read_options.runtime_state = read_context->runtime_state;
     _read_options.output_columns = read_context->output_columns;
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java
index d9a4ee46c6..078179b823 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java
@@ -154,6 +154,10 @@ public class Tablet extends MetaObject implements Writable {
         cooldownConfLock.writeLock().unlock();
     }
 
+    public long getCooldownReplicaId() {
+        return cooldownReplicaId;
+    }
+
     public Pair<Long, Long> getCooldownConf() {
         cooldownConfLock.readLock().lock();
         try {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
index 80bac9fb55..29b43d30f2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
@@ -101,6 +101,7 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
+import java.util.Optional;
 import java.util.Set;
 import java.util.stream.Collectors;
 
@@ -766,6 +767,28 @@ public class OlapScanNode extends ScanNode {
                 replicas.clear();
                 replicas.add(replica);
             }
+            final long coolDownReplicaId = tablet.getCooldownReplicaId();
+            // we prefer to query using cooldown replica to make sure the cache is fully utilized
+            // for example: consider there are 3BEs(A,B,C) and each has one replica for tablet X. and X
+            // is now under cooldown
+            // first time we choose BE A, and A will download data into cache while the other two's cache is empty
+            // second time we choose BE B, this time B will be cached, C is still empty
+            // third time we choose BE C, after this time all replica is cached
+            // but it means we will do 3 S3 IO to get the data which will bring 3 slow query
+            if (-1L != coolDownReplicaId) {
+                final Optional<Replica> replicaOptional = replicas.stream()
+                                .filter(r -> r.getId() == coolDownReplicaId).findAny();
+                replicaOptional.ifPresent(
+                        r -> {
+                            Backend backend = Env.getCurrentSystemInfo()
+                                    .getBackend(r.getBackendId());
+                            if (backend != null && backend.isAlive()) {
+                                replicas.clear();
+                                replicas.add(r);
+                            }
+                        }
+                );
+            }
             boolean tabletIsNull = true;
             boolean collectedStat = false;
             List<String> errs = Lists.newArrayList();


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org