You are viewing a plain text version of this content. The canonical link for it is here.
Posted to jira@kafka.apache.org by GitBox <gi...@apache.org> on 2020/06/03 01:40:39 UTC

[GitHub] [kafka] hachikuji commented on a change in pull request #8724: KAFKA-10040; Make computing the PreferredReplicaImbalanceCount metric more efficient

hachikuji commented on a change in pull request #8724:
URL: https://github.com/apache/kafka/pull/8724#discussion_r434262971



##########
File path: core/src/main/scala/kafka/controller/KafkaController.scala
##########
@@ -1254,23 +1254,7 @@ class KafkaController(val config: KafkaConfig,
       if (!isActive) {
         0
       } else {
-        controllerContext.allPartitions.count { topicPartition =>

Review comment:
       Ouch. So the main issue is that we were doing this full iteration overall partitions on _every_ controller event.

##########
File path: core/src/main/scala/kafka/controller/ControllerContext.scala
##########
@@ -391,6 +404,54 @@ class ControllerContext {
     partitionsForTopic(topic).filter { partition => states.contains(partitionState(partition)) }.toSet
   }
 
+  def putLeadershipInfo(partition: TopicPartition, leaderIsrAndControllerEpoch: LeaderIsrAndControllerEpoch): Unit = {
+    val previous = partitionLeadershipInfo.put(partition, leaderIsrAndControllerEpoch)
+    val replicaAssignment = partitionFullReplicaAssignment(partition)
+    updatePreferredReplicaImbalanceMetric(partition, Some(replicaAssignment), previous,
+      Some(replicaAssignment), Some(leaderIsrAndControllerEpoch))
+  }
+
+  private def updatePreferredReplicaImbalanceMetric(partition: TopicPartition,
+                                                    oldReplicaAssignment: Option[ReplicaAssignment],
+                                                    oldLeadershipInfo: Option[LeaderIsrAndControllerEpoch],
+                                                    newReplicaAssignment: Option[ReplicaAssignment],
+                                                    newLeadershipInfo: Option[LeaderIsrAndControllerEpoch]): Unit = {
+    if (!isTopicQueuedUpForDeletion(partition.topic)) {
+      oldReplicaAssignment.foreach { replicaAssignment =>
+        oldLeadershipInfo.foreach { leadershipInfo =>
+          if (isReplicaImbalance(replicaAssignment, leadershipInfo))
+            preferredReplicaImbalanceCount -= 1
+        }
+      }
+
+      newReplicaAssignment.foreach { replicaAssignment =>
+        newLeadershipInfo.foreach { leadershipInfo =>
+          if (isReplicaImbalance(replicaAssignment, leadershipInfo))
+            preferredReplicaImbalanceCount += 1
+        }
+      }
+    }
+  }
+
+  private def cleanPreferredReplicaImbalanceMetric(topic: String): Unit = {
+    partitionAssignments.getOrElse(topic, mutable.Map.empty).foreach { case (partition, replicaAssignment) =>
+      partitionLeadershipInfo.get(new TopicPartition(topic, partition)).foreach { leadershipInfo =>
+        if (isReplicaImbalance(replicaAssignment, leadershipInfo))
+          preferredReplicaImbalanceCount -= 1
+      }
+    }
+  }
+
+  private def isReplicaImbalance(replicaAssignment: ReplicaAssignment,

Review comment:
       What about `hasPreferredLeader`?

##########
File path: core/src/main/scala/kafka/controller/ControllerContext.scala
##########
@@ -391,6 +404,54 @@ class ControllerContext {
     partitionsForTopic(topic).filter { partition => states.contains(partitionState(partition)) }.toSet
   }
 
+  def putLeadershipInfo(partition: TopicPartition, leaderIsrAndControllerEpoch: LeaderIsrAndControllerEpoch): Unit = {

Review comment:
       Wonder if we should consider making `partitionLeadershipInfo` private so that we do not mistakenly invoke `put` in the future directly.

##########
File path: core/src/main/scala/kafka/controller/ControllerContext.scala
##########
@@ -391,6 +404,54 @@ class ControllerContext {
     partitionsForTopic(topic).filter { partition => states.contains(partitionState(partition)) }.toSet
   }
 
+  def putLeadershipInfo(partition: TopicPartition, leaderIsrAndControllerEpoch: LeaderIsrAndControllerEpoch): Unit = {
+    val previous = partitionLeadershipInfo.put(partition, leaderIsrAndControllerEpoch)
+    val replicaAssignment = partitionFullReplicaAssignment(partition)
+    updatePreferredReplicaImbalanceMetric(partition, Some(replicaAssignment), previous,
+      Some(replicaAssignment), Some(leaderIsrAndControllerEpoch))
+  }
+
+  private def updatePreferredReplicaImbalanceMetric(partition: TopicPartition,
+                                                    oldReplicaAssignment: Option[ReplicaAssignment],
+                                                    oldLeadershipInfo: Option[LeaderIsrAndControllerEpoch],
+                                                    newReplicaAssignment: Option[ReplicaAssignment],
+                                                    newLeadershipInfo: Option[LeaderIsrAndControllerEpoch]): Unit = {
+    if (!isTopicQueuedUpForDeletion(partition.topic)) {
+      oldReplicaAssignment.foreach { replicaAssignment =>
+        oldLeadershipInfo.foreach { leadershipInfo =>
+          if (isReplicaImbalance(replicaAssignment, leadershipInfo))
+            preferredReplicaImbalanceCount -= 1
+        }
+      }
+
+      newReplicaAssignment.foreach { replicaAssignment =>
+        newLeadershipInfo.foreach { leadershipInfo =>
+          if (isReplicaImbalance(replicaAssignment, leadershipInfo))
+            preferredReplicaImbalanceCount += 1
+        }
+      }
+    }
+  }
+
+  private def cleanPreferredReplicaImbalanceMetric(topic: String): Unit = {
+    partitionAssignments.getOrElse(topic, mutable.Map.empty).foreach { case (partition, replicaAssignment) =>
+      partitionLeadershipInfo.get(new TopicPartition(topic, partition)).foreach { leadershipInfo =>
+        if (isReplicaImbalance(replicaAssignment, leadershipInfo))
+          preferredReplicaImbalanceCount -= 1
+      }
+    }
+  }
+
+  private def isReplicaImbalance(replicaAssignment: ReplicaAssignment,
+                                 leadershipInfo: LeaderIsrAndControllerEpoch): Boolean = {
+    val preferredReplica = replicaAssignment.replicas.head
+    if (replicaAssignment.isBeingReassigned && replicaAssignment.addingReplicas.contains(preferredReplica))
+      // reassigning partitions are not counted as imbalanced until the new replica joins the ISR (completes reassignment)

Review comment:
       Not something to change here, but it's a little curious that we only do this when a reassignment is in progress. I'm not sure it's useful to distinguish that case as opposed to a broker that is catching up after a restart. If the preferred leader is not in the ISR, then we can't elect it anyway. Seems like it might be more useful if this metric captured the "eligible imbalance". 




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org