You are viewing a plain text version of this content. The canonical link for it is here.
Posted to jira@kafka.apache.org by "artemlivshits (via GitHub)" <gi...@apache.org> on 2023/04/07 00:19:08 UTC

[GitHub] [kafka] artemlivshits commented on a diff in pull request #13391: KAFKA-14561: Improve transactions experience for older clients by ensuring ongoing transaction

artemlivshits commented on code in PR #13391:
URL: https://github.com/apache/kafka/pull/13391#discussion_r1160343683


##########
core/src/main/scala/kafka/server/AddPartitionsToTxnManager.scala:
##########
@@ -0,0 +1,170 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package kafka.server
+
+import kafka.common.{InterBrokerSendThread, RequestAndCompletionHandler}
+import org.apache.kafka.clients.{ClientResponse, NetworkClient, RequestCompletionHandler}
+import org.apache.kafka.common.{InvalidRecordException, Node, TopicPartition}
+import org.apache.kafka.common.message.AddPartitionsToTxnRequestData.{AddPartitionsToTxnTransaction, AddPartitionsToTxnTransactionCollection}
+import org.apache.kafka.common.protocol.Errors
+import org.apache.kafka.common.requests.{AddPartitionsToTxnRequest, AddPartitionsToTxnResponse}
+import org.apache.kafka.common.utils.Time
+
+import scala.collection.mutable
+
+object AddPartitionsToTxnManager {
+  type AppendCallback = Map[TopicPartition, Errors] => Unit
+}
+
+
+class TransactionDataAndCallbacks(val transactionData: AddPartitionsToTxnTransactionCollection,
+                                  val callbacks: mutable.Map[String, AddPartitionsToTxnManager.AppendCallback])
+
+
+class AddPartitionsToTxnManager(config: KafkaConfig, client: NetworkClient, time: Time) 
+  extends InterBrokerSendThread("AddPartitionsToTxnSenderThread-" + config.brokerId, client, config.requestTimeoutMs, time) {
+  
+  private val inflightNodes = mutable.HashSet[Node]()
+  private val nodesToTransactions = mutable.Map[Node, TransactionDataAndCallbacks]()
+  
+  def addTxnData(node: Node, transactionData: AddPartitionsToTxnTransaction, callback: AddPartitionsToTxnManager.AppendCallback): Unit = {
+    nodesToTransactions.synchronized {
+      // Check if we have already (either node or individual transaction). Add the Node if it isn't there.
+      val currentNodeAndTransactionData = nodesToTransactions.getOrElseUpdate(node,
+        new TransactionDataAndCallbacks(
+          new AddPartitionsToTxnTransactionCollection(1),
+          mutable.Map[String, AddPartitionsToTxnManager.AppendCallback]()))
+
+      val currentTransactionData = currentNodeAndTransactionData.transactionData.find(transactionData.transactionalId)
+
+      // Check if we already have txn ID -- this should only happen in epoch bump case. If so, we should return error for old entry and remove from queue.
+      if (currentTransactionData != null) {
+        if (currentTransactionData.producerEpoch() < transactionData.producerEpoch()) {
+          val topicPartitionsToError = mutable.Map[TopicPartition, Errors]()
+          currentTransactionData.topics().forEach { topic =>
+            topic.partitions().forEach { partition =>
+              topicPartitionsToError.put(new TopicPartition(topic.name(), partition), Errors.INVALID_PRODUCER_EPOCH)
+            }
+          }
+          val oldCallback = currentNodeAndTransactionData.callbacks(transactionData.transactionalId())
+          currentNodeAndTransactionData.transactionData.remove(transactionData)
+          oldCallback(topicPartitionsToError.toMap)
+        } else {
+          // We should never see a request on the same epoch since we haven't finished handling the one in queue
+          throw new InvalidRecordException("Received a second request from the same connection without finishing the first.")
+        }
+      }
+      currentNodeAndTransactionData.transactionData.add(transactionData)
+      currentNodeAndTransactionData.callbacks.put(transactionData.transactionalId(), callback)
+      wakeup()
+    }
+  }
+
+  private class AddPartitionsToTxnHandler(node: Node, transactionDataAndCallbacks: TransactionDataAndCallbacks) extends RequestCompletionHandler {
+    override def onComplete(response: ClientResponse): Unit = {
+      inflightNodes.remove(node)
+      if (response.authenticationException() != null) {
+        error(s"AddPartitionsToTxnRequest failed for broker ${config.brokerId} with an " +
+          "authentication exception.", response.authenticationException)
+        transactionDataAndCallbacks.callbacks.foreach { case (txnId, callback) =>
+          callback(buildErrorMap(txnId, transactionDataAndCallbacks.transactionData, Errors.forException(response.authenticationException()).code()))
+        }
+      } else if (response.versionMismatch != null) {
+        // We may see unsupported version exception if we try to send a verify only request to a broker that can't handle it. 
+        // In this case, skip verification.
+        error(s"AddPartitionsToTxnRequest failed for broker ${config.brokerId} with invalid version exception. This suggests verification is not supported." +
+              s"Continuing handling the produce request.")
+        transactionDataAndCallbacks.callbacks.values.foreach(_(Map.empty))
+      } else {
+        val addPartitionsToTxnResponseData = response.responseBody.asInstanceOf[AddPartitionsToTxnResponse].data
+        if (addPartitionsToTxnResponseData.errorCode != 0) {
+          error(s"AddPartitionsToTxnRequest for broker ${config.brokerId}  returned with error ${Errors.forCode(addPartitionsToTxnResponseData.errorCode)}.")
+          // TODO: send error back correctly -- we need to verify all possible errors can be handled by the client.
+          // errors -- versionmismatch --> handled above
+          //        -- clusterauth --> should handle differently
+          transactionDataAndCallbacks.callbacks.foreach { case (txnId, callback) =>
+            callback(buildErrorMap(txnId, transactionDataAndCallbacks.transactionData, addPartitionsToTxnResponseData.errorCode()))
+          }
+        } else {
+          addPartitionsToTxnResponseData.resultsByTransaction().forEach { transactionResult =>

Review Comment:
   If we don't want to put too many things in one change, we could implement the race condition checks in a separate change -- even though we didn't fully fix the problem, we didn't regress (in fact, we improved quite a bit).  On the other hand, fixing the localTime metric should be done in this change: it worked before this change, so if we don't fix it, it would be a regression.
   
   Another approach (if it makes things simpler in any way) could be to split out the framework for running callbacks on request threads and add the metrics there, then rebase this change on top of that framework, so that this change focuses on the transaction-specific stuff.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: jira-unsubscribe@kafka.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org