You are viewing a plain text version of this content. The canonical link for it is here.
Posted to jira@kafka.apache.org by GitBox <gi...@apache.org> on 2021/02/08 13:56:57 UTC

[GitHub] [kafka] dajac commented on a change in pull request #9430: KAFKA-5235: GetOffsetShell: support for multiple topics and consumer configuration override

dajac commented on a change in pull request #9430:
URL: https://github.com/apache/kafka/pull/9430#discussion_r572047945



##########
File path: core/src/main/scala/kafka/tools/GetOffsetShell.scala
##########
@@ -20,102 +20,131 @@ package kafka.tools
 
 import java.util.Properties
 import joptsimple._
-import kafka.utils.{CommandLineUtils, Exit, ToolsUtils}
+import kafka.utils.{CommandLineUtils, Exit, IncludeList, ToolsUtils}
 import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer}
 import org.apache.kafka.common.requests.ListOffsetsRequest
 import org.apache.kafka.common.{PartitionInfo, TopicPartition}
 import org.apache.kafka.common.serialization.ByteArrayDeserializer
+import org.apache.kafka.common.utils.Utils
 
+import java.util.regex.Pattern
 import scala.jdk.CollectionConverters._
 import scala.collection.Seq
+import scala.math.Ordering.Implicits.infixOrderingOps
 
 object GetOffsetShell {
+  private val topicPartitionPattern = Pattern.compile( "([^:,]*)(?::([0-9]*)(?:-([0-9]*))?)?")
 
   def main(args: Array[String]): Unit = {
+    try {
+      fetchOffsets(args)
+    } catch {
+      case e: Exception => Exit.exit(1, Some(e.getMessage))

Review comment:
       It seems that we need to print out something here for the user. The default exit procedure doesn't do anything with the provided message. Take a look at the other commands to see how we handle this.

##########
File path: core/src/main/scala/kafka/tools/GetOffsetShell.scala
##########
@@ -132,23 +161,85 @@ object GetOffsetShell {
         }
     }
 
-    partitionOffsets.toArray.sortBy { case (tp, _) => tp.partition }.foreach { case (tp, offset) =>
-      println(s"$topic:${tp.partition}:${Option(offset).getOrElse("")}")
+    partitionOffsets.toSeq.sortWith((tp1, tp2) => compareTopicPartitions(tp1._1, tp2._1)).foreach {
+      case (tp, offset) => println(s"${tp.topic}:${tp.partition}:${Option(offset).getOrElse("")}")
     }
+  }
 
+  def compareTopicPartitions(a: TopicPartition, b: TopicPartition): Boolean = {
+    (a.topic(), a.partition()) < (b.topic(), b.partition())
   }
 
   /**
-   * Return the partition infos for `topic`. If the topic does not exist, `None` is returned.
+   * Creates a topic-partition filter based on a list of patterns.
+   * Expected format:
+   * List: TopicPartitionPattern(, TopicPartitionPattern)*
+   * TopicPartitionPattern: TopicPattern(:PartitionPattern)? | :PartitionPattern
+   * TopicPattern: REGEX
+   * PartitionPattern: NUMBER | NUMBER-(NUMBER)? | -NUMBER
    */
-  private def listPartitionInfos(consumer: KafkaConsumer[_, _], topic: String, partitionIds: Set[Int]): Option[Seq[PartitionInfo]] = {
-    val partitionInfos = consumer.listTopics.asScala.filter { case (k, _) => k == topic }.values.flatMap(_.asScala).toBuffer
-    if (partitionInfos.isEmpty)
-      None
-    else if (partitionIds.isEmpty)
-      Some(partitionInfos)
-    else
-      Some(partitionInfos.filter(p => partitionIds.contains(p.partition)))
+  def createTopicPartitionFilterWithPatternList(topicPartitions: String, excludeInternalTopics: Boolean): PartitionInfo => Boolean = {
+    val ruleSpecs = topicPartitions.split(",")
+    val rules = ruleSpecs.map { ruleSpec => parseRuleSpec(ruleSpec, excludeInternalTopics) }
+    tp => rules.exists(rule => rule.apply(tp))
   }
 
+  def parseRuleSpec(ruleSpec: String, excludeInternalTopics: Boolean): PartitionInfo => Boolean = {
+    def wrapNullOrEmpty(s: String): Option[String] = {
+      if (s == null || s.isEmpty)
+        None
+      else
+        Some(s)
+    }
+
+    val matcher = topicPartitionPattern.matcher(ruleSpec)
+    if (!matcher.matches() || matcher.groupCount() == 0)

Review comment:
       I don't think that `matcher.groupCount() == 0` is necessary. My understanding is that it will never be `0` as we have groups defined in the regex.

##########
File path: core/src/main/scala/kafka/tools/GetOffsetShell.scala
##########
@@ -20,102 +20,131 @@ package kafka.tools
 
 import java.util.Properties
 import joptsimple._
-import kafka.utils.{CommandLineUtils, Exit, ToolsUtils}
+import kafka.utils.{CommandLineUtils, Exit, IncludeList, ToolsUtils}
 import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer}
 import org.apache.kafka.common.requests.ListOffsetsRequest
 import org.apache.kafka.common.{PartitionInfo, TopicPartition}
 import org.apache.kafka.common.serialization.ByteArrayDeserializer
+import org.apache.kafka.common.utils.Utils
 
+import java.util.regex.Pattern
 import scala.jdk.CollectionConverters._
 import scala.collection.Seq
+import scala.math.Ordering.Implicits.infixOrderingOps
 
 object GetOffsetShell {
+  private val topicPartitionPattern = Pattern.compile( "([^:,]*)(?::([0-9]*)(?:-([0-9]*))?)?")

Review comment:
      nit: We usually capitalize the first letter of constants in Scala.

##########
File path: core/src/main/scala/kafka/tools/GetOffsetShell.scala
##########
@@ -132,23 +161,85 @@ object GetOffsetShell {
         }
     }
 
-    partitionOffsets.toArray.sortBy { case (tp, _) => tp.partition }.foreach { case (tp, offset) =>
-      println(s"$topic:${tp.partition}:${Option(offset).getOrElse("")}")
+    partitionOffsets.toSeq.sortWith((tp1, tp2) => compareTopicPartitions(tp1._1, tp2._1)).foreach {
+      case (tp, offset) => println(s"${tp.topic}:${tp.partition}:${Option(offset).getOrElse("")}")
     }
+  }
 
+  def compareTopicPartitions(a: TopicPartition, b: TopicPartition): Boolean = {
+    (a.topic(), a.partition()) < (b.topic(), b.partition())
   }
 
   /**
-   * Return the partition infos for `topic`. If the topic does not exist, `None` is returned.
+   * Creates a topic-partition filter based on a list of patterns.
+   * Expected format:
+   * List: TopicPartitionPattern(, TopicPartitionPattern)*
+   * TopicPartitionPattern: TopicPattern(:PartitionPattern)? | :PartitionPattern
+   * TopicPattern: REGEX
+   * PartitionPattern: NUMBER | NUMBER-(NUMBER)? | -NUMBER
    */
-  private def listPartitionInfos(consumer: KafkaConsumer[_, _], topic: String, partitionIds: Set[Int]): Option[Seq[PartitionInfo]] = {
-    val partitionInfos = consumer.listTopics.asScala.filter { case (k, _) => k == topic }.values.flatMap(_.asScala).toBuffer
-    if (partitionInfos.isEmpty)
-      None
-    else if (partitionIds.isEmpty)
-      Some(partitionInfos)
-    else
-      Some(partitionInfos.filter(p => partitionIds.contains(p.partition)))
+  def createTopicPartitionFilterWithPatternList(topicPartitions: String, excludeInternalTopics: Boolean): PartitionInfo => Boolean = {
+    val ruleSpecs = topicPartitions.split(",")
+    val rules = ruleSpecs.map { ruleSpec => parseRuleSpec(ruleSpec, excludeInternalTopics) }
+    tp => rules.exists(rule => rule.apply(tp))
   }
 
+  def parseRuleSpec(ruleSpec: String, excludeInternalTopics: Boolean): PartitionInfo => Boolean = {
+    def wrapNullOrEmpty(s: String): Option[String] = {
+      if (s == null || s.isEmpty)
+        None
+      else
+        Some(s)
+    }
+
+    val matcher = topicPartitionPattern.matcher(ruleSpec)
+    if (!matcher.matches() || matcher.groupCount() == 0)
+      throw new IllegalArgumentException(s"Invalid rule specification: $ruleSpec")
+
+    val topicPattern = wrapNullOrEmpty(matcher.group(1))
+    val lowerRange = if (matcher.groupCount() >= 2) wrapNullOrEmpty(matcher.group(2)) else None
+    val upperRangeSection = if (matcher.groupCount() >= 3) matcher.group(3) else null
+    val upperRange = wrapNullOrEmpty(upperRangeSection)
+    val isRange = upperRangeSection != null
+
+    val includeList = IncludeList(topicPattern.getOrElse(".*"))
+
+    val partitionFilter: Int => Boolean = if (lowerRange.isEmpty && upperRange.isEmpty) {
+      _ => true
+    } else if (lowerRange.isEmpty) {
+      val upperBound = upperRange.get.toInt
+      p => p < upperBound
+    } else if (upperRange.isEmpty) {
+      val lowerBound = lowerRange.get.toInt
+      if (isRange) {
+        p => p >= lowerBound
+      }
+      else {
+        p => p == lowerBound
+      }
+    } else {
+      val upperBound = upperRange.get.toInt
+      val lowerBound = lowerRange.get.toInt
+      p => p >= lowerBound && p < upperBound
+    }
+
+    tp => includeList.isTopicAllowed(tp.topic(), excludeInternalTopics) && partitionFilter.apply(tp.partition())
+  }
+
+  /**
+   * Creates a topic-partition filter based on a topic pattern and a set of partition ids.
+   */
+  def createTopicPartitionFilterWithTopicAndPartitionPattern(topicOpt: Option[String], excludeInternalTopics: Boolean, partitionIds: Set[Int]): PartitionInfo => Boolean = {
+    val topicsFilter = IncludeList(topicOpt.getOrElse(".*"))
+    t => topicsFilter.isTopicAllowed(t.topic, excludeInternalTopics) && (partitionIds.isEmpty || partitionIds.contains(t.partition))
+  }
+
+  /**
+   * Return the partition infos. Filter them with topicPartitionFilter.
+   */
+  private def listPartitionInfos(consumer: KafkaConsumer[_, _], topicPartitionFilter: PartitionInfo => Boolean): Seq[PartitionInfo] = {
+    consumer.listTopics.asScala.values.flatMap { partitions =>
+      partitions.asScala.filter { tp => topicPartitionFilter.apply(tp) }

Review comment:
      nit: We should use parentheses here instead of curly braces. `filter(topicPartitionFilter)` might even work directly as well.

##########
File path: core/src/main/scala/kafka/tools/GetOffsetShell.scala
##########
@@ -132,23 +161,85 @@ object GetOffsetShell {
         }
     }
 
-    partitionOffsets.toArray.sortBy { case (tp, _) => tp.partition }.foreach { case (tp, offset) =>
-      println(s"$topic:${tp.partition}:${Option(offset).getOrElse("")}")
+    partitionOffsets.toSeq.sortWith((tp1, tp2) => compareTopicPartitions(tp1._1, tp2._1)).foreach {
+      case (tp, offset) => println(s"${tp.topic}:${tp.partition}:${Option(offset).getOrElse("")}")
     }
+  }
 
+  def compareTopicPartitions(a: TopicPartition, b: TopicPartition): Boolean = {
+    (a.topic(), a.partition()) < (b.topic(), b.partition())
   }
 
   /**
-   * Return the partition infos for `topic`. If the topic does not exist, `None` is returned.
+   * Creates a topic-partition filter based on a list of patterns.
+   * Expected format:
+   * List: TopicPartitionPattern(, TopicPartitionPattern)*
+   * TopicPartitionPattern: TopicPattern(:PartitionPattern)? | :PartitionPattern
+   * TopicPattern: REGEX
+   * PartitionPattern: NUMBER | NUMBER-(NUMBER)? | -NUMBER
    */
-  private def listPartitionInfos(consumer: KafkaConsumer[_, _], topic: String, partitionIds: Set[Int]): Option[Seq[PartitionInfo]] = {
-    val partitionInfos = consumer.listTopics.asScala.filter { case (k, _) => k == topic }.values.flatMap(_.asScala).toBuffer
-    if (partitionInfos.isEmpty)
-      None
-    else if (partitionIds.isEmpty)
-      Some(partitionInfos)
-    else
-      Some(partitionInfos.filter(p => partitionIds.contains(p.partition)))
+  def createTopicPartitionFilterWithPatternList(topicPartitions: String, excludeInternalTopics: Boolean): PartitionInfo => Boolean = {
+    val ruleSpecs = topicPartitions.split(",")
+    val rules = ruleSpecs.map { ruleSpec => parseRuleSpec(ruleSpec, excludeInternalTopics) }

Review comment:
      nit: We tend to use parentheses instead of curly braces when the lambda is on a single line.

##########
File path: core/src/main/scala/kafka/tools/GetOffsetShell.scala
##########
@@ -132,23 +161,85 @@ object GetOffsetShell {
         }
     }
 
-    partitionOffsets.toArray.sortBy { case (tp, _) => tp.partition }.foreach { case (tp, offset) =>
-      println(s"$topic:${tp.partition}:${Option(offset).getOrElse("")}")
+    partitionOffsets.toSeq.sortWith((tp1, tp2) => compareTopicPartitions(tp1._1, tp2._1)).foreach {
+      case (tp, offset) => println(s"${tp.topic}:${tp.partition}:${Option(offset).getOrElse("")}")
     }
+  }
 
+  def compareTopicPartitions(a: TopicPartition, b: TopicPartition): Boolean = {
+    (a.topic(), a.partition()) < (b.topic(), b.partition())
   }
 
   /**
-   * Return the partition infos for `topic`. If the topic does not exist, `None` is returned.
+   * Creates a topic-partition filter based on a list of patterns.
+   * Expected format:
+   * List: TopicPartitionPattern(, TopicPartitionPattern)*
+   * TopicPartitionPattern: TopicPattern(:PartitionPattern)? | :PartitionPattern
+   * TopicPattern: REGEX
+   * PartitionPattern: NUMBER | NUMBER-(NUMBER)? | -NUMBER
    */
-  private def listPartitionInfos(consumer: KafkaConsumer[_, _], topic: String, partitionIds: Set[Int]): Option[Seq[PartitionInfo]] = {
-    val partitionInfos = consumer.listTopics.asScala.filter { case (k, _) => k == topic }.values.flatMap(_.asScala).toBuffer
-    if (partitionInfos.isEmpty)
-      None
-    else if (partitionIds.isEmpty)
-      Some(partitionInfos)
-    else
-      Some(partitionInfos.filter(p => partitionIds.contains(p.partition)))
+  def createTopicPartitionFilterWithPatternList(topicPartitions: String, excludeInternalTopics: Boolean): PartitionInfo => Boolean = {
+    val ruleSpecs = topicPartitions.split(",")
+    val rules = ruleSpecs.map { ruleSpec => parseRuleSpec(ruleSpec, excludeInternalTopics) }
+    tp => rules.exists(rule => rule.apply(tp))
   }
 
+  def parseRuleSpec(ruleSpec: String, excludeInternalTopics: Boolean): PartitionInfo => Boolean = {
+    def wrapNullOrEmpty(s: String): Option[String] = {
+      if (s == null || s.isEmpty)
+        None
+      else
+        Some(s)
+    }
+
+    val matcher = topicPartitionPattern.matcher(ruleSpec)
+    if (!matcher.matches() || matcher.groupCount() == 0)
+      throw new IllegalArgumentException(s"Invalid rule specification: $ruleSpec")
+
+    val topicPattern = wrapNullOrEmpty(matcher.group(1))
+    val lowerRange = if (matcher.groupCount() >= 2) wrapNullOrEmpty(matcher.group(2)) else None
+    val upperRangeSection = if (matcher.groupCount() >= 3) matcher.group(3) else null

Review comment:
      The `null` instead of the `None` is quite subtle. I did not see it at first. See my comment which suggests pushing more into the regex.

##########
File path: core/src/main/scala/kafka/tools/GetOffsetShell.scala
##########
@@ -132,23 +161,85 @@ object GetOffsetShell {
         }
     }
 
-    partitionOffsets.toArray.sortBy { case (tp, _) => tp.partition }.foreach { case (tp, offset) =>
-      println(s"$topic:${tp.partition}:${Option(offset).getOrElse("")}")
+    partitionOffsets.toSeq.sortWith((tp1, tp2) => compareTopicPartitions(tp1._1, tp2._1)).foreach {

Review comment:
       nit: `sortWith((tp1, tp2) => compareTopicPartitions(tp1._1, tp2._1))` -> `sortWith(compareTopicPartitions)`

##########
File path: core/src/main/scala/kafka/tools/GetOffsetShell.scala
##########
@@ -20,102 +20,131 @@ package kafka.tools
 
 import java.util.Properties
 import joptsimple._
-import kafka.utils.{CommandLineUtils, Exit, ToolsUtils}
+import kafka.utils.{CommandLineUtils, Exit, IncludeList, ToolsUtils}
 import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer}
 import org.apache.kafka.common.requests.ListOffsetsRequest
 import org.apache.kafka.common.{PartitionInfo, TopicPartition}
 import org.apache.kafka.common.serialization.ByteArrayDeserializer
+import org.apache.kafka.common.utils.Utils
 
+import java.util.regex.Pattern
 import scala.jdk.CollectionConverters._
 import scala.collection.Seq
+import scala.math.Ordering.Implicits.infixOrderingOps
 
 object GetOffsetShell {
+  private val topicPartitionPattern = Pattern.compile( "([^:,]*)(?::([0-9]*)(?:-([0-9]*))?)?")
 
   def main(args: Array[String]): Unit = {
+    try {
+      fetchOffsets(args)
+    } catch {
+      case e: Exception => Exit.exit(1, Some(e.getMessage))
+    }
+  }
+
+  private def fetchOffsets(args: Array[String]): Unit = {
     val parser = new OptionParser(false)
-    val brokerListOpt = parser.accepts("broker-list", "REQUIRED: The list of hostname and port of the server to connect to.")
+    val brokerListOpt = parser.accepts("broker-list", "DEPRECATED, use --bootstrap-server instead; ignored if --bootstrap-server is specified. The server(s) to connect to in the form HOST1:PORT1,HOST2:PORT2.")
                            .withRequiredArg
-                           .describedAs("hostname:port,...,hostname:port")
+                           .describedAs("HOST1:PORT1,...,HOST3:PORT3")
                            .ofType(classOf[String])
-    val topicOpt = parser.accepts("topic", "REQUIRED: The topic to get offset from.")
+    val bootstrapServerOpt = parser.accepts("bootstrap-server", "REQUIRED. The server(s) to connect to in the form HOST1:PORT1,HOST2:PORT2.")
+                           .requiredUnless("broker-list")
+                           .withRequiredArg
+                           .describedAs("HOST1:PORT1,...,HOST3:PORT3")
+                           .ofType(classOf[String])
+    val topicPartitionsOpt = parser.accepts("topic-partitions", s"Comma separated list of topic-partition patterns to get the offsets for, with the format of '$topicPartitionPattern'." +
+                                            " The first group is an optional regex for the topic name, if omitted, it matches any topic name." +
+                                            " The section after ':' describes a 'partition' pattern, which can be: a number, a range in the format of 'NUMBER-NUMBER' (lower inclusive, upper exclusive), an inclusive lower bound in the format of 'NUMBER-', an exclusive upper bound in the format of '-NUMBER' or may be omitted to accept all partitions.")
+                           .withRequiredArg
+                           .describedAs("topic1:1,topic2:0-3,topic3,topic4:5-,topic5:-3")
+                           .ofType(classOf[String])
+    val topicOpt = parser.accepts("topic", s"The topic to get the offsets for. It also accepts a regular expression. If not present, all authorized topics are queried. Cannot be used if --topic-partitions is present.")
                            .withRequiredArg
                            .describedAs("topic")
                            .ofType(classOf[String])
-    val partitionOpt = parser.accepts("partitions", "comma separated list of partition ids. If not specified, it will find offsets for all partitions")
+    val partitionsOpt = parser.accepts("partitions", s"Comma separated list of partition ids to get the offsets for. If not present, all partitions of the authorized topics are queried. Cannot be used if --topic-partitions is present.")
                            .withRequiredArg
                            .describedAs("partition ids")
                            .ofType(classOf[String])
-                           .defaultsTo("")
-    val timeOpt = parser.accepts("time", "timestamp of the offsets before that. [Note: No offset is returned, if the timestamp greater than recently commited record timestamp is given.]")
+    val timeOpt = parser.accepts("time", "timestamp of the offsets before that. [Note: No offset is returned, if the timestamp greater than recently committed record timestamp is given.]")
                            .withRequiredArg
                            .describedAs("timestamp/-1(latest)/-2(earliest)")
                            .ofType(classOf[java.lang.Long])
                            .defaultsTo(-1L)
-    parser.accepts("offsets", "DEPRECATED AND IGNORED: number of offsets returned")
-                           .withRequiredArg
-                           .describedAs("count")
-                           .ofType(classOf[java.lang.Integer])
-                           .defaultsTo(1)
-    parser.accepts("max-wait-ms", "DEPRECATED AND IGNORED: The max amount of time each fetch request waits.")
+    val commandConfigOpt = parser.accepts("command-config", s"Property file containing configs to be passed to Consumer Client.")
                            .withRequiredArg
-                           .describedAs("ms")
-                           .ofType(classOf[java.lang.Integer])
-                           .defaultsTo(1000)
+                           .describedAs("config file")
+                           .ofType(classOf[String])
+    val excludeInternalTopicsOpt = parser.accepts("exclude-internal-topics", s"By default, internal topics are included. If specified, internal topics are excluded.")
 
-   if (args.length == 0)
-      CommandLineUtils.printUsageAndDie(parser, "An interactive shell for getting topic offsets.")
+    if (args.length == 0)
+      CommandLineUtils.printUsageAndDie(parser, "An interactive shell for getting topic-partition offsets.")
 
     val options = parser.parse(args : _*)
 
-    CommandLineUtils.checkRequiredArgs(parser, options, brokerListOpt, topicOpt)
+    val effectiveBrokerListOpt = if (options.has(bootstrapServerOpt))
+      bootstrapServerOpt
+    else
+      brokerListOpt
+
+    CommandLineUtils.checkRequiredArgs(parser, options, effectiveBrokerListOpt)
 
     val clientId = "GetOffsetShell"
-    val brokerList = options.valueOf(brokerListOpt)
+    val brokerList = options.valueOf(effectiveBrokerListOpt)
+
     ToolsUtils.validatePortOrDie(parser, brokerList)
-    val topic = options.valueOf(topicOpt)
-    val partitionIdsRequested: Set[Int] = {
-      val partitionsString = options.valueOf(partitionOpt)
-      if (partitionsString.isEmpty)
-        Set.empty
-      else
-        partitionsString.split(",").map { partitionString =>
-          try partitionString.toInt
-          catch {
-            case _: NumberFormatException =>
-              System.err.println(s"--partitions expects a comma separated list of numeric partition ids, but received: $partitionsString")
-              Exit.exit(1)
-          }
-        }.toSet
+    val excludeInternalTopics = options.has(excludeInternalTopicsOpt)
+
+    if (options.has(topicPartitionsOpt) && (options.has(topicOpt) || options.has(partitionsOpt))) {
+      throw new IllegalArgumentException("--topic-partitions cannot be used with --topic or --partitions")
     }
+
     val listOffsetsTimestamp = options.valueOf(timeOpt).longValue
 
-    val config = new Properties
+    val topicPartitionFilter = if (options.has(topicPartitionsOpt)) {
+      createTopicPartitionFilterWithPatternList(options.valueOf(topicPartitionsOpt), excludeInternalTopics)
+    } else {
+      val partitionIdsRequested: Set[Int] = {
+        val partitionsString = options.valueOf(partitionsOpt)
+        if (partitionsString == null || partitionsString.isEmpty)
+          Set.empty
+        else
+          partitionsString.split(",").map { partitionString =>
+            try partitionString.toInt
+            catch {
+              case _: NumberFormatException =>
+                throw new IllegalArgumentException(s"--partitions expects a comma separated list of numeric " +
+                  s"partition ids, but received: $partitionsString")
+            }
+          }.toSet
+      }
+
+      createTopicPartitionFilterWithTopicAndPartitionPattern(
+        if (options.has(topicOpt)) Some(options.valueOf(topicOpt)) else None,
+        excludeInternalTopics,
+        partitionIdsRequested
+      )
+    }
+
+    val config = if (options.has(commandConfigOpt))
+      Utils.loadProps(options.valueOf(commandConfigOpt))
+    else
+      new Properties
     config.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList)
     config.setProperty(ConsumerConfig.CLIENT_ID_CONFIG, clientId)
     val consumer = new KafkaConsumer(config, new ByteArrayDeserializer, new ByteArrayDeserializer)

Review comment:
       Not related to your changes but it seems that we never close the consumer. It would be great if we could do it.

##########
File path: core/src/main/scala/kafka/tools/GetOffsetShell.scala
##########
@@ -132,23 +161,85 @@ object GetOffsetShell {
         }
     }
 
-    partitionOffsets.toArray.sortBy { case (tp, _) => tp.partition }.foreach { case (tp, offset) =>
-      println(s"$topic:${tp.partition}:${Option(offset).getOrElse("")}")
+    partitionOffsets.toSeq.sortWith((tp1, tp2) => compareTopicPartitions(tp1._1, tp2._1)).foreach {
+      case (tp, offset) => println(s"${tp.topic}:${tp.partition}:${Option(offset).getOrElse("")}")
     }
+  }
 
+  def compareTopicPartitions(a: TopicPartition, b: TopicPartition): Boolean = {
+    (a.topic(), a.partition()) < (b.topic(), b.partition())
   }
 
   /**
-   * Return the partition infos for `topic`. If the topic does not exist, `None` is returned.
+   * Creates a topic-partition filter based on a list of patterns.
+   * Expected format:
+   * List: TopicPartitionPattern(, TopicPartitionPattern)*
+   * TopicPartitionPattern: TopicPattern(:PartitionPattern)? | :PartitionPattern
+   * TopicPattern: REGEX
+   * PartitionPattern: NUMBER | NUMBER-(NUMBER)? | -NUMBER
    */
-  private def listPartitionInfos(consumer: KafkaConsumer[_, _], topic: String, partitionIds: Set[Int]): Option[Seq[PartitionInfo]] = {
-    val partitionInfos = consumer.listTopics.asScala.filter { case (k, _) => k == topic }.values.flatMap(_.asScala).toBuffer
-    if (partitionInfos.isEmpty)
-      None
-    else if (partitionIds.isEmpty)
-      Some(partitionInfos)
-    else
-      Some(partitionInfos.filter(p => partitionIds.contains(p.partition)))
+  def createTopicPartitionFilterWithPatternList(topicPartitions: String, excludeInternalTopics: Boolean): PartitionInfo => Boolean = {
+    val ruleSpecs = topicPartitions.split(",")
+    val rules = ruleSpecs.map { ruleSpec => parseRuleSpec(ruleSpec, excludeInternalTopics) }
+    tp => rules.exists(rule => rule.apply(tp))
   }
 
+  def parseRuleSpec(ruleSpec: String, excludeInternalTopics: Boolean): PartitionInfo => Boolean = {
+    def wrapNullOrEmpty(s: String): Option[String] = {
+      if (s == null || s.isEmpty)
+        None
+      else
+        Some(s)
+    }
+
+    val matcher = topicPartitionPattern.matcher(ruleSpec)
+    if (!matcher.matches() || matcher.groupCount() == 0)
+      throw new IllegalArgumentException(s"Invalid rule specification: $ruleSpec")
+
+    val topicPattern = wrapNullOrEmpty(matcher.group(1))
+    val lowerRange = if (matcher.groupCount() >= 2) wrapNullOrEmpty(matcher.group(2)) else None
+    val upperRangeSection = if (matcher.groupCount() >= 3) matcher.group(3) else null
+    val upperRange = wrapNullOrEmpty(upperRangeSection)
+    val isRange = upperRangeSection != null
+
+    val includeList = IncludeList(topicPattern.getOrElse(".*"))
+
+    val partitionFilter: Int => Boolean = if (lowerRange.isEmpty && upperRange.isEmpty) {
+      _ => true
+    } else if (lowerRange.isEmpty) {
+      val upperBound = upperRange.get.toInt
+      p => p < upperBound
+    } else if (upperRange.isEmpty) {
+      val lowerBound = lowerRange.get.toInt
+      if (isRange) {
+        p => p >= lowerBound
+      }
+      else {
+        p => p == lowerBound
+      }
+    } else {
+      val upperBound = upperRange.get.toInt
+      val lowerBound = lowerRange.get.toInt
+      p => p >= lowerBound && p < upperBound
+    }
+
+    tp => includeList.isTopicAllowed(tp.topic(), excludeInternalTopics) && partitionFilter.apply(tp.partition())

Review comment:
      nit: The parentheses after `topic` and `partition` could be omitted. It seems that we could call `partitionFilter` directly without the `apply`.

##########
File path: core/src/main/scala/kafka/tools/GetOffsetShell.scala
##########
@@ -132,23 +161,85 @@ object GetOffsetShell {
         }
     }
 
-    partitionOffsets.toArray.sortBy { case (tp, _) => tp.partition }.foreach { case (tp, offset) =>
-      println(s"$topic:${tp.partition}:${Option(offset).getOrElse("")}")
+    partitionOffsets.toSeq.sortWith((tp1, tp2) => compareTopicPartitions(tp1._1, tp2._1)).foreach {
+      case (tp, offset) => println(s"${tp.topic}:${tp.partition}:${Option(offset).getOrElse("")}")
     }
+  }
 
+  def compareTopicPartitions(a: TopicPartition, b: TopicPartition): Boolean = {
+    (a.topic(), a.partition()) < (b.topic(), b.partition())
   }
 
   /**
-   * Return the partition infos for `topic`. If the topic does not exist, `None` is returned.
+   * Creates a topic-partition filter based on a list of patterns.
+   * Expected format:
+   * List: TopicPartitionPattern(, TopicPartitionPattern)*
+   * TopicPartitionPattern: TopicPattern(:PartitionPattern)? | :PartitionPattern
+   * TopicPattern: REGEX
+   * PartitionPattern: NUMBER | NUMBER-(NUMBER)? | -NUMBER
    */
-  private def listPartitionInfos(consumer: KafkaConsumer[_, _], topic: String, partitionIds: Set[Int]): Option[Seq[PartitionInfo]] = {
-    val partitionInfos = consumer.listTopics.asScala.filter { case (k, _) => k == topic }.values.flatMap(_.asScala).toBuffer
-    if (partitionInfos.isEmpty)
-      None
-    else if (partitionIds.isEmpty)
-      Some(partitionInfos)
-    else
-      Some(partitionInfos.filter(p => partitionIds.contains(p.partition)))
+  def createTopicPartitionFilterWithPatternList(topicPartitions: String, excludeInternalTopics: Boolean): PartitionInfo => Boolean = {
+    val ruleSpecs = topicPartitions.split(",")
+    val rules = ruleSpecs.map { ruleSpec => parseRuleSpec(ruleSpec, excludeInternalTopics) }
+    tp => rules.exists(rule => rule.apply(tp))
   }
 
+  def parseRuleSpec(ruleSpec: String, excludeInternalTopics: Boolean): PartitionInfo => Boolean = {
+    def wrapNullOrEmpty(s: String): Option[String] = {
+      if (s == null || s.isEmpty)
+        None
+      else
+        Some(s)
+    }
+
+    val matcher = topicPartitionPattern.matcher(ruleSpec)
+    if (!matcher.matches() || matcher.groupCount() == 0)
+      throw new IllegalArgumentException(s"Invalid rule specification: $ruleSpec")
+
+    val topicPattern = wrapNullOrEmpty(matcher.group(1))
+    val lowerRange = if (matcher.groupCount() >= 2) wrapNullOrEmpty(matcher.group(2)) else None
+    val upperRangeSection = if (matcher.groupCount() >= 3) matcher.group(3) else null
+    val upperRange = wrapNullOrEmpty(upperRangeSection)
+    val isRange = upperRangeSection != null
+
+    val includeList = IncludeList(topicPattern.getOrElse(".*"))
+
+    val partitionFilter: Int => Boolean = if (lowerRange.isEmpty && upperRange.isEmpty) {
+      _ => true
+    } else if (lowerRange.isEmpty) {
+      val upperBound = upperRange.get.toInt
+      p => p < upperBound
+    } else if (upperRange.isEmpty) {
+      val lowerBound = lowerRange.get.toInt
+      if (isRange) {
+        p => p >= lowerBound
+      }
+      else {
+        p => p == lowerBound
+      }
+    } else {
+      val upperBound = upperRange.get.toInt
+      val lowerBound = lowerRange.get.toInt
+      p => p >= lowerBound && p < upperBound
+    }

Review comment:
       I think that we could further simplify this by differentiating the range case from the single partition case in the regex directly. We could for instance use the following regex: `"([^:,]*)(?::(?:([0-9]*)|(?:([0-9]*)-([0-9]*))))?"`.
   
   It allows to do something like this for the processing:
   ```
       val matcher = topicPartitionPattern.matcher(ruleSpec)
       if (!matcher.matches())
         throw new IllegalArgumentException(s"Invalid rule specification: $ruleSpec")
   
       def group(group: Int): Option[String] = {
         Option(matcher.group(group)).filter(s => s != null && s.nonEmpty)
       }
   
       val topicFilter = IncludeList(group(1).getOrElse(".*"))
       val partitionFilter = group(2).map(_.toInt) match {
         case Some(partition) =>
           (p: Int) => p == partition
         case None =>
           val lowerRange = group(3).map(_.toInt).getOrElse(0)
           val upperRange = group(4).map(_.toInt).getOrElse(Int.MaxValue)
           (p: Int) => p >= lowerRange && p < upperRange
       }
   ```
   
   Basically, if no single partition is provided, we use a range with `0` and `Int.MaxValue` as defaults.

##########
File path: core/src/main/scala/kafka/tools/GetOffsetShell.scala
##########
@@ -132,23 +161,85 @@ object GetOffsetShell {
         }
     }
 
-    partitionOffsets.toArray.sortBy { case (tp, _) => tp.partition }.foreach { case (tp, offset) =>
-      println(s"$topic:${tp.partition}:${Option(offset).getOrElse("")}")
+    partitionOffsets.toSeq.sortWith((tp1, tp2) => compareTopicPartitions(tp1._1, tp2._1)).foreach {
+      case (tp, offset) => println(s"${tp.topic}:${tp.partition}:${Option(offset).getOrElse("")}")
     }
+  }
 
+  def compareTopicPartitions(a: TopicPartition, b: TopicPartition): Boolean = {
+    (a.topic(), a.partition()) < (b.topic(), b.partition())
   }
 
   /**
-   * Return the partition infos for `topic`. If the topic does not exist, `None` is returned.
+   * Creates a topic-partition filter based on a list of patterns.
+   * Expected format:
+   * List: TopicPartitionPattern(, TopicPartitionPattern)*
+   * TopicPartitionPattern: TopicPattern(:PartitionPattern)? | :PartitionPattern
+   * TopicPattern: REGEX
+   * PartitionPattern: NUMBER | NUMBER-(NUMBER)? | -NUMBER
    */
-  private def listPartitionInfos(consumer: KafkaConsumer[_, _], topic: String, partitionIds: Set[Int]): Option[Seq[PartitionInfo]] = {
-    val partitionInfos = consumer.listTopics.asScala.filter { case (k, _) => k == topic }.values.flatMap(_.asScala).toBuffer
-    if (partitionInfos.isEmpty)
-      None
-    else if (partitionIds.isEmpty)
-      Some(partitionInfos)
-    else
-      Some(partitionInfos.filter(p => partitionIds.contains(p.partition)))
+  def createTopicPartitionFilterWithPatternList(topicPartitions: String, excludeInternalTopics: Boolean): PartitionInfo => Boolean = {
+    val ruleSpecs = topicPartitions.split(",")
+    val rules = ruleSpecs.map { ruleSpec => parseRuleSpec(ruleSpec, excludeInternalTopics) }
+    tp => rules.exists(rule => rule.apply(tp))
   }
 
+  def parseRuleSpec(ruleSpec: String, excludeInternalTopics: Boolean): PartitionInfo => Boolean = {
+    def wrapNullOrEmpty(s: String): Option[String] = {
+      if (s == null || s.isEmpty)
+        None
+      else
+        Some(s)
+    }
+
+    val matcher = topicPartitionPattern.matcher(ruleSpec)
+    if (!matcher.matches() || matcher.groupCount() == 0)
+      throw new IllegalArgumentException(s"Invalid rule specification: $ruleSpec")
+
+    val topicPattern = wrapNullOrEmpty(matcher.group(1))
+    val lowerRange = if (matcher.groupCount() >= 2) wrapNullOrEmpty(matcher.group(2)) else None

Review comment:
       It seems that checking `matcher.groupCount() >= 2` is not necessary. Groups will be `null` anyway if they are not filled.

##########
File path: core/src/main/scala/kafka/tools/GetOffsetShell.scala
##########
@@ -20,102 +20,131 @@ package kafka.tools
 
 import java.util.Properties
 import joptsimple._
-import kafka.utils.{CommandLineUtils, Exit, ToolsUtils}
+import kafka.utils.{CommandLineUtils, Exit, IncludeList, ToolsUtils}
 import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer}
 import org.apache.kafka.common.requests.ListOffsetsRequest
 import org.apache.kafka.common.{PartitionInfo, TopicPartition}
 import org.apache.kafka.common.serialization.ByteArrayDeserializer
+import org.apache.kafka.common.utils.Utils
 
+import java.util.regex.Pattern
 import scala.jdk.CollectionConverters._
 import scala.collection.Seq
+import scala.math.Ordering.Implicits.infixOrderingOps
 
 object GetOffsetShell {
+  private val topicPartitionPattern = Pattern.compile( "([^:,]*)(?::([0-9]*)(?:-([0-9]*))?)?")
 
   def main(args: Array[String]): Unit = {
+    try {
+      fetchOffsets(args)
+    } catch {
+      case e: Exception => Exit.exit(1, Some(e.getMessage))
+    }
+  }
+
+  private def fetchOffsets(args: Array[String]): Unit = {
     val parser = new OptionParser(false)
-    val brokerListOpt = parser.accepts("broker-list", "REQUIRED: The list of hostname and port of the server to connect to.")
+    val brokerListOpt = parser.accepts("broker-list", "DEPRECATED, use --bootstrap-server instead; ignored if --bootstrap-server is specified. The server(s) to connect to in the form HOST1:PORT1,HOST2:PORT2.")
                            .withRequiredArg
-                           .describedAs("hostname:port,...,hostname:port")
+                           .describedAs("HOST1:PORT1,...,HOST3:PORT3")
                            .ofType(classOf[String])
-    val topicOpt = parser.accepts("topic", "REQUIRED: The topic to get offset from.")
+    val bootstrapServerOpt = parser.accepts("bootstrap-server", "REQUIRED. The server(s) to connect to in the form HOST1:PORT1,HOST2:PORT2.")
+                           .requiredUnless("broker-list")
+                           .withRequiredArg
+                           .describedAs("HOST1:PORT1,...,HOST3:PORT3")
+                           .ofType(classOf[String])
+    val topicPartitionsOpt = parser.accepts("topic-partitions", s"Comma separated list of topic-partition patterns to get the offsets for, with the format of '$topicPartitionPattern'." +
+                                            " The first group is an optional regex for the topic name, if omitted, it matches any topic name." +
+                                            " The section after ':' describes a 'partition' pattern, which can be: a number, a range in the format of 'NUMBER-NUMBER' (lower inclusive, upper exclusive), an inclusive lower bound in the format of 'NUMBER-', an exclusive upper bound in the format of '-NUMBER' or may be omitted to accept all partitions.")
+                           .withRequiredArg
+                           .describedAs("topic1:1,topic2:0-3,topic3,topic4:5-,topic5:-3")
+                           .ofType(classOf[String])
+    val topicOpt = parser.accepts("topic", s"The topic to get the offsets for. It also accepts a regular expression. If not present, all authorized topics are queried. Cannot be used if --topic-partitions is present.")
                            .withRequiredArg
                            .describedAs("topic")
                            .ofType(classOf[String])
-    val partitionOpt = parser.accepts("partitions", "comma separated list of partition ids. If not specified, it will find offsets for all partitions")
+    val partitionsOpt = parser.accepts("partitions", s"Comma separated list of partition ids to get the offsets for. If not present, all partitions of the authorized topics are queried. Cannot be used if --topic-partitions is present.")
                            .withRequiredArg
                            .describedAs("partition ids")
                            .ofType(classOf[String])
-                           .defaultsTo("")
-    val timeOpt = parser.accepts("time", "timestamp of the offsets before that. [Note: No offset is returned, if the timestamp greater than recently commited record timestamp is given.]")
+    val timeOpt = parser.accepts("time", "timestamp of the offsets before that. [Note: No offset is returned, if the timestamp greater than recently committed record timestamp is given.]")
                            .withRequiredArg
                            .describedAs("timestamp/-1(latest)/-2(earliest)")
                            .ofType(classOf[java.lang.Long])
                            .defaultsTo(-1L)
-    parser.accepts("offsets", "DEPRECATED AND IGNORED: number of offsets returned")
-                           .withRequiredArg
-                           .describedAs("count")
-                           .ofType(classOf[java.lang.Integer])
-                           .defaultsTo(1)
-    parser.accepts("max-wait-ms", "DEPRECATED AND IGNORED: The max amount of time each fetch request waits.")
+    val commandConfigOpt = parser.accepts("command-config", s"Property file containing configs to be passed to Consumer Client.")
                            .withRequiredArg
-                           .describedAs("ms")
-                           .ofType(classOf[java.lang.Integer])
-                           .defaultsTo(1000)
+                           .describedAs("config file")
+                           .ofType(classOf[String])
+    val excludeInternalTopicsOpt = parser.accepts("exclude-internal-topics", s"By default, internal topics are included. If specified, internal topics are excluded.")
 
-   if (args.length == 0)
-      CommandLineUtils.printUsageAndDie(parser, "An interactive shell for getting topic offsets.")
+    if (args.length == 0)
+      CommandLineUtils.printUsageAndDie(parser, "An interactive shell for getting topic-partition offsets.")
 
     val options = parser.parse(args : _*)
 
-    CommandLineUtils.checkRequiredArgs(parser, options, brokerListOpt, topicOpt)
+    val effectiveBrokerListOpt = if (options.has(bootstrapServerOpt))
+      bootstrapServerOpt
+    else
+      brokerListOpt
+
+    CommandLineUtils.checkRequiredArgs(parser, options, effectiveBrokerListOpt)
 
     val clientId = "GetOffsetShell"
-    val brokerList = options.valueOf(brokerListOpt)
+    val brokerList = options.valueOf(effectiveBrokerListOpt)
+
     ToolsUtils.validatePortOrDie(parser, brokerList)
-    val topic = options.valueOf(topicOpt)
-    val partitionIdsRequested: Set[Int] = {
-      val partitionsString = options.valueOf(partitionOpt)
-      if (partitionsString.isEmpty)
-        Set.empty
-      else
-        partitionsString.split(",").map { partitionString =>
-          try partitionString.toInt
-          catch {
-            case _: NumberFormatException =>
-              System.err.println(s"--partitions expects a comma separated list of numeric partition ids, but received: $partitionsString")
-              Exit.exit(1)
-          }
-        }.toSet
+    val excludeInternalTopics = options.has(excludeInternalTopicsOpt)
+
+    if (options.has(topicPartitionsOpt) && (options.has(topicOpt) || options.has(partitionsOpt))) {
+      throw new IllegalArgumentException("--topic-partitions cannot be used with --topic or --partitions")
     }
+
     val listOffsetsTimestamp = options.valueOf(timeOpt).longValue
 
-    val config = new Properties
+    val topicPartitionFilter = if (options.has(topicPartitionsOpt)) {
+      createTopicPartitionFilterWithPatternList(options.valueOf(topicPartitionsOpt), excludeInternalTopics)
+    } else {
+      val partitionIdsRequested: Set[Int] = {
+        val partitionsString = options.valueOf(partitionsOpt)
+        if (partitionsString == null || partitionsString.isEmpty)
+          Set.empty
+        else
+          partitionsString.split(",").map { partitionString =>
+            try partitionString.toInt
+            catch {
+              case _: NumberFormatException =>
+                throw new IllegalArgumentException(s"--partitions expects a comma separated list of numeric " +
+                  s"partition ids, but received: $partitionsString")
+            }
+          }.toSet

Review comment:
       nit: Should we consider moving this block into a separate method?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org