You are viewing a plain text version of this content. The canonical link for it is here.
Posted to jira@kafka.apache.org by "OmniaGM (via GitHub)" <gi...@apache.org> on 2023/01/30 11:27:12 UTC

[GitHub] [kafka] OmniaGM commented on a diff in pull request #13171: KAFKA-14584: Move StateChangeLogMerger tool

OmniaGM commented on code in PR #13171:
URL: https://github.com/apache/kafka/pull/13171#discussion_r1090495143


##########
tools/src/main/java/org/apache/kafka/tools/StateChangeLogMerger.java:
##########
@@ -0,0 +1,329 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.tools;
+
+import joptsimple.OptionSpec;
+import org.apache.kafka.common.internals.Topic;
+import org.apache.kafka.common.utils.Exit;
+import org.apache.kafka.common.utils.Utils;
+import org.apache.kafka.server.util.CommandDefaultOptions;
+import org.apache.kafka.server.util.CommandLineUtils;
+import org.apache.kafka.server.util.ToolsUtils;
+
+import java.io.File;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.io.Serializable;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Date;
+import java.util.Arrays;
+import java.util.List;
+import java.util.PriorityQueue;
+import java.util.Scanner;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+/**
+ * A utility that merges the state change logs (possibly obtained from different brokers and over multiple days).
+ *
+ * This utility expects at least one of the following two arguments -
+ * 1. A list of state change log files
+ * 2. A regex to specify state change log file names.
+ *
+ * This utility optionally also accepts the following arguments -
+ * 1. The topic whose state change logs should be merged
+ * 2. A list of partitions whose state change logs should be merged (can be specified only when the topic argument
+ * is explicitly specified)
+ * 3. Start time from when the logs should be merged
+ * 4. End time until when the logs should be merged
+ */
+public class StateChangeLogMerger {
+    private static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss,SSS";
+    private static final Pattern DATE_PATTERN = Pattern.compile("([0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3}){1}");
+    private static final Pattern TOPIC_PART_PATTERN = Pattern.compile("\\[(" + Topic.LEGAL_CHARS + "+),([0-9]+)\\]");
+
+    private static List<String> files;
+    private static String topic;
+    private static List<Integer> partitions;
+    private static Date startDate;
+    private static Date endDate;
+
+    public static void main(String[] args) {
+        try {
+            StateChangeLogMergerOptions options = new StateChangeLogMergerOptions(args);
+            if (CommandLineUtils.isPrintHelpNeeded(options)) {
+                CommandLineUtils.printUsageAndExit(options.parser,
+                    "A tool for merging the log files from several brokers to reconstruct a unified history of what happened.");
+                return;
+            }
+            if (CommandLineUtils.isPrintVersionNeeded(options)) {
+                CommandLineUtils.printVersionAndExit();
+                return;
+            }
+
+            if ((!options.hasFiles() && !options.hasRegex()) || (options.hasFiles() && options.hasRegex())) {

Review Comment:
   I know the original code uses this pattern; however, can't we use `CommandLineUtils.checkInvalidArgs` here instead?
   



##########
tools/src/main/java/org/apache/kafka/tools/StateChangeLogMerger.java:
##########
@@ -0,0 +1,329 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.tools;
+
+import joptsimple.OptionSpec;
+import org.apache.kafka.common.internals.Topic;
+import org.apache.kafka.common.utils.Exit;
+import org.apache.kafka.common.utils.Utils;
+import org.apache.kafka.server.util.CommandDefaultOptions;
+import org.apache.kafka.server.util.CommandLineUtils;
+import org.apache.kafka.server.util.ToolsUtils;
+
+import java.io.File;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.io.Serializable;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Date;
+import java.util.Arrays;
+import java.util.List;
+import java.util.PriorityQueue;
+import java.util.Scanner;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+/**
+ * A utility that merges the state change logs (possibly obtained from different brokers and over multiple days).
+ *
+ * This utility expects at least one of the following two arguments -
+ * 1. A list of state change log files
+ * 2. A regex to specify state change log file names.
+ *
+ * This utility optionally also accepts the following arguments -
+ * 1. The topic whose state change logs should be merged
+ * 2. A list of partitions whose state change logs should be merged (can be specified only when the topic argument
+ * is explicitly specified)
+ * 3. Start time from when the logs should be merged
+ * 4. End time until when the logs should be merged
+ */
+public class StateChangeLogMerger {
+    private static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss,SSS";
+    private static final Pattern DATE_PATTERN = Pattern.compile("([0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3}){1}");
+    private static final Pattern TOPIC_PART_PATTERN = Pattern.compile("\\[(" + Topic.LEGAL_CHARS + "+),([0-9]+)\\]");
+
+    private static List<String> files;
+    private static String topic;
+    private static List<Integer> partitions;
+    private static Date startDate;
+    private static Date endDate;
+
+    public static void main(String[] args) {
+        try {
+            StateChangeLogMergerOptions options = new StateChangeLogMergerOptions(args);
+            if (CommandLineUtils.isPrintHelpNeeded(options)) {
+                CommandLineUtils.printUsageAndExit(options.parser,
+                    "A tool for merging the log files from several brokers to reconstruct a unified history of what happened.");
+                return;
+            }
+            if (CommandLineUtils.isPrintVersionNeeded(options)) {
+                CommandLineUtils.printVersionAndExit();
+                return;
+            }
+
+            if ((!options.hasFiles() && !options.hasRegex()) || (options.hasFiles() && options.hasRegex())) {
+                CommandLineUtils.printUsageAndExit(options.parser,
+                    String.format("Provide arguments to exactly one of the two options \"%s\" or \"%s\"", options.filesOpt, options.regexOpt));
+                return;
+            }
+            if (options.hasPartitions() && !options.hasTopic()) {

Review Comment:
   Similar to the previous comment, can't we use `CommandLineUtils.checkRequiredArgs` here?



##########
server-common/src/main/java/org/apache/kafka/server/util/ToolsUtils.java:
##########
@@ -100,4 +104,17 @@ public static void prettyPrintTable(
         printRow(columnLengths, headers, out);
         rows.forEach(row -> printRow(columnLengths, row, out));
     }
+
+    /**
+     * Returns a set of duplicated items.
+     */
+    public static <T> Set<T> findDuplicates(Collection<T> collection) {

Review Comment:
   Are we planning to drop `CoreUtils.duplicates` in favor of `ToolsUtils.findDuplicates` at some point, given that `CoreUtils.duplicates` is only used by CLI tools at the moment?



##########
tools/src/main/java/org/apache/kafka/tools/StateChangeLogMerger.java:
##########
@@ -0,0 +1,329 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.tools;
+
+import joptsimple.OptionSpec;
+import org.apache.kafka.common.internals.Topic;
+import org.apache.kafka.common.utils.Exit;
+import org.apache.kafka.common.utils.Utils;
+import org.apache.kafka.server.util.CommandDefaultOptions;
+import org.apache.kafka.server.util.CommandLineUtils;
+import org.apache.kafka.server.util.ToolsUtils;
+
+import java.io.File;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.io.Serializable;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Date;
+import java.util.Arrays;
+import java.util.List;
+import java.util.PriorityQueue;
+import java.util.Scanner;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+/**
+ * A utility that merges the state change logs (possibly obtained from different brokers and over multiple days).
+ *
+ * This utility expects at least one of the following two arguments -
+ * 1. A list of state change log files
+ * 2. A regex to specify state change log file names.
+ *
+ * This utility optionally also accepts the following arguments -
+ * 1. The topic whose state change logs should be merged
+ * 2. A list of partitions whose state change logs should be merged (can be specified only when the topic argument
+ * is explicitly specified)
+ * 3. Start time from when the logs should be merged
+ * 4. End time until when the logs should be merged
+ */
+public class StateChangeLogMerger {
+    private static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss,SSS";
+    private static final Pattern DATE_PATTERN = Pattern.compile("([0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3}){1}");
+    private static final Pattern TOPIC_PART_PATTERN = Pattern.compile("\\[(" + Topic.LEGAL_CHARS + "+),([0-9]+)\\]");
+
+    private static List<String> files;
+    private static String topic;
+    private static List<Integer> partitions;
+    private static Date startDate;
+    private static Date endDate;
+
+    public static void main(String[] args) {

Review Comment:
   It may be better to unify the CLI tool classes so that they follow the same pattern as `ClusterTools` and `MetadataQuorumCommand`, i.e. having `main`, `mainNoExit`, and `execute`. An example is here: https://github.com/apache/kafka/blob/72cfc994f5675be349d4494ece3528efed290651/tools/src/main/java/org/apache/kafka/tools/MetadataQuorumCommand.java#L49



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: jira-unsubscribe@kafka.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org