You are viewing a plain text version of this content. The canonical link for it is here.
Posted to jira@kafka.apache.org by GitBox <gi...@apache.org> on 2020/06/11 06:03:28 UTC

[GitHub] [kafka] hachikuji commented on a change in pull request #8850: KAFKA-10141: Add more detail to log segment delete messages

hachikuji commented on a change in pull request #8850:
URL: https://github.com/apache/kafka/pull/8850#discussion_r438559177



##########
File path: core/src/main/scala/kafka/log/Log.scala
##########
@@ -1784,8 +1784,18 @@ class Log(@volatile private var _dir: File,
   private def deleteRetentionMsBreachedSegments(): Int = {
     if (config.retentionMs < 0) return 0
     val startMs = time.milliseconds
-    deleteOldSegments((segment, _) => startMs - segment.largestTimestamp > config.retentionMs,
-      reason = s"retention time ${config.retentionMs}ms breach")
+
+    def shouldDelete(segment: LogSegment, nextSegmentOpt: Option[LogSegment]) = {
+      if (startMs - segment.largestTimestamp > config.retentionMs) {
+        info(s"Segment with base offset ${segment.baseOffset} will be deleted due to" +

Review comment:
       `LogSegment.largestTimestamp` may refer to either the largest record timestamp for newer formats or the last modified time of the segment for older formats. I think it would be helpful if the log message indicated which case it is. Perhaps we could add a method like this to `LogSegment`?
   ```scala
   def largestRecordTimestamp: Option[Long]
   ```

##########
File path: core/src/main/scala/kafka/log/Log.scala
##########
@@ -1804,8 +1816,15 @@ class Log(@volatile private var _dir: File,
   }
 
   private def deleteLogStartOffsetBreachedSegments(): Int = {
-    def shouldDelete(segment: LogSegment, nextSegmentOpt: Option[LogSegment]) =
-      nextSegmentOpt.exists(_.baseOffset <= logStartOffset)
+    def shouldDelete(segment: LogSegment, nextSegmentOpt: Option[LogSegment]) = {
+      if (nextSegmentOpt.exists(_.baseOffset <= logStartOffset)) {
+        info (s"Segment with base offset ${segment.baseOffset} will be deleted due to" +

Review comment:
       nit: remove the extra space after `info`

##########
File path: core/src/main/scala/kafka/log/Log.scala
##########
@@ -1784,8 +1784,18 @@ class Log(@volatile private var _dir: File,
   private def deleteRetentionMsBreachedSegments(): Int = {
     if (config.retentionMs < 0) return 0
     val startMs = time.milliseconds
-    deleteOldSegments((segment, _) => startMs - segment.largestTimestamp > config.retentionMs,
-      reason = s"retention time ${config.retentionMs}ms breach")
+
+    def shouldDelete(segment: LogSegment, nextSegmentOpt: Option[LogSegment]) = {
+      if (startMs - segment.largestTimestamp > config.retentionMs) {
+        info(s"Segment with base offset ${segment.baseOffset} will be deleted due to" +
+          s" retentionMs breach. Largest timestamp of segment is ${segment.largestTimestamp}")
+        true
+      } else {
+        false
+      }
+    }
+
+    deleteOldSegments(shouldDelete, reason = s"retention time ${config.retentionMs}ms breach")

Review comment:
       With the logging we have above, do you think we still need the message in `deleteOldSegments`? Perhaps we could make it more concise at least. Maybe just mention the number of segments to be deleted, for example.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org