You are viewing a plain text version of this content. The canonical link for it is here.
Posted to users@kafka.apache.org by Gerrit Avenant <ge...@vastech.co.za> on 2019/01/02 07:39:06 UTC
Kafka stops on cleaner-offset-checkpoint not found
Hi,
My Kafka service just stops after running for about one to two weeks.
It seems the cleaner-offset-checkpoint gets deleted and then it fails upon
reading the file. Restarting the service creates the
cleaner-offset-checkpoint again.
I need ideas of where I can start debugging.
It does not look like the system is running out of file descriptors, memory,
or disk space. It also does not seem load dependent; it happens at anywhere
from one message every few seconds to 100000 messages per second.
I'm running Kafka version 2.1.0, and currently only one broker to make sure
a single broker is stable enough.
Here's the log entry for the error:
---
Server.log:
[2019-01-01 07:38:29,759] ERROR Error while reading checkpoint file
/tmp/kafka/cleaner-offset-checkpoint (kafka.server.LogDirFailureChannel)
java.nio.file.NoSuchFileException: /tmp/kafka/cleaner-offset-checkpoint
at
sun.nio.fs.UnixException.translateToIOException(UnixException.java:86)
at
sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:102)
at
sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:107)
at
sun.nio.fs.UnixFileSystemProvider.newByteChannel(UnixFileSystemProvider.java
:214)
at java.nio.file.Files.newByteChannel(Files.java:361)
at java.nio.file.Files.newByteChannel(Files.java:407)
at
java.nio.file.spi.FileSystemProvider.newInputStream(FileSystemProvider.java:
384)
at java.nio.file.Files.newInputStream(Files.java:152)
at java.nio.file.Files.newBufferedReader(Files.java:2784)
at java.nio.file.Files.newBufferedReader(Files.java:2816)
at
kafka.server.checkpoints.CheckpointFile.liftedTree2$1(CheckpointFile.scala:8
7)
at
kafka.server.checkpoints.CheckpointFile.read(CheckpointFile.scala:86)
at
kafka.server.checkpoints.OffsetCheckpointFile.read(OffsetCheckpointFile.scal
a:61)
at
kafka.log.LogCleanerManager.$anonfun$allCleanerCheckpoints$2(LogCleanerManag
er.scala:140)
at
scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:24
0)
at scala.collection.Iterator.foreach(Iterator.scala:937)
at scala.collection.Iterator.foreach$(Iterator.scala:937)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1425)
at
scala.collection.MapLike$DefaultValuesIterable.foreach(MapLike.scala:209)
at
scala.collection.TraversableLike.flatMap(TraversableLike.scala:240)
at
scala.collection.TraversableLike.flatMap$(TraversableLike.scala:237)
at
scala.collection.AbstractTraversable.flatMap(Traversable.scala:104)
at
kafka.log.LogCleanerManager.$anonfun$allCleanerCheckpoints$1(LogCleanerManag
er.scala:138)
at kafka.utils.CoreUtils$.inLock(CoreUtils.scala:251)
at
kafka.log.LogCleanerManager.allCleanerCheckpoints(LogCleanerManager.scala:14
6)
at
kafka.log.LogCleanerManager.$anonfun$grabFilthiestCompactedLog$1(LogCleanerM
anager.scala:177)
at kafka.utils.CoreUtils$.inLock(CoreUtils.scala:251)
at
kafka.log.LogCleanerManager.grabFilthiestCompactedLog(LogCleanerManager.scal
a:174)
at
kafka.log.LogCleaner$CleanerThread.cleanFilthiestLog(LogCleaner.scala:313)
at kafka.log.LogCleaner$CleanerThread.doWork(LogCleaner.scala:300)
at kafka.utils.ShutdownableThread.run(ShutdownableThread.scala:82)
---
log-cleaner.log:
[2019-01-01 07:38:29,767] ERROR Failed to access checkpoint file
cleaner-offset-checkpoint in dir /tmp/kafka (kafka.log.LogCleaner)
org.apache.kafka.common.errors.KafkaStorageException: Error while reading
checkpoint file /tmp/kafka/cleaner-offset-checkpoint
Caused by: java.nio.file.NoSuchFileException:
/tmp/kafka/cleaner-offset-checkpoint
at
sun.nio.fs.UnixException.translateToIOException(UnixException.java:86)
at
sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:102)
at
sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:107)
at
sun.nio.fs.UnixFileSystemProvider.newByteChannel(UnixFileSystemProvider.java
:214)
at java.nio.file.Files.newByteChannel(Files.java:361)
at java.nio.file.Files.newByteChannel(Files.java:407)
at
java.nio.file.spi.FileSystemProvider.newInputStream(FileSystemProvider.java:
384)
at java.nio.file.Files.newInputStream(Files.java:152)
at java.nio.file.Files.newBufferedReader(Files.java:2784)
at java.nio.file.Files.newBufferedReader(Files.java:2816)
at
kafka.server.checkpoints.CheckpointFile.liftedTree2$1(CheckpointFile.scala:8
7)
at
kafka.server.checkpoints.CheckpointFile.read(CheckpointFile.scala:86)
at
kafka.server.checkpoints.OffsetCheckpointFile.read(OffsetCheckpointFile.scal
a:61)
at
kafka.log.LogCleanerManager.$anonfun$allCleanerCheckpoints$2(LogCleanerManag
er.scala:140)
at
scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:24
0)
at scala.collection.Iterator.foreach(Iterator.scala:937)
at scala.collection.Iterator.foreach$(Iterator.scala:937)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1425)
at
scala.collection.MapLike$DefaultValuesIterable.foreach(MapLike.scala:209)
at
scala.collection.TraversableLike.flatMap(TraversableLike.scala:240)
at
scala.collection.TraversableLike.flatMap$(TraversableLike.scala:237)
at
scala.collection.AbstractTraversable.flatMap(Traversable.scala:104)
at
kafka.log.LogCleanerManager.$anonfun$allCleanerCheckpoints$1(LogCleanerManag
er.scala:138)
at kafka.utils.CoreUtils$.inLock(CoreUtils.scala:251)
at
kafka.log.LogCleanerManager.allCleanerCheckpoints(LogCleanerManager.scala:14
6)
at
kafka.log.LogCleanerManager.$anonfun$grabFilthiestCompactedLog$1(LogCleanerM
anager.scala:177)
at kafka.utils.CoreUtils$.inLock(CoreUtils.scala:251)
at
kafka.log.LogCleanerManager.grabFilthiestCompactedLog(LogCleanerManager.scal
a:174)
at
kafka.log.LogCleaner$CleanerThread.cleanFilthiestLog(LogCleaner.scala:313)
at kafka.log.LogCleaner$CleanerThread.doWork(LogCleaner.scala:300)
at kafka.utils.ShutdownableThread.run(ShutdownableThread.scala:82)
---
server.properties:
num.partitions=1
num.recovery.threads.per.data.dir=1
offsets.topic.replication.factor=1
transaction.state.log.replication.factor=1
transaction.state.log.min.isr=1
log.retention.ms=60000
log.segment.bytes=1073741824
log.retention.check.interval.ms=30000
group.initial.rebalance.delay.ms=0
--
Thanks