Posted to users@kafka.apache.org by Gerrit Avenant <ge...@vastech.co.za> on 2019/01/02 07:39:06 UTC

Kafka stops on cleaner-offset-checkpoint not found

Hi,
My Kafka service just stops after running for about one to two weeks.
It seems the cleaner-offset-checkpoint file gets deleted, and the broker
then fails when it tries to read the file. Restarting the service
recreates the cleaner-offset-checkpoint.

I need ideas on where to start debugging.
It does not look like the system is running out of file descriptors,
memory, or disk space. It also does not seem load dependent: it happens
at anything from one message every few seconds to 100,000 messages per
second.
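
One idea I'm considering, to catch whatever removes the file, is to watch
the data dir for delete events. Here is a rough sketch using Java's NIO
WatchService (the /tmp/kafka path is just my data dir from the errors
below, and the class name is made up):

import java.nio.file.*;

// Minimal watcher: print a timestamp whenever cleaner-offset-checkpoint
// is deleted from the data dir, so the deletion can be correlated with
// cron jobs, tmp cleaners, deploy scripts, etc.
public class CheckpointWatch {
    public static void main(String[] args) throws Exception {
        Path dir = Paths.get("/tmp/kafka"); // data dir from the error logs
        WatchService watcher = FileSystems.getDefault().newWatchService();
        dir.register(watcher, StandardWatchEventKinds.ENTRY_DELETE);
        while (true) {
            WatchKey key = watcher.take(); // blocks until an event arrives
            for (WatchEvent<?> event : key.pollEvents()) {
                Object ctx = event.context(); // relative path of the deleted entry
                if (ctx != null && "cleaner-offset-checkpoint".equals(ctx.toString())) {
                    System.out.println(java.time.Instant.now() + " deleted: " + ctx);
                }
            }
            if (!key.reset()) break; // the watched dir itself is gone
        }
    }
}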

I'm running Kafka version 2.1.0, currently with only one broker, to make
sure a single broker is stable before adding more.
Here are the log entries for the error:

---
server.log:
[2019-01-01 07:38:29,759] ERROR Error while reading checkpoint file /tmp/kafka/cleaner-offset-checkpoint (kafka.server.LogDirFailureChannel)
java.nio.file.NoSuchFileException: /tmp/kafka/cleaner-offset-checkpoint
	at sun.nio.fs.UnixException.translateToIOException(UnixException.java:86)
	at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:102)
	at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:107)
	at sun.nio.fs.UnixFileSystemProvider.newByteChannel(UnixFileSystemProvider.java:214)
	at java.nio.file.Files.newByteChannel(Files.java:361)
	at java.nio.file.Files.newByteChannel(Files.java:407)
	at java.nio.file.spi.FileSystemProvider.newInputStream(FileSystemProvider.java:384)
	at java.nio.file.Files.newInputStream(Files.java:152)
	at java.nio.file.Files.newBufferedReader(Files.java:2784)
	at java.nio.file.Files.newBufferedReader(Files.java:2816)
	at kafka.server.checkpoints.CheckpointFile.liftedTree2$1(CheckpointFile.scala:87)
	at kafka.server.checkpoints.CheckpointFile.read(CheckpointFile.scala:86)
	at kafka.server.checkpoints.OffsetCheckpointFile.read(OffsetCheckpointFile.scala:61)
	at kafka.log.LogCleanerManager.$anonfun$allCleanerCheckpoints$2(LogCleanerManager.scala:140)
	at scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:240)
	at scala.collection.Iterator.foreach(Iterator.scala:937)
	at scala.collection.Iterator.foreach$(Iterator.scala:937)
	at scala.collection.AbstractIterator.foreach(Iterator.scala:1425)
	at scala.collection.MapLike$DefaultValuesIterable.foreach(MapLike.scala:209)
	at scala.collection.TraversableLike.flatMap(TraversableLike.scala:240)
	at scala.collection.TraversableLike.flatMap$(TraversableLike.scala:237)
	at scala.collection.AbstractTraversable.flatMap(Traversable.scala:104)
	at kafka.log.LogCleanerManager.$anonfun$allCleanerCheckpoints$1(LogCleanerManager.scala:138)
	at kafka.utils.CoreUtils$.inLock(CoreUtils.scala:251)
	at kafka.log.LogCleanerManager.allCleanerCheckpoints(LogCleanerManager.scala:146)
	at kafka.log.LogCleanerManager.$anonfun$grabFilthiestCompactedLog$1(LogCleanerManager.scala:177)
	at kafka.utils.CoreUtils$.inLock(CoreUtils.scala:251)
	at kafka.log.LogCleanerManager.grabFilthiestCompactedLog(LogCleanerManager.scala:174)
	at kafka.log.LogCleaner$CleanerThread.cleanFilthiestLog(LogCleaner.scala:313)
	at kafka.log.LogCleaner$CleanerThread.doWork(LogCleaner.scala:300)
	at kafka.utils.ShutdownableThread.run(ShutdownableThread.scala:82)
---
log-cleaner.log:
[2019-01-01 07:38:29,767] ERROR Failed to access checkpoint file cleaner-offset-checkpoint in dir /tmp/kafka (kafka.log.LogCleaner)
org.apache.kafka.common.errors.KafkaStorageException: Error while reading checkpoint file /tmp/kafka/cleaner-offset-checkpoint
Caused by: java.nio.file.NoSuchFileException: /tmp/kafka/cleaner-offset-checkpoint
	... (stack trace identical to the server.log entry above)


---
server.properties:

num.partitions=1
num.recovery.threads.per.data.dir=1
offsets.topic.replication.factor=1
transaction.state.log.replication.factor=1
transaction.state.log.min.isr=1

log.retention.ms=60000
log.segment.bytes=1073741824
log.retention.check.interval.ms=30000

group.initial.rebalance.delay.ms=0
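
(The excerpt above doesn't include the data directory setting; judging
from the paths in the errors, it is evidently /tmp/kafka, i.e. something
like:

log.dirs=/tmp/kafka
)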

--
Thanks