You are viewing a plain text version of this content. The canonical link for it is here.
Posted to users@kafka.apache.org by Mohammed Ait Haddou <mo...@gmail.com> on 2020/05/19 13:45:26 UTC
Persist Kafka Topics and ksqldb
After a *docker-compose restart*, all topics and ksqlDB types are lost.
Is there any way to safely persist all data?
docker-compose:
---
# Single-broker Kafka development stack: ZooKeeper, Kafka, Schema Registry,
# Cassandra, Kafka Connect, ksqlDB and MySQL.
# NOTE: no service in this file declares `volumes:`.
version: "2"
services:
  # ZooKeeper, client port 2181; the broker and Schema Registry connect to it.
  zookeeper:
    image: confluentinc/cp-zookeeper:latest
    container_name: zookeeper
    environment:
      ZOOKEEPER_CLIENT_PORT: 2181
      ZOOKEEPER_TICK_TIME: 2000

  # Kafka broker (id 1) with two listeners: kafka:29092 and localhost:9092.
  kafka:
    image: confluentinc/cp-enterprise-kafka:latest
    container_name: kafka
    depends_on:
      - zookeeper
    links:
      - zookeeper
    ports:
      # "`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-
      # An important note about accessing Kafka from clients on other machines:
      # -----------------------------------------------------------------------
      #
      # The config used here exposes port 9092 for _external_ connections to the broker
      # i.e. those from _outside_ the docker network. This could be from the host machine
      # running docker, or maybe further afield if you've got a more complicated setup.
      # If the latter is true, you will need to change the value 'localhost' in
      # KAFKA_ADVERTISED_LISTENERS to one that is resolvable to the docker host from those
      # remote clients
      #
      # For connections _internal_ to the docker network, such as from other services
      # and components, use kafka:29092.
      #
      # See https://rmoff.net/2018/08/02/kafka-listeners-explained/ for details
      # "`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-
      #
      - "9092:9092"
      - "29092:29092"
    environment:
      KAFKA_BROKER_ID: 1
      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
      KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:29092,PLAINTEXT_HOST://localhost:9092
      KAFKA_AUTO_CREATE_TOPICS_ENABLE: "true"
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
      KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1
      KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1
      # -v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v
      # Useful settings for development/laptop use - modify as needed for Prod
      # This one makes ksqlDB feel a bit more responsive when queries start running
      KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 100
    # Maps the name `kafka` to 127.0.0.1 inside this container before
    # handing over to the image's own run script.
    command:
      - bash
      - -c
      - |
        echo '127.0.0.1 kafka' >> /etc/hosts
        /etc/confluent/docker/run
        sleep infinity

  # Schema Registry on port 8081, storing its data via zookeeper:2181.
  schema-registry:
    image: confluentinc/cp-schema-registry:5.5.0
    container_name: schema-registry
    depends_on:
      - zookeeper
      - kafka
    links:
      - zookeeper
      - kafka
    ports:
      - "8081:8081"
    environment:
      SCHEMA_REGISTRY_HOST_NAME: schema-registry
      SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL: zookeeper:2181

  # Cassandra; only port 7000 is published to the host.
  cassandra:
    image: cassandra:latest
    container_name: cassandra
    ports:
      - "7000:7000"

  # Kafka Connect worker (REST on 8083) using Avro converters backed by
  # the Schema Registry; connector plugins are installed at start-up.
  kafka-connect-01:
    image: confluentinc/cp-kafka-connect:5.5.0
    container_name: kafka-connect-01
    depends_on:
      - kafka
      - schema-registry
      - cassandra
    links:
      - schema-registry
      - kafka
      - mysql
      - cassandra
    ports:
      - "8083:8083"
    environment:
      CONNECT_BOOTSTRAP_SERVERS: "kafka:29092"
      CONNECT_REST_ADVERTISED_HOST_NAME: "kafka-connect-01"
      CONNECT_REST_PORT: 8083
      CONNECT_GROUP_ID: kafka-connect-01
      CONNECT_CONFIG_STORAGE_TOPIC: _kafka-connect-01-configs
      CONNECT_OFFSET_STORAGE_TOPIC: _kafka-connect-01-offsets
      CONNECT_STATUS_STORAGE_TOPIC: _kafka-connect-01-status
      CONNECT_KEY_CONVERTER: io.confluent.connect.avro.AvroConverter
      # Kept on one line: a quoted value that starts with a line break is
      # folded by YAML into a URL with a leading space.
      CONNECT_KEY_CONVERTER_SCHEMA_REGISTRY_URL: "http://schema-registry:8081"
      CONNECT_VALUE_CONVERTER: io.confluent.connect.avro.AvroConverter
      CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: "http://schema-registry:8081"
      CONNECT_INTERNAL_KEY_CONVERTER: "org.apache.kafka.connect.json.JsonConverter"
      CONNECT_INTERNAL_VALUE_CONVERTER: "org.apache.kafka.connect.json.JsonConverter"
      CONNECT_LOG4J_ROOT_LOGLEVEL: "INFO"
      CONNECT_LOG4J_LOGGERS: "org.apache.kafka.connect.runtime.rest=WARN,org.reflections=ERROR"
      CONNECT_LOG4J_APPENDER_STDOUT_LAYOUT_CONVERSIONPATTERN: "[%d] %p %X{connector.context}%m (%c:%L)%n"
      CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: "1"
      CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: "1"
      CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: "1"
      CONNECT_PLUGIN_PATH: "/usr/share/java,/usr/share/confluent-hub-components/,/connectors/"
    # If you want to use the Confluent Hub installer to d/l component, but make them available
    # when running this offline, spin up the stack once and then run :
    #   docker cp kafka-connect-01:/usr/share/confluent-hub-components ./connectors
    #   mv ./connectors/confluent-hub-components/* ./connectors
    #   rm -rf ./connectors/confluent-hub-components
    # In the command section, $ are replaced with $$ to avoid the error
    # 'Invalid interpolation format for "command" option'
    command:
      - bash
      - -c
      - |
        #
        echo "Installing connector plugins"
        confluent-hub install --no-prompt debezium/debezium-connector-mysql:1.1.0
        confluent-hub install --no-prompt confluentinc/kafka-connect-datagen:0.3.1
        # confluent-hub install --no-prompt jcustenborder/kafka-connect-transform-common:0.1.0.35
        # confluent-hub install --no-prompt jcustenborder/kafka-connect-transform-xml:0.1.0.18
        # confluent-hub install --no-prompt streamthoughts/kafka-connect-file-pulse:1.2.1
        confluent-hub install --no-prompt confluentinc/kafka-connect-cassandra:1.2.0
        #
        echo "Launching Kafka Connect worker"
        /etc/confluent/docker/run &
        #
        echo "Waiting for Kafka Connect to start listening on localhost ⏳"
        while : ; do
          curl_status=$$(curl -s -o /dev/null -w %{http_code} http://localhost:8083/connectors)
          echo -e $$(date) " Kafka Connect listener HTTP state: " $$curl_status " (waiting for 200)"
          if [ $$curl_status -eq 200 ] ; then
            break
          fi
          sleep 5
        done
        sleep infinity

  # ksqlDB server on port 8088, pointed at the broker, Kafka Connect and
  # the Schema Registry defined above.
  ksqldb:
    image: confluentinc/ksqldb-server:latest
    hostname: ksqldb
    container_name: ksqldb
    links:
      - schema-registry
      - kafka-connect-01
      - kafka
    depends_on:
      - kafka
      - kafka-connect-01
    ports:
      - "8088:8088"
    environment:
      KSQL_LISTENERS: http://0.0.0.0:8088
      KSQL_BOOTSTRAP_SERVERS: kafka:29092
      KSQL_KSQL_LOGGING_PROCESSING_STREAM_AUTO_CREATE: "true"
      KSQL_KSQL_LOGGING_PROCESSING_TOPIC_AUTO_CREATE: "true"
      KSQL_KSQL_CONNECT_URL: http://kafka-connect-01:8083
      KSQL_KSQL_SCHEMA_REGISTRY_URL: http://schema-registry:8081
      KSQL_KSQL_SERVICE_ID: confluent_rmoff_01
      KSQL_KSQL_HIDDEN_TOPICS: "^_.*"

  # Other systems
  mysql:
    # *-----------------------------*
    # To connect to the DB:
    #   docker exec -it mysql bash -c 'mysql -u root -p$MYSQL_ROOT_PASSWORD'
    # or
    #   docker exec -it mysql bash -c 'mysql -u $MYSQL_USER -p$MYSQL_PASSWORD demo'
    # *-----------------------------*
    image: debezium/example-mysql:1.1
    container_name: mysql
    ports:
      - "3306:3306"
    environment:
      - MYSQL_ROOT_PASSWORD=root
      - MYSQL_USER=user
      - MYSQL_PASSWORD=pwd
--
Mohammed Ait Haddou
Linkedin.com/in/medait
+212697937189
Re: Persist Kafka Topics and ksqldb
Posted by Robin Moffatt <ro...@confluent.io>.
You need to externalise your container data stores. Here's an
example Docker Compose that does that:
https://github.com/confluentinc/demo-scene/blob/master/wifi-fun/docker-compose.yml
<https://github.com/confluentinc/demo-scene/blob/master/wifi-fun/docker-compose.yml#L10>
--
Robin Moffatt | Senior Developer Advocate | robin@confluent.io | @rmoff
On Tue, 19 May 2020 at 15:55, Mohammed Ait Haddou <
mohammedaithaddou@gmail.com> wrote:
> After a *docker-compose restart. *All topics, ksqldb types are lost.
> Is there any way to safely persist all data ?
> docker-compse :
> ---
> version: "2"
> services:
> zookeeper:
> image: confluentinc/cp-zookeeper:latest
> container_name: zookeeper
> environment:
> ZOOKEEPER_CLIENT_PORT: 2181
> ZOOKEEPER_TICK_TIME: 2000
>
> kafka:
> image: confluentinc/cp-enterprise-kafka:latest
> container_name: kafka
> depends_on:
> - zookeeper
> links:
> - zookeeper
> ports:
>
> # "`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-
>
> # An important note about accessing Kafka from clients on other machines:
>
> # -----------------------------------------------------------------------
> #
>
> # The config used here exposes port 9092 for _external_ connections to
> the broker
>
> # i.e. those from _outside_ the docker network. This could be from the
> host machine
>
> # running docker, or maybe further afield if you've got a more
> complicated setup.
>
> # If the latter is true, you will need to change the value 'localhost' in
>
> # KAFKA_ADVERTISED_LISTENERS to one that is resolvable to the docker
> host from those
> # remote clients
> #
>
> # For connections _internal_ to the docker network, such as from other
> services
> # and components, use kafka:29092.
> #
> # See https://rmoff.net/2018/08/02/kafka-listeners-explained/
> for details
>
> # "`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-
> #
> - 9092:9092
> - "29092:29092"
> environment:
> KAFKA_BROKER_ID: 1
> KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
> KAFKA_LISTENER_SECURITY_PROTOCOL_MAP:
> PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
> KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
> KAFKA_ADVERTISED_LISTENERS:
> PLAINTEXT://kafka:29092,PLAINTEXT_HOST://localhost:9092
> KAFKA_AUTO_CREATE_TOPICS_ENABLE: "true"
> KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
> KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1
> KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1
>
> # -v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v
>
> # Useful settings for development/laptop use - modify as needed for Prod
>
> # This one makes ksqlDB feel a bit more responsive when queries start
> running
> KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 100
> command:
> - bash
> - -c
> - |
> echo '127.0.0.1 kafka' >> /etc/hosts
> /etc/confluent/docker/run
> sleep infinity
>
> schema-registry:
> image: confluentinc/cp-schema-registry:5.5.0
> container_name: schema-registry
> depends_on:
> - zookeeper
> - kafka
> links:
> - zookeeper
> - kafka
> ports:
> - 8081:8081
> environment:
> SCHEMA_REGISTRY_HOST_NAME: schema-registry
> SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL: zookeeper:2181
>
> cassandra:
> image: cassandra:latest
> container_name: cassandra
> ports:
> - 7000:7000
>
> kafka-connect-01:
> image: confluentinc/cp-kafka-connect:5.5.0
> container_name: kafka-connect-01
> depends_on:
> - kafka
> - schema-registry
> - cassandra
> links:
> - schema-registry
> - kafka
> - mysql
> - cassandra
> ports:
> - 8083:8083
> environment:
> CONNECT_BOOTSTRAP_SERVERS: "kafka:29092"
> CONNECT_REST_ADVERTISED_HOST_NAME: "kafka-connect-01"
> CONNECT_REST_PORT: 8083
> CONNECT_GROUP_ID: kafka-connect-01
> CONNECT_CONFIG_STORAGE_TOPIC: _kafka-connect-01-configs
> CONNECT_OFFSET_STORAGE_TOPIC: _kafka-connect-01-offsets
> CONNECT_STATUS_STORAGE_TOPIC: _kafka-connect-01-status
> CONNECT_KEY_CONVERTER: io.confluent.connect.avro.AvroConverter
> CONNECT_KEY_CONVERTER_SCHEMA_REGISTRY_URL: "
> http://schema-registry:8081"
> CONNECT_VALUE_CONVERTER: io.confluent.connect.avro.AvroConverter
> CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: "
> http://schema-registry:8081"
> CONNECT_INTERNAL_KEY_CONVERTER:
> "org.apache.kafka.connect.json.JsonConverter"
> CONNECT_INTERNAL_VALUE_CONVERTER:
> "org.apache.kafka.connect.json.JsonConverter"
> CONNECT_LOG4J_ROOT_LOGLEVEL: "INFO"
> CONNECT_LOG4J_LOGGERS:
> "org.apache.kafka.connect.runtime.rest=WARN,org.reflections=ERROR"
> CONNECT_LOG4J_APPENDER_STDOUT_LAYOUT_CONVERSIONPATTERN:
> "[%d] %p %X{connector.context}%m (%c:%L)%n"
> CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: "1"
> CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: "1"
> CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: "1"
> CONNECT_PLUGIN_PATH:
> "/usr/share/java,/usr/share/confluent-hub-components/,/connectors/"
>
> # If you want to use the Confluent Hub installer to d/l component, but
> make them available
> # when running this offline, spin up the stack once and then run :
>
> # docker cp kafka-connect-01:/usr/share/confluent-hub-components
> ./connectors
> # mv ./connectors/confluent-hub-components/* ./connectors
> # rm -rf ./connectors/confluent-hub-components
>
>
> # In the command section, $ are replaced with $$ to avoid the error
> 'Invalid interpolation format for "command" option'
> command:
> - bash
> - -c
> - |
> #
> echo "Installing connector plugins"
> confluent-hub install --no-prompt
> debezium/debezium-connector-mysql:1.1.0
> confluent-hub install --no-prompt
> confluentinc/kafka-connect-datagen:0.3.1
> # confluent-hub install --no-prompt
> jcustenborder/kafka-connect-transform-common:0.1.0.35
> # confluent-hub install --no-prompt
> jcustenborder/kafka-connect-transform-xml:0.1.0.18
> # confluent-hub install --no-prompt
> streamthoughts/kafka-connect-file-pulse:1.2.1
> confluent-hub install --no-prompt
> confluentinc/kafka-connect-cassandra:1.2.0
> #
> echo "Launching Kafka Connect worker"
> /etc/confluent/docker/run &
> #
> echo "Waiting for Kafka Connect to start listening on localhost ⏳"
> while : ; do
> curl_status=$$(curl -s -o /dev/null -w %{http_code}
> http://localhost:8083/connectors)
> echo -e $$(date) " Kafka Connect listener HTTP state: "
> $$curl_status " (waiting for 200)"
> if [ $$curl_status -eq 200 ] ; then
> break
> fi
> sleep 5
> done
>
> sleep infinity
>
> ksqldb:
> image: confluentinc/ksqldb-server:latest
> hostname: ksqldb
> container_name: ksqldb
> links:
> - schema-registry
> - kafka-connect-01
> - kafka
> depends_on:
> - kafka
> - kafka-connect-01
> ports:
> - "8088:8088"
> environment:
> KSQL_LISTENERS: http://0.0.0.0:8088
> KSQL_BOOTSTRAP_SERVERS: kafka:29092
> KSQL_KSQL_LOGGING_PROCESSING_STREAM_AUTO_CREATE: "true"
> KSQL_KSQL_LOGGING_PROCESSING_TOPIC_AUTO_CREATE: "true"
> KSQL_KSQL_CONNECT_URL: http://kafka-connect-01:8083
> KSQL_KSQL_SCHEMA_REGISTRY_URL: http://schema-registry:8081
> KSQL_KSQL_SERVICE_ID: confluent_rmoff_01
> KSQL_KSQL_HIDDEN_TOPICS: "^_.*"
>
> # Other systems
>
> mysql:
> # *-----------------------------*
> # To connect to the DB:
> # docker exec -it mysql bash -c 'mysql -u root
> -p$MYSQL_ROOT_PASSWORD'
> # or
>
> # docker exec -it mysql bash -c 'mysql -u $MYSQL_USER -p$MYSQL_PASSWORD
> demo'
> # *-----------------------------*
> image: debezium/example-mysql:1.1
> container_name: mysql
> ports:
> - 3306:3306
> environment:
> - MYSQL_ROOT_PASSWORD=root
> - MYSQL_USER=user
> - MYSQL_PASSWORD=pwd
>
> --
> Mohammed Ait Haddou
> Linkedin.com/in/medait
> +212697937189
>