You are viewing a plain text version of this content. The canonical link for it is here.
Posted to users@kafka.apache.org by Mohammed Ait Haddou <mo...@gmail.com> on 2020/05/19 13:45:26 UTC

Persist Kafka Topics and ksqldb

After a *docker-compose restart*, all topics and ksqlDB types are lost.
Is there any way to safely persist all data?
docker-compose:
---
# Compose file format version 2; every container is declared under `services`.
version: "2"
services:
  zookeeper:
    # ZooKeeper node; its client port (2181) is the one the other services
    # in this file point at as zookeeper:2181.
    container_name: zookeeper
    image: confluentinc/cp-zookeeper:latest
    environment:
      ZOOKEEPER_TICK_TIME: "2000"
      ZOOKEEPER_CLIENT_PORT: "2181"

  kafka:
    # Single Kafka broker (broker id 1) that registers with zookeeper:2181.
    image: confluentinc/cp-enterprise-kafka:latest
    container_name: kafka
    depends_on:
      - zookeeper
    links:
      - zookeeper
    ports:
      # "`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-
      # An important note about accessing Kafka from clients on other machines:
      # -----------------------------------------------------------------------
      #
      # The config used here exposes port 9092 for _external_ connections to
      # the broker, i.e. those from _outside_ the docker network. This could be
      # from the host machine running docker, or maybe further afield if you've
      # got a more complicated setup.
      # If the latter is true, you will need to change the value 'localhost' in
      # KAFKA_ADVERTISED_LISTENERS to one that is resolvable to the docker host
      # from those remote clients.
      #
      # For connections _internal_ to the docker network, such as from other
      # services and components, use kafka:29092.
      #
      # See https://rmoff.net/2018/08/02/kafka-listeners-explained/ for details
      # "`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-
      #
      - "9092:9092"
      - "29092:29092"
    environment:
      KAFKA_BROKER_ID: 1
      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
      KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:29092,PLAINTEXT_HOST://localhost:9092
      KAFKA_AUTO_CREATE_TOPICS_ENABLE: "true"
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
      KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1
      KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1
      # -v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v
      # Useful settings for development/laptop use - modify as needed for Prod
      # This one makes ksqlDB feel a bit more responsive when queries start running
      KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 100
    # Adds a '127.0.0.1 kafka' entry to the container's /etc/hosts, then runs
    # the image's start script in the foreground; `sleep infinity` is only
    # reached once that script returns.
    command:
      - bash
      - -c
      - |
        echo '127.0.0.1 kafka' >> /etc/hosts
        /etc/confluent/docker/run
        sleep infinity

  schema-registry:
    # Schema Registry published on port 8081; its kafkastore connection URL
    # points at zookeeper:2181.
    container_name: schema-registry
    image: confluentinc/cp-schema-registry:5.5.0
    links:
      - zookeeper
      - kafka
    depends_on:
      - zookeeper
      - kafka
    ports:
      - "8081:8081"
    environment:
      SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL: 'zookeeper:2181'
      SCHEMA_REGISTRY_HOST_NAME: 'schema-registry'

  cassandra:
    # Cassandra container; only port 7000 is published to the host.
    container_name: cassandra
    image: cassandra:latest
    ports:
      - "7000:7000"

  kafka-connect-01:
    # Kafka Connect worker. At start-up the command below installs connector
    # plugins with confluent-hub, launches the worker in the background and
    # polls its REST endpoint until it answers with HTTP 200.
    image: confluentinc/cp-kafka-connect:5.5.0
    container_name: kafka-connect-01
    depends_on:
      - kafka
      - schema-registry
      - cassandra
    links:
      - schema-registry
      - kafka
      - mysql
      - cassandra
    ports:
      - "8083:8083"
    environment:
      CONNECT_BOOTSTRAP_SERVERS: "kafka:29092"
      CONNECT_REST_ADVERTISED_HOST_NAME: "kafka-connect-01"
      CONNECT_REST_PORT: 8083
      CONNECT_GROUP_ID: kafka-connect-01
      CONNECT_CONFIG_STORAGE_TOPIC: _kafka-connect-01-configs
      CONNECT_OFFSET_STORAGE_TOPIC: _kafka-connect-01-offsets
      CONNECT_STATUS_STORAGE_TOPIC: _kafka-connect-01-status
      CONNECT_KEY_CONVERTER: io.confluent.connect.avro.AvroConverter
      CONNECT_KEY_CONVERTER_SCHEMA_REGISTRY_URL: "http://schema-registry:8081"
      CONNECT_VALUE_CONVERTER: io.confluent.connect.avro.AvroConverter
      CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: "http://schema-registry:8081"
      CONNECT_INTERNAL_KEY_CONVERTER: "org.apache.kafka.connect.json.JsonConverter"
      CONNECT_INTERNAL_VALUE_CONVERTER: "org.apache.kafka.connect.json.JsonConverter"
      CONNECT_LOG4J_ROOT_LOGLEVEL: "INFO"
      CONNECT_LOG4J_LOGGERS: "org.apache.kafka.connect.runtime.rest=WARN,org.reflections=ERROR"
      CONNECT_LOG4J_APPENDER_STDOUT_LAYOUT_CONVERSIONPATTERN: "[%d] %p %X{connector.context}%m (%c:%L)%n"
      CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: "1"
      CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: "1"
      CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: "1"
      CONNECT_PLUGIN_PATH: "/usr/share/java,/usr/share/confluent-hub-components/,/connectors/"
    # If you want to use the Confluent Hub installer to d/l component, but
    # make them available when running this offline, spin up the stack once
    # and then run :
    #   docker cp kafka-connect-01:/usr/share/confluent-hub-components ./connectors
    #   mv ./connectors/confluent-hub-components/* ./connectors
    #   rm -rf ./connectors/confluent-hub-components
    #
    # In the command section, $ are replaced with $$ to avoid the error
    # 'Invalid interpolation format for "command" option'
    command:
      - bash
      - -c
      - |
        #
        echo "Installing connector plugins"
        confluent-hub install --no-prompt debezium/debezium-connector-mysql:1.1.0
        confluent-hub install --no-prompt confluentinc/kafka-connect-datagen:0.3.1
        # confluent-hub install --no-prompt jcustenborder/kafka-connect-transform-common:0.1.0.35
        # confluent-hub install --no-prompt jcustenborder/kafka-connect-transform-xml:0.1.0.18
        # confluent-hub install --no-prompt streamthoughts/kafka-connect-file-pulse:1.2.1
        confluent-hub install --no-prompt confluentinc/kafka-connect-cassandra:1.2.0
        #
        echo "Launching Kafka Connect worker"
        /etc/confluent/docker/run &
        #
        echo "Waiting for Kafka Connect to start listening on localhost ⏳"
        while : ; do
          curl_status=$$(curl -s -o /dev/null -w %{http_code} http://localhost:8083/connectors)
          echo -e $$(date) " Kafka Connect listener HTTP state: " $$curl_status " (waiting for 200)"
          if [ $$curl_status -eq 200 ] ; then
            break
          fi
          sleep 5
        done

        sleep infinity

  ksqldb:
    # ksqlDB server listening on port 8088; it is pointed at the broker, the
    # Connect worker and the Schema Registry by their service names.
    container_name: ksqldb
    hostname: ksqldb
    image: confluentinc/ksqldb-server:latest
    depends_on:
      - kafka
      - kafka-connect-01
    links:
      - schema-registry
      - kafka-connect-01
      - kafka
    ports:
      - '8088:8088'
    environment:
      KSQL_LISTENERS: 'http://0.0.0.0:8088'
      KSQL_BOOTSTRAP_SERVERS: 'kafka:29092'
      KSQL_KSQL_LOGGING_PROCESSING_STREAM_AUTO_CREATE: 'true'
      KSQL_KSQL_LOGGING_PROCESSING_TOPIC_AUTO_CREATE: 'true'
      KSQL_KSQL_CONNECT_URL: 'http://kafka-connect-01:8083'
      KSQL_KSQL_SCHEMA_REGISTRY_URL: 'http://schema-registry:8081'
      KSQL_KSQL_SERVICE_ID: 'confluent_rmoff_01'
      KSQL_KSQL_HIDDEN_TOPICS: '^_.*'

  # Other systems

  mysql:
    # *-----------------------------*
    # To connect to the DB:
    #   docker exec -it mysql bash -c 'mysql -u root -p$MYSQL_ROOT_PASSWORD'
    # or
    #   docker exec -it mysql bash -c 'mysql -u $MYSQL_USER -p$MYSQL_PASSWORD demo'
    # *-----------------------------*
    container_name: mysql
    image: debezium/example-mysql:1.1
    ports:
      - "3306:3306"
    environment:
      MYSQL_ROOT_PASSWORD: "root"
      MYSQL_USER: "user"
      MYSQL_PASSWORD: "pwd"

-- 
Mohammed Ait Haddou
Linkedin.com/in/medait
+212697937189

Re: Persist Kafka Topics and ksqldb

Posted by Robin Moffatt <ro...@confluent.io>.
You need to externalise your container data stores. Here's an
example Docker Compose that does that:
https://github.com/confluentinc/demo-scene/blob/master/wifi-fun/docker-compose.yml
<https://github.com/confluentinc/demo-scene/blob/master/wifi-fun/docker-compose.yml#L10>


-- 

Robin Moffatt | Senior Developer Advocate | robin@confluent.io | @rmoff


On Tue, 19 May 2020 at 15:55, Mohammed Ait Haddou <
mohammedaithaddou@gmail.com> wrote:

> After a *docker-compose restart. *All topics, ksqldb types are lost.
> Is there any way to safely persist all data ?
> docker-compse :
> ---
> version: "2"
> services:
>   zookeeper:
>     image: confluentinc/cp-zookeeper:latest
>     container_name: zookeeper
>     environment:
>       ZOOKEEPER_CLIENT_PORT: 2181
>       ZOOKEEPER_TICK_TIME: 2000
>
>   kafka:
>     image: confluentinc/cp-enterprise-kafka:latest
>     container_name: kafka
>     depends_on:
>       - zookeeper
>     links:
>       - zookeeper
>     ports:
>
> # "`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-
>
> # An important note about accessing Kafka from clients on other machines:
>
> # -----------------------------------------------------------------------
>       #
>
> # The config used here exposes port 9092 for _external_ connections to
> the broker
>
> # i.e. those from _outside_ the docker network. This could be from the
> host machine
>
> # running docker, or maybe further afield if you've got a more
> complicated setup.
>
> # If the latter is true, you will need to change the value 'localhost' in
>
> # KAFKA_ADVERTISED_LISTENERS to one that is resolvable to the docker
> host from those
>       # remote clients
>       #
>
> # For connections _internal_ to the docker network, such as from other
> services
>       # and components, use kafka:29092.
>       #
>       # See https://rmoff.net/2018/08/02/kafka-listeners-explained/
>  for details
>
> # "`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-
>       #
>       - 9092:9092
>       - "29092:29092"
>     environment:
>       KAFKA_BROKER_ID: 1
>       KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
>       KAFKA_LISTENER_SECURITY_PROTOCOL_MAP:
> PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
>       KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
>       KAFKA_ADVERTISED_LISTENERS:
> PLAINTEXT://kafka:29092,PLAINTEXT_HOST://localhost:9092
>       KAFKA_AUTO_CREATE_TOPICS_ENABLE: "true"
>       KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
>       KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1
>       KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1
>
> # -v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v
>
> # Useful settings for development/laptop use - modify as needed for Prod
>
> # This one makes ksqlDB feel a bit more responsive when queries start
> running
>       KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 100
>     command:
>       - bash
>       - -c
>       - |
>         echo '127.0.0.1 kafka' >> /etc/hosts
>         /etc/confluent/docker/run
>         sleep infinity
>
>   schema-registry:
>     image: confluentinc/cp-schema-registry:5.5.0
>     container_name: schema-registry
>     depends_on:
>       - zookeeper
>       - kafka
>     links:
>       - zookeeper
>       - kafka
>     ports:
>       - 8081:8081
>     environment:
>       SCHEMA_REGISTRY_HOST_NAME: schema-registry
>       SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL: zookeeper:2181
>
>   cassandra:
>     image: cassandra:latest
>     container_name: cassandra
>     ports:
>       - 7000:7000
>
>   kafka-connect-01:
>     image: confluentinc/cp-kafka-connect:5.5.0
>     container_name: kafka-connect-01
>     depends_on:
>       - kafka
>       - schema-registry
>       - cassandra
>     links:
>       - schema-registry
>       - kafka
>       - mysql
>       - cassandra
>     ports:
>       - 8083:8083
>     environment:
>       CONNECT_BOOTSTRAP_SERVERS: "kafka:29092"
>       CONNECT_REST_ADVERTISED_HOST_NAME: "kafka-connect-01"
>       CONNECT_REST_PORT: 8083
>       CONNECT_GROUP_ID: kafka-connect-01
>       CONNECT_CONFIG_STORAGE_TOPIC: _kafka-connect-01-configs
>       CONNECT_OFFSET_STORAGE_TOPIC: _kafka-connect-01-offsets
>       CONNECT_STATUS_STORAGE_TOPIC: _kafka-connect-01-status
>       CONNECT_KEY_CONVERTER: io.confluent.connect.avro.AvroConverter
>       CONNECT_KEY_CONVERTER_SCHEMA_REGISTRY_URL: "
> http://schema-registry:8081"
>       CONNECT_VALUE_CONVERTER: io.confluent.connect.avro.AvroConverter
>       CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: "
> http://schema-registry:8081"
>       CONNECT_INTERNAL_KEY_CONVERTER:
> "org.apache.kafka.connect.json.JsonConverter"
>       CONNECT_INTERNAL_VALUE_CONVERTER:
> "org.apache.kafka.connect.json.JsonConverter"
>       CONNECT_LOG4J_ROOT_LOGLEVEL: "INFO"
>       CONNECT_LOG4J_LOGGERS:
> "org.apache.kafka.connect.runtime.rest=WARN,org.reflections=ERROR"
>       CONNECT_LOG4J_APPENDER_STDOUT_LAYOUT_CONVERSIONPATTERN:
> "[%d] %p %X{connector.context}%m (%c:%L)%n"
>       CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: "1"
>       CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: "1"
>       CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: "1"
>       CONNECT_PLUGIN_PATH:
> "/usr/share/java,/usr/share/confluent-hub-components/,/connectors/"
>
> # If you want to use the Confluent Hub installer to d/l component, but
> make them available
>     # when running this offline, spin up the stack once and then run :
>
> #   docker cp kafka-connect-01:/usr/share/confluent-hub-components
> ./connectors
>     #   mv ./connectors/confluent-hub-components/* ./connectors
>     #   rm -rf ./connectors/confluent-hub-components
>
>
> # In the command section, $ are replaced with $$ to avoid the error
> 'Invalid interpolation format for "command" option'
>     command:
>       - bash
>       - -c
>       - |
>         #
>         echo "Installing connector plugins"
>         confluent-hub install --no-prompt
> debezium/debezium-connector-mysql:1.1.0
>         confluent-hub install --no-prompt
> confluentinc/kafka-connect-datagen:0.3.1
>         # confluent-hub install --no-prompt
> jcustenborder/kafka-connect-transform-common:0.1.0.35
>         # confluent-hub install --no-prompt
> jcustenborder/kafka-connect-transform-xml:0.1.0.18
>         # confluent-hub install --no-prompt
> streamthoughts/kafka-connect-file-pulse:1.2.1
>         confluent-hub install --no-prompt
> confluentinc/kafka-connect-cassandra:1.2.0
>         #
>         echo "Launching Kafka Connect worker"
>         /etc/confluent/docker/run &
>         #
>         echo "Waiting for Kafka Connect to start listening on localhost ⏳"
>         while : ; do
>           curl_status=$$(curl -s -o /dev/null -w %{http_code}
> http://localhost:8083/connectors)
>           echo -e $$(date) " Kafka Connect listener HTTP state: "
> $$curl_status " (waiting for 200)"
>           if [ $$curl_status -eq 200 ] ; then
>             break
>           fi
>           sleep 5
>         done
>
>         sleep infinity
>
>   ksqldb:
>     image: confluentinc/ksqldb-server:latest
>     hostname: ksqldb
>     container_name: ksqldb
>     links:
>       - schema-registry
>       - kafka-connect-01
>       - kafka
>     depends_on:
>       - kafka
>       - kafka-connect-01
>     ports:
>       - "8088:8088"
>     environment:
>       KSQL_LISTENERS: http://0.0.0.0:8088
>       KSQL_BOOTSTRAP_SERVERS: kafka:29092
>       KSQL_KSQL_LOGGING_PROCESSING_STREAM_AUTO_CREATE: "true"
>       KSQL_KSQL_LOGGING_PROCESSING_TOPIC_AUTO_CREATE: "true"
>       KSQL_KSQL_CONNECT_URL: http://kafka-connect-01:8083
>       KSQL_KSQL_SCHEMA_REGISTRY_URL: http://schema-registry:8081
>       KSQL_KSQL_SERVICE_ID: confluent_rmoff_01
>       KSQL_KSQL_HIDDEN_TOPICS: "^_.*"
>
>   # Other systems
>
>   mysql:
>     # *-----------------------------*
>     # To connect to the DB:
>     #   docker exec -it mysql bash -c 'mysql -u root
> -p$MYSQL_ROOT_PASSWORD'
>     # or
>
> #   docker exec -it mysql bash -c 'mysql -u $MYSQL_USER -p$MYSQL_PASSWORD
> demo'
>     # *-----------------------------*
>     image: debezium/example-mysql:1.1
>     container_name: mysql
>     ports:
>       - 3306:3306
>     environment:
>       - MYSQL_ROOT_PASSWORD=root
>       - MYSQL_USER=user
>       - MYSQL_PASSWORD=pwd
>
> --
> Mohammed Ait Haddou
> Linkedin.com/in/medait
> +212697937189
>