You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@apisix.apache.org by GitBox <gi...@apache.org> on 2022/05/15 13:30:43 UTC

[GitHub] [apisix] spacewander commented on a diff in pull request #7032: feat(pubsub): support kafka

spacewander commented on code in PR #7032:
URL: https://github.com/apache/apisix/pull/7032#discussion_r873169258


##########
docs/en/latest/pubsub/kafka.md:
##########
@@ -0,0 +1,91 @@
+---
+title: Apache Kafka
+keywords:
+  - APISIX
+  - Pub-Sub
+  - Kafka
+description: This document contains information about the Apache APISIX kafka pub-sub scenario.
+---
+
+<!--
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+-->
+
+## Connect to Apache Kafka
+
+Connecting to Apache Kafka in Apache APISIX is very simple.
+
+Currently we provide a simpler way to integrate by combining two APIs, ListOffsets and Fetch, to quickly implement the ability to pull Kafka messages, but do not support Apache Kafka's consumer group feature for now, and cannot be managed by Kafka for offsets.
+
+### Limitations
+
+- Offsets need to be managed manually

Review Comment:
   After rendering, these two lines are merged together...



##########
t/pubsub/kafka.t:
##########
@@ -0,0 +1,229 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+use t::APISIX 'no_plan';
+
+repeat_each(1);
+no_long_string();
+no_root_location();
+
+add_block_preprocessor(sub {
+    my ($block) = @_;
+
+    if ((!defined $block->error_log) && (!defined $block->no_error_log)) {
+        $block->set_value("no_error_log", "[error]");
+    }
+
+    if (!defined $block->request) {
+        $block->set_value("request", "GET /t");
+    }
+});
+
+run_tests();
+
+__DATA__
+
+=== TEST 1: setup all-in-one test
+--- config
+    location /t {
+        content_by_lua_block {
+            local data = {
+                {
+                    url = "/apisix/admin/routes/kafka",
+                    data = [[{
+                        "upstream": {
+                            "nodes": {
+                                "127.0.0.1:9092": 1
+                            },
+                            "type": "none",
+                            "scheme": "kafka"
+                        },
+                        "uri": "/kafka"
+                    }]],
+                },
+                {
+                    url = "/apisix/admin/routes/kafka-invalid",
+                    data = [[{
+                        "upstream": {
+                            "nodes": {
+                                "127.0.0.1:59092": 1
+                            },
+                            "type": "none",
+                            "scheme": "kafka"
+                        },
+                        "uri": "/kafka-invalid"
+                    }]],
+                },
+            }
+
+            local t = require("lib.test_admin").test
+
+            for _, data in ipairs(data) do
+                local code, body = t(data.url, ngx.HTTP_PUT, data.data)
+                ngx.say(body)
+            end
+        }
+    }
+--- response_body eval
+"passed\n"x2
+
+
+
+=== TEST 2: hit route (with HTTP request)
+--- request
+GET /kafka
+--- error_code: 400
+--- error_log
+failed to initialize pubsub module, err: bad "upgrade" request header: nil
+
+
+
+=== TEST 3: hit route (Kafka)
+--- config
+    # The messages used in this test are produced in the linux-ci-init-service.sh
+    # script that prepares the CI environment
+    location /t {
+        content_by_lua_block {
+            local lib_pubsub = require("lib.pubsub")
+            local test_pubsub = lib_pubsub.new_ws("ws://127.0.0.1:1984/kafka")
+            local data = {
+                {
+                    sequence = 0,
+                    cmd_kafka_list_offset = {
+                        topic = "not-exist",
+                        partition = 0,
+                        timestamp = -1,
+                    },
+                },
+                {
+                    sequence = 1,
+                    cmd_kafka_fetch = {
+                        topic = "not-exist",
+                        partition = 0,
+                        offset = 0,
+                    },
+                },
+                {
+                    -- Query first message offset
+                    sequence = 2,
+                    cmd_kafka_list_offset = {
+                        topic = "test-consumer",
+                        partition = 0,
+                        timestamp = -2,
+                    },
+                },
+                {
+                    -- Query last message offset
+                    sequence = 3,
+                    cmd_kafka_list_offset = {
+                        topic = "test-consumer",
+                        partition = 0,
+                        timestamp = -1,
+                    },
+                },
+                {
+                    -- Query by timestamp, 9999999999999 later than the
+                    -- production time of any message
+                    sequence = 4,
+                    cmd_kafka_list_offset = {
+                        topic = "test-consumer",
+                        partition = 0,
+                        timestamp = "9999999999999",
+                    },
+                },
+                {
+                    -- Query by timestamp, 1500000000000 ms earlier than the
+                    -- production time of any message
+                    sequence = 5,
+                    cmd_kafka_list_offset = {
+                        topic = "test-consumer",
+                        partition = 0,
+                        timestamp = "1500000000000",
+                    },
+                },
+                {
+                    sequence = 6,
+                    cmd_kafka_fetch = {
+                        topic = "test-consumer",
+                        partition = 0,
+                        offset = 14,
+                    },
+                },
+                {
+                    sequence = 7,
+                    cmd_kafka_fetch = {
+                        topic = "test-consumer",
+                        partition = 0,
+                        offset = 999,
+                    },
+                },
+            }
+
+            for i = 1, #data do
+                local data = test_pubsub:send_recv_ws_binary(data[i])
+                if data.error_resp then
+                    ngx.say(data.sequence..data.error_resp.message)
+                end
+                if data.kafka_list_offset_resp then
+                    ngx.say(data.sequence.."offset: "..data.kafka_list_offset_resp.offset)
+                end
+                if data.kafka_fetch_resp then
+                    ngx.say(data.sequence.."offset: "..data.kafka_fetch_resp.messages[1].offset..
+                        " msg: "..data.kafka_fetch_resp.messages[1].value)
+                end
+            end
+            test_pubsub:close_ws()
+        }
+    }
+--- response_body
+0failed to list offset, topic: not-exist, partition: 0, err: not found topic
+1failed to fetch message, topic: not-exist, partition: 0, err: not found topic
+2offset: 0
+3offset: 30

Review Comment:
   It would be better to document where the data and offsets come from.



##########
docs/en/latest/pubsub/kafka.md:
##########
@@ -0,0 +1,91 @@
+---
+title: Apache Kafka
+keywords:
+  - APISIX
+  - Pub-Sub

Review Comment:
   Could we use `PubSub` here, consistent with the `pubsub` naming used in other places?



##########
docs/en/latest/pubsub/kafka.md:
##########
@@ -0,0 +1,91 @@
+---
+title: Apache Kafka
+keywords:
+  - APISIX
+  - Pub-Sub
+  - Kafka
+description: This document contains information about the Apache APISIX kafka pub-sub scenario.
+---
+
+<!--
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+-->
+
+## Connect to Apache Kafka
+
+Connecting to Apache Kafka in Apache APISIX is very simple.
+
+Currently we provide a simpler way to integrate by combining two APIs, ListOffsets and Fetch, to quickly implement the ability to pull Kafka messages, but do not support Apache Kafka's consumer group feature for now, and cannot be managed by Kafka for offsets.
+
+### Limitations
+
+- Offsets need to be managed manually
+They can be stored by a custom backend service or obtained via the list_offset command before starting to fetch the message, which can use timestamp to get the starting offset, or to get the initial and end offsets.
+- Unsupported batch data acquisition
+A single instruction can only obtain the data of a Topic Partition, does not support batch data acquisition through a single instruction
+
+### Prepare
+
+First, it is necessary to compile the [communication protocol](../../../../apisix/pubsub.proto) as a language-specific SDK using the `protoc`, which provides the command and response definitions to connect to Kafka via APISIX using the WebSocket.

Review Comment:
   Let's update the path of the definition. We should use an absolute path, as the file is not on the website.



##########
apisix/pubsub/kafka.lua:
##########
@@ -0,0 +1,133 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements.  See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License.  You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+local core      = require("apisix.core")
+local bconsumer = require("resty.kafka.basic-consumer")
+local ffi       = require("ffi")
+local C         = ffi.C
+local tostring  = tostring
+local type      = type
+local ipairs    = ipairs
+local str_sub   = string.sub
+
+ffi.cdef[[
+    int64_t atoll(const char *num);
+]]
+
+
+local _M = {}
+
+
+-- Handles the conversion of 64-bit integers in the lua-protobuf.
+--
+-- Because of the limitations of luajit, we cannot use native 64-bit
+-- numbers, so pb decode converts int64 to a string in #xxx format
+-- to avoid loss of precision, by this function, we convert this
+-- string to int64 cdata numbers.
+local function pb_convert_to_int64(src)
+    if type(src) == "string" then
+        return C.atoll(ffi.cast("char *", src) + 1)

Review Comment:
   Let's check the length of `src` to avoid an out-of-bounds access.



##########
apisix/include/apisix/model/pubsub.proto:
##########
@@ -55,16 +73,18 @@ message CmdEmpty {}
 message PubSubReq {
     int64 sequence = 1;
     oneof req {
-        CmdEmpty cmd_empty = 31;
-        CmdPing cmd_ping = 32;
+        CmdEmpty cmd_empty                       = 31;

Review Comment:
   Can we remove `cmd_empty`, which is test-only? Using `cmd_kafka_fetch` in pubsub.t is enough.



##########
docs/en/latest/pubsub.md:
##########
@@ -42,6 +42,10 @@ In Apache APISIX, the most common scenario is handling north-south traffic from
 
 Currently, Apache APISIX supports WebSocket communication with the client, which can be any application that supports WebSocket, with Protocol Buffer as the serialization mechanism, see the [protocol definition](../../../apisix/pubsub.proto).

Review Comment:
   Let's update the path of the definition. We should use an absolute path, as the file is not on the website.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@apisix.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org