You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@seatunnel.apache.org by "EricJoy2048 (via GitHub)" <gi...@apache.org> on 2023/01/31 09:46:50 UTC

[GitHub] [incubator-seatunnel] EricJoy2048 commented on a diff in pull request #3679: [Feature][Connector-V2] StarRocks source connector

EricJoy2048 commented on code in PR #3679:
URL: https://github.com/apache/incubator-seatunnel/pull/3679#discussion_r1091506642


##########
docs/en/connector-v2/source/StarRocks.md:
##########
@@ -0,0 +1,146 @@
+# StarRocks
+
+> StarRocks source connector
+
+## Description
+Read external data source data through StarRocks.
+The internal implementation of StarRocks source connector is obtains the query plan from the frontend (FE), 
+delivers the query plan as a parameter to BE nodes, and then obtains data results from BE nodes..
+## Key features
+
+- [x] [batch](../../concept/connector-v2-features.md)
+- [ ] [stream](../../concept/connector-v2-features.md)
+- [x] [exactly-once](../../concept/connector-v2-features.md)
+- [x] [schema projection](../../concept/connector-v2-features.md)
+
+## Options
+
+| name                    | type   | required | default value     |
+|-------------------------|--------|----------|-------------------|
+| node_urls               | list   | yes      | -                 |
+| username                | string | yes      | -                 |
+| password                | string | yes      | -                 |
+| database                | string | yes      | -                 |
+| table                   | string | yes      | -                 |
+| scan_filter             | string | no       | -                 |
+| fields                  | config | yes      | -                 |

Review Comment:
   Please use the `schema` option here.



##########
seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/source/StarRocksBeReadClient.java:
##########
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.starrocks.client.source;
+
+import static org.apache.seatunnel.connectors.seatunnel.starrocks.exception.StarRocksConnectorErrorCode.CLOSE_BE_READER_FAILED;
+
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
+import org.apache.seatunnel.connectors.seatunnel.starrocks.client.source.model.QueryPartition;
+import org.apache.seatunnel.connectors.seatunnel.starrocks.config.SourceConfig;
+import org.apache.seatunnel.connectors.seatunnel.starrocks.exception.StarRocksConnectorErrorCode;
+import org.apache.seatunnel.connectors.seatunnel.starrocks.exception.StarRocksConnectorException;
+
+import com.starrocks.shade.org.apache.thrift.TException;
+import com.starrocks.shade.org.apache.thrift.protocol.TBinaryProtocol;
+import com.starrocks.shade.org.apache.thrift.protocol.TProtocol;
+import com.starrocks.shade.org.apache.thrift.transport.TSocket;
+import com.starrocks.shade.org.apache.thrift.transport.TTransportException;
+import com.starrocks.thrift.TScanBatchResult;
+import com.starrocks.thrift.TScanCloseParams;
+import com.starrocks.thrift.TScanNextBatchParams;
+import com.starrocks.thrift.TScanOpenParams;
+import com.starrocks.thrift.TScanOpenResult;
+import com.starrocks.thrift.TStarrocksExternalService;
+import com.starrocks.thrift.TStatusCode;
+import lombok.extern.slf4j.Slf4j;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+@Slf4j
+public class StarRocksBeReadClient implements Serializable {
+    private static final String DEFAULT_CLUSTER_NAME = "default_cluster";
+
+    private TStarrocksExternalService.Client client;
+    private final String ip;
+    private final int port;
+    private String contextId;
+    private int readerOffset = 0;
+    private final SourceConfig sourceConfig;
+    private final SeaTunnelRowType seaTunnelRowType;
+    private StarRocksRowBatchReader rowBatch;
+
+    private final List<Long> tabletIds;
+
+    private final String queryPlan;
+    protected AtomicBoolean eos = new AtomicBoolean(false);
+
+    public StarRocksBeReadClient(QueryPartition queryPartition,
+                                 SourceConfig sourceConfig,
+                                 SeaTunnelRowType seaTunnelRowType) {
+        this.sourceConfig = sourceConfig;
+        this.seaTunnelRowType = seaTunnelRowType;
+        String beNodeInfo = queryPartition.getBeAddress();
+        log.debug("Parse StarRocks BE address: '{}'.", beNodeInfo);
+        String[] hostPort = beNodeInfo.split(":");
+        if (hostPort.length != 2) {
+            throw new StarRocksConnectorException(StarRocksConnectorErrorCode.CREATE_BE_READER_FAILED,
+                    String.format("Format of StarRocks BE address[%s] is illegal", beNodeInfo));
+        }
+        this.ip = hostPort[0].trim();
+        this.port = Integer.parseInt(hostPort[1].trim());
+        this.queryPlan = queryPartition.getQueryPlan();
+        this.tabletIds = new ArrayList<>(queryPartition.getTabletIds());
+        TBinaryProtocol.Factory factory = new TBinaryProtocol.Factory();
+        TSocket socket = new TSocket(ip, port, sourceConfig.getConnectTimeoutMs(), sourceConfig.getConnectTimeoutMs());
+        try {
+            socket.open();
+        } catch (TTransportException e) {
+            socket.close();
+            throw new StarRocksConnectorException(StarRocksConnectorErrorCode.CREATE_BE_READER_FAILED,
+                    "Failed to open socket", e);
+        }
+        TProtocol protocol = factory.getProtocol(socket);
+        client = new TStarrocksExternalService.Client(protocol);
+
+    }
+
+    public void openScanner() {
+        TScanOpenParams params = new TScanOpenParams();
+        params.setTablet_ids(tabletIds);
+        params.setOpaqued_query_plan(queryPlan);
+        params.setCluster(DEFAULT_CLUSTER_NAME);
+        params.setDatabase(sourceConfig.getDatabase());
+        params.setTable(sourceConfig.getTable());
+        params.setUser(sourceConfig.getUsername());
+        params.setPasswd(sourceConfig.getPassword());
+        params.setBatch_size(sourceConfig.getBatchRows());
+        if (sourceConfig.getSourceOptionProps() != null) {
+            params.setProperties(sourceConfig.getSourceOptionProps());
+        }
+        short keepAliveMin = (short) Math.min(Short.MAX_VALUE, sourceConfig.getKeepAliveMin());
+        params.setKeep_alive_min(keepAliveMin);
+        params.setQuery_timeout(sourceConfig.getQueryTimeoutSec());
+        params.setMem_limit(sourceConfig.getMemLimit());
+        log.info("open Scan params.mem_limit {} B", params.getMem_limit());
+        log.info("open Scan params.keep-alive-min {} min", params.getKeep_alive_min());
+        log.info("open Scan params.batch_size {}", params.getBatch_size());
+        TScanOpenResult result = null;
+        try {
+            result = client.open_scanner(params);
+            if (!TStatusCode.OK.equals(result.getStatus().getStatus_code())) {
+                throw new StarRocksConnectorException(StarRocksConnectorErrorCode.SCAN_BE_DATA_FAILED,
+                        "Failed to open scanner."
+                                + result.getStatus().getStatus_code()
+                                + result.getStatus().getError_msgs()
+                );
+            }
+        } catch (TException e) {
+            throw new StarRocksConnectorException(StarRocksConnectorErrorCode.SCAN_BE_DATA_FAILED,
+                    e.getMessage());
+        }
+        this.contextId = result.getContext_id();
+        log.info("Open scanner for {}:{} with context id {}, and there are {} tablets {}",
+                ip, port, contextId, tabletIds.size(), tabletIds);
+    }
+
+    public boolean hasNext() {
+        boolean hasNext = false;
+        // Arrow data was acquired synchronously during the iterative process
+        if (!eos.get() && (rowBatch == null || !rowBatch.hasNext())) {
+            if (rowBatch != null) {
+                readerOffset += rowBatch.getReadRowCount();
+                rowBatch.close();
+            }
+            TScanNextBatchParams nextBatchParams = new TScanNextBatchParams();
+            nextBatchParams.setContext_id(contextId);
+            nextBatchParams.setOffset(readerOffset);
+            TScanBatchResult result;
+            try {
+                result = client.get_next(nextBatchParams);
+                if (!TStatusCode.OK.equals(result.getStatus().getStatus_code())) {
+                    throw new StarRocksConnectorException(StarRocksConnectorErrorCode.SCAN_BE_DATA_FAILED, "Failed to get next from be -> ip:[" + ip + "] "
+                            + result.getStatus().getStatus_code() + " msg:" + result.getStatus().getError_msgs());
+                }
+                eos.set(result.isEos());
+                if (!eos.get()) {
+                    rowBatch = new StarRocksRowBatchReader(result, seaTunnelRowType).readArrow();
+                }
+            } catch (TException e) {
+                throw new RuntimeException(e.getMessage());

Review Comment:
   Please throw the connector's custom exception (`StarRocksConnectorException`) here instead of `RuntimeException`.



##########
docs/en/connector-v2/source/StarRocks.md:
##########
@@ -0,0 +1,146 @@
+# StarRocks
+
+> StarRocks source connector
+
+## Description
+Read external data source data through StarRocks.
+The internal implementation of StarRocks source connector is obtains the query plan from the frontend (FE), 
+delivers the query plan as a parameter to BE nodes, and then obtains data results from BE nodes..
+## Key features
+
+- [x] [batch](../../concept/connector-v2-features.md)
+- [ ] [stream](../../concept/connector-v2-features.md)
+- [x] [exactly-once](../../concept/connector-v2-features.md)
+- [x] [schema projection](../../concept/connector-v2-features.md)
+

Review Comment:
   <img width="342" alt="image" src="https://user-images.githubusercontent.com/32193458/215684511-cebe342f-d0b0-44d9-96c0-297297f9e446.png">
   
   Some key features are missing from this list.



##########
seatunnel-connectors-v2/connector-starrocks/pom.xml:
##########
@@ -54,5 +61,57 @@
             <artifactId>httpcore</artifactId>
             <version>${httpcore.version}</version>
         </dependency>
+        <dependency>
+            <groupId>com.starrocks</groupId>
+            <artifactId>starrocks-thrift-sdk</artifactId>
+            <version>${starrocks.thrift.sdk.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.arrow</groupId>
+            <artifactId>arrow-vector</artifactId>
+            <version>${arrow.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.arrow</groupId>
+            <artifactId>arrow-memory-netty</artifactId>
+            <version>${arrow.version}</version>
+        </dependency>
     </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-shade-plugin</artifactId>
+                <configuration>
+                    <relocations>
+                        <relocation>
+                            <pattern>org.apache.arrow</pattern>
+                            <shadedPattern>com.starrocks.shaded.org.apache.arrow</shadedPattern>

Review Comment:
   Please use this format
   <img width="1162" alt="image" src="https://user-images.githubusercontent.com/32193458/215685318-dc59ef23-ba30-4bc2-bd5f-30bc0518962b.png">
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@seatunnel.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org