You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2023/09/14 19:19:15 UTC

[impala] 01/03: IMPALA-12414: Add scripts to run Trino in the dev environment

This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit cf819f967b2ba01e9903c8f69fea0ad09e35e34e
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Thu Aug 31 17:01:31 2023 +0200

    IMPALA-12414: Add scripts to run Trino in the dev environment
    
    This patch set adds a couple of scripts that make it easy to run
    Trino in the dev environment.
    
    First, one needs to build our custom Trino docker image via:
    
     testdata/bin/build-trino-docker-image.sh
    
    Then we can run the Trino container via:
    
     testdata/bin/run-trino.sh
    
    One can connect to the Trino CLI by:
    
     testdata/bin/trino-cli.sh
    
    Trino will be configured to access our HMS and HDFS. We add the
    'hive' and 'iceberg' catalogs for Trino. Legacy Hive tables can
    be accessed via the 'hive' catalog, while Iceberg tables (only the
    ones that reside in HiveCatalog) can be accessed via the 'iceberg'
    catalog. E.g.:
    
    trino> use iceberg.functional_parquet;
    trino:functional_parquet> select count(*)
                              from iceberg_mixed_file_format;
     _col0
    -------
         3
    (1 row)
    
    Testing
     * no tests added
     * later we might always run Trino as part of the minicluster and
       add interop tests with it
    
    Change-Id: I49818c7a95e23988b3fbc3d31b4c7fa738e0d952
    Reviewed-on: http://gerrit.cloudera.org:8080/20444
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 testdata/bin/build-trino-docker-image.sh          | 28 +++++++++++++++++++
 testdata/bin/kill-trino.sh                        | 20 ++++++++++++++
 testdata/bin/minicluster_trino/Dockerfile         | 33 +++++++++++++++++++++++
 testdata/bin/minicluster_trino/hive.properties    | 20 ++++++++++++++
 testdata/bin/minicluster_trino/iceberg.properties | 21 +++++++++++++++
 testdata/bin/run-trino.sh                         | 20 ++++++++++++++
 testdata/bin/trino-cli.sh                         | 20 ++++++++++++++
 7 files changed, 162 insertions(+)

diff --git a/testdata/bin/build-trino-docker-image.sh b/testdata/bin/build-trino-docker-image.sh
new file mode 100755
index 000000000..4c86a7a7d
--- /dev/null
+++ b/testdata/bin/build-trino-docker-image.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Copy cluster configs to trino docker directory.
+pushd ${HADOOP_CONF_DIR}
+cp hive-site.xml core-site.xml hdfs-site.xml ${IMPALA_HOME}/testdata/bin/minicluster_trino
+popd
+
+# Build trino image with USERNAME=$USER so Trino will be able to write to HDFS.
+docker build ${IMPALA_HOME}/testdata/bin/minicluster_trino -t impala-minicluster-trino \
+    --build-arg USERNAME=$USER
+
diff --git a/testdata/bin/kill-trino.sh b/testdata/bin/kill-trino.sh
new file mode 100755
index 000000000..aca786cdc
--- /dev/null
+++ b/testdata/bin/kill-trino.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# Stop the Trino container, then remove it so run-trino.sh can reuse its name.
+docker stop impala-minicluster-trino && docker rm impala-minicluster-trino
diff --git a/testdata/bin/minicluster_trino/Dockerfile b/testdata/bin/minicluster_trino/Dockerfile
new file mode 100644
index 000000000..fa88a68c6
--- /dev/null
+++ b/testdata/bin/minicluster_trino/Dockerfile
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Start from the official Trino image.
+FROM trinodb/trino:latest
+
+# Developer username (passed with --build-arg); gives Trino write access to HDFS.
+ARG USERNAME
+
+# Switch Trino from port 8080 to 9091 and pass the Hadoop user name to the JVM.
+RUN sed -i \
+      -e 's/http-server.http.port=8080/http-server.http.port=9091/' \
+      -e 's/localhost:8080/localhost:9091/' /etc/trino/config.properties && \
+    echo "-DHADOOP_USER_NAME=$USERNAME" >> /etc/trino/jvm.config
+
+COPY hive-site.xml core-site.xml hdfs-site.xml /etc/
+COPY iceberg.properties hive.properties /etc/trino/catalog/
+# Publish the port Trino was moved to above.
+EXPOSE 9091
diff --git a/testdata/bin/minicluster_trino/hive.properties b/testdata/bin/minicluster_trino/hive.properties
new file mode 100644
index 000000000..4a442f019
--- /dev/null
+++ b/testdata/bin/minicluster_trino/hive.properties
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# Trino 'hive' catalog; the XML resources are the ones the Dockerfile copies to /etc.
+connector.name=hive
+hive.config.resources=/etc/hive-site.xml,/etc/hdfs-site.xml,/etc/core-site.xml
+hive.metastore.uri=thrift://localhost:9083
diff --git a/testdata/bin/minicluster_trino/iceberg.properties b/testdata/bin/minicluster_trino/iceberg.properties
new file mode 100644
index 000000000..bab71238d
--- /dev/null
+++ b/testdata/bin/minicluster_trino/iceberg.properties
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# Trino 'iceberg' catalog, using the Hive Metastore as the Iceberg catalog.
+connector.name=iceberg
+iceberg.catalog.type=hive_metastore
+hive.config.resources=/etc/hive-site.xml,/etc/hdfs-site.xml,/etc/core-site.xml
+hive.metastore.uri=thrift://localhost:9083
diff --git a/testdata/bin/run-trino.sh b/testdata/bin/run-trino.sh
new file mode 100755
index 000000000..e96899576
--- /dev/null
+++ b/testdata/bin/run-trino.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+docker run -detach --network=host --name impala-minicluster-trino impala-minicluster-trino
diff --git a/testdata/bin/trino-cli.sh b/testdata/bin/trino-cli.sh
new file mode 100755
index 000000000..31f1b3b02
--- /dev/null
+++ b/testdata/bin/trino-cli.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# Open the Trino CLI inside the running container; extra args go to 'trino'.
+docker exec -it impala-minicluster-trino trino --server=localhost:9091 "$@"