You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2022/10/29 12:14:25 UTC
[arrow-datafusion] branch master updated: Generate expected benchmark query results (#4010)
This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 71f05a317 Generate expected benchmark query results (#4010)
71f05a317 is described below
commit 71f05a3175e931943276a8778162e37504e00503
Author: Andy Grove <an...@gmail.com>
AuthorDate: Sat Oct 29 06:14:19 2022 -0600
Generate expected benchmark query results (#4010)
---
benchmarks/entrypoint.sh | 22 ----------------------
benchmarks/tpch-gen.sh | 17 ++++++++++++-----
benchmarks/tpchgen.dockerfile | 32 --------------------------------
3 files changed, 12 insertions(+), 59 deletions(-)
diff --git a/benchmarks/entrypoint.sh b/benchmarks/entrypoint.sh
deleted file mode 100755
index 312376fed..000000000
--- a/benchmarks/entrypoint.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/bash
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-set -e
-cd /tpch-dbgen
-./dbgen -vf -s $1
-mv *.tbl /data
\ No newline at end of file
diff --git a/benchmarks/tpch-gen.sh b/benchmarks/tpch-gen.sh
index 2f79ade76..90230e666 100755
--- a/benchmarks/tpch-gen.sh
+++ b/benchmarks/tpch-gen.sh
@@ -16,20 +16,27 @@
# specific language governing permissions and limitations
# under the License.
-#set -e
+mkdir -p data/answers 2>/dev/null
+
+set -e
pushd ..
. ./dev/build-set-env.sh
popd
-docker build -f tpchgen.dockerfile -t datafusion-tpchgen:$DATAFUSION_VERSION .
-
# Generate data into the ./data directory if it does not already exist
FILE=./data/supplier.tbl
if test -f "$FILE"; then
echo "$FILE exists."
else
- mkdir data 2>/dev/null
- docker run -v `pwd`/data:/data -it --rm datafusion-tpchgen:$DATAFUSION_VERSION $1
+ docker run -v `pwd`/data:/data -it --rm ghcr.io/databloom-ai/tpch-docker:main -vf -s $1
ls -l data
+fi
+
+# Copy expected answers (at SF=1) into the ./data/answers directory if they do not already exist
+FILE=./data/answers/q1.out
+if test -f "$FILE"; then
+ echo "$FILE exists."
+else
+ docker run -v `pwd`/data:/data -it --entrypoint /bin/bash --rm ghcr.io/databloom-ai/tpch-docker:main -c "cp /opt/tpch/2.18.0_rc2/dbgen/answers/* /data/answers/"
fi
\ No newline at end of file
diff --git a/benchmarks/tpchgen.dockerfile b/benchmarks/tpchgen.dockerfile
deleted file mode 100644
index 30acdead7..000000000
--- a/benchmarks/tpchgen.dockerfile
+++ /dev/null
@@ -1,32 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-FROM ubuntu
-
-RUN apt-get update && \
- apt-get install -y git build-essential
-
-RUN git clone https://github.com/databricks/tpch-dbgen.git && \
- cd tpch-dbgen && \
- make
-
-WORKDIR /tpch-dbgen
-ADD entrypoint.sh /tpch-dbgen/
-
-VOLUME /data
-
-ENTRYPOINT [ "bash", "./entrypoint.sh" ]