You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2022/10/29 12:14:25 UTC

[arrow-datafusion] branch master updated: Generate expected benchmark query results (#4010)

This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 71f05a317 Generate expected benchmark query results (#4010)
71f05a317 is described below

commit 71f05a3175e931943276a8778162e37504e00503
Author: Andy Grove <an...@gmail.com>
AuthorDate: Sat Oct 29 06:14:19 2022 -0600

    Generate expected benchmark query results (#4010)
---
 benchmarks/entrypoint.sh      | 22 ----------------------
 benchmarks/tpch-gen.sh        | 17 ++++++++++++-----
 benchmarks/tpchgen.dockerfile | 32 --------------------------------
 3 files changed, 12 insertions(+), 59 deletions(-)

diff --git a/benchmarks/entrypoint.sh b/benchmarks/entrypoint.sh
deleted file mode 100755
index 312376fed..000000000
--- a/benchmarks/entrypoint.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/bash
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-set -e
-cd /tpch-dbgen
-./dbgen -vf -s $1
-mv *.tbl /data
\ No newline at end of file
diff --git a/benchmarks/tpch-gen.sh b/benchmarks/tpch-gen.sh
index 2f79ade76..90230e666 100755
--- a/benchmarks/tpch-gen.sh
+++ b/benchmarks/tpch-gen.sh
@@ -16,20 +16,27 @@
 # specific language governing permissions and limitations
 # under the License.
 
-#set -e
+mkdir -p data/answers 2>/dev/null
+
+set -e
 
 pushd ..
 . ./dev/build-set-env.sh
 popd
 
-docker build  -f tpchgen.dockerfile -t datafusion-tpchgen:$DATAFUSION_VERSION .
-
 # Generate data into the ./data directory if it does not already exist
 FILE=./data/supplier.tbl
 if test -f "$FILE"; then
     echo "$FILE exists."
 else
-  mkdir data 2>/dev/null
-  docker run -v `pwd`/data:/data -it --rm datafusion-tpchgen:$DATAFUSION_VERSION $1
+  docker run -v `pwd`/data:/data -it --rm ghcr.io/databloom-ai/tpch-docker:main -vf -s $1
   ls -l data
+fi
+
+# Copy expected answers (at SF=1) into the ./data/answers directory if they are not already present
+FILE=./data/answers/q1.out
+if test -f "$FILE"; then
+    echo "$FILE exists."
+else
+  docker run -v `pwd`/data:/data -it --entrypoint /bin/bash --rm ghcr.io/databloom-ai/tpch-docker:main -c "cp /opt/tpch/2.18.0_rc2/dbgen/answers/* /data/answers/"
 fi
\ No newline at end of file
diff --git a/benchmarks/tpchgen.dockerfile b/benchmarks/tpchgen.dockerfile
deleted file mode 100644
index 30acdead7..000000000
--- a/benchmarks/tpchgen.dockerfile
+++ /dev/null
@@ -1,32 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-FROM ubuntu
-
-RUN apt-get update && \
-    apt-get install -y git build-essential
-
-RUN git clone https://github.com/databricks/tpch-dbgen.git && \
-    cd tpch-dbgen && \
-    make
-
-WORKDIR /tpch-dbgen
-ADD entrypoint.sh /tpch-dbgen/
-
-VOLUME /data
-
-ENTRYPOINT [ "bash", "./entrypoint.sh" ]