You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2018/07/17 16:53:21 UTC

[1/7] impala git commit: [DOCS] A typo fix in impala_perf_stats.xml

Repository: impala
Updated Branches:
  refs/heads/master 9bcc8c6ac -> 7f3f63424


[DOCS] A typo fix in impala_perf_stats.xml

Change-Id: I2de22e61ce016cb9c1f780bca84a7338482b212d
Reviewed-on: http://gerrit.cloudera.org:8080/10958
Reviewed-by: Alex Rodoni <ar...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/2ddff521
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/2ddff521
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/2ddff521

Branch: refs/heads/master
Commit: 2ddff521bb69034b3979bfaa60b1505cbcad40fe
Parents: 9bcc8c6
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Mon Jul 16 19:22:44 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Tue Jul 17 02:33:17 2018 +0000

----------------------------------------------------------------------
 docs/topics/impala_perf_stats.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/2ddff521/docs/topics/impala_perf_stats.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_perf_stats.xml b/docs/topics/impala_perf_stats.xml
index 5e5d487..b7106a4 100644
--- a/docs/topics/impala_perf_stats.xml
+++ b/docs/topics/impala_perf_stats.xml
@@ -476,7 +476,7 @@ show column stats year_month_day;
             Set the impalad start-up configuration "--enable_stats_extrapolation" to
             enable the features globally. To enable them only for a specific table, set
             the "impala.enable.stats.extrapolation" table property to "true" for the
-            desired table. The tbale-level property overrides the global setting, so
+            desired table. The table-level property overrides the global setting, so
             it is also possible to enable sampling and extrapolation globally, but
             disable it for specific tables by setting the table property to "false".
             Example:


[5/7] impala git commit: Remove dead multiple_filesystems member from THdfsTable

Posted by jo...@apache.org.
Remove dead multiple_filesystems member from THdfsTable

This member was added long ago with a TODO to remove it. The member has
not been in use for quite some time, so this patch removes the member as
well as its assignment.

Change-Id: I33197a52e05d0c9647f5ce64a77c59950d9a1b94
Reviewed-on: http://gerrit.cloudera.org:8080/10935
Reviewed-by: Vuk Ercegovac <ve...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/558fa145
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/558fa145
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/558fa145

Branch: refs/heads/master
Commit: 558fa145a10c3a333b6113363b7e6d8777400520
Parents: 0411974
Author: Todd Lipcon <to...@cloudera.com>
Authored: Thu Jul 12 16:17:07 2018 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Tue Jul 17 16:11:54 2018 +0000

----------------------------------------------------------------------
 common/thrift/CatalogObjects.thrift                           | 4 +---
 fe/src/main/java/org/apache/impala/catalog/HdfsTable.java     | 7 -------
 .../java/org/apache/impala/catalog/local/LocalFsTable.java    | 1 -
 3 files changed, 1 insertion(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/558fa145/common/thrift/CatalogObjects.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/CatalogObjects.thrift b/common/thrift/CatalogObjects.thrift
index bc068d6..d463300 100644
--- a/common/thrift/CatalogObjects.thrift
+++ b/common/thrift/CatalogObjects.thrift
@@ -314,9 +314,7 @@ struct THdfsTable {
   // than duplicate the list of network address, which helps reduce memory usage.
   7: optional list<Types.TNetworkAddress> network_addresses
 
-  // Indicates that this table's partitions reside on more than one filesystem.
-  // TODO: remove once INSERT across filesystems is supported.
-  8: optional bool multiple_filesystems
+  // REMOVED: 8: optional bool multiple_filesystems
 
   // The prefixes of locations of partitions in this table. See THdfsPartitionLocation for
   // the description of how a prefix is computed.

http://git-wip-us.apache.org/repos/asf/impala/blob/558fa145/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
index 69ecb6c..de9e3d8 100644
--- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
@@ -206,9 +206,6 @@ public class HdfsTable extends Table implements FeFsTable {
   // True iff this table has incremental stats in any of its partitions.
   private boolean hasIncrementalStats_ = false;
 
-  // True iff the table's partitions are located on more than one filesystem.
-  private boolean multipleFileSystems_ = false;
-
   private HdfsPartitionLocationCompressor partitionLocationCompressor_;
 
   // Base Hdfs directory where files of this table are stored.
@@ -1078,8 +1075,6 @@ public class HdfsTable extends Table implements FeFsTable {
     Path partDirPath = new Path(storageDescriptor.getLocation());
     try {
       FileSystem fs = partDirPath.getFileSystem(CONF);
-      multipleFileSystems_ = multipleFileSystems_ ||
-          !FileSystemUtil.isPathOnFileSystem(new Path(getLocation()), fs);
       if (msPartition != null) {
         HdfsCachingUtil.validateCacheParams(msPartition.getParameters());
       }
@@ -1668,7 +1663,6 @@ public class HdfsTable extends Table implements FeFsTable {
     hdfsBaseDir_ = hdfsTable.getHdfsBaseDir();
     nullColumnValue_ = hdfsTable.nullColumnValue;
     nullPartitionKeyValue_ = hdfsTable.nullPartitionKeyValue;
-    multipleFileSystems_ = hdfsTable.multiple_filesystems;
     hostIndex_.populate(hdfsTable.getNetwork_addresses());
     resetPartitions();
     try {
@@ -1765,7 +1759,6 @@ public class HdfsTable extends Table implements FeFsTable {
     THdfsTable hdfsTable = new THdfsTable(hdfsBaseDir_, getColumnNames(),
         nullPartitionKeyValue_, nullColumnValue_, idToPartition, prototypePartition);
     hdfsTable.setAvroSchema(avroSchema_);
-    hdfsTable.setMultiple_filesystems(multipleFileSystems_);
     if (includeFileDesc) {
       // Network addresses are used only by THdfsFileBlocks which are inside
       // THdfsFileDesc, so include network addreses only when including THdfsFileDesc.

http://git-wip-us.apache.org/repos/asf/impala/blob/558fa145/fe/src/main/java/org/apache/impala/catalog/local/LocalFsTable.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/local/LocalFsTable.java b/fe/src/main/java/org/apache/impala/catalog/local/LocalFsTable.java
index 8a6c0f6..b652e0d 100644
--- a/fe/src/main/java/org/apache/impala/catalog/local/LocalFsTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/local/LocalFsTable.java
@@ -187,7 +187,6 @@ public class LocalFsTable extends LocalTable implements FeFsTable {
         /*includeIncrementalStats=*/false);
 
     // TODO(todd): implement avro schema support
-    // TODO(todd): set multiple_filesystems member?
     THdfsTable hdfsTable = new THdfsTable(getHdfsBaseDir(), getColumnNames(),
         getNullPartitionKeyValue(), schemaInfo_.getNullColumnValue(), idToPartition,
         tPrototypePartition);


[3/7] impala git commit: IMPALA-7132: Filter out useless output from run_clang_tidy.sh

Posted by jo...@apache.org.
IMPALA-7132: Filter out useless output from run_clang_tidy.sh

Clang's run-clang-tidy.py script produces a lot of
output even when there are no warnings or errors.
None of this output is useful.

This patch has two parts:
1. Bump LLVM to 5.0.1-p1, which has patched run-clang-tidy.py
   to make it reduce its own output when passed -quiet
   (along with other enhancements).
2. Pass -quiet to run-clang-tidy.py and pipe the stderr output
   to a temporary file. Display this output only if
   run-clang-tidy.py hits an error, as this output is not
   useful otherwise.

Testing with a known clang tidy issue shows that warnings
and errors are still in the output, and the output is
clean on a clean Impala checkout.

Change-Id: I63c46a7d57295eba38fac8ab49c7a15d2802df1d
Reviewed-on: http://gerrit.cloudera.org:8080/10615
Reviewed-by: Jim Apple <jb...@apache.org>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/27c788f8
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/27c788f8
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/27c788f8

Branch: refs/heads/master
Commit: 27c788f826820791fcdd972b55afd970d6643c1a
Parents: b1ec9e3
Author: Joe McDonnell <jo...@cloudera.com>
Authored: Wed Jun 6 12:25:56 2018 -0700
Committer: Jim Apple <jb...@apache.org>
Committed: Tue Jul 17 04:58:28 2018 +0000

----------------------------------------------------------------------
 bin/bootstrap_toolchain.py |  2 +-
 bin/impala-config.sh       |  8 ++++----
 bin/run_clang_tidy.sh      | 10 +++++++++-
 3 files changed, 14 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/27c788f8/bin/bootstrap_toolchain.py
----------------------------------------------------------------------
diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index d2a3e71..cbddfbb 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -429,7 +429,7 @@ if __name__ == "__main__":
       "flatbuffers", "gcc", "gflags", "glog", "gperftools", "gtest", "libev", "libunwind",
       "lz4", "openldap", "openssl", "orc", "protobuf",
       "rapidjson", "re2", "snappy", "thrift", "tpc-h", "tpc-ds", "zlib"])
-  packages.insert(0, Package("llvm", "5.0.1-asserts"))
+  packages.insert(0, Package("llvm", "5.0.1-asserts-p1"))
   bootstrap(toolchain_root, packages)
 
   # Download the CDH components if necessary.

http://git-wip-us.apache.org/repos/asf/impala/blob/27c788f8/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 0091cd3..d4f40af 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -68,7 +68,7 @@ fi
 # moving to a different build of the toolchain, e.g. when a version is bumped or a
 # compile option is changed. The build id can be found in the output of the toolchain
 # build jobs, it is constructed from the build number and toolchain git hash prefix.
-export IMPALA_TOOLCHAIN_BUILD_ID=137-93cacec18d
+export IMPALA_TOOLCHAIN_BUILD_ID=146-f2d5380be6
 # Versions of toolchain dependencies.
 # -----------------------------------
 export IMPALA_AVRO_VERSION=1.7.4-p4
@@ -105,14 +105,14 @@ export IMPALA_LIBEV_VERSION=4.20
 unset IMPALA_LIBEV_URL
 export IMPALA_LIBUNWIND_VERSION=1.3-rc1-p3
 unset IMPALA_LIBUNWIND_URL
-export IMPALA_LLVM_VERSION=5.0.1
+export IMPALA_LLVM_VERSION=5.0.1-p1
 unset IMPALA_LLVM_URL
-export IMPALA_LLVM_ASAN_VERSION=5.0.1
+export IMPALA_LLVM_ASAN_VERSION=5.0.1-p1
 unset IMPALA_LLVM_ASAN_URL
 
 # Debug builds should use the release+asserts build to get additional coverage.
 # Don't use the LLVM debug build because the binaries are too large to distribute.
-export IMPALA_LLVM_DEBUG_VERSION=5.0.1-asserts
+export IMPALA_LLVM_DEBUG_VERSION=5.0.1-asserts-p1
 unset IMPALA_LLVM_DEBUG_URL
 export IMPALA_LZ4_VERSION=1.7.5
 unset IMPALA_LZ4_URL

http://git-wip-us.apache.org/repos/asf/impala/blob/27c788f8/bin/run_clang_tidy.sh
----------------------------------------------------------------------
diff --git a/bin/run_clang_tidy.sh b/bin/run_clang_tidy.sh
index e879b35..a0e022a 100755
--- a/bin/run_clang_tidy.sh
+++ b/bin/run_clang_tidy.sh
@@ -54,4 +54,12 @@ fi
 export PATH="${IMPALA_TOOLCHAIN}/llvm-${IMPALA_LLVM_VERSION}/share/clang\
 :${IMPALA_TOOLCHAIN}/llvm-${IMPALA_LLVM_VERSION}/bin/\
 :$PATH"
-run-clang-tidy.py -header-filter "${PIPE_DIRS%?}" -j"${CORES}" ${DIRS}
+TMP_STDERR=$(mktemp)
+trap "rm $TMP_STDERR" EXIT
+if ! run-clang-tidy.py -quiet -header-filter "${PIPE_DIRS%?}" \
+                       -j"${CORES}" ${DIRS} 2> ${TMP_STDERR};
+then
+  echo "run-clang-tidy.py hit an error, dumping stderr output"
+  cat ${TMP_STDERR} >&2
+  exit 1
+fi


[7/7] impala git commit: IMPALA-7199: Add scripts to create code coverage reports

Posted by jo...@apache.org.
IMPALA-7199: Add scripts to create code coverage reports

gcovr is a python library that uses gcov to generate
code coverage reports. This adds gcovr to the python
dependencies and adds bin/impala-gcovr to provide
easy access to gcovr's command line. gcovr 3.4
supports python 2.6+.

This also adds bin/coverage_helper.sh to provide a
simplified interface to generate reports and zero
coverage counters.

Code coverage data is written out when a program
exits, so it is important to avoid hard kills
to shut down the impalads when generating coverage.
This modifies testdata/bin/kill-all.sh to call
start-impala-cluster.py --kill when shutting down
the minicluster to try to avoid doing a hard kill.
It will still do a hard kill if impala is still
running after the softer kill.

Change-Id: I5b2e0b794c64f9343ec976de7a3f235e54d2badd
Reviewed-on: http://gerrit.cloudera.org:8080/10791
Reviewed-by: Joe McDonnell <jo...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/7f3f6342
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/7f3f6342
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/7f3f6342

Branch: refs/heads/master
Commit: 7f3f63424f1a82390386ab1a21408d0b820e6c81
Parents: 6887fc2
Author: Joe McDonnell <jo...@cloudera.com>
Authored: Thu May 24 14:58:16 2018 -0700
Committer: Joe McDonnell <jo...@cloudera.com>
Committed: Tue Jul 17 16:45:44 2018 +0000

----------------------------------------------------------------------
 bin/coverage_helper.sh             | 86 +++++++++++++++++++++++++++++++++
 bin/impala-gcovr                   | 21 ++++++++
 infra/python/deps/requirements.txt |  1 +
 testdata/bin/kill-all.sh           |  3 ++
 4 files changed, 111 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/7f3f6342/bin/coverage_helper.sh
----------------------------------------------------------------------
diff --git a/bin/coverage_helper.sh b/bin/coverage_helper.sh
new file mode 100755
index 0000000..dad1498
--- /dev/null
+++ b/bin/coverage_helper.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eu -o pipefail
+
+ZERO_COUNTERS_ACTION=0
+REPORT_ACTION=0
+REPORT_DIRECTORY=${IMPALA_HOME}/logs/coverage
+
+function print_usage {
+  echo "coverage_helper.sh - Driver script for coverage"
+  echo "[-zerocounters] : Reset coverage counters"
+  echo "[-report] : Generate a detailed coverage report"
+  echo "[-reportdirectory <directory>] : Output directory for coverage report files"
+}
+
+while [ -n "$*" ]
+do
+  case "$1" in
+    -zerocounters)
+       ZERO_COUNTERS_ACTION=1
+       ;;
+    -report)
+       REPORT_ACTION=1
+       ;;
+    -reportdirectory)
+       REPORT_DIRECTORY="${2-}"
+       shift
+       ;;
+    -help|*)
+       print_usage
+       exit 1
+       ;;
+  esac
+  shift
+done
+
+if [[ ${ZERO_COUNTERS_ACTION} -eq 0 && ${REPORT_ACTION} -eq 0 ]]; then
+  print_usage
+  exit 1
+fi
+
+if pgrep -U "$USER" impalad; then
+  echo "Warning: impalad is running. Coverage counters are only updated when"
+  echo "a program exits. Any report will not include the information from"
+  echo "the running impalad. Similarly, zeroing the counters will not zero"
+  echo "the counters for the running impalad."
+fi
+
+if [ ${REPORT_ACTION} -eq 1 ]; then
+  mkdir -p "${REPORT_DIRECTORY}"
+  rm -f "${REPORT_DIRECTORY}"/index*.html
+  # src/util/bit-packing.inline.h gets lots of hits, so generating a detailed report
+  # for it takes several minutes. Exclude it to keep the execution time down.
+  # gcovr excludes are buggy, so on some environments these excludes won't work.
+  echo "Generating report in directory: ${REPORT_DIRECTORY}"
+  cd "${IMPALA_HOME}"
+  "${IMPALA_HOME}/bin/impala-gcovr" -v -r "${IMPALA_HOME}/be" \
+    --exclude=".*src/benchmarks.*" \
+    --exclude=".*generated-sources/gen-cpp.*" \
+    --exclude=".*src/util/bit-packing.inline.h.*" \
+    --html --html-details -o "${REPORT_DIRECTORY}/index.html" > "${REPORT_DIRECTORY}/gcovr.out"
+fi
+
+if [ ${ZERO_COUNTERS_ACTION} -eq 1 ]; then
+  # The .gcda files contain the counters for coverage. Deleting them zeros the
+  # the counters.
+  echo "Zeroing Counters"
+  find "${IMPALA_HOME}/be" -name "*.gcda" -delete
+fi

http://git-wip-us.apache.org/repos/asf/impala/blob/7f3f6342/bin/impala-gcovr
----------------------------------------------------------------------
diff --git a/bin/impala-gcovr b/bin/impala-gcovr
new file mode 100755
index 0000000..6e8be6c
--- /dev/null
+++ b/bin/impala-gcovr
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+source "$(dirname "$0")/impala-python-common.sh"
+exec "$PY_DIR/env/bin/gcovr" "$@"

http://git-wip-us.apache.org/repos/asf/impala/blob/7f3f6342/infra/python/deps/requirements.txt
----------------------------------------------------------------------
diff --git a/infra/python/deps/requirements.txt b/infra/python/deps/requirements.txt
index b0b7f23..a92fdc6 100644
--- a/infra/python/deps/requirements.txt
+++ b/infra/python/deps/requirements.txt
@@ -43,6 +43,7 @@ Flask == 0.10.1
   MarkupSafe == 0.23
   Werkzeug == 0.11.3
   itsdangerous == 0.24
+gcovr == 3.4
 kazoo == 2.2.1
 ordereddict == 1.1
 pexpect == 3.3

http://git-wip-us.apache.org/repos/asf/impala/blob/7f3f6342/testdata/bin/kill-all.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/kill-all.sh b/testdata/bin/kill-all.sh
index 5462162..0e8d201 100755
--- a/testdata/bin/kill-all.sh
+++ b/testdata/bin/kill-all.sh
@@ -20,6 +20,9 @@
 set -euo pipefail
 trap 'echo Error in $0 at line $LINENO: $(cd "'$PWD'" && awk "NR == $LINENO" $0)' ERR
 
+# Shutdown Impala if it is alive
+${IMPALA_HOME}/bin/start-impala-cluster.py --kill
+
 # Kill HBase, then MiniLlama (which includes a MiniDfs, a Yarn RM several NMs).
 $IMPALA_HOME/testdata/bin/kill-sentry-service.sh
 $IMPALA_HOME/testdata/bin/kill-hive-server.sh


[6/7] impala git commit: IMPALA-7238: Use custom timeout for create unique database

Posted by jo...@apache.org.
IMPALA-7238: Use custom timeout for create unique database

test_kudu.TestCreateExternalTables() saw a timeout when
creating the unique database for its tests.

__unique_conn() opens a connection, creates a unique database,
then returns another connection in that database. It takes
a custom timeout argument, but the timeout is only for the
returned connection. The first connection to create the
unique database uses the default timeout of 45 seconds.

This patch changes the first connection to use the custom
timeout. For Kudu tests, this is 5 minutes rather than 45
seconds.

Change-Id: I4f2beb5bc027a4bb44e854bf1dd8919807a92ea0
Reviewed-on: http://gerrit.cloudera.org:8080/10862
Reviewed-by: Joe McDonnell <jo...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/6887fc21
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/6887fc21
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/6887fc21

Branch: refs/heads/master
Commit: 6887fc2190da26af2ae5b6bd3b90e5b22d20c963
Parents: 558fa14
Author: Joe McDonnell <jo...@cloudera.com>
Authored: Tue Jul 3 16:20:15 2018 -0700
Committer: Joe McDonnell <jo...@cloudera.com>
Committed: Tue Jul 17 16:45:34 2018 +0000

----------------------------------------------------------------------
 tests/conftest.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/6887fc21/tests/conftest.py
----------------------------------------------------------------------
diff --git a/tests/conftest.py b/tests/conftest.py
index fe6b332..a1f1859 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -356,7 +356,7 @@ def conn(request):
     with __unique_conn(db_name=db_name, timeout=timeout) as conn:
       yield conn
   else:
-    with __auto_closed_conn(db_name=db_name) as conn:
+    with __auto_closed_conn(db_name=db_name, timeout=timeout) as conn:
       yield conn
 
 
@@ -388,7 +388,7 @@ def __unique_conn(db_name=None, timeout=DEFAULT_CONN_TIMEOUT):
   """
   if not db_name:
     db_name = choice(ascii_lowercase) + "".join(sample(ascii_lowercase + digits, 5))
-  with __auto_closed_conn() as conn:
+  with __auto_closed_conn(timeout=timeout) as conn:
     with __auto_closed_cursor(conn) as cur:
       cur.execute("CREATE DATABASE %s" % db_name)
   with __auto_closed_conn(db_name=db_name, timeout=timeout) as conn:


[2/7] impala git commit: IMPALA-7106: Log the original and rewritten SQL when SQL rewrite fails

Posted by jo...@apache.org.
IMPALA-7106: Log the original and rewritten SQL when SQL rewrite fails

toSql() method is used to print SQL string that is close to the original
SQL string when errors arise or as the result of "SHOW CREATE". When
debugging issues related to SQL rewrites, it can be very useful to be
able to get the SQL string that is being rewritten. This patch adds a
new method toSql(boolean rewritten) to get the rewritten SQL string. This
patch also logs the original and rewritten SQL when SQL rewrite fails.

Testing:
- Added FE test for the rewritten SQL string
- Ran all FE tests

Change-Id: Iab58b0cc865135d261dd4a7f72be130f2e7bde53
Reviewed-on: http://gerrit.cloudera.org:8080/10571
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/b1ec9e37
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/b1ec9e37
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/b1ec9e37

Branch: refs/heads/master
Commit: b1ec9e375f6a2104585b1acae29f9fed407679f6
Parents: 2ddff52
Author: Fredy Wijaya <fw...@cloudera.com>
Authored: Fri Jun 1 00:11:11 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Tue Jul 17 04:36:59 2018 +0000

----------------------------------------------------------------------
 .../apache/impala/analysis/AnalysisContext.java |  13 ++-
 .../analysis/CreateTableAsSelectStmt.java       |   4 +-
 .../org/apache/impala/analysis/DeleteStmt.java  |   8 +-
 .../org/apache/impala/analysis/FromClause.java  |   6 +-
 .../apache/impala/analysis/InlineViewRef.java   |   7 +-
 .../org/apache/impala/analysis/InsertStmt.java  |   6 +-
 .../org/apache/impala/analysis/ModifyStmt.java  |   9 +-
 .../org/apache/impala/analysis/SelectStmt.java  |  13 +--
 .../apache/impala/analysis/StatementBase.java   |   8 +-
 .../apache/impala/analysis/StmtRewriter.java    |   3 +-
 .../org/apache/impala/analysis/TableRef.java    |  12 +-
 .../org/apache/impala/analysis/ToSqlUtils.java  |  10 +-
 .../org/apache/impala/analysis/UnionStmt.java   |  15 +--
 .../org/apache/impala/analysis/UpdateStmt.java  |  10 +-
 .../org/apache/impala/analysis/WithClause.java  |   6 +-
 .../impala/analysis/AnalyzeSubqueriesTest.java  |   2 +-
 .../impala/analysis/ExprRewriterTest.java       | 114 +++++++++++++++++++
 17 files changed, 203 insertions(+), 43 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/b1ec9e37/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java b/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java
index 9ad0807..eeda2eb 100644
--- a/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java
+++ b/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java
@@ -456,7 +456,7 @@ public class AnalysisContext {
       // reset() and re-analyzed. For a CTAS statement, the types represent column types
       // of the table that will be created, including the partition columns, if any.
       List<Type> origResultTypes = Lists.newArrayList();
-      for (Expr e: analysisResult_.stmt_.getResultExprs()) {
+      for (Expr e : analysisResult_.stmt_.getResultExprs()) {
         origResultTypes.add(e.getType());
       }
       List<String> origColLabels =
@@ -472,13 +472,20 @@ public class AnalysisContext {
       // Re-analyze the stmt with a new analyzer.
       analysisResult_.analyzer_ = createAnalyzer(stmtTableCache);
       analysisResult_.stmt_.reset();
-      analysisResult_.stmt_.analyze(analysisResult_.analyzer_);
+      try {
+        analysisResult_.stmt_.analyze(analysisResult_.analyzer_);
+      } catch (AnalysisException e) {
+        LOG.error(String.format("Error analyzing the rewritten query.\n" +
+            "Original SQL: %s\nRewritten SQL: %s", analysisResult_.stmt_.toSql(),
+            analysisResult_.stmt_.toSql(true)));
+        throw e;
+      }
 
       // Restore the original result types and column labels.
       analysisResult_.stmt_.castResultExprs(origResultTypes);
       analysisResult_.stmt_.setColLabels(origColLabels);
       if (LOG.isTraceEnabled()) {
-        LOG.trace("rewrittenStmt: " + analysisResult_.stmt_.toSql());
+        LOG.trace("Rewritten SQL: " + analysisResult_.stmt_.toSql(true));
       }
 
       // Restore privilege requests found during the first pass

http://git-wip-us.apache.org/repos/asf/impala/blob/b1ec9e37/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java
index 3e03ef4..76b63c9 100644
--- a/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java
@@ -108,7 +108,9 @@ public class CreateTableAsSelectStmt extends StatementBase {
   public InsertStmt getInsertStmt() { return insertStmt_; }
   public CreateTableStmt getCreateStmt() { return createStmt_; }
   @Override
-  public String toSql() { return ToSqlUtils.getCreateTableSql(this); }
+  public String toSql(boolean rewritten) {
+    return ToSqlUtils.getCreateTableSql(this, rewritten);
+  }
 
   @Override
   public void collectTableRefs(List<TableRef> tblRefs) {

http://git-wip-us.apache.org/repos/asf/impala/blob/b1ec9e37/fe/src/main/java/org/apache/impala/analysis/DeleteStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/DeleteStmt.java b/fe/src/main/java/org/apache/impala/analysis/DeleteStmt.java
index 52d58a7..3542548 100644
--- a/fe/src/main/java/org/apache/impala/analysis/DeleteStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/DeleteStmt.java
@@ -68,7 +68,9 @@ public class DeleteStmt extends ModifyStmt {
   }
 
   @Override
-  public String toSql() {
+  public String toSql(boolean rewritten) {
+    if (!rewritten && sqlString_ != null) return sqlString_;
+
     StringBuilder b = new StringBuilder();
     b.append("DELETE");
     if (fromClause_.size() > 1 || targetTableRef_.hasExplicitAlias()) {
@@ -76,10 +78,10 @@ public class DeleteStmt extends ModifyStmt {
       if (targetTableRef_.hasExplicitAlias()) {
         b.append(targetTableRef_.getExplicitAlias());
       } else {
-        b.append(targetTableRef_.toSql());
+        b.append(targetTableRef_.toSql(rewritten));
       }
     }
-    b.append(fromClause_.toSql());
+    b.append(fromClause_.toSql(rewritten));
     if (wherePredicate_ != null) {
       b.append(" WHERE ");
       b.append(wherePredicate_.toSql());

http://git-wip-us.apache.org/repos/asf/impala/blob/b1ec9e37/fe/src/main/java/org/apache/impala/analysis/FromClause.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/FromClause.java b/fe/src/main/java/org/apache/impala/analysis/FromClause.java
index 294c324..10a021e 100644
--- a/fe/src/main/java/org/apache/impala/analysis/FromClause.java
+++ b/fe/src/main/java/org/apache/impala/analysis/FromClause.java
@@ -113,11 +113,15 @@ public class FromClause implements ParseNode, Iterable<TableRef> {
 
   @Override
   public String toSql() {
+    return toSql(false);
+  }
+
+  public String toSql(boolean rewritten) {
     StringBuilder builder = new StringBuilder();
     if (!tableRefs_.isEmpty()) {
       builder.append(" FROM ");
       for (int i = 0; i < tableRefs_.size(); ++i) {
-        builder.append(tableRefs_.get(i).toSql());
+        builder.append(tableRefs_.get(i).toSql(rewritten));
       }
     }
     return builder.toString();

http://git-wip-us.apache.org/repos/asf/impala/blob/b1ec9e37/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java b/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java
index 4fee2fa..dbcb59a 100644
--- a/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java
@@ -324,6 +324,11 @@ public class InlineViewRef extends TableRef {
 
   @Override
   protected String tableRefToSql() {
+    return tableRefToSql(false);
+  }
+
+  @Override
+  protected String tableRefToSql(boolean rewritten) {
     // Enclose the alias in quotes if Hive cannot parse it without quotes.
     // This is needed for view compatibility between Impala and Hive.
     String aliasSql = null;
@@ -335,7 +340,7 @@ public class InlineViewRef extends TableRef {
     Preconditions.checkNotNull(aliasSql);
     StringBuilder sql = new StringBuilder()
         .append("(")
-        .append(queryStmt_.toSql())
+        .append(queryStmt_.toSql(rewritten))
         .append(") ")
         .append(aliasSql);
     // Add explicit col labels for debugging even though this syntax isn't supported.

http://git-wip-us.apache.org/repos/asf/impala/blob/b1ec9e37/fe/src/main/java/org/apache/impala/analysis/InsertStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/InsertStmt.java b/fe/src/main/java/org/apache/impala/analysis/InsertStmt.java
index 3483d28..32efc2c 100644
--- a/fe/src/main/java/org/apache/impala/analysis/InsertStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/InsertStmt.java
@@ -895,10 +895,10 @@ public class InsertStmt extends StatementBase {
   }
 
   @Override
-  public String toSql() {
+  public String toSql(boolean rewritten) {
     StringBuilder strBuilder = new StringBuilder();
 
-    if (withClause_ != null) strBuilder.append(withClause_.toSql() + " ");
+    if (withClause_ != null) strBuilder.append(withClause_.toSql(rewritten) + " ");
 
     strBuilder.append(getOpName());
     if (!planHints_.isEmpty() && hintLoc_ == HintLocation.Start) {
@@ -927,7 +927,7 @@ public class InsertStmt extends StatementBase {
       strBuilder.append(" " + ToSqlUtils.getPlanHintsSql(getPlanHints()));
     }
     if (!needsGeneratedQueryStatement_) {
-      strBuilder.append(" " + queryStmt_.toSql());
+      strBuilder.append(" " + queryStmt_.toSql(rewritten));
     }
     return strBuilder.toString();
   }

http://git-wip-us.apache.org/repos/asf/impala/blob/b1ec9e37/fe/src/main/java/org/apache/impala/analysis/ModifyStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/ModifyStmt.java b/fe/src/main/java/org/apache/impala/analysis/ModifyStmt.java
index ec24685..29266f6 100644
--- a/fe/src/main/java/org/apache/impala/analysis/ModifyStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/ModifyStmt.java
@@ -89,6 +89,9 @@ public abstract class ModifyStmt extends StatementBase {
   // position in the target table. Set in createSourceStmt() during analysis.
   protected ArrayList<Integer> referencedColumns_;
 
+  // SQL string of the ModifyStmt. Set in analyze().
+  protected String sqlString_;
+
   public ModifyStmt(List<String> targetTablePath, FromClause fromClause,
       List<Pair<SlotRef, Expr>> assignmentExprs, Expr wherePredicate) {
     targetTablePath_ = Preconditions.checkNotNull(targetTablePath);
@@ -166,6 +169,8 @@ public abstract class ModifyStmt extends StatementBase {
     sourceStmt_.analyze(analyzer);
     // Add target table to descriptor table.
     analyzer.getDescTbl().setTargetTable(table_);
+
+    sqlString_ = toSql();
   }
 
   @Override
@@ -311,7 +316,5 @@ public abstract class ModifyStmt extends StatementBase {
   public QueryStmt getQueryStmt() { return sourceStmt_; }
   public abstract DataSink createDataSink();
   @Override
-  public abstract String toSql();
-
-
+  public abstract String toSql(boolean rewritten);
 }

http://git-wip-us.apache.org/repos/asf/impala/blob/b1ec9e37/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
index fda44cf..6156f0b 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
@@ -934,17 +934,14 @@ public class SelectStmt extends QueryStmt {
     }
   }
 
-  /**
-   * Returns the SQL string corresponding to this SelectStmt.
-   */
   @Override
-  public String toSql() {
+  public String toSql(boolean rewritten) {
     // Return the SQL string before inline-view expression substitution.
-    if (sqlString_ != null) return sqlString_;
+    if (!rewritten && sqlString_ != null) return sqlString_;
 
     StringBuilder strBuilder = new StringBuilder();
     if (withClause_ != null) {
-      strBuilder.append(withClause_.toSql());
+      strBuilder.append(withClause_.toSql(rewritten));
       strBuilder.append(" ");
     }
 
@@ -961,7 +958,9 @@ public class SelectStmt extends QueryStmt {
       strBuilder.append((i+1 != selectList_.getItems().size()) ? ", " : "");
     }
     // From clause
-    if (!fromClause_.isEmpty()) { strBuilder.append(fromClause_.toSql()); }
+    if (!fromClause_.isEmpty()) {
+      strBuilder.append(fromClause_.toSql(rewritten));
+    }
     // Where clause
     if (whereClause_ != null) {
       strBuilder.append(" WHERE ");

http://git-wip-us.apache.org/repos/asf/impala/blob/b1ec9e37/fe/src/main/java/org/apache/impala/analysis/StatementBase.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/StatementBase.java b/fe/src/main/java/org/apache/impala/analysis/StatementBase.java
index c770742..c4609de 100644
--- a/fe/src/main/java/org/apache/impala/analysis/StatementBase.java
+++ b/fe/src/main/java/org/apache/impala/analysis/StatementBase.java
@@ -130,7 +130,13 @@ public abstract class StatementBase implements ParseNode {
   public Analyzer getAnalyzer() { return analyzer_; }
   public boolean isAnalyzed() { return analyzer_ != null; }
 
-  public String toSql() { return ""; }
+  public String toSql() { return toSql(false); }
+  /**
+   * If rewritten is true, returns the rewritten SQL only if the statement was
+   * rewritten. Otherwise, the original SQL will be returned instead. It is the caller's
+   * responsibility to know if/when the statement was indeed rewritten.
+   */
+  public String toSql(boolean rewritten) { return ""; }
   public void setIsExplain() { isExplain_ = true; }
   public boolean isExplain() { return isExplain_; }
 

http://git-wip-us.apache.org/repos/asf/impala/blob/b1ec9e37/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java b/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
index 4206e99..edf380d 100644
--- a/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
+++ b/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
@@ -97,8 +97,7 @@ public class StmtRewriter {
     // Currently only SubqueryRewriter touches the where clause. Recurse into the where
     // clause when the need arises.
     rewriteSelectStmtHook(stmt, analyzer);
-    stmt.sqlString_ = null;
-    if (LOG.isTraceEnabled()) LOG.trace("rewritten stmt: " + stmt.toSql());
+    if (LOG.isTraceEnabled()) LOG.trace("Rewritten SQL: " + stmt.toSql(true));
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/impala/blob/b1ec9e37/fe/src/main/java/org/apache/impala/analysis/TableRef.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/TableRef.java b/fe/src/main/java/org/apache/impala/analysis/TableRef.java
index 86cc25b..98861d1 100644
--- a/fe/src/main/java/org/apache/impala/analysis/TableRef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/TableRef.java
@@ -572,17 +572,25 @@ public class TableRef implements ParseNode {
     return ToSqlUtils.getPathSql(path) + ((aliasSql != null) ? " " + aliasSql : "");
   }
 
+  protected String tableRefToSql(boolean rewritten) {
+    return tableRefToSql();
+  }
+
   @Override
   public String toSql() {
+    return toSql(false);
+  }
+
+  public String toSql(boolean rewritten) {
     if (joinOp_ == null) {
       // prepend "," if we're part of a sequence of table refs w/o an
       // explicit JOIN clause
-      return (leftTblRef_ != null ? ", " : "") + tableRefToSql();
+      return (leftTblRef_ != null ? ", " : "") + tableRefToSql(rewritten);
     }
 
     StringBuilder output = new StringBuilder(" " + joinOp_.toString() + " ");
     if(!joinHints_.isEmpty()) output.append(ToSqlUtils.getPlanHintsSql(joinHints_) + " ");
-    output.append(tableRefToSql());
+    output.append(tableRefToSql(rewritten));
     if (usingColNames_ != null) {
       output.append(" USING (").append(Joiner.on(", ").join(usingColNames_)).append(")");
     } else if (onClause_ != null) {

http://git-wip-us.apache.org/repos/asf/impala/blob/b1ec9e37/fe/src/main/java/org/apache/impala/analysis/ToSqlUtils.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/ToSqlUtils.java b/fe/src/main/java/org/apache/impala/analysis/ToSqlUtils.java
index 444e211..cdd67ce 100644
--- a/fe/src/main/java/org/apache/impala/analysis/ToSqlUtils.java
+++ b/fe/src/main/java/org/apache/impala/analysis/ToSqlUtils.java
@@ -186,9 +186,13 @@ public class ToSqlUtils {
 
   /**
    * Returns the "CREATE TABLE" SQL string corresponding to the given
-   * CreateTableAsSelectStmt statement.
+   * CreateTableAsSelectStmt statement. If rewritten is true, returns the rewritten SQL
+   * only if the statement was rewritten. Otherwise, the original SQL will be returned
+   * instead. It is the caller's responsibility to know if/when the statement was indeed
+   * rewritten.
    */
-  public static String getCreateTableSql(CreateTableAsSelectStmt stmt) {
+  public static String getCreateTableSql(CreateTableAsSelectStmt stmt,
+      boolean rewritten) {
     CreateTableStmt innerStmt = stmt.getCreateStmt();
     // Only add partition column labels to output. Table columns must not be specified as
     // they are deduced from the select statement.
@@ -212,7 +216,7 @@ public class ToSqlUtils {
         innerStmt.isExternal(), innerStmt.getIfNotExists(), innerStmt.getRowFormat(),
         HdfsFileFormat.fromThrift(innerStmt.getFileFormat()), HdfsCompression.NONE, null,
         innerStmt.getLocation());
-    return createTableSql + " AS " + stmt.getQueryStmt().toSql();
+    return createTableSql + " AS " + stmt.getQueryStmt().toSql(rewritten);
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/impala/blob/b1ec9e37/fe/src/main/java/org/apache/impala/analysis/UnionStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/UnionStmt.java b/fe/src/main/java/org/apache/impala/analysis/UnionStmt.java
index 1e58fad..313479e 100644
--- a/fe/src/main/java/org/apache/impala/analysis/UnionStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/UnionStmt.java
@@ -551,24 +551,25 @@ public class UnionStmt extends QueryStmt {
   }
 
   @Override
-  public String toSql() {
-    if (toSqlString_ != null) return toSqlString_;
+  public String toSql(boolean rewritten) {
+    if (!rewritten && toSqlString_ != null) return toSqlString_;
+
     StringBuilder strBuilder = new StringBuilder();
     Preconditions.checkState(operands_.size() > 0);
 
     if (withClause_ != null) {
-      strBuilder.append(withClause_.toSql());
+      strBuilder.append(withClause_.toSql(rewritten));
       strBuilder.append(" ");
     }
 
-    strBuilder.append(operands_.get(0).getQueryStmt().toSql());
+    strBuilder.append(operands_.get(0).getQueryStmt().toSql(rewritten));
     for (int i = 1; i < operands_.size() - 1; ++i) {
       strBuilder.append(" UNION " +
           ((operands_.get(i).getQualifier() == Qualifier.ALL) ? "ALL " : ""));
       if (operands_.get(i).getQueryStmt() instanceof UnionStmt) {
         strBuilder.append("(");
       }
-      strBuilder.append(operands_.get(i).getQueryStmt().toSql());
+      strBuilder.append(operands_.get(i).getQueryStmt().toSql(rewritten));
       if (operands_.get(i).getQueryStmt() instanceof UnionStmt) {
         strBuilder.append(")");
       }
@@ -583,10 +584,10 @@ public class UnionStmt extends QueryStmt {
             !lastQueryStmt.hasLimit() && !lastQueryStmt.hasOffset() &&
             !lastQueryStmt.hasOrderByClause())) {
       strBuilder.append("(");
-      strBuilder.append(lastQueryStmt.toSql());
+      strBuilder.append(lastQueryStmt.toSql(rewritten));
       strBuilder.append(")");
     } else {
-      strBuilder.append(lastQueryStmt.toSql());
+      strBuilder.append(lastQueryStmt.toSql(rewritten));
     }
     // Order By clause
     if (hasOrderByClause()) {

http://git-wip-us.apache.org/repos/asf/impala/blob/b1ec9e37/fe/src/main/java/org/apache/impala/analysis/UpdateStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/UpdateStmt.java b/fe/src/main/java/org/apache/impala/analysis/UpdateStmt.java
index ddce618..3a977b7 100644
--- a/fe/src/main/java/org/apache/impala/analysis/UpdateStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/UpdateStmt.java
@@ -77,17 +77,19 @@ public class UpdateStmt extends ModifyStmt {
   }
 
   @Override
-  public String toSql() {
+  public String toSql(boolean rewritten) {
+    if (!rewritten && sqlString_ != null) return sqlString_;
+
     StringBuilder b = new StringBuilder();
     b.append("UPDATE ");
 
     if (fromClause_ == null) {
-      b.append(targetTableRef_.toSql());
+      b.append(targetTableRef_.toSql(rewritten));
     } else {
       if (targetTableRef_.hasExplicitAlias()) {
         b.append(targetTableRef_.getExplicitAlias());
       } else {
-        b.append(targetTableRef_.toSql());
+        b.append(targetTableRef_.toSql(rewritten));
       }
     }
     b.append(" SET");
@@ -104,7 +106,7 @@ public class UpdateStmt extends ModifyStmt {
           i.second.toSql()));
     }
 
-    b.append(fromClause_.toSql());
+    b.append(fromClause_.toSql(rewritten));
 
     if (wherePredicate_ != null) {
       b.append(" WHERE ");

http://git-wip-us.apache.org/repos/asf/impala/blob/b1ec9e37/fe/src/main/java/org/apache/impala/analysis/WithClause.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/WithClause.java b/fe/src/main/java/org/apache/impala/analysis/WithClause.java
index 4f93f36..d253d91 100644
--- a/fe/src/main/java/org/apache/impala/analysis/WithClause.java
+++ b/fe/src/main/java/org/apache/impala/analysis/WithClause.java
@@ -117,6 +117,10 @@ public class WithClause implements ParseNode {
 
   @Override
   public String toSql() {
+    return toSql(false);
+  }
+
+  public String toSql(boolean rewritten) {
     List<String> viewStrings = Lists.newArrayList();
     for (View view: views_) {
       // Enclose the view alias and explicit labels in quotes if Hive cannot parse it
@@ -126,7 +130,7 @@ public class WithClause implements ParseNode {
         aliasSql += "(" + Joiner.on(", ").join(
             ToSqlUtils.getIdentSqlList(view.getOriginalColLabels())) + ")";
       }
-      viewStrings.add(aliasSql + " AS (" + view.getQueryStmt().toSql() + ")");
+      viewStrings.add(aliasSql + " AS (" + view.getQueryStmt().toSql(rewritten) + ")");
     }
     return "WITH " + Joiner.on(",").join(viewStrings);
   }

http://git-wip-us.apache.org/repos/asf/impala/blob/b1ec9e37/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java
index 06e834d..6e19189 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java
@@ -711,7 +711,7 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
         " where a.int_col between t.tinyint_col and t.bigint_col)",
         "Unsupported predicate with subquery: " +
         "EXISTS (SELECT id FROM functional.alltypessmall a " +
-        "WHERE a.int_col >= t.tinyint_col AND a.int_col <= t.bigint_col)");
+        "WHERE a.int_col BETWEEN t.tinyint_col AND t.bigint_col)");
 
     // Uncorrelated EXISTS in a query with GROUP BY
     AnalyzesOk("select id, count(*) from functional.alltypes t " +

http://git-wip-us.apache.org/repos/asf/impala/blob/b1ec9e37/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java b/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java
index de58d31..20d4fd9 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java
@@ -24,6 +24,7 @@ import org.apache.impala.common.RuntimeEnv;
 import org.apache.impala.rewrite.EqualityDisjunctsToInRule;
 import org.apache.impala.rewrite.ExprRewriteRule;
 import org.apache.impala.rewrite.ExprRewriter;
+import org.apache.impala.thrift.TQueryOptions;
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -223,4 +224,117 @@ public class ExprRewriterTest extends AnalyzerTest {
       CheckNumChangesByEqualityDisjunctsToInRule(stmtSb.toString(), 1);
     }
   }
+
+  @Test
+  public void TestToSql() {
+    TQueryOptions options = new TQueryOptions();
+    options.setEnable_expr_rewrites(true);
+    AnalysisContext ctx = createAnalysisCtx(options);
+
+    //----------------------
+    // Test query rewrites.
+    //----------------------
+    assertToSql(ctx, "select 1 + 1", "SELECT 1 + 1", "SELECT 2");
+
+    assertToSql(ctx,
+        "select (case when true then 1 else id end) from functional.alltypes " +
+        "union " +
+        "select 1 + 1",
+        "SELECT (CASE WHEN TRUE THEN 1 ELSE id END) FROM functional.alltypes " +
+        "UNION " +
+        "SELECT 1 + 1",
+        "SELECT 1 FROM functional.alltypes UNION SELECT 2");
+
+    assertToSql(ctx,
+        "values(1, '2', 3, 4.1), (1, '2', 3, 4.1)",
+        "VALUES((1, '2', 3, 4.1), (1, '2', 3, 4.1))",
+        "SELECT 1, '2', 3, 4.1 UNION ALL SELECT 1, '2', 3, 4.1");
+
+    //-------------------------
+    // Test subquery rewrites.
+    //-------------------------
+    assertToSql(ctx, "select * from (" +
+        "select * from functional.alltypes where id = (select 1 + 1)) a",
+        "SELECT * FROM (SELECT * FROM functional.alltypes WHERE id = (SELECT 1 + 1)) a",
+        "SELECT * FROM (SELECT * FROM functional.alltypes LEFT SEMI JOIN " +
+        "(SELECT 2) `$a$1` (`$c$1`) ON id = `$a$1`.`$c$1`) a");
+
+    assertToSql(ctx,
+        "select * from (select * from functional.alltypes where id = (select 1 + 1)) a " +
+        "union " +
+        "select * from (select * from functional.alltypes where id = (select 1 + 1)) b",
+        "SELECT * FROM (SELECT * FROM functional.alltypes WHERE id = (SELECT 1 + 1)) a " +
+        "UNION " +
+        "SELECT * FROM (SELECT * FROM functional.alltypes WHERE id = (SELECT 1 + 1)) b",
+        "SELECT * FROM (SELECT * FROM functional.alltypes LEFT SEMI JOIN (SELECT 2) " +
+        "`$a$1` (`$c$1`) ON id = `$a$1`.`$c$1`) a " +
+        "UNION " +
+        "SELECT * FROM (SELECT * FROM functional.alltypes LEFT SEMI JOIN (SELECT 2) " +
+        "`$a$1` (`$c$1`) ON id = `$a$1`.`$c$1`) b");
+
+    assertToSql(ctx, "select * from " +
+        "(select (case when true then 1 else id end) from functional.alltypes " +
+        "union select 1 + 1) v",
+        "SELECT * FROM (SELECT (CASE WHEN TRUE THEN 1 ELSE id END) " +
+        "FROM functional.alltypes UNION SELECT 1 + 1) v",
+        "SELECT * FROM (SELECT 1 FROM functional.alltypes " +
+        "UNION SELECT 2) v");
+
+    //---------------------
+    // Test CTAS rewrites.
+    //---------------------
+    assertToSql(ctx,
+        "create table ctas_test as select 1 + 1",
+        "CREATE TABLE default.ctas_test\n" +
+        "STORED AS TEXTFILE\n" +
+        " AS SELECT 1 + 1",
+        "CREATE TABLE default.ctas_test\n" +
+        "STORED AS TEXTFILE\n" +
+        " AS SELECT 2");
+
+    //--------------------
+    // Test DML rewrites.
+    //--------------------
+    // Insert
+    assertToSql(ctx,
+        "insert into functional.alltypes(id) partition(year=2009, month=10) " +
+        "select 1 + 1",
+        "INSERT INTO TABLE functional.alltypes(id) " +
+        "PARTITION (year=2009, month=10) SELECT 1 + 1",
+        "INSERT INTO TABLE functional.alltypes(id) " +
+        "PARTITION (year=2009, month=10) SELECT 2");
+
+    if (RuntimeEnv.INSTANCE.isKuduSupported()) {
+      // Update.
+      assertToSql(ctx, "update functional_kudu.alltypes " +
+          "set string_col = 'test' where id = (select 1 + 1)",
+          "UPDATE functional_kudu.alltypes SET string_col = 'test' " +
+          "FROM functional_kudu.alltypes WHERE id = (SELECT 1 + 1)",
+          "UPDATE functional_kudu.alltypes SET string_col = 'test' " +
+          "FROM functional_kudu.alltypes LEFT SEMI JOIN (SELECT 2) `$a$1` (`$c$1`) " +
+          "ON id = `$a$1`.`$c$1` WHERE id = (SELECT 1 + 1)");
+
+      // Delete
+      assertToSql(ctx, "delete functional_kudu.alltypes " +
+          "where id = (select 1 + 1)",
+          "DELETE FROM functional_kudu.alltypes " +
+          "WHERE id = (SELECT 1 + 1)",
+          "DELETE functional_kudu.alltypes " +
+          "FROM functional_kudu.alltypes LEFT SEMI JOIN (SELECT 2) `$a$1` (`$c$1`) " +
+          "ON id = `$a$1`.`$c$1` WHERE id = (SELECT 1 + 1)");
+    }
+
+    // We don't do any rewrite for WITH clause.
+    StatementBase stmt = (StatementBase) AnalyzesOk("with t as (select 1 + 1) " +
+        "select id from functional.alltypes union select id from functional.alltypesagg",
+        ctx);
+    Assert.assertEquals(stmt.toSql(), stmt.toSql(true));
+  }
+
+  private void assertToSql(AnalysisContext ctx, String query, String expectedToSql,
+      String expectedToRewrittenSql) {
+    StatementBase stmt = (StatementBase) AnalyzesOk(query, ctx);
+    Assert.assertEquals(expectedToSql, stmt.toSql());
+    Assert.assertEquals(expectedToRewrittenSql, stmt.toSql(true));
+  }
 }


[4/7] impala git commit: IMPALA-7304: Don't write floating column index until PARQUET-1222 is resolved.

Posted by jo...@apache.org.
IMPALA-7304: Don't write floating column index until PARQUET-1222 is resolved.

Impala master branch can already write the Parquet
page index. However, we still don't have a well-defined
ordering for floating-point numbers in Parquet, see
PARQUET-1222

Currently impala writes the page index with
fmax()/fmin() semantics, but it might contradicts the
future semantics that will be defined once PARQUET-1222
is resolved.

>From this patch Impala won't write the column index
for floating-point columns until PARQUET-1222 is
resolved and implemented.

I updated the python test accordingly.

Change-Id: I50aa2e6607de6a8943eb068b8162b0506763078b
Reviewed-on: http://gerrit.cloudera.org:8080/10951
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/04119744
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/04119744
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/04119744

Branch: refs/heads/master
Commit: 041197444d2a73bc3e3da4c6dbfdf1d63c236fbf
Parents: 27c788f
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
Authored: Mon Jul 16 14:24:45 2018 +0200
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Tue Jul 17 12:27:55 2018 +0000

----------------------------------------------------------------------
 be/src/exec/hdfs-parquet-table-writer.cc    | 6 ++++++
 tests/query_test/test_parquet_page_index.py | 5 +++++
 2 files changed, 11 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/04119744/be/src/exec/hdfs-parquet-table-writer.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-parquet-table-writer.cc b/be/src/exec/hdfs-parquet-table-writer.cc
index 91a2084..8aa4f7a 100644
--- a/be/src/exec/hdfs-parquet-table-writer.cc
+++ b/be/src/exec/hdfs-parquet-table-writer.cc
@@ -338,10 +338,16 @@ class HdfsParquetTableWriter::ColumnWriter :
       plain_encoded_value_size_(
           ParquetPlainEncoder::EncodedByteSize(eval->root().type())) {
     DCHECK_NE(eval->root().type().type, TYPE_BOOLEAN);
+    // IMPALA-7304: Don't write column index for floating-point columns until
+    // PARQUET-1222 is resolved.
+    if (std::is_floating_point<T>::value) valid_column_index_ = false;
   }
 
   virtual void Reset() {
     BaseColumnWriter::Reset();
+    // IMPALA-7304: Don't write column index for floating-point columns until
+    // PARQUET-1222 is resolved.
+    if (std::is_floating_point<T>::value) valid_column_index_ = false;
     // Default to dictionary encoding.  If the cardinality ends up being too high,
     // it will fall back to plain.
     current_encoding_ = parquet::Encoding::PLAIN_DICTIONARY;

http://git-wip-us.apache.org/repos/asf/impala/blob/04119744/tests/query_test/test_parquet_page_index.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_parquet_page_index.py b/tests/query_test/test_parquet_page_index.py
index 0ee5d37..6235819 100644
--- a/tests/query_test/test_parquet_page_index.py
+++ b/tests/query_test/test_parquet_page_index.py
@@ -226,6 +226,11 @@ class TestHdfsParquetTableIndexWriter(ImpalaTestSuite):
           index_size = len(column_info.offset_index.page_locations)
           assert index_size > 0
           self._validate_page_locations(column_info.offset_index.page_locations)
+          # IMPALA-7304: Impala doesn't write column index for floating-point columns
+          # until PARQUET-1222 is resolved.
+          if column_info.schema.type in [4, 5]:
+            assert column_info.column_index is None
+            continue
           self._validate_null_stats(index_size, column_info)
           self._validate_min_max_values(index_size, column_info)
           self._validate_boundary_order(column_info)