Posted to commits@impala.apache.org by he...@apache.org on 2017/01/12 05:21:23 UTC

[1/6] incubator-impala git commit: IMPALA-3671: Add SCRATCH_LIMIT query option.

Repository: incubator-impala
Updated Branches:
  refs/heads/master 75027c913 -> ed0aa66ee


IMPALA-3671: Add SCRATCH_LIMIT query option.

Clarify comment about # of DataNodes.

Change-Id: I662a59a59da8a170a2710d4a5245363ae1e3f754
Reviewed-on: http://gerrit.cloudera.org:8080/5651
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/6a2c9049
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/6a2c9049
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/6a2c9049

Branch: refs/heads/master
Commit: 6a2c9049ffe1fc914508c880ab66eaae8bd5f07e
Parents: 75027c9
Author: John Russell <jr...@cloudera.com>
Authored: Mon Jan 9 16:34:58 2017 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Jan 11 23:38:00 2017 +0000

----------------------------------------------------------------------
 docs/impala.ditamap                  |   1 +
 docs/topics/impala_scratch_limit.xml | 100 ++++++++++++++++++++++++++++++
 2 files changed, 101 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6a2c9049/docs/impala.ditamap
----------------------------------------------------------------------
diff --git a/docs/impala.ditamap b/docs/impala.ditamap
index fe4624c..380f25b 100644
--- a/docs/impala.ditamap
+++ b/docs/impala.ditamap
@@ -1027,6 +1027,7 @@ https://issues.cloudera.org/secure/IssueNavigator.jspa?reset=true&amp;jqlQuery=p
           <topicref rev="2.6.0" href="topics/impala_s3_skip_insert_staging.xml"/>
           <keydef href="topics/impala_s3_skip_insert_staging.xml" keys="s3_skip_insert_staging"/>
           <topicref rev="2.5.0" href="topics/impala_scan_node_codegen_threshold.xml"/>
+          <topicref rev="2.8.0 IMPALA-3671" href="topics/impala_scratch_limit.xml"/>
           <keydef href="topics/impala_scan_node_codegen_threshold.xml" keys="scan_node_codegen_threshold"/>
           <!-- This option is for internal use only and might go away without ever being documented. -->
           <!-- <topicref href="topics/impala_seq_compression_mode.xml"/> -->

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6a2c9049/docs/topics/impala_scratch_limit.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_scratch_limit.xml b/docs/topics/impala_scratch_limit.xml
new file mode 100644
index 0000000..79ec987
--- /dev/null
+++ b/docs/topics/impala_scratch_limit.xml
@@ -0,0 +1,100 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="IMPALA-3671" id="scratch_limit">
+
+  <title>SCRATCH_LIMIT Query Option</title>
+  <titlealts audience="PDF"><navtitle>SCRATCH_LIMIT</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Impala Query Options"/>
+      <data name="Category" value="Memory"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      <indexterm audience="hidden">SCRATCH_LIMIT query option</indexterm>
+      Specifies the maximum amount of disk storage, in bytes, that any Impala query can consume
+      on any host using the <q>spill to disk</q> mechanism that handles queries that exceed
+      the memory limit.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+    <p>
+      Specify the size in bytes, or with a trailing <codeph>m</codeph> or <codeph>g</codeph> character to indicate
+      megabytes or gigabytes. For example:
+    </p>
+
+<!-- Examples adapted from impala_parquet_file_size.xml.
+     To do: demonstrate with queries that succeed / fail
+     based on the query option setting, and interaction
+     with MEM_LIMIT. -->
+<codeblock>-- 128 megabytes.
+set SCRATCH_LIMIT=134217728
+
+-- 512 megabytes.
+set SCRATCH_LIMIT=512m;
+
+-- 1 gigabyte.
+set SCRATCH_LIMIT=1g;
+</codeblock>
+
+    <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+    <p>
+      A value of zero turns off the spill to disk feature for queries
+      in the current session, causing them to fail immediately if they
+      exceed the memory limit.
+    </p>
+
+    <p>
+      The amount of memory used per host for a query is limited by the
+      <codeph>MEM_LIMIT</codeph> query option.
+    </p>
+
+    <p>
+      The more Impala daemon hosts in the cluster, the less memory each host uses,
+      and therefore the less scratch space each host requires for queries that
+      exceed the memory limit.
+    </p>
+
+    <p>
+      <b>Type:</b> numeric, with optional unit specifier
+    </p>
+
+    <p>
+      <b>Default:</b> -1 (amount of spill space is unlimited)
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/related_info"/>
+
+    <p>
+      <xref href="impala_scalability.xml#spill_to_disk"/>,
+      <xref href="impala_mem_limit.xml#mem_limit"/>
+    </p>
+
+  </conbody>
+</concept>


[3/6] incubator-impala git commit: Add Kudu cmake utilities

Posted by he...@apache.org.
Add Kudu cmake utilities

This commit imports some CMake utility methods from Kudu, in preparation
for adding KRPC and its dependencies to Impala's build.

The methods are unused in this patch, but will be used both by
thirdparty dependencies (e.g. Protobuf) and by the Kudu libraries
themselves.

Some methods are stubbed out to make it easier to import Kudu's
CMakeLists.txt files without adding extra test targets etc. to Impala's
build.
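
To illustrate how the imported helpers are typically called (a minimal sketch;
the target names, source files, and variables below are illustrative, not part
of this patch):

    # Wrap a prebuilt static archive as an imported CMake target named "lz4".
    ADD_THIRDPARTY_LIB(lz4
      STATIC_LIB "${LZ4_STATIC_LIB}")

    # Declare an internal library. In this shimmed version the call reduces to
    # add_library() plus add_dependencies() on the listed DEPS.
    ADD_EXPORTABLE_LIBRARY(my_util
      SRCS util/foo.cc util/bar.cc
      DEPS lz4)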

Change-Id: Ibaae645d650ab1555452e4cc2574d6c84a90d941
Reviewed-on: http://gerrit.cloudera.org:8080/5656
Reviewed-by: Matthew Jacobs <mj...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/44bb99a6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/44bb99a6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/44bb99a6

Branch: refs/heads/master
Commit: 44bb99a61d03ac52097a7d2f7928a0c5be13a7de
Parents: 70ae2e3
Author: Henry Robinson <he...@cloudera.com>
Authored: Tue Dec 13 04:20:28 2016 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Jan 12 02:53:45 2017 +0000

----------------------------------------------------------------------
 CMakeLists.txt                   |  35 +++++++++-
 cmake_modules/kudu_cmake_fns.txt | 126 ++++++++++++++++++++++++++++++++++
 2 files changed, 160 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/44bb99a6/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 661c9a6..4c5d0d3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,12 +15,14 @@
 # specific language governing permissions and limitations
 # under the License.
 
-cmake_minimum_required(VERSION 2.6)
+cmake_minimum_required(VERSION 3.2.3)
 
 # Explicitly define project() to allow modifying the compiler before the project is
 # initialized.
 project(Impala)
 
+include(cmake_modules/kudu_cmake_fns.txt)
+
 if (NOT DEFINED BUILD_SHARED_LIBS)
   set(BUILD_SHARED_LIBS OFF)
 endif()
@@ -125,6 +127,7 @@ include_directories(${BZIP2_INCLUDE_DIR})
 
 find_package(Zlib REQUIRED)
 include_directories(${ZLIB_INCLUDE_DIR})
+message(STATUS "zlib: Static -> ${ZLIB_STATIC}, Other -> ${ZLIB_LIBRARIES}")
 
 if(NOT BUILD_SHARED_LIBS)
   # Panic if we cannot find the static libraries as they are supposed to be
@@ -267,6 +270,36 @@ set(LIBS ${LIBS} ${AVRO_STATIC_LIB})
 message(STATUS "Avro include dir: " ${AVRO_INCLUDE_DIR})
 message(STATUS "Avro static library: " ${AVRO_STATIC_LIB})
 
+###################################################################
+# These dependencies use Kudu's CMake functions
+
+ADD_THIRDPARTY_LIB(glog
+  STATIC_LIB ${GLOG_STATIC_LIB})
+ADD_THIRDPARTY_LIB(gflags
+  STATIC_LIB ${GFLAGS_STATIC_LIB})
+ADD_THIRDPARTY_LIB(zlib
+  STATIC_LIB ${ZLIB_STATIC_LIBRARIES})
+ADD_THIRDPARTY_LIB(cyrus_sasl
+  SHARED_LIB ${SASL_SHARED_LIB})
+
+if (NOT APPLE)
+  find_library(RT_LIB_PATH rt)
+  if(NOT RT_LIB_PATH)
+    message(FATAL_ERROR "Could not find librt on the system path")
+  endif()
+  ADD_THIRDPARTY_LIB(rt
+    SHARED_LIB "${RT_LIB_PATH}")
+
+  find_library(DL_LIB_PATH dl)
+  if(NOT DL_LIB_PATH)
+    message(FATAL_ERROR "Could not find libdl on the system path")
+  endif()
+  ADD_THIRDPARTY_LIB(dl
+    SHARED_LIB "${DL_LIB_PATH}")
+endif()
+
+###################################################################
+
 # KuduClient can use GLOG
 add_definitions(-DKUDU_HEADERS_USE_GLOG)
 if(NOT $ENV{KUDU_CLIENT_DIR} EQUAL "")

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/44bb99a6/cmake_modules/kudu_cmake_fns.txt
----------------------------------------------------------------------
diff --git a/cmake_modules/kudu_cmake_fns.txt b/cmake_modules/kudu_cmake_fns.txt
new file mode 100644
index 0000000..8cd2cbc
--- /dev/null
+++ b/cmake_modules/kudu_cmake_fns.txt
@@ -0,0 +1,126 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This file was adapted from https://github.com/apache/kudu/blob/master/CMakeLists.txt.
+# It adds two useful cmake methods (ADD_EXPORTABLE_LIBRARY and ADD_THIRDPARTY_LIB), and
+# also defines a shim method (ADD_KUDU_TEST) to simplify importing Kudu's utility
+# libraries.
+
+cmake_minimum_required(VERSION 3.2.3)
+include(CMakeParseArguments)
+
+# add_library() wrapper provided for compatibility with Kudu. In the original version,
+# this would add a second variant of the library, which is compiled with special
+# visibility flags to hide all symbols except those that are part of the public ABI. Here
+# it is a shim that simply calls add_library() to make the library available for internal
+# linking.
+#
+# Arguments:
+#
+# LIB_NAME is the name of the library. It must come first. Required.
+#
+# SRCS is the list of source files to compile into the library. Required.
+#
+# DEPS is the list of targets that both library variants depend on. Required.
+#
+# The following arguments are all optional, and supported for compatibility, but don't
+# have any effect:
+#
+#  NONLINK_DEPS, COMPILE_FLAGS, EXPORTED_SHARED, EXPORTED_OUTPUT_NAME,
+#  EXPORTED_OUTPUT_DIRECTORY, EXPORTED_DEPS
+function(ADD_EXPORTABLE_LIBRARY LIB_NAME)
+  set(options EXPORTED_SHARED)
+  set(one_value_args COMPILE_FLAGS EXPORTED_OUTPUT_NAME EXPORTED_OUTPUT_DIRECTORY)
+  set(multi_value_args SRCS DEPS EXPORTED_DEPS NONLINK_DEPS)
+  cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN})
+  if(ARG_UNPARSED_ARGUMENTS)
+    message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}")
+  endif()
+
+  set(LIBRARY_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIB_NAME}")
+  set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIB_NAME}")
+
+  add_library(${LIB_NAME} ${ARG_SRCS})
+  if (ARG_DEPS)
+    add_dependencies(${LIB_NAME} ${ARG_DEPS})
+  endif()
+endfunction()
+
+############################################################
+# Testing
+############################################################
+
+function(ADD_KUDU_TEST REL_TEST_NAME)
+  # Shim for compatibility, doesn't do anything.
+endfunction()
+
+
+############################################################
+# Dependencies
+############################################################
+function(ADD_THIRDPARTY_LIB LIB_NAME)
+  set(options)
+  set(one_value_args SHARED_LIB STATIC_LIB)
+  set(multi_value_args DEPS)
+  cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN})
+  if(ARG_UNPARSED_ARGUMENTS)
+    message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}")
+  endif()
+
+  if(("${KUDU_LINK}" STREQUAL "s" AND ARG_STATIC_LIB) OR (NOT ARG_SHARED_LIB))
+    if(NOT ARG_STATIC_LIB)
+      message(FATAL_ERROR "No static or shared library provided for ${LIB_NAME}")
+    endif()
+    add_library(${LIB_NAME} STATIC IMPORTED)
+    set_target_properties(${LIB_NAME}
+      PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}")
+    message("Added static library dependency ${LIB_NAME}: ${ARG_STATIC_LIB}")
+  else()
+    add_library(${LIB_NAME} SHARED IMPORTED)
+    set_target_properties(${LIB_NAME}
+      PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}")
+    message("Added shared library dependency ${LIB_NAME}: ${ARG_SHARED_LIB}")
+  endif()
+
+  if(ARG_DEPS)
+    set_target_properties(${LIB_NAME}
+      PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES "${ARG_DEPS}")
+  endif()
+
+  # Set up an "exported variant" for this thirdparty library (see "Visibility"
+  # above). It's the same as the real target, just with an "_exported" suffix.
+  # We prefer the static archive if it exists (as it's akin to an "internal"
+  # library), but we'll settle for the shared object if we must.
+  #
+  # A shared object exported variant will force any "leaf" library that
+  # transitively depends on it to also depend on it at runtime; this is
+  # desirable for some libraries (e.g. cyrus_sasl).
+  set(LIB_NAME_EXPORTED ${LIB_NAME}_exported)
+  if(ARG_STATIC_LIB)
+    add_library(${LIB_NAME_EXPORTED} STATIC IMPORTED)
+    set_target_properties(${LIB_NAME_EXPORTED}
+      PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}")
+  else()
+    add_library(${LIB_NAME_EXPORTED} SHARED IMPORTED)
+    set_target_properties(${LIB_NAME_EXPORTED}
+      PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}")
+  endif()
+  if(ARG_DEPS)
+    set_target_properties(${LIB_NAME_EXPORTED}
+      PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES "${ARG_DEPS}")
+  endif()
+endfunction()


[6/6] incubator-impala git commit: IMPALA-4650: Allow protobuf to find non-system libraries and binaries

Posted by he...@apache.org.
IMPALA-4650: Allow protobuf to find non-system libraries and binaries

This change makes PROTOBUF_GENERATE_CPP able to pick up Protobuf
libraries and binaries that are found by CMake but are not on the
system's LD_LIBRARY_PATH.
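
The essence of the change is to wrap the protoc invocation in "cmake -E env" so
that the directory containing the toolchain's libprotobuf is prepended to
LD_LIBRARY_PATH for that one command. A minimal standalone sketch of the
pattern (the proto file name is illustrative):

    # Derive the library directory from the shared library CMake located.
    get_filename_component(PROTO_LIB_DIR ${PROTOBUF_SHARED_LIBRARY} DIRECTORY)

    add_custom_command(
      OUTPUT foo.pb.cc foo.pb.h
      # Run protoc with the toolchain's protobuf libraries on the loader path.
      COMMAND ${CMAKE_COMMAND}
        -E env "LD_LIBRARY_PATH=${PROTO_LIB_DIR}:$ENV{LD_LIBRARY_PATH}"
        ${PROTOBUF_PROTOC_EXECUTABLE} --cpp_out . --proto_path . foo.proto
      DEPENDS foo.proto
      VERBATIM)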

Change-Id: I942b3f18e25e2abc5aac167412b65abb680d3c5a
Reviewed-on: http://gerrit.cloudera.org:8080/5658
Tested-by: Impala Public Jenkins
Reviewed-by: Henry Robinson <he...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/ed0aa66e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/ed0aa66e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/ed0aa66e

Branch: refs/heads/master
Commit: ed0aa66ee1ec3dd31f451ec70432bf58c6b682c4
Parents: 4b3fdc3
Author: Henry Robinson <he...@cloudera.com>
Authored: Fri Dec 2 21:29:22 2016 -0800
Committer: Henry Robinson <he...@cloudera.com>
Committed: Thu Jan 12 05:18:33 2017 +0000

----------------------------------------------------------------------
 cmake_modules/FindProtobuf.cmake | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ed0aa66e/cmake_modules/FindProtobuf.cmake
----------------------------------------------------------------------
diff --git a/cmake_modules/FindProtobuf.cmake b/cmake_modules/FindProtobuf.cmake
index a1092c1..4c2ab2f 100644
--- a/cmake_modules/FindProtobuf.cmake
+++ b/cmake_modules/FindProtobuf.cmake
@@ -139,6 +139,7 @@ function(PROTOBUF_GENERATE_CPP SRCS HDRS TGTS)
     SET(ARG_BINARY_ROOT "${CMAKE_CURRENT_BINARY_DIR}")
   endif()
   GET_FILENAME_COMPONENT(ARG_BINARY_ROOT ${ARG_BINARY_ROOT} ABSOLUTE)
+  GET_FILENAME_COMPONENT(PROTO_LIB_DIR ${PROTOBUF_SHARED_LIBRARY} DIRECTORY)
 
   foreach(FIL ${ARG_PROTO_FILES})
     get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
@@ -161,7 +162,9 @@ function(PROTOBUF_GENERATE_CPP SRCS HDRS TGTS)
 
     add_custom_command(
       OUTPUT "${PROTO_CC_OUT}" "${PROTO_H_OUT}"
-      COMMAND  ${PROTOBUF_PROTOC_EXECUTABLE}
+      COMMAND  ${CMAKE_COMMAND}
+        -E env "LD_LIBRARY_PATH=${PROTO_LIB_DIR}:$ENV{LD_LIBRARY_PATH}"
+        ${PROTOBUF_PROTOC_EXECUTABLE}
       ARGS
         --plugin $<TARGET_FILE:protoc-gen-insertions>
         --cpp_out ${ARG_BINARY_ROOT}


[4/6] incubator-impala git commit: IMPALA-4341: Add metadata load to planner timeline

Posted by he...@apache.org.
IMPALA-4341: Add metadata load to planner timeline

This moves the timeline from the Analyzer GlobalState to the AnalysisContext
and AnalysisContext.AnalysisResult. When analysis needs to load metadata for
missing tables, it marks an event at the start of the load and another when
the load completes (or times out). Keeping the timeline on the AnalysisContext
means that it persists across analysis attempts. AnalysisContext.AnalysisResult
holds a reference to the timeline, so that it survives analyzeStmt and can be
used for the rest of planning.

Here is an example output of the planner timeline after this change:
    Planner Timeline: 4s371ms
       - Metadata load started: 41.388ms (41.388ms)
       - Metadata load finished: 4s260ms (4s219ms)
       - Analysis finished: 4s296ms (35.693ms)
       - Equivalence classes computed: 4s315ms (19.062ms)
       - Single node plan created: 4s323ms (7.812ms)
       - Runtime filters computed: 4s323ms (777.010us)
       - Distributed plan created: 4s325ms (1.464ms)
       - Planning finished: 4s371ms (46.697ms)

When there is no need to load metadata, the timeline looks like:
    Planner Timeline: 13.695ms
       - Analysis finished: 2.411ms (2.411ms)
       - Equivalence classes computed: 2.653ms (241.733us)
       - Single node plan created: 5.641ms (2.987ms)
       - Runtime filters computed: 5.726ms (85.204us)
       - Distributed plan created: 6.548ms (821.722us)
       - Planning finished: 13.695ms (7.147ms)

Change-Id: I6f01a35e5f9f5007a0298acfc8e16da00ef99c6c
Reviewed-on: http://gerrit.cloudera.org:8080/5685
Reviewed-by: Marcel Kornacker <ma...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/5d028d93
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/5d028d93
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/5d028d93

Branch: refs/heads/master
Commit: 5d028d93b9b796af448c72396d99ccba43790f72
Parents: 44bb99a
Author: Joe McDonnell <jo...@cloudera.com>
Authored: Wed Jan 11 13:41:00 2017 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Jan 12 03:54:15 2017 +0000

----------------------------------------------------------------------
 .../java/org/apache/impala/analysis/AnalysisContext.java  |  9 +++++++++
 fe/src/main/java/org/apache/impala/analysis/Analyzer.java |  7 -------
 fe/src/main/java/org/apache/impala/planner/Planner.java   | 10 +++++-----
 .../java/org/apache/impala/planner/SingleNodePlanner.java |  2 +-
 fe/src/main/java/org/apache/impala/service/Frontend.java  | 10 ++++++++--
 5 files changed, 23 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5d028d93/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java b/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java
index d57c25f..c74acc5 100644
--- a/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java
+++ b/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java
@@ -43,6 +43,7 @@ import org.apache.impala.rewrite.FoldConstantsRule;
 import org.apache.impala.thrift.TAccessEvent;
 import org.apache.impala.thrift.TLineageGraph;
 import org.apache.impala.thrift.TQueryCtx;
+import org.apache.impala.util.EventSequence;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -60,6 +61,10 @@ public class AnalysisContext {
   private final AuthorizationConfig authzConfig_;
   private final ExprRewriter rewriter_;
 
+  // Timeline of important events in the planning process, used for debugging
+  // and profiling
+  private final EventSequence timeline_ = new EventSequence("Planner Timeline");
+
   // Set in analyze()
   private AnalysisResult analysisResult_;
 
@@ -93,6 +98,7 @@ public class AnalysisContext {
   static public class AnalysisResult {
     private StatementBase stmt_;
     private Analyzer analyzer_;
+    private EventSequence timeline_;
 
     public boolean isAlterTableStmt() { return stmt_ instanceof AlterTableStmt; }
     public boolean isAlterViewStmt() { return stmt_ instanceof AlterViewStmt; }
@@ -332,6 +338,7 @@ public class AnalysisContext {
 
     public StatementBase getStmt() { return stmt_; }
     public Analyzer getAnalyzer() { return analyzer_; }
+    public EventSequence getTimeline() { return timeline_; }
     public Set<TAccessEvent> getAccessEvents() { return analyzer_.getAccessEvents(); }
     public boolean requiresSubqueryRewrite() {
       return analyzer_.containsSubquery() && !(stmt_ instanceof CreateViewStmt)
@@ -375,6 +382,7 @@ public class AnalysisContext {
       if (analysisResult_.analyzer_ == null) {
         analysisResult_.analyzer_ = new Analyzer(catalog_, queryCtx_, authzConfig_);
       }
+      analysisResult_.timeline_ = timeline_;
       analysisResult_.stmt_ = (StatementBase) parser.parse().value;
       if (analysisResult_.stmt_ == null) return;
 
@@ -583,4 +591,5 @@ public class AnalysisContext {
 
   public AnalysisResult getAnalysisResult() { return analysisResult_; }
   public Analyzer getAnalyzer() { return getAnalysisResult().getAnalyzer(); }
+  public EventSequence getTimeline() { return timeline_; }
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5d028d93/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
index 4d638a8..ca09b05 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
@@ -68,7 +68,6 @@ import org.apache.impala.thrift.TNetworkAddress;
 import org.apache.impala.thrift.TQueryCtx;
 import org.apache.impala.thrift.TQueryOptions;
 import org.apache.impala.util.DisjointSet;
-import org.apache.impala.util.EventSequence;
 import org.apache.impala.util.ListMap;
 import org.apache.impala.util.TSessionStateUtil;
 import org.slf4j.Logger;
@@ -303,10 +302,6 @@ public class Analyzer {
     private final ExprRewriter constantFolder_ =
         new ExprRewriter(FoldConstantsRule.INSTANCE);
 
-    // Timeline of important events in the planning process, used for debugging /
-    // profiling
-    private final EventSequence timeline = new EventSequence("Planner Timeline");
-
     public GlobalState(ImpaladCatalog catalog, TQueryCtx queryCtx,
         AuthorizationConfig authzConfig) {
       this.catalog = catalog;
@@ -2543,8 +2538,6 @@ public class Analyzer {
     return res;
   }
 
-  public EventSequence getTimeline() { return globalState_.timeline; }
-
   /**
    * Assign all remaining unassigned slots to their own equivalence classes.
    */

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5d028d93/fe/src/main/java/org/apache/impala/planner/Planner.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/Planner.java b/fe/src/main/java/org/apache/impala/planner/Planner.java
index 297e9b2..b47a7b3 100644
--- a/fe/src/main/java/org/apache/impala/planner/Planner.java
+++ b/fe/src/main/java/org/apache/impala/planner/Planner.java
@@ -85,7 +85,7 @@ public class Planner {
     SingleNodePlanner singleNodePlanner = new SingleNodePlanner(ctx_);
     DistributedPlanner distributedPlanner = new DistributedPlanner(ctx_);
     PlanNode singleNodePlan = singleNodePlanner.createSingleNodePlan();
-    ctx_.getRootAnalyzer().getTimeline().markEvent("Single node plan created");
+    ctx_.getAnalysisResult().getTimeline().markEvent("Single node plan created");
     ArrayList<PlanFragment> fragments = null;
 
     // Determine the maximum number of rows processed by any node in the plan tree
@@ -116,7 +116,7 @@ public class Planner {
       // Always compute filters, even if the BE won't always use all of them.
       RuntimeFilterGenerator.generateRuntimeFilters(ctx_.getRootAnalyzer(),
           singleNodePlan, ctx_.getQueryOptions().getMax_num_runtime_filters());
-      ctx_.getRootAnalyzer().getTimeline().markEvent(
+      ctx_.getAnalysisResult().getTimeline().markEvent(
           "Runtime filters computed");
     }
 
@@ -174,7 +174,7 @@ public class Planner {
     }
 
     Collections.reverse(fragments);
-    ctx_.getRootAnalyzer().getTimeline().markEvent("Distributed plan created");
+    ctx_.getAnalysisResult().getTimeline().markEvent("Distributed plan created");
 
     ColumnLineageGraph graph = ctx_.getRootAnalyzer().getColumnLineageGraph();
     if (BackendConfig.INSTANCE.getComputeLineage() || RuntimeEnv.INSTANCE.isTestEnv()) {
@@ -217,7 +217,7 @@ public class Planner {
         graph.computeLineageGraph(resultExprs, ctx_.getRootAnalyzer());
       }
       if (LOG.isTraceEnabled()) LOG.trace("lineage: " + graph.debugString());
-      ctx_.getRootAnalyzer().getTimeline().markEvent("Lineage info computed");
+      ctx_.getAnalysisResult().getTimeline().markEvent("Lineage info computed");
     }
 
     return fragments;
@@ -236,7 +236,7 @@ public class Planner {
     // Only use one scanner thread per scan-node instance since intra-node
     // parallelism is achieved via multiple fragment instances.
     ctx_.getQueryOptions().setNum_scanner_threads(1);
-    ctx_.getRootAnalyzer().getTimeline().markEvent("Parallel plans created");
+    ctx_.getAnalysisResult().getTimeline().markEvent("Parallel plans created");
     return parallelPlans;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5d028d93/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
index 87ac427..ed43421 100644
--- a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
+++ b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
@@ -126,7 +126,7 @@ public class SingleNodePlanner {
     // to detect empty result sets.
     Analyzer analyzer = queryStmt.getAnalyzer();
     analyzer.computeEquivClasses();
-    analyzer.getTimeline().markEvent("Equivalence classes computed");
+    ctx_.getAnalysisResult().getTimeline().markEvent("Equivalence classes computed");
 
     // Mark slots referenced by output exprs as materialized, prior to generating the
     // plan tree.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5d028d93/fe/src/main/java/org/apache/impala/service/Frontend.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/service/Frontend.java b/fe/src/main/java/org/apache/impala/service/Frontend.java
index d236cb1..eca8d65 100644
--- a/fe/src/main/java/org/apache/impala/service/Frontend.java
+++ b/fe/src/main/java/org/apache/impala/service/Frontend.java
@@ -904,12 +904,18 @@ public class Frontend {
           // Only re-throw the AnalysisException if there were no missing tables.
           if (missingTbls.isEmpty()) throw e;
 
+          // Record that analysis needs table metadata
+          analysisCtx.getTimeline().markEvent("Metadata load started");
+
           // Some tables/views were missing, request and wait for them to load.
           if (!requestTblLoadAndWait(missingTbls, MISSING_TBL_LOAD_WAIT_TIMEOUT_MS)) {
             if (LOG.isTraceEnabled()) {
               LOG.trace(String.format("Missing tables were not received in %dms. Load " +
                   "request will be retried.", MISSING_TBL_LOAD_WAIT_TIMEOUT_MS));
             }
+            analysisCtx.getTimeline().markEvent("Metadata load timeout");
+          } else {
+            analysisCtx.getTimeline().markEvent("Metadata load finished");
           }
         }
       }
@@ -1037,7 +1043,7 @@ public class Frontend {
       throws ImpalaException {
     // Analyze the statement
     AnalysisContext.AnalysisResult analysisResult = analyzeStmt(queryCtx);
-    EventSequence timeline = analysisResult.getAnalyzer().getTimeline();
+    EventSequence timeline = analysisResult.getTimeline();
     timeline.markEvent("Analysis finished");
     Preconditions.checkNotNull(analysisResult.getStmt());
     TExecRequest result = new TExecRequest();
@@ -1150,7 +1156,7 @@ public class Frontend {
     }
 
     timeline.markEvent("Planning finished");
-    result.setTimeline(analysisResult.getAnalyzer().getTimeline().toThrift());
+    result.setTimeline(analysisResult.getTimeline().toThrift());
     return result;
   }
 


[2/6] incubator-impala git commit: IMPALA-4739: ExprRewriter fails on HAVING clauses

Posted by he...@apache.org.
IMPALA-4739: ExprRewriter fails on HAVING clauses

The bug was that expr rewrite rules such as ExtractCommonConjunctRule
analyzed their own output, which doesn't work for syntactic elements
that allow column aliases, such as the HAVING clause.
The fix was to remove the analysis step (the re-analysis happens anyway
in AnalysisCtx).

Change-Id: Ife74c61f549f620c42f74928f6474e8a5a7b7f00
Reviewed-on: http://gerrit.cloudera.org:8080/5662
Reviewed-by: Marcel Kornacker <ma...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/70ae2e38
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/70ae2e38
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/70ae2e38

Branch: refs/heads/master
Commit: 70ae2e38eb0c4f9be0084e057c70ba427bbbbcfc
Parents: 6a2c904
Author: Marcel Kornacker <ma...@cloudera.com>
Authored: Mon Jan 9 18:13:59 2017 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Jan 12 02:31:44 2017 +0000

----------------------------------------------------------------------
 .../java/org/apache/impala/analysis/Analyzer.java     |  3 +++
 .../apache/impala/rewrite/BetweenToCompoundRule.java  |  3 ++-
 .../impala/rewrite/ExtractCommonConjunctRule.java     |  4 ++--
 .../org/apache/impala/rewrite/FoldConstantsRule.java  |  4 ++--
 .../org/apache/impala/common/FrontendTestBase.java    |  4 ++--
 .../queries/PlannerTest/constant-folding.test         |  6 ++++--
 .../functional-query/queries/QueryTest/exprs.test     | 14 +++++++++++++-
 7 files changed, 28 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/70ae2e38/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
index 8bea3aa..4d638a8 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
@@ -1060,6 +1060,9 @@ public class Analyzer {
           // analysis pass, so the conjunct may not have been rewritten yet.
           ExprRewriter rewriter = new ExprRewriter(BetweenToCompoundRule.INSTANCE);
           conjunct = rewriter.rewrite(conjunct, this);
+          // analyze this conjunct here: we know it can't contain references to select list
+          // aliases and having it analyzed is needed for the following EvalPredicate() call
+          conjunct.analyze(this);;
         }
         if (!FeSupport.EvalPredicate(conjunct, globalState_.queryCtx)) {
           if (fromHavingClause) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/70ae2e38/fe/src/main/java/org/apache/impala/rewrite/BetweenToCompoundRule.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/rewrite/BetweenToCompoundRule.java b/fe/src/main/java/org/apache/impala/rewrite/BetweenToCompoundRule.java
index 296780d..90110b8 100644
--- a/fe/src/main/java/org/apache/impala/rewrite/BetweenToCompoundRule.java
+++ b/fe/src/main/java/org/apache/impala/rewrite/BetweenToCompoundRule.java
@@ -28,6 +28,8 @@ import org.apache.impala.common.AnalysisException;
 /**
  * Rewrites BetweenPredicates into an equivalent conjunctive/disjunctive
  * CompoundPredicate.
+ * It can be applied to pre-analysis expr trees and therefore does not reanalyze
+ * the transformation output itself.
  * Examples:
  * A BETWEEN X AND Y ==> A >= X AND A <= Y
  * A NOT BETWEEN X AND Y ==> A < X OR A > Y
@@ -55,7 +57,6 @@ public class BetweenToCompoundRule implements ExprRewriteRule {
           bp.getChild(0), bp.getChild(2));
       result = new CompoundPredicate(CompoundPredicate.Operator.AND, lower, upper);
     }
-    result.analyze(analyzer);
     return result;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/70ae2e38/fe/src/main/java/org/apache/impala/rewrite/ExtractCommonConjunctRule.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/rewrite/ExtractCommonConjunctRule.java b/fe/src/main/java/org/apache/impala/rewrite/ExtractCommonConjunctRule.java
index e1515d3..34934b0 100644
--- a/fe/src/main/java/org/apache/impala/rewrite/ExtractCommonConjunctRule.java
+++ b/fe/src/main/java/org/apache/impala/rewrite/ExtractCommonConjunctRule.java
@@ -30,6 +30,8 @@ import com.google.common.collect.Lists;
 /**
  * This rule extracts common conjuncts from multiple disjunctions when it is applied
  * recursively bottom-up to a tree of CompoundPredicates.
+ * It can be applied to pre-analysis expr trees and therefore does not reanalyze
+ * the transformation output itself.
  *
  * Examples:
  * (a AND b AND c) OR (b AND d) ==> b AND ((a AND c) OR (d))
@@ -80,7 +82,6 @@ public class ExtractCommonConjunctRule implements ExprRewriteRule {
     if (child0Conjuncts.isEmpty() || child1Conjuncts.isEmpty()) {
       Preconditions.checkState(!commonConjuncts.isEmpty());
       Expr result = CompoundPredicate.createConjunctivePredicate(commonConjuncts);
-      result.analyze(analyzer);
       return result;
     }
 
@@ -94,7 +95,6 @@ public class ExtractCommonConjunctRule implements ExprRewriteRule {
     newDisjunction.setPrintSqlInParens(true);
     Expr result = CompoundPredicate.createConjunction(newDisjunction,
         CompoundPredicate.createConjunctivePredicate(commonConjuncts));
-    result.analyze(analyzer);
     return result;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/70ae2e38/fe/src/main/java/org/apache/impala/rewrite/FoldConstantsRule.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/rewrite/FoldConstantsRule.java b/fe/src/main/java/org/apache/impala/rewrite/FoldConstantsRule.java
index f3eb9a8..146dd83 100644
--- a/fe/src/main/java/org/apache/impala/rewrite/FoldConstantsRule.java
+++ b/fe/src/main/java/org/apache/impala/rewrite/FoldConstantsRule.java
@@ -29,8 +29,8 @@ import org.apache.impala.common.AnalysisException;
  * TODO: Expressions fed into this rule are currently not required to be analyzed
  * in order to support constant folding in expressions that contain unresolved
  * references to select-list aliases (such expressions cannot be analyzed).
- * For sanity, we should restructure our analysis/rewriting to only allow analyzed exprs
- * to be rewritten.
+ * The cross-dependencies between rule transformations and analysis are vague at the
+ * moment and make rule application overly complicated.
  *
  * Examples:
  * 1 + 1 + 1 --> 3

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/70ae2e38/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java b/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java
index 9dc08df..297666f 100644
--- a/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java
+++ b/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java
@@ -244,8 +244,8 @@ public class FrontendTestBase {
     try {
       AnalysisContext analysisCtx = new AnalysisContext(catalog_,
           TestUtils.createQueryContext(Catalog.DEFAULT_DB,
-              System.getProperty("user.name")),
-              AuthorizationConfig.createAuthDisabledConfig());
+            System.getProperty("user.name")),
+          AuthorizationConfig.createAuthDisabledConfig());
       analysisCtx.analyze(stmt, analyzer);
       AnalysisContext.AnalysisResult analysisResult = analysisCtx.getAnalysisResult();
       if (expectedWarning != null) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/70ae2e38/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test b/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test
index d76bffa..d19d86e 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test
@@ -52,17 +52,19 @@ data source predicates: tinyint_col < 256, 2 > int_col
 predicates: float_col != 0
 ====
 # Test aggregation.
-select sum(1 + 1 + id)
+select sum(1 + 1 + id) sm
 from functional.alltypes
 group by timestamp_col = cast('2015-11-15' as timestamp) + interval 1 year
 having 1024 * 1024 * count(*) % 2 = 0
+  and (sm > 1 or sm > 1)
+  and (sm between 5 and 10)
 ---- PLAN
 PLAN-ROOT SINK
 |
 01:AGGREGATE [FINALIZE]
 |  output: sum(2 + id), count(*)
 |  group by: timestamp_col = TIMESTAMP '2016-11-15 00:00:00'
-|  having: 1048576 * count(*) % 2 = 0
+|  having: sum(2 + id) <= 10, sum(2 + id) > 1, sum(2 + id) >= 5, 1048576 * count(*) % 2 = 0
 |
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/70ae2e38/testdata/workloads/functional-query/queries/QueryTest/exprs.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/exprs.test b/testdata/workloads/functional-query/queries/QueryTest/exprs.test
index 78c3e09..4b1ba76 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/exprs.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/exprs.test
@@ -2547,7 +2547,7 @@ order by c, cast('2016-11-22 16:40:00.00' as timestamp)
 BIGINT, TIMESTAMP, TIMESTAMP
 ====
 ---- QUERY
-# Constant timestamp expresisons in a join condition / runtime filter as well
+# Constant timestamp expressions in a join condition / runtime filter as well
 # as a select node.
 select count(*) from (
   select a.timestamp_col from
@@ -2564,6 +2564,18 @@ where timestamp_col < cast('2013-02-18 20:46:00.01' as timestamp)
 BIGINT
 ====
 ---- QUERY
+# IMPALA-4739: rewrites in HAVING clause
+select tinyint_col, count(*) cnt
+from functional_parquet.alltypesagg
+group by 1
+having cnt > 1000 or cnt > 1000
+  and cnt between 1500 and 2500
+---- TYPES
+TINYINT, BIGINT
+---- RESULTS
+NULL,2000
+====
+---- QUERY
 # IMPALA-4550: Regression test for proper cast analysis after slot substitution within a
 # no-op explicit cast.
 select /* +straight_join */ a.id


[5/6] incubator-impala git commit: IMPALA-4650: Add Protobuf to build

Posted by he...@apache.org.
IMPALA-4650: Add Protobuf to build

This patch adds Protobuf 2.6.1 to Impala's build, and bumps the
toolchain version so that the dependency is available. Protobuf is
unused in this commit, but is required for KRPC.

FindProtobuf.cmake includes some utility CMake methods to generate
source code from Protobuf definitions. It is taken from Kudu.
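
For context, the generation helper added below is used roughly as follows (a
minimal sketch with a hypothetical foo.proto; a fuller example appears in the
header comment of FindProtobuf.cmake itself):

    find_package(Protobuf REQUIRED)
    include_directories(SYSTEM ${PROTOBUF_INCLUDE_DIR})

    # Generates foo.pb.cc / foo.pb.h and a custom target that serializes the
    # protoc invocation across multiple consumers.
    PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS PROTO_TGTS
      PROTO_FILES foo.proto)

    add_library(foo_proto ${PROTO_SRCS} ${PROTO_HDRS})
    target_link_libraries(foo_proto ${PROTOBUF_SHARED_LIBRARY})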

Change-Id: Ic9357fe0f201cbf7df1ba19fe4773dfb6c10b4ef
Reviewed-on: http://gerrit.cloudera.org:8080/5657
Tested-by: Impala Public Jenkins
Reviewed-by: Henry Robinson <he...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/4b3fdc33
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/4b3fdc33
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/4b3fdc33

Branch: refs/heads/master
Commit: 4b3fdc33019ffd6eeee075e9c8f233d99f066121
Parents: 5d028d9
Author: Henry Robinson <he...@cloudera.com>
Authored: Tue Oct 25 14:53:12 2016 -0700
Committer: Henry Robinson <he...@cloudera.com>
Committed: Thu Jan 12 05:18:17 2017 +0000

----------------------------------------------------------------------
 CMakeLists.txt                   |  12 ++
 be/CMakeLists.txt                |   1 +
 bin/bootstrap_toolchain.py       |   3 +-
 bin/impala-config.sh             |   3 +-
 cmake_modules/FindProtobuf.cmake | 233 ++++++++++++++++++++++++++++++++++
 5 files changed, 250 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4b3fdc33/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4c5d0d3..18be5d4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -81,6 +81,7 @@ set_dep_root(LLVM)
 set(LLVM_DEBUG_ROOT $ENV{IMPALA_TOOLCHAIN}/llvm-$ENV{IMPALA_LLVM_DEBUG_VERSION})
 set_dep_root(LZ4)
 set_dep_root(OPENLDAP)
+set_dep_root(PROTOBUF)
 set_dep_root(RE2)
 set_dep_root(RAPIDJSON)
 set_dep_root(SNAPPY)
@@ -298,6 +299,17 @@ if (NOT APPLE)
     SHARED_LIB "${DL_LIB_PATH}")
 endif()
 
+# find protobuf headers, libs and compiler
+find_package(Protobuf REQUIRED)
+include_directories(SYSTEM ${PROTOBUF_INCLUDE_DIR})
+message(STATUS "Protobuf include dir: " ${PROTOBUF_INCLUDE_DIR})
+
+ADD_THIRDPARTY_LIB(protobuf
+  STATIC_LIB "${PROTOBUF_STATIC_LIBRARY}")
+ADD_THIRDPARTY_LIB(protoc
+  STATIC_LIB "${PROTOBUF_PROTOC_STATIC_LIBRARY}"
+  DEPS protobuf)
+
 ###################################################################
 
 # KuduClient can use GLOG

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4b3fdc33/be/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index 6a076f8..3697ceb 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -239,6 +239,7 @@ set(CLANG_INCLUDE_FLAGS
   "-I${ZLIB_INCLUDE_DIR}"
   "-I${OPENSSL_INCLUDE_DIR}"
   "-I${LDAP_INCLUDE_DIR}"
+  "-I${PROTOBUF_INCLUDE_DIR}"
 )
 
 # allow linking of static libs into dynamic lib

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4b3fdc33/bin/bootstrap_toolchain.py
----------------------------------------------------------------------
diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index dbe53a2..c8e35de 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -351,7 +351,8 @@ if __name__ == "__main__":
 
   packages = ["avro", "binutils", "boost", "breakpad", "bzip2", "cmake", "gcc", "gflags",
       "glog", "gperftools", "gtest", "kudu", "llvm", ("llvm", "3.8.0-asserts-p1"), "lz4",
-      "openldap", "rapidjson", "re2", "snappy", "thrift", "tpc-h", "tpc-ds", "zlib"]
+      "openldap", "protobuf", "rapidjson", "re2", "snappy", "thrift", "tpc-h", "tpc-ds",
+      "zlib"]
   bootstrap(toolchain_root, packages)
 
   # Download the CDH components if necessary.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4b3fdc33/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index f185861..c980ea3 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -72,7 +72,7 @@ fi
 # moving to a different build of the toolchain, e.g. when a version is bumped or a
 # compile option is changed. The build id can be found in the output of the toolchain
 # build jobs, it is constructed from the build number and toolchain git hash prefix.
-export IMPALA_TOOLCHAIN_BUILD_ID=308-96a4cc516e
+export IMPALA_TOOLCHAIN_BUILD_ID=333-f7c19a394a
 
 # Versions of toolchain dependencies.
 # -----------------------------------
@@ -96,6 +96,7 @@ export IMPALA_LLVM_DEBUG_VERSION=3.8.0-asserts-p1
 export IMPALA_LZ4_VERSION=svn
 export IMPALA_OPENLDAP_VERSION=2.4.25
 export IMPALA_OPENSSL_VERSION=0.9.8zf
+export IMPALA_PROTOBUF_VERSION=2.6.1
 export IMPALA_POSTGRES_JDBC_DRIVER_VERSION=9.0-801
 export IMPALA_RAPIDJSON_VERSION=0.11
 export IMPALA_RE2_VERSION=20130115-p1

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4b3fdc33/cmake_modules/FindProtobuf.cmake
----------------------------------------------------------------------
diff --git a/cmake_modules/FindProtobuf.cmake b/cmake_modules/FindProtobuf.cmake
new file mode 100644
index 0000000..a1092c1
--- /dev/null
+++ b/cmake_modules/FindProtobuf.cmake
@@ -0,0 +1,233 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#=============================================================================
+# This file is heavily modified/rewritten from FindProtobuf.cmake from the
+# CMake project:
+#
+#   Copyright 2011 Kirill A. Korinskiy <ca...@catap.ru>
+#   Copyright 2009 Kitware, Inc.
+#   Copyright 2009 Philip Lowman <ph...@yhbt.com>
+#   Copyright 2008 Esben Mose Hansen, Ange Optimization ApS
+#
+#   Distributed under the OSI-approved BSD License (the "License"):
+#
+#   CMake - Cross Platform Makefile Generator
+#   Copyright 2000-2015 Kitware, Inc.
+#   Copyright 2000-2011 Insight Software Consortium
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#   * Redistributions of source code must retain the above copyright
+#     notice, this list of conditions and the following disclaimer.
+#
+#   * Redistributions in binary form must reproduce the above copyright
+#     notice, this list of conditions and the following disclaimer in the
+#     documentation and/or other materials provided with the distribution.
+#
+#   * Neither the names of Kitware, Inc., the Insight Software Consortium,
+#     nor the names of their contributors may be used to endorse or promote
+#     products derived from this software without specific prior written
+#     permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#=============================================================================
+
+#########
+# Local rewrite of the protobuf support in cmake.
+#
+# Supports cross-module protobuf dependencies and protobufs inside
+# packages much better than the one built into cmake.
+#########
+#
+# Locate and configure the Google Protocol Buffers library.
+# Defines the following variables:
+#
+#   PROTOBUF_INCLUDE_DIR - the include directory for protocol buffers
+#   PROTOBUF_SHARED_LIBRARY - path to protobuf's shared library
+#   PROTOBUF_STATIC_LIBRARY - path to protobuf's static library
+#   PROTOBUF_PROTOC_SHARED_LIBRARY - path to protoc's shared library
+#   PROTOBUF_PROTOC_STATIC_LIBRARY - path to protoc's static library
+#   PROTOBUF_PROTOC_EXECUTABLE - the protoc compiler
+#   PROTOBUF_FOUND - whether the Protocol Buffers library has been found
+#
+#  ====================================================================
+#  Example:
+#
+#   find_package(Protobuf REQUIRED)
+#   include_directories(${PROTOBUF_INCLUDE_DIR})
+#
+#   include_directories(${CMAKE_CURRENT_BINARY_DIR})
+#   PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS PROTO_TGTS
+#     [SOURCE_ROOT <root from which source is found>]
+#     [BINARY_ROOT <root into which binaries are built>]
+#     PROTO_FILES foo.proto)
+#   add_executable(bar bar.cc ${PROTO_SRCS} ${PROTO_HDRS})
+#   target_link_libraries(bar ${PROTOBUF_SHARED_LIBRARY})
+#
+# NOTE: You may need to link against pthreads, depending
+# on the platform.
+#  ====================================================================
+#
+# PROTOBUF_GENERATE_CPP (public function)
+#   SRCS = Variable to define with autogenerated
+#          source files
+#   HDRS = Variable to define with autogenerated
+#          header files
+#   TGTS = Variable to define with autogenerated
+#          custom targets; if SRCS/HDRS need to be used in multiple
+#          libraries, those libraries should depend on these targets
+#          in order to "serialize" the protoc invocations
+#  ====================================================================
+
+function(PROTOBUF_GENERATE_CPP SRCS HDRS TGTS)
+  if(NOT ARGN)
+    message(SEND_ERROR "Error: PROTOBUF_GENERATE_CPP() called without any proto files")
+    return()
+  endif(NOT ARGN)
+
+  set(options)
+  set(one_value_args SOURCE_ROOT BINARY_ROOT)
+  set(multi_value_args EXTRA_PROTO_PATHS PROTO_FILES)
+  cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN})
+  if(ARG_UNPARSED_ARGUMENTS)
+    message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}")
+  endif()
+
+  set(${SRCS})
+  set(${HDRS})
+  set(${TGTS})
+
+  set(EXTRA_PROTO_PATH_ARGS)
+  foreach(PP ${ARG_EXTRA_PROTO_PATHS})
+    set(EXTRA_PROTO_PATH_ARGS ${EXTRA_PROTO_PATH_ARGS} --proto_path ${PP})
+  endforeach()
+
+  if("${ARG_SOURCE_ROOT}" STREQUAL "")
+    SET(ARG_SOURCE_ROOT "${CMAKE_CURRENT_SOURCE_DIR}")
+  endif()
+  GET_FILENAME_COMPONENT(ARG_SOURCE_ROOT ${ARG_SOURCE_ROOT} ABSOLUTE)
+
+  if("${ARG_BINARY_ROOT}" STREQUAL "")
+    SET(ARG_BINARY_ROOT "${CMAKE_CURRENT_BINARY_DIR}")
+  endif()
+  GET_FILENAME_COMPONENT(ARG_BINARY_ROOT ${ARG_BINARY_ROOT} ABSOLUTE)
+
+  foreach(FIL ${ARG_PROTO_FILES})
+    get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
+    get_filename_component(FIL_WE ${FIL} NAME_WE)
+
+    # Ensure that the protobuf file is within the source root.
+    # This is a requirement of protoc.
+    FILE(RELATIVE_PATH PROTO_REL_TO_ROOT "${ARG_SOURCE_ROOT}" "${ABS_FIL}")
+
+    GET_FILENAME_COMPONENT(REL_DIR "${PROTO_REL_TO_ROOT}" PATH)
+
+    if(NOT REL_DIR STREQUAL "")
+      SET(REL_DIR "${REL_DIR}/")
+    endif()
+
+    set(PROTO_CC_OUT "${ARG_BINARY_ROOT}/${REL_DIR}${FIL_WE}.pb.cc")
+    set(PROTO_H_OUT "${ARG_BINARY_ROOT}/${REL_DIR}${FIL_WE}.pb.h")
+    list(APPEND ${SRCS} "${PROTO_CC_OUT}")
+    list(APPEND ${HDRS} "${PROTO_H_OUT}")
+
+    add_custom_command(
+      OUTPUT "${PROTO_CC_OUT}" "${PROTO_H_OUT}"
+      COMMAND  ${PROTOBUF_PROTOC_EXECUTABLE}
+      ARGS
+        --plugin $<TARGET_FILE:protoc-gen-insertions>
+        --cpp_out ${ARG_BINARY_ROOT}
+        --insertions_out ${ARG_BINARY_ROOT}
+        --proto_path ${ARG_SOURCE_ROOT}
+        # Used to find built-in .proto files (e.g. FileDescriptorProto)
+        --proto_path ${PROTOBUF_INCLUDE_DIR}
+        ${EXTRA_PROTO_PATH_ARGS} ${ABS_FIL}
+      DEPENDS ${ABS_FIL} protoc-gen-insertions
+      COMMENT "Running C++ protocol buffer compiler on ${FIL}"
+      VERBATIM )
+
+    # This custom target enforces that there's just one invocation of protoc
+    # when there are multiple consumers of the generated files. The target name
+    # must be unique; adding parts of the filename helps ensure this.
+    set(TGT_NAME ${REL_DIR}${FIL})
+    string(REPLACE "/" "-" TGT_NAME ${TGT_NAME})
+    add_custom_target(${TGT_NAME}
+      DEPENDS "${PROTO_CC_OUT}" "${PROTO_H_OUT}")
+    list(APPEND ${TGTS} "${TGT_NAME}")
+  endforeach()
+
+  set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE)
+  set(${SRCS} ${${SRCS}} PARENT_SCOPE)
+  set(${HDRS} ${${HDRS}} PARENT_SCOPE)
+  set(${TGTS} ${${TGTS}} PARENT_SCOPE)
+endfunction()
+
+
+find_path(PROTOBUF_INCLUDE_DIR google/protobuf/service.h
+  PATHS ${PROTOBUF_ROOT}/include
+  NO_CMAKE_SYSTEM_PATH
+  NO_SYSTEM_ENVIRONMENT_PATH)
+
+find_library(PROTOBUF_SHARED_LIBRARY protobuf
+    PATHS ${PROTOBUF_ROOT}/lib
+    DOC "The Google Protocol Buffers Library"
+    NO_CMAKE_SYSTEM_PATH
+    NO_SYSTEM_ENVIRONMENT_PATH)
+
+find_library(PROTOBUF_STATIC_LIBRARY libprotobuf.a
+    PATHS ${PROTOBUF_ROOT}/lib
+    DOC "Static version of the Google Protocol Buffers Library"
+    NO_CMAKE_SYSTEM_PATH
+    NO_SYSTEM_ENVIRONMENT_PATH)
+
+find_library(PROTOBUF_PROTOC_SHARED_LIBRARY protoc
+  PATHS ${PROTOBUF_ROOT}/lib
+  DOC "The Google Protocol Buffers Compiler Library"
+  NO_CMAKE_SYSTEM_PATH
+  NO_SYSTEM_ENVIRONMENT_PATH)
+
+find_library(PROTOBUF_PROTOC_STATIC_LIBRARY libprotoc.a
+    PATHS ${PROTOBUF_ROOT}/lib
+    DOC "Static version of the Google Protocol Buffers Compiler Library"
+    NO_CMAKE_SYSTEM_PATH
+    NO_SYSTEM_ENVIRONMENT_PATH)
+
+find_program(PROTOBUF_PROTOC_EXECUTABLE protoc
+  PATHS ${PROTOBUF_ROOT}/bin
+  DOC "The Google Protocol Buffers Compiler"
+  NO_CMAKE_SYSTEM_PATH
+  NO_SYSTEM_ENVIRONMENT_PATH)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(PROTOBUF REQUIRED_VARS
+  PROTOBUF_SHARED_LIBRARY PROTOBUF_STATIC_LIBRARY
+  PROTOBUF_PROTOC_SHARED_LIBRARY PROTOBUF_PROTOC_STATIC_LIBRARY
+  PROTOBUF_INCLUDE_DIR PROTOBUF_PROTOC_EXECUTABLE)