You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2016/12/15 01:18:28 UTC

[1/3] incubator-impala git commit: IMPALA-4654: KuduScanner must return when ReachedLimit()

Repository: incubator-impala
Updated Branches:
  refs/heads/master b222d90bd -> fc4ee65f9


IMPALA-4654: KuduScanner must return when ReachedLimit()

Fixes a bug in the KuduScanner where the scan node's limit
was not respected and thus the scanner thread would
continue executing until the scan range was fully consumed.
This could result in completed queries leaving fragments
running and those threads could be using significant CPU and
memory.

For example, when running in the minicluster with lineitem
partitioned into 3 hash partitions, the query 'select * from
tpch_kudu.lineitem limit 90' would end up leaving a scanner
thread running for ~60 seconds. In real world scenarios this
can cause unexpected resource consumption, which could build
up over time and lead to query failures if such queries are
submitted frequently.

The fix is to ensure KuduScanner::GetNext() returns with
eos=true when it finds that ReachedLimit() is true. The
helper function DecodeRowsIntoRowBatch was returning a
somewhat confusing flag, 'batch_done', which isn't necessary
and was removed in order to make it clearer how the code in
GetNext() should behave.

Change-Id: Iaddd51111a1b2647995d68e6d37d0500b3a322de
Reviewed-on: http://gerrit.cloudera.org:8080/5493
Reviewed-by: Alex Behm <al...@cloudera.com>
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Reviewed-by: Dan Hecht <dh...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/652e7d56
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/652e7d56
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/652e7d56

Branch: refs/heads/master
Commit: 652e7d56d9ac52a8c3d36ca4b04298d4b89897aa
Parents: b222d90
Author: Matthew Jacobs <mj...@cloudera.com>
Authored: Tue Dec 13 14:57:01 2016 -0800
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Wed Dec 14 23:24:47 2016 +0000

----------------------------------------------------------------------
 be/src/exec/kudu-scanner.cc   | 25 +++++++------------------
 be/src/exec/kudu-scanner.h    | 10 ++++++----
 tests/query_test/test_kudu.py | 12 ++++++------
 3 files changed, 19 insertions(+), 28 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/652e7d56/be/src/exec/kudu-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/kudu-scanner.cc b/be/src/exec/kudu-scanner.cc
index 55975e0..9ec5201 100644
--- a/be/src/exec/kudu-scanner.cc
+++ b/be/src/exec/kudu-scanner.cc
@@ -105,12 +105,11 @@ Status KuduScanner::GetNext(RowBatch* row_batch, bool* eos) {
     RETURN_IF_CANCELLED(state_);
 
     if (cur_kudu_batch_num_read_ < cur_kudu_batch_.NumRows()) {
-      bool batch_done;
-      RETURN_IF_ERROR(DecodeRowsIntoRowBatch(row_batch, &tuple, &batch_done));
-      if (batch_done) break;
+      RETURN_IF_ERROR(DecodeRowsIntoRowBatch(row_batch, &tuple));
+      if (row_batch->AtCapacity()) break;
     }
 
-    if (scanner_->HasMoreRows()) {
+    if (scanner_->HasMoreRows() && !scan_node_->ReachedLimit()) {
       RETURN_IF_ERROR(GetNextScannerBatch());
       continue;
     }
@@ -161,26 +160,19 @@ void KuduScanner::CloseCurrentClientScanner() {
   scanner_.reset();
 }
 
-Status KuduScanner::HandleEmptyProjection(RowBatch* row_batch, bool* batch_done) {
+Status KuduScanner::HandleEmptyProjection(RowBatch* row_batch) {
   int num_rows_remaining = cur_kudu_batch_.NumRows() - cur_kudu_batch_num_read_;
   int rows_to_add = std::min(row_batch->capacity() - row_batch->num_rows(),
       num_rows_remaining);
   cur_kudu_batch_num_read_ += rows_to_add;
   row_batch->CommitRows(rows_to_add);
-  // If we've reached the capacity, or the LIMIT for the scan, return.
-  if (row_batch->AtCapacity() || scan_node_->ReachedLimit()) {
-    *batch_done = true;
-  }
   return Status::OK();
 }
 
-Status KuduScanner::DecodeRowsIntoRowBatch(RowBatch* row_batch, Tuple** tuple_mem,
-    bool* batch_done) {
-  *batch_done = false;
-
+Status KuduScanner::DecodeRowsIntoRowBatch(RowBatch* row_batch, Tuple** tuple_mem) {
   // Short-circuit the count(*) case.
   if (scan_node_->tuple_desc_->slots().empty()) {
-    return HandleEmptyProjection(row_batch, batch_done);
+    return HandleEmptyProjection(row_batch);
   }
 
   // Iterate through the Kudu rows, evaluate conjuncts and deep-copy survivors into
@@ -205,10 +197,7 @@ Status KuduScanner::DecodeRowsIntoRowBatch(RowBatch* row_batch, Tuple** tuple_me
     row->SetTuple(0, *tuple_mem);
     row_batch->CommitLastRow();
     // If we've reached the capacity, or the LIMIT for the scan, return.
-    if (row_batch->AtCapacity() || scan_node_->ReachedLimit()) {
-      *batch_done = true;
-      break;
-    }
+    if (row_batch->AtCapacity() || scan_node_->ReachedLimit()) break;
     // Move to the next tuple in the tuple buffer.
     *tuple_mem = next_tuple(*tuple_mem);
   }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/652e7d56/be/src/exec/kudu-scanner.h
----------------------------------------------------------------------
diff --git a/be/src/exec/kudu-scanner.h b/be/src/exec/kudu-scanner.h
index bf84b08..8c8c663 100644
--- a/be/src/exec/kudu-scanner.h
+++ b/be/src/exec/kudu-scanner.h
@@ -61,14 +61,16 @@ class KuduScanner {
  private:
   /// Handles the case where the projection is empty (e.g. count(*)).
   /// Does this by adding sets of rows to 'row_batch' instead of adding one-by-one.
-  Status HandleEmptyProjection(RowBatch* row_batch, bool* batch_done);
+  Status HandleEmptyProjection(RowBatch* row_batch);
 
   /// Decodes rows previously fetched from kudu, now in 'cur_rows_' into a RowBatch.
   ///  - 'batch' is the batch that will point to the new tuples.
   ///  - *tuple_mem should be the location to output tuples.
-  ///  - Sets 'batch_done' to true to indicate that the batch was filled to capacity or
-  ///    the limit was reached.
-  Status DecodeRowsIntoRowBatch(RowBatch* batch, Tuple** tuple_mem, bool* batch_done);
+  /// Returns OK when one of the following conditions occur:
+  ///  - cur_kudu_batch_ is fully consumed
+  ///  - batch is full
+  ///  - scan_node_ limit has been reached
+  Status DecodeRowsIntoRowBatch(RowBatch* batch, Tuple** tuple_mem);
 
   /// Fetches the next batch of rows from the current kudu::client::KuduScanner.
   Status GetNextScannerBatch();

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/652e7d56/tests/query_test/test_kudu.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_kudu.py b/tests/query_test/test_kudu.py
index 17769bd..5b28120 100644
--- a/tests/query_test/test_kudu.py
+++ b/tests/query_test/test_kudu.py
@@ -617,11 +617,11 @@ class TestKuduMemLimits(KuduTestSuite):
           raise
         assert "Memory limit exceeded" in str(e)
 
-    # IMPALA-4645: Wait for fragments to complete; in some tests KuduScanNodes took some
-    # time to Close() after the query returned all rows. This is necessary to ensure
-    # these queries do not impact other tests.
-    # TODO: Scan nodes shouldn't take so long to shutdown; remove when this is
-    # fixed (IMPALA-4654).
+    # IMPALA-4654: Validate the fix for a bug where LimitReached() wasn't respected in
+    # the KuduScanner and the limit query above would result in a fragment running an
+    # additional minute. This ensures that the num fragments 'in flight' reaches 0 in
+    # less time than IMPALA-4654 was reproducing (~60sec) but yet still enough time that
+    # this test won't be flaky.
     verifiers = [ MetricVerifier(i.service) for i in ImpalaCluster().impalads ]
     for v in verifiers:
-      v.wait_for_metric("impala-server.num-fragments-in-flight", 0, timeout=120)
+      v.wait_for_metric("impala-server.num-fragments-in-flight", 0, timeout=30)


[2/3] incubator-impala git commit: IMPALA-4648: remove build_thirdparty.sh

Posted by ta...@apache.org.
IMPALA-4648: remove build_thirdparty.sh

It is not needed by any build processes.

Change-Id: I36e11384877e5115430baf0170b31a508cb01ba3
Reviewed-on: http://gerrit.cloudera.org:8080/5477
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/e6ef3b40
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/e6ef3b40
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/e6ef3b40

Branch: refs/heads/master
Commit: e6ef3b409027ffa66d5851a3a49d06fc636b1302
Parents: 652e7d5
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Mon Dec 12 10:21:12 2016 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Dec 14 23:38:08 2016 +0000

----------------------------------------------------------------------
 bin/build_thirdparty.sh | 244 -------------------------------------------
 1 file changed, 244 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e6ef3b40/bin/build_thirdparty.sh
----------------------------------------------------------------------
diff --git a/bin/build_thirdparty.sh b/bin/build_thirdparty.sh
deleted file mode 100755
index 4543225..0000000
--- a/bin/build_thirdparty.sh
+++ /dev/null
@@ -1,244 +0,0 @@
-#!/usr/bin/env bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# cleans and rebuilds thirdparty/. The Impala build environment must be set up
-# by bin/impala-config.sh before running this script.
-
-# Exit on non-true return value
-set -e
-# Exit on reference to uninitialized variable
-set -u
-
-# By default, git clean every library we build
-CLEAN_ACTION=1
-
-# By default, build every library
-BUILD_ALL=1
-
-# If BUILD_ALL -eq 1, these are ignored, otherwise only build those libraries with
-# BUILD_<lib> -eq 1
-BUILD_AVRO=0
-BUILD_THRIFT=0
-BUILD_GLOG=0
-BUILD_GFLAGS=0
-BUILD_GTEST=0
-BUILD_RE2=0
-BUILD_SASL=0
-BUILD_LDAP=0
-BUILD_SNAPPY=0
-BUILD_PPROF=0
-BUILD_LZ4=0
-
-for ARG in $*
-do
-  case "$ARG" in
-    -noclean)
-      CLEAN_ACTION=0
-      ;;
-    -avro)
-      BUILD_ALL=0
-      BUILD_AVRO=1
-      ;;
-    -glog)
-      BUILD_ALL=0
-      BUILD_GLOG=1
-      ;;
-    -thrift)
-      BUILD_ALL=0
-      BUILD_THRIFT=1
-      ;;
-    -gflags)
-      BUILD_ALL=0
-      BUILD_GFLAGS=1
-      ;;
-    -gtest)
-      BUILD_ALL=0
-      BUILD_GTEST=1
-      ;;
-    -re2)
-      BUILD_ALL=0
-      BUILD_RE2=1
-      ;;
-    -sasl)
-      BUILD_ALL=0
-      BUILD_SASL=1
-      ;;
-    -ldap)
-      BUILD_ALL=0
-      BUILD_LDAP=1
-      ;;
-    -snappy)
-      BUILD_ALL=0
-      BUILD_SNAPPY=1
-      ;;
-    -lz4)
-      BUILD_ALL=0
-      BUILD_LZ4=1
-      ;;
-    -pprof)
-      BUILD_ALL=0
-      BUILD_PPROF=1
-      ;;
-    -*)
-      echo "Usage: build_thirdparty.sh [-noclean] \
-[-avro -glog -thrift -gflags -gtest -re2 -sasl -ldap -snappy -pprof]"
-      exit 1
-  esac
-done
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-. "$bin"/impala-config.sh
-USE_PIC_LIB_PATH=${PIC_LIB_PATH:-}
-
-function build_preamble() {
-  echo
-  echo
-  echo "********************************************************************************"
-  echo "Building $2 in $1 $([ $CLEAN_ACTION -eq 1 ] && echo '(clean)')"
-  echo "********************************************************************************"
-  cd $1
-  if [ $CLEAN_ACTION -eq 1 ]; then
-    # remove everything that is not checked in
-    git clean -dfx
-  fi
-}
-
-# Build Sasl
-if [ $BUILD_ALL -eq 1 ] || [ $BUILD_SASL -eq 1 ]; then
-  build_preamble $IMPALA_HOME/thirdparty/cyrus-sasl-${IMPALA_CYRUS_SASL_VERSION} Sasl
-
-  # Need to specify which libdb to use on certain OSes
-  LIBDB_DIR=""
-  if [[ -e "/usr/lib64/libdb4" && -e "/usr/include/libdb4" ]]; then
-    LIBDB_DIR="--with-bdb-libdir=/usr/lib64/libdb4 --with-bdb-incdir=/usr/include/libdb4"
-  fi
-  # Disable everything except those protocols needed -- currently just Kerberos.
-  # Sasl does not have a --with-pic configuration.
-  CFLAGS="-fPIC -DPIC" CXXFLAGS="-fPIC -DPIC" ./configure \
-    --disable-sql --disable-otp --disable-ldap --disable-digest --with-saslauthd=no \
-    --prefix=$IMPALA_CYRUS_SASL_INSTALL_DIR --enable-static --enable-staticdlopen \
-    $LIBDB_DIR
-  # the first time you do a make it fails, build again.
-  (make || make)
-  make install
-fi
-
-set -e
-# build thrift
-if [ $BUILD_ALL -eq 1 ] || [ $BUILD_THRIFT -eq 1 ]; then
-  cd ${THRIFT_SRC_DIR}
-  build_preamble ${THRIFT_SRC_DIR} "Thrift"
-  if [ -d "${PIC_LIB_PATH:-}" ]; then
-    PIC_LIB_OPTIONS="--with-zlib=${PIC_LIB_PATH} "
-  fi
-  JAVA_PREFIX=${THRIFT_HOME}/java PY_PREFIX=${THRIFT_HOME}/python \
-    ./configure --with-pic --prefix=${THRIFT_HOME} \
-    --with-php=no --with-java=no --with-perl=no --with-erlang=no \
-    --with-ruby=no --with-haskell=no --with-erlang=no --with-d=no \
-    --with-go=no --with-qt4=no --with-libevent=no ${PIC_LIB_OPTIONS:-}
-  make # Make with -j fails
-  make install
-  cd ${THRIFT_SRC_DIR}/contrib/fb303
-  chmod 755 ./bootstrap.sh
-  ./bootstrap.sh
-  chmod 755 configure
-  CPPFLAGS="-I${THRIFT_HOME}/include" PY_PREFIX=${THRIFT_HOME}/python ./configure \
-    --with-java=no --with-php=no --prefix=${THRIFT_HOME} --with-thriftpath=${THRIFT_HOME}
-  make
-  make install
-fi
-
-# build gflags
-if [ $BUILD_ALL -eq 1 ] || [ $BUILD_GFLAGS -eq 1 ]; then
-  build_preamble $IMPALA_HOME/thirdparty/gflags-${IMPALA_GFLAGS_VERSION} GFlags
-  GFLAGS_INSTALL=`pwd`/third-party-install
-  ./configure --with-pic --prefix=${GFLAGS_INSTALL}
-   make -j${IMPALA_BUILD_THREADS:-4} install
-fi
-
-# Build pprof
-if [ $BUILD_ALL -eq 1 ] || [ $BUILD_PPROF -eq 1 ]; then
-  build_preamble $IMPALA_HOME/thirdparty/gperftools-${IMPALA_GPERFTOOLS_VERSION} \
-    GPerftools
-  # TODO: google perf tools indicates this might be necessary on 64 bit systems.
-  # we're not compiling the rest of our code to not omit frame pointers but it
-  # still seems to generate useful profiling data.
-  ./configure --enable-frame-pointers --with-pic
-   make -j${IMPALA_BUILD_THREADS:-4}
-fi
-
-# Build glog
-if [ $BUILD_ALL -eq 1 ] || [ $BUILD_GLOG -eq 1 ]; then
-  build_preamble  $IMPALA_HOME/thirdparty/glog-${IMPALA_GLOG_VERSION} GLog
-  ./configure --with-pic --with-gflags=${GFLAGS_INSTALL}
-  # SLES's gcc45-c++ is required for sse2 support (default is 4.3), but crashes
-  # when building logging_unittest-logging_unittest.o. Telling it to uses the
-  # stabs format for debugging symbols instead of dwarf exercises a different
-  # code path to work around this issue.
-  cat > Makefile.gcc45sles_workaround <<EOF
-logging_unittest-logging_unittest.o : CXXFLAGS= -gstabs -O2
-EOF
-  cat Makefile >> Makefile.gcc45sles_workaround
-  mv Makefile.gcc45sles_workaround Makefile
-   make -j${IMPALA_BUILD_THREADS:-4}
-fi
-
-# Build gtest
-if [ $BUILD_ALL -eq 1 ] || [ $BUILD_GTEST -eq 1 ]; then
-  build_preamble $IMPALA_HOME/thirdparty/gtest-${IMPALA_GTEST_VERSION} GTest
-  cmake .
-   make -j${IMPALA_BUILD_THREADS:-4}
-fi
-
-# Build Snappy
-if [ $BUILD_ALL -eq 1 ] || [ $BUILD_SNAPPY -eq 1 ]; then
-  build_preamble $IMPALA_HOME/thirdparty/snappy-${IMPALA_SNAPPY_VERSION} Snappy
-  ./autogen.sh
-  ./configure --with-pic --prefix=$IMPALA_HOME/thirdparty/snappy-${IMPALA_SNAPPY_VERSION}/build
-  make install
-fi
-
-# Build Lz4
-if [ $BUILD_ALL -eq 1 ] || [ $BUILD_LZ4 -eq 1 ]; then
-   build_preamble $IMPALA_HOME/thirdparty/lz4 Lz4
-   cmake .
-   make
-fi
-
-# Build re2
-if [ $BUILD_ALL -eq 1 ] || [ $BUILD_RE2 -eq 1 ]; then
-  build_preamble $IMPALA_HOME/thirdparty/re2 RE2
-   make -j${IMPALA_BUILD_THREADS:-4}
-fi
-
-# Build Ldap
-if [ $BUILD_ALL -eq 1 ] || [ $BUILD_LDAP -eq 1 ]; then
-    build_preamble $IMPALA_HOME/thirdparty/openldap-${IMPALA_OPENLDAP_VERSION} Openldap
-    ./configure --enable-slapd=no --prefix=`pwd`/impala_install --enable-static --with-pic
-     make -j${IMPALA_BUILD_THREADS:-4}
-     make -j${IMPALA_BUILD_THREADS:-4} depend
-    make install
-fi
-
-# Build Avro
-if [ $BUILD_ALL -eq 1 ] || [ $BUILD_AVRO -eq 1 ]; then
-  build_preamble $IMPALA_HOME/thirdparty/avro-c-${IMPALA_AVRO_VERSION} Avro
-  cmake .
-   make -j${IMPALA_BUILD_THREADS:-4}
-fi


[3/3] incubator-impala git commit: Add all build targets to CMake and speed up builds

Posted by ta...@apache.org.
Add all build targets to CMake and speed up builds

Always use CMake's dependency resolution instead of serial execution of
targets via shell scripts.  This improves parallelism by building fe,
be, and other targets at the same time and avoids some overhead from
invoking "make" multiple times. This reduces the time taken for
an incremental compilation of fe and be from 56s to 24s with this
command:

  ./buildall.sh -debug -noclean -notests -skiptests -ninja

Also use Impala-lzo's build script. This depends on the IMPALA-4277
fixes to the Impala-lzo build script.

Log directory creation is also moved from impala-config.sh to
buildall.sh. This means that impala-config.sh has no side-effects and
can be run concurrently with no issues.

Also make sure that "make" builds all the same artifacts as buildall.sh
when run with no args.

Testing:
Ran a jenkins core job, also experimented locally. Ran a jenkins core
job with distcc disabled - this exposed some concurrency bugs where
impala-config.sh fails if run concurrently.

Change-Id: I23617adf13bdeb034c24f6bba14b5ae480e8dd26
Reviewed-on: http://gerrit.cloudera.org:8080/4790
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/fc4ee65f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/fc4ee65f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/fc4ee65f

Branch: refs/heads/master
Commit: fc4ee65f9f51581915df55bfa07aaacc6eb610d0
Parents: e6ef3b4
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Thu Oct 20 09:23:15 2016 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Dec 14 23:42:19 2016 +0000

----------------------------------------------------------------------
 CMakeLists.txt                   | 21 ++++++-------
 be/src/benchmarks/CMakeLists.txt |  4 +++
 be/src/service/CMakeLists.txt    |  4 +--
 bin/impala-config.sh             | 15 +---------
 bin/make_impala.sh               | 55 +++++++++++++++++++++++++----------
 buildall.sh                      | 48 +++++++++++++++---------------
 ext-data-source/CMakeLists.txt   |  2 +-
 fe/CMakeLists.txt                |  2 +-
 8 files changed, 85 insertions(+), 66 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/fc4ee65f/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index dff59ad..661c9a6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -305,21 +305,22 @@ add_subdirectory(be)
 add_subdirectory(fe)
 add_subdirectory(ext-data-source)
 
-# Run FE and BE tests
-add_custom_target(testall
-  COMMAND ${CMAKE_COMMAND} -E chdir ${CMAKE_SOURCE_DIR}/fe mvn test
-  COMMAND ${CMAKE_SOURCE_DIR}/bin/runbackendtests.sh
-)
+add_custom_target(tarballs ALL DEPENDS shell_tarball)
 
-# Load test data
-add_custom_target(loadtestdata
-  COMMAND ${CMAKE_SOURCE_DIR}/bin/load-test-data.sh
+add_custom_target(shell_tarball DEPENDS thrift-deps
+  COMMAND "${CMAKE_SOURCE_DIR}/shell/make_shell_tarball.sh"
 )
 
-add_custom_target(benchmark_run
-  COMMAND ${CMAKE_SOURCE_DIR}/be/bin/run_hive_benchmark.py
+add_custom_target(cscope ALL
+  COMMAND "${CMAKE_SOURCE_DIR}/bin/gen-cscope.sh"
 )
 
+if (DEFINED ENV{IMPALA_LZO} AND EXISTS $ENV{IMPALA_LZO})
+  add_custom_target(impala-lzo ALL DEPENDS thrift-deps
+    COMMAND $ENV{IMPALA_LZO}/build.sh ${CMAKE_SOURCE_DIR} $ENV{IMPALA_TOOLCHAIN}
+  )
+endif()
+
 # Dump include paths to a file
 if (DUMP_INCLUDE_PATHS)
   file(REMOVE "${DUMP_INCLUDE_PATHS}")

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/fc4ee65f/be/src/benchmarks/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/be/src/benchmarks/CMakeLists.txt b/be/src/benchmarks/CMakeLists.txt
index ba8bcfc..5d3dfbd 100644
--- a/be/src/benchmarks/CMakeLists.txt
+++ b/be/src/benchmarks/CMakeLists.txt
@@ -22,9 +22,13 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/benchmarks")
 # where to put generated binaries
 set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/benchmarks")
 
+# Add custom target to only build the backend benchmarks
+add_custom_target(be-benchmarks)
+
 FUNCTION(ADD_BE_BENCHMARK BENCHMARK_NAME)
   ADD_EXECUTABLE(${BENCHMARK_NAME} ${BENCHMARK_NAME}.cc)
   TARGET_LINK_LIBRARIES(${BENCHMARK_NAME} ${IMPALA_LINK_LIBS})
+  ADD_DEPENDENCIES(be-benchmarks ${BENCHMARK_NAME})
 ENDFUNCTION()
 
 ADD_BE_BENCHMARK(atod-benchmark)

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/fc4ee65f/be/src/service/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/be/src/service/CMakeLists.txt b/be/src/service/CMakeLists.txt
index 6d5a897..c886bf4 100644
--- a/be/src/service/CMakeLists.txt
+++ b/be/src/service/CMakeLists.txt
@@ -53,11 +53,11 @@ add_executable(impalad
 # All Impala daemons run from the same binary. The code that is run is determined by the
 # name (i.e. argv[0]) of the command that executes the binary, so we create symlinks for
 # statestored and catalogd.
-ADD_CUSTOM_TARGET(statestored
+ADD_CUSTOM_TARGET(statestored ALL
   ${CMAKE_COMMAND} -E create_symlink ${EXECUTABLE_OUTPUT_PATH}/impalad
   ${BUILD_OUTPUT_ROOT_DIRECTORY}/statestore/statestored DEPENDS impalad)
 
-ADD_CUSTOM_TARGET(catalogd
+ADD_CUSTOM_TARGET(catalogd ALL
   ${CMAKE_COMMAND} -E create_symlink ${EXECUTABLE_OUTPUT_PATH}/impalad
   ${BUILD_OUTPUT_ROOT_DIRECTORY}/catalog/catalogd DEPENDS impalad)
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/fc4ee65f/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index aad231c..2a0ac83 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -228,25 +228,12 @@ export IMPALA_FE_TEST_COVERAGE_DIR="${IMPALA_FE_TEST_LOGS_DIR}/coverage"
 export IMPALA_BE_TEST_LOGS_DIR="${IMPALA_LOGS_DIR}/be_tests"
 export IMPALA_EE_TEST_LOGS_DIR="${IMPALA_LOGS_DIR}/ee_tests"
 export IMPALA_CUSTOM_CLUSTER_TEST_LOGS_DIR="${IMPALA_LOGS_DIR}/custom_cluster_tests"
-# List of all Impala log dirs and create them.
+# List of all Impala log dirs so they can be created by buildall.sh
 export IMPALA_ALL_LOGS_DIRS="${IMPALA_CLUSTER_LOGS_DIR}
   ${IMPALA_DATA_LOADING_LOGS_DIR} ${IMPALA_DATA_LOADING_SQL_DIR}
   ${IMPALA_EE_TEST_LOGS_DIR} ${IMPALA_FE_TEST_COVERAGE_DIR}
   ${IMPALA_BE_TEST_LOGS_DIR} ${IMPALA_EE_TEST_LOGS_DIR}
   ${IMPALA_CUSTOM_CLUSTER_TEST_LOGS_DIR}"
-mkdir -p $IMPALA_ALL_LOGS_DIRS
-
-# Create symlinks Testing/Temporary and be/Testing/Temporary that point to the BE test
-# log dir to capture the all logs of BE unit tests. Gtest has Testing/Temporary
-# hardwired in its code, so we cannot change the output dir by configuration.
-# We create two symlinks to capture the logs when running ctest either from
-# ${IMPALA_HOME} or ${IMPALA_HOME}/be.
-rm -rf "${IMPALA_HOME}/Testing"
-mkdir -p "${IMPALA_HOME}/Testing"
-ln -fs "${IMPALA_BE_TEST_LOGS_DIR}" "${IMPALA_HOME}/Testing/Temporary"
-rm -rf "${IMPALA_HOME}/be/Testing"
-mkdir -p "${IMPALA_HOME}/be/Testing"
-ln -fs "${IMPALA_BE_TEST_LOGS_DIR}" "${IMPALA_HOME}/be/Testing/Temporary"
 
 # Reduce the concurrency for local tests to half the number of cores in the system.
 # Note than nproc may not be available on older distributions (centos5.5)

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/fc4ee65f/bin/make_impala.sh
----------------------------------------------------------------------
diff --git a/bin/make_impala.sh b/bin/make_impala.sh
index c198378..ec8421a 100755
--- a/bin/make_impala.sh
+++ b/bin/make_impala.sh
@@ -17,19 +17,25 @@
 # specific language governing permissions and limitations
 # under the License.
 
-# Incrementally compiles the BE.
+# Incrementally compiles the frontend and backend.
 
 set -euo pipefail
 trap 'echo Error in $0 at line $LINENO: $(cd "'$PWD'" && awk "NR == $LINENO" $0)' ERR
 
 : ${IMPALA_TOOLCHAIN=}
 
+BUILD_EVERYTHING=1
+BUILD_FE_ONLY=0
 BUILD_TESTS=1
 CLEAN=0
 TARGET_BUILD_TYPE=${TARGET_BUILD_TYPE:-""}
 BUILD_SHARED_LIBS=${BUILD_SHARED_LIBS:-""}
 CMAKE_ONLY=0
 MAKE_CMD=make
+MAKE_ARGS=-j${IMPALA_BUILD_THREADS:-4}
+
+# The minimal make targets if BUILD_EVERYTHING is 0.
+MAKE_TARGETS="impalad statestored catalogd fesupport loggingsupport ImpalaUdf"
 
 # parse command line options
 for ARG in $*
@@ -37,6 +43,7 @@ do
   case "$ARG" in
     -notests)
       BUILD_TESTS=0
+      BUILD_EVERYTHING=0
       ;;
     -clean)
       CLEAN=1
@@ -56,6 +63,22 @@ do
     -cmake_only)
       CMAKE_ONLY=1
       ;;
+    -fe)
+      MAKE_TARGETS+=" fe"
+      ;;
+    -fe_only)
+      BUILD_FE_ONLY=1
+      BUILD_EVERYTHING=0
+      ;;
+    -cscope)
+      MAKE_TARGETS+=" cscope"
+      ;;
+    -impala-lzo)
+      MAKE_TARGETS+=" impala-lzo"
+      ;;
+    -tarballs)
+      MAKE_TARGETS+=" tarballs"
+      ;;
     -help|*)
       echo "make_impala.sh [-build_type=<build type> -notests -clean]"
       echo "[-build_type] : Target build type. Examples: Debug, Release, Address_sanitizer."
@@ -63,9 +86,14 @@ do
       echo "[-build_shared_libs] : Link all executables dynamically"
       echo "[-build_static_libs] : Link all executables statically (the default)"
       echo "[-cmake_only] : Generate makefiles and exit"
+      echo "[-fe] : Builds fe in addition to backend."
+      echo "[-fe_only] : Builds fe only."
       echo "[-ninja] : Use the Ninja build tool instead of Make"
-      echo "[-notests] : Omits building the tests."
+      echo "[-notests] : Omits building the backend tests and benchmarks."
       echo "[-clean] : Cleans previous build artifacts."
+      echo "[-cscope] : Builds cscope metadata."
+      echo "[-impala-lzo] : Builds Impala LZO."
+      echo "[-tarballs] : Builds additional tarballs like the shell tarball."
       echo ""
       echo "If either -build_type or -build_*_libs is set, cmake will be re-run for the "
       echo "project. Otherwise the last cmake configuration will continue to take effect."
@@ -74,6 +102,10 @@ do
   esac
 done
 
+if [ $BUILD_TESTS -eq 1 ]; then
+  MAKE_TARGETS+=" be-test be-benchmarks"
+fi
+
 echo "********************************************************************************"
 echo " Building Impala "
 if [ "x${TARGET_BUILD_TYPE}" != "x" ];
@@ -138,18 +170,11 @@ if [ $CMAKE_ONLY -eq 1 ]; then
   exit 0
 fi
 
-${MAKE_CMD} function-registry
-
-# With parallelism, make doesn't always make statestored and catalogd correctly if you
-# write make -jX impalad statestored catalogd. So we keep them separate and after impalad,
-# which they link to.
-${MAKE_CMD} -j${IMPALA_BUILD_THREADS:-4} impalad
-
-${MAKE_CMD} statestored
-${MAKE_CMD} catalogd
-if [ $BUILD_TESTS -eq 1 ]
-then
-  ${MAKE_CMD} -j${IMPALA_BUILD_THREADS:-4}
+MAKE_ARGS=-j${IMPALA_BUILD_THREADS:-4}
+if [ $BUILD_FE_ONLY -eq 1 ]; then
+  ${MAKE_CMD} ${MAKE_ARGS} fe
+elif [ $BUILD_EVERYTHING -eq 1 ]; then
+  ${MAKE_CMD} ${MAKE_ARGS}
 else
-  ${MAKE_CMD} -j${IMPALA_BUILD_THREADS:-4} fesupport loggingsupport ImpalaUdf
+  ${MAKE_CMD} ${MAKE_ARGS} ${MAKE_TARGETS}
 fi

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/fc4ee65f/buildall.sh
----------------------------------------------------------------------
diff --git a/buildall.sh b/buildall.sh
index a4791b7..fc66acb 100755
--- a/buildall.sh
+++ b/buildall.sh
@@ -286,6 +286,23 @@ if [[ $TESTS_ACTION -eq 1 || $TESTDATA_ACTION -eq 1 || $FORMAT_CLUSTER -eq 1 ||
   NEED_MINICLUSTER=1
 fi
 
+create_log_dirs() {
+  # Create all of the log directories.
+  mkdir -p $IMPALA_ALL_LOGS_DIRS
+
+  # Create symlinks Testing/Temporary and be/Testing/Temporary that point to the BE test
+  # log dir to capture the all logs of BE unit tests. Gtest has Testing/Temporary
+  # hardwired in its code, so we cannot change the output dir by configuration.
+  # We create two symlinks to capture the logs when running ctest either from
+  # ${IMPALA_HOME} or ${IMPALA_HOME}/be.
+  rm -rf "${IMPALA_HOME}/Testing"
+  mkdir -p "${IMPALA_HOME}/Testing"
+  ln -fs "${IMPALA_BE_TEST_LOGS_DIR}" "${IMPALA_HOME}/Testing/Temporary"
+  rm -rf "${IMPALA_HOME}/be/Testing"
+  mkdir -p "${IMPALA_HOME}/be/Testing"
+  ln -fs "${IMPALA_BE_TEST_LOGS_DIR}" "${IMPALA_HOME}/be/Testing/Temporary"
+}
+
 bootstrap_dependencies() {
   # Populate necessary thirdparty components unless it's set to be skipped.
   if [[ "${SKIP_TOOLCHAIN_BOOTSTRAP}" = true ]]; then
@@ -310,37 +327,20 @@ bootstrap_dependencies() {
 
 # Build the Impala frontend and its dependencies.
 build_fe() {
-  "$IMPALA_HOME/bin/make_impala.sh" ${MAKE_IMPALA_ARGS} -cmake_only
-  "${MAKE_CMD}" fe
+  "$IMPALA_HOME/bin/make_impala.sh" ${MAKE_IMPALA_ARGS} -fe_only
 }
 
 # Build all components.
 build_all_components() {
-  # Build common and backend. This also sets up the CMake files.
-  echo "Calling make_impala.sh ${MAKE_IMPALA_ARGS}"
-  "$IMPALA_HOME/bin/make_impala.sh" ${MAKE_IMPALA_ARGS}
-
+  # Build the Impala frontend, backend and external data source API.
+  MAKE_IMPALA_ARGS+=" -fe -cscope -tarballs"
   if [[ -e "$IMPALA_LZO" ]]
   then
-    pushd "$IMPALA_LZO"
-    LZO_CMAKE_ARGS+=" -DCMAKE_TOOLCHAIN_FILE=./cmake_modules/toolchain.cmake"
-    rm -f CMakeCache.txt
-    cmake ${LZO_CMAKE_ARGS}
-    "${MAKE_CMD}"
-    popd
+    MAKE_IMPALA_ARGS+=" -impala-lzo"
   fi
 
-  # Build the Java components (fe and external data source API).
-  pushd "$IMPALA_HOME"
-  "${MAKE_CMD}" ext-data-source fe
-  popd
-
-  # Build the shell tarball
-  echo "Creating shell tarball"
-  "${IMPALA_HOME}/shell/make_shell_tarball.sh"
-
-  # Generate list of files for Cscope to index
-  "$IMPALA_HOME/bin/gen-cscope.sh"
+  echo "Running make_impala.sh ${MAKE_IMPALA_ARGS}"
+  "$IMPALA_HOME/bin/make_impala.sh" ${MAKE_IMPALA_ARGS}
 }
 
 # Do any configuration of the test cluster required by the script arguments.
@@ -426,6 +426,8 @@ if [[ "$CLEAN_ACTION" -eq 1 ]]; then
   "$IMPALA_HOME/bin/clean.sh"
 fi
 
+create_log_dirs
+
 bootstrap_dependencies
 
 if [[ "$BUILD_FE_ONLY" -eq 1 ]]; then

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/fc4ee65f/ext-data-source/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/ext-data-source/CMakeLists.txt b/ext-data-source/CMakeLists.txt
index d2c1077..1a53278 100644
--- a/ext-data-source/CMakeLists.txt
+++ b/ext-data-source/CMakeLists.txt
@@ -15,6 +15,6 @@
 # specific language governing permissions and limitations
 # under the License.
 
-add_custom_target(ext-data-source DEPENDS thrift-deps
+add_custom_target(ext-data-source ALL DEPENDS thrift-deps
   COMMAND $ENV{IMPALA_HOME}/bin/mvn-quiet.sh install -DskipTests
 )

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/fc4ee65f/fe/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/fe/CMakeLists.txt b/fe/CMakeLists.txt
index c162435..e06f467 100644
--- a/fe/CMakeLists.txt
+++ b/fe/CMakeLists.txt
@@ -15,6 +15,6 @@
 # specific language governing permissions and limitations
 # under the License.
 
-add_custom_target(fe DEPENDS thrift-deps function-registry ext-data-source
+add_custom_target(fe ALL DEPENDS thrift-deps function-registry ext-data-source
   COMMAND $ENV{IMPALA_HOME}/bin/mvn-quiet.sh install -DskipTests
 )