Posted to commits@impala.apache.org by ta...@apache.org on 2017/05/08 21:46:10 UTC

[1/7] incubator-impala git commit: IMPALA-5246: MemTestClose() should handle Expr's preparation failure

Repository: incubator-impala
Updated Branches:
  refs/heads/master 50e3abdc3 -> bdad90e69


IMPALA-5246: MemTestClose() should handle Expr's preparation failure

A UDF may fail to initialize due to exceeding the memory
limit or for other reasons, in which case its Prepare()
function may not have been called and its thread-local
state may not be initialized.

MemTestClose() in test-udfs.cc wrongly assumed that the
thread-local state is always initialized, which could lead
to dereferencing a null pointer in Close(). This change
fixes the issue by checking whether the thread-local state
is null and returning early if so. It also sets the
fragment-local or thread-local state in FunctionContext to
nullptr after freeing it in various built-ins' Close()
functions.

Change-Id: Id2c689246ed4f8dd38f104fa35904f3926a7039c
Reviewed-on: http://gerrit.cloudera.org:8080/6757
Reviewed-by: Dan Hecht <dh...@cloudera.com>
Reviewed-by: Attila Jeges <at...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/c26a485a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/c26a485a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/c26a485a

Branch: refs/heads/master
Commit: c26a485afeebb74477f4f34303411614e3cb6921
Parents: 50e3abd
Author: Michael Ho <kw...@cloudera.com>
Authored: Thu Apr 27 18:32:34 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu May 4 22:20:11 2017 +0000

----------------------------------------------------------------------
 be/src/exprs/case-expr.cc           |  1 +
 be/src/exprs/hive-udf-call.cc       |  3 ++-
 be/src/exprs/in-predicate.h         |  1 +
 be/src/exprs/like-predicate.cc      |  6 ++++--
 be/src/exprs/math-functions-ir.cc   |  1 +
 be/src/exprs/string-functions-ir.cc |  9 ++++++---
 be/src/exprs/timestamp-functions.cc |  1 +
 be/src/exprs/udf-builtins-ir.cc     | 12 ++++--------
 be/src/exprs/utility-functions.cc   |  2 +-
 be/src/testutil/test-udfs.cc        | 10 ++++++----
 be/src/udf/udf-test.cc              |  2 +-
 11 files changed, 28 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c26a485a/be/src/exprs/case-expr.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/case-expr.cc b/be/src/exprs/case-expr.cc
index 3c0db79..6847e82 100644
--- a/be/src/exprs/case-expr.cc
+++ b/be/src/exprs/case-expr.cc
@@ -80,6 +80,7 @@ void CaseExpr::Close(RuntimeState* state, ExprContext* ctx,
     FunctionContext* fn_ctx = ctx->fn_context(fn_context_index_);
     void* case_state = fn_ctx->GetFunctionState(FunctionContext::THREAD_LOCAL);
     fn_ctx->Free(reinterpret_cast<uint8_t*>(case_state));
+    fn_ctx->SetFunctionState(FunctionContext::THREAD_LOCAL, nullptr);
   }
   Expr::Close(state, ctx, scope);
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c26a485a/be/src/exprs/hive-udf-call.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/hive-udf-call.cc b/be/src/exprs/hive-udf-call.cc
index 169b603..d39e1bd 100644
--- a/be/src/exprs/hive-udf-call.cc
+++ b/be/src/exprs/hive-udf-call.cc
@@ -237,7 +237,7 @@ Status HiveUdfCall::Open(RuntimeState* state, ExprContext* ctx,
 }
 
 void HiveUdfCall::Close(RuntimeState* state, ExprContext* ctx,
-                        FunctionContext::FunctionStateScope scope) {
+    FunctionContext::FunctionStateScope scope) {
   if (fn_context_index_ != -1) {
     FunctionContext* fn_ctx = ctx->fn_context(fn_context_index_);
     JniContext* jni_ctx = reinterpret_cast<JniContext*>(
@@ -265,6 +265,7 @@ void HiveUdfCall::Close(RuntimeState* state, ExprContext* ctx,
       }
       jni_ctx->output_anyval = NULL;
       delete jni_ctx;
+      fn_ctx->SetFunctionState(FunctionContext::THREAD_LOCAL, nullptr);
     } else {
       DCHECK(!ctx->opened_);
     }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c26a485a/be/src/exprs/in-predicate.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/in-predicate.h b/be/src/exprs/in-predicate.h
index 52d2015..087c0e6 100644
--- a/be/src/exprs/in-predicate.h
+++ b/be/src/exprs/in-predicate.h
@@ -367,6 +367,7 @@ void InPredicate::SetLookupClose(
   SetLookupState<SetType>* state =
       reinterpret_cast<SetLookupState<SetType>*>(ctx->GetFunctionState(scope));
   delete state;
+  ctx->SetFunctionState(scope, nullptr);
 }
 
 template <typename T, typename SetType>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c26a485a/be/src/exprs/like-predicate.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/like-predicate.cc b/be/src/exprs/like-predicate.cc
index 18e0f9a..f479f12 100644
--- a/be/src/exprs/like-predicate.cc
+++ b/be/src/exprs/like-predicate.cc
@@ -110,9 +110,10 @@ void LikePredicate::LikePrepareInternal(FunctionContext* context,
 void LikePredicate::LikeClose(FunctionContext* context,
     FunctionContext::FunctionStateScope scope) {
   if (scope == FunctionContext::THREAD_LOCAL) {
-  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
-      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
+    LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
+        context->GetFunctionState(FunctionContext::THREAD_LOCAL));
     delete state;
+    context->SetFunctionState(FunctionContext::THREAD_LOCAL, nullptr);
   }
 }
 
@@ -240,6 +241,7 @@ void LikePredicate::RegexClose(FunctionContext* context,
     LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
         context->GetFunctionState(FunctionContext::THREAD_LOCAL));
     delete state;
+    context->SetFunctionState(FunctionContext::THREAD_LOCAL, nullptr);
   }
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c26a485a/be/src/exprs/math-functions-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/math-functions-ir.cc b/be/src/exprs/math-functions-ir.cc
index 6a331ba..0c76c4a 100644
--- a/be/src/exprs/math-functions-ir.cc
+++ b/be/src/exprs/math-functions-ir.cc
@@ -180,6 +180,7 @@ void MathFunctions::RandClose(FunctionContext* ctx,
     uint8_t* seed = reinterpret_cast<uint8_t*>(
         ctx->GetFunctionState(FunctionContext::THREAD_LOCAL));
     ctx->Free(seed);
+    ctx->SetFunctionState(FunctionContext::THREAD_LOCAL, nullptr);
   }
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c26a485a/be/src/exprs/string-functions-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/string-functions-ir.cc b/be/src/exprs/string-functions-ir.cc
index dcef9cf..0a05739 100644
--- a/be/src/exprs/string-functions-ir.cc
+++ b/be/src/exprs/string-functions-ir.cc
@@ -236,7 +236,8 @@ void StringFunctions::ReplaceClose(FunctionContext* context,
   if (scope != FunctionContext::FRAGMENT_LOCAL) return;
   ReplaceContext* rptr = reinterpret_cast<ReplaceContext*>
       (context->GetFunctionState(FunctionContext::FRAGMENT_LOCAL));
-  if (rptr != nullptr) context->Free(reinterpret_cast<uint8_t*>(rptr));
+  context->Free(reinterpret_cast<uint8_t*>(rptr));
+  context->SetFunctionState(scope, nullptr);
 }
 
 StringVal StringFunctions::Replace(FunctionContext* context, const StringVal& str,
@@ -612,6 +613,7 @@ void StringFunctions::RegexpClose(
   if (scope != FunctionContext::FRAGMENT_LOCAL) return;
   re2::RE2* re = reinterpret_cast<re2::RE2*>(context->GetFunctionState(scope));
   delete re;
+  context->SetFunctionState(scope, nullptr);
 }
 
 StringVal StringFunctions::RegexpExtract(FunctionContext* context, const StringVal& str,
@@ -879,8 +881,8 @@ void StringFunctions::ParseUrlClose(
   if (scope != FunctionContext::FRAGMENT_LOCAL) return;
   UrlParser::UrlPart* url_part =
       reinterpret_cast<UrlParser::UrlPart*>(ctx->GetFunctionState(scope));
-  if (url_part == NULL) return;
   delete url_part;
+  ctx->SetFunctionState(scope, nullptr);
 }
 
 StringVal StringFunctions::ParseUrlKey(FunctionContext* ctx, const StringVal& url,
@@ -941,7 +943,8 @@ void StringFunctions::BTrimClose(
   if (scope != FunctionContext::THREAD_LOCAL) return;
   bitset<256>* unique_chars = reinterpret_cast<bitset<256>*>(
       context->GetFunctionState(scope));
-  if (unique_chars != NULL) delete unique_chars;
+  delete unique_chars;
+  context->SetFunctionState(scope, nullptr);
 }
 
 StringVal StringFunctions::BTrimString(FunctionContext* ctx,

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c26a485a/be/src/exprs/timestamp-functions.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/timestamp-functions.cc b/be/src/exprs/timestamp-functions.cc
index b2a33d4..b45610e 100644
--- a/be/src/exprs/timestamp-functions.cc
+++ b/be/src/exprs/timestamp-functions.cc
@@ -193,6 +193,7 @@ void TimestampFunctions::UnixAndFromUnixClose(FunctionContext* context,
     DateTimeFormatContext* dt_ctx =
         reinterpret_cast<DateTimeFormatContext*>(context->GetFunctionState(scope));
     delete dt_ctx;
+    context->SetFunctionState(scope, nullptr);
   }
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c26a485a/be/src/exprs/udf-builtins-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/udf-builtins-ir.cc b/be/src/exprs/udf-builtins-ir.cc
index 452c5b4..40638f8 100644
--- a/be/src/exprs/udf-builtins-ir.cc
+++ b/be/src/exprs/udf-builtins-ir.cc
@@ -330,10 +330,8 @@ void UdfBuiltins::TruncPrepare(FunctionContext* ctx,
 void UdfBuiltins::TruncClose(FunctionContext* ctx,
     FunctionContext::FunctionStateScope scope) {
   void* state = ctx->GetFunctionState(scope);
-  if (state != NULL) {
-    ctx->Free(reinterpret_cast<uint8_t*>(state));
-    ctx->SetFunctionState(scope, NULL);
-  }
+  ctx->Free(reinterpret_cast<uint8_t*>(state));
+  ctx->SetFunctionState(scope, nullptr);
 }
 
 // Maps the user facing name of a unit to a TExtractField
@@ -464,10 +462,8 @@ void UdfBuiltins::SwappedExtractPrepare(FunctionContext* ctx,
 void UdfBuiltins::ExtractClose(FunctionContext* ctx,
     FunctionContext::FunctionStateScope scope) {
   void* state = ctx->GetFunctionState(scope);
-  if (state != NULL) {
-    ctx->Free(reinterpret_cast<uint8_t*>(state));
-    ctx->SetFunctionState(scope, NULL);
-  }
+  ctx->Free(reinterpret_cast<uint8_t*>(state));
+  ctx->SetFunctionState(scope, nullptr);
 }
 
 bool ValidateMADlibVector(FunctionContext* context, const StringVal& arr) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c26a485a/be/src/exprs/utility-functions.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/utility-functions.cc b/be/src/exprs/utility-functions.cc
index 6159d6d..4fe41b6 100644
--- a/be/src/exprs/utility-functions.cc
+++ b/be/src/exprs/utility-functions.cc
@@ -56,8 +56,8 @@ void UtilityFunctions::UuidClose(FunctionContext* ctx,
     boost::uuids::random_generator* uuid_gen =
         reinterpret_cast<boost::uuids::random_generator*>(
             ctx->GetFunctionState(FunctionContext::THREAD_LOCAL));
-    DCHECK(uuid_gen != NULL);
     delete uuid_gen;
+    ctx->SetFunctionState(FunctionContext::THREAD_LOCAL, nullptr);
   }
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c26a485a/be/src/testutil/test-udfs.cc
----------------------------------------------------------------------
diff --git a/be/src/testutil/test-udfs.cc b/be/src/testutil/test-udfs.cc
index 0c85c6e..894ce61 100644
--- a/be/src/testutil/test-udfs.cc
+++ b/be/src/testutil/test-udfs.cc
@@ -277,7 +277,7 @@ void CountClose(FunctionContext* context, FunctionContext::FunctionStateScope sc
   if (scope == FunctionContext::THREAD_LOCAL) {
     void* state = context->GetFunctionState(scope);
     context->Free(reinterpret_cast<uint8_t*>(state));
-    context->SetFunctionState(scope, NULL);
+    context->SetFunctionState(scope, nullptr);
   }
 }
 
@@ -306,7 +306,7 @@ void ConstantArgClose(
   if (scope == FunctionContext::THREAD_LOCAL) {
     void* state = context->GetFunctionState(scope);
     context->Free(reinterpret_cast<uint8_t*>(state));
-    context->SetFunctionState(scope, NULL);
+    context->SetFunctionState(scope, nullptr);
   }
 }
 
@@ -330,7 +330,7 @@ void ValidateOpenClose(
   if (scope == FunctionContext::THREAD_LOCAL) {
     void* state = context->GetFunctionState(scope);
     context->Free(reinterpret_cast<uint8_t*>(state));
-    context->SetFunctionState(scope, NULL);
+    context->SetFunctionState(scope, nullptr);
   }
 }
 
@@ -356,9 +356,11 @@ void MemTestClose(FunctionContext* context, FunctionContext::FunctionStateScope
   if (scope == FunctionContext::THREAD_LOCAL) {
     int64_t* total = reinterpret_cast<int64_t*>(
         context->GetFunctionState(FunctionContext::THREAD_LOCAL));
+    // Initialization could have failed. Prepare() may not have been called.
+    if (total == nullptr) return;
     context->Free(*total);
     context->Free(reinterpret_cast<uint8_t*>(total));
-    context->SetFunctionState(scope, NULL);
+    context->SetFunctionState(scope, nullptr);
   }
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c26a485a/be/src/udf/udf-test.cc
----------------------------------------------------------------------
diff --git a/be/src/udf/udf-test.cc b/be/src/udf/udf-test.cc
index 796bcb3..0bce70f 100644
--- a/be/src/udf/udf-test.cc
+++ b/be/src/udf/udf-test.cc
@@ -172,7 +172,7 @@ void ValidateSharedStateClose(
   if (scope == FunctionContext::THREAD_LOCAL) {
     void* state = context->GetFunctionState(scope);
     context->Free(reinterpret_cast<uint8_t*>(state));
-    context->SetFunctionState(scope, NULL);
+    context->SetFunctionState(scope, nullptr);
   }
 }
 


[3/7] incubator-impala git commit: IMPALA-5273: Replace StringCompare with glibc memcmp

Posted by ta...@apache.org.
IMPALA-5273: Replace StringCompare with glibc memcmp

glibc's memcmp dispatches dynamically based on the instructions
the processor supports and uses SSE4.1's ptest, which is faster
than our implementation.

I ran the benchmark below. The final query sped up by about 5x
with this patch.

    create table long_strings (s string) stored as parquet;
    insert into long_strings values (repeat("a", 2048));
    insert into long_strings select a.s from long_strings a,
      long_strings b;
    insert into long_strings select a.s from long_strings a,
      long_strings b;
    insert into long_strings select a.s from long_strings a,
      long_strings b;
    insert into long_strings select a.s from long_strings a,
      long_strings b;
    insert into long_strings select a.s from long_strings a,
      long_strings b;
    insert into long_strings select a.s from long_strings a,
      (select * from long_strings limit 10) b;
    select count(*) from long_strings where s <= repeat("a", 2048);
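
For reference, the simplified comparison the backend now uses has
the following shape (a sketch of the new StringCompare() in
be/src/runtime/string-value.inline.h, matching the diff below):

    static inline int StringCompare(const char* s1, int n1, const char* s2, int n2,
        int len) {
      // memcmp has undefined behavior when either pointer is null, so skip the call
      // for zero-length comparisons.
      const int result = (len == 0) ? 0 : memcmp(s1, s2, len);
      if (result != 0) return result;
      // Equal up to the shorter length: the shorter string sorts first.
      return n1 - n2;
    }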

Change-Id: Ie4786a4a75fdaffedd6e17cf076b5368ba4b4e3e
Reviewed-on: http://gerrit.cloudera.org:8080/6768
Reviewed-by: Jim Apple <jb...@apache.org>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/5cab97fd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/5cab97fd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/5cab97fd

Branch: refs/heads/master
Commit: 5cab97fd7faced41bbcc2370a400cbe92c3e0128
Parents: aa05c64
Author: Jim Apple <jb...@apache.org>
Authored: Sat Apr 29 15:47:40 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Sat May 6 22:05:04 2017 +0000

----------------------------------------------------------------------
 be/src/benchmarks/string-compare-benchmark.cc | 181 +++++----------------
 be/src/runtime/string-value.inline.h          |  28 +---
 2 files changed, 49 insertions(+), 160 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5cab97fd/be/src/benchmarks/string-compare-benchmark.cc
----------------------------------------------------------------------
diff --git a/be/src/benchmarks/string-compare-benchmark.cc b/be/src/benchmarks/string-compare-benchmark.cc
index cddaaa5..30bfcb6 100644
--- a/be/src/benchmarks/string-compare-benchmark.cc
+++ b/be/src/benchmarks/string-compare-benchmark.cc
@@ -23,35 +23,43 @@
 #include "util/cpu-info.h"
 #include "util/sse-util.h"
 
+#include "gutil/strings/substitute.h"
+
 #include "common/names.h"
 
 using namespace impala;
 
 // Machine Info: Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz
 //
-// Long strings (10000):      Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+// Length 1:                  Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                       StringCompare           2.19e+05 2.32e+05 2.36e+05         1X         1X         1X
+//                             strncmp           3.68e+05 3.83e+05  3.9e+05      1.68X      1.65X      1.65X
+//                              memcmp           3.88e+05 4.01e+05 4.05e+05      1.77X      1.73X      1.72X
+//
+// Length 10:                 Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
 //                                                                          (relative) (relative) (relative)
 // ---------------------------------------------------------------------------------------------------------
-//                            Original                 85     86.5     86.5         1X         1X         1X
-//                  Simplified, broken               76.6       78       78     0.901X     0.901X     0.901X
-//                   Simplified, fixed               95.8     97.5     97.5      1.13X      1.13X      1.13X
+//                       StringCompare           1.86e+05 1.89e+05 1.92e+05         1X         1X         1X
+//                             strncmp           2.76e+05 2.78e+05  2.8e+05      1.48X      1.47X      1.46X
+//                              memcmp           3.24e+05 3.27e+05  3.3e+05      1.75X      1.73X      1.72X
 //
-// Med strings (100):         Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+// Length 100:                Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
 //                                                                          (relative) (relative) (relative)
 // ---------------------------------------------------------------------------------------------------------
-//                            Original           6.55e+03 6.66e+03 6.74e+03         1X         1X         1X
-//                  Simplified, broken           6.25e+03 6.32e+03 6.38e+03     0.955X     0.949X     0.947X
-//                   Simplified, fixed           7.38e+03 7.49e+03 7.55e+03      1.13X      1.12X      1.12X
+//                       StringCompare            4.9e+04 4.95e+04    5e+04         1X         1X         1X
+//                             strncmp            9.5e+04 9.65e+04 9.77e+04      1.94X      1.95X      1.95X
+//                              memcmp           1.69e+05 1.72e+05 1.74e+05      3.46X      3.47X      3.47X
 //
-// Short strings (10):        Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+// Length 10000:              Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
 //                                                                          (relative) (relative) (relative)
 // ---------------------------------------------------------------------------------------------------------
-//                            Original           1.59e+04 1.62e+04 1.63e+04         1X         1X         1X
-//                  Simplified, broken            2.8e+04 2.85e+04 2.87e+04      1.76X      1.76X      1.76X
-//                   Simplified, fixed           2.92e+04 2.96e+04 2.99e+04      1.83X      1.83X      1.84X
+//                       StringCompare                640      642      643         1X         1X         1X
+//                             strncmp           2.17e+03 2.18e+03  2.2e+03      3.39X       3.4X      3.42X
+//                              memcmp           4.62e+03 4.64e+03 4.69e+03      7.22X      7.23X      7.29X
 
-// Original
-int StringCompare1(const char* s1, int n1, const char* s2, int n2, int len) {
+int StringCompare(const char* s1, int n1, const char* s2, int n2, int len) {
   DCHECK_EQ(len, std::min(n1, n2));
   if (CpuInfo::IsSupported(CpuInfo::SSE4_2)) {
     while (len >= SSEUtil::CHARS_PER_128_BIT_REGISTER) {
@@ -66,71 +74,18 @@ int StringCompare1(const char* s1, int n1, const char* s2, int n2, int len) {
       s1 += SSEUtil::CHARS_PER_128_BIT_REGISTER;
       s2 += SSEUtil::CHARS_PER_128_BIT_REGISTER;
     }
-    if (len >= SSEUtil::CHARS_PER_64_BIT_REGISTER) {
-      // Load 64 bits at a time, the upper 64 bits of the xmm register is set to 0
-      __m128i xmm0 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(s1));
-      __m128i xmm1 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(s2));
-      // The upper bits always match (always 0), hence the comparison to
-      // CHAR_PER_128_REGISTER
-      int chars_match = SSE4_cmpestri<SSEUtil::STRCMP_MODE>(xmm0,
-          SSEUtil::CHARS_PER_128_BIT_REGISTER, xmm1, SSEUtil::CHARS_PER_128_BIT_REGISTER);
-      if (chars_match != SSEUtil::CHARS_PER_128_BIT_REGISTER) {
-        return s1[chars_match] - s2[chars_match];
-      }
-      len -= SSEUtil::CHARS_PER_64_BIT_REGISTER;
-      s1 += SSEUtil::CHARS_PER_64_BIT_REGISTER;
-      s2 += SSEUtil::CHARS_PER_64_BIT_REGISTER;
-    }
   }
-  // TODO: for some reason memcmp is way slower than strncmp (2.5x)  why?
-  int result = strncmp(s1, s2, len);
+  // memcmp has undefined behavior when called on nullptr for either pointer
+  int result = (len == 0) ? 0 : strncmp(s1, s2, len);
   if (result != 0) return result;
   return n1 - n2;
 }
 
-// Simplified but broken (can't safely load s1 and s2)
-int StringCompare2(const char* s1, int n1, const char* s2, int n2, int len) {
+template<typename T, int COMPARE(const T*, const T*, size_t)>
+int SimpleCompare(const char* s1, int n1, const char* s2, int n2, int len) {
   DCHECK_EQ(len, std::min(n1, n2));
-  if (CpuInfo::IsSupported(CpuInfo::SSE4_2)) {
-    while (len > 0) {
-      __m128i xmm0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s1));
-      __m128i xmm1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s2));
-      int n = std::min(len, 16);
-      int chars_match = SSE4_cmpestri<SSEUtil::STRCMP_MODE>(xmm0, n, xmm1, n);
-      if (chars_match != SSEUtil::CHARS_PER_128_BIT_REGISTER) {
-        return s1[chars_match] - s2[chars_match];
-      }
-      len -= SSEUtil::CHARS_PER_128_BIT_REGISTER;
-      s1 += SSEUtil::CHARS_PER_128_BIT_REGISTER;
-      s2 += SSEUtil::CHARS_PER_128_BIT_REGISTER;
-    }
-    return n1 - n2;
-  }
-  // TODO: for some reason memcmp is way slower than strncmp (2.5x)  why?
-  int result = strncmp(s1, s2, len);
-  if (result != 0) return result;
-  return n1 - n2;
-}
-
-// Simplified and not broken
-int StringCompare3(const char* s1, int n1, const char* s2, int n2, int len) {
-  DCHECK_EQ(len, std::min(n1, n2));
-  if (CpuInfo::IsSupported(CpuInfo::SSE4_2)) {
-    while (len >= SSEUtil::CHARS_PER_128_BIT_REGISTER) {
-      __m128i xmm0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s1));
-      __m128i xmm1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s2));
-      int chars_match = SSE4_cmpestri<SSEUtil::STRCMP_MODE>(xmm0,
-          SSEUtil::CHARS_PER_128_BIT_REGISTER, xmm1, SSEUtil::CHARS_PER_128_BIT_REGISTER);
-      if (chars_match != SSEUtil::CHARS_PER_128_BIT_REGISTER) {
-        return s1[chars_match] - s2[chars_match];
-      }
-      len -= SSEUtil::CHARS_PER_128_BIT_REGISTER;
-      s1 += SSEUtil::CHARS_PER_128_BIT_REGISTER;
-      s2 += SSEUtil::CHARS_PER_128_BIT_REGISTER;
-    }
-  }
-  // TODO: for some reason memcmp is way slower than strncmp (2.5x)  why?
-  int result = strncmp(s1, s2, len);
+  // memcmp has undefined behavior when called on nullptr for either pointer
+  const int result = (len == 0) ? 0 : COMPARE(s1, s2, len);
   if (result != 0) return result;
   return n1 - n2;
 }
@@ -143,51 +98,13 @@ struct TestData {
   int result;
 };
 
-void TestStringCompare1(int batch_size, void* d) {
+template<int (*STRING_COMPARE)(const char* s1, int n1, const char* s2, int n2, int len)>
+void TestStringCompare(int batch_size, void* d) {
   TestData* data = reinterpret_cast<TestData*>(d);
-  int len = std::min(data->n1, data->n2);
+  const int len = std::min(data->n1, data->n2);
+  data->result = 0;
   for (int i = 0; i < batch_size; ++i) {
-    data->result = StringCompare1(data->s1, data->n1, data->s2, data->n2, len);
-    data->result = StringCompare1(data->s1, data->n1, data->s2, data->n2, len);
-    data->result = StringCompare1(data->s1, data->n1, data->s2, data->n2, len);
-    data->result = StringCompare1(data->s1, data->n1, data->s2, data->n2, len);
-    data->result = StringCompare1(data->s1, data->n1, data->s2, data->n2, len);
-    data->result = StringCompare1(data->s1, data->n1, data->s2, data->n2, len);
-    data->result = StringCompare1(data->s1, data->n1, data->s2, data->n2, len);
-    data->result = StringCompare1(data->s1, data->n1, data->s2, data->n2, len);
-    data->result = StringCompare1(data->s1, data->n1, data->s2, data->n2, len);
-  }
-}
-
-void TestStringCompare2(int batch_size, void* d) {
-  TestData* data = reinterpret_cast<TestData*>(d);
-  int len = std::min(data->n1, data->n2);
-  for (int i = 0; i < batch_size; ++i) {
-    data->result = StringCompare2(data->s1, data->n1, data->s2, data->n2, len);
-    data->result = StringCompare2(data->s1, data->n1, data->s2, data->n2, len);
-    data->result = StringCompare2(data->s1, data->n1, data->s2, data->n2, len);
-    data->result = StringCompare2(data->s1, data->n1, data->s2, data->n2, len);
-    data->result = StringCompare2(data->s1, data->n1, data->s2, data->n2, len);
-    data->result = StringCompare2(data->s1, data->n1, data->s2, data->n2, len);
-    data->result = StringCompare2(data->s1, data->n1, data->s2, data->n2, len);
-    data->result = StringCompare2(data->s1, data->n1, data->s2, data->n2, len);
-    data->result = StringCompare2(data->s1, data->n1, data->s2, data->n2, len);
-  }
-}
-
-void TestStringCompare3(int batch_size, void* d) {
-  TestData* data = reinterpret_cast<TestData*>(d);
-  int len = std::min(data->n1, data->n2);
-  for (int i = 0; i < batch_size; ++i) {
-    data->result = StringCompare3(data->s1, data->n1, data->s2, data->n2, len);
-    data->result = StringCompare3(data->s1, data->n1, data->s2, data->n2, len);
-    data->result = StringCompare3(data->s1, data->n1, data->s2, data->n2, len);
-    data->result = StringCompare3(data->s1, data->n1, data->s2, data->n2, len);
-    data->result = StringCompare3(data->s1, data->n1, data->s2, data->n2, len);
-    data->result = StringCompare3(data->s1, data->n1, data->s2, data->n2, len);
-    data->result = StringCompare3(data->s1, data->n1, data->s2, data->n2, len);
-    data->result = StringCompare3(data->s1, data->n1, data->s2, data->n2, len);
-    data->result = StringCompare3(data->s1, data->n1, data->s2, data->n2, len);
+    data->result += STRING_COMPARE(data->s1, data->n1, data->s2, data->n2, len);
   }
 }
 
@@ -202,30 +119,22 @@ TestData InitTestData(int len) {
   return data;
 }
 
+void BenchmarkAll(int len) {
+  Benchmark suite(Substitute("Length $0", len));
+  TestData data = InitTestData(len);
+  suite.AddBenchmark("StringCompare", TestStringCompare<StringCompare>, &data);
+  suite.AddBenchmark("strncmp", TestStringCompare<SimpleCompare<char, strncmp>>, &data);
+  suite.AddBenchmark("memcmp", TestStringCompare<SimpleCompare<void, memcmp>>, &data);
+  cout << suite.Measure() << endl;
+}
+
 int main(int argc, char **argv) {
   CpuInfo::Init();
   cout << Benchmark::GetMachineInfo() << endl << endl;
 
-  Benchmark long_suite("Long strings (10000)");
-  TestData long_data = InitTestData(10000);
-  long_suite.AddBenchmark("Original", TestStringCompare1, &long_data);
-  long_suite.AddBenchmark("Simplified, broken", TestStringCompare2, &long_data);
-  long_suite.AddBenchmark("Simplified, fixed", TestStringCompare3, &long_data);
-  cout << long_suite.Measure() << endl;
-
-  Benchmark med_suite("Med strings (100)");
-  TestData med_data = InitTestData(100);
-  med_suite.AddBenchmark("Original", TestStringCompare1, &med_data);
-  med_suite.AddBenchmark("Simplified, broken", TestStringCompare2, &med_data);
-  med_suite.AddBenchmark("Simplified, fixed", TestStringCompare3, &med_data);
-  cout << med_suite.Measure() << endl;
-
-  Benchmark short_suite("Short strings (10)");
-  TestData short_data = InitTestData(10);
-  short_suite.AddBenchmark("Original", TestStringCompare1, &short_data);
-  short_suite.AddBenchmark("Simplified, broken", TestStringCompare2, &short_data);
-  short_suite.AddBenchmark("Simplified, fixed", TestStringCompare3, &short_data);
-  cout << short_suite.Measure() << endl;
+  for (int len : {1, 10, 100, 10000}) {
+    BenchmarkAll(len);
+  }
 
   return 0;
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5cab97fd/be/src/runtime/string-value.inline.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/string-value.inline.h b/be/src/runtime/string-value.inline.h
index 073d01f..e6db93f 100644
--- a/be/src/runtime/string-value.inline.h
+++ b/be/src/runtime/string-value.inline.h
@@ -27,37 +27,17 @@
 
 namespace impala {
 
-/// Compare two strings using sse4.2 intrinsics if they are available. This code assumes
-/// that the trivial cases are already handled (i.e. one string is empty).
-/// Returns:
+/// Compare two strings. Returns:
 ///   < 0 if s1 < s2
 ///   0 if s1 == s2
 ///   > 0 if s1 > s2
-/// The SSE code path is just under 2x faster than the non-sse code path.
+///
 ///   - s1/n1: ptr/len for the first string
 ///   - s2/n2: ptr/len for the second string
 ///   - len: min(n1, n2) - this can be more cheaply passed in by the caller
 static inline int StringCompare(const char* s1, int n1, const char* s2, int n2, int len) {
-  DCHECK_EQ(len, std::min(n1, n2));
-  if (CpuInfo::IsSupported(CpuInfo::SSE4_2)) {
-    while (len >= SSEUtil::CHARS_PER_128_BIT_REGISTER) {
-      __m128i xmm0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s1));
-      __m128i xmm1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s2));
-      int chars_match = SSE4_cmpestri<SSEUtil::STRCMP_MODE>(xmm0,
-          SSEUtil::CHARS_PER_128_BIT_REGISTER, xmm1,
-          SSEUtil::CHARS_PER_128_BIT_REGISTER);
-      if (chars_match != SSEUtil::CHARS_PER_128_BIT_REGISTER) {
-        // Match strncmp() behavior, which interprets characters as unsigned char.
-        return static_cast<unsigned char>(s1[chars_match]) -
-            static_cast<unsigned char>(s2[chars_match]);
-      }
-      len -= SSEUtil::CHARS_PER_128_BIT_REGISTER;
-      s1 += SSEUtil::CHARS_PER_128_BIT_REGISTER;
-      s2 += SSEUtil::CHARS_PER_128_BIT_REGISTER;
-    }
-  }
-  // TODO: for some reason memcmp is way slower than strncmp (2.5x)  why?
-  int result = strncmp(s1, s2, len);
+  // memcmp has undefined behavior when called on nullptr for either pointer
+  const int result = (len == 0) ? 0 : memcmp(s1, s2, len);
   if (result != 0) return result;
   return n1 - n2;
 }


[6/7] incubator-impala git commit: IMPALA-5184: build fe against both Hive 1 & 2 APIs

Posted by ta...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bdad90e6/fe/src/compat-hive-1/java/org/apache/impala/compat/MetastoreShim.java
----------------------------------------------------------------------
diff --git a/fe/src/compat-hive-1/java/org/apache/impala/compat/MetastoreShim.java b/fe/src/compat-hive-1/java/org/apache/impala/compat/MetastoreShim.java
new file mode 100644
index 0000000..d0cd351
--- /dev/null
+++ b/fe/src/compat-hive-1/java/org/apache/impala/compat/MetastoreShim.java
@@ -0,0 +1,127 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.compat;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hive.service.cli.thrift.TGetColumnsReq;
+import org.apache.hive.service.cli.thrift.TGetFunctionsReq;
+import org.apache.hive.service.cli.thrift.TGetSchemasReq;
+import org.apache.hive.service.cli.thrift.TGetTablesReq;
+import org.apache.impala.authorization.User;
+import org.apache.impala.common.Pair;
+import org.apache.impala.common.ImpalaException;
+import org.apache.impala.service.Frontend;
+import org.apache.impala.service.MetadataOp;
+import org.apache.impala.thrift.TMetadataOpRequest;
+import org.apache.impala.thrift.TResultSet;
+import org.apache.thrift.TException;
+
+/**
+ * A wrapper around some of Hive's Metastore APIs to abstract away differences
+ * between major versions of Hive. This implements the shimmed methods for Hive 1.
+ */
+public class MetastoreShim {
+  /**
+   * Wrapper around MetaStoreUtils.validateName() to deal with added arguments.
+   */
+  public static boolean validateName(String name) {
+    return MetaStoreUtils.validateName(name);
+  }
+
+  /**
+   * Wrapper around IMetaStoreClient.alter_partition() to deal with added
+   * arguments.
+   */
+  public static void alterPartition(IMetaStoreClient client, Partition partition)
+      throws InvalidOperationException, MetaException, TException {
+    client.alter_partition(partition.getDbName(), partition.getTableName(), partition);
+  }
+
+  /**
+   * Wrapper around IMetaStoreClient.alter_partitions() to deal with added
+   * arguments.
+   */
+  public static void alterPartitions(IMetaStoreClient client, String dbName,
+      String tableName, List<Partition> partitions)
+      throws InvalidOperationException, MetaException, TException {
+    client.alter_partitions(dbName, tableName, partitions);
+  }
+
+  /**
+   * Wrapper around MetaStoreUtils.updatePartitionStatsFast() to deal with added
+   * arguments.
+   */
+  public static void updatePartitionStatsFast(Partition partition, Warehouse warehouse)
+      throws MetaException {
+    MetaStoreUtils.updatePartitionStatsFast(partition, warehouse);
+  }
+
+  /**
+   * Return the maximum number of Metastore objects that should be retrieved in
+   * a batch.
+   */
+  public static String metastoreBatchRetrieveObjectsMaxConfigKey() {
+    return HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_TABLE_PARTITION_MAX.toString();
+  }
+
+  /**
+   * Return the key and value that should be set in the partition parameters to
+   * mark that the stats were generated automatically by a stats task.
+   */
+  public static Pair<String, String> statsGeneratedViaStatsTaskParam() {
+    return Pair.create(
+        StatsSetupConst.STATS_GENERATED_VIA_STATS_TASK, StatsSetupConst.TRUE);
+  }
+
+  public static TResultSet execGetFunctions(
+      Frontend frontend, TMetadataOpRequest request, User user) throws ImpalaException {
+    TGetFunctionsReq req = request.getGet_functions_req();
+    return MetadataOp.getFunctions(
+        frontend, req.getCatalogName(), req.getSchemaName(), req.getFunctionName(), user);
+  }
+
+  public static TResultSet execGetColumns(
+      Frontend frontend, TMetadataOpRequest request, User user) throws ImpalaException {
+    TGetColumnsReq req = request.getGet_columns_req();
+    return MetadataOp.getColumns(frontend, req.getCatalogName(), req.getSchemaName(),
+        req.getTableName(), req.getColumnName(), user);
+  }
+
+  public static TResultSet execGetTables(
+      Frontend frontend, TMetadataOpRequest request, User user) throws ImpalaException {
+    TGetTablesReq req = request.getGet_tables_req();
+    return MetadataOp.getTables(frontend, req.getCatalogName(), req.getSchemaName(),
+        req.getTableName(), req.getTableTypes(), user);
+  }
+
+  public static TResultSet execGetSchemas(
+      Frontend frontend, TMetadataOpRequest request, User user) throws ImpalaException {
+    TGetSchemasReq req = request.getGet_schemas_req();
+    return MetadataOp.getSchemas(
+        frontend, req.getCatalogName(), req.getSchemaName(), user);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bdad90e6/fe/src/compat-hive-2/java/org/apache/impala/compat/MetastoreShim.java
----------------------------------------------------------------------
diff --git a/fe/src/compat-hive-2/java/org/apache/impala/compat/MetastoreShim.java b/fe/src/compat-hive-2/java/org/apache/impala/compat/MetastoreShim.java
new file mode 100644
index 0000000..3d69545
--- /dev/null
+++ b/fe/src/compat-hive-2/java/org/apache/impala/compat/MetastoreShim.java
@@ -0,0 +1,127 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.compat;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hive.service.rpc.thrift.TGetColumnsReq;
+import org.apache.hive.service.rpc.thrift.TGetFunctionsReq;
+import org.apache.hive.service.rpc.thrift.TGetSchemasReq;
+import org.apache.hive.service.rpc.thrift.TGetTablesReq;
+import org.apache.impala.authorization.User;
+import org.apache.impala.common.ImpalaException;
+import org.apache.impala.common.Pair;
+import org.apache.impala.service.Frontend;
+import org.apache.impala.service.MetadataOp;
+import org.apache.impala.thrift.TMetadataOpRequest;
+import org.apache.impala.thrift.TResultSet;
+import org.apache.thrift.TException;
+
+/**
+ * A wrapper around some of Hive's Metastore APIs to abstract away differences
+ * between major versions of Hive. This implements the shimmed methods for Hive 2.
+ */
+public class MetastoreShim {
+  /**
+   * Wrapper around MetaStoreUtils.validateName() to deal with added arguments.
+   */
+  public static boolean validateName(String name) {
+    return MetaStoreUtils.validateName(name, null);
+  }
+
+  /**
+   * Wrapper around IMetaStoreClient.alter_partition() to deal with added
+   * arguments.
+   */
+  public static void alterPartition(IMetaStoreClient client, Partition partition)
+      throws InvalidOperationException, MetaException, TException {
+    client.alter_partition(
+        partition.getDbName(), partition.getTableName(), partition, null);
+  }
+
+  /**
+   * Wrapper around IMetaStoreClient.alter_partitions() to deal with added
+   * arguments.
+   */
+  public static void alterPartitions(IMetaStoreClient client, String dbName,
+      String tableName, List<Partition> partitions)
+      throws InvalidOperationException, MetaException, TException {
+    client.alter_partitions(dbName, tableName, partitions, null);
+  }
+
+  /**
+   * Wrapper around MetaStoreUtils.updatePartitionStatsFast() to deal with added
+   * arguments.
+   */
+  public static void updatePartitionStatsFast(Partition partition, Warehouse warehouse)
+      throws MetaException {
+    MetaStoreUtils.updatePartitionStatsFast(partition, warehouse, null);
+  }
+
+  /**
+   * Return the maximum number of Metastore objects that should be retrieved in
+   * a batch.
+   */
+  public static String metastoreBatchRetrieveObjectsMaxConfigKey() {
+    return HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_OBJECTS_MAX.toString();
+  }
+
+  /**
+   * Return the key and value that should be set in the partition parameters to
+   * mark that the stats were generated automatically by a stats task.
+   */
+  public static Pair<String, String> statsGeneratedViaStatsTaskParam() {
+    return Pair.create(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK);
+  }
+
+  public static TResultSet execGetFunctions(
+      Frontend frontend, TMetadataOpRequest request, User user) throws ImpalaException {
+    TGetFunctionsReq req = request.getGet_functions_req();
+    return MetadataOp.getFunctions(
+        frontend, req.getCatalogName(), req.getSchemaName(), req.getFunctionName(), user);
+  }
+
+  public static TResultSet execGetColumns(
+      Frontend frontend, TMetadataOpRequest request, User user) throws ImpalaException {
+    TGetColumnsReq req = request.getGet_columns_req();
+    return MetadataOp.getColumns(frontend, req.getCatalogName(), req.getSchemaName(),
+        req.getTableName(), req.getColumnName(), user);
+  }
+
+  public static TResultSet execGetTables(
+      Frontend frontend, TMetadataOpRequest request, User user) throws ImpalaException {
+    TGetTablesReq req = request.getGet_tables_req();
+    return MetadataOp.getTables(frontend, req.getCatalogName(), req.getSchemaName(),
+        req.getTableName(), req.getTableTypes(), user);
+  }
+
+  public static TResultSet execGetSchemas(
+      Frontend frontend, TMetadataOpRequest request, User user) throws ImpalaException {
+    TGetSchemasReq req = request.getGet_schemas_req();
+    return MetadataOp.getSchemas(
+        frontend, req.getCatalogName(), req.getSchemaName(), user);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bdad90e6/fe/src/main/java/org/apache/impala/analysis/ColumnDef.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/ColumnDef.java b/fe/src/main/java/org/apache/impala/analysis/ColumnDef.java
index 42304e6..57e4b51 100644
--- a/fe/src/main/java/org/apache/impala/analysis/ColumnDef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/ColumnDef.java
@@ -33,6 +33,7 @@ import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.impala.catalog.Type;
 import org.apache.impala.common.AnalysisException;
+import org.apache.impala.compat.MetastoreShim;
 import org.apache.impala.thrift.TColumn;
 import org.apache.impala.util.KuduUtil;
 import org.apache.impala.util.MetaStoreUtil;
@@ -175,7 +176,7 @@ public class ColumnDef {
 
   public void analyze(Analyzer analyzer) throws AnalysisException {
     // Check whether the column name meets the Metastore's requirements.
-    if (!MetaStoreUtils.validateName(colName_)) {
+    if (!MetastoreShim.validateName(colName_)) {
       throw new AnalysisException("Invalid column/field name: " + colName_);
     }
     if (typeDef_ != null) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bdad90e6/fe/src/main/java/org/apache/impala/analysis/CreateDataSrcStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateDataSrcStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateDataSrcStmt.java
index 30ca223..8ef0737 100644
--- a/fe/src/main/java/org/apache/impala/analysis/CreateDataSrcStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/CreateDataSrcStmt.java
@@ -18,13 +18,13 @@
 package org.apache.impala.analysis;
 
 import org.apache.hadoop.fs.permission.FsAction;
-import org.apache.hadoop.hive.metastore.MetaStoreUtils;
-
 import org.apache.impala.authorization.Privilege;
 import org.apache.impala.common.AnalysisException;
+import org.apache.impala.compat.MetastoreShim;
 import org.apache.impala.extdatasource.ApiVersion;
 import org.apache.impala.thrift.TCreateDataSourceParams;
 import org.apache.impala.thrift.TDataSource;
+
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 
@@ -54,7 +54,7 @@ public class CreateDataSrcStmt extends StatementBase {
 
   @Override
   public void analyze(Analyzer analyzer) throws AnalysisException {
-    if (!MetaStoreUtils.validateName(dataSrcName_)) {
+    if (!MetastoreShim.validateName(dataSrcName_)) {
       throw new AnalysisException("Invalid data source name: " + dataSrcName_);
     }
     if (!ifNotExists_ && analyzer.getCatalog().getDataSource(dataSrcName_) != null) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bdad90e6/fe/src/main/java/org/apache/impala/analysis/CreateDbStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateDbStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateDbStmt.java
index 9b2fd10..9ca14f4 100644
--- a/fe/src/main/java/org/apache/impala/analysis/CreateDbStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/CreateDbStmt.java
@@ -23,6 +23,7 @@ import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.impala.authorization.Privilege;
 import org.apache.impala.catalog.Db;
 import org.apache.impala.common.AnalysisException;
+import org.apache.impala.compat.MetastoreShim;
 import org.apache.impala.thrift.TCreateDbParams;
 
 /**
@@ -81,7 +82,7 @@ public class CreateDbStmt extends StatementBase {
   @Override
   public void analyze(Analyzer analyzer) throws AnalysisException {
     // Check whether the db name meets the Metastore's requirements.
-    if (!MetaStoreUtils.validateName(dbName_)) {
+    if (!MetastoreShim.validateName(dbName_)) {
       throw new AnalysisException("Invalid database name: " + dbName_);
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bdad90e6/fe/src/main/java/org/apache/impala/analysis/DropDataSrcStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/DropDataSrcStmt.java b/fe/src/main/java/org/apache/impala/analysis/DropDataSrcStmt.java
index e3dd9a8..32501c4 100644
--- a/fe/src/main/java/org/apache/impala/analysis/DropDataSrcStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/DropDataSrcStmt.java
@@ -17,10 +17,10 @@
 
 package org.apache.impala.analysis;
 
-import org.apache.hadoop.hive.metastore.MetaStoreUtils;
-
 import org.apache.impala.common.AnalysisException;
+import org.apache.impala.compat.MetastoreShim;
 import org.apache.impala.thrift.TDropDataSourceParams;
+
 import com.google.common.base.Preconditions;
 
 /**
@@ -39,10 +39,10 @@ public class DropDataSrcStmt extends StatementBase {
 
   @Override
   public void analyze(Analyzer analyzer) throws AnalysisException {
-    if (!MetaStoreUtils.validateName(dataSrcName_) ||
+    if (!MetastoreShim.validateName(dataSrcName_) ||
         (!ifExists_ && analyzer.getCatalog().getDataSource(dataSrcName_) == null)) {
-      throw new AnalysisException(Analyzer.DATA_SRC_DOES_NOT_EXIST_ERROR_MSG +
-          dataSrcName_);
+      throw new AnalysisException(
+          Analyzer.DATA_SRC_DOES_NOT_EXIST_ERROR_MSG + dataSrcName_);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bdad90e6/fe/src/main/java/org/apache/impala/analysis/Subquery.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/Subquery.java b/fe/src/main/java/org/apache/impala/analysis/Subquery.java
index cc2c075..1626d18 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Subquery.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Subquery.java
@@ -20,11 +20,11 @@ package org.apache.impala.analysis;
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.impala.catalog.ArrayType;
 import org.apache.impala.catalog.StructField;
 import org.apache.impala.catalog.StructType;
 import org.apache.impala.common.AnalysisException;
+import org.apache.impala.compat.MetastoreShim;
 import org.apache.impala.thrift.TExprNode;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -128,7 +128,7 @@ public class Subquery extends Expr {
       Expr expr = stmtResultExprs.get(i);
       String fieldName = null;
       // Check if the label meets the Metastore's requirements.
-      if (MetaStoreUtils.validateName(labels.get(i))) {
+      if (MetastoreShim.validateName(labels.get(i))) {
         fieldName = labels.get(i);
         // Make sure the field names are unique.
         if (!hasUniqueLabels) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bdad90e6/fe/src/main/java/org/apache/impala/analysis/TableName.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/TableName.java b/fe/src/main/java/org/apache/impala/analysis/TableName.java
index 297948e..4ebef4c 100644
--- a/fe/src/main/java/org/apache/impala/analysis/TableName.java
+++ b/fe/src/main/java/org/apache/impala/analysis/TableName.java
@@ -19,10 +19,10 @@ package org.apache.impala.analysis;
 
 import java.util.List;
 
-import org.apache.hadoop.hive.metastore.MetaStoreUtils;
-
 import org.apache.impala.common.AnalysisException;
+import org.apache.impala.compat.MetastoreShim;
 import org.apache.impala.thrift.TTableName;
+
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 
@@ -54,12 +54,12 @@ public class TableName {
    */
   public void analyze() throws AnalysisException {
     if (db_ != null) {
-      if (!MetaStoreUtils.validateName(db_)) {
+      if (!MetastoreShim.validateName(db_)) {
         throw new AnalysisException("Invalid database name: " + db_);
       }
     }
     Preconditions.checkNotNull(tbl_);
-    if (!MetaStoreUtils.validateName(tbl_)) {
+    if (!MetastoreShim.validateName(tbl_)) {
       throw new AnalysisException("Invalid table/view name: " + tbl_);
     }
   }
@@ -120,4 +120,4 @@ public class TableName {
   public int hashCode() {
     return toString().toLowerCase().hashCode();
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bdad90e6/fe/src/main/java/org/apache/impala/analysis/TypeDef.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/TypeDef.java b/fe/src/main/java/org/apache/impala/analysis/TypeDef.java
index 76e6a8f..f2bf215 100644
--- a/fe/src/main/java/org/apache/impala/analysis/TypeDef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/TypeDef.java
@@ -19,8 +19,6 @@ package org.apache.impala.analysis;
 
 import java.util.Set;
 
-import org.apache.hadoop.hive.metastore.MetaStoreUtils;
-
 import org.apache.impala.catalog.ArrayType;
 import org.apache.impala.catalog.MapType;
 import org.apache.impala.catalog.PrimitiveType;
@@ -29,6 +27,8 @@ import org.apache.impala.catalog.StructField;
 import org.apache.impala.catalog.StructType;
 import org.apache.impala.catalog.Type;
 import org.apache.impala.common.AnalysisException;
+import org.apache.impala.compat.MetastoreShim;
+
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Sets;
 
@@ -128,12 +128,11 @@ public class TypeDef implements ParseNode {
     for (StructField f: structType.getFields()) {
       analyze(f.getType(), analyzer);
       if (!fieldNames.add(f.getName().toLowerCase())) {
-        throw new AnalysisException(
-            String.format("Duplicate field name '%s' in struct '%s'",
-                f.getName(), toSql()));
+        throw new AnalysisException(String.format(
+            "Duplicate field name '%s' in struct '%s'", f.getName(), toSql()));
       }
       // Check whether the column name meets the Metastore's requirements.
-      if (!MetaStoreUtils.validateName(f.getName().toLowerCase())) {
+      if (!MetastoreShim.validateName(f.getName().toLowerCase())) {
         throw new AnalysisException("Invalid struct field name: " + f.getName());
       }
     }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bdad90e6/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java b/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java
index 5cde346..de1a948 100644
--- a/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java
+++ b/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java
@@ -36,6 +36,7 @@ import org.apache.impala.analysis.PartitionKeyValue;
 import org.apache.impala.analysis.ToSqlUtils;
 import org.apache.impala.common.FileSystemUtil;
 import org.apache.impala.common.ImpalaException;
+import org.apache.impala.common.Pair;
 import org.apache.impala.thrift.ImpalaInternalServiceConstants;
 import org.apache.impala.thrift.TAccessLevel;
 import org.apache.impala.thrift.TExpr;
@@ -442,6 +443,9 @@ public class HdfsPartition implements Comparable<HdfsPartition> {
   public Map<String, String> getParameters() { return hmsParameters_; }
 
   public void putToParameters(String k, String v) { hmsParameters_.put(k, v); }
+  public void putToParameters(Pair<String, String> kv) {
+    putToParameters(kv.first, kv.second);
+  }
 
   /**
    * Marks this partition's metadata as "dirty" indicating that changes have been

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bdad90e6/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
index 5011769..9dc247d 100644
--- a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
+++ b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
@@ -93,6 +93,7 @@ import org.apache.impala.common.ImpalaRuntimeException;
 import org.apache.impala.common.InternalException;
 import org.apache.impala.common.Pair;
 import org.apache.impala.common.Reference;
+import org.apache.impala.compat.MetastoreShim;
 import org.apache.impala.thrift.ImpalaInternalServiceConstants;
 import org.apache.impala.thrift.JniCatalogConstants;
 import org.apache.impala.thrift.TAlterTableAddDropRangePartitionParams;
@@ -809,8 +810,7 @@ public class CatalogOpExecutor {
       }
       PartitionStatsUtil.partStatsToParameters(partitionStats, partition);
       partition.putToParameters(StatsSetupConst.ROW_COUNT, String.valueOf(numRows));
-      partition.putToParameters(StatsSetupConst.STATS_GENERATED_VIA_STATS_TASK,
-          StatsSetupConst.TRUE);
+      partition.putToParameters(MetastoreShim.statsGeneratedViaStatsTaskParam());
       ++numTargetedPartitions;
       modifiedParts.add(partition);
     }
@@ -828,8 +828,8 @@ public class CatalogOpExecutor {
     // Update the table's ROW_COUNT parameter.
     msTbl.putToParameters(StatsSetupConst.ROW_COUNT,
         String.valueOf(params.getTable_stats().num_rows));
-    msTbl.putToParameters(StatsSetupConst.STATS_GENERATED_VIA_STATS_TASK,
-        StatsSetupConst.TRUE);
+    Pair<String, String> statsTaskParam = MetastoreShim.statsGeneratedViaStatsTaskParam();
+    msTbl.putToParameters(statsTaskParam.first, statsTaskParam.second);
     return numTargetedPartitions;
   }
 
@@ -2655,7 +2655,7 @@ public class CatalogOpExecutor {
             cacheIds.add(id);
           }
           // Update the partition metadata to include the cache directive id.
-          msClient.getHiveClient().alter_partitions(tableName.getDb(),
+          MetastoreShim.alterPartitions(msClient.getHiveClient(), tableName.getDb(),
               tableName.getTbl(), hmsAddedPartitions);
         }
         updateLastDdlTime(msTbl, msClient);
@@ -2807,8 +2807,8 @@ public class CatalogOpExecutor {
       MetaStoreClient msClient, TableName tableName, List<Partition> hmsPartitions)
       throws ImpalaException {
     try {
-      msClient.getHiveClient().alter_partitions(tableName.getDb(), tableName.getTbl(),
-          hmsPartitions);
+      MetastoreShim.alterPartitions(
+          msClient.getHiveClient(), tableName.getDb(), tableName.getTbl(), hmsPartitions);
       updateLastDdlTime(msTbl, msClient);
     } catch (TException e) {
       throw new ImpalaRuntimeException(
@@ -2962,11 +2962,11 @@ public class CatalogOpExecutor {
             Math.min(i + MAX_PARTITION_UPDATES_PER_RPC, hmsPartitions.size());
         try {
           // Alter partitions in bulk.
-          msClient.getHiveClient().alter_partitions(dbName, tableName,
+          MetastoreShim.alterPartitions(msClient.getHiveClient(), dbName, tableName,
               hmsPartitions.subList(i, endPartitionIndex));
           // Mark the corresponding HdfsPartition objects as dirty
           for (org.apache.hadoop.hive.metastore.api.Partition msPartition:
-               hmsPartitions.subList(i, endPartitionIndex)) {
+              hmsPartitions.subList(i, endPartitionIndex)) {
             try {
               catalog_.getHdfsPartition(dbName, tableName, msPartition).markDirty();
             } catch (PartitionNotFoundException e) {
@@ -3221,7 +3221,7 @@ public class CatalogOpExecutor {
               partition.getSd().setSerdeInfo(msTbl.getSd().getSerdeInfo().deepCopy());
               partition.getSd().setLocation(msTbl.getSd().getLocation() + "/" +
                   partName.substring(0, partName.length() - 1));
-              MetaStoreUtils.updatePartitionStatsFast(partition, warehouse);
+              MetastoreShim.updatePartitionStatsFast(partition, warehouse);
             }
 
             // First add_partitions and then alter_partitions the successful ones with
@@ -3251,7 +3251,7 @@ public class CatalogOpExecutor {
                   }
                 }
                 try {
-                  msClient.getHiveClient().alter_partitions(tblName.getDb(),
+                  MetastoreShim.alterPartitions(msClient.getHiveClient(), tblName.getDb(),
                       tblName.getTbl(), cachedHmsParts);
                 } catch (Exception e) {
                   LOG.error("Failed in alter_partitions: ", e);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bdad90e6/fe/src/main/java/org/apache/impala/service/Frontend.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/service/Frontend.java b/fe/src/main/java/org/apache/impala/service/Frontend.java
index 1348129..13faba5 100644
--- a/fe/src/main/java/org/apache/impala/service/Frontend.java
+++ b/fe/src/main/java/org/apache/impala/service/Frontend.java
@@ -35,10 +35,10 @@ import java.util.concurrent.atomic.AtomicReference;
 
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hive.service.cli.thrift.TGetColumnsReq;
-import org.apache.hive.service.cli.thrift.TGetFunctionsReq;
-import org.apache.hive.service.cli.thrift.TGetSchemasReq;
-import org.apache.hive.service.cli.thrift.TGetTablesReq;
+import org.apache.hive.service.rpc.thrift.TGetColumnsReq;
+import org.apache.hive.service.rpc.thrift.TGetFunctionsReq;
+import org.apache.hive.service.rpc.thrift.TGetSchemasReq;
+import org.apache.hive.service.rpc.thrift.TGetTablesReq;
 import org.apache.impala.analysis.AnalysisContext;
 import org.apache.impala.analysis.CreateDataSrcStmt;
 import org.apache.impala.analysis.CreateDropRoleStmt;
@@ -87,6 +87,7 @@ import org.apache.impala.common.ImpalaException;
 import org.apache.impala.common.InternalException;
 import org.apache.impala.common.NotImplementedException;
 import org.apache.impala.planner.HdfsScanNode;
+import org.apache.impala.compat.MetastoreShim;
 import org.apache.impala.planner.PlanFragment;
 import org.apache.impala.planner.Planner;
 import org.apache.impala.planner.ScanNode;
@@ -1197,32 +1198,12 @@ public class Frontend {
         ImpalaInternalAdminUser.getInstance();
     switch (request.opcode) {
       case GET_TYPE_INFO: return MetadataOp.getTypeInfo();
-      case GET_SCHEMAS:
-      {
-        TGetSchemasReq req = request.getGet_schemas_req();
-        return MetadataOp.getSchemas(this, req.getCatalogName(),
-            req.getSchemaName(), user);
-      }
-      case GET_TABLES:
-      {
-        TGetTablesReq req = request.getGet_tables_req();
-        return MetadataOp.getTables(this, req.getCatalogName(),
-            req.getSchemaName(), req.getTableName(), req.getTableTypes(), user);
-      }
-      case GET_COLUMNS:
-      {
-        TGetColumnsReq req = request.getGet_columns_req();
-        return MetadataOp.getColumns(this, req.getCatalogName(),
-            req.getSchemaName(), req.getTableName(), req.getColumnName(), user);
-      }
+      case GET_SCHEMAS: return MetastoreShim.execGetSchemas(this, request, user);
+      case GET_TABLES: return MetastoreShim.execGetTables(this, request, user);
+      case GET_COLUMNS: return MetastoreShim.execGetColumns(this, request, user);
       case GET_CATALOGS: return MetadataOp.getCatalogs();
       case GET_TABLE_TYPES: return MetadataOp.getTableTypes();
-      case GET_FUNCTIONS:
-      {
-        TGetFunctionsReq req = request.getGet_functions_req();
-        return MetadataOp.getFunctions(this, req.getCatalogName(),
-            req.getSchemaName(), req.getFunctionName(), user);
-      }
+      case GET_FUNCTIONS: return MetastoreShim.execGetFunctions(this, request, user);
       default:
         throw new NotImplementedException(request.opcode + " has not been implemented.");
     }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bdad90e6/fe/src/main/java/org/apache/impala/util/MetaStoreUtil.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/util/MetaStoreUtil.java b/fe/src/main/java/org/apache/impala/util/MetaStoreUtil.java
index 8c9ac24..07435ae 100644
--- a/fe/src/main/java/org/apache/impala/util/MetaStoreUtil.java
+++ b/fe/src/main/java/org/apache/impala/util/MetaStoreUtil.java
@@ -24,11 +24,12 @@ import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.impala.catalog.HdfsTable;
+import org.apache.impala.common.AnalysisException;
+import org.apache.impala.compat.MetastoreShim;
 import org.apache.log4j.Logger;
 import org.apache.thrift.TException;
 
-import org.apache.impala.catalog.HdfsTable;
-import org.apache.impala.common.AnalysisException;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 
@@ -63,8 +64,8 @@ public class MetaStoreUtil {
   static {
     // Get the value from the Hive configuration, if present.
     HiveConf hiveConf = new HiveConf(HdfsTable.class);
-    String strValue = hiveConf.get(
-        HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_TABLE_PARTITION_MAX.toString());
+    String strValue =
+        hiveConf.get(MetastoreShim.metastoreBatchRetrieveObjectsMaxConfigKey());
     if (strValue != null) {
       try {
         maxPartitionsPerRpc_ = Short.parseShort(strValue);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bdad90e6/fe/src/test/java/org/apache/impala/analysis/AuthorizationTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/AuthorizationTest.java b/fe/src/test/java/org/apache/impala/analysis/AuthorizationTest.java
index dfc2580..e9d6698 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AuthorizationTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AuthorizationTest.java
@@ -30,10 +30,10 @@ import java.util.UUID;
 
 import org.apache.hadoop.conf.Configuration;
 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTH_TO_LOCAL;
-import org.apache.hive.service.cli.thrift.TGetColumnsReq;
-import org.apache.hive.service.cli.thrift.TGetSchemasReq;
-import org.apache.hive.service.cli.thrift.TGetTablesReq;
 import org.apache.sentry.provider.common.ResourceAuthorizationProvider;
+import org.apache.hive.service.rpc.thrift.TGetColumnsReq;
+import org.apache.hive.service.rpc.thrift.TGetSchemasReq;
+import org.apache.hive.service.rpc.thrift.TGetTablesReq;
 import org.apache.sentry.provider.file.LocalGroupResourceAuthorizationProvider;
 import org.junit.After;
 import org.junit.AfterClass;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bdad90e6/fe/src/test/java/org/apache/impala/analysis/ParserTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/ParserTest.java b/fe/src/test/java/org/apache/impala/analysis/ParserTest.java
index d0176d2..44a543d 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ParserTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ParserTest.java
@@ -26,10 +26,10 @@ import java.math.BigInteger;
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.impala.analysis.TimestampArithmeticExpr.TimeUnit;
 import org.apache.impala.common.AnalysisException;
 import org.apache.impala.common.FrontendTestBase;
+import org.apache.impala.compat.MetastoreShim;
 import org.apache.impala.testutil.TestUtils;
 import org.junit.Test;
 
@@ -2947,7 +2947,7 @@ public class ParserTest extends FrontendTestBase {
     // may have unquoted identifiers corresponding to keywords.
     for (String keyword: SqlScanner.keywordMap.keySet()) {
       // Skip keywords that are not valid field/column names in the Metastore.
-      if (!MetaStoreUtils.validateName(keyword)) continue;
+      if (!MetastoreShim.validateName(keyword)) continue;
       String structType = "STRUCT<" + keyword + ":INT>";
       TypeDefsParseOk(structType);
     }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bdad90e6/fe/src/test/java/org/apache/impala/service/FrontendTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/service/FrontendTest.java b/fe/src/test/java/org/apache/impala/service/FrontendTest.java
index 24056dc..dbd35b3 100644
--- a/fe/src/test/java/org/apache/impala/service/FrontendTest.java
+++ b/fe/src/test/java/org/apache/impala/service/FrontendTest.java
@@ -25,12 +25,12 @@ import java.util.List;
 import java.util.Set;
 
 import org.apache.commons.lang.exception.ExceptionUtils;
-import org.apache.hive.service.cli.thrift.TGetCatalogsReq;
-import org.apache.hive.service.cli.thrift.TGetColumnsReq;
-import org.apache.hive.service.cli.thrift.TGetFunctionsReq;
-import org.apache.hive.service.cli.thrift.TGetInfoReq;
-import org.apache.hive.service.cli.thrift.TGetSchemasReq;
-import org.apache.hive.service.cli.thrift.TGetTablesReq;
+import org.apache.hive.service.rpc.thrift.TGetCatalogsReq;
+import org.apache.hive.service.rpc.thrift.TGetColumnsReq;
+import org.apache.hive.service.rpc.thrift.TGetFunctionsReq;
+import org.apache.hive.service.rpc.thrift.TGetInfoReq;
+import org.apache.hive.service.rpc.thrift.TGetSchemasReq;
+import org.apache.hive.service.rpc.thrift.TGetTablesReq;
 import org.junit.Test;
 import org.apache.impala.analysis.AuthorizationTest;
 import org.apache.impala.authorization.AuthorizationConfig;



[7/7] incubator-impala git commit: IMPALA-5184: build fe against both Hive 1 & 2 APIs

Posted by ta...@apache.org.
IMPALA-5184: build fe against both Hive 1 & 2 APIs

This adds a compatibility shim layer with Hive 1 and Hive 2
implementations. The version-specific code lives in
fe/src/compat-hive-$IMPALA_HIVE_MAJOR_VERSION and
common/thrift/hive-$IMPALA_HIVE_MAJOR_VERSION-api/

The shim adds wrapper methods to handle differing method signatures
and config variable names that changed slightly.

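To make this concrete, here is a minimal sketch of what the Hive 1 side of
the shim could look like, reconstructed only from the call sites visible in
the earlier diff hunks (TypeDef, ParserTest, CatalogOpExecutor,
MetaStoreUtil); the method bodies and exact signatures are inferred, and the
MetastoreShim.java files actually added by this change are the authoritative
versions:

  package org.apache.impala.compat;

  import java.util.List;

  import org.apache.hadoop.hive.common.StatsSetupConst;
  import org.apache.hadoop.hive.conf.HiveConf;
  import org.apache.hadoop.hive.metastore.IMetaStoreClient;
  import org.apache.hadoop.hive.metastore.MetaStoreUtils;
  import org.apache.hadoop.hive.metastore.api.Partition;
  import org.apache.impala.common.Pair;
  import org.apache.thrift.TException;

  // Hive-1 flavour of the compatibility shim (sketch). Each wrapper simply
  // forwards to the Hive 1 API that the pre-shim code called directly.
  public class MetastoreShim {
    // Pre-shim code called MetaStoreUtils.validateName(name) directly.
    public static boolean validateName(String name) {
      return MetaStoreUtils.validateName(name);
    }

    // Pre-shim code called IMetaStoreClient.alter_partitions(db, tbl, parts).
    public static void alterPartitions(IMetaStoreClient client, String dbName,
        String tableName, List<Partition> partitions) throws TException {
      client.alter_partitions(dbName, tableName, partitions);
    }

    // Parameter key/value that marks stats as generated by a stats task.
    public static Pair<String, String> statsGeneratedViaStatsTaskParam() {
      return new Pair<String, String>(
          StatsSetupConst.STATS_GENERATED_VIA_STATS_TASK, StatsSetupConst.TRUE);
    }

    // Config key used by MetaStoreUtil to size batched partition fetches.
    public static String metastoreBatchRetrieveObjectsMaxConfigKey() {
      return HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_TABLE_PARTITION_MAX
          .toString();
    }
  }
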
Some thrift classes were also moved from the 'cli' to the 'rpc' package.
We work around this by implementing subclasses with the same name in a
different package for compatibility, or by implementing shim methods
that operate on the classes. We also need to change the Java namespace
declared in TCLIService.thrift, which is done with a search-and-replace.

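For the renamed thrift request classes, one possible shape of the
package-compatibility subclasses (the TGet*Req.java files listed in the
diffstat below) is sketched here; it assumes the Hive 1 generated classes
are ordinary, subclassable thrift structs, and the files actually added by
the commit may differ:

  // Hive 1 build: provide the name that Hive 2 moved to the 'rpc' package by
  // subclassing the Hive 1 'cli' class, so Impala code can import
  // org.apache.hive.service.rpc.thrift.* regardless of the Hive version.
  package org.apache.hive.service.rpc.thrift;

  public class TGetTablesReq
      extends org.apache.hive.service.cli.thrift.TGetTablesReq {
    public TGetTablesReq() { super(); }

    // Forward the required-field constructor of the generated class.
    public TGetTablesReq(
        org.apache.hive.service.cli.thrift.TSessionHandle sessionHandle) {
      super(sessionHandle);
    }
  }
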
We also avoid the sticky config variable problem with some of the source
paths by requiring an _OVERRIDE suffix on the environment variable used
to override them.

Testing:
Made sure that I could build Impala on master as normal, and also
with the following config overrides in bin/impala-config-local.sh:

  export IMPALA_HADOOP_VERSION=3.0.0-alpha1-cdh6.x-SNAPSHOT
  export IMPALA_HBASE_VERSION=2.0.0-cdh6.x-SNAPSHOT
  export IMPALA_HIVE_VERSION=2.1.0-cdh6.x-SNAPSHOT
  export IMPALA_SENTRY_VERSION=1.5.1-cdh6.x-SNAPSHOT
  export IMPALA_PARQUET_VERSION=1.5.0-cdh6.x-SNAPSHOT

I manually assembled the dependencies by copying the following files
from the Hive 2 source and from a Hadoop 3 build:
$CDH_COMPONENTS_HOME/hive-2.1.0-cdh6.x-SNAPSHOT/src/metastore/if/hive_metastore.thrift
$CDH_COMPONENTS_HOME/hadoop-3.0.0-alpha1-cdh6.x-SNAPSHOT/lib/native/*
$CDH_COMPONENTS_HOME/hadoop-3.0.0-alpha1-cdh6.x-SNAPSHOT/include/hdfs.h

Change-Id: Ifbc265281c04fe3136bc3c920dbac966742ce09a
Reviewed-on: http://gerrit.cloudera.org:8080/5538
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/bdad90e6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/bdad90e6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/bdad90e6

Branch: refs/heads/master
Commit: bdad90e695860744d5289e0983f7e8e218383b68
Parents: aca07ee
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Tue Oct 25 14:09:01 2016 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Mon May 8 21:36:36 2017 +0000

----------------------------------------------------------------------
 bin/impala-config.sh                            |    8 +-
 common/thrift/.gitignore                        |    1 +
 common/thrift/CMakeLists.txt                    |   26 +-
 common/thrift/TCLIService.thrift                | 1180 ------------------
 common/thrift/hive-1-api/TCLIService.thrift     | 1180 ++++++++++++++++++
 fe/pom.xml                                      |    2 +
 .../service/rpc/thrift/TGetCatalogsReq.java     |   24 +
 .../hive/service/rpc/thrift/TGetColumnsReq.java |   24 +
 .../service/rpc/thrift/TGetFunctionsReq.java    |   25 +
 .../hive/service/rpc/thrift/TGetInfoReq.java    |   24 +
 .../hive/service/rpc/thrift/TGetSchemasReq.java |   24 +
 .../hive/service/rpc/thrift/TGetTablesReq.java  |   24 +
 .../org/apache/impala/compat/MetastoreShim.java |  127 ++
 .../org/apache/impala/compat/MetastoreShim.java |  127 ++
 .../org/apache/impala/analysis/ColumnDef.java   |    3 +-
 .../impala/analysis/CreateDataSrcStmt.java      |    6 +-
 .../apache/impala/analysis/CreateDbStmt.java    |    3 +-
 .../apache/impala/analysis/DropDataSrcStmt.java |   10 +-
 .../org/apache/impala/analysis/Subquery.java    |    4 +-
 .../org/apache/impala/analysis/TableName.java   |   10 +-
 .../org/apache/impala/analysis/TypeDef.java     |   11 +-
 .../apache/impala/catalog/HdfsPartition.java    |    4 +
 .../impala/service/CatalogOpExecutor.java       |   22 +-
 .../org/apache/impala/service/Frontend.java     |   37 +-
 .../org/apache/impala/util/MetaStoreUtil.java   |    9 +-
 .../impala/analysis/AuthorizationTest.java      |    6 +-
 .../org/apache/impala/analysis/ParserTest.java  |    4 +-
 .../org/apache/impala/service/FrontendTest.java |   12 +-
 28 files changed, 1672 insertions(+), 1265 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bdad90e6/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index ada1591..f1b781e 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -351,8 +351,8 @@ export HADOOP_HOME="$CDH_COMPONENTS_HOME/hadoop-${IMPALA_HADOOP_VERSION}/"
 export HADOOP_CONF_DIR="$IMPALA_FE_DIR/src/test/resources"
 # The include and lib paths are needed to pick up hdfs.h and libhdfs.*
 # Allow overriding in case we want to point to a package/install with a different layout.
-export HADOOP_INCLUDE_DIR=${HADOOP_INCLUDE_DIR:-"${HADOOP_HOME}/include"}
-export HADOOP_LIB_DIR=${HADOOP_LIB_DIR:-"${HADOOP_HOME}/lib"}
+export HADOOP_INCLUDE_DIR=${HADOOP_INCLUDE_DIR_OVERRIDE:-"${HADOOP_HOME}/include"}
+export HADOOP_LIB_DIR=${HADOOP_LIB_DIR_OVERRIDE:-"${HADOOP_HOME}/lib"}
 
 # Please note that the * is inside quotes, thus it won't get expanded by bash but
 # by java, see "Understanding class path wildcards" at http://goo.gl/f0cfft
@@ -368,11 +368,13 @@ export MINIKDC_HOME="$CDH_COMPONENTS_HOME/llama-minikdc-${IMPALA_LLAMA_MINIKDC_V
 export SENTRY_HOME="$CDH_COMPONENTS_HOME/sentry-${IMPALA_SENTRY_VERSION}"
 export SENTRY_CONF_DIR="$IMPALA_HOME/fe/src/test/resources"
 
+# Extract the first component of the hive version.
+export IMPALA_HIVE_MAJOR_VERSION=$(echo "$IMPALA_HIVE_VERSION" | cut -d . -f 1)
 export HIVE_HOME="$CDH_COMPONENTS_HOME/hive-${IMPALA_HIVE_VERSION}/"
 export PATH="$HIVE_HOME/bin:$PATH"
 # Allow overriding of Hive source location in case we want to build Impala without
 # a complete Hive build.
-export HIVE_SRC_DIR=${HIVE_SRC_DIR:-"${HIVE_HOME}/src"}
+export HIVE_SRC_DIR=${HIVE_SRC_DIR_OVERRIDE:-"${HIVE_HOME}/src"}
 export HIVE_CONF_DIR="$IMPALA_FE_DIR/src/test/resources"
 
 # Hive looks for jar files in a single directory from HIVE_AUX_JARS_PATH plus

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bdad90e6/common/thrift/.gitignore
----------------------------------------------------------------------
diff --git a/common/thrift/.gitignore b/common/thrift/.gitignore
index 4d3144f..f0ee7f2 100644
--- a/common/thrift/.gitignore
+++ b/common/thrift/.gitignore
@@ -1,3 +1,4 @@
 Opcodes.thrift
 ErrorCodes.thrift
 MetricDefs.thrift
+hive-2-api/TCLIService.thrift

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bdad90e6/common/thrift/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/common/thrift/CMakeLists.txt b/common/thrift/CMakeLists.txt
index 1512e52..59684d9 100644
--- a/common/thrift/CMakeLists.txt
+++ b/common/thrift/CMakeLists.txt
@@ -63,7 +63,7 @@ function(THRIFT_GEN VAR)
       set(CPP_ARGS -r ${THRIFT_INCLUDE_DIR_OPTION} --gen cpp -o ${BE_OUTPUT_DIR})
     ENDIF(FIL STREQUAL "beeswax.thrift")
 
-    IF (FIL STREQUAL "TCLIService.thrift" OR FIL STREQUAL "parquet.thrift")
+    IF (FIL STREQUAL ${TCLI_SERVICE_THRIFT} OR FIL STREQUAL "parquet.thrift")
       # HiveServer2 and Parquet thrift generation
       # Do not generate Java source because we should use the jar from Hive or Parquet
       add_custom_command(
@@ -74,7 +74,7 @@ function(THRIFT_GEN VAR)
         COMMENT "Running thrift compiler on ${FIL}"
         VERBATIM
       )
-    ELSE (FIL STREQUAL "TCLIService.thrift" OR FIL STREQUAL "parquet.thrift")
+    ELSE (FIL STREQUAL ${TCLI_SERVICE_THRIFT} OR FIL STREQUAL "parquet.thrift")
       add_custom_command(
         OUTPUT ${OUTPUT_BE_FILE}
         COMMAND ${THRIFT_COMPILER} ${CPP_ARGS} ${FIL}
@@ -84,7 +84,7 @@ function(THRIFT_GEN VAR)
         COMMENT "Running thrift compiler on ${FIL}"
         VERBATIM
     )
-    ENDIF(FIL STREQUAL "TCLIService.thrift" OR FIL STREQUAL "parquet.thrift")
+    ENDIF(FIL STREQUAL ${TCLI_SERVICE_THRIFT} OR FIL STREQUAL "parquet.thrift")
   endforeach(FIL)
 
   set(${VAR} ${${VAR}} PARENT_SCOPE)
@@ -115,8 +115,12 @@ function(THRIFT_GEN_DS VAR)
   set(${VAR} ${${VAR}} PARENT_SCOPE)
 endfunction(THRIFT_GEN_DS)
 
+
+set(HIVE_THRIFT_SOURCE_DIR "hive-$ENV{IMPALA_HIVE_MAJOR_VERSION}-api")
+set(TCLI_SERVICE_THRIFT "${HIVE_THRIFT_SOURCE_DIR}/TCLIService.thrift")
 message("Using Thrift compiler: ${THRIFT_COMPILER}")
-set(THRIFT_INCLUDE_DIR_OPTION -I ${THRIFT_CONTRIB_DIR} -I $ENV{HIVE_SRC_DIR}/metastore/if)
+set(THRIFT_INCLUDE_DIR_OPTION -I ${THRIFT_CONTRIB_DIR} -I $ENV{HIVE_SRC_DIR}/metastore/if
+  -I ${HIVE_THRIFT_SOURCE_DIR})
 set(BE_OUTPUT_DIR ${CMAKE_SOURCE_DIR}/be/generated-sources)
 set(FE_OUTPUT_DIR ${CMAKE_SOURCE_DIR}/fe/generated-sources)
 # TODO: avoid duplicating generated java classes
@@ -148,7 +152,6 @@ set (SRC_FILES
   CatalogInternalService.thrift
   CatalogObjects.thrift
   CatalogService.thrift
-  TCLIService.thrift
   DataSinks.thrift
   Descriptors.thrift
   ExecStats.thrift
@@ -170,6 +173,7 @@ set (SRC_FILES
   Results.thrift
   RuntimeProfile.thrift
   StatestoreService.thrift
+  ${TCLI_SERVICE_THRIFT}
   ${EXT_DATA_SRC_FILES}
 )
 
@@ -183,6 +187,18 @@ add_custom_command(OUTPUT MetricDefs.thrift
   COMMAND python generate_metrics.py
   DEPENDS generate_metrics.py metrics.json)
 
+# The thrift-generated java classes defined in TCLIService are also pulled into our build
+# in the Hive jars that are downloaded via Maven. Hive2 moved the classes from
+# org.apache.hive.service.cli.thrift to org.apache.hive.service.rpc.thrift. Impala calls
+# various Hive methods that have these classes in the interface and if the packages don't
+# match it won't compile.
+add_custom_command(OUTPUT hive-2-api/TCLIService.thrift
+  COMMAND sed
+      's/namespace java org.apache.hive.service.cli.thrift/namespace java org.apache.hive.service.rpc.thrift/'
+      hive-1-api/TCLIService.thrift > hive-2-api/TCLIService.thrift
+  DEPENDS hive-1-api/TCLIService.thrift
+)
+
 # Create a build command for each of the thrift src files and generate
 # a list of files they produce
 THRIFT_GEN(THRIFT_ALL_FILES ${SRC_FILES})

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bdad90e6/common/thrift/TCLIService.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/TCLIService.thrift b/common/thrift/TCLIService.thrift
deleted file mode 100644
index f95e2f8..0000000
--- a/common/thrift/TCLIService.thrift
+++ /dev/null
@@ -1,1180 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Coding Conventions for this file:
-//
-// Structs/Enums/Unions
-// * Struct, Enum, and Union names begin with a "T",
-//   and use a capital letter for each new word, with no underscores.
-// * All fields should be declared as either optional or required.
-//
-// Functions
-// * Function names start with a capital letter and have a capital letter for
-//   each new word, with no underscores.
-// * Each function should take exactly one parameter, named TFunctionNameReq,
-//   and should return either void or TFunctionNameResp. This convention allows
-//   incremental updates.
-//
-// Services
-// * Service names begin with the letter "T", use a capital letter for each
-//   new word (with no underscores), and end with the word "Service".
-
-namespace java org.apache.hive.service.cli.thrift
-namespace cpp apache.hive.service.cli.thrift
-
-// List of protocol versions. A new token should be
-// added to the end of this list every time a change is made.
-enum TProtocolVersion {
-  HIVE_CLI_SERVICE_PROTOCOL_V1,
-
-  // V2 adds support for asynchronous execution
-  HIVE_CLI_SERVICE_PROTOCOL_V2
-
-  // V3 add varchar type, primitive type qualifiers
-  HIVE_CLI_SERVICE_PROTOCOL_V3
-
-  // V4 add decimal precision/scale, char type
-  HIVE_CLI_SERVICE_PROTOCOL_V4
-
-  // V5 adds error details when GetOperationStatus returns in error state
-  HIVE_CLI_SERVICE_PROTOCOL_V5
-
-  // V6 uses binary type for binary payload (was string) and uses columnar result set
-  HIVE_CLI_SERVICE_PROTOCOL_V6
-
-  // V7 adds support for delegation token based connection
-  HIVE_CLI_SERVICE_PROTOCOL_V7
-}
-
-enum TTypeId {
-  BOOLEAN_TYPE,
-  TINYINT_TYPE,
-  SMALLINT_TYPE,
-  INT_TYPE,
-  BIGINT_TYPE,
-  FLOAT_TYPE,
-  DOUBLE_TYPE,
-  STRING_TYPE,
-  TIMESTAMP_TYPE,
-  BINARY_TYPE,
-  ARRAY_TYPE,
-  MAP_TYPE,
-  STRUCT_TYPE,
-  UNION_TYPE,
-  USER_DEFINED_TYPE,
-  DECIMAL_TYPE,
-  NULL_TYPE,
-  DATE_TYPE,
-  VARCHAR_TYPE,
-  CHAR_TYPE
-}
-
-const set<TTypeId> PRIMITIVE_TYPES = [
-  TTypeId.BOOLEAN_TYPE,
-  TTypeId.TINYINT_TYPE,
-  TTypeId.SMALLINT_TYPE,
-  TTypeId.INT_TYPE,
-  TTypeId.BIGINT_TYPE,
-  TTypeId.FLOAT_TYPE,
-  TTypeId.DOUBLE_TYPE,
-  TTypeId.STRING_TYPE,
-  TTypeId.TIMESTAMP_TYPE,
-  TTypeId.BINARY_TYPE,
-  TTypeId.DECIMAL_TYPE,
-  TTypeId.NULL_TYPE,
-  TTypeId.DATE_TYPE,
-  TTypeId.VARCHAR_TYPE,
-  TTypeId.CHAR_TYPE
-]
-
-const set<TTypeId> COMPLEX_TYPES = [
-  TTypeId.ARRAY_TYPE
-  TTypeId.MAP_TYPE
-  TTypeId.STRUCT_TYPE
-  TTypeId.UNION_TYPE
-  TTypeId.USER_DEFINED_TYPE
-]
-
-const set<TTypeId> COLLECTION_TYPES = [
-  TTypeId.ARRAY_TYPE
-  TTypeId.MAP_TYPE
-]
-
-const map<TTypeId,string> TYPE_NAMES = {
-  TTypeId.BOOLEAN_TYPE: "BOOLEAN",
-  TTypeId.TINYINT_TYPE: "TINYINT",
-  TTypeId.SMALLINT_TYPE: "SMALLINT",
-  TTypeId.INT_TYPE: "INT",
-  TTypeId.BIGINT_TYPE: "BIGINT",
-  TTypeId.FLOAT_TYPE: "FLOAT",
-  TTypeId.DOUBLE_TYPE: "DOUBLE",
-  TTypeId.STRING_TYPE: "STRING",
-  TTypeId.TIMESTAMP_TYPE: "TIMESTAMP",
-  TTypeId.BINARY_TYPE: "BINARY",
-  TTypeId.ARRAY_TYPE: "ARRAY",
-  TTypeId.MAP_TYPE: "MAP",
-  TTypeId.STRUCT_TYPE: "STRUCT",
-  TTypeId.UNION_TYPE: "UNIONTYPE",
-  TTypeId.DECIMAL_TYPE: "DECIMAL",
-  TTypeId.NULL_TYPE: "NULL"
-  TTypeId.DATE_TYPE: "DATE"
-  TTypeId.VARCHAR_TYPE: "VARCHAR"
-  TTypeId.CHAR_TYPE: "CHAR"
-}
-
-// Thrift does not support recursively defined types or forward declarations,
-// which makes it difficult to represent Hive's nested types.
-// To get around these limitations TTypeDesc employs a type list that maps
-// integer "pointers" to TTypeEntry objects. The following examples show
-// how different types are represented using this scheme:
-//
-// "INT":
-// TTypeDesc {
-//   types = [
-//     TTypeEntry.primitive_entry {
-//       type = INT_TYPE
-//     }
-//   ]
-// }
-//
-// "ARRAY<INT>":
-// TTypeDesc {
-//   types = [
-//     TTypeEntry.array_entry {
-//       object_type_ptr = 1
-//     },
-//     TTypeEntry.primitive_entry {
-//       type = INT_TYPE
-//     }
-//   ]
-// }
-//
-// "MAP<INT,STRING>":
-// TTypeDesc {
-//   types = [
-//     TTypeEntry.map_entry {
-//       key_type_ptr = 1
-//       value_type_ptr = 2
-//     },
-//     TTypeEntry.primitive_entry {
-//       type = INT_TYPE
-//     },
-//     TTypeEntry.primitive_entry {
-//       type = STRING_TYPE
-//     }
-//   ]
-// }
-
-typedef i32 TTypeEntryPtr
-
-// Valid TTypeQualifiers key names
-const string CHARACTER_MAXIMUM_LENGTH = "characterMaximumLength"
-
-// Type qualifier key name for decimal
-const string PRECISION = "precision"
-const string SCALE = "scale"
-
-union TTypeQualifierValue {
-  1: optional i32 i32Value
-  2: optional string stringValue
-}
-
-// Type qualifiers for primitive type.
-struct TTypeQualifiers {
-  1: required map <string, TTypeQualifierValue> qualifiers
-}
-
-// Type entry for a primitive type.
-struct TPrimitiveTypeEntry {
-  // The primitive type token. This must satisfy the condition
-  // that type is in the PRIMITIVE_TYPES set.
-  1: required TTypeId type
-  2: optional TTypeQualifiers typeQualifiers
-}
-
-// Type entry for an ARRAY type.
-struct TArrayTypeEntry {
-  1: required TTypeEntryPtr objectTypePtr
-}
-
-// Type entry for a MAP type.
-struct TMapTypeEntry {
-  1: required TTypeEntryPtr keyTypePtr
-  2: required TTypeEntryPtr valueTypePtr
-}
-
-// Type entry for a STRUCT type.
-struct TStructTypeEntry {
-  1: required map<string, TTypeEntryPtr> nameToTypePtr
-}
-
-// Type entry for a UNIONTYPE type.
-struct TUnionTypeEntry {
-  1: required map<string, TTypeEntryPtr> nameToTypePtr
-}
-
-struct TUserDefinedTypeEntry {
-  // The fully qualified name of the class implementing this type.
-  1: required string typeClassName
-}
-
-// We use a union here since Thrift does not support inheritance.
-union TTypeEntry {
-  1: TPrimitiveTypeEntry primitiveEntry
-  2: TArrayTypeEntry arrayEntry
-  3: TMapTypeEntry mapEntry
-  4: TStructTypeEntry structEntry
-  5: TUnionTypeEntry unionEntry
-  6: TUserDefinedTypeEntry userDefinedTypeEntry
-}
-
-// Type descriptor for columns.
-struct TTypeDesc {
-  // The "top" type is always the first element of the list.
-  // If the top type is an ARRAY, MAP, STRUCT, or UNIONTYPE
-  // type, then subsequent elements represent nested types.
-  1: required list<TTypeEntry> types
-}
-
-// A result set column descriptor.
-struct TColumnDesc {
-  // The name of the column
-  1: required string columnName
-
-  // The type descriptor for this column
-  2: required TTypeDesc typeDesc
-
-  // The ordinal position of this column in the schema
-  3: required i32 position
-
-  4: optional string comment
-}
-
-// Metadata used to describe the schema (column names, types, comments)
-// of result sets.
-struct TTableSchema {
-  1: required list<TColumnDesc> columns
-}
-
-// A Boolean column value.
-struct TBoolValue {
-  // NULL if value is unset.
-  1: optional bool value
-}
-
-// A Byte column value.
-struct TByteValue {
-  // NULL if value is unset.
-  1: optional byte value
-}
-
-// A signed, 16 bit column value.
-struct TI16Value {
-  // NULL if value is unset
-  1: optional i16 value
-}
-
-// A signed, 32 bit column value
-struct TI32Value {
-  // NULL if value is unset
-  1: optional i32 value
-}
-
-// A signed 64 bit column value
-struct TI64Value {
-  // NULL if value is unset
-  1: optional i64 value
-}
-
-// A floating point 64 bit column value
-struct TDoubleValue {
-  // NULL if value is unset
-  1: optional double value
-}
-
-struct TStringValue {
-  // NULL if value is unset
-  1: optional string value
-}
-
-// A single column value in a result set.
-// Note that Hive's type system is richer than Thrift's,
-// so in some cases we have to map multiple Hive types
-// to the same Thrift type. On the client-side this is
-// disambiguated by looking at the Schema of the
-// result set.
-union TColumnValue {
-  1: TBoolValue   boolVal      // BOOLEAN
-  2: TByteValue   byteVal      // TINYINT
-  3: TI16Value    i16Val       // SMALLINT
-  4: TI32Value    i32Val       // INT
-  5: TI64Value    i64Val       // BIGINT, TIMESTAMP
-  6: TDoubleValue doubleVal    // FLOAT, DOUBLE
-  7: TStringValue stringVal    // STRING, LIST, MAP, STRUCT, UNIONTYPE, BINARY, DECIMAL, NULL
-}
-
-// Represents a row in a rowset.
-struct TRow {
-  1: required list<TColumnValue> colVals
-}
-
-struct TBoolColumn {
-  1: required list<bool> values
-  2: required binary nulls
-}
-
-struct TByteColumn {
-  1: required list<byte> values
-  2: required binary nulls
-}
-
-struct TI16Column {
-  1: required list<i16> values
-  2: required binary nulls
-}
-
-struct TI32Column {
-  1: required list<i32> values
-  2: required binary nulls
-}
-
-struct TI64Column {
-  1: required list<i64> values
-  2: required binary nulls
-}
-
-struct TDoubleColumn {
-  1: required list<double> values
-  2: required binary nulls
-}
-
-struct TStringColumn {
-  1: required list<string> values
-  2: required binary nulls
-}
-
-struct TBinaryColumn {
-  1: required list<binary> values
-  2: required binary nulls
-}
-
-// Note that Hive's type system is richer than Thrift's,
-// so in some cases we have to map multiple Hive types
-// to the same Thrift type. On the client-side this is
-// disambiguated by looking at the Schema of the
-// result set.
-union TColumn {
-  1: TBoolColumn   boolVal      // BOOLEAN
-  2: TByteColumn   byteVal      // TINYINT
-  3: TI16Column    i16Val       // SMALLINT
-  4: TI32Column    i32Val       // INT
-  5: TI64Column    i64Val       // BIGINT, TIMESTAMP
-  6: TDoubleColumn doubleVal    // FLOAT, DOUBLE
-  7: TStringColumn stringVal    // STRING, LIST, MAP, STRUCT, UNIONTYPE, DECIMAL, NULL
-  8: TBinaryColumn binaryVal    // BINARY
-}
-
-// Represents a rowset
-struct TRowSet {
-  // The starting row offset of this rowset.
-  1: required i64 startRowOffset
-  2: required list<TRow> rows
-  3: optional list<TColumn> columns
-}
-
-// The return status code contained in each response.
-enum TStatusCode {
-  SUCCESS_STATUS,
-  SUCCESS_WITH_INFO_STATUS,
-  STILL_EXECUTING_STATUS,
-  ERROR_STATUS,
-  INVALID_HANDLE_STATUS
-}
-
-// The return status of a remote request
-struct TStatus {
-  1: required TStatusCode statusCode
-
-  // If status is SUCCESS_WITH_INFO, info_msgs may be populated with
-  // additional diagnostic information.
-  2: optional list<string> infoMessages
-
-  // If status is ERROR, then the following fields may be set
-  3: optional string sqlState  // as defined in the ISO/IEF CLI specification
-  4: optional i32 errorCode    // internal error code
-  5: optional string errorMessage
-}
-
-// The state of an operation (i.e. a query or other
-// asynchronous operation that generates a result set)
-// on the server.
-enum TOperationState {
-  // The operation has been initialized
-  INITIALIZED_STATE,
-
-  // The operation is running. In this state the result
-  // set is not available.
-  RUNNING_STATE,
-
-  // The operation has completed. When an operation is in
-  // this state its result set may be fetched.
-  FINISHED_STATE,
-
-  // The operation was canceled by a client
-  CANCELED_STATE,
-
-  // The operation was closed by a client
-  CLOSED_STATE,
-
-  // The operation failed due to an error
-  ERROR_STATE,
-
-  // The operation is in an unrecognized state
-  UKNOWN_STATE,
-
-  // The operation is in an pending state
-  PENDING_STATE,
-}
-
-// A string identifier. This is interpreted literally.
-typedef string TIdentifier
-
-// A search pattern.
-//
-// Valid search pattern characters:
-// '_': Any single character.
-// '%': Any sequence of zero or more characters.
-// '\': Escape character used to include special characters,
-//      e.g. '_', '%', '\'. If a '\' precedes a non-special
-//      character it has no special meaning and is interpreted
-//      literally.
-typedef string TPattern
-
-
-// A search pattern or identifier. Used as input
-// parameter for many of the catalog functions.
-typedef string TPatternOrIdentifier
-
-struct THandleIdentifier {
-  // 16 byte globally unique identifier
-  // This is the public ID of the handle and
-  // can be used for reporting.
-  1: required binary guid,
-
-  // 16 byte secret generated by the server
-  // and used to verify that the handle is not
-  // being hijacked by another user.
-  2: required binary secret,
-}
-
-// Client-side handle to persistent
-// session information on the server-side.
-struct TSessionHandle {
-  1: required THandleIdentifier sessionId
-}
-
-// The subtype of an OperationHandle.
-enum TOperationType {
-  EXECUTE_STATEMENT,
-  GET_TYPE_INFO,
-  GET_CATALOGS,
-  GET_SCHEMAS,
-  GET_TABLES,
-  GET_TABLE_TYPES,
-  GET_COLUMNS,
-  GET_FUNCTIONS,
-  UNKNOWN,
-}
-
-// Client-side reference to a task running
-// asynchronously on the server.
-struct TOperationHandle {
-  1: required THandleIdentifier operationId
-  2: required TOperationType operationType
-
-  // If hasResultSet = TRUE, then this operation
-  // generates a result set that can be fetched.
-  // Note that the result set may be empty.
-  //
-  // If hasResultSet = FALSE, then this operation
-  // does not generate a result set, and calling
-  // GetResultSetMetadata or FetchResults against
-  // this OperationHandle will generate an error.
-  3: required bool hasResultSet
-
-  // For operations that don't generate result sets,
-  // modifiedRowCount is either:
-  //
-  // 1) The number of rows that were modified by
-  //    the DML operation (e.g. number of rows inserted,
-  //    number of rows deleted, etc).
-  //
-  // 2) 0 for operations that don't modify or add rows.
-  //
-  // 3) < 0 if the operation is capable of modifiying rows,
-  //    but Hive is unable to determine how many rows were
-  //    modified. For example, Hive's LOAD DATA command
-  //    doesn't generate row count information because
-  //    Hive doesn't inspect the data as it is loaded.
-  //
-  // modifiedRowCount is unset if the operation generates
-  // a result set.
-  4: optional double modifiedRowCount
-}
-
-
-// OpenSession()
-//
-// Open a session (connection) on the server against
-// which operations may be executed.
-struct TOpenSessionReq {
-  // The version of the HiveServer2 protocol that the client is using.
-  1: required TProtocolVersion client_protocol = TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6
-
-  // Username and password for authentication.
-  // Depending on the authentication scheme being used,
-  // this information may instead be provided by a lower
-  // protocol layer, in which case these fields may be
-  // left unset.
-  2: optional string username
-  3: optional string password
-
-  // Configuration overlay which is applied when the session is
-  // first created.
-  4: optional map<string, string> configuration
-}
-
-struct TOpenSessionResp {
-  1: required TStatus status
-
-  // The protocol version that the server is using.
-  2: required TProtocolVersion serverProtocolVersion = TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6
-
-  // Session Handle
-  3: optional TSessionHandle sessionHandle
-
-  // The configuration settings for this session.
-  4: optional map<string, string> configuration
-}
-
-
-// CloseSession()
-//
-// Closes the specified session and frees any resources
-// currently allocated to that session. Any open
-// operations in that session will be canceled.
-struct TCloseSessionReq {
-  1: required TSessionHandle sessionHandle
-}
-
-struct TCloseSessionResp {
-  1: required TStatus status
-}
-
-
-
-enum TGetInfoType {
-  CLI_MAX_DRIVER_CONNECTIONS =           0,
-  CLI_MAX_CONCURRENT_ACTIVITIES =        1,
-  CLI_DATA_SOURCE_NAME =                 2,
-  CLI_FETCH_DIRECTION =                  8,
-  CLI_SERVER_NAME =                      13,
-  CLI_SEARCH_PATTERN_ESCAPE =            14,
-  CLI_DBMS_NAME =                        17,
-  CLI_DBMS_VER =                         18,
-  CLI_ACCESSIBLE_TABLES =                19,
-  CLI_ACCESSIBLE_PROCEDURES =            20,
-  CLI_CURSOR_COMMIT_BEHAVIOR =           23,
-  CLI_DATA_SOURCE_READ_ONLY =            25,
-  CLI_DEFAULT_TXN_ISOLATION =            26,
-  CLI_IDENTIFIER_CASE =                  28,
-  CLI_IDENTIFIER_QUOTE_CHAR =            29,
-  CLI_MAX_COLUMN_NAME_LEN =              30,
-  CLI_MAX_CURSOR_NAME_LEN =              31,
-  CLI_MAX_SCHEMA_NAME_LEN =              32,
-  CLI_MAX_CATALOG_NAME_LEN =             34,
-  CLI_MAX_TABLE_NAME_LEN =               35,
-  CLI_SCROLL_CONCURRENCY =               43,
-  CLI_TXN_CAPABLE =                      46,
-  CLI_USER_NAME =                        47,
-  CLI_TXN_ISOLATION_OPTION =             72,
-  CLI_INTEGRITY =                        73,
-  CLI_GETDATA_EXTENSIONS =               81,
-  CLI_NULL_COLLATION =                   85,
-  CLI_ALTER_TABLE =                      86,
-  CLI_ORDER_BY_COLUMNS_IN_SELECT =       90,
-  CLI_SPECIAL_CHARACTERS =               94,
-  CLI_MAX_COLUMNS_IN_GROUP_BY =          97,
-  CLI_MAX_COLUMNS_IN_INDEX =             98,
-  CLI_MAX_COLUMNS_IN_ORDER_BY =          99,
-  CLI_MAX_COLUMNS_IN_SELECT =            100,
-  CLI_MAX_COLUMNS_IN_TABLE =             101,
-  CLI_MAX_INDEX_SIZE =                   102,
-  CLI_MAX_ROW_SIZE =                     104,
-  CLI_MAX_STATEMENT_LEN =                105,
-  CLI_MAX_TABLES_IN_SELECT =             106,
-  CLI_MAX_USER_NAME_LEN =                107,
-  CLI_OJ_CAPABILITIES =                  115,
-
-  CLI_XOPEN_CLI_YEAR =                   10000,
-  CLI_CURSOR_SENSITIVITY =               10001,
-  CLI_DESCRIBE_PARAMETER =               10002,
-  CLI_CATALOG_NAME =                     10003,
-  CLI_COLLATION_SEQ =                    10004,
-  CLI_MAX_IDENTIFIER_LEN =               10005,
-}
-
-union TGetInfoValue {
-  1: string stringValue
-  2: i16 smallIntValue
-  3: i32 integerBitmask
-  4: i32 integerFlag
-  5: i32 binaryValue
-  6: i64 lenValue
-}
-
-// GetInfo()
-//
-// This function is based on ODBC's CLIGetInfo() function.
-// The function returns general information about the data source
-// using the same keys as ODBC.
-struct TGetInfoReq {
-  // The sesssion to run this request against
-  1: required TSessionHandle sessionHandle
-
-  2: required TGetInfoType infoType
-}
-
-struct TGetInfoResp {
-  1: required TStatus status
-
-  2: required TGetInfoValue infoValue
-}
-
-
-// ExecuteStatement()
-//
-// Execute a statement.
-// The returned OperationHandle can be used to check on the
-// status of the statement, and to fetch results once the
-// statement has finished executing.
-struct TExecuteStatementReq {
-  // The session to execute the statement against
-  1: required TSessionHandle sessionHandle
-
-  // The statement to be executed (DML, DDL, SET, etc)
-  2: required string statement
-
-  // Configuration properties that are overlayed on top of the
-  // the existing session configuration before this statement
-  // is executed. These properties apply to this statement
-  // only and will not affect the subsequent state of the Session.
-  3: optional map<string, string> confOverlay
-
-  // Execute asynchronously when runAsync is true
-  4: optional bool runAsync = false
-}
-
-struct TExecuteStatementResp {
-  1: required TStatus status
-  2: optional TOperationHandle operationHandle
-}
-
-// GetTypeInfo()
-//
-// Get information about types supported by the HiveServer instance.
-// The information is returned as a result set which can be fetched
-// using the OperationHandle provided in the response.
-//
-// Refer to the documentation for ODBC's CLIGetTypeInfo function for
-// the format of the result set.
-struct TGetTypeInfoReq {
-  // The session to run this request against.
-  1: required TSessionHandle sessionHandle
-}
-
-struct TGetTypeInfoResp {
-  1: required TStatus status
-  2: optional TOperationHandle operationHandle
-}
-
-
-// GetCatalogs()
-//
-// Returns the list of catalogs (databases)
-// Results are ordered by TABLE_CATALOG
-//
-// Resultset columns :
-// col1
-// name: TABLE_CAT
-// type: STRING
-// desc: Catalog name. NULL if not applicable.
-//
-struct TGetCatalogsReq {
-  // Session to run this request against
-  1: required TSessionHandle sessionHandle
-}
-
-struct TGetCatalogsResp {
-  1: required TStatus status
-  2: optional TOperationHandle operationHandle
-}
-
-
-// GetSchemas()
-//
-// Retrieves the schema names available in this database.
-// The results are ordered by TABLE_CATALOG and TABLE_SCHEM.
-// col1
-// name: TABLE_SCHEM
-// type: STRING
-// desc: schema name
-// col2
-// name: TABLE_CATALOG
-// type: STRING
-// desc: catalog name
-struct TGetSchemasReq {
-  // Session to run this request against
-  1: required TSessionHandle sessionHandle
-
-  // Name of the catalog. Must not contain a search pattern.
-  2: optional TIdentifier catalogName
-
-  // schema name or pattern
-  3: optional TPatternOrIdentifier schemaName
-}
-
-struct TGetSchemasResp {
-  1: required TStatus status
-  2: optional TOperationHandle operationHandle
-}
-
-
-// GetTables()
-//
-// Returns a list of tables with catalog, schema, and table
-// type information. The information is returned as a result
-// set which can be fetched using the OperationHandle
-// provided in the response.
-// Results are ordered by TABLE_TYPE, TABLE_CAT, TABLE_SCHEM, and TABLE_NAME
-//
-// Result Set Columns:
-//
-// col1
-// name: TABLE_CAT
-// type: STRING
-// desc: Catalog name. NULL if not applicable.
-//
-// col2
-// name: TABLE_SCHEM
-// type: STRING
-// desc: Schema name.
-//
-// col3
-// name: TABLE_NAME
-// type: STRING
-// desc: Table name.
-//
-// col4
-// name: TABLE_TYPE
-// type: STRING
-// desc: The table type, e.g. "TABLE", "VIEW", etc.
-//
-// col5
-// name: REMARKS
-// type: STRING
-// desc: Comments about the table
-//
-struct TGetTablesReq {
-  // Session to run this request against
-  1: required TSessionHandle sessionHandle
-
-  // Name of the catalog or a search pattern.
-  2: optional TPatternOrIdentifier catalogName
-
-  // Name of the schema or a search pattern.
-  3: optional TPatternOrIdentifier schemaName
-
-  // Name of the table or a search pattern.
-  4: optional TPatternOrIdentifier tableName
-
-  // List of table types to match
-  // e.g. "TABLE", "VIEW", "SYSTEM TABLE", "GLOBAL TEMPORARY",
-  // "LOCAL TEMPORARY", "ALIAS", "SYNONYM", etc.
-  5: optional list<string> tableTypes
-}
-
-struct TGetTablesResp {
-  1: required TStatus status
-  2: optional TOperationHandle operationHandle
-}
-
-
-// GetTableTypes()
-//
-// Returns the table types available in this database.
-// The results are ordered by table type.
-//
-// col1
-// name: TABLE_TYPE
-// type: STRING
-// desc: Table type name.
-struct TGetTableTypesReq {
-  // Session to run this request against
-  1: required TSessionHandle sessionHandle
-}
-
-struct TGetTableTypesResp {
-  1: required TStatus status
-  2: optional TOperationHandle operationHandle
-}
-
-
-// GetColumns()
-//
-// Returns a list of columns in the specified tables.
-// The information is returned as a result set which can be fetched
-// using the OperationHandle provided in the response.
-// Results are ordered by TABLE_CAT, TABLE_SCHEM, TABLE_NAME,
-// and ORDINAL_POSITION.
-//
-// Result Set Columns are the same as those for the ODBC CLIColumns
-// function.
-//
-struct TGetColumnsReq {
-  // Session to run this request against
-  1: required TSessionHandle sessionHandle
-
-  // Name of the catalog. Must not contain a search pattern.
-  2: optional TIdentifier catalogName
-
-  // Schema name or search pattern
-  3: optional TPatternOrIdentifier schemaName
-
-  // Table name or search pattern
-  4: optional TPatternOrIdentifier tableName
-
-  // Column name or search pattern
-  5: optional TPatternOrIdentifier columnName
-}
-
-struct TGetColumnsResp {
-  1: required TStatus status
-  2: optional TOperationHandle operationHandle
-}
-
-
-// GetFunctions()
-//
-// Returns a list of functions supported by the data source. The
-// behavior of this function matches
-// java.sql.DatabaseMetaData.getFunctions() both in terms of
-// inputs and outputs.
-//
-// Result Set Columns:
-//
-// col1
-// name: FUNCTION_CAT
-// type: STRING
-// desc: Function catalog (may be null)
-//
-// col2
-// name: FUNCTION_SCHEM
-// type: STRING
-// desc: Function schema (may be null)
-//
-// col3
-// name: FUNCTION_NAME
-// type: STRING
-// desc: Function name. This is the name used to invoke the function.
-//
-// col4
-// name: REMARKS
-// type: STRING
-// desc: Explanatory comment on the function.
-//
-// col5
-// name: FUNCTION_TYPE
-// type: SMALLINT
-// desc: Kind of function. One of:
-//       * functionResultUnknown - Cannot determine if a return value or a table
-//                                 will be returned.
-//       * functionNoTable       - Does not a return a table.
-//       * functionReturnsTable  - Returns a table.
-//
-// col6
-// name: SPECIFIC_NAME
-// type: STRING
-// desc: The name which uniquely identifies this function within its schema.
-//       In this case this is the fully qualified class name of the class
-//       that implements this function.
-//
-struct TGetFunctionsReq {
-  // Session to run this request against
-  1: required TSessionHandle sessionHandle
-
-  // A catalog name; must match the catalog name as it is stored in the
-  // database; "" retrieves those without a catalog; null means
-  // that the catalog name should not be used to narrow the search.
-  2: optional TIdentifier catalogName
-
-  // A schema name pattern; must match the schema name as it is stored
-  // in the database; "" retrieves those without a schema; null means
-  // that the schema name should not be used to narrow the search.
-  3: optional TPatternOrIdentifier schemaName
-
-  // A function name pattern; must match the function name as it is stored
-  // in the database.
-  4: required TPatternOrIdentifier functionName
-}
-
-struct TGetFunctionsResp {
-  1: required TStatus status
-  2: optional TOperationHandle operationHandle
-}
-
-
-// GetOperationStatus()
-//
-// Get the status of an operation running on the server.
-struct TGetOperationStatusReq {
-  // Session to run this request against
-  1: required TOperationHandle operationHandle
-}
-
-struct TGetOperationStatusResp {
-  1: required TStatus status
-  2: optional TOperationState operationState
-
-  // If operationState is ERROR_STATE, then the following fields may be set
-  // sqlState as defined in the ISO/IEF CLI specification
-  3: optional string sqlState
-
-  // Internal error code
-  4: optional i32 errorCode
-
-  // Error message
-  5: optional string errorMessage
-}
-
-
-// CancelOperation()
-//
-// Cancels processing on the specified operation handle and
-// frees any resources which were allocated.
-struct TCancelOperationReq {
-  // Operation to cancel
-  1: required TOperationHandle operationHandle
-}
-
-struct TCancelOperationResp {
-  1: required TStatus status
-}
-
-
-// CloseOperation()
-//
-// Given an operation in the FINISHED, CANCELED,
-// or ERROR states, CloseOperation() will free
-// all of the resources which were allocated on
-// the server to service the operation.
-struct TCloseOperationReq {
-  1: required TOperationHandle operationHandle
-}
-
-struct TCloseOperationResp {
-  1: required TStatus status
-}
-
-
-// GetResultSetMetadata()
-//
-// Retrieves schema information for the specified operation
-struct TGetResultSetMetadataReq {
-  // Operation for which to fetch result set schema information
-  1: required TOperationHandle operationHandle
-}
-
-struct TGetResultSetMetadataResp {
-  1: required TStatus status
-  2: optional TTableSchema schema
-}
-
-
-enum TFetchOrientation {
-  // Get the next rowset. The fetch offset is ignored.
-  FETCH_NEXT,
-
-  // Get the previous rowset. The fetch offset is ignored.
-  // NOT SUPPORTED
-  FETCH_PRIOR,
-
-  // Return the rowset at the given fetch offset relative
-  // to the curren rowset.
-  // NOT SUPPORTED
-  FETCH_RELATIVE,
-
-  // Return the rowset at the specified fetch offset.
-  // NOT SUPPORTED
-  FETCH_ABSOLUTE,
-
-  // Get the first rowset in the result set.
-  FETCH_FIRST,
-
-  // Get the last rowset in the result set.
-  // NOT SUPPORTED
-  FETCH_LAST
-}
-
-// FetchResults()
-//
-// Fetch rows from the server corresponding to
-// a particular OperationHandle.
-struct TFetchResultsReq {
-  // Operation from which to fetch results.
-  1: required TOperationHandle operationHandle
-
-  // The fetch orientation. For V1 this must be either
-  // FETCH_NEXT or FETCH_FIRST. Defaults to FETCH_NEXT.
-  2: required TFetchOrientation orientation = TFetchOrientation.FETCH_NEXT
-
-  // Max number of rows that should be returned in
-  // the rowset.
-  3: required i64 maxRows
-}
-
-struct TFetchResultsResp {
-  1: required TStatus status
-
-  // TRUE if there are more rows left to fetch from the server.
-  2: optional bool hasMoreRows
-
-  // The rowset. This is optional so that we have the
-  // option in the future of adding alternate formats for
-  // representing result set data, e.g. delimited strings,
-  // binary encoded, etc.
-  3: optional TRowSet results
-}
-
-// GetDelegationToken()
-// Retrieve delegation token for the current user
-struct  TGetDelegationTokenReq {
-  // session handle
-  1: required TSessionHandle sessionHandle
-
-  // userid for the proxy user
-  2: required string owner
-
-  // designated renewer userid
-  3: required string renewer
-}
-
-struct TGetDelegationTokenResp {
-  // status of the request
-  1: required TStatus status
-
-  // delegation token string
-  2: optional string delegationToken
-}
-
-// CancelDelegationToken()
-// Cancel the given delegation token
-struct TCancelDelegationTokenReq {
-  // session handle
-  1: required TSessionHandle sessionHandle
-
-  // delegation token to cancel
-  2: required string delegationToken
-}
-
-struct TCancelDelegationTokenResp {
-  // status of the request
-  1: required TStatus status
-}
-
-// RenewDelegationToken()
-// Renew the given delegation token
-struct TRenewDelegationTokenReq {
-  // session handle
-  1: required TSessionHandle sessionHandle
-
-  // delegation token to renew
-  2: required string delegationToken
-}
-
-struct TRenewDelegationTokenResp {
-  // status of the request
-  1: required TStatus status
-}
-
-// GetLog()
-// Not present in Hive 0.13, re-added for backwards compatibility.
-//
-// Fetch operation log from the server corresponding to
-// a particular OperationHandle.
-struct TGetLogReq {
-  // Operation whose log is requested
-  1: required TOperationHandle operationHandle
-}
-
-struct TGetLogResp {
-  1: required TStatus status
-  2: required string log
-}
-
-service TCLIService {
-
-  TOpenSessionResp OpenSession(1:TOpenSessionReq req);
-
-  TCloseSessionResp CloseSession(1:TCloseSessionReq req);
-
-  TGetInfoResp GetInfo(1:TGetInfoReq req);
-
-  TExecuteStatementResp ExecuteStatement(1:TExecuteStatementReq req);
-
-  TGetTypeInfoResp GetTypeInfo(1:TGetTypeInfoReq req);
-
-  TGetCatalogsResp GetCatalogs(1:TGetCatalogsReq req);
-
-  TGetSchemasResp GetSchemas(1:TGetSchemasReq req);
-
-  TGetTablesResp GetTables(1:TGetTablesReq req);
-
-  TGetTableTypesResp GetTableTypes(1:TGetTableTypesReq req);
-
-  TGetColumnsResp GetColumns(1:TGetColumnsReq req);
-
-  TGetFunctionsResp GetFunctions(1:TGetFunctionsReq req);
-
-  TGetOperationStatusResp GetOperationStatus(1:TGetOperationStatusReq req);
-
-  TCancelOperationResp CancelOperation(1:TCancelOperationReq req);
-
-  TCloseOperationResp CloseOperation(1:TCloseOperationReq req);
-
-  TGetResultSetMetadataResp GetResultSetMetadata(1:TGetResultSetMetadataReq req);
-
-  TFetchResultsResp FetchResults(1:TFetchResultsReq req);
-
-  TGetDelegationTokenResp GetDelegationToken(1:TGetDelegationTokenReq req);
-
-  TCancelDelegationTokenResp CancelDelegationToken(1:TCancelDelegationTokenReq req);
-
-  TRenewDelegationTokenResp RenewDelegationToken(1:TRenewDelegationTokenReq req);
-
-  // Not present in Hive 0.13, re-added for backwards compatibility.
-  TGetLogResp GetLog(1:TGetLogReq req);
-}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bdad90e6/common/thrift/hive-1-api/TCLIService.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/hive-1-api/TCLIService.thrift b/common/thrift/hive-1-api/TCLIService.thrift
new file mode 100644
index 0000000..f95e2f8
--- /dev/null
+++ b/common/thrift/hive-1-api/TCLIService.thrift
@@ -0,0 +1,1180 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Coding Conventions for this file:
+//
+// Structs/Enums/Unions
+// * Struct, Enum, and Union names begin with a "T",
+//   and use a capital letter for each new word, with no underscores.
+// * All fields should be declared as either optional or required.
+//
+// Functions
+// * Function names start with a capital letter and have a capital letter for
+//   each new word, with no underscores.
+// * Each function should take exactly one parameter, named TFunctionNameReq,
+//   and should return either void or TFunctionNameResp. This convention allows
+//   incremental updates.
+//
+// Services
+// * Service names begin with the letter "T", use a capital letter for each
+//   new word (with no underscores), and end with the word "Service".
+
+namespace java org.apache.hive.service.cli.thrift
+namespace cpp apache.hive.service.cli.thrift
+
+// List of protocol versions. A new token should be
+// added to the end of this list every time a change is made.
+enum TProtocolVersion {
+  HIVE_CLI_SERVICE_PROTOCOL_V1,
+
+  // V2 adds support for asynchronous execution
+  HIVE_CLI_SERVICE_PROTOCOL_V2
+
+  // V3 adds varchar type, primitive type qualifiers
+  HIVE_CLI_SERVICE_PROTOCOL_V3
+
+  // V4 adds decimal precision/scale, char type
+  HIVE_CLI_SERVICE_PROTOCOL_V4
+
+  // V5 adds error details when GetOperationStatus returns in error state
+  HIVE_CLI_SERVICE_PROTOCOL_V5
+
+  // V6 uses binary type for binary payload (was string) and uses columnar result set
+  HIVE_CLI_SERVICE_PROTOCOL_V6
+
+  // V7 adds support for delegation token based connection
+  HIVE_CLI_SERVICE_PROTOCOL_V7
+}
+
+enum TTypeId {
+  BOOLEAN_TYPE,
+  TINYINT_TYPE,
+  SMALLINT_TYPE,
+  INT_TYPE,
+  BIGINT_TYPE,
+  FLOAT_TYPE,
+  DOUBLE_TYPE,
+  STRING_TYPE,
+  TIMESTAMP_TYPE,
+  BINARY_TYPE,
+  ARRAY_TYPE,
+  MAP_TYPE,
+  STRUCT_TYPE,
+  UNION_TYPE,
+  USER_DEFINED_TYPE,
+  DECIMAL_TYPE,
+  NULL_TYPE,
+  DATE_TYPE,
+  VARCHAR_TYPE,
+  CHAR_TYPE
+}
+
+const set<TTypeId> PRIMITIVE_TYPES = [
+  TTypeId.BOOLEAN_TYPE,
+  TTypeId.TINYINT_TYPE,
+  TTypeId.SMALLINT_TYPE,
+  TTypeId.INT_TYPE,
+  TTypeId.BIGINT_TYPE,
+  TTypeId.FLOAT_TYPE,
+  TTypeId.DOUBLE_TYPE,
+  TTypeId.STRING_TYPE,
+  TTypeId.TIMESTAMP_TYPE,
+  TTypeId.BINARY_TYPE,
+  TTypeId.DECIMAL_TYPE,
+  TTypeId.NULL_TYPE,
+  TTypeId.DATE_TYPE,
+  TTypeId.VARCHAR_TYPE,
+  TTypeId.CHAR_TYPE
+]
+
+const set<TTypeId> COMPLEX_TYPES = [
+  TTypeId.ARRAY_TYPE
+  TTypeId.MAP_TYPE
+  TTypeId.STRUCT_TYPE
+  TTypeId.UNION_TYPE
+  TTypeId.USER_DEFINED_TYPE
+]
+
+const set<TTypeId> COLLECTION_TYPES = [
+  TTypeId.ARRAY_TYPE
+  TTypeId.MAP_TYPE
+]
+
+const map<TTypeId,string> TYPE_NAMES = {
+  TTypeId.BOOLEAN_TYPE: "BOOLEAN",
+  TTypeId.TINYINT_TYPE: "TINYINT",
+  TTypeId.SMALLINT_TYPE: "SMALLINT",
+  TTypeId.INT_TYPE: "INT",
+  TTypeId.BIGINT_TYPE: "BIGINT",
+  TTypeId.FLOAT_TYPE: "FLOAT",
+  TTypeId.DOUBLE_TYPE: "DOUBLE",
+  TTypeId.STRING_TYPE: "STRING",
+  TTypeId.TIMESTAMP_TYPE: "TIMESTAMP",
+  TTypeId.BINARY_TYPE: "BINARY",
+  TTypeId.ARRAY_TYPE: "ARRAY",
+  TTypeId.MAP_TYPE: "MAP",
+  TTypeId.STRUCT_TYPE: "STRUCT",
+  TTypeId.UNION_TYPE: "UNIONTYPE",
+  TTypeId.DECIMAL_TYPE: "DECIMAL",
+  TTypeId.NULL_TYPE: "NULL"
+  TTypeId.DATE_TYPE: "DATE"
+  TTypeId.VARCHAR_TYPE: "VARCHAR"
+  TTypeId.CHAR_TYPE: "CHAR"
+}
+
+// Thrift does not support recursively defined types or forward declarations,
+// which makes it difficult to represent Hive's nested types.
+// To get around these limitations TTypeDesc employs a type list that maps
+// integer "pointers" to TTypeEntry objects. The following examples show
+// how different types are represented using this scheme:
+//
+// "INT":
+// TTypeDesc {
+//   types = [
+//     TTypeEntry.primitive_entry {
+//       type = INT_TYPE
+//     }
+//   ]
+// }
+//
+// "ARRAY<INT>":
+// TTypeDesc {
+//   types = [
+//     TTypeEntry.array_entry {
+//       object_type_ptr = 1
+//     },
+//     TTypeEntry.primitive_entry {
+//       type = INT_TYPE
+//     }
+//   ]
+// }
+//
+// "MAP<INT,STRING>":
+// TTypeDesc {
+//   types = [
+//     TTypeEntry.map_entry {
+//       key_type_ptr = 1
+//       value_type_ptr = 2
+//     },
+//     TTypeEntry.primitive_entry {
+//       type = INT_TYPE
+//     },
+//     TTypeEntry.primitive_entry {
+//       type = STRING_TYPE
+//     }
+//   ]
+// }
+
+typedef i32 TTypeEntryPtr
+
+// Valid TTypeQualifiers key names
+const string CHARACTER_MAXIMUM_LENGTH = "characterMaximumLength"
+
+// Type qualifier key name for decimal
+const string PRECISION = "precision"
+const string SCALE = "scale"
+
+union TTypeQualifierValue {
+  1: optional i32 i32Value
+  2: optional string stringValue
+}
+
+// Type qualifiers for primitive type.
+struct TTypeQualifiers {
+  1: required map <string, TTypeQualifierValue> qualifiers
+}
+
+// Type entry for a primitive type.
+struct TPrimitiveTypeEntry {
+  // The primitive type token. This must satisfy the condition
+  // that type is in the PRIMITIVE_TYPES set.
+  1: required TTypeId type
+  2: optional TTypeQualifiers typeQualifiers
+}
+
+// Type entry for an ARRAY type.
+struct TArrayTypeEntry {
+  1: required TTypeEntryPtr objectTypePtr
+}
+
+// Type entry for a MAP type.
+struct TMapTypeEntry {
+  1: required TTypeEntryPtr keyTypePtr
+  2: required TTypeEntryPtr valueTypePtr
+}
+
+// Type entry for a STRUCT type.
+struct TStructTypeEntry {
+  1: required map<string, TTypeEntryPtr> nameToTypePtr
+}
+
+// Type entry for a UNIONTYPE type.
+struct TUnionTypeEntry {
+  1: required map<string, TTypeEntryPtr> nameToTypePtr
+}
+
+struct TUserDefinedTypeEntry {
+  // The fully qualified name of the class implementing this type.
+  1: required string typeClassName
+}
+
+// We use a union here since Thrift does not support inheritance.
+union TTypeEntry {
+  1: TPrimitiveTypeEntry primitiveEntry
+  2: TArrayTypeEntry arrayEntry
+  3: TMapTypeEntry mapEntry
+  4: TStructTypeEntry structEntry
+  5: TUnionTypeEntry unionEntry
+  6: TUserDefinedTypeEntry userDefinedTypeEntry
+}
+
+// Type descriptor for columns.
+struct TTypeDesc {
+  // The "top" type is always the first element of the list.
+  // If the top type is an ARRAY, MAP, STRUCT, or UNIONTYPE
+  // type, then subsequent elements represent nested types.
+  1: required list<TTypeEntry> types
+}
+
+// A result set column descriptor.
+struct TColumnDesc {
+  // The name of the column
+  1: required string columnName
+
+  // The type descriptor for this column
+  2: required TTypeDesc typeDesc
+
+  // The ordinal position of this column in the schema
+  3: required i32 position
+
+  4: optional string comment
+}
+
+// Metadata used to describe the schema (column names, types, comments)
+// of result sets.
+struct TTableSchema {
+  1: required list<TColumnDesc> columns
+}
+
+// A Boolean column value.
+struct TBoolValue {
+  // NULL if value is unset.
+  1: optional bool value
+}
+
+// A Byte column value.
+struct TByteValue {
+  // NULL if value is unset.
+  1: optional byte value
+}
+
+// A signed, 16 bit column value.
+struct TI16Value {
+  // NULL if value is unset
+  1: optional i16 value
+}
+
+// A signed, 32 bit column value
+struct TI32Value {
+  // NULL if value is unset
+  1: optional i32 value
+}
+
+// A signed 64 bit column value
+struct TI64Value {
+  // NULL if value is unset
+  1: optional i64 value
+}
+
+// A floating point 64 bit column value
+struct TDoubleValue {
+  // NULL if value is unset
+  1: optional double value
+}
+
+struct TStringValue {
+  // NULL if value is unset
+  1: optional string value
+}
+
+// A single column value in a result set.
+// Note that Hive's type system is richer than Thrift's,
+// so in some cases we have to map multiple Hive types
+// to the same Thrift type. On the client-side this is
+// disambiguated by looking at the Schema of the
+// result set.
+union TColumnValue {
+  1: TBoolValue   boolVal      // BOOLEAN
+  2: TByteValue   byteVal      // TINYINT
+  3: TI16Value    i16Val       // SMALLINT
+  4: TI32Value    i32Val       // INT
+  5: TI64Value    i64Val       // BIGINT, TIMESTAMP
+  6: TDoubleValue doubleVal    // FLOAT, DOUBLE
+  7: TStringValue stringVal    // STRING, LIST, MAP, STRUCT, UNIONTYPE, BINARY, DECIMAL, NULL
+}
+
+// Represents a row in a rowset.
+struct TRow {
+  1: required list<TColumnValue> colVals
+}
+
+struct TBoolColumn {
+  1: required list<bool> values
+  2: required binary nulls
+}
+
+struct TByteColumn {
+  1: required list<byte> values
+  2: required binary nulls
+}
+
+struct TI16Column {
+  1: required list<i16> values
+  2: required binary nulls
+}
+
+struct TI32Column {
+  1: required list<i32> values
+  2: required binary nulls
+}
+
+struct TI64Column {
+  1: required list<i64> values
+  2: required binary nulls
+}
+
+struct TDoubleColumn {
+  1: required list<double> values
+  2: required binary nulls
+}
+
+struct TStringColumn {
+  1: required list<string> values
+  2: required binary nulls
+}
+
+struct TBinaryColumn {
+  1: required list<binary> values
+  2: required binary nulls
+}
+
+// Note that Hive's type system is richer than Thrift's,
+// so in some cases we have to map multiple Hive types
+// to the same Thrift type. On the client-side this is
+// disambiguated by looking at the Schema of the
+// result set.
+union TColumn {
+  1: TBoolColumn   boolVal      // BOOLEAN
+  2: TByteColumn   byteVal      // TINYINT
+  3: TI16Column    i16Val       // SMALLINT
+  4: TI32Column    i32Val       // INT
+  5: TI64Column    i64Val       // BIGINT, TIMESTAMP
+  6: TDoubleColumn doubleVal    // FLOAT, DOUBLE
+  7: TStringColumn stringVal    // STRING, LIST, MAP, STRUCT, UNIONTYPE, DECIMAL, NULL
+  8: TBinaryColumn binaryVal    // BINARY
+}
+
+// Represents a rowset
+struct TRowSet {
+  // The starting row offset of this rowset.
+  1: required i64 startRowOffset
+  2: required list<TRow> rows
+  3: optional list<TColumn> columns
+}
+
+// The return status code contained in each response.
+enum TStatusCode {
+  SUCCESS_STATUS,
+  SUCCESS_WITH_INFO_STATUS,
+  STILL_EXECUTING_STATUS,
+  ERROR_STATUS,
+  INVALID_HANDLE_STATUS
+}
+
+// The return status of a remote request
+struct TStatus {
+  1: required TStatusCode statusCode
+
+  // If status is SUCCESS_WITH_INFO, info_msgs may be populated with
+  // additional diagnostic information.
+  2: optional list<string> infoMessages
+
+  // If status is ERROR, then the following fields may be set
+  3: optional string sqlState  // as defined in the ISO/IEC CLI specification
+  4: optional i32 errorCode    // internal error code
+  5: optional string errorMessage
+}
+
+// The state of an operation (i.e. a query or other
+// asynchronous operation that generates a result set)
+// on the server.
+enum TOperationState {
+  // The operation has been initialized
+  INITIALIZED_STATE,
+
+  // The operation is running. In this state the result
+  // set is not available.
+  RUNNING_STATE,
+
+  // The operation has completed. When an operation is in
+  // this state its result set may be fetched.
+  FINISHED_STATE,
+
+  // The operation was canceled by a client
+  CANCELED_STATE,
+
+  // The operation was closed by a client
+  CLOSED_STATE,
+
+  // The operation failed due to an error
+  ERROR_STATE,
+
+  // The operation is in an unrecognized state
+  UKNOWN_STATE,
+
+  // The operation is in an pending state
+  PENDING_STATE,
+}
+
+// A string identifier. This is interpreted literally.
+typedef string TIdentifier
+
+// A search pattern.
+//
+// Valid search pattern characters:
+// '_': Any single character.
+// '%': Any sequence of zero or more characters.
+// '\': Escape character used to include special characters,
+//      e.g. '_', '%', '\'. If a '\' precedes a non-special
+//      character it has no special meaning and is interpreted
+//      literally.
+typedef string TPattern
+
+
+// A search pattern or identifier. Used as input
+// parameter for many of the catalog functions.
+typedef string TPatternOrIdentifier
+
+struct THandleIdentifier {
+  // 16 byte globally unique identifier
+  // This is the public ID of the handle and
+  // can be used for reporting.
+  1: required binary guid,
+
+  // 16 byte secret generated by the server
+  // and used to verify that the handle is not
+  // being hijacked by another user.
+  2: required binary secret,
+}
+
+// Client-side handle to persistent
+// session information on the server-side.
+struct TSessionHandle {
+  1: required THandleIdentifier sessionId
+}
+
+// The subtype of an OperationHandle.
+enum TOperationType {
+  EXECUTE_STATEMENT,
+  GET_TYPE_INFO,
+  GET_CATALOGS,
+  GET_SCHEMAS,
+  GET_TABLES,
+  GET_TABLE_TYPES,
+  GET_COLUMNS,
+  GET_FUNCTIONS,
+  UNKNOWN,
+}
+
+// Client-side reference to a task running
+// asynchronously on the server.
+struct TOperationHandle {
+  1: required THandleIdentifier operationId
+  2: required TOperationType operationType
+
+  // If hasResultSet = TRUE, then this operation
+  // generates a result set that can be fetched.
+  // Note that the result set may be empty.
+  //
+  // If hasResultSet = FALSE, then this operation
+  // does not generate a result set, and calling
+  // GetResultSetMetadata or FetchResults against
+  // this OperationHandle will generate an error.
+  3: required bool hasResultSet
+
+  // For operations that don't generate result sets,
+  // modifiedRowCount is either:
+  //
+  // 1) The number of rows that were modified by
+  //    the DML operation (e.g. number of rows inserted,
+  //    number of rows deleted, etc).
+  //
+  // 2) 0 for operations that don't modify or add rows.
+  //
+  // 3) < 0 if the operation is capable of modifying rows,
+  //    but Hive is unable to determine how many rows were
+  //    modified. For example, Hive's LOAD DATA command
+  //    doesn't generate row count information because
+  //    Hive doesn't inspect the data as it is loaded.
+  //
+  // modifiedRowCount is unset if the operation generates
+  // a result set.
+  4: optional double modifiedRowCount
+}
+
+
+// OpenSession()
+//
+// Open a session (connection) on the server against
+// which operations may be executed.
+struct TOpenSessionReq {
+  // The version of the HiveServer2 protocol that the client is using.
+  1: required TProtocolVersion client_protocol = TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6
+
+  // Username and password for authentication.
+  // Depending on the authentication scheme being used,
+  // this information may instead be provided by a lower
+  // protocol layer, in which case these fields may be
+  // left unset.
+  2: optional string username
+  3: optional string password
+
+  // Configuration overlay which is applied when the session is
+  // first created.
+  4: optional map<string, string> configuration
+}
+
+struct TOpenSessionResp {
+  1: required TStatus status
+
+  // The protocol version that the server is using.
+  2: required TProtocolVersion serverProtocolVersion = TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6
+
+  // Session Handle
+  3: optional TSessionHandle sessionHandle
+
+  // The configuration settings for this session.
+  4: optional map<string, string> configuration
+}
+
+
+// CloseSession()
+//
+// Closes the specified session and frees any resources
+// currently allocated to that session. Any open
+// operations in that session will be canceled.
+struct TCloseSessionReq {
+  1: required TSessionHandle sessionHandle
+}
+
+struct TCloseSessionResp {
+  1: required TStatus status
+}
+
+
+
+enum TGetInfoType {
+  CLI_MAX_DRIVER_CONNECTIONS =           0,
+  CLI_MAX_CONCURRENT_ACTIVITIES =        1,
+  CLI_DATA_SOURCE_NAME =                 2,
+  CLI_FETCH_DIRECTION =                  8,
+  CLI_SERVER_NAME =                      13,
+  CLI_SEARCH_PATTERN_ESCAPE =            14,
+  CLI_DBMS_NAME =                        17,
+  CLI_DBMS_VER =                         18,
+  CLI_ACCESSIBLE_TABLES =                19,
+  CLI_ACCESSIBLE_PROCEDURES =            20,
+  CLI_CURSOR_COMMIT_BEHAVIOR =           23,
+  CLI_DATA_SOURCE_READ_ONLY =            25,
+  CLI_DEFAULT_TXN_ISOLATION =            26,
+  CLI_IDENTIFIER_CASE =                  28,
+  CLI_IDENTIFIER_QUOTE_CHAR =            29,
+  CLI_MAX_COLUMN_NAME_LEN =              30,
+  CLI_MAX_CURSOR_NAME_LEN =              31,
+  CLI_MAX_SCHEMA_NAME_LEN =              32,
+  CLI_MAX_CATALOG_NAME_LEN =             34,
+  CLI_MAX_TABLE_NAME_LEN =               35,
+  CLI_SCROLL_CONCURRENCY =               43,
+  CLI_TXN_CAPABLE =                      46,
+  CLI_USER_NAME =                        47,
+  CLI_TXN_ISOLATION_OPTION =             72,
+  CLI_INTEGRITY =                        73,
+  CLI_GETDATA_EXTENSIONS =               81,
+  CLI_NULL_COLLATION =                   85,
+  CLI_ALTER_TABLE =                      86,
+  CLI_ORDER_BY_COLUMNS_IN_SELECT =       90,
+  CLI_SPECIAL_CHARACTERS =               94,
+  CLI_MAX_COLUMNS_IN_GROUP_BY =          97,
+  CLI_MAX_COLUMNS_IN_INDEX =             98,
+  CLI_MAX_COLUMNS_IN_ORDER_BY =          99,
+  CLI_MAX_COLUMNS_IN_SELECT =            100,
+  CLI_MAX_COLUMNS_IN_TABLE =             101,
+  CLI_MAX_INDEX_SIZE =                   102,
+  CLI_MAX_ROW_SIZE =                     104,
+  CLI_MAX_STATEMENT_LEN =                105,
+  CLI_MAX_TABLES_IN_SELECT =             106,
+  CLI_MAX_USER_NAME_LEN =                107,
+  CLI_OJ_CAPABILITIES =                  115,
+
+  CLI_XOPEN_CLI_YEAR =                   10000,
+  CLI_CURSOR_SENSITIVITY =               10001,
+  CLI_DESCRIBE_PARAMETER =               10002,
+  CLI_CATALOG_NAME =                     10003,
+  CLI_COLLATION_SEQ =                    10004,
+  CLI_MAX_IDENTIFIER_LEN =               10005,
+}
+
+union TGetInfoValue {
+  1: string stringValue
+  2: i16 smallIntValue
+  3: i32 integerBitmask
+  4: i32 integerFlag
+  5: i32 binaryValue
+  6: i64 lenValue
+}
+
+// GetInfo()
+//
+// This function is based on ODBC's CLIGetInfo() function.
+// The function returns general information about the data source
+// using the same keys as ODBC.
+struct TGetInfoReq {
+  // The session to run this request against
+  1: required TSessionHandle sessionHandle
+
+  2: required TGetInfoType infoType
+}
+
+struct TGetInfoResp {
+  1: required TStatus status
+
+  2: required TGetInfoValue infoValue
+}
+
+
+// ExecuteStatement()
+//
+// Execute a statement.
+// The returned OperationHandle can be used to check on the
+// status of the statement, and to fetch results once the
+// statement has finished executing.
+struct TExecuteStatementReq {
+  // The session to execute the statement against
+  1: required TSessionHandle sessionHandle
+
+  // The statement to be executed (DML, DDL, SET, etc)
+  2: required string statement
+
+  // Configuration properties that are overlayed on top of the
+  // the existing session configuration before this statement
+  // is executed. These properties apply to this statement
+  // only and will not affect the subsequent state of the Session.
+  3: optional map<string, string> confOverlay
+
+  // Execute asynchronously when runAsync is true
+  4: optional bool runAsync = false
+}
+
+struct TExecuteStatementResp {
+  1: required TStatus status
+  2: optional TOperationHandle operationHandle
+}
+
+// GetTypeInfo()
+//
+// Get information about types supported by the HiveServer instance.
+// The information is returned as a result set which can be fetched
+// using the OperationHandle provided in the response.
+//
+// Refer to the documentation for ODBC's CLIGetTypeInfo function for
+// the format of the result set.
+struct TGetTypeInfoReq {
+  // The session to run this request against.
+  1: required TSessionHandle sessionHandle
+}
+
+struct TGetTypeInfoResp {
+  1: required TStatus status
+  2: optional TOperationHandle operationHandle
+}
+
+
+// GetCatalogs()
+//
+// Returns the list of catalogs (databases)
+// Results are ordered by TABLE_CATALOG
+//
+// Resultset columns :
+// col1
+// name: TABLE_CAT
+// type: STRING
+// desc: Catalog name. NULL if not applicable.
+//
+struct TGetCatalogsReq {
+  // Session to run this request against
+  1: required TSessionHandle sessionHandle
+}
+
+struct TGetCatalogsResp {
+  1: required TStatus status
+  2: optional TOperationHandle operationHandle
+}
+
+
+// GetSchemas()
+//
+// Retrieves the schema names available in this database.
+// The results are ordered by TABLE_CATALOG and TABLE_SCHEM.
+// col1
+// name: TABLE_SCHEM
+// type: STRING
+// desc: schema name
+// col2
+// name: TABLE_CATALOG
+// type: STRING
+// desc: catalog name
+struct TGetSchemasReq {
+  // Session to run this request against
+  1: required TSessionHandle sessionHandle
+
+  // Name of the catalog. Must not contain a search pattern.
+  2: optional TIdentifier catalogName
+
+  // schema name or pattern
+  3: optional TPatternOrIdentifier schemaName
+}
+
+struct TGetSchemasResp {
+  1: required TStatus status
+  2: optional TOperationHandle operationHandle
+}
+
+
+// GetTables()
+//
+// Returns a list of tables with catalog, schema, and table
+// type information. The information is returned as a result
+// set which can be fetched using the OperationHandle
+// provided in the response.
+// Results are ordered by TABLE_TYPE, TABLE_CAT, TABLE_SCHEM, and TABLE_NAME
+//
+// Result Set Columns:
+//
+// col1
+// name: TABLE_CAT
+// type: STRING
+// desc: Catalog name. NULL if not applicable.
+//
+// col2
+// name: TABLE_SCHEM
+// type: STRING
+// desc: Schema name.
+//
+// col3
+// name: TABLE_NAME
+// type: STRING
+// desc: Table name.
+//
+// col4
+// name: TABLE_TYPE
+// type: STRING
+// desc: The table type, e.g. "TABLE", "VIEW", etc.
+//
+// col5
+// name: REMARKS
+// type: STRING
+// desc: Comments about the table
+//
+struct TGetTablesReq {
+  // Session to run this request against
+  1: required TSessionHandle sessionHandle
+
+  // Name of the catalog or a search pattern.
+  2: optional TPatternOrIdentifier catalogName
+
+  // Name of the schema or a search pattern.
+  3: optional TPatternOrIdentifier schemaName
+
+  // Name of the table or a search pattern.
+  4: optional TPatternOrIdentifier tableName
+
+  // List of table types to match
+  // e.g. "TABLE", "VIEW", "SYSTEM TABLE", "GLOBAL TEMPORARY",
+  // "LOCAL TEMPORARY", "ALIAS", "SYNONYM", etc.
+  5: optional list<string> tableTypes
+}
+
+struct TGetTablesResp {
+  1: required TStatus status
+  2: optional TOperationHandle operationHandle
+}
+
+
+// GetTableTypes()
+//
+// Returns the table types available in this database.
+// The results are ordered by table type.
+//
+// col1
+// name: TABLE_TYPE
+// type: STRING
+// desc: Table type name.
+struct TGetTableTypesReq {
+  // Session to run this request against
+  1: required TSessionHandle sessionHandle
+}
+
+struct TGetTableTypesResp {
+  1: required TStatus status
+  2: optional TOperationHandle operationHandle
+}
+
+
+// GetColumns()
+//
+// Returns a list of columns in the specified tables.
+// The information is returned as a result set which can be fetched
+// using the OperationHandle provided in the response.
+// Results are ordered by TABLE_CAT, TABLE_SCHEM, TABLE_NAME,
+// and ORDINAL_POSITION.
+//
+// Result Set Columns are the same as those for the ODBC CLIColumns
+// function.
+//
+struct TGetColumnsReq {
+  // Session to run this request against
+  1: required TSessionHandle sessionHandle
+
+  // Name of the catalog. Must not contain a search pattern.
+  2: optional TIdentifier catalogName
+
+  // Schema name or search pattern
+  3: optional TPatternOrIdentifier schemaName
+
+  // Table name or search pattern
+  4: optional TPatternOrIdentifier tableName
+
+  // Column name or search pattern
+  5: optional TPatternOrIdentifier columnName
+}
+
+struct TGetColumnsResp {
+  1: required TStatus status
+  2: optional TOperationHandle operationHandle
+}
+
+
+// GetFunctions()
+//
+// Returns a list of functions supported by the data source. The
+// behavior of this function matches
+// java.sql.DatabaseMetaData.getFunctions() both in terms of
+// inputs and outputs.
+//
+// Result Set Columns:
+//
+// col1
+// name: FUNCTION_CAT
+// type: STRING
+// desc: Function catalog (may be null)
+//
+// col2
+// name: FUNCTION_SCHEM
+// type: STRING
+// desc: Function schema (may be null)
+//
+// col3
+// name: FUNCTION_NAME
+// type: STRING
+// desc: Function name. This is the name used to invoke the function.
+//
+// col4
+// name: REMARKS
+// type: STRING
+// desc: Explanatory comment on the function.
+//
+// col5
+// name: FUNCTION_TYPE
+// type: SMALLINT
+// desc: Kind of function. One of:
+//       * functionResultUnknown - Cannot determine if a return value or a table
+//                                 will be returned.
+  //       * functionNoTable       - Does not return a table.
+//       * functionReturnsTable  - Returns a table.
+//
+// col6
+// name: SPECIFIC_NAME
+// type: STRING
+// desc: The name which uniquely identifies this function within its schema.
+//       In this case this is the fully qualified class name of the class
+//       that implements this function.
+//
+struct TGetFunctionsReq {
+  // Session to run this request against
+  1: required TSessionHandle sessionHandle
+
+  // A catalog name; must match the catalog name as it is stored in the
+  // database; "" retrieves those without a catalog; null means
+  // that the catalog name should not be used to narrow the search.
+  2: optional TIdentifier catalogName
+
+  // A schema name pattern; must match the schema name as it is stored
+  // in the database; "" retrieves those without a schema; null means
+  // that the schema name should not be used to narrow the search.
+  3: optional TPatternOrIdentifier schemaName
+
+  // A function name pattern; must match the function name as it is stored
+  // in the database.
+  4: required TPatternOrIdentifier functionName
+}
+
+struct TGetFunctionsResp {
+  1: required TStatus status
+  2: optional TOperationHandle operationHandle
+}
+
+
+// GetOperationStatus()
+//
+// Get the status of an operation running on the server.
+struct TGetOperationStatusReq {
+  // Operation whose status is requested
+  1: required TOperationHandle operationHandle
+}
+
+struct TGetOperationStatusResp {
+  1: required TStatus status
+  2: optional TOperationState operationState
+
+  // If operationState is ERROR_STATE, then the following fields may be set
+  // sqlState as defined in the ISO/IEC CLI specification
+  3: optional string sqlState
+
+  // Internal error code
+  4: optional i32 errorCode
+
+  // Error message
+  5: optional string errorMessage
+}
+
+
+// CancelOperation()
+//
+// Cancels processing on the specified operation handle and
+// frees any resources which were allocated.
+struct TCancelOperationReq {
+  // Operation to cancel
+  1: required TOperationHandle operationHandle
+}
+
+struct TCancelOperationResp {
+  1: required TStatus status
+}
+
+
+// CloseOperation()
+//
+// Given an operation in the FINISHED, CANCELED,
+// or ERROR states, CloseOperation() will free
+// all of the resources which were allocated on
+// the server to service the operation.
+struct TCloseOperationReq {
+  1: required TOperationHandle operationHandle
+}
+
+struct TCloseOperationResp {
+  1: required TStatus status
+}
+
+
+// GetResultSetMetadata()
+//
+// Retrieves schema information for the specified operation
+struct TGetResultSetMetadataReq {
+  // Operation for which to fetch result set schema information
+  1: required TOperationHandle operationHandle
+}
+
+struct TGetResultSetMetadataResp {
+  1: required TStatus status
+  2: optional TTableSchema schema
+}
+
+
+enum TFetchOrientation {
+  // Get the next rowset. The fetch offset is ignored.
+  FETCH_NEXT,
+
+  // Get the previous rowset. The fetch offset is ignored.
+  // NOT SUPPORTED
+  FETCH_PRIOR,
+
+  // Return the rowset at the given fetch offset relative
+  // to the current rowset.
+  // NOT SUPPORTED
+  FETCH_RELATIVE,
+
+  // Return the rowset at the specified fetch offset.
+  // NOT SUPPORTED
+  FETCH_ABSOLUTE,
+
+  // Get the first rowset in the result set.
+  FETCH_FIRST,
+
+  // Get the last rowset in the result set.
+  // NOT SUPPORTED
+  FETCH_LAST
+}
+
+// FetchResults()
+//
+// Fetch rows from the server corresponding to
+// a particular OperationHandle.
+struct TFetchResultsReq {
+  // Operation from which to fetch results.
+  1: required TOperationHandle operationHandle
+
+  // The fetch orientation. For V1 this must be either
+  // FETCH_NEXT or FETCH_FIRST. Defaults to FETCH_NEXT.
+  2: required TFetchOrientation orientation = TFetchOrientation.FETCH_NEXT
+
+  // Max number of rows that should be returned in
+  // the rowset.
+  3: required i64 maxRows
+}
+
+struct TFetchResultsResp {
+  1: required TStatus status
+
+  // TRUE if there are more rows left to fetch from the server.
+  2: optional bool hasMoreRows
+
+  // The rowset. This is optional so that we have the
+  // option in the future of adding alternate formats for
+  // representing result set data, e.g. delimited strings,
+  // binary encoded, etc.
+  3: optional TRowSet results
+}
+
+// GetDelegationToken()
+// Retrieve delegation token for the current user
+struct  TGetDelegationTokenReq {
+  // session handle
+  1: required TSessionHandle sessionHandle
+
+  // userid for the proxy user
+  2: required string owner
+
+  // designated renewer userid
+  3: required string renewer
+}
+
+struct TGetDelegationTokenResp {
+  // status of the request
+  1: required TStatus status
+
+  // delegation token string
+  2: optional string delegationToken
+}
+
+// CancelDelegationToken()
+// Cancel the given delegation token
+struct TCancelDelegationTokenReq {
+  // session handle
+  1: required TSessionHandle sessionHandle
+
+  // delegation token to cancel
+  2: required string delegationToken
+}
+
+struct TCancelDelegationTokenResp {
+  // status of the request
+  1: required TStatus status
+}
+
+// RenewDelegationToken()
+// Renew the given delegation token
+struct TRenewDelegationTokenReq {
+  // session handle
+  1: required TSessionHandle sessionHandle
+
+  // delegation token to renew
+  2: required string delegationToken
+}
+
+struct TRenewDelegationTokenResp {
+  // status of the request
+  1: required TStatus status
+}
+
+// GetLog()
+// Not present in Hive 0.13, re-added for backwards compatibility.
+//
+// Fetch operation log from the server corresponding to
+// a particular OperationHandle.
+struct TGetLogReq {
+  // Operation whose log is requested
+  1: required TOperationHandle operationHandle
+}
+
+struct TGetLogResp {
+  1: required TStatus status
+  2: required string log
+}
+
+service TCLIService {
+
+  TOpenSessionResp OpenSession(1:TOpenSessionReq req);
+
+  TCloseSessionResp CloseSession(1:TCloseSessionReq req);
+
+  TGetInfoResp GetInfo(1:TGetInfoReq req);
+
+  TExecuteStatementResp ExecuteStatement(1:TExecuteStatementReq req);
+
+  TGetTypeInfoResp GetTypeInfo(1:TGetTypeInfoReq req);
+
+  TGetCatalogsResp GetCatalogs(1:TGetCatalogsReq req);
+
+  TGetSchemasResp GetSchemas(1:TGetSchemasReq req);
+
+  TGetTablesResp GetTables(1:TGetTablesReq req);
+
+  TGetTableTypesResp GetTableTypes(1:TGetTableTypesReq req);
+
+  TGetColumnsResp GetColumns(1:TGetColumnsReq req);
+
+  TGetFunctionsResp GetFunctions(1:TGetFunctionsReq req);
+
+  TGetOperationStatusResp GetOperationStatus(1:TGetOperationStatusReq req);
+
+  TCancelOperationResp CancelOperation(1:TCancelOperationReq req);
+
+  TCloseOperationResp CloseOperation(1:TCloseOperationReq req);
+
+  TGetResultSetMetadataResp GetResultSetMetadata(1:TGetResultSetMetadataReq req);
+
+  TFetchResultsResp FetchResults(1:TFetchResultsReq req);
+
+  TGetDelegationTokenResp GetDelegationToken(1:TGetDelegationTokenReq req);
+
+  TCancelDelegationTokenResp CancelDelegationToken(1:TCancelDelegationTokenReq req);
+
+  TRenewDelegationTokenResp RenewDelegationToken(1:TRenewDelegationTokenReq req);
+
+  // Not present in Hive 0.13, re-added for backwards compatibility.
+  TGetLogResp GetLog(1:TGetLogReq req);
+}
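
A minimal, illustrative sketch (not part of this commit) of how the generated
Java client for the service above might be driven end to end, following the
OpenSession() / ExecuteStatement() / FetchResults() flow and the columnar
TColumn results introduced in protocol V6. It assumes a HiveServer2-compatible
endpoint on localhost:21050 with authentication disabled (plain binary
transport); the host, port, and query are placeholder assumptions.

import java.util.List;

import org.apache.hive.service.cli.thrift.*;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.transport.TSocket;

public class TCLIServiceExample {
  public static void main(String[] args) throws Exception {
    TSocket transport = new TSocket("localhost", 21050);
    transport.open();
    TCLIService.Client client = new TCLIService.Client(new TBinaryProtocol(transport));

    // OpenSession() returns the TSessionHandle used by all later requests.
    TOpenSessionResp openResp = client.OpenSession(new TOpenSessionReq());
    TSessionHandle session = openResp.getSessionHandle();

    // ExecuteStatement() follows the TFunctionNameReq/Resp convention and hands
    // back a TOperationHandle for status, metadata, and fetch calls.
    TExecuteStatementResp execResp =
        client.ExecuteStatement(new TExecuteStatementReq(session, "SELECT 1"));
    TOperationHandle op = execResp.getOperationHandle();

    // FetchResults() with FETCH_NEXT; V6 servers populate TRowSet.columns.
    TFetchResultsResp fetchResp =
        client.FetchResults(new TFetchResultsReq(op, TFetchOrientation.FETCH_NEXT, 1024));
    List<TColumn> columns = fetchResp.getResults().getColumns();
    if (columns != null && !columns.isEmpty() && columns.get(0).isSetI32Val()) {
      // In columnar results the 'nulls' field is a bitmask: bit i set means
      // row i of this column is NULL.
      List<Integer> values = columns.get(0).getI32Val().getValues();
      byte[] nulls = columns.get(0).getI32Val().getNulls();
      for (int i = 0; i < values.size(); ++i) {
        boolean isNull = (i / 8) < nulls.length && (nulls[i / 8] & (1 << (i % 8))) != 0;
        System.out.println(isNull ? "NULL" : String.valueOf(values.get(i)));
      }
    }

    client.CloseOperation(new TCloseOperationReq(op));
    client.CloseSession(new TCloseSessionReq(session));
    transport.close();
  }
}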

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bdad90e6/fe/pom.xml
----------------------------------------------------------------------
diff --git a/fe/pom.xml b/fe/pom.xml
index c4e7c44..3399544 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -40,6 +40,7 @@ under the License.
     <testExecutionMode>reduced</testExecutionMode>
     <hadoop.version>${env.IMPALA_HADOOP_VERSION}</hadoop.version>
     <hive.version>${env.IMPALA_HIVE_VERSION}</hive.version>
+    <hive.major.version>${env.IMPALA_HIVE_MAJOR_VERSION}</hive.major.version>
     <sentry.version>${env.IMPALA_SENTRY_VERSION}</sentry.version>
     <hbase.version>${env.IMPALA_HBASE_VERSION}</hbase.version>
     <parquet.version>${env.IMPALA_PARQUET_VERSION}</parquet.version>
@@ -493,6 +494,7 @@ under the License.
                         -->
                 <source>${project.basedir}/generated-sources/gen-java</source>
                 <source>${project.build.directory}/generated-sources/cup</source>
+                <source>${project.basedir}/src/compat-hive-${hive.major.version}/java</source>
               </sources>
             </configuration>
           </execution>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bdad90e6/fe/src/compat-hive-1/java/org/apache/hive/service/rpc/thrift/TGetCatalogsReq.java
----------------------------------------------------------------------
diff --git a/fe/src/compat-hive-1/java/org/apache/hive/service/rpc/thrift/TGetCatalogsReq.java b/fe/src/compat-hive-1/java/org/apache/hive/service/rpc/thrift/TGetCatalogsReq.java
new file mode 100644
index 0000000..6a264bd
--- /dev/null
+++ b/fe/src/compat-hive-1/java/org/apache/hive/service/rpc/thrift/TGetCatalogsReq.java
@@ -0,0 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.hive.service.rpc.thrift;
+
+/**
+ * Wrapper to allow the same code to instantiate the equivalent classes from Hive 1 and
+ * Hive 2 APIs.
+ */
+public class TGetCatalogsReq extends org.apache.hive.service.cli.thrift.TGetCatalogsReq {}
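
To make the intent of these wrapper classes concrete, here is a small,
hypothetical sketch (not actual Impala frontend code) of the pattern they
enable together with the new compat source root in fe/pom.xml: call sites are
written once against the Hive 2 package name, and when hive.major.version=1
the wrappers above satisfy that import by extending the corresponding Hive 1
classes. The example class and method names below are illustrative
assumptions.

package org.apache.impala.example;

// Under Hive 2 this resolves to the generated class; under Hive 1 it resolves
// to the compat wrapper, which is-a org.apache.hive.service.cli.thrift type.
import org.apache.hive.service.rpc.thrift.TGetTablesReq;

public class MetadataRequestExample {
  // Builds a GetTables() request the same way regardless of which Hive major
  // version the frontend was compiled against.
  public static TGetTablesReq buildGetTablesReq(String schemaPattern, String tablePattern) {
    TGetTablesReq req = new TGetTablesReq();
    req.setSchemaName(schemaPattern);
    req.setTableName(tablePattern);
    return req;
  }
}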

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bdad90e6/fe/src/compat-hive-1/java/org/apache/hive/service/rpc/thrift/TGetColumnsReq.java
----------------------------------------------------------------------
diff --git a/fe/src/compat-hive-1/java/org/apache/hive/service/rpc/thrift/TGetColumnsReq.java b/fe/src/compat-hive-1/java/org/apache/hive/service/rpc/thrift/TGetColumnsReq.java
new file mode 100644
index 0000000..b35819a
--- /dev/null
+++ b/fe/src/compat-hive-1/java/org/apache/hive/service/rpc/thrift/TGetColumnsReq.java
@@ -0,0 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.hive.service.rpc.thrift;
+
+/**
+ * Wrapper to allow the same code to instantiate the equivalent classes from Hive 1 and
+ * Hive 2 APIs.
+ */
+public class TGetColumnsReq extends org.apache.hive.service.cli.thrift.TGetColumnsReq {}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bdad90e6/fe/src/compat-hive-1/java/org/apache/hive/service/rpc/thrift/TGetFunctionsReq.java
----------------------------------------------------------------------
diff --git a/fe/src/compat-hive-1/java/org/apache/hive/service/rpc/thrift/TGetFunctionsReq.java b/fe/src/compat-hive-1/java/org/apache/hive/service/rpc/thrift/TGetFunctionsReq.java
new file mode 100644
index 0000000..63424eb
--- /dev/null
+++ b/fe/src/compat-hive-1/java/org/apache/hive/service/rpc/thrift/TGetFunctionsReq.java
@@ -0,0 +1,25 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.hive.service.rpc.thrift;
+
+/**
+ * Wrapper to allow the same code to instantiate the equivalent classes from Hive 1 and
+ * Hive 2 APIs.
+ */
+public class TGetFunctionsReq
+    extends org.apache.hive.service.cli.thrift.TGetFunctionsReq {}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bdad90e6/fe/src/compat-hive-1/java/org/apache/hive/service/rpc/thrift/TGetInfoReq.java
----------------------------------------------------------------------
diff --git a/fe/src/compat-hive-1/java/org/apache/hive/service/rpc/thrift/TGetInfoReq.java b/fe/src/compat-hive-1/java/org/apache/hive/service/rpc/thrift/TGetInfoReq.java
new file mode 100644
index 0000000..708134d
--- /dev/null
+++ b/fe/src/compat-hive-1/java/org/apache/hive/service/rpc/thrift/TGetInfoReq.java
@@ -0,0 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.hive.service.rpc.thrift;
+
+/**
+ * Wrapper to allow the same code to instantiate the equivalent classes from Hive 1 and
+ * Hive 2 APIs.
+ */
+public class TGetInfoReq extends org.apache.hive.service.cli.thrift.TGetInfoReq {}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bdad90e6/fe/src/compat-hive-1/java/org/apache/hive/service/rpc/thrift/TGetSchemasReq.java
----------------------------------------------------------------------
diff --git a/fe/src/compat-hive-1/java/org/apache/hive/service/rpc/thrift/TGetSchemasReq.java b/fe/src/compat-hive-1/java/org/apache/hive/service/rpc/thrift/TGetSchemasReq.java
new file mode 100644
index 0000000..3b6ec26
--- /dev/null
+++ b/fe/src/compat-hive-1/java/org/apache/hive/service/rpc/thrift/TGetSchemasReq.java
@@ -0,0 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.hive.service.rpc.thrift;
+
+/**
+ * Wrapper to allow the same code to instantiate the equivalent classes from Hive 1 and
+ * Hive 2 APIs.
+ */
+public class TGetSchemasReq extends org.apache.hive.service.cli.thrift.TGetSchemasReq {}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bdad90e6/fe/src/compat-hive-1/java/org/apache/hive/service/rpc/thrift/TGetTablesReq.java
----------------------------------------------------------------------
diff --git a/fe/src/compat-hive-1/java/org/apache/hive/service/rpc/thrift/TGetTablesReq.java b/fe/src/compat-hive-1/java/org/apache/hive/service/rpc/thrift/TGetTablesReq.java
new file mode 100644
index 0000000..fd309d4
--- /dev/null
+++ b/fe/src/compat-hive-1/java/org/apache/hive/service/rpc/thrift/TGetTablesReq.java
@@ -0,0 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.hive.service.rpc.thrift;
+
+/**
+ * Wrapper to allow the same code to instantiate the equivalent classes from Hive 1 and
+ * Hive 2 APIs.
+ */
+public class TGetTablesReq extends org.apache.hive.service.cli.thrift.TGetTablesReq {}


[5/7] incubator-impala git commit: IMPALA-5120: Default to partitioned join when stats are missing

Posted by ta...@apache.org.
IMPALA-5120: Default to partitioned join when stats are missing

Previously, we defaulted to a broadcast join when stats were
missing, but this can lead to disastrous plans when the
right-hand side is actually large.

It's always difficult to make good plans when stats are missing,
but defaulting to partitioned joins should reduce the risk of
disastrous plans.

Testing:
- Added a planner test that joins a table with no stats.

Change-Id: Ie168ecfcd5e7c5d3c60d16926c151f8f134c81e0
Reviewed-on: http://gerrit.cloudera.org:8080/6803
Reviewed-by: Thomas Tauber-Marshall <tm...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/aca07ee8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/aca07ee8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/aca07ee8

Branch: refs/heads/master
Commit: aca07ee8160bbea0812dc4ba3c08dff818240d22
Parents: 374f112
Author: Thomas Tauber-Marshall <tm...@cloudera.com>
Authored: Thu May 4 13:51:08 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Mon May 8 19:05:11 2017 +0000

----------------------------------------------------------------------
 .../impala/planner/DistributedPlanner.java      |  3 ++-
 .../queries/PlannerTest/joins.test              | 22 ++++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/aca07ee8/fe/src/main/java/org/apache/impala/planner/DistributedPlanner.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/DistributedPlanner.java b/fe/src/main/java/org/apache/impala/planner/DistributedPlanner.java
index e0e325c..83c8ccb 100644
--- a/fe/src/main/java/org/apache/impala/planner/DistributedPlanner.java
+++ b/fe/src/main/java/org/apache/impala/planner/DistributedPlanner.java
@@ -458,7 +458,8 @@ public class DistributedPlanner {
     // repartition: both left- and rightChildFragment are partitioned on the
     // join exprs, and a hash table is built with the rightChildFragment's output.
     PlanNode lhsTree = leftChildFragment.getPlanRoot();
-    long partitionCost = Long.MAX_VALUE;
+    // Subtract 1 here so that if stats are missing we default to partitioned.
+    long partitionCost = Long.MAX_VALUE - 1;
     List<Expr> lhsJoinExprs = Lists.newArrayList();
     List<Expr> rhsJoinExprs = Lists.newArrayList();
     for (Expr joinConjunct: node.getEqJoinConjuncts()) {
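
A standalone sketch (not the actual DistributedPlanner code) of why seeding the
partitioned cost one below Long.MAX_VALUE flips the default: when stats are
missing, neither cost can be improved from its sentinel value, and a comparison
that prefers broadcast on ties (assumed here, and unchanged by this commit) now
sees the partitioned side as strictly cheaper.

public class JoinStrategyExample {
  enum Strategy { BROADCAST, PARTITIONED }

  // Assumed tie-breaking rule: broadcast wins when its cost is <= the
  // partitioned cost.
  static Strategy choose(long broadcastCost, long partitionCost) {
    return broadcastCost <= partitionCost ? Strategy.BROADCAST : Strategy.PARTITIONED;
  }

  public static void main(String[] args) {
    long broadcastCost = Long.MAX_VALUE;      // unknown: no stats for the build side
    long partitionCost = Long.MAX_VALUE - 1;  // seeded one lower, as in this patch
    System.out.println(choose(broadcastCost, partitionCost));  // prints PARTITIONED
  }
}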

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/aca07ee8/testdata/workloads/functional-planner/queries/PlannerTest/joins.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/joins.test b/testdata/workloads/functional-planner/queries/PlannerTest/joins.test
index 0fdb19d..26b8c64 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/joins.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/joins.test
@@ -2519,3 +2519,25 @@ PLAN-ROOT SINK
 00:SCAN HDFS [tpch.customer a]
    partitions=1/1 files=1 size=23.08MB
 ====
+# If stats aren't available, default to partitioned join.
+select * from functional.tinytable x, functional.tinytable y where x.a = y.a
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+05:EXCHANGE [UNPARTITIONED]
+|
+02:HASH JOIN [INNER JOIN, PARTITIONED]
+|  hash predicates: x.a = y.a
+|  runtime filters: RF000 <- y.a
+|
+|--04:EXCHANGE [HASH(y.a)]
+|  |
+|  01:SCAN HDFS [functional.tinytable y]
+|     partitions=1/1 files=1 size=38B
+|
+03:EXCHANGE [HASH(x.a)]
+|
+00:SCAN HDFS [functional.tinytable x]
+   partitions=1/1 files=1 size=38B
+   runtime filters: RF000 -> x.a
+====


[4/7] incubator-impala git commit: IMPALA-3224: De-Cloudera non-docs JIRA URLs

Posted by ta...@apache.org.
IMPALA-3224: De-Cloudera non-docs JIRA URLs

John Russell is planning to fix the URLs in the docs in a separate commit.

Fixed using:

    (git ls-files | xargs replace \
    'https://issues.cloudera.org/browse/IMPALA' 'IMPALA' --) && \
    git checkout HEAD docs

Change-Id: I28ea06e89341de234f9005fdc72a2e43f0ab8182
Reviewed-on: http://gerrit.cloudera.org:8080/6487
Reviewed-by: Jim Apple <jb...@apache.org>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/374f1121
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/374f1121
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/374f1121

Branch: refs/heads/master
Commit: 374f1121dab4deb2ac18fd204156f45e3f3221c1
Parents: 5cab97f
Author: Jim Apple <jb...@apache.org>
Authored: Sat Mar 25 12:57:18 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Sun May 7 04:44:57 2017 +0000

----------------------------------------------------------------------
 .../apache/impala/analysis/CreateTableStmt.java   |  3 +--
 shell/shell_output.py                             |  3 +--
 testdata/bin/compute-table-stats.sh               |  3 +--
 testdata/bin/create-load-data.sh                  | 14 +++++---------
 testdata/bin/load-test-warehouse-snapshot.sh      |  4 ++--
 testdata/bin/setup-hdfs-env.sh                    |  2 +-
 tests/comparison/db_connection.py                 |  3 +--
 tests/comparison/discrepancy_searcher.py          | 18 ++++++++----------
 tests/comparison/query_generator.py               |  4 ++--
 tests/custom_cluster/test_kudu_not_available.py   |  3 +--
 tests/stress/concurrent_select.py                 |  3 +--
 11 files changed, 24 insertions(+), 36 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/374f1121/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java
index 8ee7927..21bcb33 100644
--- a/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java
@@ -247,8 +247,7 @@ public class CreateTableStmt extends StatementBase {
     }
 
     // TODO: Find out what is creating a directory in HDFS and stop doing that. Kudu
-    //       tables shouldn't have HDFS dirs.
-    //       https://issues.cloudera.org/browse/IMPALA-3570
+    //       tables shouldn't have HDFS dirs: IMPALA-3570
     AnalysisUtils.throwIfNotNull(getCachingOp(),
         "A Kudu table cannot be cached in HDFS.");
     AnalysisUtils.throwIfNotNull(getLocation(), "LOCATION cannot be specified for a " +

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/374f1121/shell/shell_output.py
----------------------------------------------------------------------
diff --git a/shell/shell_output.py b/shell/shell_output.py
index 35ba920..f0cecc8 100644
--- a/shell/shell_output.py
+++ b/shell/shell_output.py
@@ -36,8 +36,7 @@ class PrettyOutputFormatter(object):
     except Exception, e:
       # beeswax returns each row as a tab separated string. If a string column
       # value in a row has tabs, it will break the row split. Default to displaying
-      # raw results. This will change with a move to hiveserver2.
-      # Reference:  https://issues.cloudera.org/browse/IMPALA-116
+      # raw results. This will change with a move to hiveserver2. Reference: IMPALA-116
       error_msg = ("Prettytable cannot resolve string columns values that have "
                    " embedded tabs. Reverting to tab delimited text output")
       print >>sys.stderr, error_msg

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/374f1121/testdata/bin/compute-table-stats.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/compute-table-stats.sh b/testdata/bin/compute-table-stats.sh
index f06efab..de33017 100755
--- a/testdata/bin/compute-table-stats.sh
+++ b/testdata/bin/compute-table-stats.sh
@@ -24,8 +24,7 @@ trap 'echo Error in $0 at line $LINENO: $(cd "'$PWD'" && awk "NR == $LINENO" $0)
 
 . ${IMPALA_HOME}/bin/impala-config.sh > /dev/null 2>&1
 
-# TODO: We need a better way of managing how these get set. See:
-# https://issues.cloudera.org/browse/IMPALA-4346
+# TODO: We need a better way of managing how these get set. See IMPALA-4346
 IMPALAD=${IMPALAD:-localhost:21000}
 
 COMPUTE_STATS_SCRIPT="${IMPALA_HOME}/tests/util/compute_table_stats.py --impalad=${IMPALAD}"

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/374f1121/testdata/bin/create-load-data.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/create-load-data.sh b/testdata/bin/create-load-data.sh
index 7a351fc..bc24485 100755
--- a/testdata/bin/create-load-data.sh
+++ b/testdata/bin/create-load-data.sh
@@ -35,8 +35,7 @@ trap 'echo Error in $0 at line $LINENO: $(cd "'$PWD'" && awk "NR == $LINENO" $0)
 . ${IMPALA_HOME}/testdata/bin/run-step.sh
 
 # Environment variables used to direct the data loading process to an external cluster.
-# TODO: We need a better way of managing how these get set. See:
-# https://issues.cloudera.org/browse/IMPALA-4346
+# TODO: We need a better way of managing how these get set. See IMPALA-4346
 : ${HS2_HOST_PORT=localhost:11050}
 : ${HDFS_NN=localhost:20500}
 : ${IMPALAD=localhost:21000}
@@ -181,8 +180,7 @@ function load-data {
     WORKLOAD="functional"
   fi
 
-  # TODO: Why is there a REMOTE_LOAD condition?
-  # See https://issues.cloudera.org/browse/IMPALA-4347
+  # TODO: Why is there a REMOTE_LOAD condition? See IMPALA-4347
   #
   # Force load the dataset if we detect a schema change.
   if [[ -z "$REMOTE_LOAD" ]]; then
@@ -296,7 +294,7 @@ function copy-and-load-dependent-tables {
   # The error occurs while loading dependent tables.
   #
   # See: logs/data_loading/copy-and-load-dependent-tables.log)
-  # See also: https://issues.cloudera.org/browse/IMPALA-4345
+  # See also: IMPALA-4345
   hadoop fs -chmod -R 777 /tmp/alltypes_rc
   hadoop fs -chmod -R 777 /tmp/alltypes_seq
 
@@ -341,8 +339,7 @@ function load-custom-data {
   # Cleanup the old bad_text_lzo files, if they exist.
   hadoop fs -rm -r -f /test-warehouse/bad_text_lzo/
 
-  # TODO: Why is there a REMOTE_LOAD condition?
-  # See https://issues.cloudera.org/browse/IMPALA-4347
+  # TODO: Why is there a REMOTE_LOAD condition? See IMPALA-4347
   if [[ -z $REMOTE_LOAD ]]; then
     # Index all lzo files in HDFS under /test-warehouse
     ${IMPALA_HOME}/testdata/bin/lzo_indexer.sh /test-warehouse
@@ -414,8 +411,7 @@ function build-and-copy-hive-udfs {
 
 # Additional data loading actions that must be executed after the main data is loaded.
 function custom-post-load-steps {
-  # TODO: Why is there a REMOTE_LOAD condition?
-  # See https://issues.cloudera.org/browse/IMPALA-4347
+  # TODO: Why is there a REMOTE_LOAD condition? See IMPALA-4347
   if [[ -z "$REMOTE_LOAD" ]]; then
     # Configure alltypes_seq as a read-only table. This is required for fe tests.
     # Set both read and execute permissions because accessing the contents of a directory on

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/374f1121/testdata/bin/load-test-warehouse-snapshot.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/load-test-warehouse-snapshot.sh b/testdata/bin/load-test-warehouse-snapshot.sh
index 604c8f1..311a9ea 100755
--- a/testdata/bin/load-test-warehouse-snapshot.sh
+++ b/testdata/bin/load-test-warehouse-snapshot.sh
@@ -73,8 +73,8 @@ if [[ "$REPLY" =~ ^[Yy]$ ]]; then
     echo "Creating ${TEST_WAREHOUSE_DIR} directory"
     hadoop fs -mkdir -p ${FILESYSTEM_PREFIX}${TEST_WAREHOUSE_DIR}
 
-    # TODO: commented out because of regressions in local end-to-end testing
-    # See: https://issues.cloudera.org/browse/IMPALA-4345
+    # TODO: commented out because of regressions in local end-to-end testing. See
+    # IMPALA-4345
     #
     # hdfs dfs -chmod 1777 ${FILESYSTEM_PREFIX}${TEST_WAREHOUSE_DIR}
   fi

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/374f1121/testdata/bin/setup-hdfs-env.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/setup-hdfs-env.sh b/testdata/bin/setup-hdfs-env.sh
index 259cac1..ece94de 100755
--- a/testdata/bin/setup-hdfs-env.sh
+++ b/testdata/bin/setup-hdfs-env.sh
@@ -43,7 +43,7 @@ fi
 
 # TODO: Investigate how to setup encryption keys for running HDFS encryption tests
 # against a remote cluster, rather than the local mini-cluster (i.e., when REMOTE_LOAD
-# is true. See: https://issues.cloudera.org/browse/IMPALA-4344)
+# is true. See: IMPALA-4344)
 
 if [[ $TARGET_FILESYSTEM == hdfs && -z "$REMOTE_LOAD" ]]; then  # Otherwise assume KMS isn't setup.
   # Create encryption keys for HDFS encryption tests. Keys are stored by the KMS.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/374f1121/tests/comparison/db_connection.py
----------------------------------------------------------------------
diff --git a/tests/comparison/db_connection.py b/tests/comparison/db_connection.py
index 8722755..6f9b7ce 100644
--- a/tests/comparison/db_connection.py
+++ b/tests/comparison/db_connection.py
@@ -833,8 +833,7 @@ class ImpalaCursor(DbCursor):
 
   def close(self, quiet=False):
     try:
-      # Explicitly close the operation to avoid issues like
-      # https://issues.cloudera.org/browse/IMPALA-2562.
+      # Explicitly close the operation to avoid issues like IMPALA-2562.
       # This can be removed if https://github.com/cloudera/impyla/pull/142 is merged.
       self._cursor.close_operation()
       self._cursor.close()

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/374f1121/tests/comparison/discrepancy_searcher.py
----------------------------------------------------------------------
diff --git a/tests/comparison/discrepancy_searcher.py b/tests/comparison/discrepancy_searcher.py
index 6534e4d..ccbdd66 100755
--- a/tests/comparison/discrepancy_searcher.py
+++ b/tests/comparison/discrepancy_searcher.py
@@ -128,10 +128,10 @@ class QueryResultComparator(object):
         if 'Expressions in the ORDER BY clause must not be constant' in error_message \
             or 'Expressions in the PARTITION BY clause must not be consta' in error_message:
           # It's too much work to avoid this bug. Just ignore it if it comes up.
-          known_error = KnownError('https://issues.cloudera.org/browse/IMPALA-1354')
+          known_error = KnownError('IMPALA-1354')
         elif 'GROUP BY expression must not contain aggregate functions' in error_message \
             or 'select list expression not produced by aggregation output' in error_message:
-          known_error = KnownError('https://issues.cloudera.org/browse/IMPALA-1423')
+          known_error = KnownError('IMPALA-1423')
         elif ('max(' in error_message or 'min(' in error_message) \
             and 'only supported with an UNBOUNDED PRECEDING start bound' in error_message:
           # This analytic isn't supported and ignoring this here is much easier than not
@@ -139,18 +139,18 @@ class QueryResultComparator(object):
           known_error = KnownError('MAX UNBOUNDED PRECISION')
         elif 'IN and/or EXISTS subquery predicates are not supported in binary predicates' \
             in error_message:
-          known_error = KnownError('https://issues.cloudera.org/browse/IMPALA-1418')
+          known_error = KnownError('IMPALA-1418')
         elif 'Unsupported predicate with subquery' in error_message:
-          known_error = KnownError('https://issues.cloudera.org/browse/IMPALA-1950')
+          known_error = KnownError('IMPALA-1950')
         elif 'RIGHT OUTER JOIN type with no equi-join' in error_message:
-          known_error = KnownError('https://issues.cloudera.org/browse/IMPALA-3063')
+          known_error = KnownError('IMPALA-3063')
         elif 'Operation is in ERROR_STATE' in error_message:
           known_error = KnownError('Mem limit exceeded')
       elif self.test_db_type is db_connection.HIVE:
         if 'ParseException line' in error_message and 'missing ) at' in \
               error_message and query.select_clause and \
               query.select_clause.analytic_items:
-          known_error = KnownError("https://issues.apache.org/jira/browse/HIVE-14871")
+          known_error = KnownError("HIVE-14871")
 
       if known_error:
         comparison_result.exception = known_error
@@ -189,13 +189,11 @@ class QueryResultComparator(object):
             and isinstance(ref_val, (int, float, Decimal)) \
             and abs(ref_val) > BigInt.MAX:
           # Impala will return incorrect results if the val is greater than max BigInt
-          comparison_result.exception = KnownError(
-              'https://issues.cloudera.org/browse/IMPALA-865')
+          comparison_result.exception = KnownError('IMPALA-865')
         elif isinstance(test_val, float) \
             and (isinf(test_val) or isnan(test_val)):
           # In some cases, Impala gives NaNs and Infs instead of NULLs
-          comparison_result.exception = KnownError(
-              'https://issues.cloudera.org/browse/IMPALA-724')
+          comparison_result.exception = KnownError('IMPALA-724')
         comparison_result.ref_row = ref_row
         comparison_result.test_row = test_row
         comparison_result.mismatch_at_row_number = row_idx + 1

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/374f1121/tests/comparison/query_generator.py
----------------------------------------------------------------------
diff --git a/tests/comparison/query_generator.py b/tests/comparison/query_generator.py
index 22e92f2..be51910 100644
--- a/tests/comparison/query_generator.py
+++ b/tests/comparison/query_generator.py
@@ -514,7 +514,7 @@ class QueryGenerator(object):
               select_item_data_types=select_item_data_types,
               required_table_expr_col_type=join_expr_type,
               require_aggregate=use_agg_subquery,
-              # Don't use UNION + LIMIT; https://issues.cloudera.org/browse/IMPALA-1379
+              # Don't use UNION + LIMIT; IMPALA-1379
               allow_union_clause=(not signature_arg.is_subquery),
               table_alias_prefix=(table_alias_prefix +
                   ('t' if use_correlated_subquery else '')),
@@ -1322,7 +1322,7 @@ class QueryGenerator(object):
       predicate, _ = self._create_boolean_func_tree()
     predicate = self.populate_func_with_vals(
         predicate, val_exprs=basic_select_item_exprs)
-    # https://issues.cloudera.org/browse/IMPALA-1423
+    # IMPALA-1423
     # Make sure any cols used have a table identifier. As of this writing, only
     # single-table FROM clauses don't use table aliases. Setting a table alias
     # automatically propagates as a column table identifier ("t1.col" instead of "col").

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/374f1121/tests/custom_cluster/test_kudu_not_available.py
----------------------------------------------------------------------
diff --git a/tests/custom_cluster/test_kudu_not_available.py b/tests/custom_cluster/test_kudu_not_available.py
index 5fb3df5..090854d 100644
--- a/tests/custom_cluster/test_kudu_not_available.py
+++ b/tests/custom_cluster/test_kudu_not_available.py
@@ -43,8 +43,7 @@ class TestKuduNotAvailable(CustomClusterTestSuite):
         return
       cursor.execute("USE functional_kudu")
 
-      # CREATE TABLE succeeds, the execution is in the frontend only.
-      # https://issues.cloudera.org/browse/IMPALA-3233
+      # CREATE TABLE succeeds since the execution is in the frontend only. See IMPALA-3233
       self.assert_failure("SELECT * FROM tinytable", cursor)
       self.assert_failure("INSERT INTO tinytable VALUES ('a', 'b')", cursor)
       self.assert_failure("DELETE FROM tinytable", cursor)

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/374f1121/tests/stress/concurrent_select.py
----------------------------------------------------------------------
diff --git a/tests/stress/concurrent_select.py b/tests/stress/concurrent_select.py
index 7aa44b7..b95c427 100755
--- a/tests/stress/concurrent_select.py
+++ b/tests/stress/concurrent_select.py
@@ -900,8 +900,7 @@ class QueryRunner(object):
             op_handle_to_query_id(cursor._last_operation.handle), e)
     caught_msg = str(caught_exception).lower().strip()
 
-    # Exceeding a mem limit may result in the message "cancelled".
-    # https://issues.cloudera.org/browse/IMPALA-2234
+    # Exceeding a mem limit may result in the message "cancelled". See IMPALA-2234
     if "memory limit exceeded" in caught_msg or \
        "repartitioning did not reduce the size of a spilled partition" in caught_msg or \
        caught_msg == "cancelled":


[2/7] incubator-impala git commit: IMPALA-3654: Parquet stats filtering for IN predicate

Posted by ta...@apache.org.
IMPALA-3654: Parquet stats filtering for IN predicate

This generates min/max predicates for InPredicates whose
IN lists contain only constant values. The generated
predicates are used only for statistics filtering on
Parquet files.
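
To illustrate the idea, here is a minimal standalone Java sketch (the
class and method names below are illustrative only and are not part of
the Impala planner, which implements this in HdfsScanNode.java as shown
in the diff further down; an integer column is assumed): an IN list of
literals such as smallint_col IN (5, 1, 3) can be bounded by the range
smallint_col >= 1 AND smallint_col <= 5, and it is that range which is
evaluated against the Parquet row group statistics.

import java.util.Arrays;
import java.util.List;

public class InListMinMaxSketch {
  /**
   * Returns {min, max} for the literal values of an IN list, or null if the
   * list is empty or contains a NULL (the cases where no bounds can be built).
   */
  static int[] toMinMaxBounds(List<Integer> inList) {
    if (inList.isEmpty()) return null;
    int min = Integer.MAX_VALUE;
    int max = Integer.MIN_VALUE;
    for (Integer v : inList) {
      if (v == null) return null;  // a NULL literal invalidates the bounds
      min = Math.min(min, v);
      max = Math.max(max, v);
    }
    return new int[] {min, max};
  }

  public static void main(String[] args) {
    // smallint_col IN (5, 1, 3)  =>  smallint_col >= 1 AND smallint_col <= 5
    int[] bounds = toMinMaxBounds(Arrays.asList(5, 1, 3));
    System.out.println("smallint_col >= " + bounds[0]
        + " AND smallint_col <= " + bounds[1]);
  }
}

Row groups whose column statistics fall entirely outside these bounds can
be skipped, which is what the NumStatsFilteredRowGroups counters in the
new parquet_stats.test cases below verify.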

Change-Id: I4a88963a7206f40a867e49eceeaf03fdd4f71997
Reviewed-on: http://gerrit.cloudera.org:8080/6810
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/aa05c649
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/aa05c649
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/aa05c649

Branch: refs/heads/master
Commit: aa05c6493b0ff8bbf422a4c38cf780bde34d51c7
Parents: c26a485
Author: Joe McDonnell <jo...@cloudera.com>
Authored: Fri Apr 14 11:59:08 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Sat May 6 03:40:57 2017 +0000

----------------------------------------------------------------------
 .../org/apache/impala/planner/HdfsScanNode.java | 110 +++++++++++++------
 .../queries/PlannerTest/parquet-filtering.test  |  39 ++++++-
 .../queries/QueryTest/parquet_stats.test        |  17 +++
 3 files changed, 130 insertions(+), 36 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/aa05c649/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
index 2a9c84c..bd260e0 100644
--- a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
@@ -32,6 +32,10 @@ import org.apache.impala.analysis.Analyzer;
 import org.apache.impala.analysis.BinaryPredicate;
 import org.apache.impala.analysis.DescriptorTable;
 import org.apache.impala.analysis.Expr;
+import org.apache.impala.analysis.FunctionCallExpr;
+import org.apache.impala.analysis.InPredicate;
+import org.apache.impala.analysis.LiteralExpr;
+import org.apache.impala.analysis.NullLiteral;
 import org.apache.impala.analysis.SlotDescriptor;
 import org.apache.impala.analysis.SlotId;
 import org.apache.impala.analysis.SlotRef;
@@ -326,6 +330,77 @@ public class HdfsScanNode extends ScanNode {
     minMaxConjuncts_.add(statsPred);
   }
 
+  private void tryComputeBinaryMinMaxPredicate(Analyzer analyzer,
+      BinaryPredicate binaryPred) {
+    // We only support slot refs on the left hand side of the predicate, a rewriting
+    // rule makes sure that all compatible exprs are rewritten into this form. Only
+    // implicit casts are supported.
+    SlotRef slot = binaryPred.getChild(0).unwrapSlotRef(true);
+    if (slot == null) return;
+
+    // This node is a table scan, so this must be a scanning slot.
+    Preconditions.checkState(slot.getDesc().isScanSlot());
+    // If the column is null, then this can be a 'pos' scanning slot of a nested type.
+    if (slot.getDesc().getColumn() == null) return;
+
+    Expr constExpr = binaryPred.getChild(1);
+    // Only constant exprs can be evaluated against parquet::Statistics. This includes
+    // LiteralExpr, but can also be an expr like "1 + 2".
+    if (!constExpr.isConstant()) return;
+    if (constExpr.isNullLiteral()) return;
+
+    BinaryPredicate.Operator op = binaryPred.getOp();
+    if (op == BinaryPredicate.Operator.LT || op == BinaryPredicate.Operator.LE ||
+        op == BinaryPredicate.Operator.GE || op == BinaryPredicate.Operator.GT) {
+      minMaxOriginalConjuncts_.add(binaryPred);
+      buildStatsPredicate(analyzer, slot, binaryPred, op);
+    } else if (op == BinaryPredicate.Operator.EQ) {
+      minMaxOriginalConjuncts_.add(binaryPred);
+      // TODO: this could be optimized for boolean columns.
+      buildStatsPredicate(analyzer, slot, binaryPred, BinaryPredicate.Operator.LE);
+      buildStatsPredicate(analyzer, slot, binaryPred, BinaryPredicate.Operator.GE);
+    }
+  }
+
+  private void tryComputeInListMinMaxPredicate(Analyzer analyzer, InPredicate inPred) {
+    // Retrieve the left side of the IN predicate. It must be a simple slot to
+    // proceed.
+    SlotRef slot = inPred.getBoundSlot();
+    if (slot == null) return;
+    // This node is a table scan, so this must be a scanning slot.
+    Preconditions.checkState(slot.getDesc().isScanSlot());
+    // If the column is null, then this can be a 'pos' scanning slot of a nested type.
+    if (slot.getDesc().getColumn() == null) return;
+    if (inPred.isNotIn()) return;
+
+    ArrayList<Expr> children = inPred.getChildren();
+    LiteralExpr min = null;
+    LiteralExpr max = null;
+    for (int i = 1; i < children.size(); ++i) {
+      Expr child = children.get(i);
+
+      // If any child is not a literal, then nothing can be done
+      if (!child.isLiteral()) return;
+      LiteralExpr literalChild = (LiteralExpr) child;
+      // If any child is NULL, then there is not a valid min/max. Nothing can be done.
+      if (literalChild instanceof NullLiteral) return;
+
+      if (min == null || literalChild.compareTo(min) < 0) min = literalChild;
+      if (max == null || literalChild.compareTo(max) > 0) max = literalChild;
+    }
+    Preconditions.checkState(min != null);
+    Preconditions.checkState(max != null);
+
+    BinaryPredicate minBound = new BinaryPredicate(BinaryPredicate.Operator.GE,
+        children.get(0).clone(), min.clone());
+    BinaryPredicate maxBound = new BinaryPredicate(BinaryPredicate.Operator.LE,
+        children.get(0).clone(), max.clone());
+
+    minMaxOriginalConjuncts_.add(inPred);
+    buildStatsPredicate(analyzer, slot, minBound, minBound.getOp());
+    buildStatsPredicate(analyzer, slot, maxBound, maxBound.getOp());
+  }
+
   /**
    * Analyzes 'conjuncts_', populates 'minMaxTuple_' with slots for statistics values, and
    * populates 'minMaxConjuncts_' with conjuncts pointing into the 'minMaxTuple_'. Only
@@ -340,38 +415,11 @@ public class HdfsScanNode extends ScanNode {
     minMaxTuple_.setPath(desc_.getPath());
 
     for (Expr pred: conjuncts_) {
-      if (!(pred instanceof BinaryPredicate)) continue;
-      BinaryPredicate binaryPred = (BinaryPredicate) pred;
-
-      // We only support slot refs on the left hand side of the predicate, a rewriting
-      // rule makes sure that all compatible exprs are rewritten into this form. Only
-      // implicit casts are supported.
-      SlotRef slot = binaryPred.getChild(0).unwrapSlotRef(true);
-      if (slot == null) continue;
-
-      // This node is a table scan, so this must be a scanning slot.
-      Preconditions.checkState(slot.getDesc().isScanSlot());
-      // If the column is null, then this can be a 'pos' scanning slot of a nested type.
-      if (slot.getDesc().getColumn() == null) continue;
-
-      Expr constExpr = binaryPred.getChild(1);
-      // Only constant exprs can be evaluated against parquet::Statistics. This includes
-      // LiteralExpr, but can also be an expr like "1 + 2".
-      if (!constExpr.isConstant()) continue;
-      if (constExpr.isNullLiteral()) continue;
-
-      BinaryPredicate.Operator op = binaryPred.getOp();
-      if (op == BinaryPredicate.Operator.LT || op == BinaryPredicate.Operator.LE ||
-          op == BinaryPredicate.Operator.GE || op == BinaryPredicate.Operator.GT) {
-        minMaxOriginalConjuncts_.add(pred);
-        buildStatsPredicate(analyzer, slot, binaryPred, op);
-      } else if (op == BinaryPredicate.Operator.EQ) {
-        minMaxOriginalConjuncts_.add(pred);
-        // TODO: this could be optimized for boolean columns.
-        buildStatsPredicate(analyzer, slot, binaryPred, BinaryPredicate.Operator.LE);
-        buildStatsPredicate(analyzer, slot, binaryPred, BinaryPredicate.Operator.GE);
+      if (pred instanceof BinaryPredicate) {
+        tryComputeBinaryMinMaxPredicate(analyzer, (BinaryPredicate) pred);
+      } else if (pred instanceof InPredicate) {
+        tryComputeInListMinMaxPredicate(analyzer, (InPredicate) pred);
       }
-
     }
     minMaxTuple_.computeMemLayout();
   }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/aa05c649/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test b/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test
index df5f99d..31451aa 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test
@@ -28,9 +28,10 @@ PLAN-ROOT SINK
 ====
 # Test a variety of types
 select count(*) from functional_parquet.alltypes
-where id = 1 and bool_col and tinyint_col < 50 and smallint_col > 50
+where id = 1 and bool_col and tinyint_col < 50 and smallint_col in (1,2,3,4,5)
 and mod(int_col,2) = 1 and bigint_col < 5000 and float_col > 50.00
-and double_col > 100.00 and date_string_col > '1993-10-01' and string_col > 'aaaa'
+and double_col > 100.00 and date_string_col > '1993-10-01'
+and string_col in ('aaaa', 'bbbb', 'cccc')
 and timestamp_cmp(timestamp_col, '2016-11-20 00:00:00') = 1
 and year > 2000 and month < 12;
 ---- PLAN
@@ -45,11 +46,39 @@ PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional_parquet.alltypes]
    partitions=22/24 files=22 size=143.36KB
-   predicates: bool_col, bigint_col < 5000, double_col > 100.00, float_col > 50.00, id = 1, smallint_col > 50, tinyint_col < 50, string_col > 'aaaa', mod(int_col, 2) = 1, timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = 1, date_string_col > '1993-10-01'
+   predicates: bool_col, bigint_col < 5000, double_col > 100.00, float_col > 50.00, id = 1, tinyint_col < 50, string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (1, 2, 3, 4, 5), mod(int_col, 2) = 1, timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = 1, date_string_col > '1993-10-01'
    table stats: unavailable
    columns missing stats: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col
-   parquet statistics predicates: bigint_col < 5000, double_col > 100.00, float_col > 50.00, id = 1, smallint_col > 50, tinyint_col < 50, string_col > 'aaaa', date_string_col > '1993-10-01'
-   parquet dictionary predicates: bool_col, bigint_col < 5000, double_col > 100.00, float_col > 50.00, id = 1, smallint_col > 50, tinyint_col < 50, string_col > 'aaaa', mod(int_col, 2) = 1, timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = 1, date_string_col > '1993-10-01'
+   parquet statistics predicates: bigint_col < 5000, double_col > 100.00, float_col > 50.00, id = 1, tinyint_col < 50, string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (1, 2, 3, 4, 5), date_string_col > '1993-10-01'
+   parquet dictionary predicates: bool_col, bigint_col < 5000, double_col > 100.00, float_col > 50.00, id = 1, tinyint_col < 50, string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (1, 2, 3, 4, 5), mod(int_col, 2) = 1, timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = 1, date_string_col > '1993-10-01'
    mem-estimate=128.00MB mem-reservation=0B
    tuple-ids=0 row-size=80B cardinality=unavailable
 ====
+# Test negative cases for IN predicate min/max filtering
+#  - NOT IN
+#  - IN list with NULL
+#  - IN list contains non-Literals
+#  - complex expression on left side of IN
+select count(*) from functional_parquet.alltypes
+where id NOT IN (0,1,2) and string_col IN ('aaaa', 'bbbb', 'cccc', NULL)
+and mod(int_col,50) IN (0,1)
+and id IN (int_col);
+---- PLAN
+F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+  PLAN-ROOT SINK
+  |  mem-estimate=0B mem-reservation=0B
+  |
+  01:AGGREGATE [FINALIZE]
+  |  output: count(*)
+  |  mem-estimate=10.00MB mem-reservation=0B
+  |  tuple-ids=1 row-size=8B cardinality=1
+  |
+  00:SCAN HDFS [functional_parquet.alltypes]
+     partitions=24/24 files=24 size=173.09KB
+     predicates: id IN (int_col), id NOT IN (0, 1, 2), string_col IN ('aaaa', 'bbbb', 'cccc', NULL), mod(int_col, 50) IN (0, 1)
+     table stats: unavailable
+     column stats: unavailable
+     parquet dictionary predicates: id NOT IN (0, 1, 2), string_col IN ('aaaa', 'bbbb', 'cccc', NULL), mod(int_col, 50) IN (0, 1)
+     mem-estimate=48.00MB mem-reservation=0B
+     tuple-ids=0 row-size=24B cardinality=unavailable
+====
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/aa05c649/testdata/workloads/functional-query/queries/QueryTest/parquet_stats.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/parquet_stats.test b/testdata/workloads/functional-query/queries/QueryTest/parquet_stats.test
index 6f9393d..f88d1df 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/parquet_stats.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/parquet_stats.test
@@ -279,3 +279,20 @@ select count(*) from functional_parquet.complextypestbl.int_array where pos < 5;
 row_regex: .*NumRowGroups: 2 .*
 row_regex: .*NumStatsFilteredRowGroups: 0 .*
 ====
+---- QUERY
+# Test the conversion of constant IN lists to min/max predicates
+select count(*) from functional_parquet.alltypes where int_col in (-1,-2,-3,-4);
+---- RESULTS
+0
+---- RUNTIME_PROFILE
+aggregation(SUM, NumRowGroups): 24
+aggregation(SUM, NumStatsFilteredRowGroups): 24
+====
+---- QUERY
+select count(*) from functional_parquet.alltypes where id IN (1,25,49);
+---- RESULTS
+3
+---- RUNTIME_PROFILE
+aggregation(SUM, NumRowGroups): 24
+aggregation(SUM, NumStatsFilteredRowGroups): 23
+====
\ No newline at end of file