You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ma...@apache.org on 2016/08/22 17:16:58 UTC

incubator-impala git commit: IMPALA-3988: Only use first 96 bits of query id

Repository: incubator-impala
Updated Branches:
  refs/heads/master b251fb061 -> b69e469e9


IMPALA-3988: Only use first 96 bits of query id

This adds utility functions in uid-util.h to create query and instance
ids and convert between them. It also adapts SimpleScheduler to
utilize those functions when creating the instance id
(TPlanFragmentInstanceCtx.fragment_instance_id).

Change-Id: Ia56a03ed9a1d7e77c72b66a01cd48c5b6bf3624f
Reviewed-on: http://gerrit.cloudera.org:8080/4065
Reviewed-by: Marcel Kornacker <ma...@cloudera.com>
Tested-by: Marcel Kornacker <ma...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/b69e469e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/b69e469e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/b69e469e

Branch: refs/heads/master
Commit: b69e469e99e4b0c08bc4718eb55b041cf7c11ffe
Parents: b251fb0
Author: Marcel Kornacker <ma...@cloudera.com>
Authored: Fri Aug 19 09:32:22 2016 -0700
Committer: Marcel Kornacker <ma...@cloudera.com>
Committed: Mon Aug 22 17:15:37 2016 +0000

----------------------------------------------------------------------
 be/src/scheduling/simple-scheduler.cc      | 11 ++----
 be/src/service/impala-server.cc            |  2 +-
 be/src/util/CMakeLists.txt                 |  1 +
 be/src/util/uid-util-test.cc               | 49 +++++++++++++++++++++++++
 be/src/util/uid-util.h                     | 34 +++++++++++++++++
 common/thrift/ImpalaInternalService.thrift |  9 ++++-
 6 files changed, 96 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b69e469e/be/src/scheduling/simple-scheduler.cc
----------------------------------------------------------------------
diff --git a/be/src/scheduling/simple-scheduler.cc b/be/src/scheduling/simple-scheduler.cc
index a4d866f..5c05557 100644
--- a/be/src/scheduling/simple-scheduler.cc
+++ b/be/src/scheduling/simple-scheduler.cc
@@ -502,14 +502,9 @@ void SimpleScheduler::ComputeFragmentExecParams(const TQueryExecRequest& exec_re
   int64_t num_fragment_instances = 0;
   for (FragmentExecParams& params: *fragment_exec_params) {
     for (int j = 0; j < params.hosts.size(); ++j) {
-      int instance_num = num_fragment_instances + j;
-      // we add instance_num to query_id.lo to create a globally-unique instance id
-      TUniqueId instance_id;
-      instance_id.hi = schedule->query_id().hi;
-      DCHECK_LT(
-          schedule->query_id().lo, numeric_limits<int64_t>::max() - instance_num - 1);
-      instance_id.lo = schedule->query_id().lo + instance_num + 1;
-      params.instance_ids.push_back(instance_id);
+      int instance_idx = num_fragment_instances + j;
+      params.instance_ids.push_back(
+          CreateInstanceId(schedule->query_id(), instance_idx));
     }
     num_fragment_instances += params.hosts.size();
   }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b69e469e/be/src/service/impala-server.cc
----------------------------------------------------------------------
diff --git a/be/src/service/impala-server.cc b/be/src/service/impala-server.cc
index 9bb8c77..c749a6a 100644
--- a/be/src/service/impala-server.cc
+++ b/be/src/service/impala-server.cc
@@ -856,7 +856,7 @@ void ImpalaServer::PrepareQueryContext(TQueryCtx* query_ctx) {
   // thread-safe).
   random_generator uuid_generator;
   uuid query_uuid = uuid_generator();
-  UUIDToTUniqueId(query_uuid, &query_ctx->query_id);
+  query_ctx->query_id = UuidToQueryId(query_uuid);
 }
 
 Status ImpalaServer::RegisterQuery(shared_ptr<SessionState> session_state,

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b69e469e/be/src/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt
index 3421c46..dc2df01 100644
--- a/be/src/util/CMakeLists.txt
+++ b/be/src/util/CMakeLists.txt
@@ -141,3 +141,4 @@ ADD_BE_TEST(fixed-size-hash-table-test)
 ADD_BE_TEST(bloom-filter-test)
 ADD_BE_TEST(logging-support-test)
 ADD_BE_TEST(hdfs-util-test)
+ADD_BE_TEST(uid-util-test)

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b69e469e/be/src/util/uid-util-test.cc
----------------------------------------------------------------------
diff --git a/be/src/util/uid-util-test.cc b/be/src/util/uid-util-test.cc
new file mode 100644
index 0000000..a789562
--- /dev/null
+++ b/be/src/util/uid-util-test.cc
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <iostream>
+#include <gtest/gtest.h>
+
+#include "util/uid-util.h"
+
+namespace impala {
+
+TEST(UidUtil, FragmentInstanceId) {
+  boost::uuids::random_generator uuid_generator;
+  boost::uuids::uuid query_uuid = uuid_generator();
+  TUniqueId query_id = UuidToQueryId(query_uuid);
+
+  for (int i = 0; i < 100; ++i) {
+    TUniqueId instance_id = CreateInstanceId(query_id, i);
+    EXPECT_EQ(instance_id.hi, query_id.hi);
+
+    TUniqueId qid = GetQueryId(instance_id);
+    EXPECT_EQ(qid.hi, query_id.hi);
+    EXPECT_EQ(qid.lo, query_id.lo);
+
+    EXPECT_EQ(GetInstanceIdx(instance_id), i);
+  }
+}
+
+}
+
+int main(int argc, char **argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b69e469e/be/src/util/uid-util.h
----------------------------------------------------------------------
diff --git a/be/src/util/uid-util.h b/be/src/util/uid-util.h
index 84684d4..c78a9ea 100644
--- a/be/src/util/uid-util.h
+++ b/be/src/util/uid-util.h
@@ -19,6 +19,7 @@
 #ifndef IMPALA_UTIL_UID_UTIL_H
 #define IMPALA_UTIL_UID_UTIL_H
 
+#include <boost/functional/hash.hpp>
 #include <boost/uuid/uuid.hpp>
 #include <boost/uuid/uuid_generators.hpp>
 
@@ -27,6 +28,7 @@
 #include "common/names.h"
 
 namespace impala {
+
 /// This function must be called 'hash_value' to be picked up by boost.
 inline std::size_t hash_value(const impala::TUniqueId& id) {
   std::size_t seed = 0;
@@ -43,6 +45,38 @@ inline void UUIDToTUniqueId(const boost::uuids::uuid& uuid, T* unique_id) {
   memcpy(&(unique_id->lo), &uuid.data[8], 8);
 }
 
+/// Query id: uuid with bottom 4 bytes set to 0
+/// Fragment instance id: query id with instance index stored in the bottom 4 bytes
+
+const int64_t FRAGMENT_IDX_MASK = (1L << 32) - 1;
+
+inline TUniqueId UuidToQueryId(const boost::uuids::uuid& uuid) {
+  TUniqueId result;
+  memcpy(&result.hi, &uuid.data[0], 8);
+  memcpy(&result.lo, &uuid.data[8], 8);
+  result.lo &= ~FRAGMENT_IDX_MASK;  // zero out bottom 4 bytes
+  return result;
+}
+
+inline TUniqueId GetQueryId(const TUniqueId& fragment_instance_id) {
+  TUniqueId result = fragment_instance_id;
+  result.lo &= ~FRAGMENT_IDX_MASK;  // zero out bottom 4 bytes
+  return result;
+}
+
+inline int32_t GetInstanceIdx(const TUniqueId& fragment_instance_id) {
+  return fragment_instance_id.lo & FRAGMENT_IDX_MASK;
+}
+
+inline TUniqueId CreateInstanceId(
+    const TUniqueId& query_id, int32_t instance_idx) {
+  DCHECK_EQ(GetInstanceIdx(query_id), 0);  // well-formed query id
+  DCHECK_GE(instance_idx, 0);
+  TUniqueId result = query_id;
+  result.lo += instance_idx;
+  return result;
+}
+
 template <typename F, typename T>
 inline T CastTUniqueId(const F& from) {
   T to;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b69e469e/common/thrift/ImpalaInternalService.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/ImpalaInternalService.thrift b/common/thrift/ImpalaInternalService.thrift
index ae3ae27..f4b070f 100644
--- a/common/thrift/ImpalaInternalService.thrift
+++ b/common/thrift/ImpalaInternalService.thrift
@@ -254,6 +254,7 @@ struct TQueryCtx {
   1: required TClientRequest request
 
   // A globally unique id assigned to the entire query in the BE.
+  // The bottom 4 bytes are 0 (for details see be/src/util/uid-util.h).
   2: required Types.TUniqueId query_id
 
   // Session state including user.
@@ -326,7 +327,11 @@ struct TPlanFragmentDestination {
 // of fragment instances, the query context, the coordinator address, etc.
 // TODO: for range partitioning, we also need to specify the range boundaries
 struct TPlanFragmentInstanceCtx {
-  // the globally unique fragment instance id
+  // The globally unique fragment instance id.
+  // Format: query id + query-wide fragment index
+  // The query-wide fragment index starts at 0, so that the query id
+  // and the id of the first fragment instance (the coordinator instance)
+  // are identical.
   1: required Types.TUniqueId fragment_instance_id
 
   // Index of this fragment instance accross all instances of its parent fragment,
@@ -334,6 +339,8 @@ struct TPlanFragmentInstanceCtx {
   2: required i32 fragment_instance_idx
 
   // Index of this fragment instance in Coordinator::fragment_instance_states_.
+  // TODO: remove; this is subsumed by the query-wide instance idx embedded
+  // in the fragment_instance_id
   3: required i32 instance_state_idx
 
   // Initial scan ranges for each scan node in TPlanFragment.plan_tree