You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by el...@apache.org on 2017/05/01 17:47:08 UTC

hbase git commit: HBASE-17973 Expand list_regions to filter on data locality

Repository: hbase
Updated Branches:
  refs/heads/master 94c14ad0f -> 13b6fdf8a


HBASE-17973 Expand list_regions to filter on data locality


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/13b6fdf8
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/13b6fdf8
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/13b6fdf8

Branch: refs/heads/master
Commit: 13b6fdf8ad81e236632a2dc99e6c4a317213858e
Parents: 94c14ad
Author: Josh Elser <el...@apache.org>
Authored: Fri Apr 28 12:04:26 2017 -0400
Committer: Josh Elser <el...@apache.org>
Committed: Mon May 1 13:46:41 2017 -0400

----------------------------------------------------------------------
 hbase-shell/src/main/ruby/hbase_constants.rb    |  2 +
 .../main/ruby/shell/commands/list_regions.rb    | 63 ++++++++++++++++----
 .../ruby/hbase/list_regions_test_no_cluster.rb  | 61 +++++++++++++++++++
 3 files changed, 113 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/13b6fdf8/hbase-shell/src/main/ruby/hbase_constants.rb
----------------------------------------------------------------------
diff --git a/hbase-shell/src/main/ruby/hbase_constants.rb b/hbase-shell/src/main/ruby/hbase_constants.rb
index c02d5c6..55ae9e7 100644
--- a/hbase-shell/src/main/ruby/hbase_constants.rb
+++ b/hbase-shell/src/main/ruby/hbase_constants.rb
@@ -81,6 +81,8 @@ module HBaseConstants
   NAMESPACES = 'NAMESPACES'
   CONFIG = 'CONFIG'
   DATA = 'DATA'
+  SERVER_NAME = 'SERVER_NAME'
+  LOCALITY_THRESHOLD = 'LOCALITY_THRESHOLD'
 
   # Load constants from hbase java API
   def self.promote_constants(constants)

http://git-wip-us.apache.org/repos/asf/hbase/blob/13b6fdf8/hbase-shell/src/main/ruby/shell/commands/list_regions.rb
----------------------------------------------------------------------
diff --git a/hbase-shell/src/main/ruby/shell/commands/list_regions.rb b/hbase-shell/src/main/ruby/shell/commands/list_regions.rb
index 527a6cb..94b7a29 100644
--- a/hbase-shell/src/main/ruby/shell/commands/list_regions.rb
+++ b/hbase-shell/src/main/ruby/shell/commands/list_regions.rb
@@ -23,37 +23,62 @@ module Shell
       def help
 
         return<<EOF
-        List all regions for a particular table as an array and also filter them by server name (optional) as prefix.
-        By default, it will return all the regions for the table
+        List all regions for a particular table as an array and also filter them by server name (optional) as prefix
+        and maximum locality (optional). By default, it will return all the regions for the table with any locality.
 
         Examples:
         hbase> list_regions 'table_name'
         hbase> list_regions 'table_name', 'server_name'
+        hbase> list_regions 'table_name', {SERVER_NAME => 'server_name', LOCALITY_THRESHOLD => 0.8}
 
 EOF
         return
       end
 
-      def command(table_name, region_server_name = "")
+      def command(table_name, options = nil)
+        if options.nil?
+          options = {}
+        elsif not options.is_a? Hash
+          # When options isn't a hash, assume it's the server name
+          # and create the hash internally
+          options = {SERVER_NAME => options}
+        end
         admin_instance = admin.instance_variable_get("@admin")
         conn_instance = admin_instance.getConnection()
         cluster_status = admin_instance.getClusterStatus()
         hregion_locator_instance = conn_instance.getRegionLocator(TableName.valueOf(table_name))
-        hregion_locator_list = hregion_locator_instance.getAllRegionLocations()
+        hregion_locator_list = hregion_locator_instance.getAllRegionLocations().to_a
         results = Array.new
 
         begin
-          hregion_locator_list.each do |hregion|
+          # Filter out region servers which we don't want, default to all RS
+          regions = hregion_locator_list.filter do |hregion|
+            server_name = options[SERVER_NAME] || '*'
+            accept_server_name? server_name, hregion.getServerName().toString()
+          end
+          # A locality threshold of "1.0" would be all regions (cannot have greater than 1 locality)
+          # Regions which have a `dataLocality` less-than-or-equal to this value are accepted
+          locality_threshold = 1.0
+          if options.has_key? LOCALITY_THRESHOLD
+            value = options[LOCALITY_THRESHOLD]
+            # Value validation. Must be a Float, and must be between [0, 1.0]
+            raise "#{LOCALITY_THRESHOLD} must be a float value" unless value.is_a? Float
+            raise "#{LOCALITY_THRESHOLD} must be between 0 and 1.0, inclusive" unless valid_locality_threshold? value
+            locality_threshold = value
+          end
+          regions.each do |hregion|
             hregion_info = hregion.getRegionInfo()
             server_name = hregion.getServerName()
-            if hregion.getServerName().toString.start_with? region_server_name
-              startKey = Bytes.toString(hregion.getRegionInfo().getStartKey())
-              endKey = Bytes.toString(hregion.getRegionInfo().getEndKey())
-              region_load_map = cluster_status.getLoad(server_name).getRegionsLoad()
-              region_load = region_load_map.get(hregion_info.getRegionName())
+            region_load_map = cluster_status.getLoad(server_name).getRegionsLoad()
+            region_load = region_load_map.get(hregion_info.getRegionName())
+            # Ignore regions which exceed our locality threshold
+            if accept_region_for_locality? region_load.getDataLocality(), locality_threshold
+              startKey = Bytes.toString(hregion_info.getStartKey())
+              endKey = Bytes.toString(hregion_info.getEndKey())
               region_store_file_size = region_load.getStorefileSizeMB()
               region_requests = region_load.getRequestsCount()
-              results << { "server" => hregion.getServerName().toString(), "name" => hregion_info.getRegionNameAsString(), "startkey" => startKey, "endkey" => endKey, "size" => region_store_file_size, "requests" => region_requests }
+              results << { "server" => hregion.getServerName().toString(), "name" => hregion_info.getRegionNameAsString(), "startkey" => startKey, "endkey" => endKey,
+                 "size" => region_store_file_size, "requests" => region_requests, "locality" => region_load.getDataLocality() }
             end
           end
         ensure
@@ -62,15 +87,27 @@ EOF
 
         @end_time = Time.now
 
-        printf("%-60s | %-60s | %-15s | %-15s | %-20s | %-20s", "SERVER_NAME", "REGION_NAME", "START_KEY", "END_KEY", "SIZE", "REQ");
+        printf("%-60s | %-60s | %-15s | %-15s | %-20s | %-20s | %-20s", "SERVER_NAME", "REGION_NAME", "START_KEY", "END_KEY", "SIZE", "REQ", "LOCALITY");
         printf("\n")
         for result in results
-          printf("%-60s | %-60s | %-15s | %-15s | %-20s | %-20s", result["server"], result["name"], result["startkey"], result["endkey"], result["size"], result["requests"]);
+          printf("%-60s | %-60s | %-15s | %-15s | %-20s | %-20s | %-20s", result["server"], result["name"], result["startkey"], result["endkey"], result["size"], result["requests"], result['locality']);
             printf("\n")
         end
         printf("%d rows", results.size)
 
       end
+
+      def valid_locality_threshold?(value)
+        value >= 0 and value <= 1.0
+      end
+
+      def accept_server_name?(desired_server_name, actual_server_name)
+        desired_server_name.eql? '*' or actual_server_name.start_with? desired_server_name
+      end
+
+      def accept_region_for_locality?(actual_locality, locality_threshold)
+        actual_locality <= locality_threshold
+      end
     end
   end
 end

http://git-wip-us.apache.org/repos/asf/hbase/blob/13b6fdf8/hbase-shell/src/test/ruby/hbase/list_regions_test_no_cluster.rb
----------------------------------------------------------------------
diff --git a/hbase-shell/src/test/ruby/hbase/list_regions_test_no_cluster.rb b/hbase-shell/src/test/ruby/hbase/list_regions_test_no_cluster.rb
new file mode 100644
index 0000000..657b1bd
--- /dev/null
+++ b/hbase-shell/src/test/ruby/hbase/list_regions_test_no_cluster.rb
@@ -0,0 +1,61 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+require 'shell'
+require 'hbase_constants'
+
+include HBaseConstants
+
+module Hbase
+  class NoClusterListRegionsTest < Test::Unit::TestCase
+    include TestHelpers
+
+    define_test 'valid_locality_values' do
+      command = ::Shell::Commands::ListRegions.new(nil)
+      # Validation that a float is received is done elsewhere
+      assert command.valid_locality_threshold?(0.999)
+      assert command.valid_locality_threshold?(0.001)
+      assert command.valid_locality_threshold?(1.0)
+      assert command.valid_locality_threshold?(0.0)
+      assert_equal false, command.valid_locality_threshold?(2.0)
+      assert_equal false, command.valid_locality_threshold?(100.0)
+    end
+
+    define_test 'acceptable_server_names' do
+      command = ::Shell::Commands::ListRegions.new(nil)
+      assert command.accept_server_name?('host.domain.com', 'host.domain.com')
+      assert command.accept_server_name?('host.domain', 'host.domain.com')
+      assert command.accept_server_name?('host.dom', 'host.domain.com')
+      assert command.accept_server_name?('host1', 'host1.domain.com')
+      assert_equal false, command.accept_server_name?('host2', 'host1.domain.com')
+      assert_equal false, command.accept_server_name?('host2.domain', 'host1.domain.com')
+      assert_equal false, command.accept_server_name?('host1.niamod', 'host1.domain.com')
+    end
+
+    define_test 'valid_region_localities' do
+      command = ::Shell::Commands::ListRegions.new(nil)
+      assert command.accept_region_for_locality?(0.5, 0.8)
+      assert command.accept_region_for_locality?(0.8, 0.8)
+      assert command.accept_region_for_locality?(0.0, 1.0)
+      assert command.accept_region_for_locality?(1.0, 1.0)
+      assert_equal false, command.accept_region_for_locality?(0.01, 0.001)
+      assert_equal false, command.accept_region_for_locality?(1.0, 0.8)
+      assert_equal false, command.accept_region_for_locality?(1.0, 0.999)
+      assert_equal false, command.accept_region_for_locality?(0.5, 0.3)
+    end
+  end
+end