You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2012/09/18 23:41:20 UTC

svn commit: r1387369 - in /hbase/trunk/hbase-server/src: main/ruby/hbase/table.rb main/ruby/shell/commands/get.rb main/ruby/shell/commands/scan.rb test/ruby/hbase/table_test.rb

Author: stack
Date: Tue Sep 18 21:41:20 2012
New Revision: 1387369

URL: http://svn.apache.org/viewvc?rev=1387369&view=rev
Log:
HBASE-6592 [shell] Add means of custom formatting output by column

Modified:
    hbase/trunk/hbase-server/src/main/ruby/hbase/table.rb
    hbase/trunk/hbase-server/src/main/ruby/shell/commands/get.rb
    hbase/trunk/hbase-server/src/main/ruby/shell/commands/scan.rb
    hbase/trunk/hbase-server/src/test/ruby/hbase/table_test.rb

Modified: hbase/trunk/hbase-server/src/main/ruby/hbase/table.rb
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/ruby/hbase/table.rb?rev=1387369&r1=1387368&r2=1387369&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/ruby/hbase/table.rb (original)
+++ hbase/trunk/hbase-server/src/main/ruby/hbase/table.rb Tue Sep 18 21:41:20 2012
@@ -113,6 +113,7 @@ EOF
       @table = org.apache.hadoop.hbase.client.HTable.new(configuration, table_name)
       @name = table_name
       @shell = shell
+      @converters = Hash.new()
     end
 
     # Note the below methods are prefixed with '_' to hide them from the average user, as
@@ -187,7 +188,8 @@ EOF
     def _get_internal(row, *args)
       get = org.apache.hadoop.hbase.client.Get.new(row.to_s.to_java_bytes)
       maxlength = -1
-
+      @converters.clear()
+      
       # Normalize args
       args = args.first if args.first.kind_of?(Hash)
       if args.kind_of?(String) || args.kind_of?(Array)
@@ -299,6 +301,7 @@ EOF
 
       limit = args.delete("LIMIT") || -1
       maxlength = args.delete("MAXLENGTH") || -1
+      @converters.clear()
 
       if args.any?
         filter = args["FILTER"]
@@ -450,6 +453,7 @@ EOF
     # Returns family and (when has it) qualifier for a column name
     def parse_column_name(column)
       split = org.apache.hadoop.hbase.KeyValue.parseColumn(column.to_java_bytes)
+      set_converter(split) if split.length > 1
       return split[0], (split.length > 1) ? split[1] : nil
     end
 
@@ -474,9 +478,42 @@ EOF
       if kv.isDelete
         val = "timestamp=#{kv.getTimestamp}, type=#{org.apache.hadoop.hbase.KeyValue::Type::codeToType(kv.getType)}"
       else
-        val = "timestamp=#{kv.getTimestamp}, value=#{org.apache.hadoop.hbase.util.Bytes::toStringBinary(kv.getValue)}"
+        val = "timestamp=#{kv.getTimestamp}, value=#{convert(column, kv)}"
       end
       (maxlength != -1) ? val[0, maxlength] : val
     end
+    
+    def convert(column, kv)
+      #use org.apache.hadoop.hbase.util.Bytes as the default class
+      klazz_name = 'org.apache.hadoop.hbase.util.Bytes'
+      #use org.apache.hadoop.hbase.util.Bytes::toStringBinary as the default converter
+      converter = 'toStringBinary'
+      if @converters.has_key?(column)
+        # lookup the CONVERTER for certain column - "cf:qualifier"
+        matches = /c\((.+)\)\.(.+)/.match(@converters[column])
+        if matches.nil?
+          # cannot match the pattern of 'c(className).functionname'
+          # use the default klazz_name
+          converter = @converters[column] 
+        else
+          klazz_name = matches[1]
+          converter = matches[2]
+        end
+      end
+      method = eval(klazz_name).method(converter)
+      return method.call(kv.getValue) # apply the converter
+    end
+    
+    # if the column spec contains CONVERTER information, to get rid of :CONVERTER info from column pair.
+    # 1. return back normal column pair as usual, i.e., "cf:qualifier[:CONVERTER]" to "cf" and "qualifier" only
+    # 2. register the CONVERTER information based on column spec - "cf:qualifier"
+    def set_converter(column)
+      family = String.from_java_bytes(column[0])
+      parts = org.apache.hadoop.hbase.KeyValue.parseColumn(column[1])
+      if parts.length > 1
+        @converters["#{family}:#{String.from_java_bytes(parts[0])}"] = String.from_java_bytes(parts[1])
+        column[1] = parts[0]
+      end
+    end
   end
 end

Modified: hbase/trunk/hbase-server/src/main/ruby/shell/commands/get.rb
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/ruby/shell/commands/get.rb?rev=1387369&r1=1387368&r2=1387369&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/ruby/shell/commands/get.rb (original)
+++ hbase/trunk/hbase-server/src/main/ruby/shell/commands/get.rb Tue Sep 18 21:41:20 2012
@@ -36,8 +36,23 @@ a dictionary of column(s), timestamp, ti
   hbase> get 't1', 'r1', 'c1', 'c2'
   hbase> get 't1', 'r1', ['c1', 'c2']
 
+Besides the default 'toStringBinary' format, 'get' also supports custom formatting by
+column.  A user can define a FORMATTER by adding it to the column name in the get
+specification.  The FORMATTER can be stipulated: 
+
+ 1. either as an org.apache.hadoop.hbase.util.Bytes method name (e.g., toInt, toString)
+ 2. or as a custom class followed by method name: e.g. 'c(MyFormatterClass).format'.
+
+Example formatting cf:qualifier1 and cf:qualifier2 both as Integers: 
+  hbase> get 't1', 'r1', {COLUMN => ['cf:qualifier1:toInt',
+    'cf:qualifier2:c(org.apache.hadoop.hbase.util.Bytes).toInt'] } 
+
+Note that you can specify a FORMATTER by column only (cf:qualifier).  You cannot specify
+a FORMATTER for all columns of a column family.
+    
 The same commands also can be run on a reference to a table (obtained via get_table or
- create_table). Suppose you had a reference t to table 't1', the corresponding commands would be:
+create_table). Suppose you had a reference t to table 't1', the corresponding commands
+would be:
 
   hbase> t.get 'r1'
   hbase> t.get 'r1', {TIMERANGE => [ts1, ts2]}

Modified: hbase/trunk/hbase-server/src/main/ruby/shell/commands/scan.rb
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/ruby/shell/commands/scan.rb?rev=1387369&r1=1387368&r2=1387369&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/ruby/shell/commands/scan.rb (original)
+++ hbase/trunk/hbase-server/src/main/ruby/shell/commands/scan.rb Tue Sep 18 21:41:20 2012
@@ -42,8 +42,10 @@ Some examples:
   hbase> scan '.META.', {COLUMNS => 'info:regioninfo'}
   hbase> scan 't1', {COLUMNS => ['c1', 'c2'], LIMIT => 10, STARTROW => 'xyz'}
   hbase> scan 't1', {COLUMNS => 'c1', TIMERANGE => [1303668804, 1303668904]}
-  hbase> scan 't1', {FILTER => "(PrefixFilter ('row2') AND (QualifierFilter (>=, 'binary:xyz'))) AND (TimestampsFilter ( 123, 456))"}
-  hbase> scan 't1', {FILTER => org.apache.hadoop.hbase.filter.ColumnPaginationFilter.new(1, 0)}
+  hbase> scan 't1', {FILTER => "(PrefixFilter ('row2') AND
+    (QualifierFilter (>=, 'binary:xyz'))) AND (TimestampsFilter ( 123, 456))"}
+  hbase> scan 't1', {FILTER =>
+    org.apache.hadoop.hbase.filter.ColumnPaginationFilter.new(1, 0)}
 
 For experts, there is an additional option -- CACHE_BLOCKS -- which
 switches block caching for the scanner on (true) or off (false).  By
@@ -58,13 +60,29 @@ Disabled by default.  Example:
 
   hbase> scan 't1', {RAW => true, VERSIONS => 10}
 
-Scan can also be used directly from a table, by first getting a reference to a table, like such:
+Besides the default 'toStringBinary' format, 'scan' supports custom formatting
+by column.  A user can define a FORMATTER by adding it to the column name in
+the scan specification.  The FORMATTER can be stipulated: 
+
+ 1. either as an org.apache.hadoop.hbase.util.Bytes method name (e.g., toInt, toString)
+ 2. or as a custom class followed by method name: e.g. 'c(MyFormatterClass).format'.
+
+Example formatting cf:qualifier1 and cf:qualifier2 both as Integers: 
+  hbase> scan 't1', {COLUMNS => ['cf:qualifier1:toInt',
+    'cf:qualifier2:c(org.apache.hadoop.hbase.util.Bytes).toInt'] } 
+
+Note that you can specify a FORMATTER by column only (cf:qualifier).  You cannot
+specify a FORMATTER for all columns of a column family.
+
+Scan can also be used directly from a table, by first getting a reference to a
+table, like such:
 
   hbase> t = get_table 't'
   hbase> t.scan
 
-Note in the above situation, you can still provide all the filtering, columns, options, etc as
-described above.
+Note in the above situation, you can still provide all the filtering, columns,
+options, etc as described above.
+
 EOF
       end
 

Modified: hbase/trunk/hbase-server/src/test/ruby/hbase/table_test.rb
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/ruby/hbase/table_test.rb?rev=1387369&r1=1387368&r2=1387369&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/ruby/hbase/table_test.rb (original)
+++ hbase/trunk/hbase-server/src/test/ruby/hbase/table_test.rb Tue Sep 18 21:41:20 2012
@@ -311,6 +311,22 @@ module Hbase
       @test_table._get_internal('1') { |col, val| res[col] = val }
       assert_equal(res.keys.sort, [ 'x:a', 'x:b' ])
     end
+    
+    define_test "get should support COLUMNS with value CONVERTER information" do
+        @test_table.put(1, "x:c", [1024].pack('N'))
+        @test_table.put(1, "x:d", [98].pack('N'))
+        begin
+          res = @test_table._get_internal('1', ['x:c:toInt'], ['x:d:c(org.apache.hadoop.hbase.util.Bytes).toInt'])
+          assert_not_nil(res)
+          assert_kind_of(Hash, res)
+          assert_not_nil(/value=1024/.match(res['x:c']))
+          assert_not_nil(/value=98/.match(res['x:d']))
+        ensure
+          # clean up newly added columns for this test only.
+          @test_table.delete(1, "x:c")
+          @test_table.delete(1, "x:d")
+        end
+    end
 
     #-------------------------------------------------------------------------------
 
@@ -417,5 +433,22 @@ module Hbase
       res = @test_table._scan_internal { |row, cells| rows[row] = cells }
       assert_equal(rows.keys.size, res)
     end
+    
+    define_test "scan should support COLUMNS with value CONVERTER information" do
+      @test_table.put(1, "x:c", [1024].pack('N'))
+      @test_table.put(1, "x:d", [98].pack('N'))
+      begin
+        res = @test_table._scan_internal COLUMNS => ['x:c:toInt', 'x:d:c(org.apache.hadoop.hbase.util.Bytes).toInt']
+        assert_not_nil(res)
+        assert_kind_of(Hash, res)
+        assert_not_nil(/value=1024/.match(res['1']['x:c']))
+        assert_not_nil(/value=98/.match(res['1']['x:d']))
+      ensure
+        # clean up newly added columns for this test only.
+        @test_table.delete(1, "x:c")
+        @test_table.delete(1, "x:d")
+    end
+end
+    
   end
 end