Posted to commits@hbase.apache.org by zg...@apache.org on 2020/05/25 02:37:20 UTC

[hbase] branch branch-2.3 updated (9f69ad4 -> 9216c90)

This is an automated email from the ASF dual-hosted git repository.

zghao pushed a change to branch branch-2.3
in repository https://gitbox.apache.org/repos/asf/hbase.git.


    from 9f69ad4  HBASE-24408 Introduce a general 'local region' to store data on master (#1753)
     new 210f22e  HBASE-24387 TableSnapshotInputFormatImpl support row limit on each InputSplit (#1731)
     new 9216c90  HBASE-24401 Cell size limit check on append should consider 0 or less value to disable the check (#1742)

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../mapreduce/TableSnapshotInputFormatImpl.java    | 23 +++++++---
 .../mapreduce/TestTableSnapshotInputFormat.java    | 51 ++++++++++++++++++++++
 .../apache/hadoop/hbase/regionserver/HRegion.java  | 12 ++---
 .../hadoop/hbase/regionserver/RSRpcServices.java   |  4 +-
 .../hadoop/hbase/client/TestFromClientSide5.java   | 24 +++++++++-
 5 files changed, 99 insertions(+), 15 deletions(-)


[hbase] 01/02: HBASE-24387 TableSnapshotInputFormatImpl support row limit on each InputSplit (#1731)

Posted by zg...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

zghao pushed a commit to branch branch-2.3
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 210f22edd32d8ce50a871a4f190d67e3e03af1a6
Author: niuyulin <ny...@163.com>
AuthorDate: Sun May 24 20:55:48 2020 -0500

    HBASE-24387 TableSnapshotInputFormatImpl support row limit on each InputSplit (#1731)
    
    Signed-off-by: Guanghao Zhang <zg...@apache.org>
---
 .../mapreduce/TableSnapshotInputFormatImpl.java    | 23 +++++++---
 .../mapreduce/TestTableSnapshotInputFormat.java    | 51 ++++++++++++++++++++++
 2 files changed, 69 insertions(+), 5 deletions(-)

diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java
index 9758f15..28b832e 100644
--- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java
@@ -102,6 +102,12 @@ public class TableSnapshotInputFormatImpl {
   public static final boolean SNAPSHOT_INPUTFORMAT_LOCALITY_ENABLED_DEFAULT = true;
 
   /**
+   * In some scenario, scan limited rows on each InputSplit for sampling data extraction
+   */
+  public static final String SNAPSHOT_INPUTFORMAT_ROW_LIMIT_PER_INPUTSPLIT =
+      "hbase.TableSnapshotInputFormat.row.limit.per.inputsplit";
+
+  /**
    * Implementation class for InputSplit logic common between mapred and mapreduce.
    */
   public static class InputSplit implements Writable {
@@ -213,6 +219,8 @@ public class TableSnapshotInputFormatImpl {
     private Result result = null;
     private ImmutableBytesWritable row = null;
     private ClientSideRegionScanner scanner;
+    private int numOfCompleteRows = 0;
+    private int rowLimitPerSplit;
 
     public ClientSideRegionScanner getScanner() {
       return scanner;
@@ -221,6 +229,7 @@ public class TableSnapshotInputFormatImpl {
     public void initialize(InputSplit split, Configuration conf) throws IOException {
       this.scan = TableMapReduceUtil.convertStringToScan(split.getScan());
       this.split = split;
+      this.rowLimitPerSplit = conf.getInt(SNAPSHOT_INPUTFORMAT_ROW_LIMIT_PER_INPUTSPLIT, 0);
       TableDescriptor htd = split.htd;
       HRegionInfo hri = this.split.getRegionInfo();
       FileSystem fs = CommonFSUtils.getCurrentFileSystem(conf);
@@ -244,6 +253,9 @@ public class TableSnapshotInputFormatImpl {
         return false;
       }
 
+      if (rowLimitPerSplit > 0 && ++this.numOfCompleteRows > rowLimitPerSplit) {
+        return false;
+      }
       if (this.row == null) {
         this.row = new ImmutableBytesWritable();
       }
@@ -296,10 +308,11 @@ public class TableSnapshotInputFormatImpl {
     return getSplits(scan, manifest, regionInfos, restoreDir, conf, splitAlgo, numSplits);
   }
 
-  public static RegionSplitter.SplitAlgorithm getSplitAlgo(Configuration conf) throws IOException{
+  public static RegionSplitter.SplitAlgorithm getSplitAlgo(Configuration conf) throws IOException {
     String splitAlgoClassName = conf.get(SPLIT_ALGO);
-    if (splitAlgoClassName == null)
+    if (splitAlgoClassName == null) {
       return null;
+    }
     try {
       return Class.forName(splitAlgoClassName).asSubclass(RegionSplitter.SplitAlgorithm.class)
           .getDeclaredConstructor().newInstance();
@@ -511,9 +524,9 @@ public class TableSnapshotInputFormatImpl {
    * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
    * @param conf the job to configure
    * @param snapshotName the name of the snapshot to read from
-   * @param restoreDir a temporary directory to restore the snapshot into. Current user should
-   * have write permissions to this directory, and this should not be a subdirectory of rootdir.
-   * After the job is finished, restoreDir can be deleted.
+   * @param restoreDir a temporary directory to restore the snapshot into. Current user should have
+   *          write permissions to this directory, and this should not be a subdirectory of rootdir.
+   *          After the job is finished, restoreDir can be deleted.
    * @param numSplitsPerRegion how many input splits to generate per one region
    * @param splitAlgo SplitAlgorithm to be used when generating InputSplits
    * @throws IOException if an error occurs
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java
index 5f187c6..d98340f 100644
--- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.mapreduce;
 
 import static org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl.SNAPSHOT_INPUTFORMAT_LOCALITY_ENABLED_DEFAULT;
 import static org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl.SNAPSHOT_INPUTFORMAT_LOCALITY_ENABLED_KEY;
+import static org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl.SNAPSHOT_INPUTFORMAT_ROW_LIMIT_PER_INPUTSPLIT;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;
 
@@ -305,6 +306,56 @@ public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBa
   }
 
   @Test
+  public void testScanLimit() throws Exception {
+    setupCluster();
+    final TableName tableName = TableName.valueOf(name.getMethodName());
+    final String snapshotName = tableName + "Snapshot";
+    Table table = null;
+    try {
+      UTIL.getConfiguration().setInt(SNAPSHOT_INPUTFORMAT_ROW_LIMIT_PER_INPUTSPLIT, 10);
+      if (UTIL.getAdmin().tableExists(tableName)) {
+        UTIL.deleteTable(tableName);
+      }
+
+      UTIL.createTable(tableName, FAMILIES, new byte[][] { bbb, yyy });
+
+      Admin admin = UTIL.getAdmin();
+
+      int regionNum = admin.getRegions(tableName).size();
+      // put some stuff in the table
+      table = UTIL.getConnection().getTable(tableName);
+      UTIL.loadTable(table, FAMILIES);
+
+      Path rootDir = CommonFSUtils.getRootDir(UTIL.getConfiguration());
+      FileSystem fs = rootDir.getFileSystem(UTIL.getConfiguration());
+
+      SnapshotTestingUtils.createSnapshotAndValidate(admin, tableName, Arrays.asList(FAMILIES),
+        null, snapshotName, rootDir, fs, true);
+
+      Job job = new Job(UTIL.getConfiguration());
+      Path tmpTableDir = UTIL.getDataTestDirOnTestFS(snapshotName);
+      Scan scan = new Scan();
+      TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
+        TestTableSnapshotInputFormat.class);
+
+      TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName, scan,
+        RowCounter.RowCounterMapper.class, NullWritable.class, NullWritable.class, job, true,
+        tmpTableDir);
+      Assert.assertTrue(job.waitForCompletion(true));
+      Assert.assertEquals(10 * regionNum,
+        job.getCounters().findCounter(RowCounter.RowCounterMapper.Counters.ROWS).getValue());
+    } finally {
+      if (table != null) {
+        table.close();
+      }
+      UTIL.getConfiguration().unset(SNAPSHOT_INPUTFORMAT_ROW_LIMIT_PER_INPUTSPLIT);
+      UTIL.getAdmin().deleteSnapshot(snapshotName);
+      UTIL.deleteTable(tableName);
+      tearDownCluster();
+    }
+  }
+
+  @Test
   public void testNoDuplicateResultsWhenSplitting() throws Exception {
     setupCluster();
     TableName tableName = TableName.valueOf("testNoDuplicateResultsWhenSplitting");

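For readers who want to try the new setting from HBASE-24387: the per-split row cap is read from the job configuration key hbase.TableSnapshotInputFormat.row.limit.per.inputsplit, and a value of 0 (the default) leaves the scan unbounded, as seen in the patch above. Below is a minimal driver sketch along the lines of the new test; the snapshot name, restore directory, and mapper are placeholders for illustration, not part of the commit.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.client.Scan;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
    import org.apache.hadoop.hbase.mapreduce.TableMapper;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.mapreduce.Job;

    public class SnapshotSampleJob {

      // Counts rows only; enough to observe the per-split cap in the job counters.
      public static class SampleMapper extends TableMapper<NullWritable, NullWritable> {
        @Override
        protected void map(ImmutableBytesWritable row, Result value, Context context)
            throws java.io.IOException, InterruptedException {
          context.getCounter("sample", "rows").increment(1);
        }
      }

      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Cap every InputSplit at 100 rows; 0 or unset keeps the old unbounded behavior.
        conf.setInt("hbase.TableSnapshotInputFormat.row.limit.per.inputsplit", 100);

        Job job = Job.getInstance(conf, "snapshot-sample");
        // "mySnapshot" and the restore directory below are placeholder values.
        TableMapReduceUtil.initTableSnapshotMapperJob("mySnapshot", new Scan(),
            SampleMapper.class, NullWritable.class, NullWritable.class, job, true,
            new Path("/tmp/snapshot-restore"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
      }
    }

With the cap set to 100 and N regions in the snapshot, the ROWS counter reported by the job should not exceed 100 * N, which is the same relationship the new testScanLimit asserts.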

[hbase] 02/02: HBASE-24401 Cell size limit check on append should consider 0 or less value to disable the check (#1742)

Posted by zg...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

zghao pushed a commit to branch branch-2.3
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 9216c906c6348510cfd2e2a70ecd1765370b6bc6
Author: wenbang <81...@qq.com>
AuthorDate: Mon May 25 09:58:01 2020 +0800

    HBASE-24401 Cell size limit check on append should consider 0 or less value to disable the check (#1742)
    
    Signed-off-by: Guanghao Zhang <zg...@apache.org>
---
 .../apache/hadoop/hbase/regionserver/HRegion.java  | 12 +++++------
 .../hadoop/hbase/regionserver/RSRpcServices.java   |  4 +---
 .../hadoop/hbase/client/TestFromClientSide5.java   | 24 +++++++++++++++++++++-
 3 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
index 59b652d..e8a066c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
@@ -8218,14 +8218,14 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
           break;
         default: throw new UnsupportedOperationException(op.toString());
       }
-      int newCellSize = PrivateCellUtil.estimatedSerializedSizeOf(newCell);
-      if (newCellSize > this.maxCellSize) {
-        String msg = "Cell with size " + newCellSize + " exceeds limit of " + this.maxCellSize
-          + " bytes in region " + this;
-        if (LOG.isDebugEnabled()) {
+      if (this.maxCellSize > 0) {
+        int newCellSize = PrivateCellUtil.estimatedSerializedSizeOf(newCell);
+        if (newCellSize > this.maxCellSize) {
+          String msg = "Cell with size " + newCellSize + " exceeds limit of " + this.maxCellSize
+            + " bytes in region " + this;
           LOG.debug(msg);
+          throw new DoNotRetryIOException(msg);
         }
-        throw new DoNotRetryIOException(msg);
       }
       cellPairs.add(new Pair<>(currentValue, newCell));
       // Add to results to get returned to the Client. If null, cilent does not want results.
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
index 4119e4f..8f57a37 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
@@ -973,9 +973,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
         int size = PrivateCellUtil.estimatedSerializedSizeOf(cells.current());
         if (size > r.maxCellSize) {
           String msg = "Cell with size " + size + " exceeds limit of " + r.maxCellSize + " bytes";
-          if (LOG.isDebugEnabled()) {
-            LOG.debug(msg);
-          }
+          LOG.debug(msg);
           throw new DoNotRetryIOException(msg);
         }
       }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide5.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide5.java
index cff4959..0ae15ca 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide5.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide5.java
@@ -2235,7 +2235,7 @@ public class TestFromClientSide5 extends FromClientSideBase {
 
   @Test
   public void testCellSizeLimit() throws IOException {
-    final TableName tableName = TableName.valueOf("testCellSizeLimit");
+    final TableName tableName = name.getTableName();
     TableDescriptorBuilder.ModifyableTableDescriptor tableDescriptor =
       new TableDescriptorBuilder.ModifyableTableDescriptor(tableName)
         .setValue(HRegion.HBASE_MAX_CELL_SIZE_KEY, Integer.toString(10 * 1024));
@@ -2273,6 +2273,28 @@ public class TestFromClientSide5 extends FromClientSideBase {
   }
 
   @Test
+  public void testCellSizeNoLimit() throws IOException {
+    final TableName tableName = name.getTableName();
+    ColumnFamilyDescriptor familyDescriptor =
+      new ColumnFamilyDescriptorBuilder.ModifyableColumnFamilyDescriptor(FAMILY);
+    TableDescriptorBuilder.ModifyableTableDescriptor tableDescriptor =
+      new TableDescriptorBuilder.ModifyableTableDescriptor(tableName)
+        .setValue(HRegion.HBASE_MAX_CELL_SIZE_KEY, Integer.toString(0));
+    tableDescriptor.setColumnFamily(familyDescriptor);
+
+    try (Admin admin = TEST_UTIL.getAdmin()) {
+      admin.createTable(tableDescriptor);
+    }
+
+    // Will succeed
+    try (Table ht = TEST_UTIL.getConnection().getTable(tableName)) {
+      ht.put(new Put(ROW).addColumn(FAMILY, QUALIFIER,  new byte[HRegion.DEFAULT_MAX_CELL_SIZE -
+        1024]));
+      ht.append(new Append(ROW).addColumn(FAMILY, QUALIFIER, new byte[1024 + 1]));
+    }
+  }
+
+  @Test
   public void testDeleteSpecifiedVersionOfSpecifiedColumn() throws Exception {
     final TableName tableName = name.getTableName();
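

The practical effect of HBASE-24401 is that setting HRegion.HBASE_MAX_CELL_SIZE_KEY ("hbase.server.keyvalue.maxsize") to 0 or a negative value now disables the cell size check on the append path as well. A minimal client-side sketch under that assumption follows; the table, family, qualifier, and row names are placeholders, and the Connection handle is assumed to be already open.

    import java.io.IOException;

    import org.apache.hadoop.hbase.TableName;
    import org.apache.hadoop.hbase.client.Admin;
    import org.apache.hadoop.hbase.client.Append;
    import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
    import org.apache.hadoop.hbase.client.Connection;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.client.Table;
    import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
    import org.apache.hadoop.hbase.util.Bytes;

    public class AppendWithoutCellSizeLimit {

      // "demo", "f", "q" and "r" are placeholder names; conn is an already-open Connection.
      static void run(Connection conn) throws IOException {
        TableName tableName = TableName.valueOf("demo");
        try (Admin admin = conn.getAdmin()) {
          admin.createTable(TableDescriptorBuilder.newBuilder(tableName)
              // 0 (or any value <= 0) disables the cell size check, now also on append.
              .setValue("hbase.server.keyvalue.maxsize", "0")
              .setColumnFamily(ColumnFamilyDescriptorBuilder.of("f"))
              .build());
        }

        try (Table table = conn.getTable(tableName)) {
          // Build a cell just under the former 10 MB default, then append past it.
          // With the limit disabled the append succeeds instead of failing with
          // DoNotRetryIOException.
          byte[] big = new byte[10 * 1024 * 1024 - 1024];
          table.put(new Put(Bytes.toBytes("r"))
              .addColumn(Bytes.toBytes("f"), Bytes.toBytes("q"), big));
          table.append(new Append(Bytes.toBytes("r"))
              .addColumn(Bytes.toBytes("f"), Bytes.toBytes("q"), new byte[2048]));
        }
      }
    }

This mirrors the new testCellSizeNoLimit test above: the put stays under the old default, the append pushes the cell past it, and the operation is still accepted because the limit is set to 0.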