Posted to commits@hbase.apache.org by zg...@apache.org on 2020/05/25 05:17:06 UTC

[hbase] branch branch-2.2 updated (6a6788c -> 3cd1525)

This is an automated email from the ASF dual-hosted git repository.

zghao pushed a change to branch branch-2.2
in repository https://gitbox.apache.org/repos/asf/hbase.git.


    from 6a6788c  HBASE-23771 [Flakey Tests] Test TestSplitTransactionOnCluster Again
     new fff59f4  HBASE-24387 TableSnapshotInputFormatImpl support row limit on each InputSplit (#1731)
     new 3cd1525  HBASE-24401 Cell size limit check on append should consider 0 or less value to disable the check (#1742)

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../mapreduce/TableSnapshotInputFormatImpl.java    | 23 +++++++---
 .../mapreduce/TestTableSnapshotInputFormat.java    | 51 ++++++++++++++++++++++
 .../apache/hadoop/hbase/regionserver/HRegion.java  | 12 ++---
 .../hadoop/hbase/regionserver/RSRpcServices.java   |  4 +-
 .../hadoop/hbase/client/TestFromClientSide.java    | 22 ++++++++++
 5 files changed, 98 insertions(+), 14 deletions(-)


[hbase] 02/02: HBASE-24401 Cell size limit check on append should consider 0 or less value to disable the check (#1742)

Posted by zg...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

zghao pushed a commit to branch branch-2.2
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 3cd1525dccc706ffd20a16a9ae70f1017c708f6b
Author: wenbang <81...@qq.com>
AuthorDate: Mon May 25 09:58:01 2020 +0800

    HBASE-24401 Cell size limit check on append should consider 0 or less value to disable the check (#1742)
    
    Signed-off-by: Guanghao Zhang <zg...@apache.org>
---
 .../apache/hadoop/hbase/regionserver/HRegion.java  | 12 ++++++------
 .../hadoop/hbase/regionserver/RSRpcServices.java   |  4 +---
 .../hadoop/hbase/client/TestFromClientSide.java    | 22 ++++++++++++++++++++++
 3 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
index 8ff2a4f..a9b3e4d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
@@ -8093,14 +8093,14 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
           break;
         default: throw new UnsupportedOperationException(op.toString());
       }
-      int newCellSize = PrivateCellUtil.estimatedSerializedSizeOf(newCell);
-      if (newCellSize > this.maxCellSize) {
-        String msg = "Cell with size " + newCellSize + " exceeds limit of " + this.maxCellSize
-          + " bytes in region " + this;
-        if (LOG.isDebugEnabled()) {
+      if (this.maxCellSize > 0) {
+        int newCellSize = PrivateCellUtil.estimatedSerializedSizeOf(newCell);
+        if (newCellSize > this.maxCellSize) {
+          String msg = "Cell with size " + newCellSize + " exceeds limit of " + this.maxCellSize
+            + " bytes in region " + this;
           LOG.debug(msg);
+          throw new DoNotRetryIOException(msg);
         }
-        throw new DoNotRetryIOException(msg);
       }
 
       cellPairs.add(new Pair<>(currentValue, newCell));
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
index 2e356c5..44bee45 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
@@ -946,9 +946,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
         int size = PrivateCellUtil.estimatedSerializedSizeOf(cells.current());
         if (size > r.maxCellSize) {
           String msg = "Cell with size " + size + " exceeds limit of " + r.maxCellSize + " bytes";
-          if (LOG.isDebugEnabled()) {
-            LOG.debug(msg);
-          }
+          LOG.debug(msg);
           throw new DoNotRetryIOException(msg);
         }
       }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java
index d301447..0b1b687 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java
@@ -6591,6 +6591,28 @@ public class TestFromClientSide {
   }
 
   @Test
+  public void testCellSizeNoLimit() throws IOException {
+    final TableName tableName = TableName.valueOf("testCellSizeNoLimit");
+    ColumnFamilyDescriptor familyDescriptor =
+      new ColumnFamilyDescriptorBuilder.ModifyableColumnFamilyDescriptor(FAMILY);
+    TableDescriptorBuilder.ModifyableTableDescriptor tableDescriptor =
+      new TableDescriptorBuilder.ModifyableTableDescriptor(tableName)
+        .setValue(HRegion.HBASE_MAX_CELL_SIZE_KEY, Integer.toString(0));
+    tableDescriptor.setColumnFamily(familyDescriptor);
+
+    try (Admin admin = TEST_UTIL.getAdmin()) {
+      admin.createTable(tableDescriptor);
+    }
+
+    // Will succeed
+    try (Table ht = TEST_UTIL.getConnection().getTable(tableName)) {
+      ht.put(new Put(ROW).addColumn(FAMILY, QUALIFIER,  new byte[HRegion.DEFAULT_MAX_CELL_SIZE -
+        1024]));
+      ht.append(new Append(ROW).addColumn(FAMILY, QUALIFIER, new byte[1024 + 1]));
+    }
+  }
+
+  @Test
   public void testDeleteSpecifiedVersionOfSpecifiedColumn() throws Exception {
     try (Admin admin = TEST_UTIL.getAdmin()) {
       final TableName tableName = TableName.valueOf(name.getMethodName());

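A note on how this change is exercised: the limit checked above comes from
hbase.server.keyvalue.maxsize (HRegion.HBASE_MAX_CELL_SIZE_KEY, default
10485760 bytes), and after this fix a value of 0 or less disables the check
on the append path as it already did for plain puts in RSRpcServices. Below
is a minimal illustrative sketch, not part of the commit; the table, family,
qualifier and row names are hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Append;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.util.Bytes;

public class CellSizeNoLimitExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    TableName tableName = TableName.valueOf("exampleTable"); // hypothetical
    byte[] family = Bytes.toBytes("cf");                     // hypothetical
    byte[] qualifier = Bytes.toBytes("q");                   // hypothetical

    // Setting the limit to 0 (or any value <= 0) at the table level
    // disables the cell size check for this table.
    TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName)
        .setColumnFamily(ColumnFamilyDescriptorBuilder.of(family))
        .setValue("hbase.server.keyvalue.maxsize", "0")
        .build();

    try (Connection conn = ConnectionFactory.createConnection(conf);
         Admin admin = conn.getAdmin()) {
      admin.createTable(desc);
      try (Table table = conn.getTable(tableName)) {
        byte[] row = Bytes.toBytes("r1");
        // Write a cell just under the default 10 MB limit ...
        table.put(new Put(row).addColumn(family, qualifier,
            new byte[10 * 1024 * 1024 - 1024]));
        // ... then grow it past the limit via append. Before this fix the
        // resulting cell failed the size check even with the limit set to
        // 0; with the fix the check is skipped and the append succeeds.
        table.append(new Append(row).addColumn(family, qualifier,
            new byte[2048]));
      }
    }
  }
}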

[hbase] 01/02: HBASE-24387 TableSnapshotInputFormatImpl support row limit on each InputSplit (#1731)

Posted by zg...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

zghao pushed a commit to branch branch-2.2
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit fff59f4064714ea2081f79eaa2abe8cf4d544353
Author: niuyulin <ny...@163.com>
AuthorDate: Sun May 24 20:55:48 2020 -0500

    HBASE-24387 TableSnapshotInputFormatImpl support row limit on each InputSplit (#1731)
    
    Signed-off-by: Guanghao Zhang <zg...@apache.org>
---
 .../mapreduce/TableSnapshotInputFormatImpl.java    | 23 +++++++---
 .../mapreduce/TestTableSnapshotInputFormat.java    | 51 ++++++++++++++++++++++
 2 files changed, 69 insertions(+), 5 deletions(-)

diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java
index 73b5d05..dabd60c 100644
--- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java
@@ -102,6 +102,12 @@ public class TableSnapshotInputFormatImpl {
   public static final boolean SNAPSHOT_INPUTFORMAT_LOCALITY_ENABLED_DEFAULT = true;
 
   /**
+   * In some scenarios, limit the number of rows scanned from each InputSplit, e.g. for sampling-based data extraction
+   */
+  public static final String SNAPSHOT_INPUTFORMAT_ROW_LIMIT_PER_INPUTSPLIT =
+      "hbase.TableSnapshotInputFormat.row.limit.per.inputsplit";
+
+  /**
    * Implementation class for InputSplit logic common between mapred and mapreduce.
    */
   public static class InputSplit implements Writable {
@@ -213,6 +219,8 @@ public class TableSnapshotInputFormatImpl {
     private Result result = null;
     private ImmutableBytesWritable row = null;
     private ClientSideRegionScanner scanner;
+    private int numOfCompleteRows = 0;
+    private int rowLimitPerSplit;
 
     public ClientSideRegionScanner getScanner() {
       return scanner;
@@ -221,6 +229,7 @@ public class TableSnapshotInputFormatImpl {
     public void initialize(InputSplit split, Configuration conf) throws IOException {
       this.scan = TableMapReduceUtil.convertStringToScan(split.getScan());
       this.split = split;
+      this.rowLimitPerSplit = conf.getInt(SNAPSHOT_INPUTFORMAT_ROW_LIMIT_PER_INPUTSPLIT, 0);
       TableDescriptor htd = split.htd;
       HRegionInfo hri = this.split.getRegionInfo();
       FileSystem fs = FSUtils.getCurrentFileSystem(conf);
@@ -244,6 +253,9 @@ public class TableSnapshotInputFormatImpl {
         return false;
       }
 
+      if (rowLimitPerSplit > 0 && ++this.numOfCompleteRows > rowLimitPerSplit) {
+        return false;
+      }
       if (this.row == null) {
         this.row = new ImmutableBytesWritable();
       }
@@ -296,10 +308,11 @@ public class TableSnapshotInputFormatImpl {
     return getSplits(scan, manifest, regionInfos, restoreDir, conf, splitAlgo, numSplits);
   }
 
-  public static RegionSplitter.SplitAlgorithm getSplitAlgo(Configuration conf) throws IOException{
+  public static RegionSplitter.SplitAlgorithm getSplitAlgo(Configuration conf) throws IOException {
     String splitAlgoClassName = conf.get(SPLIT_ALGO);
-    if (splitAlgoClassName == null)
+    if (splitAlgoClassName == null) {
       return null;
+    }
     try {
       return Class.forName(splitAlgoClassName).asSubclass(RegionSplitter.SplitAlgorithm.class)
           .getDeclaredConstructor().newInstance();
@@ -511,9 +524,9 @@ public class TableSnapshotInputFormatImpl {
    * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
    * @param conf the job to configure
    * @param snapshotName the name of the snapshot to read from
-   * @param restoreDir a temporary directory to restore the snapshot into. Current user should
-   * have write permissions to this directory, and this should not be a subdirectory of rootdir.
-   * After the job is finished, restoreDir can be deleted.
+   * @param restoreDir a temporary directory to restore the snapshot into. Current user should have
+   *          write permissions to this directory, and this should not be a subdirectory of rootdir.
+   *          After the job is finished, restoreDir can be deleted.
    * @param numSplitsPerRegion how many input splits to generate per one region
    * @param splitAlgo SplitAlgorithm to be used when generating InputSplits
    * @throws IOException if an error occurs
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java
index 358af24..46d5f3e 100644
--- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.mapreduce;
 
 import static org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl.SNAPSHOT_INPUTFORMAT_LOCALITY_ENABLED_DEFAULT;
 import static org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl.SNAPSHOT_INPUTFORMAT_LOCALITY_ENABLED_KEY;
+import static org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl.SNAPSHOT_INPUTFORMAT_ROW_LIMIT_PER_INPUTSPLIT;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;
 
@@ -305,6 +306,56 @@ public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBa
   }
 
   @Test
+  public void testScanLimit() throws Exception {
+    setupCluster();
+    final TableName tableName = TableName.valueOf(name.getMethodName());
+    final String snapshotName = tableName + "Snapshot";
+    Table table = null;
+    try {
+      UTIL.getConfiguration().setInt(SNAPSHOT_INPUTFORMAT_ROW_LIMIT_PER_INPUTSPLIT, 10);
+      if (UTIL.getAdmin().tableExists(tableName)) {
+        UTIL.deleteTable(tableName);
+      }
+
+      UTIL.createTable(tableName, FAMILIES, new byte[][] { bbb, yyy });
+
+      Admin admin = UTIL.getAdmin();
+
+      int regionNum = admin.getRegions(tableName).size();
+      // put some stuff in the table
+      table = UTIL.getConnection().getTable(tableName);
+      UTIL.loadTable(table, FAMILIES);
+
+      Path rootDir = FSUtils.getRootDir(UTIL.getConfiguration());
+      FileSystem fs = rootDir.getFileSystem(UTIL.getConfiguration());
+
+      SnapshotTestingUtils.createSnapshotAndValidate(admin, tableName, Arrays.asList(FAMILIES),
+        null, snapshotName, rootDir, fs, true);
+
+      Job job = new Job(UTIL.getConfiguration());
+      Path tmpTableDir = UTIL.getDataTestDirOnTestFS(snapshotName);
+      Scan scan = new Scan();
+      TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
+        TestTableSnapshotInputFormat.class);
+
+      TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName, scan,
+        RowCounter.RowCounterMapper.class, NullWritable.class, NullWritable.class, job, true,
+        tmpTableDir);
+      Assert.assertTrue(job.waitForCompletion(true));
+      Assert.assertEquals(10 * regionNum,
+        job.getCounters().findCounter(RowCounter.RowCounterMapper.Counters.ROWS).getValue());
+    } finally {
+      if (table != null) {
+        table.close();
+      }
+      UTIL.getConfiguration().unset(SNAPSHOT_INPUTFORMAT_ROW_LIMIT_PER_INPUTSPLIT);
+      UTIL.getAdmin().deleteSnapshot(snapshotName);
+      UTIL.deleteTable(tableName);
+      tearDownCluster();
+    }
+  }
+
+  @Test
   public void testNoDuplicateResultsWhenSplitting() throws Exception {
     setupCluster();
     TableName tableName = TableName.valueOf("testNoDuplicateResultsWhenSplitting");
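
And a sketch of how the new knob added here might be used from a MapReduce
job, illustrative only and not part of the commit: capping rows per
InputSplit is useful for sampling a large snapshot, since the job then reads
at most limit * number-of-splits rows. The snapshot name, restore directory
and no-op mapper below are hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;

public class SnapshotSampleJob {

  // Hypothetical no-op mapper; real sampling logic would go in map().
  public static class SampleMapper extends TableMapper<NullWritable, NullWritable> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context) {
      // each call sees one row from the capped scan
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // New in HBASE-24387: scan at most 100 rows from each InputSplit
    // (TableSnapshotInputFormatImpl.SNAPSHOT_INPUTFORMAT_ROW_LIMIT_PER_INPUTSPLIT);
    // 0 or unset leaves scanning unlimited.
    conf.setInt("hbase.TableSnapshotInputFormat.row.limit.per.inputsplit", 100);

    Job job = Job.getInstance(conf, "snapshot-sample");
    job.setJarByClass(SnapshotSampleJob.class);

    TableMapReduceUtil.initTableSnapshotMapperJob(
        "exampleSnapshot",                  // hypothetical snapshot name
        new Scan(),
        SampleMapper.class,
        NullWritable.class,                 // mapper output key
        NullWritable.class,                 // mapper output value
        job,
        true,                               // add dependency jars
        new Path("/tmp/snapshot-restore")); // temp restore dir, outside hbase rootdir
    job.setNumReduceTasks(0);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}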