You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by te...@apache.org on 2017/05/25 19:47:48 UTC
hbase git commit: HBASE-16011 TableSnapshotScanner and
TableSnapshotInputFormat can produce duplicate rows if split table.
Repository: hbase
Updated Branches:
refs/heads/master 4ab94744e -> f441ca045
HBASE-16011 TableSnapshotScanner and TableSnapshotInputFormat can produce duplicate rows if split table.
Signed-off-by: tedyu <yu...@gmail.com>
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/f441ca04
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/f441ca04
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/f441ca04
Branch: refs/heads/master
Commit: f441ca0458b40556a64d564cf2d4aa15a0c0be1e
Parents: 4ab9474
Author: huzheng <op...@gmail.com>
Authored: Wed May 24 20:31:57 2017 +0800
Committer: tedyu <yu...@gmail.com>
Committed: Thu May 25 12:47:43 2017 -0700
----------------------------------------------------------------------
.../hbase/client/TableSnapshotScanner.java | 9 ++-
.../mapreduce/TableSnapshotInputFormatImpl.java | 6 +-
.../hbase/client/TestTableSnapshotScanner.java | 50 ++++++++++++++
.../mapreduce/TestTableSnapshotInputFormat.java | 71 ++++++++++++++++++++
.../hbase/snapshot/SnapshotTestingUtils.java | 13 +++-
5 files changed, 144 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/f441ca04/hbase-server/src/main/java/org/apache/hadoop/hbase/client/TableSnapshotScanner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/client/TableSnapshotScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/client/TableSnapshotScanner.java
index 42d40eb..b861969 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/client/TableSnapshotScanner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/client/TableSnapshotScanner.java
@@ -126,9 +126,12 @@ public class TableSnapshotScanner extends AbstractClientScanner {
htd = meta.getTableDescriptor();
regions = new ArrayList<>(restoredRegions.size());
- for (HRegionInfo hri: restoredRegions) {
- if (CellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(),
- hri.getStartKey(), hri.getEndKey())) {
+ for (HRegionInfo hri : restoredRegions) {
+ if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) {
+ continue;
+ }
+ if (CellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(), hri.getStartKey(),
+ hri.getEndKey())) {
regions.add(hri);
}
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/f441ca04/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java
index d87468a..1a268d0 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java
@@ -275,7 +275,11 @@ public class TableSnapshotInputFormatImpl {
List<HRegionInfo> regionInfos = Lists.newArrayListWithCapacity(regionManifests.size());
for (SnapshotRegionManifest regionManifest : regionManifests) {
- regionInfos.add(HRegionInfo.convert(regionManifest.getRegionInfo()));
+ HRegionInfo hri = HRegionInfo.convert(regionManifest.getRegionInfo());
+ if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) {
+ continue;
+ }
+ regionInfos.add(hri);
}
return regionInfos;
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/f441ca04/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableSnapshotScanner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableSnapshotScanner.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableSnapshotScanner.java
index 85cb0fd..3322e6c 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableSnapshotScanner.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableSnapshotScanner.java
@@ -111,6 +111,56 @@ public class TestTableSnapshotScanner {
}
@Test
+ public void testNoDuplicateResultsWhenSplitting() throws Exception {
+ setupCluster();
+ TableName tableName = TableName.valueOf("testNoDuplicateResultsWhenSplitting");
+ String snapshotName = "testSnapshotBug";
+ try {
+ if (UTIL.getAdmin().tableExists(tableName)) {
+ UTIL.deleteTable(tableName);
+ }
+
+ UTIL.createTable(tableName, FAMILIES);
+ Admin admin = UTIL.getAdmin();
+
+ // put some stuff in the table
+ Table table = UTIL.getConnection().getTable(tableName);
+ UTIL.loadTable(table, FAMILIES);
+
+ // split to 2 regions
+ admin.split(tableName, Bytes.toBytes("eee"));
+ TestTableSnapshotInputFormat.blockUntilSplitFinished(UTIL, tableName, 2);
+
+ Path rootDir = FSUtils.getRootDir(UTIL.getConfiguration());
+ FileSystem fs = rootDir.getFileSystem(UTIL.getConfiguration());
+
+ SnapshotTestingUtils.createSnapshotAndValidate(admin, tableName,
+ Arrays.asList(FAMILIES), null, snapshotName, rootDir, fs, true);
+
+ // load different values
+ byte[] value = Bytes.toBytes("after_snapshot_value");
+ UTIL.loadTable(table, FAMILIES, value);
+
+ // cause flush to create new files in the region
+ admin.flush(tableName);
+ table.close();
+
+ Path restoreDir = UTIL.getDataTestDirOnTestFS(snapshotName);
+ Scan scan = new Scan().withStartRow(bbb).withStopRow(yyy); // limit the scan
+
+ TableSnapshotScanner scanner =
+ new TableSnapshotScanner(UTIL.getConfiguration(), restoreDir, snapshotName, scan);
+
+ verifyScanner(scanner, bbb, yyy);
+ scanner.close();
+ } finally {
+ UTIL.getAdmin().deleteSnapshot(snapshotName);
+ UTIL.deleteTable(tableName);
+ tearDownCluster();
+ }
+ }
+
+ @Test
public void testWithSingleRegion() throws Exception {
testScanner(UTIL, "testWithSingleRegion", 1, false);
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/f441ca04/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java
index 8a7cf46..3b91d3b 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java
@@ -30,6 +30,7 @@ import org.apache.hadoop.hbase.CategoryBasedTimeout;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
+import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
@@ -55,6 +56,13 @@ import org.junit.rules.TestRule;
import com.google.common.collect.Lists;
+import java.util.Arrays;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
+import org.apache.hadoop.hbase.util.FSUtils;
+
@Category({VerySlowMapReduceTests.class, LargeTests.class})
public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBase {
@Rule public final TestRule timeout = CategoryBasedTimeout.builder().
@@ -222,6 +230,69 @@ public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBa
}
}
+ public static void blockUntilSplitFinished(HBaseTestingUtility util, TableName tableName,
+ int expectedRegionSize) throws Exception {
+ for (int i = 0; i < 100; i++) {
+ List<HRegionInfo> hRegionInfoList = util.getAdmin().getTableRegions(tableName);
+ if (hRegionInfoList.size() >= expectedRegionSize) {
+ break;
+ }
+ Thread.sleep(1000);
+ }
+ }
+
+ @Test
+ public void testNoDuplicateResultsWhenSplitting() throws Exception {
+ setupCluster();
+ TableName tableName = TableName.valueOf("testNoDuplicateResultsWhenSplitting");
+ String snapshotName = "testSnapshotBug";
+ try {
+ if (UTIL.getAdmin().tableExists(tableName)) {
+ UTIL.deleteTable(tableName);
+ }
+
+ UTIL.createTable(tableName, FAMILIES);
+ Admin admin = UTIL.getAdmin();
+
+ // put some stuff in the table
+ Table table = UTIL.getConnection().getTable(tableName);
+ UTIL.loadTable(table, FAMILIES);
+
+ // split to 2 regions
+ admin.split(tableName, Bytes.toBytes("eee"));
+ blockUntilSplitFinished(UTIL, tableName, 2);
+
+ Path rootDir = FSUtils.getRootDir(UTIL.getConfiguration());
+ FileSystem fs = rootDir.getFileSystem(UTIL.getConfiguration());
+
+ SnapshotTestingUtils.createSnapshotAndValidate(admin, tableName, Arrays.asList(FAMILIES),
+ null, snapshotName, rootDir, fs, true);
+
+ // load different values
+ byte[] value = Bytes.toBytes("after_snapshot_value");
+ UTIL.loadTable(table, FAMILIES, value);
+
+ // cause flush to create new files in the region
+ admin.flush(tableName);
+ table.close();
+
+ Job job = new Job(UTIL.getConfiguration());
+ Path tmpTableDir = UTIL.getDataTestDirOnTestFS(snapshotName);
+ // limit the scan
+ Scan scan = new Scan().withStartRow(getStartRow()).withStopRow(getEndRow());
+
+ TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName, scan,
+ TestTableSnapshotMapper.class, ImmutableBytesWritable.class, NullWritable.class, job, false,
+ tmpTableDir);
+
+ verifyWithMockedMapReduce(job, 2, 2, getStartRow(), getEndRow());
+ } finally {
+ UTIL.getAdmin().deleteSnapshot(snapshotName);
+ UTIL.deleteTable(tableName);
+ tearDownCluster();
+ }
+ }
+
private void verifyWithMockedMapReduce(Job job, int numRegions, int expectedNumSplits,
byte[] startRow, byte[] stopRow)
throws IOException, InterruptedException {
http://git-wip-us.apache.org/repos/asf/hbase/blob/f441ca04/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java
index 9098277..b8438ca 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java
@@ -62,6 +62,7 @@ import org.apache.hadoop.hbase.mob.MobUtils;
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.client.SnapshotDescription;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.RegionInfo;
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneRequest;
@@ -251,7 +252,17 @@ public final class SnapshotTestingUtils {
if (hasMob) {
assertEquals(regions.size(), regionManifests.size() - 1);
} else {
- assertEquals(regions.size(), regionManifests.size());
+ // if create snapshot when table splitting, parent region will be included to the snapshot
+ // region manifest. we should exclude the parent regions.
+ int regionCountExclusiveSplitParent = 0;
+ for (SnapshotRegionManifest snapshotRegionManifest : regionManifests.values()) {
+ HRegionInfo hri = HRegionInfo.convert(snapshotRegionManifest.getRegionInfo());
+ if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) {
+ continue;
+ }
+ regionCountExclusiveSplitParent++;
+ }
+ assertEquals(regions.size(), regionCountExclusiveSplitParent);
}
// Verify Regions (redundant check, see MasterSnapshotVerifier)