You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ap...@apache.org on 2018/08/18 01:16:56 UTC

[1/7] hbase git commit: HBASE-20940 HStore.cansplit should not allow split to happen if it has references (Vishal Khandelwal)

Repository: hbase
Updated Branches:
  refs/heads/branch-1 8716ac256 -> 56c59f11e
  refs/heads/branch-1.3 d95e66424 -> 25a629621
  refs/heads/branch-1.4 9f78a1dd6 -> 9573f7629
  refs/heads/branch-2 9da5d3a48 -> 52ba33aa7
  refs/heads/branch-2.0 c4eb99e1f -> c3bd00e2f
  refs/heads/branch-2.1 67ad0e601 -> 798cb1d79
  refs/heads/master f9793fafb -> e8eb36651


HBASE-20940 HStore.cansplit should not allow split to happen if it has references (Vishal Khandelwal)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/e8eb3665
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/e8eb3665
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/e8eb3665

Branch: refs/heads/master
Commit: e8eb3665142fb157ff000b25c687123908e7f63e
Parents: f9793fa
Author: Andrew Purtell <ap...@apache.org>
Authored: Fri Aug 17 15:01:44 2018 -0700
Committer: Andrew Purtell <ap...@apache.org>
Committed: Fri Aug 17 15:01:44 2018 -0700

----------------------------------------------------------------------
 .../hadoop/hbase/regionserver/HStore.java       |  12 +-
 .../client/TestAsyncTableGetMultiThreaded.java  |  28 +++-
 .../hbase/io/encoding/TestChangingEncoding.java |   4 +
 .../hbase/namespace/TestNamespaceAuditor.java   |  20 +++
 .../TestEndToEndSplitTransaction.java           | 136 +++++++++++++++++--
 .../TestSplitTransactionOnCluster.java          |  37 +++--
 6 files changed, 210 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/e8eb3665/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
index ca6e591..8bdc070 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
@@ -1701,7 +1701,17 @@ public class HStore implements Store, HeapSize, StoreConfigInformation, Propagat
 
   @Override
   public boolean hasReferences() {
-    return StoreUtils.hasReferences(this.storeEngine.getStoreFileManager().getStorefiles());
+    List<HStoreFile> reloadedStoreFiles = null;
+    try {
+      // Reloading the store files from file system due to HBASE-20940. As split can happen with an
+      // region which has references
+      reloadedStoreFiles = loadStoreFiles();
+      return StoreUtils.hasReferences(reloadedStoreFiles);
+    } catch (IOException ioe) {
+      LOG.error("Error trying to determine if store has references, assuming references exists",
+        ioe);
+      return true;
+    }
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/hbase/blob/e8eb3665/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java
index 7632716..8a2dfcc 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java
@@ -21,7 +21,6 @@ import static org.apache.hadoop.hbase.HConstants.HBASE_CLIENT_META_OPERATION_TIM
 import static org.apache.hadoop.hbase.master.LoadBalancer.TABLES_ON_MASTER;
 import static org.junit.Assert.assertEquals;
 
-import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -35,18 +34,21 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
+
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.MemoryCompactionPolicy;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
 import org.apache.hadoop.hbase.io.ByteBufferPool;
 import org.apache.hadoop.hbase.regionserver.CompactingMemStore;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.testclassification.ClientTests;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.util.Threads;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
@@ -124,7 +126,7 @@ public class TestAsyncTableGetMultiThreaded {
   }
 
   @Test
-  public void test() throws IOException, InterruptedException, ExecutionException {
+  public void test() throws Exception {
     int numThreads = 20;
     AtomicBoolean stop = new AtomicBoolean(false);
     ExecutorService executor =
@@ -137,9 +139,31 @@ public class TestAsyncTableGetMultiThreaded {
     Collections.shuffle(Arrays.asList(SPLIT_KEYS), new Random(123));
     Admin admin = TEST_UTIL.getAdmin();
     for (byte[] splitPoint : SPLIT_KEYS) {
+      int oldRegionCount = admin.getRegions(TABLE_NAME).size();
       admin.split(TABLE_NAME, splitPoint);
+      TEST_UTIL.waitFor(30000, new ExplainingPredicate<Exception>() {
+        @Override
+        public boolean evaluate() throws Exception {
+          return TEST_UTIL.getMiniHBaseCluster().getRegions(TABLE_NAME).size() > oldRegionCount;
+        }
+
+        @Override
+        public String explainFailure() throws Exception {
+          return "Split has not finished yet";
+        }
+      });
+
       for (HRegion region : TEST_UTIL.getHBaseCluster().getRegions(TABLE_NAME)) {
         region.compact(true);
+
+        //Waiting for compaction to complete and references are cleaned up
+        RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
+        while (CompactionState.NONE != admin
+            .getCompactionStateForRegion(region.getRegionInfo().getRegionName())
+            && retrier.shouldRetry()) {
+          retrier.sleepUntilNextRetry();
+        }
+        region.getStores().get(0).closeAndArchiveCompactedFiles();
       }
       Thread.sleep(5000);
       admin.balance(true);

http://git-wip-us.apache.org/repos/asf/hbase/blob/e8eb3665/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
index 1937d80..38313c4 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
@@ -107,6 +107,10 @@ public class TestChangingEncoding {
   public static void setUpBeforeClass() throws Exception {
     // Use a small flush size to create more HFiles.
     conf.setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 1024 * 1024);
+    // Disabling split to make sure split does not cause modify column to wait which timesout test
+    // sometime
+    conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
+        "org.apache.hadoop.hbase.regionserver.DisabledRegionSplitPolicy");
     // ((Log4JLogger)RpcServerImplementation.LOG).getLogger().setLevel(Level.TRACE);
     // ((Log4JLogger)RpcClient.LOG).getLogger().setLevel(Level.TRACE);
     TEST_UTIL.startMiniCluster();

http://git-wip-us.apache.org/repos/asf/hbase/blob/e8eb3665/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java
index 1b4957a..cc6c217 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java
@@ -48,7 +48,9 @@ import org.apache.hadoop.hbase.MiniHBaseCluster;
 import org.apache.hadoop.hbase.NamespaceDescriptor;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.Waiter;
+import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
 import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.CompactionState;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
 import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
@@ -81,6 +83,7 @@ import org.apache.hadoop.hbase.snapshot.RestoreSnapshotException;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.zookeeper.KeeperException;
 import org.junit.After;
@@ -365,6 +368,23 @@ public class TestNamespaceAuditor {
     HRegion regionToSplit = UTIL.getMiniHBaseCluster().getRegions(tableTwo).stream()
       .filter(r -> r.getRegionInfo().containsRow(splitKey)).findFirst().get();
     regionToSplit.compact(true);
+    // Waiting for compaction to finish
+    UTIL.waitFor(30000, new Waiter.Predicate<Exception>() {
+      @Override
+      public boolean evaluate() throws Exception {
+        return (CompactionState.NONE == ADMIN
+            .getCompactionStateForRegion(regionToSplit.getRegionInfo().getRegionName()));
+      }
+    });
+
+    // Cleaning compacted references for split to proceed
+    regionToSplit.getStores().stream().forEach(s -> {
+      try {
+        s.closeAndArchiveCompactedFiles();
+      } catch (IOException e1) {
+        LOG.error("Error whiling cleaning compacted file");
+      }
+    });
     // the above compact may quit immediately if there is a compaction ongoing, so here we need to
     // wait a while to let the ongoing compaction finish.
     UTIL.waitFor(10000, regionToSplit::isSplittable);

http://git-wip-us.apache.org/repos/asf/hbase/blob/e8eb3665/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
index b0302f6..85e9d30 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
@@ -24,10 +24,13 @@ import static org.junit.Assert.assertTrue;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
 import java.util.Random;
 import java.util.Set;
 import java.util.TreeSet;
+import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
+
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.ChoreService;
@@ -41,6 +44,8 @@ import org.apache.hadoop.hbase.ScheduledChore;
 import org.apache.hadoop.hbase.Stoppable;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
+import org.apache.hadoop.hbase.client.CompactionState;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
 import org.apache.hadoop.hbase.client.Get;
@@ -49,13 +54,19 @@ import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.client.RegionLocator;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.hbase.util.PairOfSameType;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.util.StoppableImplementation;
 import org.apache.hadoop.hbase.util.Threads;
+import org.apache.hbase.thirdparty.com.google.common.collect.Iterators;
+import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
 import org.junit.AfterClass;
+import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.ClassRule;
 import org.junit.Rule;
@@ -65,8 +76,6 @@ import org.junit.rules.TestName;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import org.apache.hbase.thirdparty.com.google.common.collect.Iterators;
-
 @Category(LargeTests.class)
 public class TestEndToEndSplitTransaction {
 
@@ -92,6 +101,78 @@ public class TestEndToEndSplitTransaction {
     TEST_UTIL.shutdownMiniCluster();
   }
 
+
+  /*
+   * This is the test for : HBASE-20940 This test will split the region and try to open an reference
+   * over store file. Once store file has any reference, it makes sure that region can't be split
+   * @throws Exception
+   */
+  @Test
+  public void testCanSplitJustAfterASplit() throws Exception {
+    LOG.info("Starting testCanSplitJustAfterASplit");
+    byte[] fam = Bytes.toBytes("cf_split");
+
+    TableName tableName = TableName.valueOf("CanSplitTable");
+    Table source = TEST_UTIL.getConnection().getTable(tableName);
+    Admin admin = TEST_UTIL.getAdmin();
+    Map<String, StoreFileReader> scanner = Maps.newHashMap();
+
+    try {
+      TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName)
+          .setColumnFamily(ColumnFamilyDescriptorBuilder.of(fam)).build();
+
+      admin.createTable(htd);
+      TEST_UTIL.loadTable(source, fam);
+      List<HRegion> regions = TEST_UTIL.getHBaseCluster().getRegions(tableName);
+      regions.get(0).forceSplit(null);
+      admin.split(tableName);
+
+      while (regions.size() <= 1) {
+        regions = TEST_UTIL.getHBaseCluster().getRegions(tableName);
+        regions.stream()
+            .forEach(r -> r.getStores().get(0).getStorefiles().stream()
+                .filter(
+                  s -> s.isReference() && !scanner.containsKey(r.getRegionInfo().getEncodedName()))
+                .forEach(sf -> {
+                  StoreFileReader reader = ((HStoreFile) sf).getReader();
+                  reader.getStoreFileScanner(true, false, false, 0, 0, false);
+                  scanner.put(r.getRegionInfo().getEncodedName(), reader);
+                  LOG.info("Got reference to file = " + sf.getPath() + ",for region = "
+                      + r.getRegionInfo().getEncodedName());
+                }));
+      }
+
+      Assert.assertTrue("Regions did not split properly", regions.size() > 1);
+      Assert.assertTrue("Could not get reference any of the store file", scanner.size() > 1);
+
+      RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
+      while (CompactionState.NONE != admin.getCompactionState(tableName) && retrier.shouldRetry()) {
+        retrier.sleepUntilNextRetry();
+      }
+
+      Assert.assertEquals("Compaction did not complete in 30 secs", CompactionState.NONE,
+        admin.getCompactionState(tableName));
+
+      regions.stream()
+          .filter(region -> scanner.containsKey(region.getRegionInfo().getEncodedName()))
+          .forEach(r -> Assert.assertTrue("Contains an open file reference which can be split",
+            !r.getStores().get(0).canSplit()));
+    } finally {
+      scanner.values().stream().forEach(s -> {
+        try {
+          s.close(true);
+        } catch (IOException ioe) {
+          LOG.error("Failed while closing store file", ioe);
+        }
+      });
+      scanner.clear();
+      if (source != null) {
+        source.close();
+      }
+      TEST_UTIL.deleteTableIfAny(tableName);
+    }
+  }
+
   /**
    * Tests that the client sees meta table changes as atomic during splits
    */
@@ -151,18 +232,17 @@ public class TestEndToEndSplitTransaction {
     public void run() {
       try {
         Random random = new Random();
-        for (int i= 0; i< 5; i++) {
-          List<RegionInfo> regions =
-              MetaTableAccessor.getTableRegions(connection, tableName, true);
+        for (int i = 0; i < 5; i++) {
+          List<RegionInfo> regions = MetaTableAccessor.getTableRegions(connection, tableName, true);
           if (regions.isEmpty()) {
             continue;
           }
           int regionIndex = random.nextInt(regions.size());
 
-          //pick a random region and split it into two
+          // pick a random region and split it into two
           RegionInfo region = Iterators.get(regions.iterator(), regionIndex);
 
-          //pick the mid split point
+          // pick the mid split point
           int start = 0, end = Integer.MAX_VALUE;
           if (region.getStartKey().length > 0) {
             start = Bytes.toInt(region.getStartKey());
@@ -173,7 +253,7 @@ public class TestEndToEndSplitTransaction {
           int mid = start + ((end - start) / 2);
           byte[] splitPoint = Bytes.toBytes(mid);
 
-          //put some rows to the regions
+          // put some rows to the regions
           addData(start);
           addData(mid);
 
@@ -183,11 +263,11 @@ public class TestEndToEndSplitTransaction {
           log("Initiating region split for:" + region.getRegionNameAsString());
           try {
             admin.splitRegion(region.getRegionName(), splitPoint);
-            //wait until the split is complete
+            // wait until the split is complete
             blockUntilRegionSplit(CONF, 50000, region.getRegionName(), true);
 
           } catch (NotServingRegionException ex) {
-            //ignore
+            // ignore
           }
         }
       } catch (Throwable ex) {
@@ -226,9 +306,11 @@ public class TestEndToEndSplitTransaction {
     /** verify region boundaries obtained from MetaScanner */
     void verifyRegionsUsingMetaTableAccessor() throws Exception {
       List<RegionInfo> regionList = MetaTableAccessor.getTableRegions(connection, tableName, true);
-      verifyTableRegions(regionList.stream().collect(Collectors.toCollection(() -> new TreeSet<>(RegionInfo.COMPARATOR))));
+      verifyTableRegions(regionList.stream()
+          .collect(Collectors.toCollection(() -> new TreeSet<>(RegionInfo.COMPARATOR))));
       regionList = MetaTableAccessor.getAllRegions(connection, true);
-      verifyTableRegions(regionList.stream().collect(Collectors.toCollection(() -> new TreeSet<>(RegionInfo.COMPARATOR))));
+      verifyTableRegions(regionList.stream()
+          .collect(Collectors.toCollection(() -> new TreeSet<>(RegionInfo.COMPARATOR))));
     }
 
     /** verify region boundaries obtained from HTable.getStartEndKeys() */
@@ -343,7 +425,9 @@ public class TestEndToEndSplitTransaction {
     }
   }
 
-  /** Blocks until the region split is complete in hbase:meta and region server opens the daughters */
+  /**
+   * Blocks until the region split is complete in hbase:meta and region server opens the daughters
+   */
   public static void blockUntilRegionSplit(Configuration conf, long timeout,
       final byte[] regionName, boolean waitForDaughters)
       throws IOException, InterruptedException {
@@ -389,10 +473,32 @@ public class TestEndToEndSplitTransaction {
 
         rem = timeout - (System.currentTimeMillis() - start);
         blockUntilRegionIsOpened(conf, rem, daughterB);
+
+        // Compacting the new region to make sure references can be cleaned up
+        compactAndBlockUntilDone(TEST_UTIL.getAdmin(),
+          TEST_UTIL.getMiniHBaseCluster().getRegionServer(0), daughterA.getRegionName());
+        compactAndBlockUntilDone(TEST_UTIL.getAdmin(),
+          TEST_UTIL.getMiniHBaseCluster().getRegionServer(0), daughterB.getRegionName());
+
+        removeCompactedFiles(conn, timeout, daughterA);
+        removeCompactedFiles(conn, timeout, daughterB);
       }
     }
   }
 
+  public static void removeCompactedFiles(Connection conn, long timeout, RegionInfo hri)
+      throws IOException, InterruptedException {
+    log("remove compacted files for : " + hri.getRegionNameAsString());
+    List<HRegion> regions = TEST_UTIL.getHBaseCluster().getRegions(hri.getTable());
+    regions.stream().forEach(r -> {
+      try {
+        r.getStores().get(0).closeAndArchiveCompactedFiles();
+      } catch (IOException ioe) {
+        LOG.error("failed in removing compacted file", ioe);
+      }
+    });
+  }
+
   public static void blockUntilRegionIsInMeta(Connection conn, long timeout, RegionInfo hri)
       throws IOException, InterruptedException {
     log("blocking until region is in META: " + hri.getRegionNameAsString());
@@ -415,7 +521,9 @@ public class TestEndToEndSplitTransaction {
         Table table = conn.getTable(hri.getTable())) {
       byte[] row = hri.getStartKey();
       // Check for null/empty row. If we find one, use a key that is likely to be in first region.
-      if (row == null || row.length <= 0) row = new byte[] { '0' };
+      if (row == null || row.length <= 0) {
+        row = new byte[] { '0' };
+      }
       Get get = new Get(row);
       while (System.currentTimeMillis() - start < timeout) {
         try {

http://git-wip-us.apache.org/repos/asf/hbase/blob/e8eb3665/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
index 95e0112..eb162de 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
@@ -31,7 +31,9 @@ import java.util.List;
 import java.util.Map;
 import java.util.Optional;
 import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -50,6 +52,7 @@ import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.UnknownRegionException;
 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
 import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.CompactionState;
 import org.apache.hadoop.hbase.client.Consistency;
 import org.apache.hadoop.hbase.client.Delete;
 import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
@@ -78,6 +81,10 @@ import org.apache.hadoop.hbase.master.assignment.RegionStates;
 import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
 import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
 import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController;
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.testclassification.RegionServerTests;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -85,7 +92,10 @@ import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.HBaseFsck;
 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.util.Threads;
+import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
+import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.KeeperException.NodeExistsException;
 import org.junit.After;
@@ -101,14 +111,6 @@ import org.junit.rules.TestName;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
-import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
-
-import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
-
 /**
  * The below tests are testing split region against a running cluster
  */
@@ -386,11 +388,18 @@ public class TestSplitTransactionOnCluster {
       // Compact first to ensure we have cleaned up references -- else the split
       // will fail.
       this.admin.compactRegion(daughter.getRegionName());
+      RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
+      while (CompactionState.NONE != admin.getCompactionStateForRegion(daughter.getRegionName())
+          && retrier.shouldRetry()) {
+        retrier.sleepUntilNextRetry();
+      }
       daughters = cluster.getRegions(tableName);
       HRegion daughterRegion = null;
-      for (HRegion r: daughters) {
+      for (HRegion r : daughters) {
         if (RegionInfo.COMPARATOR.compare(r.getRegionInfo(), daughter) == 0) {
           daughterRegion = r;
+          // Archiving the compacted references file
+          r.getStores().get(0).closeAndArchiveCompactedFiles();
           LOG.info("Found matching HRI: " + daughterRegion);
           break;
         }
@@ -533,11 +542,19 @@ public class TestSplitTransactionOnCluster {
       // Call split.
       this.admin.splitRegion(hri.getRegionName());
       List<HRegion> daughters = checkAndGetDaughters(tableName);
+
       // Before cleanup, get a new master.
       HMaster master = abortAndWaitForMaster();
       // Now call compact on the daughters and clean up any references.
-      for (HRegion daughter: daughters) {
+      for (HRegion daughter : daughters) {
         daughter.compact(true);
+        RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
+        while (CompactionState.NONE != admin
+            .getCompactionStateForRegion(daughter.getRegionInfo().getRegionName())
+            && retrier.shouldRetry()) {
+          retrier.sleepUntilNextRetry();
+        }
+        daughter.getStores().get(0).closeAndArchiveCompactedFiles();
         assertFalse(daughter.hasReferences());
       }
       // BUT calling compact on the daughters is not enough. The CatalogJanitor looks


[7/7] hbase git commit: HBASE-20940 HStore.cansplit should not allow split to happen if it has references (Vishal Khandelwal)

Posted by ap...@apache.org.
HBASE-20940 HStore.cansplit should not allow split to happen if it has references (Vishal Khandelwal)

Conflicts:
	hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/25a62962
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/25a62962
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/25a62962

Branch: refs/heads/branch-1.3
Commit: 25a62962166ec0d0b42d54bf7857b2dfea76cc7a
Parents: d95e664
Author: Andrew Purtell <ap...@apache.org>
Authored: Fri Aug 17 15:01:44 2018 -0700
Committer: Andrew Purtell <ap...@apache.org>
Committed: Fri Aug 17 15:44:52 2018 -0700

----------------------------------------------------------------------
 .../hadoop/hbase/regionserver/HStore.java       |  12 +-
 .../hbase/io/encoding/TestChangingEncoding.java |   4 +
 .../hbase/master/TestAssignmentListener.java    |  21 +++-
 .../TestEndToEndSplitTransaction.java           | 115 ++++++++++++++++++-
 .../hadoop/hbase/regionserver/TestHRegion.java  |   8 ++
 .../TestSplitTransactionOnCluster.java          |  16 ++-
 6 files changed, 165 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/25a62962/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
index 49efd82..76073d0 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
@@ -1596,7 +1596,17 @@ public class HStore implements Store {
 
   @Override
   public boolean hasReferences() {
-    return StoreUtils.hasReferences(this.storeEngine.getStoreFileManager().getStorefiles());
+    List<StoreFile> reloadedStoreFiles = null;
+    try {
+      // Reloading the store files from file system due to HBASE-20940. As split can happen with an
+      // region which has references
+      reloadedStoreFiles = loadStoreFiles();
+    } catch (IOException ioe) {
+      LOG.error("Error trying to determine if store has referenes, " + "assuming references exists",
+        ioe);
+      return true;
+    }
+    return StoreUtils.hasReferences(reloadedStoreFiles);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/hbase/blob/25a62962/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
index 621d3f8..d038c5f 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
@@ -105,6 +105,10 @@ public class TestChangingEncoding {
     conf.setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 1024 * 1024);
     // ((Log4JLogger)RpcServerImplementation.LOG).getLogger().setLevel(Level.TRACE);
     // ((Log4JLogger)RpcClient.LOG).getLogger().setLevel(Level.TRACE);
+    // Disabling split to make sure split does not cause modify column to wait which timesout test
+    // sometime
+    conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
+      "org.apache.hadoop.hbase.regionserver.DisabledRegionSplitPolicy");
     conf.setBoolean("hbase.online.schema.update.enable", true);
     TEST_UTIL.startMiniCluster();
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/25a62962/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentListener.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentListener.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentListener.java
index f171821..6481756 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentListener.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentListener.java
@@ -24,6 +24,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.commons.logging.Log;
@@ -33,19 +34,21 @@ import org.apache.hadoop.hbase.Abortable;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.MiniHBaseCluster;
-import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.ServerLoad;
 import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.Region;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.JVMClusterUtil;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.zookeeper.DrainingServerTracker;
 import org.apache.hadoop.hbase.zookeeper.RegionServerTracker;
 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
@@ -251,6 +254,20 @@ public class TestAssignmentListener {
       while (mergeable < 2) {
         Thread.sleep(100);
         admin.majorCompact(TABLE_NAME);
+
+        // Waiting for compaction to finish
+        RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
+        while (CompactionState.NONE != admin.getCompactionState(TABLE_NAME)
+            && retrier.shouldRetry()) {
+          retrier.sleepUntilNextRetry();
+        }
+
+        // Making sure references are getting cleaned after compaction.
+        // This is taken care by discharger
+        for (HRegion region : TEST_UTIL.getHBaseCluster().getRegions(TABLE_NAME)) {
+          region.getStores().get(0).closeAndArchiveCompactedFiles();
+        }
+
         mergeable = 0;
         for (JVMClusterUtil.RegionServerThread regionThread: miniCluster.getRegionServerThreads()) {
           for (Region region: regionThread.getRegionServer().getOnlineRegions(TABLE_NAME)) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/25a62962/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
index 0df3a91..1561f1b 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
@@ -25,10 +25,12 @@ import static org.junit.Assert.assertTrue;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
 import java.util.NavigableMap;
 import java.util.Random;
 import java.util.Set;
 import java.util.TreeSet;
+import java.util.concurrent.TimeUnit;
 
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.logging.Log;
@@ -36,9 +38,11 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.ChoreService;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.MetaTableAccessor;
 import org.apache.hadoop.hbase.NotServingRegionException;
 import org.apache.hadoop.hbase.ScheduledChore;
@@ -59,6 +63,7 @@ import org.apache.hadoop.hbase.coordination.BaseCoordinatedStateManager;
 import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController;
 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.protobuf.RequestConverter;
+import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanRequest;
 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos;
@@ -66,14 +71,17 @@ import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.hbase.util.PairOfSameType;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.util.StoppableImplementation;
 import org.apache.hadoop.hbase.util.Threads;
 import org.junit.AfterClass;
+import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
 import com.google.common.collect.Iterators;
+import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
 import com.google.protobuf.ServiceException;
 
@@ -94,6 +102,79 @@ public class TestEndToEndSplitTransaction {
     TEST_UTIL.shutdownMiniCluster();
   }
 
+  /*
+   * This is the test for : HBASE-20940 This test will split the region and try to open an reference
+   * over store file. Once store file has any reference, it makes sure that region can't be split
+   * @throws Exception
+   */
+  @Test
+  public void testCanSplitJustAfterASplit() throws Exception {
+    LOG.info("Starting testCanSplitJustAfterASplit");
+    byte[] fam = Bytes.toBytes("cf_split");
+
+    TableName tableName = TableName.valueOf("CanSplitTable");
+    Table source = TEST_UTIL.getConnection().getTable(tableName);
+    Admin admin = TEST_UTIL.getHBaseAdmin();
+    Map<String, StoreFile.Reader> scanner = Maps.newHashMap();
+
+    try {
+      HTableDescriptor htd = new HTableDescriptor(tableName);
+      htd.addFamily(new HColumnDescriptor(fam));
+
+      admin.createTable(htd);
+      TEST_UTIL.loadTable(source, fam);
+      List<HRegion> regions = TEST_UTIL.getHBaseCluster().getRegions(tableName);
+      regions.get(0).forceSplit(null);
+      admin.split(tableName);
+
+      while (regions.size() <= 1) {
+        regions = TEST_UTIL.getHBaseCluster().getRegions(tableName);
+        // Trying to get reference of the file before region split completes
+        // this would try to get handle on storefile or it can not be cleaned
+        for (HRegion region : regions) {
+          for (Store store : region.getStores()) {
+            for (StoreFile file : store.getStorefiles()) {
+              StoreFile.Reader reader = file.getReader();
+              reader.getStoreFileScanner(false, false, false, 0);
+              scanner.put(region.getRegionInfo().getEncodedName(), reader);
+              LOG.info("Got reference to file = " + file.getPath() + ",for region = "
+                  + region.getRegionInfo().getEncodedName());
+            }
+          }
+        }
+      }
+
+      Assert.assertTrue("Regions did not split properly", regions.size() > 1);
+      Assert.assertTrue("Could not get reference any of the store file", scanner.size() > 1);
+
+      RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
+      while (CompactionState.NONE != admin.getCompactionState(tableName) && retrier.shouldRetry()) {
+        retrier.sleepUntilNextRetry();
+      }
+
+      Assert.assertEquals("Compaction did not complete in 30 secs", CompactionState.NONE,
+        admin.getCompactionState(tableName));
+
+      for (HRegion region : regions) {
+        for (Store store : region.getStores()) {
+          Assert.assertTrue("Contains an open file reference which can be split",
+            !scanner.containsKey(region.getRegionInfo().getEncodedName()) || !store.canSplit());
+        }
+      }
+    } finally {
+      for (StoreFile.Reader s : scanner.values()) {
+        try {
+          s.close(true);
+        } catch (IOException e) {
+          e.printStackTrace();
+        }
+      }
+      scanner.clear();
+      if (source != null) source.close();
+      TEST_UTIL.deleteTableIfAny(tableName);
+    }
+  }
+
   @Test
   public void testMasterOpsWhileSplitting() throws Exception {
     TableName tableName = TableName.valueOf("TestSplit");
@@ -130,10 +211,10 @@ public class TestEndToEndSplitTransaction {
       if (split.useZKForAssignment) {
         server.postOpenDeployTasks(regions.getSecond());
       } else {
-      server.reportRegionStateTransition(
-        RegionServerStatusProtos.RegionStateTransition.TransitionCode.SPLIT,
-        region.getRegionInfo(), regions.getFirst().getRegionInfo(),
-        regions.getSecond().getRegionInfo());
+        server.reportRegionStateTransition(
+          RegionServerStatusProtos.RegionStateTransition.TransitionCode.SPLIT,
+          region.getRegionInfo(), regions.getFirst().getRegionInfo(),
+          regions.getSecond().getRegionInfo());
       }
 
       // first daughter second
@@ -157,8 +238,8 @@ public class TestEndToEndSplitTransaction {
       if (split.useZKForAssignment) {
         // 4. phase III
         ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
-          .getSplitTransactionCoordination().completeSplitTransaction(server, regions.getFirst(),
-            regions.getSecond(), split.std, region);
+            .getSplitTransactionCoordination().completeSplitTransaction(server, regions.getFirst(),
+              regions.getSecond(), split.std, region);
       }
 
       assertTrue(test(conn, tableName, firstRow, server));
@@ -552,6 +633,28 @@ public class TestEndToEndSplitTransaction {
 
         rem = timeout - (System.currentTimeMillis() - start);
         blockUntilRegionIsOpened(conf, rem, daughterB);
+
+        // Compacting the new region to make sure references can be cleaned up
+        compactAndBlockUntilDone(TEST_UTIL.getHBaseAdmin(),
+          TEST_UTIL.getMiniHBaseCluster().getRegionServer(0), daughterA.getRegionName());
+        compactAndBlockUntilDone(TEST_UTIL.getHBaseAdmin(),
+          TEST_UTIL.getMiniHBaseCluster().getRegionServer(0), daughterB.getRegionName());
+
+        removeCompactedfiles(conn, timeout, daughterA);
+        removeCompactedfiles(conn, timeout, daughterB);
+      }
+    }
+  }
+
+  public static void removeCompactedfiles(Connection conn, long timeout, HRegionInfo hri)
+      throws IOException, InterruptedException {
+    log("removeCompactedfiles for : " + hri.getRegionNameAsString());
+    List<HRegion> regions = TEST_UTIL.getHBaseCluster().getRegions(hri.getTable());
+    for (HRegion region : regions) {
+      try {
+        region.getStores().get(0).closeAndArchiveCompactedFiles();
+      } catch (IOException e) {
+        e.printStackTrace();
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/25a62962/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
index 11fc26e..296ccf4 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
@@ -2609,6 +2609,10 @@ public class TestHRegion {
       LOG.info("" + HBaseTestCase.addContent(region, fam3));
       region.flush(true);
       region.compactStores();
+      region.waitForFlushesAndCompactions();
+      for(Store s:region.getStores()) {
+        s.closeAndArchiveCompactedFiles();
+      }
       byte[] splitRow = region.checkSplit();
       assertNotNull(splitRow);
       LOG.info("SplitRow: " + Bytes.toString(splitRow));
@@ -2618,6 +2622,10 @@ public class TestHRegion {
         for (int i = 0; i < subregions.length; i++) {
           HRegion.openHRegion(subregions[i], null);
           subregions[i].compactStores();
+          subregions[i].waitForFlushesAndCompactions();
+          for(Store s:subregions[i].getStores()) {
+            s.closeAndArchiveCompactedFiles();
+          }
         }
         Path oldRegionPath = region.getRegionFileSystem().getRegionDir();
         Path oldRegion1 = subregions[0].getRegionFileSystem().getRegionDir();

http://git-wip-us.apache.org/repos/asf/hbase/blob/25a62962/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
index b222639..4d33988 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
@@ -26,12 +26,13 @@ import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
-import java.io.IOException;
 import java.io.InterruptedIOException;
+import java.io.IOException;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -87,6 +88,7 @@ import org.apache.hadoop.hbase.master.RegionState;
 import org.apache.hadoop.hbase.master.RegionState.State;
 import org.apache.hadoop.hbase.master.RegionStates;
 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
 import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
 import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
@@ -96,6 +98,7 @@ import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.HBaseFsck;
 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
 import org.apache.hadoop.hbase.util.PairOfSameType;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
@@ -608,11 +611,20 @@ public class TestSplitTransactionOnCluster {
       // Compact first to ensure we have cleaned up references -- else the split
       // will fail.
       this.admin.compact(daughter.getRegionName());
+      RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
+      while (CompactionState.NONE != admin.getCompactionStateForRegion(daughter.getRegionName())
+          && retrier.shouldRetry()) {
+        retrier.sleepUntilNextRetry();
+      }
+
       daughters = cluster.getRegions(tableName);
       HRegion daughterRegion = null;
-      for (HRegion r: daughters) {
+      for (HRegion r : daughters) {
         if (r.getRegionInfo().equals(daughter)) {
           daughterRegion = r;
+          // Archiving the compacted references file
+          r.getStores().get(0).closeAndArchiveCompactedFiles();
+
           LOG.info("Found matching HRI: " + daughterRegion);
           break;
         }


[4/7] hbase git commit: HBASE-20940 HStore.cansplit should not allow split to happen if it has references (Vishal Khandelwal)

Posted by ap...@apache.org.
HBASE-20940 HStore.cansplit should not allow split to happen if it has references (Vishal Khandelwal)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/c3bd00e2
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/c3bd00e2
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/c3bd00e2

Branch: refs/heads/branch-2.0
Commit: c3bd00e2fb943fe699cfcdfb79706f452f1f00a7
Parents: c4eb99e
Author: Andrew Purtell <ap...@apache.org>
Authored: Fri Aug 17 15:01:44 2018 -0700
Committer: Andrew Purtell <ap...@apache.org>
Committed: Fri Aug 17 15:02:31 2018 -0700

----------------------------------------------------------------------
 .../hadoop/hbase/regionserver/HStore.java       |  12 +-
 .../client/TestAsyncTableGetMultiThreaded.java  |  28 +++-
 .../hbase/io/encoding/TestChangingEncoding.java |   4 +
 .../hbase/namespace/TestNamespaceAuditor.java   |  20 +++
 .../TestEndToEndSplitTransaction.java           | 136 +++++++++++++++++--
 .../TestSplitTransactionOnCluster.java          |  37 +++--
 6 files changed, 210 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/c3bd00e2/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
index 035496f..3943de1 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
@@ -1632,7 +1632,17 @@ public class HStore implements Store, HeapSize, StoreConfigInformation, Propagat
 
   @Override
   public boolean hasReferences() {
-    return StoreUtils.hasReferences(this.storeEngine.getStoreFileManager().getStorefiles());
+    List<HStoreFile> reloadedStoreFiles = null;
+    try {
+      // Reloading the store files from file system due to HBASE-20940. As split can happen with an
+      // region which has references
+      reloadedStoreFiles = loadStoreFiles();
+      return StoreUtils.hasReferences(reloadedStoreFiles);
+    } catch (IOException ioe) {
+      LOG.error("Error trying to determine if store has references, assuming references exists",
+        ioe);
+      return true;
+    }
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/hbase/blob/c3bd00e2/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java
index 7632716..8a2dfcc 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java
@@ -21,7 +21,6 @@ import static org.apache.hadoop.hbase.HConstants.HBASE_CLIENT_META_OPERATION_TIM
 import static org.apache.hadoop.hbase.master.LoadBalancer.TABLES_ON_MASTER;
 import static org.junit.Assert.assertEquals;
 
-import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -35,18 +34,21 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
+
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.MemoryCompactionPolicy;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
 import org.apache.hadoop.hbase.io.ByteBufferPool;
 import org.apache.hadoop.hbase.regionserver.CompactingMemStore;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.testclassification.ClientTests;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.util.Threads;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
@@ -124,7 +126,7 @@ public class TestAsyncTableGetMultiThreaded {
   }
 
   @Test
-  public void test() throws IOException, InterruptedException, ExecutionException {
+  public void test() throws Exception {
     int numThreads = 20;
     AtomicBoolean stop = new AtomicBoolean(false);
     ExecutorService executor =
@@ -137,9 +139,31 @@ public class TestAsyncTableGetMultiThreaded {
     Collections.shuffle(Arrays.asList(SPLIT_KEYS), new Random(123));
     Admin admin = TEST_UTIL.getAdmin();
     for (byte[] splitPoint : SPLIT_KEYS) {
+      int oldRegionCount = admin.getRegions(TABLE_NAME).size();
       admin.split(TABLE_NAME, splitPoint);
+      TEST_UTIL.waitFor(30000, new ExplainingPredicate<Exception>() {
+        @Override
+        public boolean evaluate() throws Exception {
+          return TEST_UTIL.getMiniHBaseCluster().getRegions(TABLE_NAME).size() > oldRegionCount;
+        }
+
+        @Override
+        public String explainFailure() throws Exception {
+          return "Split has not finished yet";
+        }
+      });
+
       for (HRegion region : TEST_UTIL.getHBaseCluster().getRegions(TABLE_NAME)) {
         region.compact(true);
+
+        //Waiting for compaction to complete and references are cleaned up
+        RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
+        while (CompactionState.NONE != admin
+            .getCompactionStateForRegion(region.getRegionInfo().getRegionName())
+            && retrier.shouldRetry()) {
+          retrier.sleepUntilNextRetry();
+        }
+        region.getStores().get(0).closeAndArchiveCompactedFiles();
       }
       Thread.sleep(5000);
       admin.balance(true);

http://git-wip-us.apache.org/repos/asf/hbase/blob/c3bd00e2/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
index 1937d80..38313c4 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
@@ -107,6 +107,10 @@ public class TestChangingEncoding {
   public static void setUpBeforeClass() throws Exception {
     // Use a small flush size to create more HFiles.
     conf.setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 1024 * 1024);
+    // Disabling split to make sure split does not cause modify column to wait which timesout test
+    // sometime
+    conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
+        "org.apache.hadoop.hbase.regionserver.DisabledRegionSplitPolicy");
     // ((Log4JLogger)RpcServerImplementation.LOG).getLogger().setLevel(Level.TRACE);
     // ((Log4JLogger)RpcClient.LOG).getLogger().setLevel(Level.TRACE);
     TEST_UTIL.startMiniCluster();

http://git-wip-us.apache.org/repos/asf/hbase/blob/c3bd00e2/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java
index 1b4957a..cc6c217 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java
@@ -48,7 +48,9 @@ import org.apache.hadoop.hbase.MiniHBaseCluster;
 import org.apache.hadoop.hbase.NamespaceDescriptor;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.Waiter;
+import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
 import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.CompactionState;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
 import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
@@ -81,6 +83,7 @@ import org.apache.hadoop.hbase.snapshot.RestoreSnapshotException;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.zookeeper.KeeperException;
 import org.junit.After;
@@ -365,6 +368,23 @@ public class TestNamespaceAuditor {
     HRegion regionToSplit = UTIL.getMiniHBaseCluster().getRegions(tableTwo).stream()
       .filter(r -> r.getRegionInfo().containsRow(splitKey)).findFirst().get();
     regionToSplit.compact(true);
+    // Waiting for compaction to finish
+    UTIL.waitFor(30000, new Waiter.Predicate<Exception>() {
+      @Override
+      public boolean evaluate() throws Exception {
+        return (CompactionState.NONE == ADMIN
+            .getCompactionStateForRegion(regionToSplit.getRegionInfo().getRegionName()));
+      }
+    });
+
+    // Cleaning compacted references for split to proceed
+    regionToSplit.getStores().stream().forEach(s -> {
+      try {
+        s.closeAndArchiveCompactedFiles();
+      } catch (IOException e1) {
+        LOG.error("Error whiling cleaning compacted file");
+      }
+    });
     // the above compact may quit immediately if there is a compaction ongoing, so here we need to
     // wait a while to let the ongoing compaction finish.
     UTIL.waitFor(10000, regionToSplit::isSplittable);

http://git-wip-us.apache.org/repos/asf/hbase/blob/c3bd00e2/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
index b0302f6..85e9d30 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
@@ -24,10 +24,13 @@ import static org.junit.Assert.assertTrue;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
 import java.util.Random;
 import java.util.Set;
 import java.util.TreeSet;
+import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
+
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.ChoreService;
@@ -41,6 +44,8 @@ import org.apache.hadoop.hbase.ScheduledChore;
 import org.apache.hadoop.hbase.Stoppable;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
+import org.apache.hadoop.hbase.client.CompactionState;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
 import org.apache.hadoop.hbase.client.Get;
@@ -49,13 +54,19 @@ import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.client.RegionLocator;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.hbase.util.PairOfSameType;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.util.StoppableImplementation;
 import org.apache.hadoop.hbase.util.Threads;
+import org.apache.hbase.thirdparty.com.google.common.collect.Iterators;
+import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
 import org.junit.AfterClass;
+import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.ClassRule;
 import org.junit.Rule;
@@ -65,8 +76,6 @@ import org.junit.rules.TestName;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import org.apache.hbase.thirdparty.com.google.common.collect.Iterators;
-
 @Category(LargeTests.class)
 public class TestEndToEndSplitTransaction {
 
@@ -92,6 +101,78 @@ public class TestEndToEndSplitTransaction {
     TEST_UTIL.shutdownMiniCluster();
   }
 
+
+  /*
+   * This is the test for : HBASE-20940 This test will split the region and try to open an reference
+   * over store file. Once store file has any reference, it makes sure that region can't be split
+   * @throws Exception
+   */
+  @Test
+  public void testCanSplitJustAfterASplit() throws Exception {
+    LOG.info("Starting testCanSplitJustAfterASplit");
+    byte[] fam = Bytes.toBytes("cf_split");
+
+    TableName tableName = TableName.valueOf("CanSplitTable");
+    Table source = TEST_UTIL.getConnection().getTable(tableName);
+    Admin admin = TEST_UTIL.getAdmin();
+    Map<String, StoreFileReader> scanner = Maps.newHashMap();
+
+    try {
+      TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName)
+          .setColumnFamily(ColumnFamilyDescriptorBuilder.of(fam)).build();
+
+      admin.createTable(htd);
+      TEST_UTIL.loadTable(source, fam);
+      List<HRegion> regions = TEST_UTIL.getHBaseCluster().getRegions(tableName);
+      regions.get(0).forceSplit(null);
+      admin.split(tableName);
+
+      while (regions.size() <= 1) {
+        regions = TEST_UTIL.getHBaseCluster().getRegions(tableName);
+        regions.stream()
+            .forEach(r -> r.getStores().get(0).getStorefiles().stream()
+                .filter(
+                  s -> s.isReference() && !scanner.containsKey(r.getRegionInfo().getEncodedName()))
+                .forEach(sf -> {
+                  StoreFileReader reader = ((HStoreFile) sf).getReader();
+                  reader.getStoreFileScanner(true, false, false, 0, 0, false);
+                  scanner.put(r.getRegionInfo().getEncodedName(), reader);
+                  LOG.info("Got reference to file = " + sf.getPath() + ",for region = "
+                      + r.getRegionInfo().getEncodedName());
+                }));
+      }
+
+      Assert.assertTrue("Regions did not split properly", regions.size() > 1);
+      Assert.assertTrue("Could not get reference any of the store file", scanner.size() > 1);
+
+      RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
+      while (CompactionState.NONE != admin.getCompactionState(tableName) && retrier.shouldRetry()) {
+        retrier.sleepUntilNextRetry();
+      }
+
+      Assert.assertEquals("Compaction did not complete in 30 secs", CompactionState.NONE,
+        admin.getCompactionState(tableName));
+
+      regions.stream()
+          .filter(region -> scanner.containsKey(region.getRegionInfo().getEncodedName()))
+          .forEach(r -> Assert.assertTrue("Contains an open file reference which can be split",
+            !r.getStores().get(0).canSplit()));
+    } finally {
+      scanner.values().stream().forEach(s -> {
+        try {
+          s.close(true);
+        } catch (IOException ioe) {
+          LOG.error("Failed while closing store file", ioe);
+        }
+      });
+      scanner.clear();
+      if (source != null) {
+        source.close();
+      }
+      TEST_UTIL.deleteTableIfAny(tableName);
+    }
+  }
+
   /**
    * Tests that the client sees meta table changes as atomic during splits
    */
@@ -151,18 +232,17 @@ public class TestEndToEndSplitTransaction {
     public void run() {
       try {
         Random random = new Random();
-        for (int i= 0; i< 5; i++) {
-          List<RegionInfo> regions =
-              MetaTableAccessor.getTableRegions(connection, tableName, true);
+        for (int i = 0; i < 5; i++) {
+          List<RegionInfo> regions = MetaTableAccessor.getTableRegions(connection, tableName, true);
           if (regions.isEmpty()) {
             continue;
           }
           int regionIndex = random.nextInt(regions.size());
 
-          //pick a random region and split it into two
+          // pick a random region and split it into two
           RegionInfo region = Iterators.get(regions.iterator(), regionIndex);
 
-          //pick the mid split point
+          // pick the mid split point
           int start = 0, end = Integer.MAX_VALUE;
           if (region.getStartKey().length > 0) {
             start = Bytes.toInt(region.getStartKey());
@@ -173,7 +253,7 @@ public class TestEndToEndSplitTransaction {
           int mid = start + ((end - start) / 2);
           byte[] splitPoint = Bytes.toBytes(mid);
 
-          //put some rows to the regions
+          // put some rows to the regions
           addData(start);
           addData(mid);
 
@@ -183,11 +263,11 @@ public class TestEndToEndSplitTransaction {
           log("Initiating region split for:" + region.getRegionNameAsString());
           try {
             admin.splitRegion(region.getRegionName(), splitPoint);
-            //wait until the split is complete
+            // wait until the split is complete
             blockUntilRegionSplit(CONF, 50000, region.getRegionName(), true);
 
           } catch (NotServingRegionException ex) {
-            //ignore
+            // ignore
           }
         }
       } catch (Throwable ex) {
@@ -226,9 +306,11 @@ public class TestEndToEndSplitTransaction {
     /** verify region boundaries obtained from MetaScanner */
     void verifyRegionsUsingMetaTableAccessor() throws Exception {
       List<RegionInfo> regionList = MetaTableAccessor.getTableRegions(connection, tableName, true);
-      verifyTableRegions(regionList.stream().collect(Collectors.toCollection(() -> new TreeSet<>(RegionInfo.COMPARATOR))));
+      verifyTableRegions(regionList.stream()
+          .collect(Collectors.toCollection(() -> new TreeSet<>(RegionInfo.COMPARATOR))));
       regionList = MetaTableAccessor.getAllRegions(connection, true);
-      verifyTableRegions(regionList.stream().collect(Collectors.toCollection(() -> new TreeSet<>(RegionInfo.COMPARATOR))));
+      verifyTableRegions(regionList.stream()
+          .collect(Collectors.toCollection(() -> new TreeSet<>(RegionInfo.COMPARATOR))));
     }
 
     /** verify region boundaries obtained from HTable.getStartEndKeys() */
@@ -343,7 +425,9 @@ public class TestEndToEndSplitTransaction {
     }
   }
 
-  /** Blocks until the region split is complete in hbase:meta and region server opens the daughters */
+  /**
+   * Blocks until the region split is complete in hbase:meta and region server opens the daughters
+   */
   public static void blockUntilRegionSplit(Configuration conf, long timeout,
       final byte[] regionName, boolean waitForDaughters)
       throws IOException, InterruptedException {
@@ -389,10 +473,32 @@ public class TestEndToEndSplitTransaction {
 
         rem = timeout - (System.currentTimeMillis() - start);
         blockUntilRegionIsOpened(conf, rem, daughterB);
+
+        // Compacting the new region to make sure references can be cleaned up
+        compactAndBlockUntilDone(TEST_UTIL.getAdmin(),
+          TEST_UTIL.getMiniHBaseCluster().getRegionServer(0), daughterA.getRegionName());
+        compactAndBlockUntilDone(TEST_UTIL.getAdmin(),
+          TEST_UTIL.getMiniHBaseCluster().getRegionServer(0), daughterB.getRegionName());
+
+        removeCompactedFiles(conn, timeout, daughterA);
+        removeCompactedFiles(conn, timeout, daughterB);
       }
     }
   }
 
+  public static void removeCompactedFiles(Connection conn, long timeout, RegionInfo hri)
+      throws IOException, InterruptedException {
+    log("remove compacted files for : " + hri.getRegionNameAsString());
+    List<HRegion> regions = TEST_UTIL.getHBaseCluster().getRegions(hri.getTable());
+    regions.stream().forEach(r -> {
+      try {
+        r.getStores().get(0).closeAndArchiveCompactedFiles();
+      } catch (IOException ioe) {
+        LOG.error("failed in removing compacted file", ioe);
+      }
+    });
+  }
+
   public static void blockUntilRegionIsInMeta(Connection conn, long timeout, RegionInfo hri)
       throws IOException, InterruptedException {
     log("blocking until region is in META: " + hri.getRegionNameAsString());
@@ -415,7 +521,9 @@ public class TestEndToEndSplitTransaction {
         Table table = conn.getTable(hri.getTable())) {
       byte[] row = hri.getStartKey();
       // Check for null/empty row. If we find one, use a key that is likely to be in first region.
-      if (row == null || row.length <= 0) row = new byte[] { '0' };
+      if (row == null || row.length <= 0) {
+        row = new byte[] { '0' };
+      }
       Get get = new Get(row);
       while (System.currentTimeMillis() - start < timeout) {
         try {

http://git-wip-us.apache.org/repos/asf/hbase/blob/c3bd00e2/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
index 95e0112..eb162de 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
@@ -31,7 +31,9 @@ import java.util.List;
 import java.util.Map;
 import java.util.Optional;
 import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -50,6 +52,7 @@ import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.UnknownRegionException;
 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
 import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.CompactionState;
 import org.apache.hadoop.hbase.client.Consistency;
 import org.apache.hadoop.hbase.client.Delete;
 import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
@@ -78,6 +81,10 @@ import org.apache.hadoop.hbase.master.assignment.RegionStates;
 import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
 import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
 import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController;
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.testclassification.RegionServerTests;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -85,7 +92,10 @@ import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.HBaseFsck;
 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.util.Threads;
+import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
+import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.KeeperException.NodeExistsException;
 import org.junit.After;
@@ -101,14 +111,6 @@ import org.junit.rules.TestName;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
-import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
-
-import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
-
 /**
  * The below tests are testing split region against a running cluster
  */
@@ -386,11 +388,18 @@ public class TestSplitTransactionOnCluster {
       // Compact first to ensure we have cleaned up references -- else the split
       // will fail.
       this.admin.compactRegion(daughter.getRegionName());
+      RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
+      while (CompactionState.NONE != admin.getCompactionStateForRegion(daughter.getRegionName())
+          && retrier.shouldRetry()) {
+        retrier.sleepUntilNextRetry();
+      }
       daughters = cluster.getRegions(tableName);
       HRegion daughterRegion = null;
-      for (HRegion r: daughters) {
+      for (HRegion r : daughters) {
         if (RegionInfo.COMPARATOR.compare(r.getRegionInfo(), daughter) == 0) {
           daughterRegion = r;
+          // Archiving the compacted references file
+          r.getStores().get(0).closeAndArchiveCompactedFiles();
           LOG.info("Found matching HRI: " + daughterRegion);
           break;
         }
@@ -533,11 +542,19 @@ public class TestSplitTransactionOnCluster {
       // Call split.
       this.admin.splitRegion(hri.getRegionName());
       List<HRegion> daughters = checkAndGetDaughters(tableName);
+
       // Before cleanup, get a new master.
       HMaster master = abortAndWaitForMaster();
       // Now call compact on the daughters and clean up any references.
-      for (HRegion daughter: daughters) {
+      for (HRegion daughter : daughters) {
         daughter.compact(true);
+        RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
+        while (CompactionState.NONE != admin
+            .getCompactionStateForRegion(daughter.getRegionInfo().getRegionName())
+            && retrier.shouldRetry()) {
+          retrier.sleepUntilNextRetry();
+        }
+        daughter.getStores().get(0).closeAndArchiveCompactedFiles();
         assertFalse(daughter.hasReferences());
       }
       // BUT calling compact on the daughters is not enough. The CatalogJanitor looks


[5/7] hbase git commit: HBASE-20940 HStore.cansplit should not allow split to happen if it has references (Vishal Khandelwal)

Posted by ap...@apache.org.
HBASE-20940 HStore.cansplit should not allow split to happen if it has references (Vishal Khandelwal)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/56c59f11
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/56c59f11
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/56c59f11

Branch: refs/heads/branch-1
Commit: 56c59f11e38e24e232cb2f1c126c250edd700366
Parents: 8716ac2
Author: Andrew Purtell <ap...@apache.org>
Authored: Fri Aug 17 15:01:44 2018 -0700
Committer: Andrew Purtell <ap...@apache.org>
Committed: Fri Aug 17 15:30:40 2018 -0700

----------------------------------------------------------------------
 .../hadoop/hbase/regionserver/HStore.java       |  12 +-
 .../hbase/io/encoding/TestChangingEncoding.java |   4 +
 .../hbase/master/TestAssignmentListener.java    |  21 +++-
 .../TestEndToEndSplitTransaction.java           | 123 +++++++++++++++++--
 .../hadoop/hbase/regionserver/TestHRegion.java  |   8 ++
 .../TestSplitTransactionOnCluster.java          |  15 ++-
 6 files changed, 168 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/56c59f11/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
index a017820..6ce41e3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
@@ -1636,7 +1636,17 @@ public class HStore implements Store {
 
   @Override
   public boolean hasReferences() {
-    return StoreUtils.hasReferences(this.storeEngine.getStoreFileManager().getStorefiles());
+    List<StoreFile> reloadedStoreFiles = null;
+    try {
+      // Reloading the store files from file system due to HBASE-20940. As split can happen with an
+      // region which has references
+      reloadedStoreFiles = loadStoreFiles();
+    } catch (IOException ioe) {
+      LOG.error("Error trying to determine if store has referenes, " + "assuming references exists",
+        ioe);
+      return true;
+    }
+    return StoreUtils.hasReferences(reloadedStoreFiles);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/hbase/blob/56c59f11/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
index 621d3f8..d038c5f 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
@@ -105,6 +105,10 @@ public class TestChangingEncoding {
     conf.setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 1024 * 1024);
     // ((Log4JLogger)RpcServerImplementation.LOG).getLogger().setLevel(Level.TRACE);
     // ((Log4JLogger)RpcClient.LOG).getLogger().setLevel(Level.TRACE);
+    // Disabling split to make sure split does not cause modify column to wait which timesout test
+    // sometime
+    conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
+      "org.apache.hadoop.hbase.regionserver.DisabledRegionSplitPolicy");
     conf.setBoolean("hbase.online.schema.update.enable", true);
     TEST_UTIL.startMiniCluster();
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/56c59f11/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentListener.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentListener.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentListener.java
index bd4251b..12d04ff 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentListener.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentListener.java
@@ -24,6 +24,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.commons.logging.Log;
@@ -33,19 +34,21 @@ import org.apache.hadoop.hbase.Abortable;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.MiniHBaseCluster;
-import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.ServerLoad;
 import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.Region;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.JVMClusterUtil;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.zookeeper.DrainingServerTracker;
 import org.apache.hadoop.hbase.zookeeper.RegionServerTracker;
 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
@@ -256,6 +259,20 @@ public class TestAssignmentListener {
       while (mergeable < 2) {
         Thread.sleep(100);
         admin.majorCompact(TABLE_NAME);
+
+        // Waiting for compaction to finish
+        RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
+        while (CompactionState.NONE != admin.getCompactionState(TABLE_NAME)
+            && retrier.shouldRetry()) {
+          retrier.sleepUntilNextRetry();
+        }
+
+        // Making sure references are getting cleaned after compaction.
+        // This is taken care by discharger
+        for (HRegion region : TEST_UTIL.getHBaseCluster().getRegions(TABLE_NAME)) {
+          region.getStores().get(0).closeAndArchiveCompactedFiles();
+        }
+
         mergeable = 0;
         for (JVMClusterUtil.RegionServerThread regionThread: miniCluster.getRegionServerThreads()) {
           for (Region region: regionThread.getRegionServer().getOnlineRegions(TABLE_NAME)) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/56c59f11/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
index b133224..86662a4 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
@@ -22,17 +22,15 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
-import com.google.common.collect.Iterators;
-import com.google.common.collect.Sets;
-import com.google.protobuf.ServiceException;
-
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
 import java.util.NavigableMap;
 import java.util.Random;
 import java.util.Set;
 import java.util.TreeSet;
+import java.util.concurrent.TimeUnit;
 
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.logging.Log;
@@ -40,9 +38,11 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.ChoreService;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.MetaTableAccessor;
 import org.apache.hadoop.hbase.NotServingRegionException;
 import org.apache.hadoop.hbase.ScheduledChore;
@@ -63,6 +63,7 @@ import org.apache.hadoop.hbase.coordination.BaseCoordinatedStateManager;
 import org.apache.hadoop.hbase.ipc.HBaseRpcControllerImpl;
 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.protobuf.RequestConverter;
+import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanRequest;
 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos;
@@ -70,13 +71,20 @@ import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.hbase.util.PairOfSameType;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.util.StoppableImplementation;
 import org.apache.hadoop.hbase.util.Threads;
 import org.junit.AfterClass;
+import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
+import com.google.common.collect.Iterators;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+import com.google.protobuf.ServiceException;
+
 @Category(LargeTests.class)
 public class TestEndToEndSplitTransaction {
   private static final Log LOG = LogFactory.getLog(TestEndToEndSplitTransaction.class);
@@ -94,6 +102,79 @@ public class TestEndToEndSplitTransaction {
     TEST_UTIL.shutdownMiniCluster();
   }
 
+  /*
+   * This is the test for : HBASE-20940 This test will split the region and try to open an reference
+   * over store file. Once store file has any reference, it makes sure that region can't be split
+   * @throws Exception
+   */
+  @Test
+  public void testCanSplitJustAfterASplit() throws Exception {
+    LOG.info("Starting testCanSplitJustAfterASplit");
+    byte[] fam = Bytes.toBytes("cf_split");
+
+    TableName tableName = TableName.valueOf("CanSplitTable");
+    Table source = TEST_UTIL.getConnection().getTable(tableName);
+    Admin admin = TEST_UTIL.getHBaseAdmin();
+    Map<String, StoreFile.Reader> scanner = Maps.newHashMap();
+
+    try {
+      HTableDescriptor htd = new HTableDescriptor(tableName);
+      htd.addFamily(new HColumnDescriptor(fam));
+
+      admin.createTable(htd);
+      TEST_UTIL.loadTable(source, fam);
+      List<HRegion> regions = TEST_UTIL.getHBaseCluster().getRegions(tableName);
+      regions.get(0).forceSplit(null);
+      admin.split(tableName);
+
+      while (regions.size() <= 1) {
+        regions = TEST_UTIL.getHBaseCluster().getRegions(tableName);
+        // Trying to get reference of the file before region split completes
+        // this would try to get handle on storefile or it can not be cleaned
+        for (HRegion region : regions) {
+          for (Store store : region.getStores()) {
+            for (StoreFile file : store.getStorefiles()) {
+              StoreFile.Reader reader = file.getReader();
+              reader.getStoreFileScanner(false, false, false, 0, 0, false);
+              scanner.put(region.getRegionInfo().getEncodedName(), reader);
+              LOG.info("Got reference to file = " + file.getPath() + ",for region = "
+                  + region.getRegionInfo().getEncodedName());
+            }
+          }
+        }
+      }
+
+      Assert.assertTrue("Regions did not split properly", regions.size() > 1);
+      Assert.assertTrue("Could not get reference any of the store file", scanner.size() > 1);
+
+      RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
+      while (CompactionState.NONE != admin.getCompactionState(tableName) && retrier.shouldRetry()) {
+        retrier.sleepUntilNextRetry();
+      }
+
+      Assert.assertEquals("Compaction did not complete in 30 secs", CompactionState.NONE,
+        admin.getCompactionState(tableName));
+
+      for (HRegion region : regions) {
+        for (Store store : region.getStores()) {
+          Assert.assertTrue("Contains an open file reference which can be split",
+            !scanner.containsKey(region.getRegionInfo().getEncodedName()) || !store.canSplit());
+        }
+      }
+    } finally {
+      for (StoreFile.Reader s : scanner.values()) {
+        try {
+          s.close(true);
+        } catch (IOException e) {
+          e.printStackTrace();
+        }
+      }
+      scanner.clear();
+      if (source != null) source.close();
+      TEST_UTIL.deleteTableIfAny(tableName);
+    }
+  }
+
   @Test
   public void testMasterOpsWhileSplitting() throws Exception {
     TableName tableName = TableName.valueOf("TestSplit");
@@ -130,10 +211,10 @@ public class TestEndToEndSplitTransaction {
       if (split.useZKForAssignment) {
         server.postOpenDeployTasks(regions.getSecond());
       } else {
-      server.reportRegionStateTransition(
-        RegionServerStatusProtos.RegionStateTransition.TransitionCode.SPLIT,
-        region.getRegionInfo(), regions.getFirst().getRegionInfo(),
-        regions.getSecond().getRegionInfo());
+        server.reportRegionStateTransition(
+          RegionServerStatusProtos.RegionStateTransition.TransitionCode.SPLIT,
+          region.getRegionInfo(), regions.getFirst().getRegionInfo(),
+          regions.getSecond().getRegionInfo());
       }
 
       // first daughter second
@@ -157,8 +238,8 @@ public class TestEndToEndSplitTransaction {
       if (split.useZKForAssignment) {
         // 4. phase III
         ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
-          .getSplitTransactionCoordination().completeSplitTransaction(server, regions.getFirst(),
-            regions.getSecond(), split.std, region);
+            .getSplitTransactionCoordination().completeSplitTransaction(server, regions.getFirst(),
+              regions.getSecond(), split.std, region);
       }
 
       assertTrue(test(conn, tableName, firstRow, server));
@@ -552,6 +633,28 @@ public class TestEndToEndSplitTransaction {
 
         rem = timeout - (System.currentTimeMillis() - start);
         blockUntilRegionIsOpened(conf, rem, daughterB);
+
+        // Compacting the new region to make sure references can be cleaned up
+        compactAndBlockUntilDone(TEST_UTIL.getHBaseAdmin(),
+          TEST_UTIL.getMiniHBaseCluster().getRegionServer(0), daughterA.getRegionName());
+        compactAndBlockUntilDone(TEST_UTIL.getHBaseAdmin(),
+          TEST_UTIL.getMiniHBaseCluster().getRegionServer(0), daughterB.getRegionName());
+
+        removeCompactedfiles(conn, timeout, daughterA);
+        removeCompactedfiles(conn, timeout, daughterB);
+      }
+    }
+  }
+
+  public static void removeCompactedfiles(Connection conn, long timeout, HRegionInfo hri)
+      throws IOException, InterruptedException {
+    log("removeCompactedfiles for : " + hri.getRegionNameAsString());
+    List<HRegion> regions = TEST_UTIL.getHBaseCluster().getRegions(hri.getTable());
+    for (HRegion region : regions) {
+      try {
+        region.getStores().get(0).closeAndArchiveCompactedFiles();
+      } catch (IOException e) {
+        e.printStackTrace();
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/56c59f11/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
index f7d57b5..a10f48d 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
@@ -2686,6 +2686,10 @@ public class TestHRegion {
       LOG.info("" + HBaseTestCase.addContent(region, fam3));
       region.flush(true);
       region.compactStores();
+      region.waitForFlushesAndCompactions();
+      for(Store s:region.getStores()) {
+        s.closeAndArchiveCompactedFiles();
+      }
       byte[] splitRow = region.checkSplit();
       assertNotNull(splitRow);
       LOG.info("SplitRow: " + Bytes.toString(splitRow));
@@ -2695,6 +2699,10 @@ public class TestHRegion {
         for (int i = 0; i < subregions.length; i++) {
           HRegion.openHRegion(subregions[i], null);
           subregions[i].compactStores();
+          subregions[i].waitForFlushesAndCompactions();
+          for(Store s:subregions[i].getStores()) {
+            s.closeAndArchiveCompactedFiles();
+          }
         }
         Path oldRegionPath = region.getRegionFileSystem().getRegionDir();
         Path oldRegion1 = subregions[0].getRegionFileSystem().getRegionDir();

http://git-wip-us.apache.org/repos/asf/hbase/blob/56c59f11/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
index 4c11c61..32700af 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
@@ -27,12 +27,12 @@ import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
 import java.io.IOException;
-import java.io.InterruptedIOException;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -88,6 +88,7 @@ import org.apache.hadoop.hbase.master.RegionState;
 import org.apache.hadoop.hbase.master.RegionState.State;
 import org.apache.hadoop.hbase.master.RegionStates;
 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
 import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
 import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
@@ -97,6 +98,7 @@ import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.HBaseFsck;
 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
 import org.apache.hadoop.hbase.util.PairOfSameType;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
@@ -605,11 +607,20 @@ public class TestSplitTransactionOnCluster {
       // Compact first to ensure we have cleaned up references -- else the split
       // will fail.
       this.admin.compact(daughter.getRegionName());
+      RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
+      while (CompactionState.NONE != admin.getCompactionStateForRegion(daughter.getRegionName())
+          && retrier.shouldRetry()) {
+        retrier.sleepUntilNextRetry();
+      }
+
       daughters = cluster.getRegions(tableName);
       HRegion daughterRegion = null;
-      for (HRegion r: daughters) {
+      for (HRegion r : daughters) {
         if (r.getRegionInfo().equals(daughter)) {
           daughterRegion = r;
+          // Archiving the compacted references file
+          r.getStores().get(0).closeAndArchiveCompactedFiles();
+
           LOG.info("Found matching HRI: " + daughterRegion);
           break;
         }


[6/7] hbase git commit: HBASE-20940 HStore.cansplit should not allow split to happen if it has references (Vishal Khandelwal)

Posted by ap...@apache.org.
HBASE-20940 HStore.cansplit should not allow split to happen if it has references (Vishal Khandelwal)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/9573f762
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/9573f762
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/9573f762

Branch: refs/heads/branch-1.4
Commit: 9573f7629bd35d799d8491e21ab6399b46d3f3a4
Parents: 9f78a1d
Author: Andrew Purtell <ap...@apache.org>
Authored: Fri Aug 17 15:01:44 2018 -0700
Committer: Andrew Purtell <ap...@apache.org>
Committed: Fri Aug 17 15:30:47 2018 -0700

----------------------------------------------------------------------
 .../hadoop/hbase/regionserver/HStore.java       |  12 +-
 .../hbase/io/encoding/TestChangingEncoding.java |   4 +
 .../hbase/master/TestAssignmentListener.java    |  21 +++-
 .../TestEndToEndSplitTransaction.java           | 123 +++++++++++++++++--
 .../hadoop/hbase/regionserver/TestHRegion.java  |   8 ++
 .../TestSplitTransactionOnCluster.java          |  15 ++-
 6 files changed, 168 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/9573f762/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
index 9859798..e37baf8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
@@ -1625,7 +1625,17 @@ public class HStore implements Store {
 
   @Override
   public boolean hasReferences() {
-    return StoreUtils.hasReferences(this.storeEngine.getStoreFileManager().getStorefiles());
+    List<StoreFile> reloadedStoreFiles = null;
+    try {
+      // Reloading the store files from file system due to HBASE-20940. As split can happen with an
+      // region which has references
+      reloadedStoreFiles = loadStoreFiles();
+    } catch (IOException ioe) {
+      LOG.error("Error trying to determine if store has referenes, " + "assuming references exists",
+        ioe);
+      return true;
+    }
+    return StoreUtils.hasReferences(reloadedStoreFiles);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/hbase/blob/9573f762/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
index 621d3f8..d038c5f 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
@@ -105,6 +105,10 @@ public class TestChangingEncoding {
     conf.setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 1024 * 1024);
     // ((Log4JLogger)RpcServerImplementation.LOG).getLogger().setLevel(Level.TRACE);
     // ((Log4JLogger)RpcClient.LOG).getLogger().setLevel(Level.TRACE);
+    // Disabling split to make sure split does not cause modify column to wait which timesout test
+    // sometime
+    conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
+      "org.apache.hadoop.hbase.regionserver.DisabledRegionSplitPolicy");
     conf.setBoolean("hbase.online.schema.update.enable", true);
     TEST_UTIL.startMiniCluster();
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/9573f762/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentListener.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentListener.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentListener.java
index bd4251b..12d04ff 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentListener.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentListener.java
@@ -24,6 +24,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.commons.logging.Log;
@@ -33,19 +34,21 @@ import org.apache.hadoop.hbase.Abortable;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.MiniHBaseCluster;
-import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.ServerLoad;
 import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.Region;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.JVMClusterUtil;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.zookeeper.DrainingServerTracker;
 import org.apache.hadoop.hbase.zookeeper.RegionServerTracker;
 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
@@ -256,6 +259,20 @@ public class TestAssignmentListener {
       while (mergeable < 2) {
         Thread.sleep(100);
         admin.majorCompact(TABLE_NAME);
+
+        // Waiting for compaction to finish
+        RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
+        while (CompactionState.NONE != admin.getCompactionState(TABLE_NAME)
+            && retrier.shouldRetry()) {
+          retrier.sleepUntilNextRetry();
+        }
+
+        // Making sure references are getting cleaned after compaction.
+        // This is taken care by discharger
+        for (HRegion region : TEST_UTIL.getHBaseCluster().getRegions(TABLE_NAME)) {
+          region.getStores().get(0).closeAndArchiveCompactedFiles();
+        }
+
         mergeable = 0;
         for (JVMClusterUtil.RegionServerThread regionThread: miniCluster.getRegionServerThreads()) {
           for (Region region: regionThread.getRegionServer().getOnlineRegions(TABLE_NAME)) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/9573f762/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
index b133224..86662a4 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
@@ -22,17 +22,15 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
-import com.google.common.collect.Iterators;
-import com.google.common.collect.Sets;
-import com.google.protobuf.ServiceException;
-
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
 import java.util.NavigableMap;
 import java.util.Random;
 import java.util.Set;
 import java.util.TreeSet;
+import java.util.concurrent.TimeUnit;
 
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.logging.Log;
@@ -40,9 +38,11 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.ChoreService;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.MetaTableAccessor;
 import org.apache.hadoop.hbase.NotServingRegionException;
 import org.apache.hadoop.hbase.ScheduledChore;
@@ -63,6 +63,7 @@ import org.apache.hadoop.hbase.coordination.BaseCoordinatedStateManager;
 import org.apache.hadoop.hbase.ipc.HBaseRpcControllerImpl;
 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.protobuf.RequestConverter;
+import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanRequest;
 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos;
@@ -70,13 +71,20 @@ import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.hbase.util.PairOfSameType;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.util.StoppableImplementation;
 import org.apache.hadoop.hbase.util.Threads;
 import org.junit.AfterClass;
+import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
+import com.google.common.collect.Iterators;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+import com.google.protobuf.ServiceException;
+
 @Category(LargeTests.class)
 public class TestEndToEndSplitTransaction {
   private static final Log LOG = LogFactory.getLog(TestEndToEndSplitTransaction.class);
@@ -94,6 +102,79 @@ public class TestEndToEndSplitTransaction {
     TEST_UTIL.shutdownMiniCluster();
   }
 
+  /*
+   * This is the test for : HBASE-20940 This test will split the region and try to open an reference
+   * over store file. Once store file has any reference, it makes sure that region can't be split
+   * @throws Exception
+   */
+  @Test
+  public void testCanSplitJustAfterASplit() throws Exception {
+    LOG.info("Starting testCanSplitJustAfterASplit");
+    byte[] fam = Bytes.toBytes("cf_split");
+
+    TableName tableName = TableName.valueOf("CanSplitTable");
+    Table source = TEST_UTIL.getConnection().getTable(tableName);
+    Admin admin = TEST_UTIL.getHBaseAdmin();
+    Map<String, StoreFile.Reader> scanner = Maps.newHashMap();
+
+    try {
+      HTableDescriptor htd = new HTableDescriptor(tableName);
+      htd.addFamily(new HColumnDescriptor(fam));
+
+      admin.createTable(htd);
+      TEST_UTIL.loadTable(source, fam);
+      List<HRegion> regions = TEST_UTIL.getHBaseCluster().getRegions(tableName);
+      regions.get(0).forceSplit(null);
+      admin.split(tableName);
+
+      while (regions.size() <= 1) {
+        regions = TEST_UTIL.getHBaseCluster().getRegions(tableName);
+        // Trying to get reference of the file before region split completes
+        // this would try to get handle on storefile or it can not be cleaned
+        for (HRegion region : regions) {
+          for (Store store : region.getStores()) {
+            for (StoreFile file : store.getStorefiles()) {
+              StoreFile.Reader reader = file.getReader();
+              reader.getStoreFileScanner(false, false, false, 0, 0, false);
+              scanner.put(region.getRegionInfo().getEncodedName(), reader);
+              LOG.info("Got reference to file = " + file.getPath() + ",for region = "
+                  + region.getRegionInfo().getEncodedName());
+            }
+          }
+        }
+      }
+
+      Assert.assertTrue("Regions did not split properly", regions.size() > 1);
+      Assert.assertTrue("Could not get reference any of the store file", scanner.size() > 1);
+
+      RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
+      while (CompactionState.NONE != admin.getCompactionState(tableName) && retrier.shouldRetry()) {
+        retrier.sleepUntilNextRetry();
+      }
+
+      Assert.assertEquals("Compaction did not complete in 30 secs", CompactionState.NONE,
+        admin.getCompactionState(tableName));
+
+      for (HRegion region : regions) {
+        for (Store store : region.getStores()) {
+          Assert.assertTrue("Contains an open file reference which can be split",
+            !scanner.containsKey(region.getRegionInfo().getEncodedName()) || !store.canSplit());
+        }
+      }
+    } finally {
+      for (StoreFile.Reader s : scanner.values()) {
+        try {
+          s.close(true);
+        } catch (IOException e) {
+          e.printStackTrace();
+        }
+      }
+      scanner.clear();
+      if (source != null) source.close();
+      TEST_UTIL.deleteTableIfAny(tableName);
+    }
+  }
+
   @Test
   public void testMasterOpsWhileSplitting() throws Exception {
     TableName tableName = TableName.valueOf("TestSplit");
@@ -130,10 +211,10 @@ public class TestEndToEndSplitTransaction {
       if (split.useZKForAssignment) {
         server.postOpenDeployTasks(regions.getSecond());
       } else {
-      server.reportRegionStateTransition(
-        RegionServerStatusProtos.RegionStateTransition.TransitionCode.SPLIT,
-        region.getRegionInfo(), regions.getFirst().getRegionInfo(),
-        regions.getSecond().getRegionInfo());
+        server.reportRegionStateTransition(
+          RegionServerStatusProtos.RegionStateTransition.TransitionCode.SPLIT,
+          region.getRegionInfo(), regions.getFirst().getRegionInfo(),
+          regions.getSecond().getRegionInfo());
       }
 
       // first daughter second
@@ -157,8 +238,8 @@ public class TestEndToEndSplitTransaction {
       if (split.useZKForAssignment) {
         // 4. phase III
         ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
-          .getSplitTransactionCoordination().completeSplitTransaction(server, regions.getFirst(),
-            regions.getSecond(), split.std, region);
+            .getSplitTransactionCoordination().completeSplitTransaction(server, regions.getFirst(),
+              regions.getSecond(), split.std, region);
       }
 
       assertTrue(test(conn, tableName, firstRow, server));
@@ -552,6 +633,28 @@ public class TestEndToEndSplitTransaction {
 
         rem = timeout - (System.currentTimeMillis() - start);
         blockUntilRegionIsOpened(conf, rem, daughterB);
+
+        // Compacting the new region to make sure references can be cleaned up
+        compactAndBlockUntilDone(TEST_UTIL.getHBaseAdmin(),
+          TEST_UTIL.getMiniHBaseCluster().getRegionServer(0), daughterA.getRegionName());
+        compactAndBlockUntilDone(TEST_UTIL.getHBaseAdmin(),
+          TEST_UTIL.getMiniHBaseCluster().getRegionServer(0), daughterB.getRegionName());
+
+        removeCompactedfiles(conn, timeout, daughterA);
+        removeCompactedfiles(conn, timeout, daughterB);
+      }
+    }
+  }
+
+  public static void removeCompactedfiles(Connection conn, long timeout, HRegionInfo hri)
+      throws IOException, InterruptedException {
+    log("removeCompactedfiles for : " + hri.getRegionNameAsString());
+    List<HRegion> regions = TEST_UTIL.getHBaseCluster().getRegions(hri.getTable());
+    for (HRegion region : regions) {
+      try {
+        region.getStores().get(0).closeAndArchiveCompactedFiles();
+      } catch (IOException e) {
+        e.printStackTrace();
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/9573f762/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
index f2d9e80..1351a5e 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
@@ -2686,6 +2686,10 @@ public class TestHRegion {
       LOG.info("" + HBaseTestCase.addContent(region, fam3));
       region.flush(true);
       region.compactStores();
+      region.waitForFlushesAndCompactions();
+      for(Store s:region.getStores()) {
+        s.closeAndArchiveCompactedFiles();
+      }
       byte[] splitRow = region.checkSplit();
       assertNotNull(splitRow);
       LOG.info("SplitRow: " + Bytes.toString(splitRow));
@@ -2695,6 +2699,10 @@ public class TestHRegion {
         for (int i = 0; i < subregions.length; i++) {
           HRegion.openHRegion(subregions[i], null);
           subregions[i].compactStores();
+          subregions[i].waitForFlushesAndCompactions();
+          for(Store s:subregions[i].getStores()) {
+            s.closeAndArchiveCompactedFiles();
+          }
         }
         Path oldRegionPath = region.getRegionFileSystem().getRegionDir();
         Path oldRegion1 = subregions[0].getRegionFileSystem().getRegionDir();

http://git-wip-us.apache.org/repos/asf/hbase/blob/9573f762/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
index 4c11c61..32700af 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
@@ -27,12 +27,12 @@ import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
 import java.io.IOException;
-import java.io.InterruptedIOException;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -88,6 +88,7 @@ import org.apache.hadoop.hbase.master.RegionState;
 import org.apache.hadoop.hbase.master.RegionState.State;
 import org.apache.hadoop.hbase.master.RegionStates;
 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
 import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
 import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
@@ -97,6 +98,7 @@ import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.HBaseFsck;
 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
 import org.apache.hadoop.hbase.util.PairOfSameType;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
@@ -605,11 +607,20 @@ public class TestSplitTransactionOnCluster {
       // Compact first to ensure we have cleaned up references -- else the split
       // will fail.
       this.admin.compact(daughter.getRegionName());
+      RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
+      while (CompactionState.NONE != admin.getCompactionStateForRegion(daughter.getRegionName())
+          && retrier.shouldRetry()) {
+        retrier.sleepUntilNextRetry();
+      }
+
       daughters = cluster.getRegions(tableName);
       HRegion daughterRegion = null;
-      for (HRegion r: daughters) {
+      for (HRegion r : daughters) {
         if (r.getRegionInfo().equals(daughter)) {
           daughterRegion = r;
+          // Archiving the compacted references file
+          r.getStores().get(0).closeAndArchiveCompactedFiles();
+
           LOG.info("Found matching HRI: " + daughterRegion);
           break;
         }


[2/7] hbase git commit: HBASE-20940 HStore.cansplit should not allow split to happen if it has references (Vishal Khandelwal)

Posted by ap...@apache.org.
HBASE-20940 HStore.cansplit should not allow split to happen if it has references (Vishal Khandelwal)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/52ba33aa
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/52ba33aa
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/52ba33aa

Branch: refs/heads/branch-2
Commit: 52ba33aa735c37dccff57e722c5096f1c58ffa2d
Parents: 9da5d3a
Author: Andrew Purtell <ap...@apache.org>
Authored: Fri Aug 17 15:01:44 2018 -0700
Committer: Andrew Purtell <ap...@apache.org>
Committed: Fri Aug 17 15:02:22 2018 -0700

----------------------------------------------------------------------
 .../hadoop/hbase/regionserver/HStore.java       |  12 +-
 .../client/TestAsyncTableGetMultiThreaded.java  |  28 +++-
 .../hbase/io/encoding/TestChangingEncoding.java |   4 +
 .../hbase/namespace/TestNamespaceAuditor.java   |  20 +++
 .../TestEndToEndSplitTransaction.java           | 136 +++++++++++++++++--
 .../TestSplitTransactionOnCluster.java          |  37 +++--
 6 files changed, 210 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/52ba33aa/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
index 424a357..ab597f8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
@@ -1659,7 +1659,17 @@ public class HStore implements Store, HeapSize, StoreConfigInformation, Propagat
 
   @Override
   public boolean hasReferences() {
-    return StoreUtils.hasReferences(this.storeEngine.getStoreFileManager().getStorefiles());
+    List<HStoreFile> reloadedStoreFiles = null;
+    try {
+      // Reloading the store files from file system due to HBASE-20940. As split can happen with an
+      // region which has references
+      reloadedStoreFiles = loadStoreFiles();
+      return StoreUtils.hasReferences(reloadedStoreFiles);
+    } catch (IOException ioe) {
+      LOG.error("Error trying to determine if store has references, assuming references exists",
+        ioe);
+      return true;
+    }
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/hbase/blob/52ba33aa/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java
index 7632716..8a2dfcc 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java
@@ -21,7 +21,6 @@ import static org.apache.hadoop.hbase.HConstants.HBASE_CLIENT_META_OPERATION_TIM
 import static org.apache.hadoop.hbase.master.LoadBalancer.TABLES_ON_MASTER;
 import static org.junit.Assert.assertEquals;
 
-import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -35,18 +34,21 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
+
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.MemoryCompactionPolicy;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
 import org.apache.hadoop.hbase.io.ByteBufferPool;
 import org.apache.hadoop.hbase.regionserver.CompactingMemStore;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.testclassification.ClientTests;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.util.Threads;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
@@ -124,7 +126,7 @@ public class TestAsyncTableGetMultiThreaded {
   }
 
   @Test
-  public void test() throws IOException, InterruptedException, ExecutionException {
+  public void test() throws Exception {
     int numThreads = 20;
     AtomicBoolean stop = new AtomicBoolean(false);
     ExecutorService executor =
@@ -137,9 +139,31 @@ public class TestAsyncTableGetMultiThreaded {
     Collections.shuffle(Arrays.asList(SPLIT_KEYS), new Random(123));
     Admin admin = TEST_UTIL.getAdmin();
     for (byte[] splitPoint : SPLIT_KEYS) {
+      int oldRegionCount = admin.getRegions(TABLE_NAME).size();
       admin.split(TABLE_NAME, splitPoint);
+      TEST_UTIL.waitFor(30000, new ExplainingPredicate<Exception>() {
+        @Override
+        public boolean evaluate() throws Exception {
+          return TEST_UTIL.getMiniHBaseCluster().getRegions(TABLE_NAME).size() > oldRegionCount;
+        }
+
+        @Override
+        public String explainFailure() throws Exception {
+          return "Split has not finished yet";
+        }
+      });
+
       for (HRegion region : TEST_UTIL.getHBaseCluster().getRegions(TABLE_NAME)) {
         region.compact(true);
+
+        //Waiting for compaction to complete and references are cleaned up
+        RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
+        while (CompactionState.NONE != admin
+            .getCompactionStateForRegion(region.getRegionInfo().getRegionName())
+            && retrier.shouldRetry()) {
+          retrier.sleepUntilNextRetry();
+        }
+        region.getStores().get(0).closeAndArchiveCompactedFiles();
       }
       Thread.sleep(5000);
       admin.balance(true);

http://git-wip-us.apache.org/repos/asf/hbase/blob/52ba33aa/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
index 1937d80..38313c4 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
@@ -107,6 +107,10 @@ public class TestChangingEncoding {
   public static void setUpBeforeClass() throws Exception {
     // Use a small flush size to create more HFiles.
     conf.setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 1024 * 1024);
+    // Disabling split to make sure split does not cause modify column to wait which timesout test
+    // sometime
+    conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
+        "org.apache.hadoop.hbase.regionserver.DisabledRegionSplitPolicy");
     // ((Log4JLogger)RpcServerImplementation.LOG).getLogger().setLevel(Level.TRACE);
     // ((Log4JLogger)RpcClient.LOG).getLogger().setLevel(Level.TRACE);
     TEST_UTIL.startMiniCluster();

http://git-wip-us.apache.org/repos/asf/hbase/blob/52ba33aa/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java
index 1b4957a..cc6c217 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java
@@ -48,7 +48,9 @@ import org.apache.hadoop.hbase.MiniHBaseCluster;
 import org.apache.hadoop.hbase.NamespaceDescriptor;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.Waiter;
+import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
 import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.CompactionState;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
 import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
@@ -81,6 +83,7 @@ import org.apache.hadoop.hbase.snapshot.RestoreSnapshotException;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.zookeeper.KeeperException;
 import org.junit.After;
@@ -365,6 +368,23 @@ public class TestNamespaceAuditor {
     HRegion regionToSplit = UTIL.getMiniHBaseCluster().getRegions(tableTwo).stream()
       .filter(r -> r.getRegionInfo().containsRow(splitKey)).findFirst().get();
     regionToSplit.compact(true);
+    // Waiting for compaction to finish
+    UTIL.waitFor(30000, new Waiter.Predicate<Exception>() {
+      @Override
+      public boolean evaluate() throws Exception {
+        return (CompactionState.NONE == ADMIN
+            .getCompactionStateForRegion(regionToSplit.getRegionInfo().getRegionName()));
+      }
+    });
+
+    // Cleaning compacted references for split to proceed
+    regionToSplit.getStores().stream().forEach(s -> {
+      try {
+        s.closeAndArchiveCompactedFiles();
+      } catch (IOException e1) {
+        LOG.error("Error whiling cleaning compacted file");
+      }
+    });
     // the above compact may quit immediately if there is a compaction ongoing, so here we need to
     // wait a while to let the ongoing compaction finish.
     UTIL.waitFor(10000, regionToSplit::isSplittable);

http://git-wip-us.apache.org/repos/asf/hbase/blob/52ba33aa/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
index b0302f6..85e9d30 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
@@ -24,10 +24,13 @@ import static org.junit.Assert.assertTrue;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
 import java.util.Random;
 import java.util.Set;
 import java.util.TreeSet;
+import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
+
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.ChoreService;
@@ -41,6 +44,8 @@ import org.apache.hadoop.hbase.ScheduledChore;
 import org.apache.hadoop.hbase.Stoppable;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
+import org.apache.hadoop.hbase.client.CompactionState;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
 import org.apache.hadoop.hbase.client.Get;
@@ -49,13 +54,19 @@ import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.client.RegionLocator;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.hbase.util.PairOfSameType;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.util.StoppableImplementation;
 import org.apache.hadoop.hbase.util.Threads;
+import org.apache.hbase.thirdparty.com.google.common.collect.Iterators;
+import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
 import org.junit.AfterClass;
+import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.ClassRule;
 import org.junit.Rule;
@@ -65,8 +76,6 @@ import org.junit.rules.TestName;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import org.apache.hbase.thirdparty.com.google.common.collect.Iterators;
-
 @Category(LargeTests.class)
 public class TestEndToEndSplitTransaction {
 
@@ -92,6 +101,78 @@ public class TestEndToEndSplitTransaction {
     TEST_UTIL.shutdownMiniCluster();
   }
 
+
+  /*
+   * This is the test for : HBASE-20940 This test will split the region and try to open an reference
+   * over store file. Once store file has any reference, it makes sure that region can't be split
+   * @throws Exception
+   */
+  @Test
+  public void testCanSplitJustAfterASplit() throws Exception {
+    LOG.info("Starting testCanSplitJustAfterASplit");
+    byte[] fam = Bytes.toBytes("cf_split");
+
+    TableName tableName = TableName.valueOf("CanSplitTable");
+    Table source = TEST_UTIL.getConnection().getTable(tableName);
+    Admin admin = TEST_UTIL.getAdmin();
+    Map<String, StoreFileReader> scanner = Maps.newHashMap();
+
+    try {
+      TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName)
+          .setColumnFamily(ColumnFamilyDescriptorBuilder.of(fam)).build();
+
+      admin.createTable(htd);
+      TEST_UTIL.loadTable(source, fam);
+      List<HRegion> regions = TEST_UTIL.getHBaseCluster().getRegions(tableName);
+      regions.get(0).forceSplit(null);
+      admin.split(tableName);
+
+      while (regions.size() <= 1) {
+        regions = TEST_UTIL.getHBaseCluster().getRegions(tableName);
+        regions.stream()
+            .forEach(r -> r.getStores().get(0).getStorefiles().stream()
+                .filter(
+                  s -> s.isReference() && !scanner.containsKey(r.getRegionInfo().getEncodedName()))
+                .forEach(sf -> {
+                  StoreFileReader reader = ((HStoreFile) sf).getReader();
+                  reader.getStoreFileScanner(true, false, false, 0, 0, false);
+                  scanner.put(r.getRegionInfo().getEncodedName(), reader);
+                  LOG.info("Got reference to file = " + sf.getPath() + ",for region = "
+                      + r.getRegionInfo().getEncodedName());
+                }));
+      }
+
+      Assert.assertTrue("Regions did not split properly", regions.size() > 1);
+      Assert.assertTrue("Could not get reference any of the store file", scanner.size() > 1);
+
+      RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
+      while (CompactionState.NONE != admin.getCompactionState(tableName) && retrier.shouldRetry()) {
+        retrier.sleepUntilNextRetry();
+      }
+
+      Assert.assertEquals("Compaction did not complete in 30 secs", CompactionState.NONE,
+        admin.getCompactionState(tableName));
+
+      regions.stream()
+          .filter(region -> scanner.containsKey(region.getRegionInfo().getEncodedName()))
+          .forEach(r -> Assert.assertTrue("Contains an open file reference which can be split",
+            !r.getStores().get(0).canSplit()));
+    } finally {
+      scanner.values().stream().forEach(s -> {
+        try {
+          s.close(true);
+        } catch (IOException ioe) {
+          LOG.error("Failed while closing store file", ioe);
+        }
+      });
+      scanner.clear();
+      if (source != null) {
+        source.close();
+      }
+      TEST_UTIL.deleteTableIfAny(tableName);
+    }
+  }
+
   /**
    * Tests that the client sees meta table changes as atomic during splits
    */
@@ -151,18 +232,17 @@ public class TestEndToEndSplitTransaction {
     public void run() {
       try {
         Random random = new Random();
-        for (int i= 0; i< 5; i++) {
-          List<RegionInfo> regions =
-              MetaTableAccessor.getTableRegions(connection, tableName, true);
+        for (int i = 0; i < 5; i++) {
+          List<RegionInfo> regions = MetaTableAccessor.getTableRegions(connection, tableName, true);
           if (regions.isEmpty()) {
             continue;
           }
           int regionIndex = random.nextInt(regions.size());
 
-          //pick a random region and split it into two
+          // pick a random region and split it into two
           RegionInfo region = Iterators.get(regions.iterator(), regionIndex);
 
-          //pick the mid split point
+          // pick the mid split point
           int start = 0, end = Integer.MAX_VALUE;
           if (region.getStartKey().length > 0) {
             start = Bytes.toInt(region.getStartKey());
@@ -173,7 +253,7 @@ public class TestEndToEndSplitTransaction {
           int mid = start + ((end - start) / 2);
           byte[] splitPoint = Bytes.toBytes(mid);
 
-          //put some rows to the regions
+          // put some rows to the regions
           addData(start);
           addData(mid);
 
@@ -183,11 +263,11 @@ public class TestEndToEndSplitTransaction {
           log("Initiating region split for:" + region.getRegionNameAsString());
           try {
             admin.splitRegion(region.getRegionName(), splitPoint);
-            //wait until the split is complete
+            // wait until the split is complete
             blockUntilRegionSplit(CONF, 50000, region.getRegionName(), true);
 
           } catch (NotServingRegionException ex) {
-            //ignore
+            // ignore
           }
         }
       } catch (Throwable ex) {
@@ -226,9 +306,11 @@ public class TestEndToEndSplitTransaction {
     /** verify region boundaries obtained from MetaScanner */
     void verifyRegionsUsingMetaTableAccessor() throws Exception {
       List<RegionInfo> regionList = MetaTableAccessor.getTableRegions(connection, tableName, true);
-      verifyTableRegions(regionList.stream().collect(Collectors.toCollection(() -> new TreeSet<>(RegionInfo.COMPARATOR))));
+      verifyTableRegions(regionList.stream()
+          .collect(Collectors.toCollection(() -> new TreeSet<>(RegionInfo.COMPARATOR))));
       regionList = MetaTableAccessor.getAllRegions(connection, true);
-      verifyTableRegions(regionList.stream().collect(Collectors.toCollection(() -> new TreeSet<>(RegionInfo.COMPARATOR))));
+      verifyTableRegions(regionList.stream()
+          .collect(Collectors.toCollection(() -> new TreeSet<>(RegionInfo.COMPARATOR))));
     }
 
     /** verify region boundaries obtained from HTable.getStartEndKeys() */
@@ -343,7 +425,9 @@ public class TestEndToEndSplitTransaction {
     }
   }
 
-  /** Blocks until the region split is complete in hbase:meta and region server opens the daughters */
+  /**
+   * Blocks until the region split is complete in hbase:meta and region server opens the daughters
+   */
   public static void blockUntilRegionSplit(Configuration conf, long timeout,
       final byte[] regionName, boolean waitForDaughters)
       throws IOException, InterruptedException {
@@ -389,10 +473,32 @@ public class TestEndToEndSplitTransaction {
 
         rem = timeout - (System.currentTimeMillis() - start);
         blockUntilRegionIsOpened(conf, rem, daughterB);
+
+        // Compacting the new region to make sure references can be cleaned up
+        compactAndBlockUntilDone(TEST_UTIL.getAdmin(),
+          TEST_UTIL.getMiniHBaseCluster().getRegionServer(0), daughterA.getRegionName());
+        compactAndBlockUntilDone(TEST_UTIL.getAdmin(),
+          TEST_UTIL.getMiniHBaseCluster().getRegionServer(0), daughterB.getRegionName());
+
+        removeCompactedFiles(conn, timeout, daughterA);
+        removeCompactedFiles(conn, timeout, daughterB);
       }
     }
   }
 
+  public static void removeCompactedFiles(Connection conn, long timeout, RegionInfo hri)
+      throws IOException, InterruptedException {
+    log("remove compacted files for : " + hri.getRegionNameAsString());
+    List<HRegion> regions = TEST_UTIL.getHBaseCluster().getRegions(hri.getTable());
+    regions.stream().forEach(r -> {
+      try {
+        r.getStores().get(0).closeAndArchiveCompactedFiles();
+      } catch (IOException ioe) {
+        LOG.error("failed in removing compacted file", ioe);
+      }
+    });
+  }
+
   public static void blockUntilRegionIsInMeta(Connection conn, long timeout, RegionInfo hri)
       throws IOException, InterruptedException {
     log("blocking until region is in META: " + hri.getRegionNameAsString());
@@ -415,7 +521,9 @@ public class TestEndToEndSplitTransaction {
         Table table = conn.getTable(hri.getTable())) {
       byte[] row = hri.getStartKey();
       // Check for null/empty row. If we find one, use a key that is likely to be in first region.
-      if (row == null || row.length <= 0) row = new byte[] { '0' };
+      if (row == null || row.length <= 0) {
+        row = new byte[] { '0' };
+      }
       Get get = new Get(row);
       while (System.currentTimeMillis() - start < timeout) {
         try {

http://git-wip-us.apache.org/repos/asf/hbase/blob/52ba33aa/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
index 95e0112..eb162de 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
@@ -31,7 +31,9 @@ import java.util.List;
 import java.util.Map;
 import java.util.Optional;
 import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -50,6 +52,7 @@ import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.UnknownRegionException;
 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
 import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.CompactionState;
 import org.apache.hadoop.hbase.client.Consistency;
 import org.apache.hadoop.hbase.client.Delete;
 import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
@@ -78,6 +81,10 @@ import org.apache.hadoop.hbase.master.assignment.RegionStates;
 import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
 import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
 import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController;
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.testclassification.RegionServerTests;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -85,7 +92,10 @@ import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.HBaseFsck;
 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.util.Threads;
+import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
+import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.KeeperException.NodeExistsException;
 import org.junit.After;
@@ -101,14 +111,6 @@ import org.junit.rules.TestName;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
-import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
-
-import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
-
 /**
  * The below tests are testing split region against a running cluster
  */
@@ -386,11 +388,18 @@ public class TestSplitTransactionOnCluster {
       // Compact first to ensure we have cleaned up references -- else the split
       // will fail.
       this.admin.compactRegion(daughter.getRegionName());
+      RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
+      while (CompactionState.NONE != admin.getCompactionStateForRegion(daughter.getRegionName())
+          && retrier.shouldRetry()) {
+        retrier.sleepUntilNextRetry();
+      }
       daughters = cluster.getRegions(tableName);
       HRegion daughterRegion = null;
-      for (HRegion r: daughters) {
+      for (HRegion r : daughters) {
         if (RegionInfo.COMPARATOR.compare(r.getRegionInfo(), daughter) == 0) {
           daughterRegion = r;
+          // Archiving the compacted references file
+          r.getStores().get(0).closeAndArchiveCompactedFiles();
           LOG.info("Found matching HRI: " + daughterRegion);
           break;
         }
@@ -533,11 +542,19 @@ public class TestSplitTransactionOnCluster {
       // Call split.
       this.admin.splitRegion(hri.getRegionName());
       List<HRegion> daughters = checkAndGetDaughters(tableName);
+
       // Before cleanup, get a new master.
       HMaster master = abortAndWaitForMaster();
       // Now call compact on the daughters and clean up any references.
-      for (HRegion daughter: daughters) {
+      for (HRegion daughter : daughters) {
         daughter.compact(true);
+        RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
+        while (CompactionState.NONE != admin
+            .getCompactionStateForRegion(daughter.getRegionInfo().getRegionName())
+            && retrier.shouldRetry()) {
+          retrier.sleepUntilNextRetry();
+        }
+        daughter.getStores().get(0).closeAndArchiveCompactedFiles();
         assertFalse(daughter.hasReferences());
       }
       // BUT calling compact on the daughters is not enough. The CatalogJanitor looks


[3/7] hbase git commit: HBASE-20940 HStore.cansplit should not allow split to happen if it has references (Vishal Khandelwal)

Posted by ap...@apache.org.
HBASE-20940 HStore.cansplit should not allow split to happen if it has references (Vishal Khandelwal)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/798cb1d7
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/798cb1d7
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/798cb1d7

Branch: refs/heads/branch-2.1
Commit: 798cb1d793a728484d695d94ecc60c2be2e303f8
Parents: 67ad0e6
Author: Andrew Purtell <ap...@apache.org>
Authored: Fri Aug 17 15:01:44 2018 -0700
Committer: Andrew Purtell <ap...@apache.org>
Committed: Fri Aug 17 15:02:26 2018 -0700

----------------------------------------------------------------------
 .../hadoop/hbase/regionserver/HStore.java       |  12 +-
 .../client/TestAsyncTableGetMultiThreaded.java  |  28 +++-
 .../hbase/io/encoding/TestChangingEncoding.java |   4 +
 .../hbase/namespace/TestNamespaceAuditor.java   |  20 +++
 .../TestEndToEndSplitTransaction.java           | 136 +++++++++++++++++--
 .../TestSplitTransactionOnCluster.java          |  37 +++--
 6 files changed, 210 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/798cb1d7/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
index 06d6455..c8d4255 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
@@ -1659,7 +1659,17 @@ public class HStore implements Store, HeapSize, StoreConfigInformation, Propagat
 
   @Override
   public boolean hasReferences() {
-    return StoreUtils.hasReferences(this.storeEngine.getStoreFileManager().getStorefiles());
+    List<HStoreFile> reloadedStoreFiles = null;
+    try {
+      // Reloading the store files from file system due to HBASE-20940. As split can happen with an
+      // region which has references
+      reloadedStoreFiles = loadStoreFiles();
+      return StoreUtils.hasReferences(reloadedStoreFiles);
+    } catch (IOException ioe) {
+      LOG.error("Error trying to determine if store has references, assuming references exists",
+        ioe);
+      return true;
+    }
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/hbase/blob/798cb1d7/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java
index 7632716..8a2dfcc 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java
@@ -21,7 +21,6 @@ import static org.apache.hadoop.hbase.HConstants.HBASE_CLIENT_META_OPERATION_TIM
 import static org.apache.hadoop.hbase.master.LoadBalancer.TABLES_ON_MASTER;
 import static org.junit.Assert.assertEquals;
 
-import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -35,18 +34,21 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
+
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.MemoryCompactionPolicy;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
 import org.apache.hadoop.hbase.io.ByteBufferPool;
 import org.apache.hadoop.hbase.regionserver.CompactingMemStore;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.testclassification.ClientTests;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.util.Threads;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
@@ -124,7 +126,7 @@ public class TestAsyncTableGetMultiThreaded {
   }
 
   @Test
-  public void test() throws IOException, InterruptedException, ExecutionException {
+  public void test() throws Exception {
     int numThreads = 20;
     AtomicBoolean stop = new AtomicBoolean(false);
     ExecutorService executor =
@@ -137,9 +139,31 @@ public class TestAsyncTableGetMultiThreaded {
     Collections.shuffle(Arrays.asList(SPLIT_KEYS), new Random(123));
     Admin admin = TEST_UTIL.getAdmin();
     for (byte[] splitPoint : SPLIT_KEYS) {
+      int oldRegionCount = admin.getRegions(TABLE_NAME).size();
       admin.split(TABLE_NAME, splitPoint);
+      TEST_UTIL.waitFor(30000, new ExplainingPredicate<Exception>() {
+        @Override
+        public boolean evaluate() throws Exception {
+          return TEST_UTIL.getMiniHBaseCluster().getRegions(TABLE_NAME).size() > oldRegionCount;
+        }
+
+        @Override
+        public String explainFailure() throws Exception {
+          return "Split has not finished yet";
+        }
+      });
+
       for (HRegion region : TEST_UTIL.getHBaseCluster().getRegions(TABLE_NAME)) {
         region.compact(true);
+
+        //Waiting for compaction to complete and references are cleaned up
+        RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
+        while (CompactionState.NONE != admin
+            .getCompactionStateForRegion(region.getRegionInfo().getRegionName())
+            && retrier.shouldRetry()) {
+          retrier.sleepUntilNextRetry();
+        }
+        region.getStores().get(0).closeAndArchiveCompactedFiles();
       }
       Thread.sleep(5000);
       admin.balance(true);

http://git-wip-us.apache.org/repos/asf/hbase/blob/798cb1d7/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
index 1937d80..38313c4 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
@@ -107,6 +107,10 @@ public class TestChangingEncoding {
   public static void setUpBeforeClass() throws Exception {
     // Use a small flush size to create more HFiles.
     conf.setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 1024 * 1024);
+    // Disabling split to make sure split does not cause modify column to wait which timesout test
+    // sometime
+    conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
+        "org.apache.hadoop.hbase.regionserver.DisabledRegionSplitPolicy");
     // ((Log4JLogger)RpcServerImplementation.LOG).getLogger().setLevel(Level.TRACE);
     // ((Log4JLogger)RpcClient.LOG).getLogger().setLevel(Level.TRACE);
     TEST_UTIL.startMiniCluster();

http://git-wip-us.apache.org/repos/asf/hbase/blob/798cb1d7/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java
index 1b4957a..cc6c217 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java
@@ -48,7 +48,9 @@ import org.apache.hadoop.hbase.MiniHBaseCluster;
 import org.apache.hadoop.hbase.NamespaceDescriptor;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.Waiter;
+import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
 import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.CompactionState;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
 import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
@@ -81,6 +83,7 @@ import org.apache.hadoop.hbase.snapshot.RestoreSnapshotException;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.zookeeper.KeeperException;
 import org.junit.After;
@@ -365,6 +368,23 @@ public class TestNamespaceAuditor {
     HRegion regionToSplit = UTIL.getMiniHBaseCluster().getRegions(tableTwo).stream()
       .filter(r -> r.getRegionInfo().containsRow(splitKey)).findFirst().get();
     regionToSplit.compact(true);
+    // Waiting for compaction to finish
+    UTIL.waitFor(30000, new Waiter.Predicate<Exception>() {
+      @Override
+      public boolean evaluate() throws Exception {
+        return (CompactionState.NONE == ADMIN
+            .getCompactionStateForRegion(regionToSplit.getRegionInfo().getRegionName()));
+      }
+    });
+
+    // Cleaning compacted references for split to proceed
+    regionToSplit.getStores().stream().forEach(s -> {
+      try {
+        s.closeAndArchiveCompactedFiles();
+      } catch (IOException e1) {
+        LOG.error("Error whiling cleaning compacted file");
+      }
+    });
     // the above compact may quit immediately if there is a compaction ongoing, so here we need to
     // wait a while to let the ongoing compaction finish.
     UTIL.waitFor(10000, regionToSplit::isSplittable);

http://git-wip-us.apache.org/repos/asf/hbase/blob/798cb1d7/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
index b0302f6..85e9d30 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
@@ -24,10 +24,13 @@ import static org.junit.Assert.assertTrue;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
 import java.util.Random;
 import java.util.Set;
 import java.util.TreeSet;
+import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
+
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.ChoreService;
@@ -41,6 +44,8 @@ import org.apache.hadoop.hbase.ScheduledChore;
 import org.apache.hadoop.hbase.Stoppable;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
+import org.apache.hadoop.hbase.client.CompactionState;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
 import org.apache.hadoop.hbase.client.Get;
@@ -49,13 +54,19 @@ import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.client.RegionLocator;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.hbase.util.PairOfSameType;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.util.StoppableImplementation;
 import org.apache.hadoop.hbase.util.Threads;
+import org.apache.hbase.thirdparty.com.google.common.collect.Iterators;
+import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
 import org.junit.AfterClass;
+import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.ClassRule;
 import org.junit.Rule;
@@ -65,8 +76,6 @@ import org.junit.rules.TestName;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import org.apache.hbase.thirdparty.com.google.common.collect.Iterators;
-
 @Category(LargeTests.class)
 public class TestEndToEndSplitTransaction {
 
@@ -92,6 +101,78 @@ public class TestEndToEndSplitTransaction {
     TEST_UTIL.shutdownMiniCluster();
   }
 
+
+  /*
+   * This is the test for : HBASE-20940 This test will split the region and try to open an reference
+   * over store file. Once store file has any reference, it makes sure that region can't be split
+   * @throws Exception
+   */
+  @Test
+  public void testCanSplitJustAfterASplit() throws Exception {
+    LOG.info("Starting testCanSplitJustAfterASplit");
+    byte[] fam = Bytes.toBytes("cf_split");
+
+    TableName tableName = TableName.valueOf("CanSplitTable");
+    Table source = TEST_UTIL.getConnection().getTable(tableName);
+    Admin admin = TEST_UTIL.getAdmin();
+    Map<String, StoreFileReader> scanner = Maps.newHashMap();
+
+    try {
+      TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName)
+          .setColumnFamily(ColumnFamilyDescriptorBuilder.of(fam)).build();
+
+      admin.createTable(htd);
+      TEST_UTIL.loadTable(source, fam);
+      List<HRegion> regions = TEST_UTIL.getHBaseCluster().getRegions(tableName);
+      regions.get(0).forceSplit(null);
+      admin.split(tableName);
+
+      while (regions.size() <= 1) {
+        regions = TEST_UTIL.getHBaseCluster().getRegions(tableName);
+        regions.stream()
+            .forEach(r -> r.getStores().get(0).getStorefiles().stream()
+                .filter(
+                  s -> s.isReference() && !scanner.containsKey(r.getRegionInfo().getEncodedName()))
+                .forEach(sf -> {
+                  StoreFileReader reader = ((HStoreFile) sf).getReader();
+                  reader.getStoreFileScanner(true, false, false, 0, 0, false);
+                  scanner.put(r.getRegionInfo().getEncodedName(), reader);
+                  LOG.info("Got reference to file = " + sf.getPath() + ",for region = "
+                      + r.getRegionInfo().getEncodedName());
+                }));
+      }
+
+      Assert.assertTrue("Regions did not split properly", regions.size() > 1);
+      Assert.assertTrue("Could not get reference any of the store file", scanner.size() > 1);
+
+      RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
+      while (CompactionState.NONE != admin.getCompactionState(tableName) && retrier.shouldRetry()) {
+        retrier.sleepUntilNextRetry();
+      }
+
+      Assert.assertEquals("Compaction did not complete in 30 secs", CompactionState.NONE,
+        admin.getCompactionState(tableName));
+
+      regions.stream()
+          .filter(region -> scanner.containsKey(region.getRegionInfo().getEncodedName()))
+          .forEach(r -> Assert.assertTrue("Contains an open file reference which can be split",
+            !r.getStores().get(0).canSplit()));
+    } finally {
+      scanner.values().stream().forEach(s -> {
+        try {
+          s.close(true);
+        } catch (IOException ioe) {
+          LOG.error("Failed while closing store file", ioe);
+        }
+      });
+      scanner.clear();
+      if (source != null) {
+        source.close();
+      }
+      TEST_UTIL.deleteTableIfAny(tableName);
+    }
+  }
+
   /**
    * Tests that the client sees meta table changes as atomic during splits
    */
@@ -151,18 +232,17 @@ public class TestEndToEndSplitTransaction {
     public void run() {
       try {
         Random random = new Random();
-        for (int i= 0; i< 5; i++) {
-          List<RegionInfo> regions =
-              MetaTableAccessor.getTableRegions(connection, tableName, true);
+        for (int i = 0; i < 5; i++) {
+          List<RegionInfo> regions = MetaTableAccessor.getTableRegions(connection, tableName, true);
           if (regions.isEmpty()) {
             continue;
           }
           int regionIndex = random.nextInt(regions.size());
 
-          //pick a random region and split it into two
+          // pick a random region and split it into two
           RegionInfo region = Iterators.get(regions.iterator(), regionIndex);
 
-          //pick the mid split point
+          // pick the mid split point
           int start = 0, end = Integer.MAX_VALUE;
           if (region.getStartKey().length > 0) {
             start = Bytes.toInt(region.getStartKey());
@@ -173,7 +253,7 @@ public class TestEndToEndSplitTransaction {
           int mid = start + ((end - start) / 2);
           byte[] splitPoint = Bytes.toBytes(mid);
 
-          //put some rows to the regions
+          // put some rows to the regions
           addData(start);
           addData(mid);
 
@@ -183,11 +263,11 @@ public class TestEndToEndSplitTransaction {
           log("Initiating region split for:" + region.getRegionNameAsString());
           try {
             admin.splitRegion(region.getRegionName(), splitPoint);
-            //wait until the split is complete
+            // wait until the split is complete
             blockUntilRegionSplit(CONF, 50000, region.getRegionName(), true);
 
           } catch (NotServingRegionException ex) {
-            //ignore
+            // ignore
           }
         }
       } catch (Throwable ex) {
@@ -226,9 +306,11 @@ public class TestEndToEndSplitTransaction {
     /** verify region boundaries obtained from MetaScanner */
     void verifyRegionsUsingMetaTableAccessor() throws Exception {
       List<RegionInfo> regionList = MetaTableAccessor.getTableRegions(connection, tableName, true);
-      verifyTableRegions(regionList.stream().collect(Collectors.toCollection(() -> new TreeSet<>(RegionInfo.COMPARATOR))));
+      verifyTableRegions(regionList.stream()
+          .collect(Collectors.toCollection(() -> new TreeSet<>(RegionInfo.COMPARATOR))));
       regionList = MetaTableAccessor.getAllRegions(connection, true);
-      verifyTableRegions(regionList.stream().collect(Collectors.toCollection(() -> new TreeSet<>(RegionInfo.COMPARATOR))));
+      verifyTableRegions(regionList.stream()
+          .collect(Collectors.toCollection(() -> new TreeSet<>(RegionInfo.COMPARATOR))));
     }
 
     /** verify region boundaries obtained from HTable.getStartEndKeys() */
@@ -343,7 +425,9 @@ public class TestEndToEndSplitTransaction {
     }
   }
 
-  /** Blocks until the region split is complete in hbase:meta and region server opens the daughters */
+  /**
+   * Blocks until the region split is complete in hbase:meta and region server opens the daughters
+   */
   public static void blockUntilRegionSplit(Configuration conf, long timeout,
       final byte[] regionName, boolean waitForDaughters)
       throws IOException, InterruptedException {
@@ -389,10 +473,32 @@ public class TestEndToEndSplitTransaction {
 
         rem = timeout - (System.currentTimeMillis() - start);
         blockUntilRegionIsOpened(conf, rem, daughterB);
+
+        // Compacting the new region to make sure references can be cleaned up
+        compactAndBlockUntilDone(TEST_UTIL.getAdmin(),
+          TEST_UTIL.getMiniHBaseCluster().getRegionServer(0), daughterA.getRegionName());
+        compactAndBlockUntilDone(TEST_UTIL.getAdmin(),
+          TEST_UTIL.getMiniHBaseCluster().getRegionServer(0), daughterB.getRegionName());
+
+        removeCompactedFiles(conn, timeout, daughterA);
+        removeCompactedFiles(conn, timeout, daughterB);
       }
     }
   }
 
+  public static void removeCompactedFiles(Connection conn, long timeout, RegionInfo hri)
+      throws IOException, InterruptedException {
+    log("remove compacted files for : " + hri.getRegionNameAsString());
+    List<HRegion> regions = TEST_UTIL.getHBaseCluster().getRegions(hri.getTable());
+    regions.stream().forEach(r -> {
+      try {
+        r.getStores().get(0).closeAndArchiveCompactedFiles();
+      } catch (IOException ioe) {
+        LOG.error("failed in removing compacted file", ioe);
+      }
+    });
+  }
+
   public static void blockUntilRegionIsInMeta(Connection conn, long timeout, RegionInfo hri)
       throws IOException, InterruptedException {
     log("blocking until region is in META: " + hri.getRegionNameAsString());
@@ -415,7 +521,9 @@ public class TestEndToEndSplitTransaction {
         Table table = conn.getTable(hri.getTable())) {
       byte[] row = hri.getStartKey();
       // Check for null/empty row. If we find one, use a key that is likely to be in first region.
-      if (row == null || row.length <= 0) row = new byte[] { '0' };
+      if (row == null || row.length <= 0) {
+        row = new byte[] { '0' };
+      }
       Get get = new Get(row);
       while (System.currentTimeMillis() - start < timeout) {
         try {

http://git-wip-us.apache.org/repos/asf/hbase/blob/798cb1d7/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
index 95e0112..eb162de 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
@@ -31,7 +31,9 @@ import java.util.List;
 import java.util.Map;
 import java.util.Optional;
 import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -50,6 +52,7 @@ import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.UnknownRegionException;
 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
 import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.CompactionState;
 import org.apache.hadoop.hbase.client.Consistency;
 import org.apache.hadoop.hbase.client.Delete;
 import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
@@ -78,6 +81,10 @@ import org.apache.hadoop.hbase.master.assignment.RegionStates;
 import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
 import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
 import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController;
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.testclassification.RegionServerTests;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -85,7 +92,10 @@ import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.HBaseFsck;
 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.util.Threads;
+import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
+import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.KeeperException.NodeExistsException;
 import org.junit.After;
@@ -101,14 +111,6 @@ import org.junit.rules.TestName;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
-import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
-
-import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
-
 /**
  * The below tests are testing split region against a running cluster
  */
@@ -386,11 +388,18 @@ public class TestSplitTransactionOnCluster {
       // Compact first to ensure we have cleaned up references -- else the split
       // will fail.
       this.admin.compactRegion(daughter.getRegionName());
+      RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
+      while (CompactionState.NONE != admin.getCompactionStateForRegion(daughter.getRegionName())
+          && retrier.shouldRetry()) {
+        retrier.sleepUntilNextRetry();
+      }
       daughters = cluster.getRegions(tableName);
       HRegion daughterRegion = null;
-      for (HRegion r: daughters) {
+      for (HRegion r : daughters) {
         if (RegionInfo.COMPARATOR.compare(r.getRegionInfo(), daughter) == 0) {
           daughterRegion = r;
+          // Archiving the compacted references file
+          r.getStores().get(0).closeAndArchiveCompactedFiles();
           LOG.info("Found matching HRI: " + daughterRegion);
           break;
         }
@@ -533,11 +542,19 @@ public class TestSplitTransactionOnCluster {
       // Call split.
       this.admin.splitRegion(hri.getRegionName());
       List<HRegion> daughters = checkAndGetDaughters(tableName);
+
       // Before cleanup, get a new master.
       HMaster master = abortAndWaitForMaster();
       // Now call compact on the daughters and clean up any references.
-      for (HRegion daughter: daughters) {
+      for (HRegion daughter : daughters) {
         daughter.compact(true);
+        RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
+        while (CompactionState.NONE != admin
+            .getCompactionStateForRegion(daughter.getRegionInfo().getRegionName())
+            && retrier.shouldRetry()) {
+          retrier.sleepUntilNextRetry();
+        }
+        daughter.getStores().get(0).closeAndArchiveCompactedFiles();
         assertFalse(daughter.hasReferences());
       }
       // BUT calling compact on the daughters is not enough. The CatalogJanitor looks