You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by wc...@apache.org on 2020/02/10 17:04:28 UTC

[hbase] branch branch-1 updated (f99e899 -> 942bb77)

This is an automated email from the ASF dual-hosted git repository.

wchevreuil pushed a change to branch branch-1
in repository https://gitbox.apache.org/repos/asf/hbase.git.


    from f99e899  HBASE-23693 Split failure may cause region hole and data loss when use zk assign (#1071)
     new bb49aa2  Revert "HBASE-23693 Split failure may cause region hole and data loss when use zk assign (#1071)"
     new 942bb77  HBASE-23693 Split failure may cause region hole and data loss when use zk assign (#1071)

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:


[hbase] 01/02: Revert "HBASE-23693 Split failure may cause region hole and data loss when use zk assign (#1071)"

Posted by wc...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

wchevreuil pushed a commit to branch branch-1
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit bb49aa205c685f7ce2c222b19846b59d62b22336
Author: Wellington Chevreuil <wc...@apache.org>
AuthorDate: Mon Feb 10 17:03:09 2020 +0000

    Revert "HBASE-23693 Split failure may cause region hole and data loss when use zk assign (#1071)"
    
    This reverts commit f99e899ca3a6d28d935793a42af16c527e8e0d87.
---
 .../org/apache/hadoop/hbase/MetaTableAccessor.java | 14 -----
 .../apache/hadoop/hbase/master/RegionStates.java   | 66 +++-------------------
 2 files changed, 7 insertions(+), 73 deletions(-)

diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
index 1624364..440f8c6 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
@@ -843,20 +843,6 @@ public class MetaTableAccessor {
   /**
    * Returns the daughter regions by reading the corresponding columns of the catalog table
    * Result.
-   * @param connection connection we're using
-   * @param parent region information of parent
-   * @return a pair of HRegionInfo or PairOfSameType(null, null) if the region is not a split
-   *   parent
-   */
-  public static PairOfSameType<HRegionInfo> getDaughterRegionsFromParent(
-    final Connection connection, HRegionInfo parent) throws IOException {
-    Result parentResult = getRegionResult(connection, parent.getRegionName());
-    return getDaughterRegions(parentResult);
-  }
-
-  /**
-   * Returns the daughter regions by reading the corresponding columns of the catalog table
-   * Result.
    * @param data a Result object from the catalog table scan
    * @return a pair of HRegionInfo or PairOfSameType(null, null) if the region is not a split
    * parent
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
index e31868e..3a02bdb 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
@@ -44,17 +44,13 @@ import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.TableStateManager;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.client.Mutation;
-import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
-import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.master.RegionState.State;
 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.ConfigUtil;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.Pair;
-import org.apache.hadoop.hbase.util.PairOfSameType;
 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
 import org.apache.zookeeper.KeeperException;
@@ -741,13 +737,11 @@ public class RegionStates {
   public List<HRegionInfo> serverOffline(final ZooKeeperWatcher watcher, final ServerName sn) {
     // Offline all regions on this server not already in transition.
     List<HRegionInfo> rits = new ArrayList<HRegionInfo>();
-    Set<Pair<HRegionInfo, HRegionInfo>> regionsToClean =
-      new HashSet<Pair<HRegionInfo, HRegionInfo>>();
+    Set<HRegionInfo> regionsToClean = new HashSet<HRegionInfo>();
     // Offline regions outside the loop and synchronized block to avoid
     // ConcurrentModificationException and deadlock in case of meta anassigned,
     // but RegionState a blocked.
     Set<HRegionInfo> regionsToOffline = new HashSet<HRegionInfo>();
-    Map<String, HRegionInfo> daughter2Parent = new HashMap<>();
     synchronized (this) {
       Set<HRegionInfo> assignedRegions = serverHoldings.get(sn);
       if (assignedRegions == null) {
@@ -764,20 +758,8 @@ public class RegionStates {
             // Delete the ZNode if exists
             ZKAssign.deleteNodeFailSilent(watcher, region);
             regionsToOffline.add(region);
-            PairOfSameType<HRegionInfo> daughterRegions =
-              MetaTableAccessor.getDaughterRegionsFromParent(this.server.getConnection(), region);
-            if (daughterRegions != null) {
-              if (daughterRegions.getFirst() != null) {
-                daughter2Parent.put(daughterRegions.getFirst().getEncodedName(), region);
-              }
-              if (daughterRegions.getSecond() != null) {
-                daughter2Parent.put(daughterRegions.getSecond().getEncodedName(), region);
-              }
-            }
           } catch (KeeperException ke) {
             server.abort("Unexpected ZK exception deleting node " + region, ke);
-          } catch (IOException e) {
-            LOG.warn("get daughter from meta exception " + region, e);
           }
         }
       }
@@ -801,20 +783,10 @@ public class RegionStates {
             LOG.info("Found region in " + state +
               " to be reassigned by ServerCrashProcedure for " + sn);
             rits.add(hri);
-          } else if (state.isSplittingNew() || state.isMergingNew()) {
-            LOG.info(
-              "Offline/Cleanup region if no meta entry exists, hri: " + hri + " state: " + state);
-            if (daughter2Parent.containsKey(hri.getEncodedName())) {
-              HRegionInfo parent = daughter2Parent.get(hri.getEncodedName());
-              HRegionInfo info = getHRIFromMeta(parent);
-              if (info != null && info.isSplit() && info.isOffline()) {
-                regionsToClean.add(Pair.newPair(state.getRegion(), info));
-              } else {
-                regionsToClean.add(Pair.<HRegionInfo, HRegionInfo>newPair(state.getRegion(), null));
-              }
-            } else {
-              regionsToClean.add(Pair.<HRegionInfo, HRegionInfo>newPair(state.getRegion(), null));
-            }
+          } else if(state.isSplittingNew() || state.isMergingNew()) {
+            LOG.info("Offline/Cleanup region if no meta entry exists, hri: " + hri +
+                " state: " + state);
+            regionsToClean.add(state.getRegion());
           } else {
             LOG.warn("THIS SHOULD NOT HAPPEN: unexpected " + state);
           }
@@ -831,19 +803,6 @@ public class RegionStates {
     return rits;
   }
 
-  private HRegionInfo getHRIFromMeta(HRegionInfo parent) {
-    Result result = null;
-    try {
-      result =
-        MetaTableAccessor.getRegionResult(this.server.getConnection(), parent.getRegionName());
-      HRegionInfo info = MetaTableAccessor.getHRegionInfo(result);
-      return info;
-    } catch (IOException e) {
-      LOG.error("got exception when query meta with region " + parent.getEncodedName(), e);
-      return null;
-    }
-  }
-
   /**
    * This method does an RPC to hbase:meta. Do not call this method with a lock/synchronize held.
    * In ZK mode we rollback and hence cleanup daughters/merged region. We also cleanup if
@@ -851,14 +810,12 @@ public class RegionStates {
    *
    * @param hris The hris to check if empty in hbase:meta and if so, clean them up.
    */
-  private void cleanFailedSplitMergeRegions(Set<Pair<HRegionInfo, HRegionInfo>> hris) {
+  private void cleanFailedSplitMergeRegions(Set<HRegionInfo> hris) {
     if (hris.isEmpty()) {
       return;
     }
 
-    for (Pair<HRegionInfo, HRegionInfo> hriPair : hris) {
-      HRegionInfo hri = hriPair.getFirst();
-      HRegionInfo parentInfo = hriPair.getSecond();
+    for (HRegionInfo hri : hris) {
       // This is RPC to meta table. It is done while we have a synchronize on
       // regionstates. No progress will be made if meta is not available at this time.
       // This is a cleanup task. Not critical.
@@ -872,15 +829,6 @@ public class RegionStates {
           if (regionPair != null) {
             MetaTableAccessor.deleteRegion(this.server.getConnection(), hri);
           }
-          if (parentInfo != null) {
-            List<Mutation> mutations = new ArrayList<Mutation>();
-            HRegionInfo copyOfParent = new HRegionInfo(parentInfo);
-            copyOfParent.setOffline(false);
-            copyOfParent.setSplit(false);
-            Put putParent = MetaTableAccessor.makePutFromRegionInfo(copyOfParent);
-            mutations.add(putParent);
-            MetaTableAccessor.mutateMetaTable(this.server.getConnection(), mutations);
-          }
           LOG.debug("Cleaning up HDFS since no meta entry exists, hri: " + hri);
           FSUtils.deleteRegionDir(server.getConfiguration(), hri);
         }


[hbase] 02/02: HBASE-23693 Split failure may cause region hole and data loss when use zk assign (#1071)

Posted by wc...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

wchevreuil pushed a commit to branch branch-1
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 942bb77d84422b593a9a99b442170bd3c26fb088
Author: thangTang <ta...@gmail.com>
AuthorDate: Tue Feb 11 00:57:30 2020 +0800

    HBASE-23693 Split failure may cause region hole and data loss when use zk assign (#1071)
    
    Signed-off-by: stack <st...@apache.org>
    Signed-off-by: Wellington Chevreuil <wc...@apache.org>
---
 .../org/apache/hadoop/hbase/MetaTableAccessor.java | 14 +++++
 .../apache/hadoop/hbase/master/RegionStates.java   | 66 +++++++++++++++++++---
 2 files changed, 73 insertions(+), 7 deletions(-)

diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
index 440f8c6..1624364 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
@@ -843,6 +843,20 @@ public class MetaTableAccessor {
   /**
    * Returns the daughter regions by reading the corresponding columns of the catalog table
    * Result.
+   * @param connection connection we're using
+   * @param parent region information of parent
+   * @return a pair of HRegionInfo or PairOfSameType(null, null) if the region is not a split
+   *   parent
+   */
+  public static PairOfSameType<HRegionInfo> getDaughterRegionsFromParent(
+    final Connection connection, HRegionInfo parent) throws IOException {
+    Result parentResult = getRegionResult(connection, parent.getRegionName());
+    return getDaughterRegions(parentResult);
+  }
+
+  /**
+   * Returns the daughter regions by reading the corresponding columns of the catalog table
+   * Result.
    * @param data a Result object from the catalog table scan
    * @return a pair of HRegionInfo or PairOfSameType(null, null) if the region is not a split
    * parent
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
index 3a02bdb..e31868e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
@@ -44,13 +44,17 @@ import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.TableStateManager;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Mutation;
+import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
+import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.master.RegionState.State;
 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.ConfigUtil;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.util.PairOfSameType;
 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
 import org.apache.zookeeper.KeeperException;
@@ -737,11 +741,13 @@ public class RegionStates {
   public List<HRegionInfo> serverOffline(final ZooKeeperWatcher watcher, final ServerName sn) {
     // Offline all regions on this server not already in transition.
     List<HRegionInfo> rits = new ArrayList<HRegionInfo>();
-    Set<HRegionInfo> regionsToClean = new HashSet<HRegionInfo>();
+    Set<Pair<HRegionInfo, HRegionInfo>> regionsToClean =
+      new HashSet<Pair<HRegionInfo, HRegionInfo>>();
     // Offline regions outside the loop and synchronized block to avoid
     // ConcurrentModificationException and deadlock in case of meta anassigned,
     // but RegionState a blocked.
     Set<HRegionInfo> regionsToOffline = new HashSet<HRegionInfo>();
+    Map<String, HRegionInfo> daughter2Parent = new HashMap<>();
     synchronized (this) {
       Set<HRegionInfo> assignedRegions = serverHoldings.get(sn);
       if (assignedRegions == null) {
@@ -758,8 +764,20 @@ public class RegionStates {
             // Delete the ZNode if exists
             ZKAssign.deleteNodeFailSilent(watcher, region);
             regionsToOffline.add(region);
+            PairOfSameType<HRegionInfo> daughterRegions =
+              MetaTableAccessor.getDaughterRegionsFromParent(this.server.getConnection(), region);
+            if (daughterRegions != null) {
+              if (daughterRegions.getFirst() != null) {
+                daughter2Parent.put(daughterRegions.getFirst().getEncodedName(), region);
+              }
+              if (daughterRegions.getSecond() != null) {
+                daughter2Parent.put(daughterRegions.getSecond().getEncodedName(), region);
+              }
+            }
           } catch (KeeperException ke) {
             server.abort("Unexpected ZK exception deleting node " + region, ke);
+          } catch (IOException e) {
+            LOG.warn("get daughter from meta exception " + region, e);
           }
         }
       }
@@ -783,10 +801,20 @@ public class RegionStates {
             LOG.info("Found region in " + state +
               " to be reassigned by ServerCrashProcedure for " + sn);
             rits.add(hri);
-          } else if(state.isSplittingNew() || state.isMergingNew()) {
-            LOG.info("Offline/Cleanup region if no meta entry exists, hri: " + hri +
-                " state: " + state);
-            regionsToClean.add(state.getRegion());
+          } else if (state.isSplittingNew() || state.isMergingNew()) {
+            LOG.info(
+              "Offline/Cleanup region if no meta entry exists, hri: " + hri + " state: " + state);
+            if (daughter2Parent.containsKey(hri.getEncodedName())) {
+              HRegionInfo parent = daughter2Parent.get(hri.getEncodedName());
+              HRegionInfo info = getHRIFromMeta(parent);
+              if (info != null && info.isSplit() && info.isOffline()) {
+                regionsToClean.add(Pair.newPair(state.getRegion(), info));
+              } else {
+                regionsToClean.add(Pair.<HRegionInfo, HRegionInfo>newPair(state.getRegion(), null));
+              }
+            } else {
+              regionsToClean.add(Pair.<HRegionInfo, HRegionInfo>newPair(state.getRegion(), null));
+            }
           } else {
             LOG.warn("THIS SHOULD NOT HAPPEN: unexpected " + state);
           }
@@ -803,6 +831,19 @@ public class RegionStates {
     return rits;
   }
 
+  private HRegionInfo getHRIFromMeta(HRegionInfo parent) {
+    Result result = null;
+    try {
+      result =
+        MetaTableAccessor.getRegionResult(this.server.getConnection(), parent.getRegionName());
+      HRegionInfo info = MetaTableAccessor.getHRegionInfo(result);
+      return info;
+    } catch (IOException e) {
+      LOG.error("got exception when query meta with region " + parent.getEncodedName(), e);
+      return null;
+    }
+  }
+
   /**
    * This method does an RPC to hbase:meta. Do not call this method with a lock/synchronize held.
    * In ZK mode we rollback and hence cleanup daughters/merged region. We also cleanup if
@@ -810,12 +851,14 @@ public class RegionStates {
    *
    * @param hris The hris to check if empty in hbase:meta and if so, clean them up.
    */
-  private void cleanFailedSplitMergeRegions(Set<HRegionInfo> hris) {
+  private void cleanFailedSplitMergeRegions(Set<Pair<HRegionInfo, HRegionInfo>> hris) {
     if (hris.isEmpty()) {
       return;
     }
 
-    for (HRegionInfo hri : hris) {
+    for (Pair<HRegionInfo, HRegionInfo> hriPair : hris) {
+      HRegionInfo hri = hriPair.getFirst();
+      HRegionInfo parentInfo = hriPair.getSecond();
       // This is RPC to meta table. It is done while we have a synchronize on
       // regionstates. No progress will be made if meta is not available at this time.
       // This is a cleanup task. Not critical.
@@ -829,6 +872,15 @@ public class RegionStates {
           if (regionPair != null) {
             MetaTableAccessor.deleteRegion(this.server.getConnection(), hri);
           }
+          if (parentInfo != null) {
+            List<Mutation> mutations = new ArrayList<Mutation>();
+            HRegionInfo copyOfParent = new HRegionInfo(parentInfo);
+            copyOfParent.setOffline(false);
+            copyOfParent.setSplit(false);
+            Put putParent = MetaTableAccessor.makePutFromRegionInfo(copyOfParent);
+            mutations.add(putParent);
+            MetaTableAccessor.mutateMetaTable(this.server.getConnection(), mutations);
+          }
           LOG.debug("Cleaning up HDFS since no meta entry exists, hri: " + hri);
           FSUtils.deleteRegionDir(server.getConfiguration(), hri);
         }