You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by wc...@apache.org on 2020/02/10 17:04:30 UTC
[hbase] 02/02: HBASE-23693 Split failure may cause region hole and
data loss when use zk assign (#1071)
This is an automated email from the ASF dual-hosted git repository.
wchevreuil pushed a commit to branch branch-1
in repository https://gitbox.apache.org/repos/asf/hbase.git
commit 942bb77d84422b593a9a99b442170bd3c26fb088
Author: thangTang <ta...@gmail.com>
AuthorDate: Tue Feb 11 00:57:30 2020 +0800
HBASE-23693 Split failure may cause region hole and data loss when use zk assign (#1071)
Signed-off-by: stack <st...@apache.org>
Signed-off-by: Wellington Chevreuil <wc...@apache.org>
---
.../org/apache/hadoop/hbase/MetaTableAccessor.java | 14 +++++
.../apache/hadoop/hbase/master/RegionStates.java | 66 +++++++++++++++++++---
2 files changed, 73 insertions(+), 7 deletions(-)
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
index 440f8c6..1624364 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
@@ -843,6 +843,20 @@ public class MetaTableAccessor {
/**
* Returns the daughter regions by reading the corresponding columns of the catalog table
* Result.
+ * @param connection connection we're using
+ * @param parent region information of parent
+ * @return a pair of HRegionInfo or PairOfSameType(null, null) if the region is not a split
+ * parent
+ */
+ public static PairOfSameType<HRegionInfo> getDaughterRegionsFromParent(
+ final Connection connection, HRegionInfo parent) throws IOException {
+ // Look up the parent's catalog row by region name, then read its daughter columns.
+ return getDaughterRegions(getRegionResult(connection, parent.getRegionName()));
+ }
+
+ /**
+ * Returns the daughter regions by reading the corresponding columns of the catalog table
+ * Result.
* @param data a Result object from the catalog table scan
* @return a pair of HRegionInfo or PairOfSameType(null, null) if the region is not a split
* parent
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
index 3a02bdb..e31868e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
@@ -44,13 +44,17 @@ import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableStateManager;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Mutation;
+import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
+import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ConfigUtil;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.util.PairOfSameType;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.zookeeper.KeeperException;
@@ -737,11 +741,13 @@ public class RegionStates {
public List<HRegionInfo> serverOffline(final ZooKeeperWatcher watcher, final ServerName sn) {
// Offline all regions on this server not already in transition.
List<HRegionInfo> rits = new ArrayList<HRegionInfo>();
- Set<HRegionInfo> regionsToClean = new HashSet<HRegionInfo>();
+ Set<Pair<HRegionInfo, HRegionInfo>> regionsToClean =
+ new HashSet<Pair<HRegionInfo, HRegionInfo>>();
// Offline regions outside the loop and synchronized block to avoid
// ConcurrentModificationException and deadlock in case of meta anassigned,
// but RegionState a blocked.
Set<HRegionInfo> regionsToOffline = new HashSet<HRegionInfo>();
+ Map<String, HRegionInfo> daughter2Parent = new HashMap<>();
synchronized (this) {
Set<HRegionInfo> assignedRegions = serverHoldings.get(sn);
if (assignedRegions == null) {
@@ -758,8 +764,20 @@ public class RegionStates {
// Delete the ZNode if exists
ZKAssign.deleteNodeFailSilent(watcher, region);
regionsToOffline.add(region);
+ PairOfSameType<HRegionInfo> daughterRegions =
+ MetaTableAccessor.getDaughterRegionsFromParent(this.server.getConnection(), region);
+ if (daughterRegions != null) {
+ if (daughterRegions.getFirst() != null) {
+ daughter2Parent.put(daughterRegions.getFirst().getEncodedName(), region);
+ }
+ if (daughterRegions.getSecond() != null) {
+ daughter2Parent.put(daughterRegions.getSecond().getEncodedName(), region);
+ }
+ }
} catch (KeeperException ke) {
server.abort("Unexpected ZK exception deleting node " + region, ke);
+ } catch (IOException e) {
+ LOG.warn("get daughter from meta exception " + region, e);
}
}
}
@@ -783,10 +801,20 @@ public class RegionStates {
LOG.info("Found region in " + state +
" to be reassigned by ServerCrashProcedure for " + sn);
rits.add(hri);
- } else if(state.isSplittingNew() || state.isMergingNew()) {
- LOG.info("Offline/Cleanup region if no meta entry exists, hri: " + hri +
- " state: " + state);
- regionsToClean.add(state.getRegion());
+ } else if (state.isSplittingNew() || state.isMergingNew()) {
+ LOG.info(
+ "Offline/Cleanup region if no meta entry exists, hri: " + hri + " state: " + state);
+ if (daughter2Parent.containsKey(hri.getEncodedName())) {
+ HRegionInfo parent = daughter2Parent.get(hri.getEncodedName());
+ HRegionInfo info = getHRIFromMeta(parent);
+ if (info != null && info.isSplit() && info.isOffline()) {
+ regionsToClean.add(Pair.newPair(state.getRegion(), info));
+ } else {
+ regionsToClean.add(Pair.<HRegionInfo, HRegionInfo>newPair(state.getRegion(), null));
+ }
+ } else {
+ regionsToClean.add(Pair.<HRegionInfo, HRegionInfo>newPair(state.getRegion(), null));
+ }
} else {
LOG.warn("THIS SHOULD NOT HAPPEN: unexpected " + state);
}
@@ -803,6 +831,19 @@ public class RegionStates {
return rits;
}
+ /**
+ * Fetches parent's row from meta and extracts its HRegionInfo; logs and returns null on IOException.
+ */
+ private HRegionInfo getHRIFromMeta(HRegionInfo parent) {
+ try {
+ return MetaTableAccessor.getHRegionInfo(MetaTableAccessor.getRegionResult(
+ this.server.getConnection(), parent.getRegionName()));
+ } catch (IOException ioe) {
+ LOG.error("got exception when query meta with region " + parent.getEncodedName(), ioe);
+ return null;
+ }
+ }
+
/**
* This method does an RPC to hbase:meta. Do not call this method with a lock/synchronize held.
* In ZK mode we rollback and hence cleanup daughters/merged region. We also cleanup if
@@ -810,12 +851,14 @@ public class RegionStates {
*
* @param hris The hris to check if empty in hbase:meta and if so, clean them up.
*/
- private void cleanFailedSplitMergeRegions(Set<HRegionInfo> hris) {
+ private void cleanFailedSplitMergeRegions(Set<Pair<HRegionInfo, HRegionInfo>> hris) {
if (hris.isEmpty()) {
return;
}
- for (HRegionInfo hri : hris) {
+ for (Pair<HRegionInfo, HRegionInfo> hriPair : hris) {
+ HRegionInfo hri = hriPair.getFirst();
+ HRegionInfo parentInfo = hriPair.getSecond();
// This is RPC to meta table. It is done while we have a synchronize on
// regionstates. No progress will be made if meta is not available at this time.
// This is a cleanup task. Not critical.
@@ -829,6 +872,15 @@ public class RegionStates {
if (regionPair != null) {
MetaTableAccessor.deleteRegion(this.server.getConnection(), hri);
}
+ if (parentInfo != null) {
+ List<Mutation> mutations = new ArrayList<Mutation>();
+ HRegionInfo copyOfParent = new HRegionInfo(parentInfo);
+ copyOfParent.setOffline(false);
+ copyOfParent.setSplit(false);
+ Put putParent = MetaTableAccessor.makePutFromRegionInfo(copyOfParent);
+ mutations.add(putParent);
+ MetaTableAccessor.mutateMetaTable(this.server.getConnection(), mutations);
+ }
LOG.debug("Cleaning up HDFS since no meta entry exists, hri: " + hri);
FSUtils.deleteRegionDir(server.getConfiguration(), hri);
}