You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@geode.apache.org by qi...@apache.org on 2015/08/20 00:20:33 UTC

incubator-geode git commit: GEODE-77: improve restart locator error handling on corrupted state file

Repository: incubator-geode
Updated Branches:
  refs/heads/feature/GEODE-77 c61fe3466 -> 7fd67b883


GEODE-77: improve restart locator error handling on corrupted state file


Project: http://git-wip-us.apache.org/repos/asf/incubator-geode/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-geode/commit/7fd67b88
Tree: http://git-wip-us.apache.org/repos/asf/incubator-geode/tree/7fd67b88
Diff: http://git-wip-us.apache.org/repos/asf/incubator-geode/diff/7fd67b88

Branch: refs/heads/feature/GEODE-77
Commit: 7fd67b8836b1c0ff95034a9a6f25901e805354f2
Parents: c61fe34
Author: Qihong Chen <qc...@pivotal.io>
Authored: Wed Aug 19 15:12:16 2015 -0700
Committer: Qihong Chen <qc...@pivotal.io>
Committed: Wed Aug 19 15:13:06 2015 -0700

----------------------------------------------------------------------
 gemfire-assembly/build.gradle                   |  2 +-
 .../distributed/internal/InternalLocator.java   |  7 +-
 .../membership/gms/locator/GMSLocator.java      | 66 ++++++---------
 .../gemfire/internal/i18n/LocalizedStrings.java |  2 +-
 .../gms/locator/GMSLocatorJUnitTest.java        | 87 ++++++++++++++++++++
 5 files changed, 118 insertions(+), 46 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/7fd67b88/gemfire-assembly/build.gradle
----------------------------------------------------------------------
diff --git a/gemfire-assembly/build.gradle b/gemfire-assembly/build.gradle
index 7e7dcb3..be655e2 100755
--- a/gemfire-assembly/build.gradle
+++ b/gemfire-assembly/build.gradle
@@ -112,7 +112,7 @@ def cp = {
       it.contains('spring-shell') ||
       it.contains('snappy-java') ||
       it.contains('hbase') ||
-      it.contains('jgroups')
+      it.contains('jgroups') ||
       it.contains('netty')
     }.join(' ') 
 }

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/7fd67b88/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/InternalLocator.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/InternalLocator.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/InternalLocator.java
index f649713..32dee46 100644
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/InternalLocator.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/InternalLocator.java
@@ -24,6 +24,7 @@ import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 
+import com.gemstone.gemfire.InternalGemFireException;
 import org.apache.logging.log4j.Logger;
 
 import com.gemstone.gemfire.CancelException;
@@ -1066,7 +1067,7 @@ public class InternalLocator extends Locator implements ConnectListener {
     ThreadGroup group = LoggingThreadGroup.createThreadGroup("Locator restart thread group");
     this.restartThread = new Thread(group, "Location services restart thread") {
       public void run() {
-        boolean restarted;
+        boolean restarted = false;
         try {
           restarted = attemptReconnect();
           logger.info("attemptReconnect returned {}", restarted);
@@ -1074,6 +1075,10 @@ public class InternalLocator extends Locator implements ConnectListener {
           logger.info("attempt to restart location services was interrupted", e);
         } catch (IOException e) {
           logger.info("attempt to restart location services terminated", e);
+        } finally {
+          if (! restarted) {
+            stoppedForReconnect = false;
+          }
         }
         InternalLocator.this.restartThread = null;
       }

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/7fd67b88/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/locator/GMSLocator.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/locator/GMSLocator.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/locator/GMSLocator.java
index a988dec..dd4ac51 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/locator/GMSLocator.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/locator/GMSLocator.java
@@ -14,6 +14,7 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 
+import com.gemstone.gemfire.InternalGemFireException;
 import org.apache.logging.log4j.Logger;
 
 import com.gemstone.gemfire.DataSerializer;
@@ -30,16 +31,16 @@ import com.gemstone.gemfire.distributed.internal.membership.gms.Services;
 import com.gemstone.gemfire.distributed.internal.membership.gms.interfaces.Locator;
 import com.gemstone.gemfire.distributed.internal.membership.gms.mgr.GMSMembershipManager;
 import com.gemstone.gemfire.distributed.internal.tcpserver.TcpClient;
-import com.gemstone.gemfire.distributed.internal.tcpserver.TcpHandler;
 import com.gemstone.gemfire.distributed.internal.tcpserver.TcpServer;
 import com.gemstone.gemfire.internal.Version;
 import com.gemstone.gemfire.internal.VersionedObjectInput;
-import com.gemstone.gemfire.internal.i18n.LocalizedStrings;
 import com.gemstone.gemfire.internal.logging.LogService;
 
+import static com.gemstone.gemfire.internal.i18n.LocalizedStrings.LOCATOR_UNABLE_TO_RECOVER_VIEW;
+
 public class GMSLocator implements Locator, NetLocator {
 
-  private static final int LOCATOR_FILE_STAMP = 0x7b8cf741;
+  /* package */ static final int LOCATOR_FILE_STAMP = 0x7b8cf741;
   
   private static final Logger logger = LogService.getLogger();
 
@@ -89,7 +90,7 @@ public class GMSLocator implements Locator, NetLocator {
   }
   
   @Override
-  public void init(TcpServer server) {
+  public void init(TcpServer server) throws InternalGemFireException {
     recover();
   }
   
@@ -236,7 +237,7 @@ public class GMSLocator implements Locator, NetLocator {
     setMembershipManager(((InternalDistributedSystem)ds).getDM().getMembershipManager());
   }
 
-  public void recover() {
+  public void recover() throws InternalGemFireException {
     if (!recoverFromOthers()) {
       recoverFromFile(viewFile);
     }
@@ -265,61 +266,40 @@ public class GMSLocator implements Locator, NetLocator {
         logger.info("Peer locator recovered initial membership of {}", view);
         return true;
       }
-    } catch (IOException e) {
-      // ignore
-    } catch (ClassNotFoundException e) {
-      // hmm - odd response?
-    }
+    } catch (IOException | ClassNotFoundException ignore) {}
     return false;
   }
-  
-  private boolean recoverFromFile(File file) {
+
+  /* package */ boolean recoverFromFile(File file) throws InternalGemFireException {
     if (!file.exists()) {
       return false;
     }
+
     logger.info("Peer locator recovering from " + file.getAbsolutePath());
-    FileInputStream fis = null;
-    try {
-      fis = new FileInputStream(file);
-      ObjectInput ois = new ObjectInputStream(fis);
-      
-      int magic = ois.readInt();
-      if (magic != LOCATOR_FILE_STAMP) {
+    try (ObjectInput ois = new ObjectInputStream(new FileInputStream(file))) {
+      if (ois.readInt() != LOCATOR_FILE_STAMP) {
         return false;
       }
-      
+
       int version = ois.readInt();
       Version geodeVersion = Version.fromOrdinalNoThrow((short)version, false);
-      if (geodeVersion != null  &&  version != Version.CURRENT_ORDINAL) {
+      if (geodeVersion != null  &&  version == Version.CURRENT_ORDINAL) {
         logger.info("Peer locator found that persistent view was written with {}", geodeVersion);
-        ois = new VersionedObjectInput(ois, geodeVersion);
-      } else {
-        return false;
-      }
-
-      this.view = DataSerializer.readObject(ois);
-
-      logger.info("Initial membership is " + view);
-      return true;
-    }
-    catch (Exception e) {
-      e.printStackTrace();
-      logger.warn(LocalizedStrings.Locator_unable_to_recover_view_0
-          .toLocalizedString(file), e);
-      try {
-        fis.close();
-      } catch (IOException ex) {
-        // ignore
+        ObjectInput ois2 = new VersionedObjectInput(ois, geodeVersion);
+        this.view = DataSerializer.readObject(ois2);
+        logger.info("Initial membership is " + view);
+        return true;
       }
+      return false;
+    } catch (Exception e) {
+      String msg = LOCATOR_UNABLE_TO_RECOVER_VIEW.toLocalizedString(file.toString());
+      logger.warn(msg, e);
       if (!file.delete() && file.exists()) {
         logger.warn("Peer locator was unable to recover from or delete " + file);
         this.viewFile = null;
       }
+      throw new InternalGemFireException(msg, e);
     }
-    finally {
-      try { fis.close(); } catch (IOException e) { }
-    }
-    return false;
   }
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/7fd67b88/gemfire-core/src/main/java/com/gemstone/gemfire/internal/i18n/LocalizedStrings.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/internal/i18n/LocalizedStrings.java b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/i18n/LocalizedStrings.java
index c4d9c58..f146834 100644
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/internal/i18n/LocalizedStrings.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/i18n/LocalizedStrings.java
@@ -2125,7 +2125,7 @@ public class LocalizedStrings extends ParentLocalizedStrings {
   public static final StringId MinimumSystemRequirements_NOT_MET = new StringId(6604, "Minimum system requirements not met. Unexpected behavior may result in additional errors.");
   public static final StringId MinimumSystemRequirements_JAVA_VERSION = new StringId(6605, "Java version older than {0}.");
 
-  public static final StringId Locator_unable_to_recover_view_0 = new StringId(6606, "Unable to recover previous membership view from  {0}");
+  public static final StringId LOCATOR_UNABLE_TO_RECOVER_VIEW = new StringId(6606, "Unable to recover previous membership view from {0}");
 
   public static final StringId Network_partition_detected = new StringId(6607, "Exiting due to possible network partition event due to loss of {0} cache processes: {1}");
   

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/7fd67b88/gemfire-core/src/test/java/com/gemstone/gemfire/distributed/internal/membership/gms/locator/GMSLocatorJUnitTest.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/test/java/com/gemstone/gemfire/distributed/internal/membership/gms/locator/GMSLocatorJUnitTest.java b/gemfire-core/src/test/java/com/gemstone/gemfire/distributed/internal/membership/gms/locator/GMSLocatorJUnitTest.java
new file mode 100644
index 0000000..e79dcbc
--- /dev/null
+++ b/gemfire-core/src/test/java/com/gemstone/gemfire/distributed/internal/membership/gms/locator/GMSLocatorJUnitTest.java
@@ -0,0 +1,87 @@
+package com.gemstone.gemfire.distributed.internal.membership.gms.locator;
+
+import com.gemstone.gemfire.DataSerializer;
+import com.gemstone.gemfire.InternalGemFireException;
+import com.gemstone.gemfire.distributed.internal.membership.NetView;
+import com.gemstone.gemfire.internal.Version;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import com.gemstone.gemfire.test.junit.categories.UnitTest;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.ObjectOutputStream;
+
+import static org.junit.Assert.*;
+
+@Category(UnitTest.class)
+public class GMSLocatorJUnitTest {
+
+  File tempStateFile = null;
+  GMSLocator locator = null;
+
+  @Before
+  public void setUp() throws Exception {
+    tempStateFile = File.createTempFile("tempLocator-", ".dat", new File("/tmp"));
+    locator = new GMSLocator(null, tempStateFile, null, false, false);
+    // System.out.println("temp state file: " + tempStateFile);
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    if (tempStateFile.exists()) {
+      tempStateFile.delete();
+    }
+  }
+
+  private void populateStateFile(File file, int fileStamp, int ordinal, Object object) throws Exception {
+    try (ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(file))) {
+      oos.writeInt(fileStamp);
+      oos.writeInt(ordinal);
+      DataSerializer.writeObject(object, oos);
+    }
+  }
+
+  @Test
+  public void testRecoverFromFileWithNonExistFile() throws Exception {
+    tempStateFile.delete();
+    assertFalse(tempStateFile.exists());
+    assertFalse(locator.recoverFromFile(tempStateFile));
+  }
+
+  @Test
+  public void testRecoverFromFileWithNormalFile() throws Exception {
+    NetView view = new NetView();
+    populateStateFile(tempStateFile, GMSLocator.LOCATOR_FILE_STAMP, Version.CURRENT_ORDINAL, view);
+    assertTrue(locator.recoverFromFile(tempStateFile));
+  }
+
+  @Test
+  public void testRecoverFromFileWithWrongFileStamp() throws Exception {
+    // add 1 to file stamp to make it invalid
+    populateStateFile(tempStateFile, GMSLocator.LOCATOR_FILE_STAMP + 1, Version.CURRENT_ORDINAL, 1);
+    assertFalse(locator.recoverFromFile(tempStateFile));
+  }
+
+  @Test
+  public void testRecoverFromFileWithWrongOrdinal() throws Exception {
+    // add 1 to ordinal to make it wrong
+    populateStateFile(tempStateFile, GMSLocator.LOCATOR_FILE_STAMP, Version.CURRENT_ORDINAL + 1, 1);
+    assertFalse(locator.recoverFromFile(tempStateFile));
+  }
+
+  @Test
+  public void testRecoverFromFileWithInvalidViewObject() throws Exception {
+    populateStateFile(tempStateFile, GMSLocator.LOCATOR_FILE_STAMP, Version.CURRENT_ORDINAL, 1);
+    try {
+      locator.recoverFromFile(tempStateFile);
+      fail("should catch InternalGemFileException");
+    } catch (InternalGemFireException e) {
+      assertTrue(e.getMessage().startsWith("Unable to recover previous membership view from"));
+    }
+  }
+
+}
+