You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2014/01/22 23:39:11 UTC

svn commit: r1560553 - in /lucene/dev/trunk/solr: ./ core/src/java/org/apache/solr/store/hdfs/ core/src/java/org/apache/solr/update/ core/src/test/org/apache/solr/cloud/hdfs/ core/src/test/org/apache/solr/search/

Author: markrmiller
Date: Wed Jan 22 22:39:10 2014
New Revision: 1560553

URL: http://svn.apache.org/r1560553
Log:
SOLR-5657: When a SolrCore starts on HDFS, it should gracefully handle HDFS being in safe mode.

Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/store/hdfs/HdfsDirectory.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/store/hdfs/HdfsLockFactory.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/HdfsUpdateLog.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/hdfs/StressHdfsTest.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestRecoveryHdfs.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1560553&r1=1560552&r2=1560553&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Wed Jan 22 22:39:10 2014
@@ -219,6 +219,9 @@ Bug Fixes
 * SOLR-5650: When a replica becomes a leader, only peer sync with other replicas
   that last published an ACTIVE state. (Mark Miller)
 
+* SOLR-5657: When a SolrCore starts on HDFS, it should gracefully handle HDFS
+  being in safe mode. (Mark Miller)
+
 Optimizations
 ----------------------
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/store/hdfs/HdfsDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/store/hdfs/HdfsDirectory.java?rev=1560553&r1=1560552&r2=1560553&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/store/hdfs/HdfsDirectory.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/store/hdfs/HdfsDirectory.java Wed Jan 22 22:39:10 2014
@@ -28,9 +28,9 @@ import org.apache.hadoop.fs.FSDataInputS
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.ipc.RemoteException;
 import org.apache.lucene.store.BaseDirectory;
 import org.apache.lucene.store.BufferedIndexOutput;
-import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
@@ -58,14 +58,37 @@ public class HdfsDirectory extends BaseD
     this.hdfsDirPath = hdfsDirPath;
     this.configuration = configuration;
     fileSystem = FileSystem.newInstance(hdfsDirPath.toUri(), configuration);
-    try {
-      if (!fileSystem.exists(hdfsDirPath)) {
-        fileSystem.mkdirs(hdfsDirPath);
+    
+    while (true) {
+      try {
+        if (!fileSystem.exists(hdfsDirPath)) {
+          boolean success = fileSystem.mkdirs(hdfsDirPath);
+          if (!success) {
+            throw new RuntimeException("Could not create directory: " + hdfsDirPath);
+          }
+        } else {
+          fileSystem.mkdirs(hdfsDirPath); // check for safe mode
+        }
+        
+        break;
+      } catch (RemoteException e) {
+        if (e.getClassName().equals("org.apache.hadoop.hdfs.server.namenode.SafeModeException")) {
+          LOG.warn("The NameNode is in SafeMode - Solr will wait 5 seconds and try again.");
+          try {
+            Thread.sleep(5000);
+          } catch (InterruptedException e1) {
+            Thread.interrupted();
+          }
+          continue;
+        }
+        org.apache.solr.util.IOUtils.closeQuietly(fileSystem);
+        throw new RuntimeException(
+            "Problem creating directory: " + hdfsDirPath, e);
+      } catch (Exception e) {
+        org.apache.solr.util.IOUtils.closeQuietly(fileSystem);
+        throw new RuntimeException(
+            "Problem creating directory: " + hdfsDirPath, e);
       }
-    } catch (Exception e) {
-      org.apache.solr.util.IOUtils.closeQuietly(fileSystem);
-      throw new RuntimeException("Problem creating directory: " + hdfsDirPath,
-          e);
     }
   }
   

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/store/hdfs/HdfsLockFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/store/hdfs/HdfsLockFactory.java?rev=1560553&r1=1560552&r2=1560553&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/store/hdfs/HdfsLockFactory.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/store/hdfs/HdfsLockFactory.java Wed Jan 22 22:39:10 2014
@@ -24,6 +24,7 @@ import org.apache.hadoop.fs.FSDataOutput
 import org.apache.hadoop.fs.FileAlreadyExistsException;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.ipc.RemoteException;
 import org.apache.lucene.store.Lock;
 import org.apache.lucene.store.LockFactory;
 import org.apache.lucene.store.LockReleaseFailedException;
@@ -59,16 +60,31 @@ public class HdfsLockFactory extends Loc
     FileSystem fs = null;
     try {
       fs = FileSystem.newInstance(lockPath.toUri(), configuration);
-      
-      if (fs.exists(lockPath)) {
-        if (lockPrefix != null) {
-          lockName = lockPrefix + "-" + lockName;
-        }
-        
-        Path lockFile = new Path(lockPath, lockName);
-
-        if (fs.exists(lockFile) && !fs.delete(lockFile, false)) {
-          throw new IOException("Cannot delete " + lockFile);
+      while (true) {
+        if (fs.exists(lockPath)) {
+          if (lockPrefix != null) {
+            lockName = lockPrefix + "-" + lockName;
+          }
+          
+          Path lockFile = new Path(lockPath, lockName);
+          try {
+            if (fs.exists(lockFile) && !fs.delete(lockFile, false)) {
+              throw new IOException("Cannot delete " + lockFile);
+            }
+          } catch (RemoteException e) {
+            if (e.getClassName().equals(
+                "org.apache.hadoop.hdfs.server.namenode.SafeModeException")) {
+              log.warn("The NameNode is in SafeMode - Solr will wait 5 seconds and try again.");
+              try {
+                Thread.sleep(5000);
+              } catch (InterruptedException e1) {
+                Thread.interrupted();
+              }
+              continue;
+            }
+            throw e;
+          }
+          break;
         }
       }
     } finally {
@@ -99,20 +115,46 @@ public class HdfsLockFactory extends Loc
     @Override
     public boolean obtain() throws IOException {
       FSDataOutputStream file = null;
-      FileSystem fs = null;
+      FileSystem fs = FileSystem.newInstance(lockPath.toUri(), conf);
       try {
-        fs = FileSystem.newInstance(lockPath.toUri(), conf);
-        if (!fs.exists(lockPath)) {
-          fs.mkdirs(lockPath);
+        while (true) {
+          try {
+            if (!fs.exists(lockPath)) {
+              boolean success = fs.mkdirs(lockPath);
+              if (!success) {
+                throw new RuntimeException("Could not create directory: " + lockPath);
+              }
+            } else {
+              // just to check for safe mode
+              fs.mkdirs(lockPath);
+            }
+
+            
+            file = fs.create(new Path(lockPath, lockName), false);
+            break;
+          } catch (FileAlreadyExistsException e) {
+            return false;
+          } catch (RemoteException e) {
+            if (e.getClassName().equals(
+                "org.apache.hadoop.hdfs.server.namenode.SafeModeException")) {
+              log.warn("The NameNode is in SafeMode - Solr will wait 5 seconds and try again.");
+              try {
+                Thread.sleep(5000);
+              } catch (InterruptedException e1) {
+                Thread.interrupted();
+              }
+              continue;
+            }
+            log.error("Error creating lock file", e);
+            return false;
+          } catch (IOException e) {
+            log.error("Error creating lock file", e);
+            return false;
+          } finally {
+            IOUtils.closeQuietly(file);
+          }
         }
-        file = fs.create(new Path(lockPath, lockName), false);
-      } catch (FileAlreadyExistsException e) { 
-        return false;
-      }catch (IOException e) {
-        log.error("Error creating lock file", e);
-        return false;
       } finally {
-        IOUtils.closeQuietly(file);
         IOUtils.closeQuietly(fs);
       }
       return true;

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/HdfsUpdateLog.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/HdfsUpdateLog.java?rev=1560553&r1=1560552&r2=1560553&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/HdfsUpdateLog.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/HdfsUpdateLog.java Wed Jan 22 22:39:10 2014
@@ -31,6 +31,7 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.ipc.RemoteException;
 import org.apache.lucene.util.BytesRef;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrException.ErrorCode;
@@ -142,16 +143,33 @@ public class HdfsUpdateLog extends Updat
     }
     lastDataDir = dataDir;
     tlogDir = new Path(dataDir, TLOG_NAME);
-    
-    try {
-      if (!fs.exists(tlogDir)) {
-        boolean success = fs.mkdirs(tlogDir);
-        if (!success) {
-          throw new RuntimeException("Could not create directory:" + tlogDir);
+    while (true) {
+      try {
+        if (!fs.exists(tlogDir)) {
+          boolean success = fs.mkdirs(tlogDir);
+          if (!success) {
+            throw new RuntimeException("Could not create directory:" + tlogDir);
+          }
+        } else {
+          fs.mkdirs(tlogDir); // To check for safe mode
         }
+        break;
+      } catch (RemoteException e) {
+        if (e.getClassName().equals(
+            "org.apache.hadoop.hdfs.server.namenode.SafeModeException")) {
+          log.warn("The NameNode is in SafeMode - Solr will wait 5 seconds and try again.");
+          try {
+            Thread.sleep(5000);
+          } catch (InterruptedException e1) {
+            Thread.interrupted();
+          }
+          continue;
+        }
+        throw new RuntimeException(
+            "Problem creating directory: " + tlogDir, e);
+      } catch (IOException e) {
+        throw new RuntimeException("Problem creating directory: " + tlogDir, e);
       }
-    } catch (IOException e) {
-      throw new RuntimeException(e);
     }
     
     tlogFiles = getLogList(fs, tlogDir);

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java?rev=1560553&r1=1560552&r2=1560553&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java Wed Jan 22 22:39:10 2014
@@ -4,12 +4,16 @@ import java.io.File;
 import java.io.IOException;
 import java.net.URI;
 import java.util.Locale;
+import java.util.Map;
+import java.util.Timer;
+import java.util.TimerTask;
+import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.solr.SolrTestCaseJ4;
-import org.junit.Assert;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -31,6 +35,8 @@ import org.junit.Assert;
 public class HdfsTestUtil {
   
   private static Locale savedLocale;
+  
+  private static Map<MiniDFSCluster,Timer> timers = new ConcurrentHashMap<MiniDFSCluster,Timer>();
 
   public static MiniDFSCluster setupClass(String dataDir) throws Exception {
     LuceneTestCase.assumeFalse("HDFS tests were disabled by -Dtests.disableHdfs",
@@ -58,7 +64,22 @@ public class HdfsTestUtil {
     
     System.setProperty("solr.hdfs.home", "/solr_hdfs_home");
     
-    MiniDFSCluster dfsCluster = new MiniDFSCluster(conf, dataNodes, true, null);
+    final MiniDFSCluster dfsCluster = new MiniDFSCluster(conf, dataNodes, true, null);
+    dfsCluster.waitActive();
+    
+    NameNodeAdapter.enterSafeMode(dfsCluster.getNameNode(), false);
+    
+    int rnd = LuceneTestCase.random().nextInt(10000);
+    Timer timer = new Timer();
+    timer.schedule(new TimerTask() {
+      
+      @Override
+      public void run() {
+        NameNodeAdapter.leaveSafeMode(dfsCluster.getNameNode());
+      }
+    }, rnd);
+    
+    timers.put(dfsCluster, timer);
     
     SolrTestCaseJ4.useFactory("org.apache.solr.core.HdfsDirectoryFactory");
     
@@ -72,6 +93,7 @@ public class HdfsTestUtil {
     System.clearProperty("test.cache.data");
     System.clearProperty("solr.hdfs.home");
     if (dfsCluster != null) {
+      timers.remove(dfsCluster);
       dfsCluster.shutdown();
     }
     

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/hdfs/StressHdfsTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/hdfs/StressHdfsTest.java?rev=1560553&r1=1560552&r2=1560553&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/hdfs/StressHdfsTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/hdfs/StressHdfsTest.java Wed Jan 22 22:39:10 2014
@@ -23,11 +23,15 @@ import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Timer;
+import java.util.TimerTask;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
+import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.LuceneTestCase.Slow;
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.SolrServer;
@@ -35,6 +39,7 @@ import org.apache.solr.client.solrj.Solr
 import org.apache.solr.client.solrj.impl.HttpSolrServer;
 import org.apache.solr.client.solrj.request.QueryRequest;
 import org.apache.solr.cloud.BasicDistributedZkTest;
+import org.apache.solr.cloud.ChaosMonkey;
 import org.apache.solr.common.params.CollectionParams.CollectionAction;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.util.NamedList;
@@ -52,6 +57,9 @@ public class StressHdfsTest extends Basi
   private static final String DELETE_DATA_DIR_COLLECTION = "delete_data_dir";
   private static MiniDFSCluster dfsCluster;
   
+
+  private boolean testRestartIntoSafeMode;
+  
   @BeforeClass
   public static void setupClass() throws Exception {
 
@@ -67,7 +75,6 @@ public class StressHdfsTest extends Basi
     System.clearProperty("solr.hdfs.home");
     dfsCluster = null;
   }
-
   
   @Override
   protected String getDataDir(String dataDir) throws IOException {
@@ -78,6 +85,7 @@ public class StressHdfsTest extends Basi
     super();
     sliceCount = 1;
     shardCount = TEST_NIGHTLY ? 7 : random().nextInt(2) + 1;
+    testRestartIntoSafeMode = random().nextBoolean();
   }
   
   protected String getSolrXml() {
@@ -90,6 +98,31 @@ public class StressHdfsTest extends Basi
     for (int i = 0; i < cnt; i++) {
       createAndDeleteCollection();
     }
+
+    if (testRestartIntoSafeMode) {
+      createCollection(DELETE_DATA_DIR_COLLECTION, 1, 1, 1);
+      
+      waitForRecoveriesToFinish(DELETE_DATA_DIR_COLLECTION, false);
+      
+      ChaosMonkey.stop(jettys.get(0));
+      
+      // enter safe mode and restart a node
+      NameNodeAdapter.enterSafeMode(dfsCluster.getNameNode(), false);
+      
+      int rnd = LuceneTestCase.random().nextInt(10000);
+      Timer timer = new Timer();
+      timer.schedule(new TimerTask() {
+        
+        @Override
+        public void run() {
+          NameNodeAdapter.leaveSafeMode(dfsCluster.getNameNode());
+        }
+      }, rnd);
+      
+      ChaosMonkey.start(jettys.get(0));
+      
+      waitForRecoveriesToFinish(DELETE_DATA_DIR_COLLECTION, false);
+    }
   }
 
   private void createAndDeleteCollection() throws SolrServerException,

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestRecoveryHdfs.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestRecoveryHdfs.java?rev=1560553&r1=1560552&r2=1560553&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestRecoveryHdfs.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestRecoveryHdfs.java Wed Jan 22 22:39:10 2014
@@ -90,8 +90,8 @@ public class TestRecoveryHdfs extends So
       throw new RuntimeException(e);
     }
     
-    hdfsDataDir = hdfsUri + "/solr/shard1";
-    System.setProperty("solr.data.dir", hdfsUri + "/solr/shard1");
+    //hdfsDataDir = hdfsUri + "/solr/shard1";
+    // System.setProperty("solr.data.dir", hdfsUri + "/solr/shard1");
     System.setProperty("solr.ulog.dir", hdfsUri + "/solr/shard1");
     
     initCore("solrconfig-tlog.xml","schema15.xml");