You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2014/01/22 23:40:32 UTC
svn commit: r1560554 - in /lucene/dev/branches/branch_4x: ./ solr/
solr/core/ solr/core/src/java/org/apache/solr/store/hdfs/
solr/core/src/java/org/apache/solr/update/
solr/core/src/test/org/apache/solr/cloud/hdfs/
solr/core/src/test/org/apache/solr/search/
Author: markrmiller
Date: Wed Jan 22 22:40:32 2014
New Revision: 1560554
URL: http://svn.apache.org/r1560554
Log:
SOLR-5657: When a SolrCore starts on HDFS, it should gracefully handle HDFS being in safe mode.
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/solr/ (props changed)
lucene/dev/branches/branch_4x/solr/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_4x/solr/core/ (props changed)
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/store/hdfs/HdfsDirectory.java
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/store/hdfs/HdfsLockFactory.java
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/HdfsUpdateLog.java
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/hdfs/StressHdfsTest.java
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/search/TestRecoveryHdfs.java
Modified: lucene/dev/branches/branch_4x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/CHANGES.txt?rev=1560554&r1=1560553&r2=1560554&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/solr/CHANGES.txt Wed Jan 22 22:40:32 2014
@@ -188,6 +188,9 @@ Bug Fixes
* SOLR-5650: When a replica becomes a leader, only peer sync with other replicas
that last published an ACTIVE state. (Mark Miller)
+* SOLR-5657: When a SolrCore starts on HDFS, it should gracefully handle HDFS
+ being in safe mode. (Mark Miller)
+
Optimizations
----------------------
Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/store/hdfs/HdfsDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/store/hdfs/HdfsDirectory.java?rev=1560554&r1=1560553&r2=1560554&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/store/hdfs/HdfsDirectory.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/store/hdfs/HdfsDirectory.java Wed Jan 22 22:40:32 2014
@@ -28,9 +28,9 @@ import org.apache.hadoop.fs.FSDataInputS
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.ipc.RemoteException;
import org.apache.lucene.store.BaseDirectory;
import org.apache.lucene.store.BufferedIndexOutput;
-import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
@@ -58,14 +58,37 @@ public class HdfsDirectory extends BaseD
this.hdfsDirPath = hdfsDirPath;
this.configuration = configuration;
fileSystem = FileSystem.newInstance(hdfsDirPath.toUri(), configuration);
- try {
- if (!fileSystem.exists(hdfsDirPath)) {
- fileSystem.mkdirs(hdfsDirPath);
+
+ while (true) {
+ try {
+ if (!fileSystem.exists(hdfsDirPath)) {
+ boolean success = fileSystem.mkdirs(hdfsDirPath);
+ if (!success) {
+ throw new RuntimeException("Could not create directory: " + hdfsDirPath);
+ }
+ } else {
+ fileSystem.mkdirs(hdfsDirPath); // check for safe mode
+ }
+
+ break;
+ } catch (RemoteException e) {
+ if (e.getClassName().equals("org.apache.hadoop.hdfs.server.namenode.SafeModeException")) {
+ LOG.warn("The NameNode is in SafeMode - Solr will wait 5 seconds and try again.");
+ try {
+ Thread.sleep(5000);
+ } catch (InterruptedException e1) {
+ Thread.interrupted();
+ }
+ continue;
+ }
+ org.apache.solr.util.IOUtils.closeQuietly(fileSystem);
+ throw new RuntimeException(
+ "Problem creating directory: " + hdfsDirPath, e);
+ } catch (Exception e) {
+ org.apache.solr.util.IOUtils.closeQuietly(fileSystem);
+ throw new RuntimeException(
+ "Problem creating directory: " + hdfsDirPath, e);
}
- } catch (Exception e) {
- org.apache.solr.util.IOUtils.closeQuietly(fileSystem);
- throw new RuntimeException("Problem creating directory: " + hdfsDirPath,
- e);
}
}
Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/store/hdfs/HdfsLockFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/store/hdfs/HdfsLockFactory.java?rev=1560554&r1=1560553&r2=1560554&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/store/hdfs/HdfsLockFactory.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/store/hdfs/HdfsLockFactory.java Wed Jan 22 22:40:32 2014
@@ -24,6 +24,7 @@ import org.apache.hadoop.fs.FSDataOutput
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.ipc.RemoteException;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockFactory;
import org.apache.lucene.store.LockReleaseFailedException;
@@ -59,16 +60,31 @@ public class HdfsLockFactory extends Loc
FileSystem fs = null;
try {
fs = FileSystem.newInstance(lockPath.toUri(), configuration);
-
- if (fs.exists(lockPath)) {
- if (lockPrefix != null) {
- lockName = lockPrefix + "-" + lockName;
- }
-
- Path lockFile = new Path(lockPath, lockName);
-
- if (fs.exists(lockFile) && !fs.delete(lockFile, false)) {
- throw new IOException("Cannot delete " + lockFile);
+ while (true) {
+ if (fs.exists(lockPath)) {
+ if (lockPrefix != null) {
+ lockName = lockPrefix + "-" + lockName;
+ }
+
+ Path lockFile = new Path(lockPath, lockName);
+ try {
+ if (fs.exists(lockFile) && !fs.delete(lockFile, false)) {
+ throw new IOException("Cannot delete " + lockFile);
+ }
+ } catch (RemoteException e) {
+ if (e.getClassName().equals(
+ "org.apache.hadoop.hdfs.server.namenode.SafeModeException")) {
+ log.warn("The NameNode is in SafeMode - Solr will wait 5 seconds and try again.");
+ try {
+ Thread.sleep(5000);
+ } catch (InterruptedException e1) {
+ Thread.interrupted();
+ }
+ continue;
+ }
+ throw e;
+ }
+ break;
}
}
} finally {
@@ -99,20 +115,46 @@ public class HdfsLockFactory extends Loc
@Override
public boolean obtain() throws IOException {
FSDataOutputStream file = null;
- FileSystem fs = null;
+ FileSystem fs = FileSystem.newInstance(lockPath.toUri(), conf);
try {
- fs = FileSystem.newInstance(lockPath.toUri(), conf);
- if (!fs.exists(lockPath)) {
- fs.mkdirs(lockPath);
+ while (true) {
+ try {
+ if (!fs.exists(lockPath)) {
+ boolean success = fs.mkdirs(lockPath);
+ if (!success) {
+ throw new RuntimeException("Could not create directory: " + lockPath);
+ }
+ } else {
+ // just to check for safe mode
+ fs.mkdirs(lockPath);
+ }
+
+
+ file = fs.create(new Path(lockPath, lockName), false);
+ break;
+ } catch (FileAlreadyExistsException e) {
+ return false;
+ } catch (RemoteException e) {
+ if (e.getClassName().equals(
+ "org.apache.hadoop.hdfs.server.namenode.SafeModeException")) {
+ log.warn("The NameNode is in SafeMode - Solr will wait 5 seconds and try again.");
+ try {
+ Thread.sleep(5000);
+ } catch (InterruptedException e1) {
+ Thread.interrupted();
+ }
+ continue;
+ }
+ log.error("Error creating lock file", e);
+ return false;
+ } catch (IOException e) {
+ log.error("Error creating lock file", e);
+ return false;
+ } finally {
+ IOUtils.closeQuietly(file);
+ }
}
- file = fs.create(new Path(lockPath, lockName), false);
- } catch (FileAlreadyExistsException e) {
- return false;
- }catch (IOException e) {
- log.error("Error creating lock file", e);
- return false;
} finally {
- IOUtils.closeQuietly(file);
IOUtils.closeQuietly(fs);
}
return true;
Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/HdfsUpdateLog.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/HdfsUpdateLog.java?rev=1560554&r1=1560553&r2=1560554&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/HdfsUpdateLog.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/HdfsUpdateLog.java Wed Jan 22 22:40:32 2014
@@ -31,6 +31,7 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.ipc.RemoteException;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
@@ -142,16 +143,33 @@ public class HdfsUpdateLog extends Updat
}
lastDataDir = dataDir;
tlogDir = new Path(dataDir, TLOG_NAME);
-
- try {
- if (!fs.exists(tlogDir)) {
- boolean success = fs.mkdirs(tlogDir);
- if (!success) {
- throw new RuntimeException("Could not create directory:" + tlogDir);
+ while (true) {
+ try {
+ if (!fs.exists(tlogDir)) {
+ boolean success = fs.mkdirs(tlogDir);
+ if (!success) {
+ throw new RuntimeException("Could not create directory:" + tlogDir);
+ }
+ } else {
+ fs.mkdirs(tlogDir); // To check for safe mode
}
+ break;
+ } catch (RemoteException e) {
+ if (e.getClassName().equals(
+ "org.apache.hadoop.hdfs.server.namenode.SafeModeException")) {
+ log.warn("The NameNode is in SafeMode - Solr will wait 5 seconds and try again.");
+ try {
+ Thread.sleep(5000);
+ } catch (InterruptedException e1) {
+ Thread.interrupted();
+ }
+ continue;
+ }
+ throw new RuntimeException(
+ "Problem creating directory: " + tlogDir, e);
+ } catch (IOException e) {
+ throw new RuntimeException("Problem creating directory: " + tlogDir, e);
}
- } catch (IOException e) {
- throw new RuntimeException(e);
}
tlogFiles = getLogList(fs, tlogDir);
Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java?rev=1560554&r1=1560553&r2=1560554&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java Wed Jan 22 22:40:32 2014
@@ -4,12 +4,16 @@ import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.util.Locale;
+import java.util.Map;
+import java.util.Timer;
+import java.util.TimerTask;
+import java.util.concurrent.ConcurrentHashMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.SolrTestCaseJ4;
-import org.junit.Assert;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -31,6 +35,8 @@ import org.junit.Assert;
public class HdfsTestUtil {
private static Locale savedLocale;
+
+ private static Map<MiniDFSCluster,Timer> timers = new ConcurrentHashMap<MiniDFSCluster,Timer>();
public static MiniDFSCluster setupClass(String dataDir) throws Exception {
LuceneTestCase.assumeFalse("HDFS tests were disabled by -Dtests.disableHdfs",
@@ -58,7 +64,22 @@ public class HdfsTestUtil {
System.setProperty("solr.hdfs.home", "/solr_hdfs_home");
- MiniDFSCluster dfsCluster = new MiniDFSCluster(conf, dataNodes, true, null);
+ final MiniDFSCluster dfsCluster = new MiniDFSCluster(conf, dataNodes, true, null);
+ dfsCluster.waitActive();
+
+ NameNodeAdapter.enterSafeMode(dfsCluster.getNameNode(), false);
+
+ int rnd = LuceneTestCase.random().nextInt(10000);
+ Timer timer = new Timer();
+ timer.schedule(new TimerTask() {
+
+ @Override
+ public void run() {
+ NameNodeAdapter.leaveSafeMode(dfsCluster.getNameNode());
+ }
+ }, rnd);
+
+ timers.put(dfsCluster, timer);
SolrTestCaseJ4.useFactory("org.apache.solr.core.HdfsDirectoryFactory");
@@ -72,6 +93,7 @@ public class HdfsTestUtil {
System.clearProperty("test.cache.data");
System.clearProperty("solr.hdfs.home");
if (dfsCluster != null) {
+ timers.remove(dfsCluster);
dfsCluster.shutdown();
}
Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/hdfs/StressHdfsTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/hdfs/StressHdfsTest.java?rev=1560554&r1=1560553&r2=1560554&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/hdfs/StressHdfsTest.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/hdfs/StressHdfsTest.java Wed Jan 22 22:40:32 2014
@@ -23,11 +23,15 @@ import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;
+import java.util.Timer;
+import java.util.TimerTask;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
+import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
@@ -35,6 +39,7 @@ import org.apache.solr.client.solrj.Solr
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.cloud.BasicDistributedZkTest;
+import org.apache.solr.cloud.ChaosMonkey;
import org.apache.solr.common.params.CollectionParams.CollectionAction;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
@@ -52,6 +57,9 @@ public class StressHdfsTest extends Basi
private static final String DELETE_DATA_DIR_COLLECTION = "delete_data_dir";
private static MiniDFSCluster dfsCluster;
+
+ private boolean testRestartIntoSafeMode;
+
@BeforeClass
public static void setupClass() throws Exception {
@@ -67,7 +75,6 @@ public class StressHdfsTest extends Basi
System.clearProperty("solr.hdfs.home");
dfsCluster = null;
}
-
@Override
protected String getDataDir(String dataDir) throws IOException {
@@ -78,6 +85,7 @@ public class StressHdfsTest extends Basi
super();
sliceCount = 1;
shardCount = TEST_NIGHTLY ? 7 : random().nextInt(2) + 1;
+ testRestartIntoSafeMode = random().nextBoolean();
}
protected String getSolrXml() {
@@ -90,6 +98,31 @@ public class StressHdfsTest extends Basi
for (int i = 0; i < cnt; i++) {
createAndDeleteCollection();
}
+
+ if (testRestartIntoSafeMode) {
+ createCollection(DELETE_DATA_DIR_COLLECTION, 1, 1, 1);
+
+ waitForRecoveriesToFinish(DELETE_DATA_DIR_COLLECTION, false);
+
+ ChaosMonkey.stop(jettys.get(0));
+
+ // enter safe mode and restart a node
+ NameNodeAdapter.enterSafeMode(dfsCluster.getNameNode(), false);
+
+ int rnd = LuceneTestCase.random().nextInt(10000);
+ Timer timer = new Timer();
+ timer.schedule(new TimerTask() {
+
+ @Override
+ public void run() {
+ NameNodeAdapter.leaveSafeMode(dfsCluster.getNameNode());
+ }
+ }, rnd);
+
+ ChaosMonkey.start(jettys.get(0));
+
+ waitForRecoveriesToFinish(DELETE_DATA_DIR_COLLECTION, false);
+ }
}
private void createAndDeleteCollection() throws SolrServerException,
Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/search/TestRecoveryHdfs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/search/TestRecoveryHdfs.java?rev=1560554&r1=1560553&r2=1560554&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/search/TestRecoveryHdfs.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/search/TestRecoveryHdfs.java Wed Jan 22 22:40:32 2014
@@ -90,8 +90,8 @@ public class TestRecoveryHdfs extends So
throw new RuntimeException(e);
}
- hdfsDataDir = hdfsUri + "/solr/shard1";
- System.setProperty("solr.data.dir", hdfsUri + "/solr/shard1");
+ //hdfsDataDir = hdfsUri + "/solr/shard1";
+ // System.setProperty("solr.data.dir", hdfsUri + "/solr/shard1");
System.setProperty("solr.ulog.dir", hdfsUri + "/solr/shard1");
initCore("solrconfig-tlog.xml","schema15.xml");