Posted to commits@lucene.apache.org by ma...@apache.org on 2015/01/29 18:33:29 UTC

svn commit: r1655761 - in /lucene/dev/branches/lucene_solr_5_0: ./ solr/ solr/core/ solr/core/src/java/org/apache/solr/update/ solr/core/src/java/org/apache/solr/util/ solr/core/src/test/org/apache/solr/cloud/hdfs/

Author: markrmiller
Date: Thu Jan 29 17:33:29 2015
New Revision: 1655761

URL: http://svn.apache.org/r1655761
Log:
SOLR-6969: When opening an HDFSTransactionLog for append we must first attempt to recover its lease to prevent data loss.

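For context, SOLR-6969 addresses the case where a core holding an open HDFS writer on a tlog dies or is killed: the NameNode still records the dead client as the file's lease holder, so a restarted core that simply calls fs.append() on that tlog can fail or append to a file whose last block was never finalized, dropping buffered updates. The added FSHDFSUtils.java (copied from branch_5x, not shown in this diff) recovers the lease before the append. Below is a minimal sketch of the technique, assuming the usual recoverLease/isFileClosed polling loop; the class name, method name, and timeout are illustrative and not taken from the commit:

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hdfs.DistributedFileSystem;

    public class LeaseRecoverySketch {

      /**
       * Ask the NameNode to take the lease from the previous (possibly dead) writer
       * and wait until the file is closed, so a new writer can safely append to it.
       */
      public static void recoverLease(FileSystem fs, Path file, Configuration conf)
          throws IOException, InterruptedException {
        if (!(fs instanceof DistributedFileSystem)) {
          return; // only HDFS has leases to recover
        }
        // conf would drive the retry/timeout policy in a real implementation;
        // this sketch just uses a fixed bound
        DistributedFileSystem dfs = (DistributedFileSystem) fs;
        long timeoutMs = 60_000L; // illustrative bound, not from the commit
        long start = System.currentTimeMillis();
        // recoverLease returns true if the file is already closed and the lease is free
        boolean recovered = dfs.recoverLease(file);
        while (!recovered && System.currentTimeMillis() - start < timeoutMs) {
          Thread.sleep(1000);
          // block recovery runs in the background; poll until the file is closed,
          // re-triggering recovery in case the first request was lost
          recovered = dfs.isFileClosed(file) || dfs.recoverLease(file);
        }
        if (!recovered) {
          throw new IOException("Could not recover lease on " + file
              + " within " + timeoutMs + " ms");
        }
      }
    }
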
Added:
    lucene/dev/branches/lucene_solr_5_0/solr/core/src/java/org/apache/solr/util/FSHDFSUtils.java
      - copied unchanged from r1655756, lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/util/FSHDFSUtils.java
Modified:
    lucene/dev/branches/lucene_solr_5_0/   (props changed)
    lucene/dev/branches/lucene_solr_5_0/solr/   (props changed)
    lucene/dev/branches/lucene_solr_5_0/solr/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/lucene_solr_5_0/solr/core/   (props changed)
    lucene/dev/branches/lucene_solr_5_0/solr/core/ivy.xml
    lucene/dev/branches/lucene_solr_5_0/solr/core/src/java/org/apache/solr/update/HdfsTransactionLog.java
    lucene/dev/branches/lucene_solr_5_0/solr/core/src/java/org/apache/solr/update/HdfsUpdateLog.java
    lucene/dev/branches/lucene_solr_5_0/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsCollectionsAPIDistributedZkTest.java
    lucene/dev/branches/lucene_solr_5_0/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java

Modified: lucene/dev/branches/lucene_solr_5_0/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_5_0/solr/CHANGES.txt?rev=1655761&r1=1655760&r2=1655761&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_5_0/solr/CHANGES.txt (original)
+++ lucene/dev/branches/lucene_solr_5_0/solr/CHANGES.txt Thu Jan 29 17:33:29 2015
@@ -471,6 +471,9 @@ Bug Fixes
 * SOLR-7016: Fix bin\solr.cmd to work in a directory with spaces in the name.
   (Timothy Potter, Uwe Schindler)
 
+* SOLR-6969: When opening an HDFSTransactionLog for append we must first attempt to recover
+  its lease to prevent data loss. (Mark Miller, Praneeth Varma, Colin McCabe)
+
 Optimizations
 ----------------------
 

Modified: lucene/dev/branches/lucene_solr_5_0/solr/core/ivy.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_5_0/solr/core/ivy.xml?rev=1655761&r1=1655760&r2=1655761&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_5_0/solr/core/ivy.xml (original)
+++ lucene/dev/branches/lucene_solr_5_0/solr/core/ivy.xml Thu Jan 29 17:33:29 2015
@@ -63,6 +63,8 @@
     <dependency org="org.apache.hadoop" name="hadoop-annotations" rev="${/org.apache.hadoop/hadoop-annotations}" conf="compile.hadoop"/>
     <dependency org="org.apache.hadoop" name="hadoop-auth" rev="${/org.apache.hadoop/hadoop-auth}" conf="compile.hadoop"/>
     <dependency org="commons-configuration" name="commons-configuration" rev="${/commons-configuration/commons-configuration}" conf="compile.hadoop"/>
+    <dependency org="commons-collections" name="commons-collections" rev="${/commons-collections/commons-collections}" conf="compile.hadoop"/>
+    
     <dependency org="com.google.protobuf" name="protobuf-java" rev="${/com.google.protobuf/protobuf-java}" conf="compile.hadoop"/>
     <dependency org="com.googlecode.concurrentlinkedhashmap" name="concurrentlinkedhashmap-lru" rev="${/com.googlecode.concurrentlinkedhashmap/concurrentlinkedhashmap-lru}" conf="compile.hadoop"/>
 

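The ${/commons-collections/commons-collections} revision is not hard-coded in ivy.xml; like the other Hadoop-related dependencies in this file, it resolves against a property defined in lucene/ivy-versions.properties, along the lines of (the version shown is illustrative, not taken from the commit):

    /commons-collections/commons-collections = 3.2.1
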
Modified: lucene/dev/branches/lucene_solr_5_0/solr/core/src/java/org/apache/solr/update/HdfsTransactionLog.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_5_0/solr/core/src/java/org/apache/solr/update/HdfsTransactionLog.java?rev=1655761&r1=1655760&r2=1655761&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_5_0/solr/core/src/java/org/apache/solr/update/HdfsTransactionLog.java (original)
+++ lucene/dev/branches/lucene_solr_5_0/solr/core/src/java/org/apache/solr/update/HdfsTransactionLog.java Thu Jan 29 17:33:29 2015
@@ -34,6 +34,7 @@ import org.apache.solr.common.util.FastI
 import org.apache.solr.common.util.FastOutputStream;
 import org.apache.solr.common.util.JavaBinCodec;
 import org.apache.solr.common.util.ObjectReleaseTracker;
+import org.apache.solr.util.FSHDFSUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -79,8 +80,9 @@ public class HdfsTransactionLog extends
       }
       this.tlogFile = tlogFile;
       
-      // TODO: look into forcefully taking over any lease
       if (fs.exists(tlogFile) && openExisting) {
+        FSHDFSUtils.recoverFileLease(fs, tlogFile, fs.getConf());
+        
         tlogOutStream = fs.append(tlogFile);
       } else {
         fs.delete(tlogFile, false);

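Note that the lease recovery is attempted only on the append path, i.e. when an existing tlog is reopened with openExisting; the other branch deletes and recreates the file, so there is no stale lease to take over. The Configuration passed via fs.getConf() is handed to the recovery helper, where it presumably governs retry and timeout behavior.
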
Modified: lucene/dev/branches/lucene_solr_5_0/solr/core/src/java/org/apache/solr/update/HdfsUpdateLog.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_5_0/solr/core/src/java/org/apache/solr/update/HdfsUpdateLog.java?rev=1655761&r1=1655760&r2=1655761&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_5_0/solr/core/src/java/org/apache/solr/update/HdfsUpdateLog.java (original)
+++ lucene/dev/branches/lucene_solr_5_0/solr/core/src/java/org/apache/solr/update/HdfsUpdateLog.java Thu Jan 29 17:33:29 2015
@@ -25,6 +25,7 @@ import java.util.List;
 import java.util.Locale;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.Future;
+import java.util.concurrent.atomic.AtomicLong;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
@@ -46,6 +47,9 @@ public class HdfsUpdateLog extends Updat
   private volatile FileSystem fs;
   private volatile Path tlogDir;
   private final String confDir;
+  
+  // used internally by tests to track the total count of failed transaction log loads in init
+  public static AtomicLong INIT_FAILED_LOGS_COUNT = new AtomicLong();
 
   public HdfsUpdateLog() {
     this.confDir = null;
@@ -191,6 +195,7 @@ public class HdfsUpdateLog extends Updat
         addOldLog(oldLog, false); // don't remove old logs on startup since more
                                   // than one may be uncapped.
       } catch (Exception e) {
+        INIT_FAILED_LOGS_COUNT.incrementAndGet();
         SolrException.log(log, "Failure to open existing log file (non fatal) "
             + f, e);
         try {

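INIT_FAILED_LOGS_COUNT gives tests a cheap way to detect tlog files that could not be reopened during update log init, which is exactly what a failed lease recovery would cause. A test typically reads it in teardown, as the HdfsCollectionsAPIDistributedZkTest change below does; a hedged sketch of the pattern (usage illustrative):

    // reset before the run if earlier tests may have touched the counter
    HdfsUpdateLog.INIT_FAILED_LOGS_COUNT.set(0);
    // ... exercise the cluster ...
    assertEquals(0, HdfsUpdateLog.INIT_FAILED_LOGS_COUNT.get());
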
Modified: lucene/dev/branches/lucene_solr_5_0/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsCollectionsAPIDistributedZkTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_5_0/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsCollectionsAPIDistributedZkTest.java?rev=1655761&r1=1655760&r2=1655761&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_5_0/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsCollectionsAPIDistributedZkTest.java (original)
+++ lucene/dev/branches/lucene_solr_5_0/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsCollectionsAPIDistributedZkTest.java Thu Jan 29 17:33:29 2015
@@ -22,6 +22,7 @@ import java.io.IOException;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.lucene.util.LuceneTestCase.Slow;
 import org.apache.solr.cloud.CollectionsAPIDistributedZkTest;
+import org.apache.solr.update.HdfsUpdateLog;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 
@@ -46,6 +47,7 @@ public class HdfsCollectionsAPIDistribut
   
   @AfterClass
   public static void teardownClass() throws Exception {
+    assertEquals(0, HdfsUpdateLog.INIT_FAILED_LOGS_COUNT.get());
     HdfsTestUtil.teardownClass(dfsCluster);
     System.clearProperty("solr.hdfs.home");
     System.clearProperty("solr.hdfs.blockcache.enabled");

Modified: lucene/dev/branches/lucene_solr_5_0/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_5_0/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java?rev=1655761&r1=1655760&r2=1655761&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_5_0/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java (original)
+++ lucene/dev/branches/lucene_solr_5_0/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java Thu Jan 29 17:33:29 2015
@@ -10,10 +10,14 @@ import java.util.TimerTask;
 import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.util.IOUtils;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -38,6 +42,8 @@ public class HdfsTestUtil {
   
   private static Map<MiniDFSCluster,Timer> timers = new ConcurrentHashMap<>();
 
+  private static FSDataOutputStream badTlogOutStream;
+
   public static MiniDFSCluster setupClass(String dir) throws Exception {
     return setupClass(dir, true);
   }
@@ -69,10 +75,11 @@ public class HdfsTestUtil {
     
     final MiniDFSCluster dfsCluster = new MiniDFSCluster(conf, dataNodes, true, null);
     dfsCluster.waitActive();
-
+    
     System.setProperty("solr.hdfs.home", getDataDir(dfsCluster, "solr_hdfs_home"));
     
-    if (safeModeTesting) {
+    int rndMode = LuceneTestCase.random().nextInt(10);
+    if (safeModeTesting && rndMode > 4) {
       NameNodeAdapter.enterSafeMode(dfsCluster.getNameNode(), false);
       
       int rnd = LuceneTestCase.random().nextInt(10000);
@@ -86,6 +93,13 @@ public class HdfsTestUtil {
       }, rnd);
       
       timers.put(dfsCluster, timer);
+    } else {
+      // force a lease recovery by creating a tlog file and not closing it
+      URI uri = dfsCluster.getURI();
+      Path hdfsDirPath = new Path(uri.toString() + "/solr/collection1/core_node1/data/tlog/tlog.0000000000000000000");
+      // simulates a transaction log that an earlier writer created but never closed
+      FileSystem fs = FileSystem.newInstance(hdfsDirPath.toUri(), conf);
+      badTlogOutStream = fs.create(hdfsDirPath);
     }
     
     SolrTestCaseJ4.useFactory("org.apache.solr.core.HdfsDirectoryFactory");
@@ -105,6 +119,10 @@ public class HdfsTestUtil {
       dfsCluster.shutdown();
     }
     
+    if (badTlogOutStream != null) {
+      IOUtils.closeQuietly(badTlogOutStream);
+    }
+    
     // TODO: we HACK around HADOOP-9643
     if (savedLocale != null) {
       Locale.setDefault(savedLocale);
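
The unclosed stream returned by fs.create(hdfsDirPath) is the point of the new test branch: as long as badTlogOutStream stays open, that client holds the HDFS lease on tlog.0000000000000000000, which is the state a crashed writer leaves behind and the state SOLR-6969's lease recovery must handle before HdfsTransactionLog can append. teardownClass closes it with IOUtils.closeQuietly so the MiniDFSCluster shuts down cleanly.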