Posted to commits@lucene.apache.org by ma...@apache.org on 2015/02/06 22:38:58 UTC

svn commit: r1657969 - in /lucene/dev/trunk/solr: CHANGES.txt core/src/java/org/apache/solr/handler/ReplicationHandler.java core/src/java/org/apache/solr/handler/SnapPuller.java core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java

Author: markrmiller
Date: Fri Feb  6 21:38:57 2015
New Revision: 1657969

URL: http://svn.apache.org/r1657969
Log:
SOLR-6920, SOLR-6640: A replicated index can end up corrupted when small files end up with the same file name and size.
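
Note: the heart of this change is that replicas now compare index files by length and by the Lucene codec footer checksum rather than by file name and size alone. The standalone sketch below is not part of the commit; it only illustrates the comparison idea using the same Lucene APIs the patch relies on (Directory.openInput, IndexInput.length, CodecUtil.retrieveChecksum). The class name, directory path, and expected values are hypothetical.

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;

/** Minimal sketch of checksum-based file comparison; not the actual SnapPuller code. */
public class ChecksumCompareSketch {

  /** Returns true only if the local file matches the expected length and footer checksum. */
  static boolean matches(Directory dir, String fileName,
                         long expectedLength, long expectedChecksum) {
    try (IndexInput in = dir.openInput(fileName, IOContext.READONCE)) {
      if (in.length() != expectedLength) {
        return false; // different size, so certainly a different file
      }
      // retrieveChecksum reads the checksum stored in the codec footer; it throws
      // if the file has no footer, in which case a name+size match cannot be trusted.
      long actualChecksum = CodecUtil.retrieveChecksum(in);
      return actualChecksum == expectedChecksum;
    } catch (Exception e) {
      return false; // missing file, unreadable file, or no footer: treat as a mismatch
    }
  }

  public static void main(String[] args) throws IOException {
    // Hypothetical path and values; in the real handler they come from the master's file list.
    try (Directory dir = FSDirectory.open(Paths.get("/tmp/index"))) {
      System.out.println(matches(dir, "_0.cfs", 1234L, 42L));
    }
  }
}

Files whose on-disk format carries no footer checksum cannot be verified this way, which is why the patch below falls back to a plain length comparison in that case and always re-fetches un-checksummed .si, .liv, and segments_N files.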

Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/SnapPuller.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1657969&r1=1657968&r2=1657969&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Fri Feb  6 21:38:57 2015
@@ -616,6 +616,9 @@ Bug Fixes
     * olap.* in AnalyticsComponent
   (Alexandre Rafalovitch & hossman)
 
+* SOLR-6920: A replicated index can end up corrupted when small files end up with the same 
+  file name and size. (Varun Thacker, Mark Miller)
+
 Optimizations
 ----------------------
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java?rev=1657969&r1=1657968&r2=1657969&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java Fri Feb  6 21:38:57 2015
@@ -33,7 +33,6 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.Date;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
@@ -45,10 +44,13 @@ import java.util.zip.Checksum;
 import java.util.zip.DeflaterOutputStream;
 
 import org.apache.commons.io.IOUtils;
+import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexCommit;
 import org.apache.lucene.index.IndexDeletionPolicy;
 import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.SegmentCommitInfo;
+import org.apache.lucene.index.SegmentInfos;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
@@ -425,27 +427,49 @@ public class ReplicationHandler extends
     List<Map<String, Object>> result = new ArrayList<>();
     Directory dir = null;
     try {
-      // get all the files in the commit
-      // use a set to workaround possible Lucene bug which returns same file
-      // name multiple times
-      Collection<String> files = new HashSet<>(commit.getFileNames());
       dir = core.getDirectoryFactory().get(core.getNewIndexDir(), DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
-      try {
-        
-        for (String fileName : files) {
-          if (fileName.endsWith(".lock")) continue;
+      SegmentInfos infos = SegmentInfos.readCommit(dir, commit.getSegmentsFileName());
+      for (SegmentCommitInfo commitInfo : infos) {
+        for (String file : commitInfo.files()) {
           Map<String,Object> fileMeta = new HashMap<>();
-          fileMeta.put(NAME, fileName);
-          fileMeta.put(SIZE, dir.fileLength(fileName));
+          fileMeta.put(NAME, file);
+          fileMeta.put(SIZE, dir.fileLength(file));
+          
+          try (final IndexInput in = dir.openInput(file, IOContext.READONCE)) {
+            try {
+              long checksum = CodecUtil.retrieveChecksum(in);
+              fileMeta.put(CHECKSUM, checksum);
+            } catch(Exception e) {
+              LOG.warn("Could not read checksum from index file.", e);
+            }
+          }
+          
           result.add(fileMeta);
         }
-      } finally {
-        core.getDirectoryFactory().release(dir);
       }
+
+      // add the segments_N file
+      Map<String,Object> fileMeta = new HashMap<>();
+      fileMeta.put(NAME, infos.getSegmentsFileName());
+      fileMeta.put(SIZE, dir.fileLength(infos.getSegmentsFileName()));
+      if (infos.getId() != null) {
+        try (final IndexInput in = dir.openInput(infos.getSegmentsFileName(), IOContext.READONCE)) {
+          fileMeta.put(CHECKSUM, CodecUtil.retrieveChecksum(in));
+        }
+      }
+      result.add(fileMeta);
     } catch (IOException e) {
       rsp.add("status", "unable to get file names for given index generation");
       rsp.add("exception", e);
       LOG.error("Unable to get file names for indexCommit generation: " + gen, e);
+    } finally {
+      if (dir != null) {
+        try {
+          core.getDirectoryFactory().release(dir);
+        } catch (IOException e) {
+          SolrException.log(LOG, "Could not release directory after fetching file list", e);
+        }
+      }
     }
     rsp.add(CMD_GET_FILE_LIST, result);
     if (confFileNameAlias.size() < 1 || core.getCoreDescriptor().getCoreContainer().isZooKeeperAware())

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/SnapPuller.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/SnapPuller.java?rev=1657969&r1=1657968&r2=1657969&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/SnapPuller.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/SnapPuller.java Fri Feb  6 21:38:57 2015
@@ -16,43 +16,25 @@
  */
 package org.apache.solr.handler;
 
-import org.apache.commons.io.IOUtils;
-import org.apache.http.client.HttpClient;
-import org.apache.lucene.index.IndexCommit;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.SegmentCommitInfo;
-import org.apache.lucene.index.SegmentInfos;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.client.solrj.impl.HttpClientUtil;
-import org.apache.solr.client.solrj.impl.HttpSolrClient;
-import org.apache.solr.client.solrj.request.QueryRequest;
-import org.apache.solr.common.SolrException;
-import org.apache.solr.common.SolrException.ErrorCode;
-import org.apache.solr.common.params.CommonParams;
-import org.apache.solr.common.params.ModifiableSolrParams;
-import org.apache.solr.common.util.ExecutorUtil;
-import org.apache.solr.common.util.FastInputStream;
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.core.DirectoryFactory;
-import org.apache.solr.core.DirectoryFactory.DirContext;
-import org.apache.solr.core.IndexDeletionPolicyWrapper;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.handler.ReplicationHandler.FileInfo;
-import org.apache.solr.request.LocalSolrQueryRequest;
-import org.apache.solr.request.SolrQueryRequest;
-import org.apache.solr.search.SolrIndexSearcher;
-import org.apache.solr.update.CommitUpdateCommand;
-import org.apache.solr.util.DefaultSolrThreadFactory;
-import org.apache.solr.util.FileUtils;
-import org.apache.solr.util.PropertiesInputStream;
-import org.apache.solr.util.PropertiesOutputStream;
-import org.apache.solr.util.RefCounted;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import static org.apache.solr.handler.ReplicationHandler.ALIAS;
+import static org.apache.solr.handler.ReplicationHandler.CHECKSUM;
+import static org.apache.solr.handler.ReplicationHandler.CMD_DETAILS;
+import static org.apache.solr.handler.ReplicationHandler.CMD_GET_FILE;
+import static org.apache.solr.handler.ReplicationHandler.CMD_GET_FILE_LIST;
+import static org.apache.solr.handler.ReplicationHandler.CMD_INDEX_VERSION;
+import static org.apache.solr.handler.ReplicationHandler.COMMAND;
+import static org.apache.solr.handler.ReplicationHandler.COMPRESSION;
+import static org.apache.solr.handler.ReplicationHandler.CONF_FILES;
+import static org.apache.solr.handler.ReplicationHandler.CONF_FILE_SHORT;
+import static org.apache.solr.handler.ReplicationHandler.EXTERNAL;
+import static org.apache.solr.handler.ReplicationHandler.FILE;
+import static org.apache.solr.handler.ReplicationHandler.FILE_STREAM;
+import static org.apache.solr.handler.ReplicationHandler.GENERATION;
+import static org.apache.solr.handler.ReplicationHandler.INTERNAL;
+import static org.apache.solr.handler.ReplicationHandler.MASTER_URL;
+import static org.apache.solr.handler.ReplicationHandler.NAME;
+import static org.apache.solr.handler.ReplicationHandler.OFFSET;
+import static org.apache.solr.handler.ReplicationHandler.SIZE;
 
 import java.io.File;
 import java.io.FileNotFoundException;
@@ -94,25 +76,43 @@ import java.util.zip.Adler32;
 import java.util.zip.Checksum;
 import java.util.zip.InflaterInputStream;
 
-import static org.apache.solr.handler.ReplicationHandler.ALIAS;
-import static org.apache.solr.handler.ReplicationHandler.CHECKSUM;
-import static org.apache.solr.handler.ReplicationHandler.CMD_DETAILS;
-import static org.apache.solr.handler.ReplicationHandler.CMD_GET_FILE;
-import static org.apache.solr.handler.ReplicationHandler.CMD_GET_FILE_LIST;
-import static org.apache.solr.handler.ReplicationHandler.CMD_INDEX_VERSION;
-import static org.apache.solr.handler.ReplicationHandler.COMMAND;
-import static org.apache.solr.handler.ReplicationHandler.COMPRESSION;
-import static org.apache.solr.handler.ReplicationHandler.CONF_FILES;
-import static org.apache.solr.handler.ReplicationHandler.CONF_FILE_SHORT;
-import static org.apache.solr.handler.ReplicationHandler.EXTERNAL;
-import static org.apache.solr.handler.ReplicationHandler.FILE;
-import static org.apache.solr.handler.ReplicationHandler.FILE_STREAM;
-import static org.apache.solr.handler.ReplicationHandler.GENERATION;
-import static org.apache.solr.handler.ReplicationHandler.INTERNAL;
-import static org.apache.solr.handler.ReplicationHandler.MASTER_URL;
-import static org.apache.solr.handler.ReplicationHandler.NAME;
-import static org.apache.solr.handler.ReplicationHandler.OFFSET;
-import static org.apache.solr.handler.ReplicationHandler.SIZE;
+import org.apache.commons.io.IOUtils;
+import org.apache.http.client.HttpClient;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.index.IndexCommit;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.SegmentInfos;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.impl.HttpClientUtil;
+import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.request.QueryRequest;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrException.ErrorCode;
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.util.ExecutorUtil;
+import org.apache.solr.common.util.FastInputStream;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.DirectoryFactory;
+import org.apache.solr.core.DirectoryFactory.DirContext;
+import org.apache.solr.core.IndexDeletionPolicyWrapper;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.handler.ReplicationHandler.FileInfo;
+import org.apache.solr.request.LocalSolrQueryRequest;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.update.CommitUpdateCommand;
+import org.apache.solr.util.DefaultSolrThreadFactory;
+import org.apache.solr.util.FileUtils;
+import org.apache.solr.util.PropertiesInputStream;
+import org.apache.solr.util.PropertiesOutputStream;
+import org.apache.solr.util.RefCounted;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * <p/> Provides functionality of downloading changed index files as well as config files and a timer for scheduling fetches from the
@@ -445,8 +445,7 @@ public class SnapPuller {
               + isFullCopyNeeded);
           successfulInstall = false;
           
-          downloadIndexFiles(isFullCopyNeeded, indexDir, tmpIndexDir,
-              latestGeneration);
+          downloadIndexFiles(isFullCopyNeeded, indexDir, tmpIndexDir, latestGeneration);
           LOG.info("Total time taken for download : "
               + ((System.currentTimeMillis() - replicationStartTime) / 1000)
               + " secs");
@@ -796,15 +795,17 @@ public class SnapPuller {
    * @param indexDir                 the indexDir to be merged to
    * @param latestGeneration         the version number
    */
-  private void downloadIndexFiles(boolean downloadCompleteIndex,
-      Directory indexDir, Directory tmpIndexDir, long latestGeneration)
+  private void downloadIndexFiles(boolean downloadCompleteIndex, Directory indexDir, Directory tmpIndexDir, long latestGeneration)
       throws Exception {
     if (LOG.isDebugEnabled()) {
       LOG.debug("Download files to dir: " + Arrays.asList(indexDir.listAll()));
     }
     for (Map<String,Object> file : filesToDownload) {
-      if (!slowFileExists(indexDir, (String) file.get(NAME))
-          || downloadCompleteIndex) {
+      String filename = (String) file.get(NAME);
+      CompareResult compareResult = compareFile(indexDir, filename, (Long) file.get(SIZE), (Long) file.get(CHECKSUM));
+      if (!compareResult.equal || downloadCompleteIndex
+          || (!compareResult.checkSummed && (filename.endsWith(".si") || filename.endsWith(".liv")
+          || filename.startsWith("segments_")))) {
         dirFileFetcher = new DirectoryFileFetcher(tmpIndexDir, file,
             (String) file.get(NAME), false, latestGeneration);
         currentFile = file;
@@ -817,6 +818,54 @@ public class SnapPuller {
     }
   }
 
+  static class CompareResult {
+    boolean equal = false;
+    boolean checkSummed = false;
+  }
+  
+  private CompareResult compareFile(Directory indexDir, String filename, Long backupIndexFileLen, Long backupIndexFileChecksum) {
+    CompareResult compareResult = new CompareResult();
+    try {
+      try (final IndexInput indexInput = indexDir.openInput(filename, IOContext.READONCE)) {
+        long indexFileLen = indexInput.length();
+        long indexFileChecksum = 0;
+        
+        try {
+          indexFileChecksum = CodecUtil.retrieveChecksum(indexInput);
+          compareResult.checkSummed = true;
+        } catch (Exception e) {
+          LOG.warn("Could not retrieve checksum from file.", e);
+          
+          if (indexFileLen == backupIndexFileLen) {
+            compareResult.equal = true;
+            return compareResult;
+          } else {
+            LOG.warn("File {} did not match. expected checksum is {} and actual checksum is {}. " +
+                "expected length is {} and actual length is {}", filename, backupIndexFileChecksum, indexFileChecksum,
+                backupIndexFileLen, indexFileLen);
+            compareResult.equal = false;
+            return compareResult;
+          }
+        }
+        
+        if (indexFileLen == backupIndexFileLen && indexFileChecksum == backupIndexFileChecksum) {
+          compareResult.equal = true;
+          return compareResult;
+        } else {
+          LOG.warn("File {} did not match. expected checksum is {} and actual checksum is {}. " +
+              "expected length is {} and actual length is {}", filename, backupIndexFileChecksum, indexFileChecksum,
+              backupIndexFileLen, indexFileLen);
+          compareResult.equal = false;
+          return compareResult;
+        }
+      }
+    } catch (IOException e) {
+      LOG.error("Could not read file " + filename + ". Downloading it again", e);
+      compareResult.equal = false;
+      return compareResult;
+    }
+  }
+
   /** Returns true if the file exists (can be opened), false
    *  if it cannot be opened, and (unlike Java's
    *  File.exists) throws IOException if there's some
@@ -839,13 +888,22 @@ public class SnapPuller {
    */
   private boolean isIndexStale(Directory dir) throws IOException {
     for (Map<String, Object> file : filesToDownload) {
-      if (slowFileExists(dir, (String) file.get(NAME))
-              && dir.fileLength((String) file.get(NAME)) != (Long) file.get(SIZE)) {
-        LOG.warn("File " + file.get(NAME) + " expected to be " + file.get(SIZE)
-            + " while it is " + dir.fileLength((String) file.get(NAME)));
-        // file exists and size is different, therefore we must assume
-        // corrupted index
-        return true;
+      String filename = (String) file.get(NAME);
+      Long length = (Long) file.get(SIZE);
+      Long checksum = (Long) file.get(CHECKSUM);
+      if (slowFileExists(dir, filename)) {
+        if (checksum != null) {
+          if (!(compareFile(dir, filename, length, checksum).equal)) {
+            // file exists and size or checksum is different, therefore we must download it again
+            return true;
+          }
+        } else {
+          if (length != dir.fileLength(filename)) {
+            LOG.warn("File {} did not match. expected length is {} and actual length is {}",
+                filename, length, dir.fileLength(filename));
+            return true;
+          }
+        }
       }
     }
     return false;

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java?rev=1657969&r1=1657968&r2=1657969&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java Fri Feb  6 21:38:57 2015
@@ -138,7 +138,7 @@ public class HdfsTestUtil {
     String dir = uri.toString()
         + "/"
         + new File(dataDir).toString().replaceAll(":", "_")
-            .replaceAll("/", "_");
+            .replaceAll("/", "_").replaceAll(" ", "_");
     return dir;
   }