You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2015/02/06 22:38:58 UTC
svn commit: r1657969 - in /lucene/dev/trunk/solr: CHANGES.txt
core/src/java/org/apache/solr/handler/ReplicationHandler.java
core/src/java/org/apache/solr/handler/SnapPuller.java
core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java
Author: markrmiller
Date: Fri Feb 6 21:38:57 2015
New Revision: 1657969
URL: http://svn.apache.org/r1657969
Log:
SOLR-6920, SOLR-6640: A replicated index can end up corrupted when small files end up with the same file name and size.
Modified:
lucene/dev/trunk/solr/CHANGES.txt
lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/SnapPuller.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1657969&r1=1657968&r2=1657969&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Fri Feb 6 21:38:57 2015
@@ -616,6 +616,9 @@ Bug Fixes
* olap.* in AnalyticsComponent
(Alexandre Rafalovitch & hossman)
+* SOLR-6920: A replicated index can end up corrupted when small files end up with the same
+ file name and size. (Varun Thacker, Mark Miller)
+
Optimizations
----------------------
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java?rev=1657969&r1=1657968&r2=1657969&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java Fri Feb 6 21:38:57 2015
@@ -33,7 +33,6 @@ import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
-import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
@@ -45,10 +44,13 @@ import java.util.zip.Checksum;
import java.util.zip.DeflaterOutputStream;
import org.apache.commons.io.IOUtils;
+import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.SegmentCommitInfo;
+import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
@@ -425,27 +427,49 @@ public class ReplicationHandler extends
List<Map<String, Object>> result = new ArrayList<>();
Directory dir = null;
try {
- // get all the files in the commit
- // use a set to workaround possible Lucene bug which returns same file
- // name multiple times
- Collection<String> files = new HashSet<>(commit.getFileNames());
dir = core.getDirectoryFactory().get(core.getNewIndexDir(), DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
- try {
-
- for (String fileName : files) {
- if (fileName.endsWith(".lock")) continue;
+ SegmentInfos infos = SegmentInfos.readCommit(dir, commit.getSegmentsFileName());
+ for (SegmentCommitInfo commitInfo : infos) {
+ for (String file : commitInfo.files()) {
Map<String,Object> fileMeta = new HashMap<>();
- fileMeta.put(NAME, fileName);
- fileMeta.put(SIZE, dir.fileLength(fileName));
+ fileMeta.put(NAME, file);
+ fileMeta.put(SIZE, dir.fileLength(file));
+
+ try (final IndexInput in = dir.openInput(file, IOContext.READONCE)) {
+ try {
+ long checksum = CodecUtil.retrieveChecksum(in);
+ fileMeta.put(CHECKSUM, checksum);
+ } catch(Exception e) {
+ LOG.warn("Could not read checksum from index file.", e);
+ }
+ }
+
result.add(fileMeta);
}
- } finally {
- core.getDirectoryFactory().release(dir);
}
+
+ // add the segments_N file
+ Map<String,Object> fileMeta = new HashMap<>();
+ fileMeta.put(NAME, infos.getSegmentsFileName());
+ fileMeta.put(SIZE, dir.fileLength(infos.getSegmentsFileName()));
+ if (infos.getId() != null) {
+ try (final IndexInput in = dir.openInput(infos.getSegmentsFileName(), IOContext.READONCE)) {
+ fileMeta.put(CHECKSUM, CodecUtil.retrieveChecksum(in));
+ }
+ }
+ result.add(fileMeta);
} catch (IOException e) {
rsp.add("status", "unable to get file names for given index generation");
rsp.add("exception", e);
LOG.error("Unable to get file names for indexCommit generation: " + gen, e);
+ } finally {
+ if (dir != null) {
+ try {
+ core.getDirectoryFactory().release(dir);
+ } catch (IOException e) {
+ SolrException.log(LOG, "Could not release directory after fetching file list", e);
+ }
+ }
}
rsp.add(CMD_GET_FILE_LIST, result);
if (confFileNameAlias.size() < 1 || core.getCoreDescriptor().getCoreContainer().isZooKeeperAware())
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/SnapPuller.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/SnapPuller.java?rev=1657969&r1=1657968&r2=1657969&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/SnapPuller.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/SnapPuller.java Fri Feb 6 21:38:57 2015
@@ -16,43 +16,25 @@
*/
package org.apache.solr.handler;
-import org.apache.commons.io.IOUtils;
-import org.apache.http.client.HttpClient;
-import org.apache.lucene.index.IndexCommit;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.SegmentCommitInfo;
-import org.apache.lucene.index.SegmentInfos;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.client.solrj.impl.HttpClientUtil;
-import org.apache.solr.client.solrj.impl.HttpSolrClient;
-import org.apache.solr.client.solrj.request.QueryRequest;
-import org.apache.solr.common.SolrException;
-import org.apache.solr.common.SolrException.ErrorCode;
-import org.apache.solr.common.params.CommonParams;
-import org.apache.solr.common.params.ModifiableSolrParams;
-import org.apache.solr.common.util.ExecutorUtil;
-import org.apache.solr.common.util.FastInputStream;
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.core.DirectoryFactory;
-import org.apache.solr.core.DirectoryFactory.DirContext;
-import org.apache.solr.core.IndexDeletionPolicyWrapper;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.handler.ReplicationHandler.FileInfo;
-import org.apache.solr.request.LocalSolrQueryRequest;
-import org.apache.solr.request.SolrQueryRequest;
-import org.apache.solr.search.SolrIndexSearcher;
-import org.apache.solr.update.CommitUpdateCommand;
-import org.apache.solr.util.DefaultSolrThreadFactory;
-import org.apache.solr.util.FileUtils;
-import org.apache.solr.util.PropertiesInputStream;
-import org.apache.solr.util.PropertiesOutputStream;
-import org.apache.solr.util.RefCounted;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import static org.apache.solr.handler.ReplicationHandler.ALIAS;
+import static org.apache.solr.handler.ReplicationHandler.CHECKSUM;
+import static org.apache.solr.handler.ReplicationHandler.CMD_DETAILS;
+import static org.apache.solr.handler.ReplicationHandler.CMD_GET_FILE;
+import static org.apache.solr.handler.ReplicationHandler.CMD_GET_FILE_LIST;
+import static org.apache.solr.handler.ReplicationHandler.CMD_INDEX_VERSION;
+import static org.apache.solr.handler.ReplicationHandler.COMMAND;
+import static org.apache.solr.handler.ReplicationHandler.COMPRESSION;
+import static org.apache.solr.handler.ReplicationHandler.CONF_FILES;
+import static org.apache.solr.handler.ReplicationHandler.CONF_FILE_SHORT;
+import static org.apache.solr.handler.ReplicationHandler.EXTERNAL;
+import static org.apache.solr.handler.ReplicationHandler.FILE;
+import static org.apache.solr.handler.ReplicationHandler.FILE_STREAM;
+import static org.apache.solr.handler.ReplicationHandler.GENERATION;
+import static org.apache.solr.handler.ReplicationHandler.INTERNAL;
+import static org.apache.solr.handler.ReplicationHandler.MASTER_URL;
+import static org.apache.solr.handler.ReplicationHandler.NAME;
+import static org.apache.solr.handler.ReplicationHandler.OFFSET;
+import static org.apache.solr.handler.ReplicationHandler.SIZE;
import java.io.File;
import java.io.FileNotFoundException;
@@ -94,25 +76,43 @@ import java.util.zip.Adler32;
import java.util.zip.Checksum;
import java.util.zip.InflaterInputStream;
-import static org.apache.solr.handler.ReplicationHandler.ALIAS;
-import static org.apache.solr.handler.ReplicationHandler.CHECKSUM;
-import static org.apache.solr.handler.ReplicationHandler.CMD_DETAILS;
-import static org.apache.solr.handler.ReplicationHandler.CMD_GET_FILE;
-import static org.apache.solr.handler.ReplicationHandler.CMD_GET_FILE_LIST;
-import static org.apache.solr.handler.ReplicationHandler.CMD_INDEX_VERSION;
-import static org.apache.solr.handler.ReplicationHandler.COMMAND;
-import static org.apache.solr.handler.ReplicationHandler.COMPRESSION;
-import static org.apache.solr.handler.ReplicationHandler.CONF_FILES;
-import static org.apache.solr.handler.ReplicationHandler.CONF_FILE_SHORT;
-import static org.apache.solr.handler.ReplicationHandler.EXTERNAL;
-import static org.apache.solr.handler.ReplicationHandler.FILE;
-import static org.apache.solr.handler.ReplicationHandler.FILE_STREAM;
-import static org.apache.solr.handler.ReplicationHandler.GENERATION;
-import static org.apache.solr.handler.ReplicationHandler.INTERNAL;
-import static org.apache.solr.handler.ReplicationHandler.MASTER_URL;
-import static org.apache.solr.handler.ReplicationHandler.NAME;
-import static org.apache.solr.handler.ReplicationHandler.OFFSET;
-import static org.apache.solr.handler.ReplicationHandler.SIZE;
+import org.apache.commons.io.IOUtils;
+import org.apache.http.client.HttpClient;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.index.IndexCommit;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.SegmentInfos;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.impl.HttpClientUtil;
+import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.request.QueryRequest;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrException.ErrorCode;
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.util.ExecutorUtil;
+import org.apache.solr.common.util.FastInputStream;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.DirectoryFactory;
+import org.apache.solr.core.DirectoryFactory.DirContext;
+import org.apache.solr.core.IndexDeletionPolicyWrapper;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.handler.ReplicationHandler.FileInfo;
+import org.apache.solr.request.LocalSolrQueryRequest;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.update.CommitUpdateCommand;
+import org.apache.solr.util.DefaultSolrThreadFactory;
+import org.apache.solr.util.FileUtils;
+import org.apache.solr.util.PropertiesInputStream;
+import org.apache.solr.util.PropertiesOutputStream;
+import org.apache.solr.util.RefCounted;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* <p/> Provides functionality of downloading changed index files as well as config files and a timer for scheduling fetches from the
@@ -445,8 +445,7 @@ public class SnapPuller {
+ isFullCopyNeeded);
successfulInstall = false;
- downloadIndexFiles(isFullCopyNeeded, indexDir, tmpIndexDir,
- latestGeneration);
+ downloadIndexFiles(isFullCopyNeeded, indexDir, tmpIndexDir, latestGeneration);
LOG.info("Total time taken for download : "
+ ((System.currentTimeMillis() - replicationStartTime) / 1000)
+ " secs");
@@ -796,15 +795,17 @@ public class SnapPuller {
* @param indexDir the indexDir to be merged to
* @param latestGeneration the version number
*/
- private void downloadIndexFiles(boolean downloadCompleteIndex,
- Directory indexDir, Directory tmpIndexDir, long latestGeneration)
+ private void downloadIndexFiles(boolean downloadCompleteIndex, Directory indexDir, Directory tmpIndexDir, long latestGeneration)
throws Exception {
if (LOG.isDebugEnabled()) {
LOG.debug("Download files to dir: " + Arrays.asList(indexDir.listAll()));
}
for (Map<String,Object> file : filesToDownload) {
- if (!slowFileExists(indexDir, (String) file.get(NAME))
- || downloadCompleteIndex) {
+ String filename = (String) file.get(NAME);
+ CompareResult compareResult = compareFile(indexDir, filename, (Long) file.get(SIZE), (Long) file.get(CHECKSUM));
+ if (!compareResult.equal || downloadCompleteIndex
+ || (!compareResult.checkSummed && (filename.endsWith(".si") || filename.endsWith(".liv")
+ || filename.startsWith("segments_")))) {
dirFileFetcher = new DirectoryFileFetcher(tmpIndexDir, file,
(String) file.get(NAME), false, latestGeneration);
currentFile = file;
@@ -817,6 +818,54 @@ public class SnapPuller {
}
}
+ static class CompareResult {
+ boolean equal = false;
+ boolean checkSummed = false;
+ }
+
+ private CompareResult compareFile(Directory indexDir, String filename, Long backupIndexFileLen, Long backupIndexFileChecksum) {
+ CompareResult compareResult = new CompareResult();
+ try {
+ try (final IndexInput indexInput = indexDir.openInput(filename, IOContext.READONCE)) {
+ long indexFileLen = indexInput.length();
+ long indexFileChecksum = 0;
+
+ try {
+ indexFileChecksum = CodecUtil.retrieveChecksum(indexInput);
+ compareResult.checkSummed = true;
+ } catch (Exception e) {
+ LOG.warn("Could not retrieve checksum from file.", e);
+
+ if (indexFileLen == backupIndexFileLen) {
+ compareResult.equal = true;
+ return compareResult;
+ } else {
+ LOG.warn("File {} did not match. expected checksum is {} and actual is checksum {}. " +
+ "expected length is {} and actual length is {}", filename, backupIndexFileChecksum, indexFileChecksum,
+ backupIndexFileLen, indexFileLen);
+ compareResult.equal = false;
+ return compareResult;
+ }
+ }
+
+ if (indexFileLen == backupIndexFileLen && indexFileChecksum == backupIndexFileChecksum) {
+ compareResult.equal = true;
+ return compareResult;
+ } else {
+ LOG.warn("File {} did not match. expected checksum is {} and actual is checksum {}. " +
+ "expected length is {} and actual length is {}", filename, backupIndexFileChecksum, indexFileChecksum,
+ backupIndexFileLen, indexFileLen);
+ compareResult.equal = false;
+ return compareResult;
+ }
+ }
+ } catch (IOException e) {
+ LOG.error("Could not read file " + filename + ". Downloading it again", e);
+ compareResult.equal = false;
+ return compareResult;
+ }
+ }
+
/** Returns true if the file exists (can be opened), false
* if it cannot be opened, and (unlike Java's
* File.exists) throws IOException if there's some
@@ -839,13 +888,22 @@ public class SnapPuller {
*/
private boolean isIndexStale(Directory dir) throws IOException {
for (Map<String, Object> file : filesToDownload) {
- if (slowFileExists(dir, (String) file.get(NAME))
- && dir.fileLength((String) file.get(NAME)) != (Long) file.get(SIZE)) {
- LOG.warn("File " + file.get(NAME) + " expected to be " + file.get(SIZE)
- + " while it is " + dir.fileLength((String) file.get(NAME)));
- // file exists and size is different, therefore we must assume
- // corrupted index
- return true;
+ String filename = (String) file.get(NAME);
+ Long length = (Long) file.get(SIZE);
+ Long checksum = (Long) file.get(CHECKSUM);
+ if (slowFileExists(dir, filename)) {
+ if (checksum != null) {
+ if (!(compareFile(dir, filename, length, checksum).equal)) {
+ // file exists and size or checksum is different, therefore we must download it again
+ return true;
+ }
+ } else {
+ if (length != dir.fileLength(filename)) {
+ LOG.warn("File {} did not match. expected length is {} and actual length is {}",
+ filename, length, dir.fileLength(filename));
+ return true;
+ }
+ }
}
}
return false;
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java?rev=1657969&r1=1657968&r2=1657969&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsTestUtil.java Fri Feb 6 21:38:57 2015
@@ -138,7 +138,7 @@ public class HdfsTestUtil {
String dir = uri.toString()
+ "/"
+ new File(dataDir).toString().replaceAll(":", "_")
- .replaceAll("/", "_");
+ .replaceAll("/", "_").replaceAll(" ", "_");
return dir;
}