You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by is...@apache.org on 2018/03/15 14:02:35 UTC
lucene-solr:branch_7x: SOLR-11920: Differential file copy for
IndexFetcher
Repository: lucene-solr
Updated Branches:
refs/heads/branch_7x cf8c9cabd -> 3dece5309
SOLR-11920: Differential file copy for IndexFetcher
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/3dece530
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/3dece530
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/3dece530
Branch: refs/heads/branch_7x
Commit: 3dece53092b866259bd805d29e6c7e3964ed1d7b
Parents: cf8c9ca
Author: Ishan Chattopadhyaya <is...@apache.org>
Authored: Thu Mar 15 19:31:15 2018 +0530
Committer: Ishan Chattopadhyaya <is...@apache.org>
Committed: Thu Mar 15 19:32:19 2018 +0530
----------------------------------------------------------------------
lucene/tools/junit4/solr-tests.policy | 1 +
solr/CHANGES.txt | 4 ++
.../org/apache/solr/handler/IndexFetcher.java | 52 ++++++++++++++------
3 files changed, 42 insertions(+), 15 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3dece530/lucene/tools/junit4/solr-tests.policy
----------------------------------------------------------------------
diff --git a/lucene/tools/junit4/solr-tests.policy b/lucene/tools/junit4/solr-tests.policy
index 9c8a39f..1c46a78 100644
--- a/lucene/tools/junit4/solr-tests.policy
+++ b/lucene/tools/junit4/solr-tests.policy
@@ -33,6 +33,7 @@ grant {
permission java.io.FilePermission "${junit4.tempDir}${/}*", "read,execute,write,delete";
permission java.io.FilePermission "${clover.db.dir}${/}-", "read,execute,write,delete";
permission java.io.FilePermission "${tests.linedocsfile}", "read";
+ permission java.nio.file.LinkPermission "hard";
// all possibilities of accepting/binding connections on localhost with ports >=1024:
permission java.net.SocketPermission "localhost:1024-", "accept,listen";
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3dece530/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index a581b39..57bcd3b 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -42,6 +42,10 @@ Bug Fixes
Optimizations
----------------------
+* SOLR-11920: IndexFetcher now fetches only those files (from master/leader) that are different. This
+ differential fetching now speeds up recovery times when full index replication is needed but only
+ a few segments diverge. (Ishan Chattopadhyaya, Shaun Sabo, John Gallagher)
+
Other Changes
----------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3dece530/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java b/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
index 7837bdc..2476a81 100644
--- a/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
+++ b/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
@@ -60,6 +60,8 @@ import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.FilterDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
@@ -360,7 +362,7 @@ public class IndexFetcher {
boolean successfulInstall = false;
markReplicationStart();
Directory tmpIndexDir = null;
- String tmpIndex;
+ String tmpIndexDirPath;
Directory indexDir = null;
String indexDirPath;
boolean deleteTmpIdxDir = true;
@@ -496,9 +498,9 @@ public class IndexFetcher {
String timestamp = new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT).format(new Date());
String tmpIdxDirName = "index." + timestamp;
- tmpIndex = solrCore.getDataDir() + tmpIdxDirName;
+ tmpIndexDirPath = solrCore.getDataDir() + tmpIdxDirName;
- tmpIndexDir = solrCore.getDirectoryFactory().get(tmpIndex, DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType);
+ tmpIndexDir = solrCore.getDirectoryFactory().get(tmpIndexDirPath, DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType);
// tmp dir for tlog files
if (tlogFilesToDownload != null) {
@@ -511,8 +513,9 @@ public class IndexFetcher {
try {
- //We will compare all the index files from the master vs the index files on disk to see if there is a mismatch
- //in the metadata. If there is a mismatch for the same index file then we download the entire index again.
+ // We will compare all the index files from the master vs the index files on disk to see if there is a mismatch
+ // in the metadata. If there is a mismatch for the same index file then we download the entire index
+ // (except when differential copy is applicable) again.
if (!isFullCopyNeeded && isIndexStale(indexDir)) {
isFullCopyNeeded = true;
}
@@ -563,7 +566,8 @@ public class IndexFetcher {
LOG.info("Starting download (fullCopy={}) to {}", isFullCopyNeeded, tmpIndexDir);
successfulInstall = false;
- long bytesDownloaded = downloadIndexFiles(isFullCopyNeeded, indexDir, tmpIndexDir, latestGeneration);
+ long bytesDownloaded = downloadIndexFiles(isFullCopyNeeded, indexDir,
+ tmpIndexDir, indexDirPath, tmpIndexDirPath, latestGeneration);
if (tlogFilesToDownload != null) {
bytesDownloaded += downloadTlogFiles(tmpTlogDir, latestGeneration);
reloadCore = true; // reload update log
@@ -983,18 +987,26 @@ public class IndexFetcher {
* Download the index files. If a new index is needed, download all the files.
*
* @param downloadCompleteIndex is it a fresh index copy
- * @param tmpIndexDir the directory to which files need to be downloadeed to
* @param indexDir the indexDir to be merged to
+ * @param tmpIndexDir the directory to which files need to be downloaded to
+ * @param indexDirPath the path of indexDir
* @param latestGeneration the version number
*
* @return number of bytes downloaded
*/
- private long downloadIndexFiles(boolean downloadCompleteIndex, Directory indexDir, Directory tmpIndexDir, long latestGeneration)
+ private long downloadIndexFiles(boolean downloadCompleteIndex, Directory indexDir, Directory tmpIndexDir,
+ String indexDirPath, String tmpIndexDirPath, long latestGeneration)
throws Exception {
if (LOG.isDebugEnabled()) {
LOG.debug("Download files to dir: " + Arrays.asList(indexDir.listAll()));
}
long bytesDownloaded = 0;
+ long bytesSkippedCopying = 0;
+ boolean doDifferentialCopy = (indexDir instanceof FSDirectory ||
+ (indexDir instanceof FilterDirectory && FilterDirectory.unwrap(indexDir) instanceof FSDirectory))
+ && (tmpIndexDir instanceof FSDirectory ||
+ (tmpIndexDir instanceof FilterDirectory && FilterDirectory.unwrap(tmpIndexDir) instanceof FSDirectory));
+
for (Map<String,Object> file : filesToDownload) {
String filename = (String) file.get(NAME);
long size = (Long) file.get(SIZE);
@@ -1002,17 +1014,27 @@ public class IndexFetcher {
boolean alwaysDownload = filesToAlwaysDownloadIfNoChecksums(filename, size, compareResult);
LOG.debug("Downloading file={} size={} checksum={} alwaysDownload={}", filename, size, file.get(CHECKSUM), alwaysDownload);
if (!compareResult.equal || downloadCompleteIndex || alwaysDownload) {
- dirFileFetcher = new DirectoryFileFetcher(tmpIndexDir, file,
- (String) file.get(NAME), FILE, latestGeneration);
- currentFile = file;
- dirFileFetcher.fetchFile();
- bytesDownloaded += dirFileFetcher.getBytesDownloaded();
+ if (downloadCompleteIndex && doDifferentialCopy && compareResult.equal && compareResult.checkSummed) {
+ File localFile = new File(indexDirPath, filename);
+ LOG.info("Don't need to download this file. Local file's path is: {}, checksum is: {}",
+ localFile.getAbsolutePath(), file.get(CHECKSUM));
+ // A hard link here should survive the eventual directory move, and should be more space efficient as
+ // compared to a file copy. TODO: Maybe we could do a move safely here?
+ Files.createLink(new File(tmpIndexDirPath, filename).toPath(), localFile.toPath());
+ bytesSkippedCopying += localFile.length();
+ } else {
+ dirFileFetcher = new DirectoryFileFetcher(tmpIndexDir, file,
+ (String) file.get(NAME), FILE, latestGeneration);
+ currentFile = file;
+ dirFileFetcher.fetchFile();
+ bytesDownloaded += dirFileFetcher.getBytesDownloaded();
+ }
filesDownloaded.add(new HashMap<>(file));
} else {
- LOG.info("Skipping download for " + file.get(NAME)
- + " because it already exists");
+ LOG.info("Skipping download for {} because it already exists", file.get(NAME));
}
}
+ LOG.info("Bytes downloaded: {}, Bytes skipped downloading: {}", bytesDownloaded, bytesSkippedCopying);
return bytesDownloaded;
}