Posted to commits@lucene.apache.org by is...@apache.org on 2018/03/15 14:02:35 UTC

lucene-solr:branch_7x: SOLR-11920: Differential file copy for IndexFetcher

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_7x cf8c9cabd -> 3dece5309


SOLR-11920: Differential file copy for IndexFetcher


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/3dece530
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/3dece530
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/3dece530

Branch: refs/heads/branch_7x
Commit: 3dece53092b866259bd805d29e6c7e3964ed1d7b
Parents: cf8c9ca
Author: Ishan Chattopadhyaya <is...@apache.org>
Authored: Thu Mar 15 19:31:15 2018 +0530
Committer: Ishan Chattopadhyaya <is...@apache.org>
Committed: Thu Mar 15 19:32:19 2018 +0530

----------------------------------------------------------------------
 lucene/tools/junit4/solr-tests.policy           |  1 +
 solr/CHANGES.txt                                |  4 ++
 .../org/apache/solr/handler/IndexFetcher.java   | 52 ++++++++++++++------
 3 files changed, 42 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3dece530/lucene/tools/junit4/solr-tests.policy
----------------------------------------------------------------------
diff --git a/lucene/tools/junit4/solr-tests.policy b/lucene/tools/junit4/solr-tests.policy
index 9c8a39f..1c46a78 100644
--- a/lucene/tools/junit4/solr-tests.policy
+++ b/lucene/tools/junit4/solr-tests.policy
@@ -33,6 +33,7 @@ grant {
   permission java.io.FilePermission "${junit4.tempDir}${/}*", "read,execute,write,delete";
   permission java.io.FilePermission "${clover.db.dir}${/}-", "read,execute,write,delete";
   permission java.io.FilePermission "${tests.linedocsfile}", "read";
+  permission java.nio.file.LinkPermission "hard";
   
   // all possibilities of accepting/binding connections on localhost with ports >=1024:
   permission java.net.SocketPermission "localhost:1024-", "accept,listen";

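The new LinkPermission grant above is needed because the IndexFetcher change below calls Files.createLink(), and under the security manager used by the Solr test framework that call requires java.nio.file.LinkPermission("hard"). A minimal sketch of the guarded call (paths are hypothetical, not taken from the commit):

    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;

    public class HardLinkSketch {
      public static void main(String[] args) throws Exception {
        // Hypothetical paths: an existing index file and the location it is
        // linked to inside a temporary download directory.
        Path existing = Paths.get("data/index/_0.cfs");
        Path link = Paths.get("data/index.20180315191500/_0.cfs");
        // With a SecurityManager installed, this call checks LinkPermission("hard"),
        // which is why solr-tests.policy now grants it.
        Files.createLink(link, existing);
      }
    }
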
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3dece530/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index a581b39..57bcd3b 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -42,6 +42,10 @@ Bug Fixes
 Optimizations
 ----------------------
 
+* SOLR-11920: IndexFetcher now fetches only those files (from master/leader) that are different. This
+  differential fetching now speeds up recovery times when full index replication is needed, but only
+  a few segments diverge. (Ishan Chattopadhyaya, Shaun Sabo, John Gallagher)
+
 Other Changes
 ----------------------
 

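In outline, the change works like this: during a fetch that requires a complete index copy, any file whose checksum matches the local copy is hard-linked from the existing index directory into the temporary download directory instead of being downloaded, provided both directories are filesystem-backed. A condensed, illustrative sketch of that per-file decision (names are paraphrased; the committed code is in the IndexFetcher hunk below):

    public class DifferentialFetchDecisionSketch {

      enum Action { HARD_LINK_LOCAL_COPY, DOWNLOAD_FROM_LEADER, SKIP }

      // Mirrors the branching in downloadIndexFiles(); parameter names are
      // paraphrased from the fields used in the committed code.
      static Action decide(boolean fullCopyNeeded, boolean checksumsEqual,
                           boolean checkSummed, boolean alwaysDownload,
                           boolean bothDirsOnFilesystem) {
        if (!checksumsEqual || fullCopyNeeded || alwaysDownload) {
          if (fullCopyNeeded && bothDirsOnFilesystem && checksumsEqual && checkSummed) {
            // The local file is verifiably identical to the leader's copy:
            // link it into the temporary index directory rather than copy it.
            return Action.HARD_LINK_LOCAL_COPY;
          }
          return Action.DOWNLOAD_FROM_LEADER;
        }
        // Incremental fetch and the file is unchanged: nothing to do.
        return Action.SKIP;
      }

      public static void main(String[] args) {
        // Full replication requested, but this segment file already matches the leader.
        System.out.println(decide(true, true, true, false, true)); // HARD_LINK_LOCAL_COPY
      }
    }
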
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3dece530/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java b/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
index 7837bdc..2476a81 100644
--- a/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
+++ b/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
@@ -60,6 +60,8 @@ import org.apache.lucene.index.IndexCommit;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.SegmentInfos;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.FilterDirectory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
@@ -360,7 +362,7 @@ public class IndexFetcher {
     boolean successfulInstall = false;
     markReplicationStart();
     Directory tmpIndexDir = null;
-    String tmpIndex;
+    String tmpIndexDirPath;
     Directory indexDir = null;
     String indexDirPath;
     boolean deleteTmpIdxDir = true;
@@ -496,9 +498,9 @@ public class IndexFetcher {
 
       String timestamp = new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT).format(new Date());
       String tmpIdxDirName = "index." + timestamp;
-      tmpIndex = solrCore.getDataDir() + tmpIdxDirName;
+      tmpIndexDirPath = solrCore.getDataDir() + tmpIdxDirName;
 
-      tmpIndexDir = solrCore.getDirectoryFactory().get(tmpIndex, DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType);
+      tmpIndexDir = solrCore.getDirectoryFactory().get(tmpIndexDirPath, DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType);
 
       // tmp dir for tlog files
       if (tlogFilesToDownload != null) {
@@ -511,8 +513,9 @@ public class IndexFetcher {
 
       try {
 
-        //We will compare all the index files from the master vs the index files on disk to see if there is a mismatch
-        //in the metadata. If there is a mismatch for the same index file then we download the entire index again.
+        // We will compare all the index files from the master vs the index files on disk to see if there is a mismatch
+        // in the metadata. If there is a mismatch for the same index file then we download the entire index
+        // (except when differential copy is applicable) again.
         if (!isFullCopyNeeded && isIndexStale(indexDir)) {
           isFullCopyNeeded = true;
         }
@@ -563,7 +566,8 @@ public class IndexFetcher {
           LOG.info("Starting download (fullCopy={}) to {}", isFullCopyNeeded, tmpIndexDir);
           successfulInstall = false;
 
-          long bytesDownloaded = downloadIndexFiles(isFullCopyNeeded, indexDir, tmpIndexDir, latestGeneration);
+          long bytesDownloaded = downloadIndexFiles(isFullCopyNeeded, indexDir,
+              tmpIndexDir, indexDirPath, tmpIndexDirPath, latestGeneration);
           if (tlogFilesToDownload != null) {
             bytesDownloaded += downloadTlogFiles(tmpTlogDir, latestGeneration);
             reloadCore = true; // reload update log
@@ -983,18 +987,26 @@ public class IndexFetcher {
    * Download the index files. If a new index is needed, download all the files.
    *
    * @param downloadCompleteIndex is it a fresh index copy
-   * @param tmpIndexDir              the directory to which files need to be downloadeed to
    * @param indexDir                 the indexDir to be merged to
+   * @param tmpIndexDir              the directory to which files need to be downloaded to
+   * @param indexDirPath             the path of indexDir
    * @param latestGeneration         the version number
    *
    * @return number of bytes downloaded
    */
-  private long downloadIndexFiles(boolean downloadCompleteIndex, Directory indexDir, Directory tmpIndexDir, long latestGeneration)
+  private long downloadIndexFiles(boolean downloadCompleteIndex, Directory indexDir, Directory tmpIndexDir,
+                                  String indexDirPath, String tmpIndexDirPath, long latestGeneration)
       throws Exception {
     if (LOG.isDebugEnabled()) {
       LOG.debug("Download files to dir: " + Arrays.asList(indexDir.listAll()));
     }
     long bytesDownloaded = 0;
+    long bytesSkippedCopying = 0;
+    boolean doDifferentialCopy = (indexDir instanceof FSDirectory ||
+        (indexDir instanceof FilterDirectory && FilterDirectory.unwrap(indexDir) instanceof FSDirectory))
+        && (tmpIndexDir instanceof FSDirectory ||
+        (tmpIndexDir instanceof FilterDirectory && FilterDirectory.unwrap(tmpIndexDir) instanceof FSDirectory));
+
     for (Map<String,Object> file : filesToDownload) {
       String filename = (String) file.get(NAME);
       long size = (Long) file.get(SIZE);
@@ -1002,17 +1014,27 @@ public class IndexFetcher {
       boolean alwaysDownload = filesToAlwaysDownloadIfNoChecksums(filename, size, compareResult);
       LOG.debug("Downloading file={} size={} checksum={} alwaysDownload={}", filename, size, file.get(CHECKSUM), alwaysDownload);
       if (!compareResult.equal || downloadCompleteIndex || alwaysDownload) {
-        dirFileFetcher = new DirectoryFileFetcher(tmpIndexDir, file,
-            (String) file.get(NAME), FILE, latestGeneration);
-        currentFile = file;
-        dirFileFetcher.fetchFile();
-        bytesDownloaded += dirFileFetcher.getBytesDownloaded();
+        if (downloadCompleteIndex && doDifferentialCopy && compareResult.equal && compareResult.checkSummed) {
+          File localFile = new File(indexDirPath, filename);
+          LOG.info("Don't need to download this file. Local file's path is: {}, checksum is: {}",
+              localFile.getAbsolutePath(), file.get(CHECKSUM));
+          // A hard link here should survive the eventual directory move, and should be more space efficient as
+          // compared to a file copy. TODO: Maybe we could do a move safely here?
+          Files.createLink(new File(tmpIndexDirPath, filename).toPath(), localFile.toPath());
+          bytesSkippedCopying += localFile.length();
+        } else {
+          dirFileFetcher = new DirectoryFileFetcher(tmpIndexDir, file,
+              (String) file.get(NAME), FILE, latestGeneration);
+          currentFile = file;
+          dirFileFetcher.fetchFile();
+          bytesDownloaded += dirFileFetcher.getBytesDownloaded();
+        }
         filesDownloaded.add(new HashMap<>(file));
       } else {
-        LOG.info("Skipping download for " + file.get(NAME)
-            + " because it already exists");
+        LOG.info("Skipping download for {} because it already exists", file.get(NAME));
       }
     }
+    LOG.info("Bytes downloaded: {}, Bytes skipped downloading: {}", bytesDownloaded, bytesSkippedCopying);
     return bytesDownloaded;
   }
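
The doDifferentialCopy guard above only enables hard linking when both the current index directory and the temporary directory are filesystem-backed, directly or behind a FilterDirectory wrapper (for example NRTCachingDirectory). A hypothetical standalone helper expressing the same check, not part of the commit:

    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.store.FilterDirectory;

    public final class FilesystemBackedCheck {
      // True when the Directory writes to the local filesystem, possibly behind
      // FilterDirectory wrappers, i.e. when creating a hard link to its files
      // is meaningful.
      static boolean isFilesystemBacked(Directory dir) {
        return dir instanceof FSDirectory
            || (dir instanceof FilterDirectory
                && FilterDirectory.unwrap(dir) instanceof FSDirectory);
      }
    }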