You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2019/06/11 15:28:01 UTC

[lucene-solr] branch master updated: LUCENE-8835: Respect file extension when listing files form FileSwitchDirectory (#700)

This is an automated email from the ASF dual-hosted git repository.

simonw pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new b6c68cc  LUCENE-8835: Respect file extension when listing files form FileSwitchDirectory (#700)
b6c68cc is described below

commit b6c68ccdedc7bedb73d90c7e21b285b4e71f7ff4
Author: Simon Willnauer <si...@apache.org>
AuthorDate: Tue Jun 11 17:27:55 2019 +0200

    LUCENE-8835: Respect file extension when listing files form FileSwitchDirectory (#700)
    
    FileSwitchDirectory splits file actions between 2 directories based
    on file extensions. The extensions are respected on write operations
    like delete or create but ignored when we list the content of the
    directories. Until now we only deduplicated the contents on
    Directory#listAll which can cause inconsistencies and hard to debug
    errors due to double deletions in IndexWriter is a file is pending
    delete in one of the directories but still shows up in the directory
    listing form the other directory. This case can happen if both
    directories point to the same underlying FS directory which is a
    common use-case to split between mmap and NIOFS.
    
    This change filters out files from directories depending on their
    file extension to make sure files that are deleted in one directory
    are not returned form another if they point to the same FS directory.
---
 lucene/CHANGES.txt                                 |  4 +++
 .../apache/lucene/store/FileSwitchDirectory.java   | 16 +++++++++--
 .../lucene/store/TestFileSwitchDirectory.java      | 33 +++++++++++++++++++++-
 .../org/apache/lucene/util/LuceneTestCase.java     |  8 ++++++
 4 files changed, 57 insertions(+), 4 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 148cf4b..0c80901 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -74,6 +74,10 @@ Bug Fixes
 * LUCENE-8843: Don't ignore exceptions that are thrown when trying to open a
   file in IOUtils#fsync. (Jason Tedor via Adrien Grand)
 
+* LUCENE-8835: FileSwitchDirectory now respects the file extension when listing directory
+  contents to ensure we don't expose pending deletes if both directory point to the same
+  underlying filesystem directory. (Simon Willnauer)
+
 Improvements
 
 * LUCENE-7840: Non-scoring BooleanQuery now removes SHOULD clauses before building the scorer supplier
diff --git a/lucene/core/src/java/org/apache/lucene/store/FileSwitchDirectory.java b/lucene/core/src/java/org/apache/lucene/store/FileSwitchDirectory.java
index da52c5d..7484ae0 100644
--- a/lucene/core/src/java/org/apache/lucene/store/FileSwitchDirectory.java
+++ b/lucene/core/src/java/org/apache/lucene/store/FileSwitchDirectory.java
@@ -86,7 +86,7 @@ public class FileSwitchDirectory extends Directory {
   
   @Override
   public String[] listAll() throws IOException {
-    Set<String> files = new HashSet<>();
+    List<String> files = new ArrayList<>();
     // LUCENE-3380: either or both of our dirs could be FSDirs,
     // but if one underlying delegate is an FSDir and mkdirs() has not
     // yet been called, because so far everything is written to the other,
@@ -94,14 +94,24 @@ public class FileSwitchDirectory extends Directory {
     NoSuchFileException exc = null;
     try {
       for(String f : primaryDir.listAll()) {
-        files.add(f);
+        String ext = getExtension(f);
+        // we should respect the extension here as well to ensure that we don't list a file that is already
+        // deleted or rather in the one of the directories pending deletions if both directories point
+        // to the same filesystem path. This is quite common for instance to use NIOFS as a primary
+        // and MMap as a secondary to only mmap files like docvalues or term dictionaries.
+        if (primaryExtensions.contains(ext)) {
+          files.add(f);
+        }
       }
     } catch (NoSuchFileException e) {
       exc = e;
     }
     try {
       for(String f : secondaryDir.listAll()) {
-        files.add(f);
+        String ext = getExtension(f);
+        if (primaryExtensions.contains(ext) == false) {
+          files.add(f);
+        }
       }
     } catch (NoSuchFileException e) {
       // we got NoSuchFileException from both dirs
diff --git a/lucene/core/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java b/lucene/core/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java
index f324bb3..031d5eb 100644
--- a/lucene/core/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java
+++ b/lucene/core/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java
@@ -18,10 +18,14 @@ package org.apache.lucene.store;
 
 
 import java.io.IOException;
+import java.net.URI;
+import java.nio.file.FileSystem;
 import java.nio.file.Path;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.Set;
+import java.util.function.Function;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter;
@@ -31,9 +35,10 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.TestIndexWriterReader;
+import org.apache.lucene.mockfile.FilterPath;
+import org.apache.lucene.mockfile.WindowsFS;
 import org.apache.lucene.util.TestUtil;
 
-// See: https://issues.apache.org/jira/browse/SOLR-12028 Tests cannot remove files on Windows machines occasionally
 public class TestFileSwitchDirectory extends BaseDirectoryTestCase {
 
   /**
@@ -131,4 +136,30 @@ public class TestFileSwitchDirectory extends BaseDirectoryTestCase {
     }
     return newFSSwitchDirectory(extensions);
   }
+
+  public void testDeleteAndList() throws IOException {
+    // relies on windows semantics
+    Path path = createTempDir();
+    FileSystem fs = new WindowsFS(path.getFileSystem()).getFileSystem(URI.create("file:///"));
+    Path indexPath = new FilterPath(path, fs);
+    try (final FileSwitchDirectory dir = new FileSwitchDirectory(Collections.singleton("tim"),
+        new SimpleFSDirectory(indexPath), new SimpleFSDirectory(indexPath), true)) {
+      dir.createOutput("foo.tim", IOContext.DEFAULT).close();
+      Function<String[], Long> stripExtra = array -> Arrays.asList(array).stream()
+          .filter(f -> f.startsWith("extra") == false).count();
+      try (IndexInput indexInput = dir.openInput("foo.tim", IOContext.DEFAULT)) {
+        dir.deleteFile("foo.tim");
+        assertEquals(1, dir.getPrimaryDir().getPendingDeletions().size());
+        assertEquals(1, dir.getPendingDeletions().size());
+        assertEquals(0, stripExtra.apply(dir.listAll()).intValue());
+        assertEquals(0, stripExtra.apply(dir.getPrimaryDir().listAll()).intValue());
+        assertEquals(1, stripExtra.apply(dir.getSecondaryDir().listAll()).intValue());
+      }
+      assertEquals(0, dir.getPrimaryDir().getPendingDeletions().size());
+      assertEquals(0, dir.getPendingDeletions().size());
+      assertEquals(0, stripExtra.apply(dir.listAll()).intValue());
+      assertEquals(0, stripExtra.apply(dir.getPrimaryDir().listAll()).intValue());
+      assertEquals(0, stripExtra.apply(dir.getSecondaryDir().listAll()).intValue());
+    }
+  }
 }
diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
index ae73b3f..e5159a7 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
@@ -91,6 +91,7 @@ import org.apache.lucene.store.ByteBuffersDirectory;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.FSLockFactory;
+import org.apache.lucene.store.FileSwitchDirectory;
 import org.apache.lucene.store.FlushInfo;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.LockFactory;
@@ -1414,6 +1415,13 @@ public abstract class LuceneTestCase extends Assert {
       }
 
       Directory fsdir = newFSDirectoryImpl(clazz, f, lf);
+      if (rarely()) {
+        List<String> fileExtensions =
+            Arrays.asList("fdt", "fdx", "tim", "tip", "si", "fnm", "pos", "dii", "dim", "nvm", "nvd", "dvm", "dvd");
+        Collections.shuffle(fileExtensions, random());
+        fileExtensions = fileExtensions.subList(0, 1 + random().nextInt(fileExtensions.size()));
+        fsdir = new FileSwitchDirectory(new HashSet<>(fileExtensions), fsdir, newFSDirectoryImpl(clazz, f, lf), true);
+      }
       BaseDirectoryWrapper wrapped = wrapDirectory(random(), fsdir, bare);
       return wrapped;
     } catch (Exception e) {