You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2019/06/18 06:48:06 UTC

[lucene-solr] branch master updated: LUCENE-8853: Try parsing original file extension from tmp file (#716)

This is an automated email from the ASF dual-hosted git repository.

simonw pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new fb6e28d  LUCENE-8853: Try parsing original file extension from tmp file (#716)
fb6e28d is described below

commit fb6e28d9f1ea016a8c937f15c8043b0684cf77dd
Author: Simon Willnauer <si...@apache.org>
AuthorDate: Tue Jun 18 08:47:59 2019 +0200

    LUCENE-8853: Try parsing original file extension from tmp file (#716)
    
    FileSwitchDirectory fails if the tmp file are not in the same directory
    as the file it's renamed to. This is correct behavior but breaks with
    tmp files used with index sorting. This change tries best effort to find
    the right extension directory if the file ends with `.tmp`
---
 lucene/CHANGES.txt                                 |  3 +++
 .../java/org/apache/lucene/store/Directory.java    | 10 ++++++++
 .../java/org/apache/lucene/store/FSDirectory.java  |  3 +--
 .../apache/lucene/store/FileSwitchDirectory.java   | 27 ++++++++++++++++++----
 .../lucene/store/TestFileSwitchDirectory.java      | 27 ++++++++++++++++++++++
 .../org/apache/lucene/util/LuceneTestCase.java     | 16 ++++++-------
 6 files changed, 72 insertions(+), 14 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 2c3efe1..b85ddf0 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -83,6 +83,9 @@ Bug Fixes
   contents to ensure we don't expose pending deletes if both directory point to the same
   underlying filesystem directory. (Simon Willnauer)
 
+* LUCENE-8853: FileSwitchDirectory now applies best effort to place tmp files in the same
+  direcotry as the target files. (Simon Willnauer)
+
 Improvements
 
 * LUCENE-7840: Non-scoring BooleanQuery now removes SHOULD clauses before building the scorer supplier
diff --git a/lucene/core/src/java/org/apache/lucene/store/Directory.java b/lucene/core/src/java/org/apache/lucene/store/Directory.java
index 13e8e90..5763d2c 100644
--- a/lucene/core/src/java/org/apache/lucene/store/Directory.java
+++ b/lucene/core/src/java/org/apache/lucene/store/Directory.java
@@ -24,6 +24,7 @@ import java.nio.file.NoSuchFileException;
 import java.util.Collection; // for javadocs
 import java.util.Set;
 
+import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.util.IOUtils;
 
 /**
@@ -207,4 +208,13 @@ public abstract class Directory implements Closeable {
    * @lucene.internal
    */
   public abstract Set<String> getPendingDeletions() throws IOException;
+
+  /**
+   * Creates a file name for a temporary file. The name will start with
+   * {@code prefix}, end with {@code suffix} and have a reserved file extension {@code .tmp}.
+   * @see #createTempOutput(String, String, IOContext)
+   */
+  protected static String getTempFileName(String prefix, String suffix, long counter) {
+    return IndexFileNames.segmentFileName(prefix, suffix + "_" + Long.toString(counter, Character.MAX_RADIX), "tmp");
+  }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java b/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java
index 615c089..cbaafb2 100644
--- a/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java
+++ b/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java
@@ -41,7 +41,6 @@ import java.util.concurrent.Future;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
 
-import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.util.Constants;
 import org.apache.lucene.util.IOUtils;
 
@@ -261,7 +260,7 @@ public abstract class FSDirectory extends BaseDirectory {
     maybeDeletePendingFiles();
     while (true) {
       try {
-        String name = IndexFileNames.segmentFileName(prefix, suffix + "_" + Long.toString(nextTempFileCounter.getAndIncrement(), Character.MAX_RADIX), "tmp");
+        String name = getTempFileName(prefix, suffix, nextTempFileCounter.getAndIncrement());
         if (pendingDeletes.contains(name)) {
           continue;
         }
diff --git a/lucene/core/src/java/org/apache/lucene/store/FileSwitchDirectory.java b/lucene/core/src/java/org/apache/lucene/store/FileSwitchDirectory.java
index 7484ae0..9552710 100644
--- a/lucene/core/src/java/org/apache/lucene/store/FileSwitchDirectory.java
+++ b/lucene/core/src/java/org/apache/lucene/store/FileSwitchDirectory.java
@@ -27,6 +27,8 @@ import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.apache.lucene.util.IOUtils;
 
@@ -53,8 +55,12 @@ public class FileSwitchDirectory extends Directory {
   private final Directory primaryDir;
   private final Set<String> primaryExtensions;
   private boolean doClose;
+  private static final Pattern EXT_PATTERN = Pattern.compile("\\.([a-zA-Z]+)");
 
   public FileSwitchDirectory(Set<String> primaryExtensions, Directory primaryDir, Directory secondaryDir, boolean doClose) {
+    if (primaryExtensions.contains("tmp")) {
+      throw new IllegalArgumentException("tmp is a reserved extension");
+    }
     this.primaryExtensions = primaryExtensions;
     this.primaryDir = primaryDir;
     this.secondaryDir = secondaryDir;
@@ -141,7 +147,14 @@ public class FileSwitchDirectory extends Directory {
     if (i == -1) {
       return "";
     }
-    return name.substring(i+1, name.length());
+    String ext = name.substring(i + 1);
+    if (ext.equals("tmp")) {
+      Matcher matcher = EXT_PATTERN.matcher(name.substring(0, i + 1));
+      if (matcher.find()) {
+        return matcher.group(1);
+      }
+    }
+    return ext;
   }
 
   private Directory getDirectory(String name) {
@@ -174,7 +187,12 @@ public class FileSwitchDirectory extends Directory {
 
   @Override
   public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException {
-    return getDirectory("."+suffix).createTempOutput(prefix, suffix, context);
+    // this is best effort - it's ok to create a tmp file with any prefix and suffix. Yet if this file is then
+    // in-turn used to rename they must match to the same directory hence we use the full file-name to find
+    // the right directory. Here we can't make a decision but we need to ensure that all other operations
+    // map to the right directory.
+    String tmpFileName = getTempFileName(prefix, suffix, 0);
+    return getDirectory(tmpFileName).createTempOutput(prefix, suffix, context);
   }
 
   @Override
@@ -183,10 +201,11 @@ public class FileSwitchDirectory extends Directory {
     List<String> secondaryNames = new ArrayList<>();
 
     for (String name : names)
-      if (primaryExtensions.contains(getExtension(name)))
+      if (primaryExtensions.contains(getExtension(name))) {
         primaryNames.add(name);
-      else
+      } else {
         secondaryNames.add(name);
+      }
 
     primaryDir.sync(primaryNames);
     secondaryDir.sync(secondaryNames);
diff --git a/lucene/core/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java b/lucene/core/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java
index 256ee1b..0a12eed 100644
--- a/lucene/core/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java
+++ b/lucene/core/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java
@@ -19,6 +19,7 @@ package org.apache.lucene.store;
 
 import java.io.IOException;
 import java.net.URI;
+import java.nio.file.AtomicMoveNotSupportedException;
 import java.nio.file.FileSystem;
 import java.nio.file.Path;
 import java.util.Arrays;
@@ -114,6 +115,32 @@ public class TestFileSwitchDirectory extends BaseDirectoryTestCase {
     dir.close();
   }
 
+  public void testRenameTmpFile() throws IOException {
+    try (Directory directory = getDirectory(createTempDir())) {
+      final String name;
+      try (IndexOutput out = directory.createTempOutput("foo.cfs", "", IOContext.DEFAULT)) {
+        out.writeInt(1);
+        name = out.getName();
+      }
+      assertEquals(1, Arrays.stream(directory.listAll()).filter(f -> f.equals(name)).count());
+      assertEquals(0, Arrays.stream(directory.listAll()).filter(f -> f.equals("foo.cfs")).count());
+      directory.rename(name, "foo.cfs");
+      assertEquals(1, Arrays.stream(directory.listAll()).filter(f -> f.equals("foo.cfs")).count());
+      assertEquals(0, Arrays.stream(directory.listAll()).filter(f -> f.equals(name)).count());
+    }
+
+    try (Directory directory= newFSSwitchDirectory(Collections.singleton("bar"))) {
+      String brokenName;
+      try (IndexOutput out = directory.createTempOutput("foo", "bar", IOContext.DEFAULT)) {
+        out.writeInt(1);
+        brokenName = out.getName();
+      }
+      AtomicMoveNotSupportedException exception = expectThrows(AtomicMoveNotSupportedException.class,
+          () -> directory.rename(brokenName, "foo.bar"));
+      assertEquals("foo_bar_0.tmp -> foo.bar: source and dest are in different directories", exception.getMessage());
+    }
+  }
+
   @Override
   protected Directory getDirectory(Path path) throws IOException {
     Set<String> extensions = new HashSet<String>();
diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
index cd8d492..e5159a7 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
@@ -91,6 +91,7 @@ import org.apache.lucene.store.ByteBuffersDirectory;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.FSLockFactory;
+import org.apache.lucene.store.FileSwitchDirectory;
 import org.apache.lucene.store.FlushInfo;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.LockFactory;
@@ -1414,14 +1415,13 @@ public abstract class LuceneTestCase extends Assert {
       }
 
       Directory fsdir = newFSDirectoryImpl(clazz, f, lf);
-// LUCENE-8853: FileSwitchDirectory is broken for tmp outputs.
-//      if (rarely()) {
-//        List<String> fileExtensions =
-//            Arrays.asList("fdt", "fdx", "tim", "tip", "si", "fnm", "pos", "dii", "dim", "nvm", "nvd", "dvm", "dvd");
-//        Collections.shuffle(fileExtensions, random());
-//        fileExtensions = fileExtensions.subList(0, 1 + random().nextInt(fileExtensions.size()));
-//        fsdir = new FileSwitchDirectory(new HashSet<>(fileExtensions), fsdir, newFSDirectoryImpl(clazz, f, lf), true);
-//      }
+      if (rarely()) {
+        List<String> fileExtensions =
+            Arrays.asList("fdt", "fdx", "tim", "tip", "si", "fnm", "pos", "dii", "dim", "nvm", "nvd", "dvm", "dvd");
+        Collections.shuffle(fileExtensions, random());
+        fileExtensions = fileExtensions.subList(0, 1 + random().nextInt(fileExtensions.size()));
+        fsdir = new FileSwitchDirectory(new HashSet<>(fileExtensions), fsdir, newFSDirectoryImpl(clazz, f, lf), true);
+      }
       BaseDirectoryWrapper wrapped = wrapDirectory(random(), fsdir, bare);
       return wrapped;
     } catch (Exception e) {