You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/07/29 20:10:10 UTC

svn commit: r1366883 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/core/ lucene/core/src/java/org/apache/lucene/index/ lucene/core/src/test/org/apache/lucene/index/

Author: mikemccand
Date: Sun Jul 29 18:10:10 2012
New Revision: 1366883

URL: http://svn.apache.org/viewvc?rev=1366883&view=rev
Log:
LUCENE-4190: restrict allowed filenames to reduce risk of deleting non-lucene file from the index directory

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/lucene/core/   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexFileNames.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestDoc.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1366883&r1=1366882&r2=1366883&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Sun Jul 29 18:10:10 2012
@@ -115,6 +115,12 @@ Bug Fixes
 * LUCENE-4245: Make IndexWriter#close() and MergeScheduler#close()
   non-interruptible.  (Mark Miller, Uwe Schindler)
 
+* LUCENE-4190: restrict allowed filenames that a codec may create to
+  the patterns recognized by IndexFileNames.  This also fixes
+  IndexWriter to only delete files matching this pattern from an index
+  directory, to reduce risk when the wrong index path is accidentally
+  passed to IndexWriter (Robert Muir, Mike McCandless)
+
 Changes in Runtime Behavior
 
 * LUCENE-4109: Enable position increments in the flexible queryparser by default.

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java?rev=1366883&r1=1366882&r2=1366883&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java Sun Jul 29 18:10:10 2012
@@ -25,6 +25,7 @@ import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.regex.Matcher;
 
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.NoSuchDirectoryException;
@@ -146,57 +147,61 @@ final class IndexFileDeleter {
       // it means the directory is empty, so ignore it.
       files = new String[0];
     }
-
-    for (String fileName : files) {
-
-      if (!fileName.endsWith("write.lock") && !fileName.equals(IndexFileNames.SEGMENTS_GEN)) {
-
-        // Add this file to refCounts with initial count 0:
-        getRefCount(fileName);
-
-        if (fileName.startsWith(IndexFileNames.SEGMENTS)) {
-
-          // This is a commit (segments or segments_N), and
-          // it's valid (<= the max gen).  Load it, then
-          // incref all files it refers to:
-          if (infoStream.isEnabled("IFD")) {
-            infoStream.message("IFD", "init: load commit \"" + fileName + "\"");
-          }
-          SegmentInfos sis = new SegmentInfos();
-          try {
-            sis.read(directory, fileName);
-          } catch (FileNotFoundException e) {
-            // LUCENE-948: on NFS (and maybe others), if
-            // you have writers switching back and forth
-            // between machines, it's very likely that the
-            // dir listing will be stale and will claim a
-            // file segments_X exists when in fact it
-            // doesn't.  So, we catch this and handle it
-            // as if the file does not exist
+    
+    if (currentSegmentsFile != null) {
+      Matcher m = IndexFileNames.CODEC_FILE_PATTERN.matcher("");
+      for (String fileName : files) {
+        m.reset(fileName);
+        if (!fileName.endsWith("write.lock") && !fileName.equals(IndexFileNames.SEGMENTS_GEN)
+            && (m.matches() || fileName.startsWith(IndexFileNames.SEGMENTS))) {
+          
+          // Add this file to refCounts with initial count 0:
+          getRefCount(fileName);
+          
+          if (fileName.startsWith(IndexFileNames.SEGMENTS)) {
+            
+            // This is a commit (segments or segments_N), and
+            // it's valid (<= the max gen).  Load it, then
+            // incref all files it refers to:
             if (infoStream.isEnabled("IFD")) {
-              infoStream.message("IFD", "init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point");
+              infoStream.message("IFD", "init: load commit \"" + fileName + "\"");
             }
-            sis = null;
-          } catch (IOException e) {
-            if (SegmentInfos.generationFromSegmentsFileName(fileName) <= currentGen && directory.fileLength(fileName) > 0) {
-              throw e;
-            } else {
-              // Most likely we are opening an index that
-              // has an aborted "future" commit, so suppress
-              // exc in this case
+            SegmentInfos sis = new SegmentInfos();
+            try {
+              sis.read(directory, fileName);
+            } catch (FileNotFoundException e) {
+              // LUCENE-948: on NFS (and maybe others), if
+              // you have writers switching back and forth
+              // between machines, it's very likely that the
+              // dir listing will be stale and will claim a
+              // file segments_X exists when in fact it
+              // doesn't.  So, we catch this and handle it
+              // as if the file does not exist
+              if (infoStream.isEnabled("IFD")) {
+                infoStream.message("IFD", "init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point");
+              }
               sis = null;
+            } catch (IOException e) {
+              if (SegmentInfos.generationFromSegmentsFileName(fileName) <= currentGen && directory.fileLength(fileName) > 0) {
+                throw e;
+              } else {
+                // Most likely we are opening an index that
+                // has an aborted "future" commit, so suppress
+                // exc in this case
+                sis = null;
+              }
             }
-          }
-          if (sis != null) {
-            final CommitPoint commitPoint = new CommitPoint(commitsToDelete, directory, sis);
-            if (sis.getGeneration() == segmentInfos.getGeneration()) {
-              currentCommitPoint = commitPoint;
-            }
-            commits.add(commitPoint);
-            incRef(sis, true);
-
-            if (lastSegmentInfos == null || sis.getGeneration() > lastSegmentInfos.getGeneration()) {
-              lastSegmentInfos = sis;
+            if (sis != null) {
+              final CommitPoint commitPoint = new CommitPoint(commitsToDelete, directory, sis);
+              if (sis.getGeneration() == segmentInfos.getGeneration()) {
+                currentCommitPoint = commitPoint;
+              }
+              commits.add(commitPoint);
+              incRef(sis, true);
+              
+              if (lastSegmentInfos == null || sis.getGeneration() > lastSegmentInfos.getGeneration()) {
+                lastSegmentInfos = sis;
+              }
             }
           }
         }

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexFileNames.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexFileNames.java?rev=1366883&r1=1366882&r2=1366883&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexFileNames.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexFileNames.java Sun Jul 29 18:10:10 2012
@@ -17,6 +17,8 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
+import java.util.regex.Pattern;
+
 import org.apache.lucene.codecs.Codec;
 
 // TODO: put all files under codec and remove all the static extensions here
@@ -189,4 +191,8 @@ public final class IndexFileNames {
     }
     return filename;
   }  
+
+  // All files created by codecs much match this pattern (we
+  // check this in SegmentInfo.java):
+  static final Pattern CODEC_FILE_PATTERN = Pattern.compile("_[a-z0-9]+(_.*)?\\..*");
 }

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java?rev=1366883&r1=1366882&r2=1366883&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java Sun Jul 29 18:10:10 2012
@@ -24,6 +24,7 @@ import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.Set;
+import java.util.regex.Matcher;
 
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.lucene3x.Lucene3xSegmentInfoFormat;
@@ -251,16 +252,31 @@ public final class SegmentInfo {
   private Set<String> setFiles;
 
   public void setFiles(Set<String> files) {
+    checkFileNames(files);
     setFiles = files;
     sizeInBytes = -1;
   }
 
   public void addFiles(Collection<String> files) {
+    checkFileNames(files);
     setFiles.addAll(files);
+    sizeInBytes = -1;
   }
 
   public void addFile(String file) {
+    checkFileNames(Collections.singleton(file));
     setFiles.add(file);
+    sizeInBytes = -1;
+  }
+  
+  private void checkFileNames(Collection<String> files) {
+    Matcher m = IndexFileNames.CODEC_FILE_PATTERN.matcher("");
+    for (String file : files) {
+      m.reset(file);
+      if (!m.matches()) {
+        throw new IllegalArgumentException("invalid codec filename '" + file + "', must match: " + IndexFileNames.CODEC_FILE_PATTERN.pattern());
+      }
+    }
   }
     
   /**

Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestDoc.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestDoc.java?rev=1366883&r1=1366882&r2=1366883&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestDoc.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestDoc.java Sun Jul 29 18:10:10 2012
@@ -128,13 +128,13 @@ public class TestDoc extends LuceneTestC
       printSegment(out, si2);
       writer.close();
 
-      SegmentInfoPerCommit siMerge = merge(directory, si1, si2, "merge", false);
+      SegmentInfoPerCommit siMerge = merge(directory, si1, si2, "_merge", false);
       printSegment(out, siMerge);
 
-      SegmentInfoPerCommit siMerge2 = merge(directory, si1, si2, "merge2", false);
+      SegmentInfoPerCommit siMerge2 = merge(directory, si1, si2, "_merge2", false);
       printSegment(out, siMerge2);
 
-      SegmentInfoPerCommit siMerge3 = merge(directory, siMerge, siMerge2, "merge3", false);
+      SegmentInfoPerCommit siMerge3 = merge(directory, siMerge, siMerge2, "_merge3", false);
       printSegment(out, siMerge3);
       
       directory.close();
@@ -163,13 +163,13 @@ public class TestDoc extends LuceneTestC
       printSegment(out, si2);
       writer.close();
 
-      siMerge = merge(directory, si1, si2, "merge", true);
+      siMerge = merge(directory, si1, si2, "_merge", true);
       printSegment(out, siMerge);
 
-      siMerge2 = merge(directory, si1, si2, "merge2", true);
+      siMerge2 = merge(directory, si1, si2, "_merge2", true);
       printSegment(out, siMerge2);
 
-      siMerge3 = merge(directory, siMerge, siMerge2, "merge3", true);
+      siMerge3 = merge(directory, siMerge, siMerge2, "_merge3", true);
       printSegment(out, siMerge3);
       
       directory.close();

Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1366883&r1=1366882&r2=1366883&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java Sun Jul 29 18:10:10 2012
@@ -46,6 +46,7 @@ import org.apache.lucene.search.ScoreDoc
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.Lock;
 import org.apache.lucene.store.LockFactory;
 import org.apache.lucene.store.LockObtainFailedException;
@@ -1836,4 +1837,53 @@ public class TestIndexWriter extends Luc
     w.close();
     dir.close();
   }
+  
+  //LUCENE-1468 -- make sure opening an IndexWriter with
+  // create=true does not remove non-index files
+  
+  public void testOtherFiles() throws Throwable {
+    Directory dir = newDirectory();
+    IndexWriter iw = new IndexWriter(dir, 
+        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+    iw.addDocument(new Document());
+    iw.close();
+    try {
+      // Create my own random file:
+      IndexOutput out = dir.createOutput("myrandomfile", newIOContext(random()));
+      out.writeByte((byte) 42);
+      out.close();
+      
+      new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random()))).close();
+      
+      assertTrue(dir.fileExists("myrandomfile"));
+    } finally {
+      dir.close();
+    }
+  }
+  
+  // here we do better, there is no current segments file, so we don't delete anything.
+  // however, if you actually go and make a commit, the next time you run indexwriter
+  // this file will be gone.
+  public void testOtherFiles2() throws Throwable {
+    Directory dir = newDirectory();
+    try {
+      // Create my own random file:
+      IndexOutput out = dir.createOutput("_a.frq", newIOContext(random()));
+      out.writeByte((byte) 42);
+      out.close();
+      
+      new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random()))).close();
+      
+      assertTrue(dir.fileExists("_a.frq"));
+      
+      IndexWriter iw = new IndexWriter(dir, 
+          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+      iw.addDocument(new Document());
+      iw.close();
+      
+      assertFalse(dir.fileExists("_a.frq"));
+    } finally {
+      dir.close();
+    }
+  }
 }