You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by do...@apache.org on 2011/11/16 16:59:43 UTC

svn commit: r1202754 - in /lucene/dev/trunk: lucene/src/java/org/apache/lucene/index/ modules/facet/docs/ modules/facet/src/java/org/apache/lucene/facet/taxonomy/ modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/ modules/facet/src/test...

Author: doronc
Date: Wed Nov 16 15:59:43 2011
New Revision: 1202754

URL: http://svn.apache.org/viewvc?rev=1202754&view=rev
Log:
LUCENE-3573: TaxonomyReader.refresh() was broken if taxonomy index recreated since taxo reader last opened or refreshed.

Added:
    lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/taxonomy/InconsistentTaxonomyException.java   (with props)
Modified:
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
    lucene/dev/trunk/modules/facet/docs/userguide.html
    lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java
    lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
    lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
    lucene/dev/trunk/modules/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java?rev=1202754&r1=1202753&r2=1202754&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java Wed Nov 16 15:59:43 2011
@@ -18,7 +18,6 @@ package org.apache.lucene.index;
  */
 
 import org.apache.lucene.index.codecs.PerDocValues;
-import org.apache.lucene.index.IndexReader.ReaderContext;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
@@ -442,6 +441,11 @@ public class FilterIndexReader extends I
   }
 
   @Override
+  public Map<String, String> getCommitUserData() { 
+    return in.getCommitUserData();
+  }
+  
+  @Override
   public Fields fields() throws IOException {
     ensureOpen();
     return in.fields();

Modified: lucene/dev/trunk/modules/facet/docs/userguide.html
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/facet/docs/userguide.html?rev=1202754&r1=1202753&r2=1202754&view=diff
==============================================================================
--- lucene/dev/trunk/modules/facet/docs/userguide.html (original)
+++ lucene/dev/trunk/modules/facet/docs/userguide.html Wed Nov 16 15:59:43 2011
@@ -779,6 +779,12 @@ example) so a thread which is in the mid
 <code>TaxonomyReader</code>, however, we are guaranteed that existing categories are never deleted or modified - 
 the only thing that can happen is that new categories are added. Since search threads do not care if new categories 
 are added in the middle of a search, there is no reason to keep around the old object, and the new one suffices.
+<br><b>However</b>, if the taxonomy index was recreated since the <code>TaxonomyReader</code> was opened or
+refreshed, this assumption (that categories are forevr) no longer holds, and <code>refresh()</code> will 
+throw an <code>InconsistentTaxonomyException</code>, guiding the application to open 
+a new <code>TaxonomyReader</code> for up-to-date taxonomy data. (Old one can
+be closed as soon as it is no more used.)
+
 
 </body>
 </html>

Added: lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/taxonomy/InconsistentTaxonomyException.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/taxonomy/InconsistentTaxonomyException.java?rev=1202754&view=auto
==============================================================================
--- lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/taxonomy/InconsistentTaxonomyException.java (added)
+++ lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/taxonomy/InconsistentTaxonomyException.java Wed Nov 16 15:59:43 2011
@@ -0,0 +1,40 @@
+package org.apache.lucene.facet.taxonomy;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Exception indicating that a certain operation could not be performed 
+ * on a taxonomy related object because of an inconsistency.
+ * <p>
+ * For example, trying to refresh a taxonomy reader might fail in case 
+ * the underlying taxonomy was meanwhile modified in a manner which 
+ * does not allow to perform such a refresh. (See {@link TaxonomyReader#refresh()}.)
+ *   
+ * @lucene.experimental
+ */
+public class InconsistentTaxonomyException extends Exception {
+  
+  public InconsistentTaxonomyException(String message) {
+    super(message);
+  }
+  
+  public InconsistentTaxonomyException() {
+    super();
+  }
+  
+}

Modified: lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java?rev=1202754&r1=1202753&r2=1202754&view=diff
==============================================================================
--- lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java (original)
+++ lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java Wed Nov 16 15:59:43 2011
@@ -120,6 +120,14 @@ public interface TaxonomyReader extends 
    * faceted search, the taxonomy reader's refresh() should be called only after
    * a reopen() of the main index.
    * <P>
+   * Refreshing the taxonomy might fail in some cases, for example 
+   * if the taxonomy was recreated since this instance was opened or last refreshed.
+   * In this case an {@link InconsistentTaxonomyException} is thrown,
+   * suggesting that in order to obtain up-to-date taxonomy data a new
+   * {@link TaxonomyReader} should be opened. Note: This {@link TaxonomyReader} 
+   * instance remains unchanged and usable in this case, and the application can
+   * continue to use it, and should still {@link #close()} when no longer needed.  
+   * <P>
    * It should be noted that refresh() is similar in purpose to
    * IndexReader.reopen(), but the two methods behave differently. refresh()
    * refreshes the existing TaxonomyReader object, rather than opening a new one
@@ -129,8 +137,9 @@ public interface TaxonomyReader extends 
    * of the taxonomy open - refreshing the taxonomy to the newest data and using
    * this new snapshots in all threads (whether new or old) is fine. This saves
    * us needing to keep multiple copies of the taxonomy open in memory.
+   * @return true if anything has changed, false otherwise. 
    */
-  public void refresh() throws IOException;
+  public boolean refresh() throws IOException, InconsistentTaxonomyException;
   
   /**
    * getParent() returns the ordinal of the parent category of the category

Modified: lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java?rev=1202754&r1=1202753&r2=1202754&view=diff
==============================================================================
--- lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java (original)
+++ lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java Wed Nov 16 15:59:43 2011
@@ -10,6 +10,7 @@ import java.util.logging.Level;
 import java.util.logging.Logger;
 
 import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.InconsistentTaxonomyException;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.facet.taxonomy.directory.Consts.LoadFullPathOnly;
 import org.apache.lucene.index.CorruptIndexException;
@@ -333,7 +334,7 @@ public class DirectoryTaxonomyReader imp
   // Note that refresh() is synchronized (it is the only synchronized
   // method in this class) to ensure that it never gets called concurrently
   // with itself.
-  public synchronized void refresh() throws IOException {
+  public synchronized boolean refresh() throws IOException, InconsistentTaxonomyException {
     ensureOpen();
     /*
      * Since refresh() can be a lengthy operation, it is very important that we
@@ -349,7 +350,24 @@ public class DirectoryTaxonomyReader imp
     // no other thread can be writing at this time (this method is the
     // only possible writer, and it is "synchronized" to avoid this case).
     IndexReader r2 = IndexReader.openIfChanged(indexReader);
-    if (r2 != null) {
+    if (r2 == null) {
+    	return false; // no changes, nothing to do
+    } 
+    
+    // validate that a refresh is valid at this point, i.e. that the taxonomy 
+    // was not recreated since this reader was last opened or refresshed.
+    String t1 = indexReader.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME);
+    String t2 = r2.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME);
+    if (t1==null) {
+    	if (t2!=null) {
+    		r2.close();
+    		throw new InconsistentTaxonomyException("Taxonomy was recreated at: "+t2);
+    	}
+    } else if (!t1.equals(t2)) {
+    	r2.close();
+    	throw new InconsistentTaxonomyException("Taxonomy was recreated at: "+t2+"  !=  "+t1);
+    }
+    
       IndexReader oldreader = indexReader;
       // we can close the old searcher, but need to synchronize this
       // so that we don't close it in the middle that another routine
@@ -392,8 +410,8 @@ public class DirectoryTaxonomyReader imp
           i.remove();
         }
       }
+      return true;
     }
-  }
 
   public void close() throws IOException {
     if (!closed) {

Modified: lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java?rev=1202754&r1=1202753&r2=1202754&view=diff
==============================================================================
--- lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java (original)
+++ lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java Wed Nov 16 15:59:43 2011
@@ -9,6 +9,7 @@ import java.io.FileInputStream;
 import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.IOException;
+import java.util.HashMap;
 import java.util.Map;
 
 import org.apache.lucene.analysis.core.KeywordAnalyzer;
@@ -83,6 +84,14 @@ import org.apache.lucene.facet.taxonomy.
  */
 public class DirectoryTaxonomyWriter implements TaxonomyWriter {
 
+  /**
+   * Property name of user commit data that contains the creation time of a taxonomy index.
+   * <p>
+   * Applications making use of {@link TaxonomyWriter#commit(Map)} should not use this
+   * particular property name. 
+   */
+  public static final String INDEX_CREATE_TIME = "index.create.time";
+  
   private IndexWriter indexWriter;
   private int nextID;
   private char delimiter = Consts.DEFAULT_DELIMITER;
@@ -106,6 +115,12 @@ public class DirectoryTaxonomyWriter imp
   private int cacheMisses;
 
   /**
+   * When a taxonomy is created, we mark that its create time should be committed in the 
+   * next commit.
+   */
+  private String taxoIndexCreateTime = null;
+  
+  /**
    * setDelimiter changes the character that the taxonomy uses in its internal
    * storage as a delimiter between category components. Do not use this
    * method unless you really know what you are doing. It has nothing to do
@@ -172,6 +187,10 @@ public class DirectoryTaxonomyWriter imp
   throws CorruptIndexException, LockObtainFailedException,
   IOException {
 
+    if (!IndexReader.indexExists(directory) || openMode==OpenMode.CREATE) {
+      taxoIndexCreateTime = Long.toString(System.nanoTime());
+    }
+    
     indexWriter = openIndexWriter(directory, openMode);
     reader = null;
 
@@ -280,10 +299,17 @@ public class DirectoryTaxonomyWriter imp
   @Override
   public synchronized void close() throws CorruptIndexException, IOException {
     if (indexWriter != null) {
-      indexWriter.close();
-      indexWriter = null;
+      if (taxoIndexCreateTime != null) {
+        indexWriter.commit(combinedCommitData(null));
+        taxoIndexCreateTime = null;
+      }
+      doClose();
     }
-
+  }
+  
+  private void doClose() throws CorruptIndexException, IOException {
+    indexWriter.close();
+    indexWriter = null;
     closeResources();
   }
 
@@ -584,11 +610,28 @@ public class DirectoryTaxonomyWriter imp
   @Override
   public synchronized void commit() throws CorruptIndexException, IOException {
     ensureOpen();
-    indexWriter.commit();
+    if (taxoIndexCreateTime != null) {
+      indexWriter.commit(combinedCommitData(null));
+      taxoIndexCreateTime = null;
+    } else {
+      indexWriter.commit();
+    }
     refreshReader();
   }
 
   /**
+   * Combine original user data with that of the taxonomy creation time
+   */
+  private Map<String,String> combinedCommitData(Map<String,String> userData) {
+    Map<String,String> m = new HashMap<String, String>();
+    if (userData != null) {
+      m.putAll(userData);
+    }
+    m.put(INDEX_CREATE_TIME, taxoIndexCreateTime);
+    return m;
+  }
+  
+  /**
    * Like commit(), but also store properties with the index. These properties
    * are retrievable by {@link DirectoryTaxonomyReader#getCommitUserData}.
    * See {@link TaxonomyWriter#commit(Map)}. 
@@ -596,7 +639,12 @@ public class DirectoryTaxonomyWriter imp
   @Override
   public synchronized void commit(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
     ensureOpen();
-    indexWriter.commit(commitUserData);
+    if (taxoIndexCreateTime != null) {
+      indexWriter.commit(combinedCommitData(commitUserData));
+      taxoIndexCreateTime = null;
+    } else {
+      indexWriter.commit(commitUserData);
+    }
     refreshReader();
   }
   
@@ -607,7 +655,12 @@ public class DirectoryTaxonomyWriter imp
   @Override
   public synchronized void prepareCommit() throws CorruptIndexException, IOException {
     ensureOpen();
-    indexWriter.prepareCommit();
+    if (taxoIndexCreateTime != null) {
+      indexWriter.prepareCommit(combinedCommitData(null));
+      taxoIndexCreateTime = null;
+    } else {
+      indexWriter.prepareCommit();
+    }
   }
 
   /**
@@ -617,7 +670,12 @@ public class DirectoryTaxonomyWriter imp
   @Override
   public synchronized void prepareCommit(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
     ensureOpen();
-    indexWriter.prepareCommit(commitUserData);
+    if (taxoIndexCreateTime != null) {
+      indexWriter.prepareCommit(combinedCommitData(commitUserData));
+      taxoIndexCreateTime = null;
+    } else {
+      indexWriter.prepareCommit(commitUserData);
+    }
   }
   
   /**
@@ -1037,10 +1095,10 @@ public class DirectoryTaxonomyWriter imp
    * will yield an {@link AlreadyClosedException}).
    */
   @Override
-  public void rollback() throws IOException {
+  public synchronized void rollback() throws IOException {
     ensureOpen();
     indexWriter.rollback();
-    close();
+    doClose();
   }
   
 }

Modified: lucene/dev/trunk/modules/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java?rev=1202754&r1=1202753&r2=1202754&view=diff
==============================================================================
--- lucene/dev/trunk/modules/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java (original)
+++ lucene/dev/trunk/modules/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java Wed Nov 16 15:59:43 2011
@@ -1,10 +1,17 @@
 package org.apache.lucene.facet.taxonomy.directory;
 
+import java.util.Random;
+
 import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.InconsistentTaxonomyException;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
 import org.junit.Test;
 
@@ -59,6 +66,36 @@ public class TestDirectoryTaxonomyReader
     dir.close();
   }
   
+  /**
+   * Test the boolean returned by TR.refresh
+   * @throws Exception
+   */
+  @Test
+  public void testReaderRefreshResult() throws Exception {
+    Directory dir = null;
+    DirectoryTaxonomyWriter ltw = null;
+    DirectoryTaxonomyReader ltr = null;
+    
+    try {
+      dir = newDirectory();
+      ltw = new DirectoryTaxonomyWriter(dir);
+      
+      ltw.addCategory(new CategoryPath("a"));
+      ltw.commit();
+      
+      ltr = new DirectoryTaxonomyReader(dir);
+      assertFalse("Nothing has changed",ltr.refresh());
+      
+      ltw.addCategory(new CategoryPath("b"));
+      ltw.commit();
+      
+      assertTrue("changes were committed",ltr.refresh());
+      assertFalse("Nothing has changed",ltr.refresh());
+    } finally {
+      IOUtils.close(ltw, ltr, dir);
+    }
+  }
+  
   @Test
   public void testAlreadyClosed() throws Exception {
     Directory dir = newDirectory();
@@ -77,4 +114,68 @@ public class TestDirectoryTaxonomyReader
     dir.close();
   }
   
+  /**
+   * recreating a taxonomy should work well with a freshly opened taxonomy reader 
+   */
+  @Test
+  public void testFreshReadRecreatedTaxonomy() throws Exception {
+    doTestReadRecreatedTaxono(random, true);
+  }
+  
+  /**
+   * recreating a taxonomy should work well with a refreshed taxonomy reader 
+   */
+  @Test
+  public void testRefreshReadRecreatedTaxonomy() throws Exception {
+    doTestReadRecreatedTaxono(random, false);
+  }
+  
+  private void doTestReadRecreatedTaxono(Random random, boolean closeReader) throws Exception {
+    Directory dir = null;
+    TaxonomyWriter tw = null;
+    TaxonomyReader tr = null;
+    
+    // prepare a few categories
+    int  n = 10;
+    CategoryPath[] cp = new CategoryPath[n];
+    for (int i=0; i<n; i++) {
+      cp[i] = new CategoryPath("a", Integer.toString(i));
+    }
+    
+    try {
+      dir = newDirectory();
+      
+      tw = new DirectoryTaxonomyWriter(dir);
+      tw.addCategory(new CategoryPath("a"));
+      tw.close();
+      
+      tr = new DirectoryTaxonomyReader(dir);
+      int baseNumcategories = tr.getSize();
+      
+      for (int i=0; i<n; i++) {
+        int k = random.nextInt(n);
+        tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE);
+        for (int j=0; j<=k; j++) {
+          tw.addCategory(new CategoryPath(cp[j]));
+        }
+        tw.close();
+        if (closeReader) {
+          tr.close();
+          tr = new DirectoryTaxonomyReader(dir);
+        } else {
+          try {
+            tr.refresh();
+            fail("Expected InconsistentTaxonomyException");
+          } catch (InconsistentTaxonomyException e) {
+            tr.close();
+            tr = new DirectoryTaxonomyReader(dir);
+          }
+        }
+        assertEquals("Wrong #categories in taxonomy (i="+i+", k="+k+")", baseNumcategories + 1 + k, tr.getSize());
+      }
+    } finally {
+      IOUtils.close(tr, tw, dir);
+    }
+  }
+  
 }