You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2012/11/07 11:50:15 UTC

svn commit: r1406539 - in /lucene/dev/trunk/lucene: ./ facet/src/java/org/apache/lucene/facet/taxonomy/directory/ facet/src/test/org/apache/lucene/facet/taxonomy/directory/

Author: shaie
Date: Wed Nov  7 10:50:14 2012
New Revision: 1406539

URL: http://svn.apache.org/viewvc?rev=1406539&view=rev
Log:
LUCENE-4532: fix DirectoryTaxonomyWriter to track the taxonomy epoch, rather than create timestamp

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
    lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
    lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1406539&r1=1406538&r2=1406539&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Wed Nov  7 10:50:14 2012
@@ -135,6 +135,11 @@ Bug Fixes
 * LUCENE-4534: Fixed WFSTCompletionLookup and Analyzing/FuzzySuggester
   to allow 0 byte values in the lookup keys.  (Mike McCandless)
 
+* LUCENE-4532: DirectoryTaxonomyWriter use a timestamp to denote taxonomy
+  index re-creation, which could cause a bug in case machine clocks were
+  not synced. Instead, it now tracks an 'epoch' version, which is incremented
+  whenever the taxonomy is re-created, or replaced. (Shai Erera)
+
 Optimizations
 
 * LUCENE-4512: Additional memory savings for CompressingStoredFieldsFormat.
@@ -374,7 +379,7 @@ Bug Fixes
   many cases where IndexWriter could leave leftover files (on
   exception in various places, on reuse of a segment name after crash
   and recovery.  (Uwe Schindler, Robert Muir, Mike McCandless)
-
+  
 Optimizations
 
 * LUCENE-4322: Decrease lucene-core JAR size. The core JAR size had increased a

Modified: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java?rev=1406539&r1=1406538&r2=1406539&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java (original)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java Wed Nov  7 10:50:14 2012
@@ -174,6 +174,7 @@ public class DirectoryTaxonomyReader imp
     this.delimiter = delimiter;
   }
 
+  @Override
   public int getOrdinal(CategoryPath categoryPath) throws IOException {
     ensureOpen();
     if (categoryPath.length()==0) {
@@ -218,6 +219,7 @@ public class DirectoryTaxonomyReader imp
     return ret;
   }
 
+  @Override
   public CategoryPath getPath(int ordinal) throws IOException {
     ensureOpen();
     // TODO (Facet): Currently, the LRU cache we use (getCategoryCache) holds
@@ -235,6 +237,7 @@ public class DirectoryTaxonomyReader imp
     return new CategoryPath(label, delimiter);
   }
 
+  @Override
   public boolean getPath(int ordinal, CategoryPath result) throws IOException {
     ensureOpen();
     String label = getLabel(ordinal);
@@ -296,6 +299,7 @@ public class DirectoryTaxonomyReader imp
     return ret;
   }
 
+  @Override
   public int getParent(int ordinal) {
     ensureOpen();
     // Note how we don't need to hold the read lock to do the following,
@@ -327,6 +331,7 @@ public class DirectoryTaxonomyReader imp
    * so you should always call getParentArray() again after a refresh().
    */
 
+  @Override
   public int[] getParentArray() {
     ensureOpen();
     // Note how we don't need to hold the read lock to do the following,
@@ -339,6 +344,7 @@ public class DirectoryTaxonomyReader imp
   // Note that refresh() is synchronized (it is the only synchronized
   // method in this class) to ensure that it never gets called concurrently
   // with itself.
+  @Override
   public synchronized boolean refresh() throws IOException, InconsistentTaxonomyException {
     ensureOpen();
     /*
@@ -357,67 +363,70 @@ public class DirectoryTaxonomyReader imp
     DirectoryReader r2 = DirectoryReader.openIfChanged(indexReader);
     if (r2 == null) {
       return false; // no changes, nothing to do
-    } 
+    }
     
     // validate that a refresh is valid at this point, i.e. that the taxonomy 
     // was not recreated since this reader was last opened or refresshed.
-    String t1 = indexReader.getIndexCommit().getUserData().get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME);
-    String t2 = r2.getIndexCommit().getUserData().get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME);
-    if (t1==null) {
-      if (t2!=null) {
+    String t1 = indexReader.getIndexCommit().getUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH);
+    String t2 = r2.getIndexCommit().getUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH);
+    if (t1 == null) {
+      if (t2 != null) {
         r2.close();
-        throw new InconsistentTaxonomyException("Taxonomy was recreated at: "+t2);
+        throw new InconsistentTaxonomyException("Taxonomy was recreated, epoch= " + t2);
       }
     } else if (!t1.equals(t2)) {
+      // t1 != null and t2 cannot be null b/c DirTaxoWriter always puts the commit data.
+      // it's ok to use String.equals because we require the two epoch values to be the same.
       r2.close();
-      throw new InconsistentTaxonomyException("Taxonomy was recreated at: "+t2+"  !=  "+t1);
+      throw new InconsistentTaxonomyException("Taxonomy was recreated epoch = " + t2 + "  !=  " + t1);
     }
     
-      IndexReader oldreader = indexReader;
-      // we can close the old searcher, but need to synchronize this
-      // so that we don't close it in the middle that another routine
-      // is reading from it.
-      indexReaderLock.writeLock().lock();
-      indexReader = r2;
-      indexReaderLock.writeLock().unlock();
-      // We can close the old reader, but need to be certain that we
-      // don't close it while another method is reading from it.
-      // Luckily, we can be certain of that even without putting the
-      // oldreader.close() in the locked section. The reason is that
-      // after lock() succeeded above, we know that all existing readers
-      // had finished (this is what a read-write lock ensures). New
-      // readers, starting after the unlock() we just did, already got
-      // the new indexReader we set above. So nobody can be possibly
-      // using the old indexReader, and we can close it:
-      oldreader.close();
-
-      // We prefetch some of the arrays to make requests much faster.
-      // Let's refresh these prefetched arrays; This refresh is much
-      // is made more efficient by assuming that it is enough to read
-      // the values for new categories (old categories could not have been
-      // changed or deleted)
-      // Note that this this done without the write lock being held,
-      // which means that it is possible that during a refresh(), a
-      // reader will have some methods (like getOrdinal and getCategory)
-      // return fresh information, while getParent()
-      // (only to be prefetched now) still return older information.
-      // We consider this to be acceptable. The important thing,
-      // however, is that refreshPrefetchArrays() itself writes to
-      // the arrays in a correct manner (see discussion there)
-      parentArray.refresh(indexReader);
-
-      // Remove any INVALID_ORDINAL values from the ordinal cache,
-      // because it is possible those are now answered by the new data!
-      Iterator<Entry<String, Integer>> i = ordinalCache.entrySet().iterator();
-      while (i.hasNext()) {
-        Entry<String, Integer> e = i.next();
-        if (e.getValue().intValue() == INVALID_ORDINAL) {
-          i.remove();
-        }
+    IndexReader oldreader = indexReader;
+    // we can close the old searcher, but need to synchronize this
+    // so that we don't close it in the middle that another routine
+    // is reading from it.
+    indexReaderLock.writeLock().lock();
+    indexReader = r2;
+    indexReaderLock.writeLock().unlock();
+    // We can close the old reader, but need to be certain that we
+    // don't close it while another method is reading from it.
+    // Luckily, we can be certain of that even without putting the
+    // oldreader.close() in the locked section. The reason is that
+    // after lock() succeeded above, we know that all existing readers
+    // had finished (this is what a read-write lock ensures). New
+    // readers, starting after the unlock() we just did, already got
+    // the new indexReader we set above. So nobody can be possibly
+    // using the old indexReader, and we can close it:
+    oldreader.close();
+    
+    // We prefetch some of the arrays to make requests much faster.
+    // Let's refresh these prefetched arrays; This refresh is much
+    // is made more efficient by assuming that it is enough to read
+    // the values for new categories (old categories could not have been
+    // changed or deleted)
+    // Note that this this done without the write lock being held,
+    // which means that it is possible that during a refresh(), a
+    // reader will have some methods (like getOrdinal and getCategory)
+    // return fresh information, while getParent()
+    // (only to be prefetched now) still return older information.
+    // We consider this to be acceptable. The important thing,
+    // however, is that refreshPrefetchArrays() itself writes to
+    // the arrays in a correct manner (see discussion there)
+    parentArray.refresh(indexReader);
+    
+    // Remove any INVALID_ORDINAL values from the ordinal cache,
+    // because it is possible those are now answered by the new data!
+    Iterator<Entry<String, Integer>> i = ordinalCache.entrySet().iterator();
+    while (i.hasNext()) {
+      Entry<String, Integer> e = i.next();
+      if (e.getValue().intValue() == INVALID_ORDINAL) {
+        i.remove();
       }
-      return true;
     }
+    return true;
+  }
 
+  @Override
   public void close() throws IOException {
     if (!closed) {
       synchronized (this) {
@@ -440,6 +449,7 @@ public class DirectoryTaxonomyReader imp
     ordinalCache.clear();
   }
 
+  @Override
   public int getSize() {
     ensureOpen();
     indexReaderLock.readLock().lock();
@@ -450,6 +460,7 @@ public class DirectoryTaxonomyReader imp
     }
   }
 
+  @Override
   public Map<String, String> getCommitUserData() throws IOException {
     ensureOpen();
     return indexReader.getIndexCommit().getUserData();
@@ -458,6 +469,7 @@ public class DirectoryTaxonomyReader imp
   private ChildrenArrays childrenArrays;
   Object childrenArraysRebuild = new Object();
 
+  @Override
   public ChildrenArrays getChildrenArrays() {
     ensureOpen();
     // Check if the taxonomy grew since we built the array, and if it
@@ -543,9 +555,11 @@ public class DirectoryTaxonomyReader imp
       this.youngestChildArray = youngestChildArray;
       this.olderSiblingArray = olderSiblingArray;
     }
+    @Override
     public int[] getOlderSiblingArray() {
       return olderSiblingArray;
     }
+    @Override
     public int[] getYoungestChildArray() {
       return youngestChildArray;
     }    
@@ -567,6 +581,7 @@ public class DirectoryTaxonomyReader imp
    * Expert: decreases the refCount of this TaxonomyReader instance. If the
    * refCount drops to 0, then this reader is closed.
    */
+  @Override
   public void decRef() throws IOException {
     ensureOpen();
     final int rc = refCount.decrementAndGet();
@@ -587,6 +602,7 @@ public class DirectoryTaxonomyReader imp
   }
   
   /** Expert: returns the current refCount for this taxonomy reader */
+  @Override
   public int getRefCount() {
     return refCount.get();
   }
@@ -598,6 +614,7 @@ public class DirectoryTaxonomyReader imp
    * Be sure to always call a corresponding decRef(), in a finally clause; 
    * otherwise the reader may never be closed. 
    */
+  @Override
   public void incRef() {
     ensureOpen();
     refCount.incrementAndGet();

Modified: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java?rev=1406539&r1=1406538&r2=1406539&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java (original)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java Wed Nov  7 10:50:14 2012
@@ -86,23 +86,24 @@ import org.apache.lucene.util.Version;
  * @lucene.experimental
  */
 public class DirectoryTaxonomyWriter implements TaxonomyWriter {
-
+  
   /**
-   * Property name of user commit data that contains the creation time of a
-   * taxonomy index.
+   * Property name of user commit data that contains the index epoch. The epoch
+   * changes whenever the taxonomy is recreated (i.e. opened with
+   * {@link OpenMode#CREATE}.
    * <p>
    * Applications should not use this property in their commit data because it
    * will be overridden by this taxonomy writer.
    */
-  public static final String INDEX_CREATE_TIME = "index.create.time";
-
+  public static final String INDEX_EPOCH = "index.epoch";
+  
   private final Directory dir;
   private final IndexWriter indexWriter;
   private final TaxonomyWriterCache cache;
   private final AtomicInteger cacheMisses = new AtomicInteger(0);
   
-  /** Records the taxonomy index creation time, updated on replaceTaxonomy as well. */
-  private String createTime;
+  // Records the taxonomy index epoch, updated on replaceTaxonomy as well.
+  private long indexEpoch;
   
   private char delimiter = Consts.DEFAULT_DELIMITER;
   private SinglePositionTokenStream parentStream = new SinglePositionTokenStream(Consts.PAYLOAD_PARENT);
@@ -200,28 +201,34 @@ public class DirectoryTaxonomyWriter imp
   public DirectoryTaxonomyWriter(Directory directory, OpenMode openMode,
       TaxonomyWriterCache cache) throws IOException {
 
-    if (!DirectoryReader.indexExists(directory) || openMode==OpenMode.CREATE) {
-      createTime = Long.toString(System.nanoTime());
-    } else {
-      Map<String, String> commitData = readCommitData(directory);
-      if (commitData != null) {
-        // It is ok if an existing index doesn't have commitData, or the
-        // INDEX_CREATE_TIME property. If ever it will be recreated, we'll set
-        // createTime accordingly in the above 'if'. 
-        createTime = commitData.get(INDEX_CREATE_TIME);
-      } else {
-        createTime = null;
-      }
-    }
-    
     dir = directory;
     IndexWriterConfig config = createIndexWriterConfig(openMode);
     indexWriter = openIndexWriter(dir, config);
-    
+
     // verify (to some extent) that merge policy in effect would preserve category docids 
     assert !(indexWriter.getConfig().getMergePolicy() instanceof TieredMergePolicy) : 
       "for preserving category docids, merging none-adjacent segments is not allowed";
     
+    // after we opened the writer, and the index is locked, it's safe to check
+    // the commit data and read the index epoch
+    openMode = config.getOpenMode();
+    if (!DirectoryReader.indexExists(directory)) {
+      indexEpoch = 1;
+    } else {
+      String epochStr = null;
+      Map<String, String> commitData = readCommitData(directory);
+      if (commitData != null) {
+        epochStr = commitData.get(INDEX_EPOCH);
+      }
+      // no commit data, or no epoch in it means an old taxonomy, so set its epoch to 1, for lack
+      // of a better value.
+      indexEpoch = epochStr == null ? 1 : Long.parseLong(epochStr);
+    }
+    
+    if (openMode == OpenMode.CREATE) {
+      ++indexEpoch;
+    }
+    
     FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
     ft.setOmitNorms(true);
     parentStreamField = new Field(Consts.FIELD_PAYLOADS, parentStream, ft);
@@ -670,9 +677,7 @@ public class DirectoryTaxonomyWriter imp
     if (userData != null) {
       m.putAll(userData);
     }
-    if (createTime != null) {
-      m.put(INDEX_CREATE_TIME, createTime);
-    }
+    m.put(INDEX_EPOCH, Long.toString(indexEpoch));
     return m;
   }
   
@@ -1022,8 +1027,8 @@ public class DirectoryTaxonomyWriter imp
     cacheIsComplete = false;
     shouldFillCache = true;
     
-    // update createTime as a taxonomy replace is just like it has be recreated
-    createTime = Long.toString(System.nanoTime());
+    // update indexEpoch as a taxonomy replace is just like it has be recreated
+    ++indexEpoch;
   }
 
   /** Returns the {@link Directory} of this taxonomy writer. */

Modified: lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java?rev=1406539&r1=1406538&r2=1406539&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java (original)
+++ lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java Wed Nov  7 10:50:14 2012
@@ -96,10 +96,10 @@ public class TestDirectoryTaxonomyWriter
     Map <String, String> readUserCommitData = r.getIndexCommit().getUserData();
     assertTrue("wrong value extracted from commit data", 
         "1 2 3".equals(readUserCommitData.get("testing")));
-    assertNotNull("index.create.time not found in commitData", readUserCommitData.get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME));
+    assertNotNull(DirectoryTaxonomyWriter.INDEX_EPOCH + " not found in commitData", readUserCommitData.get(DirectoryTaxonomyWriter.INDEX_EPOCH));
     r.close();
     
-    // open DirTaxoWriter again and commit, INDEX_CREATE_TIME should still exist
+    // open DirTaxoWriter again and commit, INDEX_EPOCH should still exist
     // in the commit data, otherwise DirTaxoReader.refresh() might not detect
     // that the taxonomy index has been recreated.
     taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
@@ -111,7 +111,7 @@ public class TestDirectoryTaxonomyWriter
     
     r = DirectoryReader.open(dir);
     readUserCommitData = r.getIndexCommit().getUserData();
-    assertNotNull("index.create.time not found in commitData", readUserCommitData.get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME));
+    assertNotNull(DirectoryTaxonomyWriter.INDEX_EPOCH + " not found in commitData", readUserCommitData.get(DirectoryTaxonomyWriter.INDEX_EPOCH));
     r.close();
     
     dir.close();
@@ -119,7 +119,7 @@ public class TestDirectoryTaxonomyWriter
   
   @Test
   public void testRollback() throws Exception {
-    // Verifies that if callback is called, DTW is closed.
+    // Verifies that if rollback is called, DTW is closed.
     Directory dir = newDirectory();
     DirectoryTaxonomyWriter dtw = new DirectoryTaxonomyWriter(dir);
     dtw.addCategory(new CategoryPath("a"));
@@ -130,6 +130,19 @@ public class TestDirectoryTaxonomyWriter
     } catch (AlreadyClosedException e) {
       // expected
     }
+    
+    dir.close();
+  }
+  
+  @Test
+  public void testRecreateRollback() throws Exception {
+    // Tests rollback with OpenMode.CREATE
+    Directory dir = newDirectory();
+    new DirectoryTaxonomyWriter(dir).close();
+    assertEquals(1, getEpoch(dir));
+    new DirectoryTaxonomyWriter(dir, OpenMode.CREATE).rollback();
+    assertEquals(1, getEpoch(dir));
+    
     dir.close();
   }
   
@@ -157,7 +170,7 @@ public class TestDirectoryTaxonomyWriter
   
   @Test
   public void testRecreateAndRefresh() throws Exception {
-    // DirTaxoWriter lost the INDEX_CREATE_TIME property if it was opened in
+    // DirTaxoWriter lost the INDEX_EPOCH property if it was opened in
     // CREATE_OR_APPEND (or commit(userData) called twice), which could lead to
     // DirTaxoReader succeeding to refresh().
     Directory dir = newDirectory();
@@ -172,7 +185,7 @@ public class TestDirectoryTaxonomyWriter
     // this should not fail
     taxoReader.refresh();
 
-    // now recreate the taxonomy, and check that the timestamp is preserved after opening DirTW again.
+    // now recreate the taxonomy, and check that the epoch is preserved after opening DirTW again.
     taxoWriter.close();
     taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, NO_OP_CACHE);
     touchTaxo(taxoWriter, new CategoryPath("c"));
@@ -195,19 +208,19 @@ public class TestDirectoryTaxonomyWriter
   }
 
   @Test
-  public void testUndefinedCreateTime() throws Exception {
-    // tests that if the taxonomy index doesn't have the INDEX_CREATE_TIME
+  public void testBackwardsCompatibility() throws Exception {
+    // tests that if the taxonomy index doesn't have the INDEX_EPOCH
     // property (supports pre-3.6 indexes), all still works.
     Directory dir = newDirectory();
     
-    // create an empty index first, so that DirTaxoWriter initializes createTime to null.
+    // create an empty index first, so that DirTaxoWriter initializes indexEpoch to 1.
     new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null)).close();
     
     DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
-    // we cannot commit null keys/values, this ensures that if DirTW.createTime is null, we can still commit.
     taxoWriter.close();
     
     DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir);
+    assertEquals(1, Integer.parseInt(taxoReader.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH)));
     taxoReader.refresh();
     taxoReader.close();
     
@@ -267,10 +280,10 @@ public class TestDirectoryTaxonomyWriter
     dir.close();
   }
 
-  private String getCreateTime(Directory taxoDir) throws IOException {
+  private long getEpoch(Directory taxoDir) throws IOException {
     SegmentInfos infos = new SegmentInfos();
     infos.read(taxoDir);
-    return infos.getUserData().get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME);
+    return Long.parseLong(infos.getUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH));
   }
   
   @Test
@@ -286,7 +299,7 @@ public class TestDirectoryTaxonomyWriter
     taxoWriter.addCategory(new CategoryPath("c"));
     taxoWriter.commit();
     
-    String origCreateTime = getCreateTime(dir);
+    long origEpoch = getEpoch(dir);
     
     // replace the taxonomy with the input one
     taxoWriter.replaceTaxonomy(input);
@@ -298,8 +311,8 @@ public class TestDirectoryTaxonomyWriter
     
     taxoWriter.close();
     
-    String newCreateTime = getCreateTime(dir);
-    assertNotSame("create time should have been changed after replaceTaxonomy", origCreateTime, newCreateTime);
+    long newEpoch = getEpoch(dir);
+    assertTrue("index epoch should have been updated after replaceTaxonomy", origEpoch < newEpoch);
     
     dir.close();
     input.close();