You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2012/11/07 11:50:15 UTC
svn commit: r1406539 - in /lucene/dev/trunk/lucene: ./
facet/src/java/org/apache/lucene/facet/taxonomy/directory/
facet/src/test/org/apache/lucene/facet/taxonomy/directory/
Author: shaie
Date: Wed Nov 7 10:50:14 2012
New Revision: 1406539
URL: http://svn.apache.org/viewvc?rev=1406539&view=rev
Log:
LUCENE-4532: fix DirectoryTaxonomyWriter to track the taxonomy epoch, rather than create timestamp
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1406539&r1=1406538&r2=1406539&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Wed Nov 7 10:50:14 2012
@@ -135,6 +135,11 @@ Bug Fixes
* LUCENE-4534: Fixed WFSTCompletionLookup and Analyzing/FuzzySuggester
to allow 0 byte values in the lookup keys. (Mike McCandless)
+* LUCENE-4532: DirectoryTaxonomyWriter used a timestamp to denote taxonomy
+ index re-creation, which could cause a bug in case machine clocks were
+ not synced. Instead, it now tracks an 'epoch' version, which is incremented
+ whenever the taxonomy is re-created, or replaced. (Shai Erera)
+
Optimizations
* LUCENE-4512: Additional memory savings for CompressingStoredFieldsFormat.
@@ -374,7 +379,7 @@ Bug Fixes
many cases where IndexWriter could leave leftover files (on
exception in various places, on reuse of a segment name after crash
and recovery. (Uwe Schindler, Robert Muir, Mike McCandless)
-
+
Optimizations
* LUCENE-4322: Decrease lucene-core JAR size. The core JAR size had increased a
Modified: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java?rev=1406539&r1=1406538&r2=1406539&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java (original)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java Wed Nov 7 10:50:14 2012
@@ -174,6 +174,7 @@ public class DirectoryTaxonomyReader imp
this.delimiter = delimiter;
}
+ @Override
public int getOrdinal(CategoryPath categoryPath) throws IOException {
ensureOpen();
if (categoryPath.length()==0) {
@@ -218,6 +219,7 @@ public class DirectoryTaxonomyReader imp
return ret;
}
+ @Override
public CategoryPath getPath(int ordinal) throws IOException {
ensureOpen();
// TODO (Facet): Currently, the LRU cache we use (getCategoryCache) holds
@@ -235,6 +237,7 @@ public class DirectoryTaxonomyReader imp
return new CategoryPath(label, delimiter);
}
+ @Override
public boolean getPath(int ordinal, CategoryPath result) throws IOException {
ensureOpen();
String label = getLabel(ordinal);
@@ -296,6 +299,7 @@ public class DirectoryTaxonomyReader imp
return ret;
}
+ @Override
public int getParent(int ordinal) {
ensureOpen();
// Note how we don't need to hold the read lock to do the following,
@@ -327,6 +331,7 @@ public class DirectoryTaxonomyReader imp
* so you should always call getParentArray() again after a refresh().
*/
+ @Override
public int[] getParentArray() {
ensureOpen();
// Note how we don't need to hold the read lock to do the following,
@@ -339,6 +344,7 @@ public class DirectoryTaxonomyReader imp
// Note that refresh() is synchronized (it is the only synchronized
// method in this class) to ensure that it never gets called concurrently
// with itself.
+ @Override
public synchronized boolean refresh() throws IOException, InconsistentTaxonomyException {
ensureOpen();
/*
@@ -357,67 +363,70 @@ public class DirectoryTaxonomyReader imp
DirectoryReader r2 = DirectoryReader.openIfChanged(indexReader);
if (r2 == null) {
return false; // no changes, nothing to do
- }
+ }
// validate that a refresh is valid at this point, i.e. that the taxonomy
// was not recreated since this reader was last opened or refreshed.
- String t1 = indexReader.getIndexCommit().getUserData().get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME);
- String t2 = r2.getIndexCommit().getUserData().get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME);
- if (t1==null) {
- if (t2!=null) {
+ String t1 = indexReader.getIndexCommit().getUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH);
+ String t2 = r2.getIndexCommit().getUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH);
+ if (t1 == null) {
+ if (t2 != null) {
r2.close();
- throw new InconsistentTaxonomyException("Taxonomy was recreated at: "+t2);
+ throw new InconsistentTaxonomyException("Taxonomy was recreated, epoch= " + t2);
}
} else if (!t1.equals(t2)) {
+ // t1 != null and t2 cannot be null b/c DirTaxoWriter always puts the commit data.
+ // it's ok to use String.equals because we require the two epoch values to be the same.
r2.close();
- throw new InconsistentTaxonomyException("Taxonomy was recreated at: "+t2+" != "+t1);
+ throw new InconsistentTaxonomyException("Taxonomy was recreated epoch = " + t2 + " != " + t1);
}
- IndexReader oldreader = indexReader;
- // we can close the old searcher, but need to synchronize this
- // so that we don't close it in the middle that another routine
- // is reading from it.
- indexReaderLock.writeLock().lock();
- indexReader = r2;
- indexReaderLock.writeLock().unlock();
- // We can close the old reader, but need to be certain that we
- // don't close it while another method is reading from it.
- // Luckily, we can be certain of that even without putting the
- // oldreader.close() in the locked section. The reason is that
- // after lock() succeeded above, we know that all existing readers
- // had finished (this is what a read-write lock ensures). New
- // readers, starting after the unlock() we just did, already got
- // the new indexReader we set above. So nobody can be possibly
- // using the old indexReader, and we can close it:
- oldreader.close();
-
- // We prefetch some of the arrays to make requests much faster.
- // Let's refresh these prefetched arrays; This refresh is much
- // is made more efficient by assuming that it is enough to read
- // the values for new categories (old categories could not have been
- // changed or deleted)
- // Note that this this done without the write lock being held,
- // which means that it is possible that during a refresh(), a
- // reader will have some methods (like getOrdinal and getCategory)
- // return fresh information, while getParent()
- // (only to be prefetched now) still return older information.
- // We consider this to be acceptable. The important thing,
- // however, is that refreshPrefetchArrays() itself writes to
- // the arrays in a correct manner (see discussion there)
- parentArray.refresh(indexReader);
-
- // Remove any INVALID_ORDINAL values from the ordinal cache,
- // because it is possible those are now answered by the new data!
- Iterator<Entry<String, Integer>> i = ordinalCache.entrySet().iterator();
- while (i.hasNext()) {
- Entry<String, Integer> e = i.next();
- if (e.getValue().intValue() == INVALID_ORDINAL) {
- i.remove();
- }
+ IndexReader oldreader = indexReader;
+ // we can close the old searcher, but need to synchronize this
+ // so that we don't close it in the middle that another routine
+ // is reading from it.
+ indexReaderLock.writeLock().lock();
+ indexReader = r2;
+ indexReaderLock.writeLock().unlock();
+ // We can close the old reader, but need to be certain that we
+ // don't close it while another method is reading from it.
+ // Luckily, we can be certain of that even without putting the
+ // oldreader.close() in the locked section. The reason is that
+ // after lock() succeeded above, we know that all existing readers
+ // had finished (this is what a read-write lock ensures). New
+ // readers, starting after the unlock() we just did, already got
+ // the new indexReader we set above. So nobody can be possibly
+ // using the old indexReader, and we can close it:
+ oldreader.close();
+
+ // We prefetch some of the arrays to make requests much faster.
+ // Let's refresh these prefetched arrays; This refresh
+ + is made much more efficient by assuming that it is enough to read
+ // the values for new categories (old categories could not have been
+ // changed or deleted)
+ // Note that this is done without the write lock being held,
+ // which means that it is possible that during a refresh(), a
+ // reader will have some methods (like getOrdinal and getCategory)
+ // return fresh information, while getParent()
+ // (only to be prefetched now) still return older information.
+ // We consider this to be acceptable. The important thing,
+ // however, is that refreshPrefetchArrays() itself writes to
+ // the arrays in a correct manner (see discussion there)
+ parentArray.refresh(indexReader);
+
+ // Remove any INVALID_ORDINAL values from the ordinal cache,
+ // because it is possible those are now answered by the new data!
+ Iterator<Entry<String, Integer>> i = ordinalCache.entrySet().iterator();
+ while (i.hasNext()) {
+ Entry<String, Integer> e = i.next();
+ if (e.getValue().intValue() == INVALID_ORDINAL) {
+ i.remove();
}
- return true;
}
+ return true;
+ }
+ @Override
public void close() throws IOException {
if (!closed) {
synchronized (this) {
@@ -440,6 +449,7 @@ public class DirectoryTaxonomyReader imp
ordinalCache.clear();
}
+ @Override
public int getSize() {
ensureOpen();
indexReaderLock.readLock().lock();
@@ -450,6 +460,7 @@ public class DirectoryTaxonomyReader imp
}
}
+ @Override
public Map<String, String> getCommitUserData() throws IOException {
ensureOpen();
return indexReader.getIndexCommit().getUserData();
@@ -458,6 +469,7 @@ public class DirectoryTaxonomyReader imp
private ChildrenArrays childrenArrays;
Object childrenArraysRebuild = new Object();
+ @Override
public ChildrenArrays getChildrenArrays() {
ensureOpen();
// Check if the taxonomy grew since we built the array, and if it
@@ -543,9 +555,11 @@ public class DirectoryTaxonomyReader imp
this.youngestChildArray = youngestChildArray;
this.olderSiblingArray = olderSiblingArray;
}
+ @Override
public int[] getOlderSiblingArray() {
return olderSiblingArray;
}
+ @Override
public int[] getYoungestChildArray() {
return youngestChildArray;
}
@@ -567,6 +581,7 @@ public class DirectoryTaxonomyReader imp
* Expert: decreases the refCount of this TaxonomyReader instance. If the
* refCount drops to 0, then this reader is closed.
*/
+ @Override
public void decRef() throws IOException {
ensureOpen();
final int rc = refCount.decrementAndGet();
@@ -587,6 +602,7 @@ public class DirectoryTaxonomyReader imp
}
/** Expert: returns the current refCount for this taxonomy reader */
+ @Override
public int getRefCount() {
return refCount.get();
}
@@ -598,6 +614,7 @@ public class DirectoryTaxonomyReader imp
* Be sure to always call a corresponding decRef(), in a finally clause;
* otherwise the reader may never be closed.
*/
+ @Override
public void incRef() {
ensureOpen();
refCount.incrementAndGet();
Modified: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java?rev=1406539&r1=1406538&r2=1406539&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java (original)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java Wed Nov 7 10:50:14 2012
@@ -86,23 +86,24 @@ import org.apache.lucene.util.Version;
* @lucene.experimental
*/
public class DirectoryTaxonomyWriter implements TaxonomyWriter {
-
+
/**
- * Property name of user commit data that contains the creation time of a
- * taxonomy index.
+ * Property name of user commit data that contains the index epoch. The epoch
+ * changes whenever the taxonomy is recreated (i.e. opened with
* {@link OpenMode#CREATE}).
* <p>
* Applications should not use this property in their commit data because it
* will be overridden by this taxonomy writer.
*/
- public static final String INDEX_CREATE_TIME = "index.create.time";
-
+ public static final String INDEX_EPOCH = "index.epoch";
+
private final Directory dir;
private final IndexWriter indexWriter;
private final TaxonomyWriterCache cache;
private final AtomicInteger cacheMisses = new AtomicInteger(0);
- /** Records the taxonomy index creation time, updated on replaceTaxonomy as well. */
- private String createTime;
+ // Records the taxonomy index epoch, updated on replaceTaxonomy as well.
+ private long indexEpoch;
private char delimiter = Consts.DEFAULT_DELIMITER;
private SinglePositionTokenStream parentStream = new SinglePositionTokenStream(Consts.PAYLOAD_PARENT);
@@ -200,28 +201,34 @@ public class DirectoryTaxonomyWriter imp
public DirectoryTaxonomyWriter(Directory directory, OpenMode openMode,
TaxonomyWriterCache cache) throws IOException {
- if (!DirectoryReader.indexExists(directory) || openMode==OpenMode.CREATE) {
- createTime = Long.toString(System.nanoTime());
- } else {
- Map<String, String> commitData = readCommitData(directory);
- if (commitData != null) {
- // It is ok if an existing index doesn't have commitData, or the
- // INDEX_CREATE_TIME property. If ever it will be recreated, we'll set
- // createTime accordingly in the above 'if'.
- createTime = commitData.get(INDEX_CREATE_TIME);
- } else {
- createTime = null;
- }
- }
-
dir = directory;
IndexWriterConfig config = createIndexWriterConfig(openMode);
indexWriter = openIndexWriter(dir, config);
-
+
// verify (to some extent) that merge policy in effect would preserve category docids
assert !(indexWriter.getConfig().getMergePolicy() instanceof TieredMergePolicy) :
"for preserving category docids, merging none-adjacent segments is not allowed";
+ // after we opened the writer, and the index is locked, it's safe to check
+ // the commit data and read the index epoch
+ openMode = config.getOpenMode();
+ if (!DirectoryReader.indexExists(directory)) {
+ indexEpoch = 1;
+ } else {
+ String epochStr = null;
+ Map<String, String> commitData = readCommitData(directory);
+ if (commitData != null) {
+ epochStr = commitData.get(INDEX_EPOCH);
+ }
+ // no commit data, or no epoch in it means an old taxonomy, so set its epoch to 1, for lack
+ // of a better value.
+ indexEpoch = epochStr == null ? 1 : Long.parseLong(epochStr);
+ }
+
+ if (openMode == OpenMode.CREATE) {
+ ++indexEpoch;
+ }
+
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setOmitNorms(true);
parentStreamField = new Field(Consts.FIELD_PAYLOADS, parentStream, ft);
@@ -670,9 +677,7 @@ public class DirectoryTaxonomyWriter imp
if (userData != null) {
m.putAll(userData);
}
- if (createTime != null) {
- m.put(INDEX_CREATE_TIME, createTime);
- }
+ m.put(INDEX_EPOCH, Long.toString(indexEpoch));
return m;
}
@@ -1022,8 +1027,8 @@ public class DirectoryTaxonomyWriter imp
cacheIsComplete = false;
shouldFillCache = true;
- // update createTime as a taxonomy replace is just like it has be recreated
- createTime = Long.toString(System.nanoTime());
+ // update indexEpoch as a taxonomy replace is just like it had been recreated
+ ++indexEpoch;
}
/** Returns the {@link Directory} of this taxonomy writer. */
Modified: lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java?rev=1406539&r1=1406538&r2=1406539&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java (original)
+++ lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java Wed Nov 7 10:50:14 2012
@@ -96,10 +96,10 @@ public class TestDirectoryTaxonomyWriter
Map <String, String> readUserCommitData = r.getIndexCommit().getUserData();
assertTrue("wrong value extracted from commit data",
"1 2 3".equals(readUserCommitData.get("testing")));
- assertNotNull("index.create.time not found in commitData", readUserCommitData.get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME));
+ assertNotNull(DirectoryTaxonomyWriter.INDEX_EPOCH + " not found in commitData", readUserCommitData.get(DirectoryTaxonomyWriter.INDEX_EPOCH));
r.close();
- // open DirTaxoWriter again and commit, INDEX_CREATE_TIME should still exist
+ // open DirTaxoWriter again and commit, INDEX_EPOCH should still exist
// in the commit data, otherwise DirTaxoReader.refresh() might not detect
// that the taxonomy index has been recreated.
taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
@@ -111,7 +111,7 @@ public class TestDirectoryTaxonomyWriter
r = DirectoryReader.open(dir);
readUserCommitData = r.getIndexCommit().getUserData();
- assertNotNull("index.create.time not found in commitData", readUserCommitData.get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME));
+ assertNotNull(DirectoryTaxonomyWriter.INDEX_EPOCH + " not found in commitData", readUserCommitData.get(DirectoryTaxonomyWriter.INDEX_EPOCH));
r.close();
dir.close();
@@ -119,7 +119,7 @@ public class TestDirectoryTaxonomyWriter
@Test
public void testRollback() throws Exception {
- // Verifies that if callback is called, DTW is closed.
+ // Verifies that if rollback is called, DTW is closed.
Directory dir = newDirectory();
DirectoryTaxonomyWriter dtw = new DirectoryTaxonomyWriter(dir);
dtw.addCategory(new CategoryPath("a"));
@@ -130,6 +130,19 @@ public class TestDirectoryTaxonomyWriter
} catch (AlreadyClosedException e) {
// expected
}
+
+ dir.close();
+ }
+
+ @Test
+ public void testRecreateRollback() throws Exception {
+ // Tests rollback with OpenMode.CREATE
+ Directory dir = newDirectory();
+ new DirectoryTaxonomyWriter(dir).close();
+ assertEquals(1, getEpoch(dir));
+ new DirectoryTaxonomyWriter(dir, OpenMode.CREATE).rollback();
+ assertEquals(1, getEpoch(dir));
+
dir.close();
}
@@ -157,7 +170,7 @@ public class TestDirectoryTaxonomyWriter
@Test
public void testRecreateAndRefresh() throws Exception {
- // DirTaxoWriter lost the INDEX_CREATE_TIME property if it was opened in
+ // DirTaxoWriter lost the INDEX_EPOCH property if it was opened in
// CREATE_OR_APPEND (or commit(userData) called twice), which could lead to
// DirTaxoReader succeeding to refresh().
Directory dir = newDirectory();
@@ -172,7 +185,7 @@ public class TestDirectoryTaxonomyWriter
// this should not fail
taxoReader.refresh();
- // now recreate the taxonomy, and check that the timestamp is preserved after opening DirTW again.
+ // now recreate the taxonomy, and check that the epoch is preserved after opening DirTW again.
taxoWriter.close();
taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, NO_OP_CACHE);
touchTaxo(taxoWriter, new CategoryPath("c"));
@@ -195,19 +208,19 @@ public class TestDirectoryTaxonomyWriter
}
@Test
- public void testUndefinedCreateTime() throws Exception {
- // tests that if the taxonomy index doesn't have the INDEX_CREATE_TIME
+ public void testBackwardsCompatibility() throws Exception {
+ // tests that if the taxonomy index doesn't have the INDEX_EPOCH
// property (supports pre-3.6 indexes), all still works.
Directory dir = newDirectory();
- // create an empty index first, so that DirTaxoWriter initializes createTime to null.
+ // create an empty index first, so that DirTaxoWriter initializes indexEpoch to 1.
new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null)).close();
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
- // we cannot commit null keys/values, this ensures that if DirTW.createTime is null, we can still commit.
taxoWriter.close();
DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir);
+ assertEquals(1, Integer.parseInt(taxoReader.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH)));
taxoReader.refresh();
taxoReader.close();
@@ -267,10 +280,10 @@ public class TestDirectoryTaxonomyWriter
dir.close();
}
- private String getCreateTime(Directory taxoDir) throws IOException {
+ private long getEpoch(Directory taxoDir) throws IOException {
SegmentInfos infos = new SegmentInfos();
infos.read(taxoDir);
- return infos.getUserData().get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME);
+ return Long.parseLong(infos.getUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH));
}
@Test
@@ -286,7 +299,7 @@ public class TestDirectoryTaxonomyWriter
taxoWriter.addCategory(new CategoryPath("c"));
taxoWriter.commit();
- String origCreateTime = getCreateTime(dir);
+ long origEpoch = getEpoch(dir);
// replace the taxonomy with the input one
taxoWriter.replaceTaxonomy(input);
@@ -298,8 +311,8 @@ public class TestDirectoryTaxonomyWriter
taxoWriter.close();
- String newCreateTime = getCreateTime(dir);
- assertNotSame("create time should have been changed after replaceTaxonomy", origCreateTime, newCreateTime);
+ long newEpoch = getEpoch(dir);
+ assertTrue("index epoch should have been updated after replaceTaxonomy", origEpoch < newEpoch);
dir.close();
input.close();