You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2012/05/16 15:11:08 UTC
svn commit: r1339150 - in /lucene/dev/trunk/lucene: ./
facet/src/java/org/apache/lucene/facet/taxonomy/directory/
facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/
facet/src/java/org/apache/lucene/facet/taxonomy/writercache/lru/ facet/s...
Author: shaie
Date: Wed May 16 13:11:07 2012
New Revision: 1339150
URL: http://svn.apache.org/viewvc?rev=1339150&view=rev
Log:
LUCENE-4061: improvements to DirectoryTaxonomyWriter
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/Cl2oTaxonomyWriterCache.java
lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/lru/LruTaxonomyWriterCache.java
lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestAddTaxonomy.java
lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1339150&r1=1339149&r2=1339150&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Wed May 16 13:11:07 2012
@@ -914,7 +914,11 @@ Optimizations
FST under the hood, which requires less RAM. NormalizeCharMap no
longer accepts empty string match (it did previously, but ignored
it). (Dawid Weiss, Mike McCandless)
-
+
+* LUCENE-4061: improve synchronization in DirectoryTaxonomyWriter.addCategory
+ and a few general improvements to DirectoryTaxonomyWriter.
+ (Shai Erera, Gilad Barkai)
+
Bug fixes
* LUCENE-2803: The FieldCache can miss values if an entry for a reader
Modified: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java?rev=1339150&r1=1339149&r2=1339150&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java (original)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java Wed May 16 13:11:07 2012
@@ -27,6 +27,7 @@ import org.apache.lucene.facet.taxonomy.
import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
import org.apache.lucene.facet.taxonomy.writercache.cl2o.Cl2oTaxonomyWriterCache;
import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache;
+import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsEnum;
@@ -40,7 +41,6 @@ import org.apache.lucene.index.SegmentIn
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TieredMergePolicy;
-import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
@@ -348,18 +348,6 @@ public class DirectoryTaxonomyWriter imp
}
/**
- * Returns the number of memory bytes used by the cache.
- * @return Number of cache bytes in memory, for CL2O only; zero otherwise.
- */
- public int getCacheMemoryUsage() {
- ensureOpen();
- if (this.cache == null || !(this.cache instanceof Cl2oTaxonomyWriterCache)) {
- return 0;
- }
- return ((Cl2oTaxonomyWriterCache)this.cache).getMemoryUsage();
- }
-
- /**
* A hook for extending classes to close additional resources that were used.
* The default implementation closes the {@link IndexReader} as well as the
* {@link TaxonomyWriterCache} instances that were used. <br>
@@ -413,21 +401,26 @@ public class DirectoryTaxonomyWriter imp
reader = openReader();
}
- // TODO (Facet): avoid Multi*?
- Bits liveDocs = MultiFields.getLiveDocs(reader);
- DocsEnum docs = MultiFields.getTermDocsEnum(reader, liveDocs, Consts.FULL,
- new BytesRef(categoryPath.toString(delimiter)),
- false);
- if (docs == null || docs.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
- return -1; // category does not exist in taxonomy
+ int base = 0;
+ int doc = -1;
+ for (AtomicReader r : reader.getSequentialSubReaders()) {
+ DocsEnum docs = r.termDocsEnum(null, Consts.FULL,
+ new BytesRef(categoryPath.toString(delimiter)), false);
+ if (docs != null) {
+ doc = docs.nextDoc() + base;
+ break;
+ }
+ base += r.maxDoc(); // we don't have deletions, so it's ok to call maxDoc
}
// Note: we do NOT add to the cache the fact that the category
// does not exist. The reason is that our only use for this
// method is just before we actually add this category. If
// in the future this usage changes, we should consider caching
// the fact that the category is not in the taxonomy.
- addToCache(categoryPath, docs.docID());
- return docs.docID();
+ if (doc > 0) {
+ addToCache(categoryPath, doc);
+ }
+ return doc;
}
/**
@@ -436,7 +429,7 @@ public class DirectoryTaxonomyWriter imp
* case the category does not yet exist in the taxonomy.
*/
private int findCategory(CategoryPath categoryPath, int prefixLen)
- throws IOException {
+ throws IOException {
int res = cache.get(categoryPath, prefixLen);
if (res >= 0) {
return res;
@@ -451,36 +444,46 @@ public class DirectoryTaxonomyWriter imp
if (reader == null) {
reader = openReader();
}
- Bits liveDocs = MultiFields.getLiveDocs(reader);
- DocsEnum docs = MultiFields.getTermDocsEnum(reader, liveDocs, Consts.FULL,
- new BytesRef(categoryPath.toString(delimiter, prefixLen)),
- false);
- if (docs == null || docs.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
- return -1; // category does not exist in taxonomy
- }
- addToCache(categoryPath, prefixLen, docs.docID());
- return docs.docID();
- }
-
- // TODO (Facet): addCategory() is synchronized. This means that if indexing is
- // multi-threaded, a new category that needs to be written to disk (and
- // potentially even trigger a lengthy merge) locks out other addCategory()
- // calls - even those which could immediately return a cached value.
- // We definitely need to fix this situation!
+
+ int base = 0;
+ int doc = -1;
+ for (AtomicReader r : reader.getSequentialSubReaders()) {
+ DocsEnum docs = r.termDocsEnum(null, Consts.FULL,
+ new BytesRef(categoryPath.toString(delimiter, prefixLen)), false);
+ if (docs != null) {
+ doc = docs.nextDoc() + base;
+ break;
+ }
+ base += r.maxDoc(); // we don't have deletions, so it's ok to call maxDoc
+ }
+
+ if (doc > 0) {
+ addToCache(categoryPath, prefixLen, doc);
+ }
+ return doc;
+ }
+
@Override
- public synchronized int addCategory(CategoryPath categoryPath) throws IOException {
+ public int addCategory(CategoryPath categoryPath) throws IOException {
ensureOpen();
// If the category is already in the cache and/or the taxonomy, we
- // should return its existing ordinal:
+ // should return its existing ordinal
int res = findCategory(categoryPath);
if (res < 0) {
- // This is a new category, and we need to insert it into the index
- // (and the cache). Actually, we might also need to add some of
- // the category's ancestors before we can add the category itself
- // (while keeping the invariant that a parent is always added to
- // the taxonomy before its child). internalAddCategory() does all
- // this recursively:
- res = internalAddCategory(categoryPath, categoryPath.length());
+ // the category is neither in the cache nor in the index - following code
+ // cannot be executed in parallel.
+ synchronized (this) {
+ res = findCategory(categoryPath);
+ if (res < 0) {
+ // This is a new category, and we need to insert it into the index
+ // (and the cache). Actually, we might also need to add some of
+ // the category's ancestors before we can add the category itself
+ // (while keeping the invariant that a parent is always added to
+ // the taxonomy before its child). internalAddCategory() does all
+ // this recursively
+ res = internalAddCategory(categoryPath, categoryPath.length());
+ }
+ }
}
return res;
@@ -496,7 +499,7 @@ public class DirectoryTaxonomyWriter imp
* recursion.
*/
private int internalAddCategory(CategoryPath categoryPath, int length)
- throws CorruptIndexException, IOException {
+ throws IOException {
// Find our parent's ordinal (recursively adding the parent category
// to the taxonomy if it's not already there). Then add the parent
@@ -528,13 +531,12 @@ public class DirectoryTaxonomyWriter imp
}
}
- // Note that the methods calling addCategoryDocument() are synchornized,
- // so this method is effectively synchronized as well, but we'll add
- // synchronized to be on the safe side, and we can reuse class-local objects
- // instead of allocating them every time
- protected synchronized int addCategoryDocument(CategoryPath categoryPath,
- int length, int parent)
- throws CorruptIndexException, IOException {
+ /**
+ * Note that the methods calling addCategoryDocument() are synchronized, so
+ * this method is effectively synchronized as well.
+ */
+ private int addCategoryDocument(CategoryPath categoryPath, int length,
+ int parent) throws IOException {
// Before Lucene 2.9, position increments >=0 were supported, so we
// added 1 to parent to allow the parent -1 (the parent of the root).
// Unfortunately, starting with Lucene 2.9, after LUCENE-1542, this is
@@ -544,7 +546,7 @@ public class DirectoryTaxonomyWriter imp
// we write here (e.g., to write parent+2), and need to do a workaround
// in the reader (which knows that anyway only category 0 has a parent
// -1).
- parentStream.set(parent+1);
+ parentStream.set(parent + 1);
Document d = new Document();
d.add(parentStreamField);
@@ -601,8 +603,7 @@ public class DirectoryTaxonomyWriter imp
}
}
- private void addToCache(CategoryPath categoryPath, int id)
- throws CorruptIndexException, IOException {
+ private void addToCache(CategoryPath categoryPath, int id) throws IOException {
if (cache.put(categoryPath, id)) {
// If cache.put() returned true, it means the cache was limited in
// size, became full, so parts of it had to be cleared.
@@ -620,7 +621,7 @@ public class DirectoryTaxonomyWriter imp
}
private void addToCache(CategoryPath categoryPath, int prefixLen, int id)
- throws CorruptIndexException, IOException {
+ throws IOException {
if (cache.put(categoryPath, prefixLen, id)) {
refreshReader();
cacheIsComplete = false;
@@ -766,7 +767,29 @@ public class DirectoryTaxonomyWriter imp
}
CategoryPath cp = new CategoryPath();
- Terms terms = MultiFields.getTerms(reader, Consts.FULL);
+ TermsEnum termsEnum = null;
+ DocsEnum docsEnum = null;
+ int base = 0;
+ for (AtomicReader r : reader.getSequentialSubReaders()) {
+ Terms terms = r.terms(Consts.FULL);
+ if (terms != null) { // cannot really happen, but be on the safe side
+ termsEnum = terms.iterator(termsEnum);
+ while (termsEnum.next() != null) {
+ BytesRef t = termsEnum.term();
+ // Since we guarantee uniqueness of categories, each term has exactly
+ // one document. Also, since we do not allow removing categories (and
+ // hence documents), there are no deletions in the index. Therefore, it
+ // is sufficient to call next(), and then doc(), exactly once with no
+ // 'validation' checks.
+ cp.clear();
+ cp.add(t.utf8ToString(), delimiter);
+ docsEnum = termsEnum.docs(null, docsEnum, false);
+ cache.put(cp, docsEnum.nextDoc() + base);
+ }
+ }
+ base += r.maxDoc(); // we don't have any deletions, so we're ok
+ }
+ /*Terms terms = MultiFields.getTerms(reader, Consts.FULL);
// The check is done here to avoid checking it on every iteration of the
// below loop. A null term wlil be returned if there are no terms in the
// lexicon, or after the Consts.FULL term. However while the loop is
@@ -786,11 +809,10 @@ public class DirectoryTaxonomyWriter imp
docsEnum = termsEnum.docs(liveDocs, docsEnum, false);
docsEnum.nextDoc();
cp.clear();
- // TODO (Facet): avoid String creation/use bytes?
cp.add(t.utf8ToString(), delimiter);
cache.put(cp, docsEnum.docID());
}
- }
+ }*/
cacheIsComplete = true;
// No sense to keep the reader open - we will not need to read from it
@@ -832,35 +854,34 @@ public class DirectoryTaxonomyWriter imp
*/
public void addTaxonomy(Directory taxoDir, OrdinalMap map) throws IOException {
ensureOpen();
- IndexReader r = DirectoryReader.open(taxoDir);
+ DirectoryReader r = DirectoryReader.open(taxoDir);
try {
final int size = r.numDocs();
final OrdinalMap ordinalMap = map;
ordinalMap.setSize(size);
CategoryPath cp = new CategoryPath();
- Terms terms = MultiFields.getTerms(r, Consts.FULL);
- TermsEnum te = terms.iterator(null);
- Bits liveDocs = MultiFields.getLiveDocs(r);
+ int base = 0;
+ TermsEnum te = null;
DocsEnum docs = null;
- // we call next() first, to skip the root category which always exists.
- while (te.next() != null) {
- String value = te.term().utf8ToString();
- cp.clear();
- cp.add(value, Consts.DEFAULT_DELIMITER);
- int ordinal = findCategory(cp);
- if (ordinal < 0) {
- // NOTE: call addCategory so that it works well in a multi-threaded
- // environment, in case e.g. a thread just added the category, after
- // the findCategory() call above failed to find it.
- ordinal = addCategory(cp);
+ for (AtomicReader ar : r.getSequentialSubReaders()) {
+ Terms terms = ar.terms(Consts.FULL);
+ te = terms.iterator(te);
+ while (te.next() != null) {
+ String value = te.term().utf8ToString();
+ cp.clear();
+ cp.add(value, Consts.DEFAULT_DELIMITER);
+ int ordinal = findCategory(cp);
+ if (ordinal < 0) {
+ // NOTE: call addCategory so that it works well in a multi-threaded
+ // environment, in case e.g. a thread just added the category, after
+ // the findCategory() call above failed to find it.
+ ordinal = addCategory(cp);
+ }
+ docs = te.docs(null, docs, false);
+ ordinalMap.addMapping(docs.nextDoc() + base, ordinal);
}
- docs = te.docs(liveDocs, docs, false);
- ordinalMap.addMapping(docs.nextDoc(), ordinal);
+ base += ar.maxDoc(); // no deletions, so we're ok
}
- // we must add the root ordinal map, so that the map will be complete
- // (otherwise e.g. DiskOrdinalMap may fail because it expects more
- // categories to exist in the file).
- ordinalMap.addMapping(0, 0);
ordinalMap.addDone();
} finally {
r.close();
Modified: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/Cl2oTaxonomyWriterCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/Cl2oTaxonomyWriterCache.java?rev=1339150&r1=1339149&r2=1339150&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/Cl2oTaxonomyWriterCache.java (original)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/Cl2oTaxonomyWriterCache.java Wed May 16 13:11:07 2012
@@ -1,5 +1,8 @@
package org.apache.lucene.facet.taxonomy.writercache.cl2o;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
@@ -30,44 +33,71 @@ import org.apache.lucene.facet.taxonomy.
*/
public class Cl2oTaxonomyWriterCache implements TaxonomyWriterCache {
+ private final ReadWriteLock lock = new ReentrantReadWriteLock();
private CompactLabelToOrdinal cache;
public Cl2oTaxonomyWriterCache(int initialCapcity, float loadFactor, int numHashArrays) {
this.cache = new CompactLabelToOrdinal(initialCapcity, loadFactor, numHashArrays);
}
- public void close() {
- cache=null;
+ @Override
+ public synchronized void close() {
+ cache = null;
}
+ @Override
public boolean hasRoom(int n) {
// This cache is unlimited, so we always have room for remembering more:
return true;
}
+ @Override
public int get(CategoryPath categoryPath) {
- return cache.getOrdinal(categoryPath);
+ lock.readLock().lock();
+ try {
+ return cache.getOrdinal(categoryPath);
+ } finally {
+ lock.readLock().unlock();
+ }
}
+ @Override
public int get(CategoryPath categoryPath, int length) {
- if (length<0 || length>categoryPath.length()) {
+ if (length < 0 || length > categoryPath.length()) {
length = categoryPath.length();
}
- return cache.getOrdinal(categoryPath, length);
+ lock.readLock().lock();
+ try {
+ return cache.getOrdinal(categoryPath, length);
+ } finally {
+ lock.readLock().unlock();
+ }
}
+ @Override
public boolean put(CategoryPath categoryPath, int ordinal) {
- cache.addLabel(categoryPath, ordinal);
- // Tell the caller we didn't clear part of the cache, so it doesn't
- // have to flush its on-disk index now
- return false;
+ lock.writeLock().lock();
+ try {
+ cache.addLabel(categoryPath, ordinal);
+ // Tell the caller we didn't clear part of the cache, so it doesn't
+ // have to flush its on-disk index now
+ return false;
+ } finally {
+ lock.writeLock().unlock();
+ }
}
+ @Override
public boolean put(CategoryPath categoryPath, int prefixLen, int ordinal) {
- cache.addLabel(categoryPath, prefixLen, ordinal);
- // Tell the caller we didn't clear part of the cache, so it doesn't
- // have to flush its on-disk index now
- return false;
+ lock.writeLock().lock();
+ try {
+ cache.addLabel(categoryPath, prefixLen, ordinal);
+ // Tell the caller we didn't clear part of the cache, so it doesn't
+ // have to flush its on-disk index now
+ return false;
+ } finally {
+ lock.writeLock().unlock();
+ }
}
/**
@@ -75,8 +105,7 @@ public class Cl2oTaxonomyWriterCache imp
* @return Number of bytes in memory used by this object.
*/
public int getMemoryUsage() {
- int memoryUsage = (this.cache == null) ? 0 : this.cache.getMemoryUsage();
- return memoryUsage;
+ return cache == null ? 0 : cache.getMemoryUsage();
}
}
Modified: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/lru/LruTaxonomyWriterCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/lru/LruTaxonomyWriterCache.java?rev=1339150&r1=1339149&r2=1339150&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/lru/LruTaxonomyWriterCache.java (original)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/lru/LruTaxonomyWriterCache.java Wed May 16 13:11:07 2012
@@ -60,16 +60,19 @@ public class LruTaxonomyWriterCache impl
}
}
- public boolean hasRoom(int n) {
- return n<=(cache.getMaxSize()-cache.getSize());
+ @Override
+ public synchronized boolean hasRoom(int n) {
+ return n <= (cache.getMaxSize() - cache.getSize());
}
- public void close() {
+ @Override
+ public synchronized void close() {
cache.clear();
- cache=null;
+ cache = null;
}
- public int get(CategoryPath categoryPath) {
+ @Override
+ public synchronized int get(CategoryPath categoryPath) {
Integer res = cache.get(categoryPath);
if (res == null) {
return -1;
@@ -78,7 +81,8 @@ public class LruTaxonomyWriterCache impl
return res.intValue();
}
- public int get(CategoryPath categoryPath, int length) {
+ @Override
+ public synchronized int get(CategoryPath categoryPath, int length) {
if (length<0 || length>categoryPath.length()) {
length = categoryPath.length();
}
@@ -94,7 +98,8 @@ public class LruTaxonomyWriterCache impl
return res.intValue();
}
- public boolean put(CategoryPath categoryPath, int ordinal) {
+ @Override
+ public synchronized boolean put(CategoryPath categoryPath, int ordinal) {
boolean ret = cache.put(categoryPath, new Integer(ordinal));
// If the cache is full, we need to clear one or more old entries
// from the cache. However, if we delete from the cache a recent
@@ -109,7 +114,8 @@ public class LruTaxonomyWriterCache impl
return ret;
}
- public boolean put(CategoryPath categoryPath, int prefixLen, int ordinal) {
+ @Override
+ public synchronized boolean put(CategoryPath categoryPath, int prefixLen, int ordinal) {
boolean ret = cache.put(categoryPath, prefixLen, new Integer(ordinal));
// If the cache is full, we need to clear one or more old entries
// from the cache. However, if we delete from the cache a recent
@@ -125,4 +131,3 @@ public class LruTaxonomyWriterCache impl
}
}
-
Modified: lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestAddTaxonomy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestAddTaxonomy.java?rev=1339150&r1=1339149&r2=1339150&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestAddTaxonomy.java (original)
+++ lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestAddTaxonomy.java Wed May 16 13:11:07 2012
@@ -3,6 +3,7 @@ package org.apache.lucene.facet.taxonomy
import java.io.IOException;
import java.util.HashSet;
import java.util.Random;
+import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.DiskOrdinalMap;
@@ -32,16 +33,32 @@ import org.apache.lucene.util._TestUtil;
public class TestAddTaxonomy extends LuceneTestCase {
- private void dotest(int ncats, int range) throws Exception {
+ private void dotest(int ncats, final int range) throws Exception {
+ final AtomicInteger numCats = new AtomicInteger(ncats);
Directory dirs[] = new Directory[2];
- Random random = random();
for (int i = 0; i < dirs.length; i++) {
dirs[i] = newDirectory();
- DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[i]);
- for (int j = 0; j < ncats; j++) {
- String cat = Integer.toString(random.nextInt(range));
- tw.addCategory(new CategoryPath("a", cat));
+ final DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[i]);
+ Thread[] addThreads = new Thread[4];
+ for (int j = 0; j < addThreads.length; j++) {
+ addThreads[j] = new Thread() {
+ @Override
+ public void run() {
+ Random random = random();
+ while (numCats.decrementAndGet() > 0) {
+ String cat = Integer.toString(random.nextInt(range));
+ try {
+ tw.addCategory(new CategoryPath("a", cat));
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ }
+ };
}
+
+ for (Thread t : addThreads) t.start();
+ for (Thread t : addThreads) t.join();
tw.close();
}
@@ -133,11 +150,9 @@ public class TestAddTaxonomy extends Luc
}
// A more comprehensive and big random test.
- @Nightly
public void testBig() throws Exception {
dotest(200, 10000);
dotest(1000, 20000);
- // really big
dotest(400000, 1000000);
}
Modified: lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java?rev=1339150&r1=1339149&r2=1339150&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java (original)
+++ lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java Wed May 16 13:11:07 2012
@@ -3,11 +3,16 @@ package org.apache.lucene.facet.taxonomy
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
+import java.util.Random;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicInteger;
-import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.InconsistentTaxonomyException;
import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
+import org.apache.lucene.facet.taxonomy.writercache.cl2o.Cl2oTaxonomyWriterCache;
+import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache;
+import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
@@ -42,11 +47,17 @@ public class TestDirectoryTaxonomyWriter
NoOpCache() { }
+ @Override
public void close() {}
+ @Override
public int get(CategoryPath categoryPath) { return -1; }
+ @Override
public int get(CategoryPath categoryPath, int length) { return get(categoryPath); }
+ @Override
public boolean put(CategoryPath categoryPath, int ordinal) { return true; }
+ @Override
public boolean put(CategoryPath categoryPath, int prefixLen, int ordinal) { return true; }
+ @Override
public boolean hasRoom(int numberOfEntries) { return false; }
}
@@ -201,5 +212,48 @@ public class TestDirectoryTaxonomyWriter
dir.close();
}
+
+ public void testConcurrency() throws Exception {
+ int ncats = atLeast(100000); // add many categories
+ final int range = ncats * 3; // affects the categories selection
+ final AtomicInteger numCats = new AtomicInteger(ncats);
+ Directory dir = newDirectory();
+ final ConcurrentHashMap<Integer,Integer> values = new ConcurrentHashMap<Integer,Integer>();
+ TaxonomyWriterCache cache = random().nextBoolean()
+ ? new Cl2oTaxonomyWriterCache(1024, 0.15f, 3)
+ : new LruTaxonomyWriterCache(ncats / 10);
+ final DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, cache);
+ Thread[] addThreads = new Thread[atLeast(4)];
+ for (int z = 0; z < addThreads.length; z++) {
+ addThreads[z] = new Thread() {
+ @Override
+ public void run() {
+ Random random = random();
+ while (numCats.decrementAndGet() > 0) {
+ try {
+ int value = random.nextInt(range);
+ tw.addCategory(new CategoryPath("a", Integer.toString(value)));
+ values.put(value, value);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ }
+ };
+ }
+
+ for (Thread t : addThreads) t.start();
+ for (Thread t : addThreads) t.join();
+ tw.close();
+
+ DirectoryTaxonomyReader dtr = new DirectoryTaxonomyReader(dir);
+ assertEquals(values.size() + 2, dtr.getSize()); // +2 for root category + "a"
+ for (Integer value : values.keySet()) {
+ assertTrue("category not found a/" + value, dtr.getOrdinal(new CategoryPath("a", value.toString())) > 0);
+ }
+ dtr.close();
+
+ dir.close();
+ }
}