You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by vi...@apache.org on 2023/01/18 23:12:20 UTC
[lucene] branch branch_9x updated: Deprecate support for UTF8TaxonomyWriterCache (#12093)
This is an automated email from the ASF dual-hosted git repository.
vigyasharma pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new 9d6173148d7 Deprecate support for UTF8TaxonomyWriterCache (#12093)
9d6173148d7 is described below
commit 9d6173148d7a7c25128f86ee61fca00ac88baaf7
Author: Vigya Sharma <vi...@gmail.com>
AuthorDate: Wed Jan 18 15:12:13 2023 -0800
Deprecate support for UTF8TaxonomyWriterCache (#12093)
Deprecate support for UTF8TaxonomyWriterCache and change the default to LruTaxonomyWriterCache.
---
lucene/CHANGES.txt | 3 +++
.../facet/taxonomy/directory/DirectoryTaxonomyWriter.java | 13 ++++++-------
.../facet/taxonomy/writercache/UTF8TaxonomyWriterCache.java | 7 ++++++-
.../taxonomy/directory/TestConcurrentFacetedIndexing.java | 5 ++---
.../taxonomy/directory/TestDirectoryTaxonomyWriter.java | 10 ++++------
5 files changed, 21 insertions(+), 17 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index a49ea36a082..838452a56ec 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -7,6 +7,9 @@ http://s.apache.org/luceneversions
API Changes
---------------------
+* GITHUB#12093: Deprecate UTF8TaxonomyWriterCache and change the default to LruTaxonomyWriterCache.
+ Please use LruTaxonomyWriterCache instead. (Vigya Sharma)
+
* GITHUB#11998: Add new stored fields and termvectors interfaces: IndexReader.storedFields()
and IndexReader.termVectors(). Deprecate IndexReader.document() and IndexReader.getTermVector().
The new APIs do not rely upon ThreadLocal storage for each index segment, which can greatly
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
index de5a78bbf17..d55010019a7 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
@@ -43,7 +43,6 @@ import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.writercache.LruTaxonomyWriterCache;
import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
-import org.apache.lucene.facet.taxonomy.writercache.UTF8TaxonomyWriterCache;
import org.apache.lucene.index.CorruptIndexException; // javadocs
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
@@ -90,6 +89,8 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
*/
public static final String INDEX_EPOCH = "index.epoch";
+ private static final int DEFAULT_CACHE_SIZE = 4000;
+
private final Directory dir;
private final IndexWriter indexWriter;
private final boolean useOlderFormat;
@@ -139,9 +140,8 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
* APPEND_OR_CREATE</code> appends to an existing index if there is one, otherwise it creates
* a new index.
* @param cache A {@link TaxonomyWriterCache} implementation which determines the in-memory
- * caching policy. See for example {@link LruTaxonomyWriterCache} and {@link
- * UTF8TaxonomyWriterCache}. If null or missing, {@link #defaultTaxonomyWriterCache()} is
- * used.
+ * caching policy. See for example {@link LruTaxonomyWriterCache}. If null or missing, {@link
+ * #defaultTaxonomyWriterCache()} is used.
* @throws CorruptIndexException if the taxonomy is corrupted.
* @throws LockObtainFailedException if the taxonomy is locked by another writer.
* @throws IOException if another error occurred.
@@ -296,11 +296,10 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
* Defines the default {@link TaxonomyWriterCache} to use in constructors which do not specify
* one.
*
- * <p>The current default is {@link UTF8TaxonomyWriterCache}, i.e., the entire taxonomy is cached
- * in memory while building it.
+ * <p>The current default is an {@link LruTaxonomyWriterCache} with a cache size of 4000.
*/
public static TaxonomyWriterCache defaultTaxonomyWriterCache() {
- return new UTF8TaxonomyWriterCache();
+ return new LruTaxonomyWriterCache(DEFAULT_CACHE_SIZE);
}
/** Create this with {@code OpenMode.CREATE_OR_APPEND}. */
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/UTF8TaxonomyWriterCache.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/UTF8TaxonomyWriterCache.java
index 9d3c3d61970..dd91ba833ab 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/UTF8TaxonomyWriterCache.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/UTF8TaxonomyWriterCache.java
@@ -28,7 +28,12 @@ import org.apache.lucene.util.Counter;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.UnicodeUtil;
-/** A "cache" that never frees memory, and stores labels in a BytesRefHash (utf-8 encoding). */
+/**
+ * A "cache" that never frees memory, and stores labels in a BytesRefHash (utf-8 encoding).
+ *
+ * @deprecated Use {@link LruTaxonomyWriterCache} instead.
+ */
+@Deprecated
public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache, Accountable {
private final ThreadLocal<BytesRefBuilder> bytes =
new ThreadLocal<BytesRefBuilder>() {
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java
index c7909001fdb..e3aab92be83 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java
@@ -27,7 +27,6 @@ import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.writercache.LruTaxonomyWriterCache;
import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
-import org.apache.lucene.facet.taxonomy.writercache.UTF8TaxonomyWriterCache;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
@@ -79,8 +78,8 @@ public class TestConcurrentFacetedIndexing extends FacetTestCase {
static TaxonomyWriterCache newTaxoWriterCache(int ndocs) {
final double d = random().nextDouble();
if (d < 0.7) {
- // this is the fastest, yet most memory consuming
- return new UTF8TaxonomyWriterCache();
+ // same as LruTaxonomyWriterCache but with the default cache size
+ return DirectoryTaxonomyWriter.defaultTaxonomyWriterCache();
} else if (TEST_NIGHTLY && d > 0.98) {
// this is the slowest, but tests the writer concurrency when no caching is done.
// only pick it during NIGHTLY tests, and even then, with very low chances.
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java
index 13115864eb9..eb098f08524 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java
@@ -33,7 +33,6 @@ import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap;
import org.apache.lucene.facet.taxonomy.writercache.LruTaxonomyWriterCache;
import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
-import org.apache.lucene.facet.taxonomy.writercache.UTF8TaxonomyWriterCache;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
@@ -170,7 +169,7 @@ public class TestDirectoryTaxonomyWriter extends FacetTestCase {
// Verifies that if rollback is called, DTW is closed.
Directory dir = newDirectory();
DirectoryTaxonomyWriter dtw = new DirectoryTaxonomyWriter(dir);
- assertTrue(dtw.getCache() instanceof UTF8TaxonomyWriterCache);
+ assertTrue(dtw.getCache() instanceof LruTaxonomyWriterCache);
dtw.addCategory(new FacetLabel("a"));
dtw.rollback();
// should not have succeeded to add a category following rollback.
@@ -300,8 +299,8 @@ public class TestDirectoryTaxonomyWriter extends FacetTestCase {
final double d = random().nextDouble();
final TaxonomyWriterCache cache;
if (d < 0.7) {
- // this is the fastest, yet most memory consuming
- cache = new UTF8TaxonomyWriterCache();
+ // same as LruTaxonomyWriterCache but with the default cache size
+ cache = DirectoryTaxonomyWriter.defaultTaxonomyWriterCache();
} else if (TEST_NIGHTLY && d > 0.98) {
// this is the slowest, but tests the writer concurrency when no caching is done.
// only pick it during NIGHTLY tests, and even then, with very low chances.
@@ -506,8 +505,7 @@ public class TestDirectoryTaxonomyWriter extends FacetTestCase {
Directory indexDir = newDirectory(), taxoDir = newDirectory();
IndexWriter indexWriter =
new IndexWriter(indexDir, newIndexWriterConfig(new MockAnalyzer(random())));
- DirectoryTaxonomyWriter taxoWriter =
- new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, new UTF8TaxonomyWriterCache());
+ DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
FacetsConfig config = new FacetsConfig();
// Add one huge label: