You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by vi...@apache.org on 2023/01/18 23:12:20 UTC

[lucene] branch branch_9x updated: Deprecate support for UTF8TaxonomyWriterCache (#12093)

This is an automated email from the ASF dual-hosted git repository.

vigyasharma pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new 9d6173148d7 Deprecate support for UTF8TaxonomyWriterCache (#12093)
9d6173148d7 is described below

commit 9d6173148d7a7c25128f86ee61fca00ac88baaf7
Author: Vigya Sharma <vi...@gmail.com>
AuthorDate: Wed Jan 18 15:12:13 2023 -0800

    Deprecate support for UTF8TaxonomyWriterCache (#12093)
    
    Deprecate support for UTF8TaxonomyWriterCache and change the default to LruTaxonomyWriterCache.
---
 lucene/CHANGES.txt                                          |  3 +++
 .../facet/taxonomy/directory/DirectoryTaxonomyWriter.java   | 13 ++++++-------
 .../facet/taxonomy/writercache/UTF8TaxonomyWriterCache.java |  7 ++++++-
 .../taxonomy/directory/TestConcurrentFacetedIndexing.java   |  5 ++---
 .../taxonomy/directory/TestDirectoryTaxonomyWriter.java     | 10 ++++------
 5 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index a49ea36a082..838452a56ec 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -7,6 +7,9 @@ http://s.apache.org/luceneversions
 
 API Changes
 ---------------------
+* GITHUB#12093: Deprecate support for UTF8TaxonomyWriterCache and change the default to LruTaxonomyWriterCache.
+  Please use LruTaxonomyWriterCache instead. (Vigya Sharma)
+
 * GITHUB#11998: Add new stored fields and termvectors interfaces: IndexReader.storedFields()
   and IndexReader.termVectors(). Deprecate IndexReader.document() and IndexReader.getTermVector().
   The new APIs do not rely upon ThreadLocal storage for each index segment, which can greatly
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
index de5a78bbf17..d55010019a7 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
@@ -43,7 +43,6 @@ import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
 import org.apache.lucene.facet.taxonomy.writercache.LruTaxonomyWriterCache;
 import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
-import org.apache.lucene.facet.taxonomy.writercache.UTF8TaxonomyWriterCache;
 import org.apache.lucene.index.CorruptIndexException; // javadocs
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
@@ -90,6 +89,8 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
    */
   public static final String INDEX_EPOCH = "index.epoch";
 
+  private static final int DEFAULT_CACHE_SIZE = 4000;
+
   private final Directory dir;
   private final IndexWriter indexWriter;
   private final boolean useOlderFormat;
@@ -139,9 +140,8 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
    *     APPEND_OR_CREATE</code> appends to an existing index if there is one, otherwise it creates
    *     a new index.
    * @param cache A {@link TaxonomyWriterCache} implementation which determines the in-memory
-   *     caching policy. See for example {@link LruTaxonomyWriterCache} and {@link
-   *     UTF8TaxonomyWriterCache}. If null or missing, {@link #defaultTaxonomyWriterCache()} is
-   *     used.
+   *     caching policy. See for example {@link LruTaxonomyWriterCache}. If null or missing, {@link
+   *     #defaultTaxonomyWriterCache()} is used.
    * @throws CorruptIndexException if the taxonomy is corrupted.
    * @throws LockObtainFailedException if the taxonomy is locked by another writer.
    * @throws IOException if another error occurred.
@@ -296,11 +296,10 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
    * Defines the default {@link TaxonomyWriterCache} to use in constructors which do not specify
    * one.
    *
-   * <p>The current default is {@link UTF8TaxonomyWriterCache}, i.e., the entire taxonomy is cached
-   * in memory while building it.
+   * <p>The current default is {@link LruTaxonomyWriterCache}.
    */
   public static TaxonomyWriterCache defaultTaxonomyWriterCache() {
-    return new UTF8TaxonomyWriterCache();
+    return new LruTaxonomyWriterCache(DEFAULT_CACHE_SIZE);
   }
 
   /** Create this with {@code OpenMode.CREATE_OR_APPEND}. */
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/UTF8TaxonomyWriterCache.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/UTF8TaxonomyWriterCache.java
index 9d3c3d61970..dd91ba833ab 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/UTF8TaxonomyWriterCache.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/UTF8TaxonomyWriterCache.java
@@ -28,7 +28,12 @@ import org.apache.lucene.util.Counter;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.UnicodeUtil;
 
-/** A "cache" that never frees memory, and stores labels in a BytesRefHash (utf-8 encoding). */
+/**
+ * A "cache" that never frees memory, and stores labels in a BytesRefHash (utf-8 encoding).
+ *
+ * @deprecated Use {@link LruTaxonomyWriterCache} instead.
+ */
+@Deprecated
 public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache, Accountable {
   private final ThreadLocal<BytesRefBuilder> bytes =
       new ThreadLocal<BytesRefBuilder>() {
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java
index c7909001fdb..e3aab92be83 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java
@@ -27,7 +27,6 @@ import org.apache.lucene.facet.FacetsConfig;
 import org.apache.lucene.facet.taxonomy.FacetLabel;
 import org.apache.lucene.facet.taxonomy.writercache.LruTaxonomyWriterCache;
 import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
-import org.apache.lucene.facet.taxonomy.writercache.UTF8TaxonomyWriterCache;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.store.Directory;
@@ -79,8 +78,8 @@ public class TestConcurrentFacetedIndexing extends FacetTestCase {
   static TaxonomyWriterCache newTaxoWriterCache(int ndocs) {
     final double d = random().nextDouble();
     if (d < 0.7) {
-      // this is the fastest, yet most memory consuming
-      return new UTF8TaxonomyWriterCache();
+      // the default cache: an LruTaxonomyWriterCache with the default cache size
+      return DirectoryTaxonomyWriter.defaultTaxonomyWriterCache();
     } else if (TEST_NIGHTLY && d > 0.98) {
       // this is the slowest, but tests the writer concurrency when no caching is done.
       // only pick it during NIGHTLY tests, and even then, with very low chances.
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java
index 13115864eb9..eb098f08524 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java
@@ -33,7 +33,6 @@ import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap;
 import org.apache.lucene.facet.taxonomy.writercache.LruTaxonomyWriterCache;
 import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
-import org.apache.lucene.facet.taxonomy.writercache.UTF8TaxonomyWriterCache;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
@@ -170,7 +169,7 @@ public class TestDirectoryTaxonomyWriter extends FacetTestCase {
     // Verifies that if rollback is called, DTW is closed.
     Directory dir = newDirectory();
     DirectoryTaxonomyWriter dtw = new DirectoryTaxonomyWriter(dir);
-    assertTrue(dtw.getCache() instanceof UTF8TaxonomyWriterCache);
+    assertTrue(dtw.getCache() instanceof LruTaxonomyWriterCache);
     dtw.addCategory(new FacetLabel("a"));
     dtw.rollback();
     // should not have succeeded to add a category following rollback.
@@ -300,8 +299,8 @@ public class TestDirectoryTaxonomyWriter extends FacetTestCase {
     final double d = random().nextDouble();
     final TaxonomyWriterCache cache;
     if (d < 0.7) {
-      // this is the fastest, yet most memory consuming
-      cache = new UTF8TaxonomyWriterCache();
+      // the default cache: an LruTaxonomyWriterCache with the default cache size
+      cache = DirectoryTaxonomyWriter.defaultTaxonomyWriterCache();
     } else if (TEST_NIGHTLY && d > 0.98) {
       // this is the slowest, but tests the writer concurrency when no caching is done.
       // only pick it during NIGHTLY tests, and even then, with very low chances.
@@ -506,8 +505,7 @@ public class TestDirectoryTaxonomyWriter extends FacetTestCase {
     Directory indexDir = newDirectory(), taxoDir = newDirectory();
     IndexWriter indexWriter =
         new IndexWriter(indexDir, newIndexWriterConfig(new MockAnalyzer(random())));
-    DirectoryTaxonomyWriter taxoWriter =
-        new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, new UTF8TaxonomyWriterCache());
+    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
     FacetsConfig config = new FacetsConfig();
 
     // Add one huge label: