You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2021/07/29 17:12:39 UTC

[lucene] 02/06: Use BDV or a StoredField based on the Lucene version that has created the last index commit

This is an automated email from the ASF dual-hosted git repository.

mikemccand pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git

commit 162131ecf82bf84cb113d934a1af51842c78bbdf
Author: Gautam Worah <ga...@amazon.com>
AuthorDate: Mon Jul 26 11:50:49 2021 -0700

    Use BDV or a StoredField based on the Lucene version that has created
    the last index commit
    
    If the Lucene version was < 9, then use a StringField; otherwise,
    if the index is fresh or if the index was built using a
    version >= 9, use a BDV field.
---
 .../lucene/facet/taxonomy/directory/Consts.java       |   1 -
 .../taxonomy/directory/DirectoryTaxonomyReader.java   |   2 +-
 .../taxonomy/directory/DirectoryTaxonomyWriter.java   |  15 ++++++++++++++-
 .../directory/TestBackwardsCompatibility.java         |  18 ++++++++++--------
 .../facet/taxonomy/directory/taxonomy.8.10.0-cfs.zip  | Bin 0 -> 3092 bytes
 .../facet/taxonomy/directory/taxonomy.8.6.3-cfs.zip   | Bin 3058 -> 0 bytes
 6 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java
index 5bc253f..104bfdf 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java
@@ -21,7 +21,6 @@ import org.apache.lucene.util.BytesRef;
 /** @lucene.experimental */
 abstract class Consts {
   static final String FULL = "$full_path$";
-  static final String FULL_BINARY = "$full_path_binary$";
   static final String FIELD_PAYLOADS = "$payloads$";
   static final String PAYLOAD_PARENT = "p";
   static final BytesRef PAYLOAD_PARENT_BYTES_REF = new BytesRef(PAYLOAD_PARENT);
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
index 71d3481..ea38d8c 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
@@ -335,7 +335,7 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
     int readerIndex = ReaderUtil.subIndex(ordinal, indexReader.leaves());
     LeafReader leafReader = indexReader.leaves().get(readerIndex).reader();
     // TODO: Use LUCENE-9476 to get the bulk lookup API for extracting BinaryDocValues
-    BinaryDocValues values = leafReader.getBinaryDocValues(Consts.FULL_BINARY);
+    BinaryDocValues values = leafReader.getBinaryDocValues(Consts.FULL);
 
     FacetLabel ret;
 
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
index 53445b1..f32fa27 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
@@ -62,6 +62,7 @@ import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.LockObtainFailedException;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Version;
 
 /**
  * {@link TaxonomyWriter} which uses a {@link Directory} to store the taxonomy information on disk,
@@ -475,8 +476,20 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
 
     String fieldPath = FacetsConfig.pathToString(categoryPath.components, categoryPath.length);
     fullPathField.setStringValue(fieldPath);
+
+    boolean commitExists = indexWriter.getLiveCommitData().iterator().hasNext();
+    /* no commits so this is a fresh index, or the old index was built using a Lucene 9 or greater version */
+    if ((commitExists == false)
+        || (SegmentInfos.readLatestCommit(dir)
+            .getMinSegmentLuceneVersion()
+            .onOrAfter(Version.LUCENE_9_0_0))) {
+      /* Lucene 9 introduces BinaryDocValuesField for storing taxonomy categories */
+      d.add(new BinaryDocValuesField(Consts.FULL, new BytesRef(fieldPath)));
+    } else {
+      fullPathField = new StringField(Consts.FULL, fieldPath, Field.Store.YES);
+    }
+
     d.add(fullPathField);
-    d.add(new BinaryDocValuesField(Consts.FULL_BINARY, new BytesRef(fieldPath)));
 
     // Note that we do no pass an Analyzer here because the fields that are
     // added to the Document are untokenized or contains their own TokenStream.
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestBackwardsCompatibility.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestBackwardsCompatibility.java
index 76138bd..0f05d32 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestBackwardsCompatibility.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestBackwardsCompatibility.java
@@ -49,12 +49,8 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
   //
   // Then move the zip file to your trunk checkout and use it in your test cases
 
-  public static final String oldTaxonomyIndexName = "taxonomy.8.6.3-cfs";
+  public static final String oldTaxonomyIndexName = "taxonomy.8.10.0-cfs";
 
-  // LUCENE-9334 requires consistency of field data structures between documents.
-  // Old taxonomy index had $full_path$ field indexed only with postings,
-  // It is not allowed to add the same field $full_path$ indexed with BinaryDocValues
-  // for a new segment, that this test is trying to do.
   public void testCreateNewTaxonomy() throws IOException {
     createNewTaxonomyIndex(oldTaxonomyIndexName);
   }
@@ -67,8 +63,8 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
 
     DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
 
-    FacetLabel cp_b = new FacetLabel("b");
-    writer.addCategory(cp_b);
+    FacetLabel cp_c = new FacetLabel("c");
+    writer.addCategory(cp_c);
     writer.getInternalIndexWriter().forceMerge(1);
     writer.commit();
 
@@ -79,10 +75,15 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
     // Just asserting ord1 != TaxonomyReader.INVALID_ORDINAL is not enough to check compatibility
     assertNotNull(reader.getPath(ord1));
 
-    int ord2 = reader.getOrdinal(cp_b);
+    int ord2 = reader.getOrdinal(new FacetLabel("b"));
     assert ord2 != TaxonomyReader.INVALID_ORDINAL;
+    // Just asserting ord2 != TaxonomyReader.INVALID_ORDINAL is not enough to check compatibility
     assertNotNull(reader.getPath(ord2));
 
+    int ord3 = reader.getOrdinal(cp_c);
+    assert ord3 != TaxonomyReader.INVALID_ORDINAL;
+    assertNotNull(reader.getPath(ord3));
+
     reader.close();
     writer.close();
     dir.close();
@@ -102,6 +103,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
     TaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
 
     writer.addCategory(new FacetLabel("a"));
+    writer.addCategory(new FacetLabel("b"));
     writer.commit();
     writer.close();
     dir.close();
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/taxonomy.8.10.0-cfs.zip b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/taxonomy.8.10.0-cfs.zip
new file mode 100644
index 0000000..a412ab2
Binary files /dev/null and b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/taxonomy.8.10.0-cfs.zip differ
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/taxonomy.8.6.3-cfs.zip b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/taxonomy.8.6.3-cfs.zip
deleted file mode 100644
index d04c706..0000000
Binary files a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/taxonomy.8.6.3-cfs.zip and /dev/null differ