You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2021/07/29 17:12:39 UTC
[lucene] 02/06: Use BDV or a StoredField based on the Lucene
version that has created the last index commit
This is an automated email from the ASF dual-hosted git repository.
mikemccand pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
commit 162131ecf82bf84cb113d934a1af51842c78bbdf
Author: Gautam Worah <ga...@amazon.com>
AuthorDate: Mon Jul 26 11:50:49 2021 -0700
Use BDV or a StoredField based on the Lucene version that has created
the last index commit
If the Lucene version was < 9, then use a StringField; otherwise,
if the index is fresh or if the index was built using a
version >= 9, use a BDV field.
---
.../lucene/facet/taxonomy/directory/Consts.java | 1 -
.../taxonomy/directory/DirectoryTaxonomyReader.java | 2 +-
.../taxonomy/directory/DirectoryTaxonomyWriter.java | 15 ++++++++++++++-
.../directory/TestBackwardsCompatibility.java | 18 ++++++++++--------
.../facet/taxonomy/directory/taxonomy.8.10.0-cfs.zip | Bin 0 -> 3092 bytes
.../facet/taxonomy/directory/taxonomy.8.6.3-cfs.zip | Bin 3058 -> 0 bytes
6 files changed, 25 insertions(+), 11 deletions(-)
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java
index 5bc253f..104bfdf 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java
@@ -21,7 +21,6 @@ import org.apache.lucene.util.BytesRef;
/** @lucene.experimental */
abstract class Consts {
static final String FULL = "$full_path$";
- static final String FULL_BINARY = "$full_path_binary$";
static final String FIELD_PAYLOADS = "$payloads$";
static final String PAYLOAD_PARENT = "p";
static final BytesRef PAYLOAD_PARENT_BYTES_REF = new BytesRef(PAYLOAD_PARENT);
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
index 71d3481..ea38d8c 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
@@ -335,7 +335,7 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
int readerIndex = ReaderUtil.subIndex(ordinal, indexReader.leaves());
LeafReader leafReader = indexReader.leaves().get(readerIndex).reader();
// TODO: Use LUCENE-9476 to get the bulk lookup API for extracting BinaryDocValues
- BinaryDocValues values = leafReader.getBinaryDocValues(Consts.FULL_BINARY);
+ BinaryDocValues values = leafReader.getBinaryDocValues(Consts.FULL);
FacetLabel ret;
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
index 53445b1..f32fa27 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
@@ -62,6 +62,7 @@ import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Version;
/**
* {@link TaxonomyWriter} which uses a {@link Directory} to store the taxonomy information on disk,
@@ -475,8 +476,20 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
String fieldPath = FacetsConfig.pathToString(categoryPath.components, categoryPath.length);
fullPathField.setStringValue(fieldPath);
+
+ boolean commitExists = indexWriter.getLiveCommitData().iterator().hasNext();
+ /* no commits so this is a fresh index, or the old index was built using a Lucene 9 or greater version */
+ if ((commitExists == false)
+ || (SegmentInfos.readLatestCommit(dir)
+ .getMinSegmentLuceneVersion()
+ .onOrAfter(Version.LUCENE_9_0_0))) {
+ /* Lucene 9 introduces BinaryDocValuesField for storing taxonomy categories */
+ d.add(new BinaryDocValuesField(Consts.FULL, new BytesRef(fieldPath)));
+ } else {
+ fullPathField = new StringField(Consts.FULL, fieldPath, Field.Store.YES);
+ }
+
d.add(fullPathField);
- d.add(new BinaryDocValuesField(Consts.FULL_BINARY, new BytesRef(fieldPath)));
// Note that we do no pass an Analyzer here because the fields that are
// added to the Document are untokenized or contains their own TokenStream.
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestBackwardsCompatibility.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestBackwardsCompatibility.java
index 76138bd..0f05d32 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestBackwardsCompatibility.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestBackwardsCompatibility.java
@@ -49,12 +49,8 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
//
// Then move the zip file to your trunk checkout and use it in your test cases
- public static final String oldTaxonomyIndexName = "taxonomy.8.6.3-cfs";
+ public static final String oldTaxonomyIndexName = "taxonomy.8.10.0-cfs";
- // LUCENE-9334 requires consistency of field data structures between documents.
- // Old taxonomy index had $full_path$ field indexed only with postings,
- // It is not allowed to add the same field $full_path$ indexed with BinaryDocValues
- // for a new segment, that this test is trying to do.
public void testCreateNewTaxonomy() throws IOException {
createNewTaxonomyIndex(oldTaxonomyIndexName);
}
@@ -67,8 +63,8 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
- FacetLabel cp_b = new FacetLabel("b");
- writer.addCategory(cp_b);
+ FacetLabel cp_c = new FacetLabel("c");
+ writer.addCategory(cp_c);
writer.getInternalIndexWriter().forceMerge(1);
writer.commit();
@@ -79,10 +75,15 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
// Just asserting ord1 != TaxonomyReader.INVALID_ORDINAL is not enough to check compatibility
assertNotNull(reader.getPath(ord1));
- int ord2 = reader.getOrdinal(cp_b);
+ int ord2 = reader.getOrdinal(new FacetLabel("b"));
assert ord2 != TaxonomyReader.INVALID_ORDINAL;
+ // Just asserting ord2 != TaxonomyReader.INVALID_ORDINAL is not enough to check compatibility
assertNotNull(reader.getPath(ord2));
+ int ord3 = reader.getOrdinal(cp_c);
+ assert ord3 != TaxonomyReader.INVALID_ORDINAL;
+ assertNotNull(reader.getPath(ord3));
+
reader.close();
writer.close();
dir.close();
@@ -102,6 +103,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
TaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
writer.addCategory(new FacetLabel("a"));
+ writer.addCategory(new FacetLabel("b"));
writer.commit();
writer.close();
dir.close();
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/taxonomy.8.10.0-cfs.zip b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/taxonomy.8.10.0-cfs.zip
new file mode 100644
index 0000000..a412ab2
Binary files /dev/null and b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/taxonomy.8.10.0-cfs.zip differ
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/taxonomy.8.6.3-cfs.zip b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/taxonomy.8.6.3-cfs.zip
deleted file mode 100644
index d04c706..0000000
Binary files a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/taxonomy.8.6.3-cfs.zip and /dev/null differ