You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by da...@apache.org on 2018/08/06 04:15:59 UTC
[26/48] lucene-solr:jira/http2: LUCENE-8441: IndexWriter now checks doc value type of index sort fields and fails the document if they are not compatible.
LUCENE-8441: IndexWriter now checks doc value type of index sort fields and fails the document if they are not compatible.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/679b4aa7
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/679b4aa7
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/679b4aa7
Branch: refs/heads/jira/http2
Commit: 679b4aa71d205ac58621f6b2bad64637f6bd7d67
Parents: 1133bf9
Author: Jim Ferenczi <ji...@apache.org>
Authored: Wed Aug 1 18:28:51 2018 +0200
Committer: Jim Ferenczi <ji...@apache.org>
Committed: Wed Aug 1 18:28:51 2018 +0200
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 ++
.../lucene/index/DefaultIndexingChain.java | 49 ++++++++++++++++++++
.../index/SortingStoredFieldsConsumer.java | 6 ++-
.../index/SortingTermVectorsConsumer.java | 6 ++-
.../apache/lucene/index/TestIndexSorting.java | 40 ++++++++++++++++
5 files changed, 100 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/679b4aa7/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index fdc8c98..b76cc6f 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -199,6 +199,9 @@ Bug Fixes:
* LUCENE-8429: DaciukMihovAutomatonBuilder is no longer prone to stack
overflows by enforcing a maximum term length. (Adrien Grand)
+* LUCENE-8441: IndexWriter now checks doc value type for index sort fields
+ and fails the document if they are not compatible. (Jim Ferenczi, Mike McCandless)
+
Changes in Runtime Behavior:
* LUCENE-7976: TieredMergePolicy now respects maxSegmentSizeMB by default when executing
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/679b4aa7/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java b/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
index e552516..d0a6974 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
@@ -39,6 +39,8 @@ import org.apache.lucene.document.FieldType;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.SortedNumericSortField;
+import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.ArrayUtil;
@@ -524,6 +526,48 @@ final class DefaultIndexingChain extends DocConsumer {
fp.pointValuesWriter.addPackedValue(docState.docID, field.binaryValue());
}
+ private void validateIndexSortDVType(Sort indexSort, String fieldName, DocValuesType dvType) {
+ for (SortField sortField : indexSort.getSort()) {
+ if (sortField.getField().equals(fieldName)) {
+ switch (dvType) {
+ case NUMERIC:
+ if (sortField.getType().equals(SortField.Type.INT) == false &&
+ sortField.getType().equals(SortField.Type.LONG) == false &&
+ sortField.getType().equals(SortField.Type.FLOAT) == false &&
+ sortField.getType().equals(SortField.Type.DOUBLE) == false) {
+ throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
+ }
+ break;
+
+ case BINARY:
+ throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
+
+ case SORTED:
+ if (sortField.getType().equals(SortField.Type.STRING) == false) {
+ throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
+ }
+ break;
+
+ case SORTED_NUMERIC:
+ if (sortField instanceof SortedNumericSortField == false) {
+ throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
+ }
+ break;
+
+ case SORTED_SET:
+ if (sortField instanceof SortedSetSortField == false) {
+ throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
+ }
+ break;
+
+ default:
+ throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
+ }
+ break;
+ }
+ }
+ }
+
/** Called from processDocument to index one field's doc value */
private void indexDocValue(PerField fp, DocValuesType dvType, IndexableField field) throws IOException {
@@ -531,7 +575,12 @@ final class DefaultIndexingChain extends DocConsumer {
// This is the first time we are seeing this field indexed with doc values, so we
// now record the DV type so that any future attempt to (illegally) change
// the DV type of this field, will throw an IllegalArgExc:
+ if (docWriter.getSegmentInfo().getIndexSort() != null) {
+ final Sort indexSort = docWriter.getSegmentInfo().getIndexSort();
+ validateIndexSortDVType(indexSort, fp.fieldInfo.name, dvType);
+ }
fieldInfos.globalFieldNumbers.setDocValuesType(fp.fieldInfo.number, fp.fieldInfo.name, dvType);
+
}
fp.fieldInfo.setDocValuesType(dvType);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/679b4aa7/lucene/core/src/java/org/apache/lucene/index/SortingStoredFieldsConsumer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortingStoredFieldsConsumer.java b/lucene/core/src/java/org/apache/lucene/index/SortingStoredFieldsConsumer.java
index e5443b2..97253a5 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortingStoredFieldsConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortingStoredFieldsConsumer.java
@@ -83,8 +83,10 @@ final class SortingStoredFieldsConsumer extends StoredFieldsConsumer {
try {
super.abort();
} finally {
- IOUtils.deleteFilesIgnoringExceptions(tmpDirectory,
- tmpDirectory.getTemporaryFiles().values());
+ if (tmpDirectory != null) {
+ IOUtils.deleteFilesIgnoringExceptions(tmpDirectory,
+ tmpDirectory.getTemporaryFiles().values());
+ }
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/679b4aa7/lucene/core/src/java/org/apache/lucene/index/SortingTermVectorsConsumer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortingTermVectorsConsumer.java b/lucene/core/src/java/org/apache/lucene/index/SortingTermVectorsConsumer.java
index 054ca50..955dd8a 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortingTermVectorsConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortingTermVectorsConsumer.java
@@ -83,8 +83,10 @@ final class SortingTermVectorsConsumer extends TermVectorsConsumer {
try {
super.abort();
} finally {
- IOUtils.deleteFilesIgnoringExceptions(tmpDirectory,
- tmpDirectory.getTemporaryFiles().values());
+ if (tmpDirectory != null) {
+ IOUtils.deleteFilesIgnoringExceptions(tmpDirectory,
+ tmpDirectory.getTemporaryFiles().values());
+ }
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/679b4aa7/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
index 9711a35..674972f 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
@@ -84,6 +84,7 @@ import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.TestUtil;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+import static org.junit.internal.matchers.StringContains.containsString;
public class TestIndexSorting extends LuceneTestCase {
static class AssertingNeedsIndexSortCodec extends FilterCodec {
@@ -2472,4 +2473,43 @@ public class TestIndexSorting extends LuceneTestCase {
IOUtils.close(r, w, dir);
}
+ public void testWrongSortFieldType() throws Exception {
+ Directory dir = newDirectory();
+ List<Field> dvs = new ArrayList<>();
+ dvs.add(new SortedDocValuesField("field", new BytesRef("")));
+ dvs.add(new SortedSetDocValuesField("field", new BytesRef("")));
+ dvs.add(new NumericDocValuesField("field", 42));
+ dvs.add(new SortedNumericDocValuesField("field", 42));
+
+ List<SortField> sortFields = new ArrayList<>();
+ sortFields.add(new SortField("field", SortField.Type.STRING));
+ sortFields.add(new SortedSetSortField("field", false));
+ sortFields.add(new SortField("field", SortField.Type.INT));
+ sortFields.add(new SortedNumericSortField("field", SortField.Type.INT));
+
+ for (int i = 0; i < sortFields.size(); i++) {
+ for (int j = 0; j < dvs.size(); j++) {
+ if (i == j) {
+ continue;
+ }
+ Sort indexSort = new Sort(sortFields.get(i));
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(dvs.get(j));
+ IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc));
+ assertThat(exc.getMessage(), containsString("invalid doc value type"));
+ doc.clear();
+ doc.add(dvs.get(i));
+ w.addDocument(doc);
+ doc.add(dvs.get(j));
+ exc = expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc));
+ assertThat(exc.getMessage(), containsString("cannot change DocValues type"));
+ w.rollback();
+ IOUtils.close(w);
+ }
+ }
+ IOUtils.close(dir);
+ }
}