You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by da...@apache.org on 2018/08/06 04:15:59 UTC

[26/48] lucene-solr:jira/http2: LUCENE-8441: IndexWriter now checks doc value type of index sort fields and fails the document if they are not compatible.

LUCENE-8441: IndexWriter now checks doc value type of index sort fields and fails the document if they are not compatible.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/679b4aa7
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/679b4aa7
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/679b4aa7

Branch: refs/heads/jira/http2
Commit: 679b4aa71d205ac58621f6b2bad64637f6bd7d67
Parents: 1133bf9
Author: Jim Ferenczi <ji...@apache.org>
Authored: Wed Aug 1 18:28:51 2018 +0200
Committer: Jim Ferenczi <ji...@apache.org>
Committed: Wed Aug 1 18:28:51 2018 +0200

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  3 ++
 .../lucene/index/DefaultIndexingChain.java      | 49 ++++++++++++++++++++
 .../index/SortingStoredFieldsConsumer.java      |  6 ++-
 .../index/SortingTermVectorsConsumer.java       |  6 ++-
 .../apache/lucene/index/TestIndexSorting.java   | 40 ++++++++++++++++
 5 files changed, 100 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/679b4aa7/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index fdc8c98..b76cc6f 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -199,6 +199,9 @@ Bug Fixes:
 * LUCENE-8429: DaciukMihovAutomatonBuilder is no longer prone to stack
   overflows by enforcing a maximum term length. (Adrien Grand)
 
+* LUCENE-8441: IndexWriter now checks doc value type for index sort fields
+  and fails the document if they are not compatible. (Jim Ferenczi, Mike McCandless)
+
 Changes in Runtime Behavior:
 
 * LUCENE-7976: TieredMergePolicy now respects maxSegmentSizeMB by default when executing

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/679b4aa7/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java b/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
index e552516..d0a6974 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
@@ -39,6 +39,8 @@ import org.apache.lucene.document.FieldType;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.SortedNumericSortField;
+import org.apache.lucene.search.SortedSetSortField;
 import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.util.ArrayUtil;
@@ -524,6 +526,48 @@ final class DefaultIndexingChain extends DocConsumer {
     fp.pointValuesWriter.addPackedValue(docState.docID, field.binaryValue());
   }
 
+  private void validateIndexSortDVType(Sort indexSort, String fieldName, DocValuesType dvType) {
+    for (SortField sortField : indexSort.getSort()) {
+      if (sortField.getField().equals(fieldName)) {
+        switch (dvType) {
+          case NUMERIC:
+            if (sortField.getType().equals(SortField.Type.INT) == false &&
+                  sortField.getType().equals(SortField.Type.LONG) == false &&
+                  sortField.getType().equals(SortField.Type.FLOAT) == false &&
+                  sortField.getType().equals(SortField.Type.DOUBLE) == false) {
+              throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
+            }
+            break;
+
+          case BINARY:
+            throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
+
+          case SORTED:
+            if (sortField.getType().equals(SortField.Type.STRING) == false) {
+              throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
+            }
+            break;
+
+          case SORTED_NUMERIC:
+            if (sortField instanceof SortedNumericSortField == false) {
+              throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
+            }
+            break;
+
+          case SORTED_SET:
+            if (sortField instanceof SortedSetSortField == false) {
+              throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
+            }
+            break;
+
+          default:
+            throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
+        }
+        break;
+      }
+    }
+  }
+
   /** Called from processDocument to index one field's doc value */
   private void indexDocValue(PerField fp, DocValuesType dvType, IndexableField field) throws IOException {
 
@@ -531,7 +575,12 @@ final class DefaultIndexingChain extends DocConsumer {
       // This is the first time we are seeing this field indexed with doc values, so we
       // now record the DV type so that any future attempt to (illegally) change
       // the DV type of this field, will throw an IllegalArgExc:
+      if (docWriter.getSegmentInfo().getIndexSort() != null) {
+        final Sort indexSort = docWriter.getSegmentInfo().getIndexSort();
+        validateIndexSortDVType(indexSort, fp.fieldInfo.name, dvType);
+      }
       fieldInfos.globalFieldNumbers.setDocValuesType(fp.fieldInfo.number, fp.fieldInfo.name, dvType);
+
     }
     fp.fieldInfo.setDocValuesType(dvType);
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/679b4aa7/lucene/core/src/java/org/apache/lucene/index/SortingStoredFieldsConsumer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortingStoredFieldsConsumer.java b/lucene/core/src/java/org/apache/lucene/index/SortingStoredFieldsConsumer.java
index e5443b2..97253a5 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortingStoredFieldsConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortingStoredFieldsConsumer.java
@@ -83,8 +83,10 @@ final class SortingStoredFieldsConsumer extends StoredFieldsConsumer {
     try {
       super.abort();
     } finally {
-      IOUtils.deleteFilesIgnoringExceptions(tmpDirectory,
-          tmpDirectory.getTemporaryFiles().values());
+      if (tmpDirectory != null) {
+        IOUtils.deleteFilesIgnoringExceptions(tmpDirectory,
+            tmpDirectory.getTemporaryFiles().values());
+      }
     }
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/679b4aa7/lucene/core/src/java/org/apache/lucene/index/SortingTermVectorsConsumer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortingTermVectorsConsumer.java b/lucene/core/src/java/org/apache/lucene/index/SortingTermVectorsConsumer.java
index 054ca50..955dd8a 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortingTermVectorsConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortingTermVectorsConsumer.java
@@ -83,8 +83,10 @@ final class SortingTermVectorsConsumer extends TermVectorsConsumer {
     try {
       super.abort();
     } finally {
-      IOUtils.deleteFilesIgnoringExceptions(tmpDirectory,
-          tmpDirectory.getTemporaryFiles().values());
+      if (tmpDirectory != null) {
+        IOUtils.deleteFilesIgnoringExceptions(tmpDirectory,
+            tmpDirectory.getTemporaryFiles().values());
+      }
     }
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/679b4aa7/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
index 9711a35..674972f 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
@@ -84,6 +84,7 @@ import org.apache.lucene.util.NumericUtils;
 import org.apache.lucene.util.TestUtil;
 
 import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+import static org.junit.internal.matchers.StringContains.containsString;
 
 public class TestIndexSorting extends LuceneTestCase {
   static class AssertingNeedsIndexSortCodec extends FilterCodec {
@@ -2472,4 +2473,43 @@ public class TestIndexSorting extends LuceneTestCase {
     IOUtils.close(r, w, dir);
   }
 
+  public void testWrongSortFieldType() throws Exception {
+    Directory dir = newDirectory();
+    List<Field> dvs = new ArrayList<>();
+    dvs.add(new SortedDocValuesField("field", new BytesRef("")));
+    dvs.add(new SortedSetDocValuesField("field", new BytesRef("")));
+    dvs.add(new NumericDocValuesField("field", 42));
+    dvs.add(new SortedNumericDocValuesField("field", 42));
+
+    List<SortField> sortFields = new ArrayList<>();
+    sortFields.add(new SortField("field", SortField.Type.STRING));
+    sortFields.add(new SortedSetSortField("field", false));
+    sortFields.add(new SortField("field", SortField.Type.INT));
+    sortFields.add(new SortedNumericSortField("field", SortField.Type.INT));
+
+    for (int i = 0; i < sortFields.size(); i++) {
+      for (int j = 0; j < dvs.size(); j++) {
+        if (i == j) {
+          continue;
+        }
+        Sort indexSort = new Sort(sortFields.get(i));
+        IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+        iwc.setIndexSort(indexSort);
+        IndexWriter w = new IndexWriter(dir, iwc);
+        Document doc = new Document();
+        doc.add(dvs.get(j));
+        IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc));
+        assertThat(exc.getMessage(), containsString("invalid doc value type"));
+        doc.clear();
+        doc.add(dvs.get(i));
+        w.addDocument(doc);
+        doc.add(dvs.get(j));
+        exc = expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc));
+        assertThat(exc.getMessage(), containsString("cannot change DocValues type"));
+        w.rollback();
+        IOUtils.close(w);
+      }
+    }
+    IOUtils.close(dir);
+  }
 }