You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2018/08/30 20:01:59 UTC
[1/3] lucene-solr:branch_7x: LUCENE-765: Improved oal.index javadocs.
Repository: lucene-solr
Updated Branches:
refs/heads/branch_7x cac78c1e7 -> 499792352
LUCENE-765: Improved oal.index javadocs.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/de381da5
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/de381da5
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/de381da5
Branch: refs/heads/branch_7x
Commit: de381da555e78b26d302abbf46ad6f35ff58e6c9
Parents: cac78c1
Author: Adrien Grand <jp...@gmail.com>
Authored: Thu Aug 30 11:54:37 2018 +0200
Committer: Adrien Grand <jp...@gmail.com>
Committed: Thu Aug 30 17:45:59 2018 +0200
----------------------------------------------------------------------
lucene/CHANGES.txt | 2 +
.../org/apache/lucene/index/package-info.java | 230 +++++++++++++------
2 files changed, 165 insertions(+), 67 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/de381da5/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 541a754..5890649 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -150,6 +150,8 @@ Other:
* LUCENE-8456: Upgrade Apache Commons Compress to v1.18 (Steve Rowe)
+* LUCENE-765: Improved org.apache.lucene.index javadocs. (Mike Sokolov)
+
======================= Lucene 7.4.1 =======================
Bug Fixes:
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/de381da5/lucene/core/src/java/org/apache/lucene/index/package-info.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/package-info.java b/lucene/core/src/java/org/apache/lucene/index/package-info.java
index f5a86d1..eb7219b 100644
--- a/lucene/core/src/java/org/apache/lucene/index/package-info.java
+++ b/lucene/core/src/java/org/apache/lucene/index/package-info.java
@@ -6,7 +6,7 @@
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -17,34 +17,130 @@
/**
* Code to maintain and access indices.
- * <!-- TODO: add IndexWriter, IndexWriterConfig, DocValues, etc etc -->
* <h2>Table Of Contents</h2>
- * <ol>
- * <li><a href="#postings">Postings APIs</a>
- * <ul>
- * <li><a href="#fields">Fields</a></li>
- * <li><a href="#terms">Terms</a></li>
- * <li><a href="#documents">Documents</a></li>
- * <li><a href="#positions">Positions</a></li>
- * </ul>
- * </li>
- * <li><a href="#stats">Index Statistics</a>
- * <ul>
- * <li><a href="#termstats">Term-level</a></li>
- * <li><a href="#fieldstats">Field-level</a></li>
- * <li><a href="#segmentstats">Segment-level</a></li>
- * <li><a href="#documentstats">Document-level</a></li>
- * </ul>
- * </li>
- * </ol>
+ * <ol>
+ * <li><a href="#index">Index APIs</a>
+ * <ul>
+ * <li><a href="#writer">IndexWriter</a></li>
+ * <li><a href="#reader">IndexReader</a></li>
+ * <li><a href="#segments">Segments and docids</a></li>
+ * </ul>
+ * </li>
+ * <li><a href="#field_types">Field types</a>
+ * <ul>
+ * <li><a href="#postings-desc">Postings</a></li>
+ * <li><a href="#stored-fields">Stored Fields</a></li>
+ * <li><a href="#docvalues">DocValues</a></li>
+ * <li><a href="#points">Points</a></li>
+ * </ul>
+ * </li>
+ * <li><a href="#postings">Postings APIs</a>
+ * <ul>
+ * <li><a href="#fields">Fields</a></li>
+ * <li><a href="#terms">Terms</a></li>
+ * <li><a href="#documents">Documents</a></li>
+ * <li><a href="#positions">Positions</a></li>
+ * </ul>
+ * </li>
+ * <li><a href="#stats">Index Statistics</a>
+ * <ul>
+ * <li><a href="#termstats">Term-level</a></li>
+ * <li><a href="#fieldstats">Field-level</a></li>
+ * <li><a href="#segmentstats">Segment-level</a></li>
+ * <li><a href="#documentstats">Document-level</a></li>
+ * </ul>
+ * </li>
+ * </ol>
+ * <a name="index"></a>
+ * <h2>Index APIs</h2>
+
+ * <a name="writer"></a>
+ * <h3>IndexWriter</h3>
+
+ * <p>{@link org.apache.lucene.index.IndexWriter} is used to create an index, and to add, update and
+ * delete documents. The IndexWriter class is thread safe, and enforces a single instance per
+ * index. Creating an IndexWriter creates a new index or opens an existing index for writing, in a
+ * {@link org.apache.lucene.store.Directory}, depending on the configuration in {@link
+ * org.apache.lucene.index.IndexWriterConfig}. A Directory is an abstraction that typically
+ * represents a local file-system directory (see various implementations of {@link
+ * org.apache.lucene.store.FSDirectory}), but it may also stand for some other storage, such as
+ * RAM.</p>
+
+ * <a name="reader"></a>
+ * <h3>IndexReader</h3>
+
+ * <p>{@link org.apache.lucene.index.IndexReader} is used to read data from the index, and supports
+ * searching. Many thread-safe readers may be {@link org.apache.lucene.index.DirectoryReader#open opened}
+ * concurrently with a single (or no) writer. Each reader maintains a consistent "point in time"
+ * view of an index and must be explicitly refreshed (see {@link
+ * org.apache.lucene.index.DirectoryReader#openIfChanged}) in order to incorporate writes that may
+ * occur after it is opened.</p>
+
+ * <a name="segments"></a>
+ * <h3>Segments and docids</h3>
+
+ * <p>Lucene's index is composed of segments, each of which contains a subset of all the documents
+ * in the index, and is a complete searchable index in itself, over that subset. As documents are
+ * written to the index, new segments are created and flushed to directory storage. Segments are
+ * immutable; updates and deletions may only create new segments and do not modify existing
+ * ones. Over time, the writer merges groups of smaller segments into single larger ones in order to
+ * maintain an index that is efficient to search, and to reclaim dead space left behind by deleted
+ * (and updated) documents.</p>
+
+ * <p>Each document is identified by a 32-bit number, its "docid," and is composed of a collection
+ * of Field values of diverse types (postings, stored fields, doc values, and points). Docids come
+ * in two flavors: global and per-segment. A document's global docid is just the sum of its
+ * per-segment docid and that segment's base docid offset. External, high-level APIs only handle
+ * global docids, but internal APIs that reference a {@link org.apache.lucene.index.LeafReader},
+ * which is a reader for a single segment, deal in per-segment docids.</p>
+ *
+ * <p>Docids are assigned sequentially within each segment (starting at 0). Thus the number of
+ * documents in a segment is one more than its maximum docid; some may be deleted, but their docids
+ * are retained until the segment is merged. When segments merge, their documents are assigned new
+ * sequential docids. Accordingly, docid values must always be treated as internal implementation,
+ * not exposed as part of an application, nor stored or referenced outside of Lucene's internal
+ * APIs.</p>
+
+ * <a name="field_types"></a>
+ * <h2>Field Types</h2>
+ *
+ * <a name="postings-desc"></a>
+ *
+ * <p>Lucene supports a variety of different document field data structures. Lucene's core, the
+ * inverted index, is comprised of "postings." The postings, with their term dictionary, can be
+ * thought of as a map that provides efficient lookup given a {@link org.apache.lucene.index.Term}
+ * (roughly, a word or token), to (the ordered list of) {@link org.apache.lucene.document.Document}s
+ * containing that Term. Postings do not provide any way of retrieving terms given a document,
+ * short of scanning the entire index.</p>
+ *
+ * <a name="stored-fields"></a>
+ * <p>Stored fields are essentially the opposite of postings, providing efficient retrieval of field
+ * values given a docid. All stored field values for a document are stored together in a
+ * block. Different types of stored field provide high-level datatypes such as strings and numbers
+ * on top of the underlying bytes. Stored field values are usually retrieved by the searcher using
+ * an implementation of {@link org.apache.lucene.index.StoredFieldVisitor}.</p>
+
+ * <a name="docvalues"></a>
+ * <p>{@link org.apache.lucene.index.DocValues} fields are what are sometimes referred to as
+ * columnar, or column-stride fields, by analogy to relational database terminology, in which
+ * documents are considered as rows, and fields, columns. DocValues fields store values per-field: a
+ * value for every document is held in a single data structure, providing for rapid, sequential
+ * lookup of a field-value given a docid. These fields are used for efficient value-based sorting,
+ * and for faceting, but they are not useful for filtering.</p>
+
+ * <a name="points"></a>
+ * <p>{@link org.apache.lucene.index.PointValues} represent numeric values using a kd-tree data
+ * structure. Efficient 1- and higher-dimensional implementations make these the choice for numeric
+ * range and interval queries, and geo-spatial queries.</p>
+
* <a name="postings"></a>
* <h2>Postings APIs</h2>
* <a name="fields"></a>
* <h3>
- * Fields
+ * Fields
* </h3>
* <p>
- * {@link org.apache.lucene.index.Fields} is the initial entry point into the
+ * {@link org.apache.lucene.index.Fields} is the initial entry point into the
* postings APIs, this can be obtained in several ways:
* <pre class="prettyprint">
* // access indexed fields for an index segment
@@ -63,7 +159,7 @@
* </pre>
* <a name="terms"></a>
* <h3>
- * Terms
+ * Terms
* </h3>
* <p>
* {@link org.apache.lucene.index.Terms} represents the collection of terms
@@ -128,10 +224,10 @@
* System.out.println(docid);
* int freq = postings.freq();
* for (int i = 0; i < freq; i++) {
- * System.out.println(postings.nextPosition());
- * System.out.println(postings.startOffset());
- * System.out.println(postings.endOffset());
- * System.out.println(postings.getPayload());
+ * System.out.println(postings.nextPosition());
+ * System.out.println(postings.startOffset());
+ * System.out.println(postings.endOffset());
+ * System.out.println(postings.getPayload());
* }
* }
* </pre>
@@ -139,7 +235,7 @@
* <h2>Index Statistics</h2>
* <a name="termstats"></a>
* <h3>
- * Term statistics
+ * Term statistics
* </h3>
* <ul>
* <li>{@link org.apache.lucene.index.TermsEnum#docFreq}: Returns the number of
@@ -157,7 +253,7 @@
* </ul>
* <a name="fieldstats"></a>
* <h3>
- * Field statistics
+ * Field statistics
* </h3>
* <ul>
* <li>{@link org.apache.lucene.index.Terms#size}: Returns the number of
@@ -187,53 +283,53 @@
* </ul>
* <a name="segmentstats"></a>
* <h3>
- * Segment statistics
+ * Segment statistics
* </h3>
- * <ul>
- * <li>{@link org.apache.lucene.index.IndexReader#maxDoc}: Returns the number of
- * documents (including deleted documents) in the index.
- * <li>{@link org.apache.lucene.index.IndexReader#numDocs}: Returns the number
- * of live documents (excluding deleted documents) in the index.
- * <li>{@link org.apache.lucene.index.IndexReader#numDeletedDocs}: Returns the
- * number of deleted documents in the index.
- * <li>{@link org.apache.lucene.index.Fields#size}: Returns the number of indexed
- * fields.
- * </ul>
+ * <ul>
+ * <li>{@link org.apache.lucene.index.IndexReader#maxDoc}: Returns the number of
+ * documents (including deleted documents) in the index.
+ * <li>{@link org.apache.lucene.index.IndexReader#numDocs}: Returns the number
+ * of live documents (excluding deleted documents) in the index.
+ * <li>{@link org.apache.lucene.index.IndexReader#numDeletedDocs}: Returns the
+ * number of deleted documents in the index.
+ * <li>{@link org.apache.lucene.index.Fields#size}: Returns the number of indexed
+ * fields.
+ * </ul>
* <a name="documentstats"></a>
* <h3>
- * Document statistics
+ * Document statistics
* </h3>
* <p>
* Document statistics are available during the indexing process for an indexed field: typically
* a {@link org.apache.lucene.search.similarities.Similarity} implementation will store some
* of these values (possibly in a lossy way), into the normalization value for the document in
* its {@link org.apache.lucene.search.similarities.Similarity#computeNorm} method.
- * <ul>
- * <li>{@link org.apache.lucene.index.FieldInvertState#getLength}: Returns the number of
- * tokens for this field in the document. Note that this is just the number
- * of times that {@link org.apache.lucene.analysis.TokenStream#incrementToken} returned
- * true, and is unrelated to the values in
- * {@link org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute}.
- * <li>{@link org.apache.lucene.index.FieldInvertState#getNumOverlap}: Returns the number
- * of tokens for this field in the document that had a position increment of zero. This
- * can be used to compute a document length that discounts artificial tokens
- * such as synonyms.
- * <li>{@link org.apache.lucene.index.FieldInvertState#getPosition}: Returns the accumulated
- * position value for this field in the document: computed from the values of
- * {@link org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute} and including
- * {@link org.apache.lucene.analysis.Analyzer#getPositionIncrementGap}s across multivalued
- * fields.
- * <li>{@link org.apache.lucene.index.FieldInvertState#getOffset}: Returns the total
- * character offset value for this field in the document: computed from the values of
- * {@link org.apache.lucene.analysis.tokenattributes.OffsetAttribute} returned by
- * {@link org.apache.lucene.analysis.TokenStream#end}, and including
- * {@link org.apache.lucene.analysis.Analyzer#getOffsetGap}s across multivalued
- * fields.
- * <li>{@link org.apache.lucene.index.FieldInvertState#getUniqueTermCount}: Returns the number
- * of unique terms encountered for this field in the document.
- * <li>{@link org.apache.lucene.index.FieldInvertState#getMaxTermFrequency}: Returns the maximum
- * frequency across all unique terms encountered for this field in the document.
- * </ul>
+ * <ul>
+ * <li>{@link org.apache.lucene.index.FieldInvertState#getLength}: Returns the number of
+ * tokens for this field in the document. Note that this is just the number
+ * of times that {@link org.apache.lucene.analysis.TokenStream#incrementToken} returned
+ * true, and is unrelated to the values in
+ * {@link org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute}.
+ * <li>{@link org.apache.lucene.index.FieldInvertState#getNumOverlap}: Returns the number
+ * of tokens for this field in the document that had a position increment of zero. This
+ * can be used to compute a document length that discounts artificial tokens
+ * such as synonyms.
+ * <li>{@link org.apache.lucene.index.FieldInvertState#getPosition}: Returns the accumulated
+ * position value for this field in the document: computed from the values of
+ * {@link org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute} and including
+ * {@link org.apache.lucene.analysis.Analyzer#getPositionIncrementGap}s across multivalued
+ * fields.
+ * <li>{@link org.apache.lucene.index.FieldInvertState#getOffset}: Returns the total
+ * character offset value for this field in the document: computed from the values of
+ * {@link org.apache.lucene.analysis.tokenattributes.OffsetAttribute} returned by
+ * {@link org.apache.lucene.analysis.TokenStream#end}, and including
+ * {@link org.apache.lucene.analysis.Analyzer#getOffsetGap}s across multivalued
+ * fields.
+ * <li>{@link org.apache.lucene.index.FieldInvertState#getUniqueTermCount}: Returns the number
+ * of unique terms encountered for this field in the document.
+ * <li>{@link org.apache.lucene.index.FieldInvertState#getMaxTermFrequency}: Returns the maximum
+ * frequency across all unique terms encountered for this field in the document.
+ * </ul>
* <p>
* Additional user-supplied statistics can be added to the document as DocValues fields and
* accessed via {@link org.apache.lucene.index.LeafReader#getNumericDocValues}.
[2/3] lucene-solr:branch_7x: LUCENE-8460: Better argument validation
in StoredField
Posted by jp...@apache.org.
LUCENE-8460: Better argument validation in StoredField
Signed-off-by: Namgyu Kim <kn...@gmail.com>
Signed-off-by: Adrien Grand <jp...@gmail.com>
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/b1d165c6
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/b1d165c6
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/b1d165c6
Branch: refs/heads/branch_7x
Commit: b1d165c6e8d4d3e78a7c7222ff634779a9677d5b
Parents: de381da
Author: Namgyu Kim <kn...@gmail.com>
Authored: Wed Aug 29 00:46:49 2018 +0900
Committer: Adrien Grand <jp...@gmail.com>
Committed: Thu Aug 30 17:46:19 2018 +0200
----------------------------------------------------------------------
lucene/CHANGES.txt | 2 ++
.../java/org/apache/lucene/document/Field.java | 30 +++++++++++---------
.../org/apache/lucene/document/StoredField.java | 23 +++++++++------
3 files changed, 33 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b1d165c6/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 5890649..f8dfd55 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -132,6 +132,8 @@ Improvements
* LUCENE-8446: The UnifiedHighlighter's DefaultPassageFormatter now treats overlapping matches in
the passage as merged (as if one larger match). (David Smiley)
+* LUCENE-8460: Better argument validation in StoredField. (Namgyu Kim)
+
Other:
* LUCENE-8366: Upgrade to ICU 62.1. Emoji handling now uses Unicode 11's
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b1d165c6/lucene/core/src/java/org/apache/lucene/document/Field.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/document/Field.java b/lucene/core/src/java/org/apache/lucene/document/Field.java
index cbb559a..467fec7 100644
--- a/lucene/core/src/java/org/apache/lucene/document/Field.java
+++ b/lucene/core/src/java/org/apache/lucene/document/Field.java
@@ -169,9 +169,8 @@ public class Field implements IndexableField {
* @param name field name
* @param value byte array pointing to binary content (not copied)
* @param type field type
- * @throws IllegalArgumentException if the field name is null,
- * or the field's type is indexed()
- * @throws NullPointerException if the type is null
+ * @throws IllegalArgumentException if the field name, value or type
+ * is null, or the field's type is indexed().
*/
public Field(String name, byte[] value, IndexableFieldType type) {
this(name, value, 0, value.length, type);
@@ -187,12 +186,11 @@ public class Field implements IndexableField {
* @param offset starting position of the byte array
* @param length valid length of the byte array
* @param type field type
- * @throws IllegalArgumentException if the field name is null,
- * or the field's type is indexed()
- * @throws NullPointerException if the type is null
+ * @throws IllegalArgumentException if the field name, value or type
+ * is null, or the field's type is indexed().
*/
public Field(String name, byte[] value, int offset, int length, IndexableFieldType type) {
- this(name, new BytesRef(value, offset, length), type);
+ this(name, value != null ? new BytesRef(value, offset, length) : null, type);
}
/**
@@ -203,9 +201,8 @@ public class Field implements IndexableField {
* @param name field name
* @param bytes BytesRef pointing to binary content (not copied)
* @param type field type
- * @throws IllegalArgumentException if the field name is null,
- * or the field's type is indexed()
- * @throws NullPointerException if the type is null
+ * @throws IllegalArgumentException if the field name, bytes or type
+ * is null, or the field's type is indexed().
*/
public Field(String name, BytesRef bytes, IndexableFieldType type) {
if (name == null) {
@@ -214,9 +211,12 @@ public class Field implements IndexableField {
if (bytes == null) {
throw new IllegalArgumentException("bytes must not be null");
}
+ if (type == null) {
+ throw new IllegalArgumentException("type must not be null");
+ }
+ this.name = name;
this.fieldsData = bytes;
this.type = type;
- this.name = name;
}
// TODO: allow direct construction of int, long, float, double value too..?
@@ -226,10 +226,9 @@ public class Field implements IndexableField {
* @param name field name
* @param value string value
* @param type field type
- * @throws IllegalArgumentException if either the name or value
+ * @throws IllegalArgumentException if the name, value or type
* is null, or if the field's type is neither indexed() nor stored(),
* or if indexed() is false but storeTermVectors() is true.
- * @throws NullPointerException if the type is null
*/
public Field(String name, String value, IndexableFieldType type) {
if (name == null) {
@@ -238,13 +237,16 @@ public class Field implements IndexableField {
if (value == null) {
throw new IllegalArgumentException("value must not be null");
}
+ if (type == null) {
+ throw new IllegalArgumentException("type must not be null");
+ }
if (!type.stored() && type.indexOptions() == IndexOptions.NONE) {
throw new IllegalArgumentException("it doesn't make sense to have a field that "
+ "is neither indexed nor stored");
}
- this.type = type;
this.name = name;
this.fieldsData = value;
+ this.type = type;
}
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b1d165c6/lucene/core/src/java/org/apache/lucene/document/StoredField.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/document/StoredField.java b/lucene/core/src/java/org/apache/lucene/document/StoredField.java
index 12b529c..7dc5a99 100644
--- a/lucene/core/src/java/org/apache/lucene/document/StoredField.java
+++ b/lucene/core/src/java/org/apache/lucene/document/StoredField.java
@@ -40,12 +40,13 @@ public class StoredField extends Field {
* FieldType}.
* @param name field name
* @param type custom {@link FieldType} for this field
- * @throws IllegalArgumentException if the field name is null.
+ * @throws IllegalArgumentException if the field name or type
+ * is null.
*/
protected StoredField(String name, FieldType type) {
super(name, type);
}
-
+
/**
* Expert: allows you to customize the {@link
* FieldType}.
@@ -54,7 +55,8 @@ public class StoredField extends Field {
* @param name field name
* @param bytes byte array pointing to binary content (not copied)
* @param type custom {@link FieldType} for this field
- * @throws IllegalArgumentException if the field name is null.
+ * @throws IllegalArgumentException if the field name, value or type
+ * is null.
*/
public StoredField(String name, BytesRef bytes, FieldType type) {
super(name, bytes, type);
@@ -66,7 +68,8 @@ public class StoredField extends Field {
* not to change it until you're done with this field.
* @param name field name
* @param value byte array pointing to binary content (not copied)
- * @throws IllegalArgumentException if the field name is null.
+ * @throws IllegalArgumentException if the field name or value
+ * is null.
*/
public StoredField(String name, byte[] value) {
super(name, value, TYPE);
@@ -80,7 +83,8 @@ public class StoredField extends Field {
* @param value byte array pointing to binary content (not copied)
* @param offset starting position of the byte array
* @param length valid length of the byte array
- * @throws IllegalArgumentException if the field name is null.
+ * @throws IllegalArgumentException if the field name or value
+ * is null.
*/
public StoredField(String name, byte[] value, int offset, int length) {
super(name, value, offset, length, TYPE);
@@ -92,7 +96,8 @@ public class StoredField extends Field {
* not to change it until you're done with this field.
* @param name field name
* @param value BytesRef pointing to binary content (not copied)
- * @throws IllegalArgumentException if the field name is null.
+ * @throws IllegalArgumentException if the field name or value
+ * is null.
*/
public StoredField(String name, BytesRef value) {
super(name, value, TYPE);
@@ -102,7 +107,8 @@ public class StoredField extends Field {
* Create a stored-only field with the given string value.
* @param name field name
* @param value string value
- * @throws IllegalArgumentException if the field name or value is null.
+ * @throws IllegalArgumentException if the field name or value
+ * is null.
*/
public StoredField(String name, String value) {
super(name, value, TYPE);
@@ -114,7 +120,8 @@ public class StoredField extends Field {
* @param name field name
* @param value string value
* @param type custom {@link FieldType} for this field
- * @throws IllegalArgumentException if the field name or value is null.
+ * @throws IllegalArgumentException if the field name, value or type
+ * is null.
*/
public StoredField(String name, String value, FieldType type) {
super(name, value, type);
[3/3] lucene-solr:branch_7x: LUCENE-8432: TopFieldComparator stops
calling the comparator when only counting hits.
Posted by jp...@apache.org.
LUCENE-8432: TopFieldComparator stops calling the comparator when only counting hits.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/49979235
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/49979235
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/49979235
Branch: refs/heads/branch_7x
Commit: 4997923525270d4bd9b6f909a5b11d387b517822
Parents: b1d165c
Author: Adrien Grand <jp...@gmail.com>
Authored: Thu Aug 30 12:00:21 2018 +0200
Committer: Adrien Grand <jp...@gmail.com>
Committed: Thu Aug 30 17:59:33 2018 +0200
----------------------------------------------------------------------
lucene/CHANGES.txt | 4 ++
.../apache/lucene/search/TopFieldCollector.java | 56 ++++++++++++++------
2 files changed, 44 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/49979235/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index f8dfd55..ee4296a 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -134,6 +134,10 @@ Improvements
* LUCENE-8460: Better argument validation in StoredField. (Namgyu Kim)
+* LUCENE-8432: TopFieldCollector stops comparing documents if the index is
+ sorted, even if hits still need to be visited to compute the hit count.
+ (Nikolay Khitrin)
+
Other:
* LUCENE-8366: Upgrade to ICU 62.1. Emoji handling now uses Unicode 11's
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/49979235/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
index 066c2c3..ffcb691 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
@@ -122,14 +122,17 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
final LeafFieldComparator[] comparators = queue.getComparators(context);
final int[] reverseMul = queue.getReverseMul();
final Sort indexSort = context.reader().getMetaData().getSort();
+ final boolean canEarlyStopComparing = indexSort != null &&
+ canEarlyTerminate(sort, indexSort);
final boolean canEarlyTerminate = trackTotalHits == false &&
trackMaxScore == false &&
- indexSort != null &&
- canEarlyTerminate(sort, indexSort);
+ canEarlyStopComparing;
final int initialTotalHits = totalHits;
return new MultiComparatorLeafCollector(comparators, reverseMul, mayNeedScoresTwice) {
+ boolean collectedAllCompetitiveHits = false;
+
@Override
public void collect(int doc) throws IOException {
float score = Float.NaN;
@@ -142,16 +145,25 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
++totalHits;
if (queueFull) {
+ if (collectedAllCompetitiveHits) {
+ return;
+ }
+
if (reverseMul * comparator.compareBottom(doc) <= 0) {
// since docs are visited in doc Id order, if compare is 0, it means
// this document is larger than anything else in the queue, and
// therefore not competitive.
- if (canEarlyTerminate) {
- // scale totalHits linearly based on the number of docs
- // and terminate collection
- totalHits += estimateRemainingHits(totalHits - initialTotalHits, doc, context.reader().maxDoc());
- earlyTerminated = true;
- throw new CollectionTerminatedException();
+ if (canEarlyStopComparing) {
+ if (canEarlyTerminate) {
+ // scale totalHits linearly based on the number of docs
+ // and terminate collection
+ totalHits += estimateRemainingHits(totalHits - initialTotalHits, doc, context.reader().maxDoc());
+ earlyTerminated = true;
+ throw new CollectionTerminatedException();
+ } else {
+ collectedAllCompetitiveHits = true;
+ return;
+ }
} else {
// just move to the next doc
return;
@@ -232,13 +244,16 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
docBase = context.docBase;
final int afterDoc = after.doc - docBase;
final Sort indexSort = context.reader().getMetaData().getSort();
+ final boolean canEarlyStopComparing = indexSort != null &&
+ canEarlyTerminate(sort, indexSort);
final boolean canEarlyTerminate = trackTotalHits == false &&
trackMaxScore == false &&
- indexSort != null &&
- canEarlyTerminate(sort, indexSort);
+ canEarlyStopComparing;
final int initialTotalHits = totalHits;
return new MultiComparatorLeafCollector(queue.getComparators(context), queue.getReverseMul(), mayNeedScoresTwice) {
+ boolean collectedAllCompetitiveHits = false;
+
@Override
public void collect(int doc) throws IOException {
//System.out.println(" collect doc=" + doc);
@@ -254,17 +269,26 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
}
if (queueFull) {
+ if (collectedAllCompetitiveHits) {
+ return;
+ }
+
// Fastmatch: return if this hit is no better than
// the worst hit currently in the queue:
final int cmp = reverseMul * comparator.compareBottom(doc);
if (cmp <= 0) {
// not competitive since documents are visited in doc id order
- if (canEarlyTerminate) {
- // scale totalHits linearly based on the number of docs
- // and terminate collection
- totalHits += estimateRemainingHits(totalHits - initialTotalHits, doc, context.reader().maxDoc());
- earlyTerminated = true;
- throw new CollectionTerminatedException();
+ if (canEarlyStopComparing) {
+ if (canEarlyTerminate) {
+ // scale totalHits linearly based on the number of docs
+ // and terminate collection
+ totalHits += estimateRemainingHits(totalHits - initialTotalHits, doc, context.reader().maxDoc());
+ earlyTerminated = true;
+ throw new CollectionTerminatedException();
+ } else {
+ collectedAllCompetitiveHits = true;
+ return;
+ }
} else {
// just move to the next doc
return;