You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/12/07 11:52:05 UTC
svn commit: r1643659 [2/7] - in /lucene/dev/branches/lucene6005/lucene: ./
analysis/common/src/test/org/apache/lucene/analysis/core/
analysis/icu/src/test/org/apache/lucene/collation/
backward-codecs/src/test/org/apache/lucene/index/ benchmark/src/java...
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/FieldTypes.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/FieldTypes.java?rev=1643659&r1=1643658&r2=1643659&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/FieldTypes.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/FieldTypes.java Sun Dec 7 10:52:03 2014
@@ -50,10 +50,12 @@ import org.apache.lucene.index.IndexWrit
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.Term;
+import org.apache.lucene.search.DocValuesRangeFilter;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldComparatorSource;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.LeftZeroPadTermRangeFilter;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.Sort;
@@ -64,6 +66,7 @@ import org.apache.lucene.search.TermQuer
import org.apache.lucene.search.TermRangeFilter;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.BufferedChecksumIndexInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataInput;
@@ -74,11 +77,10 @@ import org.apache.lucene.store.RAMInputS
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.Version;
// TODO
-// - explore what it'd be like to add other higher level types?
-// - BigInt, BigDecimal, IPV6
// - what about sparse fields... anything for us to do...
// - payloads just stay write once in their own way?
// - how to handle old indices w/ no field types yet?
@@ -93,8 +95,6 @@ import org.apache.lucene.util.Version;
// - numeric range queries "just work"
// - creating queries, catching invalid field names, no positions indexed, etc.
// - prox queries can verify field was indexed w/ positions
-// - move analyzer out of IW/IWC into Field/FieldType/s only?
-// - why does STS fill offset...
// tie into query parser
// default operators?
@@ -104,48 +104,45 @@ import org.apache.lucene.util.Version;
// tie into faceting
// tie into index sorting
-// nocommit sugar API to retrieve values from DVs or stored fields or whatever?
+// nocommit should we just default to StandardAnalyzer
-// nocommit how will future back-compat work? segment must store field types as of when it was written?
+// nocommit it's ridiculous to have the "sort missing as 0" option/default for numerics
-// nocommit how to make this more extensible? e.g. so I can say "this field will facet, hierarchical, etc."
+// nocommit how to allow extending this?
+// - geo
+// - expressions
+// - facets (FacetsConfig)
+// - doc blocks (nested) / joins
+// fold in compressing stored fields format params...how
-// nocommit expose DocValuesRangeFilter?
+// nocommit icu collation field is difficult now
-// nocommit PH should take this and validate highlighting was enabled?
-
-// nocommit a segment should store the field type as of when it was written? on upgrade/reindex we can use that?
+// nocommit sugar API to retrieve values from DVs or stored fields or whatever?
-// nocommit addStored should take numbers too?
+// nocommit how will future back-compat work? segment must store field types as of when it was written?
-// nocommit should we detect if we are used to change schema after the IW holding us is closed?
+// nocommit PH should take this and validate highlighting was enabled?
// nocommit run all monster tests
-// nocommit cutover AnalyzingInfixSuggester to binary atom for contexts
-
-// nocommit is it bad that you can no longer add e.g. a stored value separately from a custom token stream?
-
-// nocommit byte, short?
-
// nocommit iterator over all fields / types?
-// nocommit proxy sort field
-
// nocommit allow adding array of atom values? fieldnamesfield would use it?
// nocommit optimize field exists filter to MatchAllBits when all docs in the seg have the field; same opto as range query when min < terms.min & max > terms.max
-// nocomit should exists filter use docsWithField?
+// nocommit should exists filter use docsWithField?
+
+// nocommit xlog is easier w/ schema?
// nocommit use better pf when field is unique
// nocommit filter caching? parent docs filter?
-// nocommit do we allow mixing of binary and non-binary atom?
+// nocommit we could track here which fields are actually searched/filtered on ... and e.g. make use of this during warmers ...
-// nocommit index field names the doc has?
+// nocommit do we allow mixing of binary and non-binary atom?
// nocommit fix simple qp to optionally take this?
@@ -158,28 +155,25 @@ import org.apache.lucene.util.Version;
// nocommit move to oal.index?
-// nocommit per-field norms format? then we can commit these "tradeoffs"
-
-// nocommit default value?
+// NO
+// - dynamic fields
+// - can we somehow always store a "source"? can be handled above
+// - default value (value used if the field is null/missing): this seems silly wasteful, and layer above can handle it
+// - sort proxy field ("when I sort by X you should actually sort by Y"): can be handled above
-// nocommit can we have test infra that randomly reopens writer?
+// nocommit highlight proxy field (LUCENE-6061)
-// nocommit getTermFilter?
-// nocommit facets?
+// LATER
+// - can we have test infra that randomly reopens writer?
+// - newTermFilter?
// nocommit live values?
-// nocommit expr fields?
-
// nocommit default qp operator
// nocommit copy field?
-// nocommit sort proxy field?
-
-// nocommit highlight proxy field (LUCENE-6061)
-
// nocommit controlling compression of stored fields, norms
// nocommit can we somehow detect at search time if the field types you are using doesn't match the searcher you are now searching against?
@@ -190,8 +184,6 @@ import org.apache.lucene.util.Version;
// nocommit can/should we validate field names here?
-// nocommit can we somehow always store a "source"?
-
// nocommit make ValueType public? add setter so you can set that too?
// language for the field? (to default collator)
@@ -206,13 +198,11 @@ import org.apache.lucene.util.Version;
// nocommit can we require use of analyzer factories?
-// nocommit what schema options does solr/ES offer
-
// nocommit accent removal and lowercasing for wildcards should just work
// separate analyzer for phrase queries in suggesters
-// nocommit Index class? enforcing unique id, xlog?
+// nocommit Index class? xlog?
// nocommit how to randomize IWC? RIW?
@@ -220,6 +210,8 @@ import org.apache.lucene.util.Version;
// nocommit can we move multi-field-ness out of IW? so IW only gets a single instance of each field
+// nocommit should multi-valued fields be added as straight array?
+
// nocommit nested/parent/child docs?
// nocommit "all" field:
@@ -230,8 +222,6 @@ import org.apache.lucene.util.Version;
// nocommit required? not null?
-// nocommit BigInt?
-
// nocommit BigDecimal?
/** Records how each field is indexed, stored, etc. This class persists
@@ -244,7 +234,7 @@ public class FieldTypes {
public static final int DEFAULT_OFFSET_GAP = 1;
- enum ValueType {
+ public enum ValueType {
NONE,
TEXT,
SHORT_TEXT,
@@ -259,7 +249,6 @@ public class FieldTypes {
BOOLEAN,
DATE,
INET_ADDRESS,
- // nocommit primary_key?
}
// nocommit should we have a "resolution" for Date field?
@@ -287,6 +276,8 @@ public class FieldTypes {
/** Used only in memory to record when something changed. */
private long changeCount;
+ private volatile boolean closed;
+
/** Just like current oal.document.FieldType, except for each setting it can also record "not-yet-set". */
class FieldType implements IndexableFieldType, Cloneable {
private final String name;
@@ -305,6 +296,7 @@ public class FieldTypes {
/** Copy constructor. */
FieldType(FieldType other) {
+ // nocommit how to make sure nothing is missing here? can/should we just use default clone impl?
this.name = other.name;
this.createdVersion = other.createdVersion;
this.valueType = other.valueType;
@@ -414,6 +406,7 @@ public class FieldTypes {
Locale sortLocale;
Collator sortCollator;
+ SortKey sortKey;
boolean validate() {
switch (valueType) {
@@ -437,11 +430,11 @@ public class FieldTypes {
}
if (valueType == ValueType.BIG_INT) {
if (docValuesType != DocValuesType.NONE && (docValuesType != DocValuesType.SORTED && docValuesType != DocValuesType.SORTED_SET)) {
- illegalState(name, "type " + valueType + " must use SORTED or SORTED_SET docValuesType (got: " + docValuesType + ")");
+ illegalState(name, "type " + valueType + " must use SORTED or SORTED_SET docValuesType; got: " + docValuesType);
}
} else {
if (docValuesType != DocValuesType.NONE && (docValuesType != DocValuesType.NUMERIC && docValuesType != DocValuesType.SORTED_NUMERIC)) {
- illegalState(name, "type " + valueType + " must use NUMERIC or SORTED_NUMERIC docValuesType (got: " + docValuesType + ")");
+ illegalState(name, "type " + valueType + " must use NUMERIC or SORTED_NUMERIC docValuesType; got: " + docValuesType);
}
}
if (indexOptions != IndexOptions.NONE && indexOptions.compareTo(IndexOptions.DOCS) > 0) {
@@ -488,7 +481,7 @@ public class FieldTypes {
illegalState(name, "type " + valueType + " cannot have a queryAnalyzer");
}
if (docValuesType != DocValuesType.NONE && docValuesType != DocValuesType.BINARY && docValuesType != DocValuesType.SORTED && docValuesType != DocValuesType.SORTED_SET) {
- illegalState(name, "type " + valueType + " must use BINARY, SORTED or SORTED_SET docValuesType (got: " + docValuesType + ")");
+ illegalState(name, "type " + valueType + " must use BINARY, SORTED or SORTED_SET docValuesType; got: " + docValuesType);
}
if (indexOptions != IndexOptions.NONE && indexOptions.compareTo(IndexOptions.DOCS) > 0) {
// nocommit too anal?
@@ -529,7 +522,7 @@ public class FieldTypes {
illegalState(name, "type " + valueType + " cannot index norms");
}
if (docValuesType != DocValuesType.NONE && docValuesType != DocValuesType.NUMERIC && docValuesType != DocValuesType.SORTED_NUMERIC) {
- illegalState(name, "type " + valueType + " must use NUMERIC or SORTED_NUMERIC docValuesType (got: " + docValuesType + ")");
+ illegalState(name, "type " + valueType + " must use NUMERIC or SORTED_NUMERIC docValuesType; got: " + docValuesType);
}
if (minTokenLength != null) {
illegalState(name, "type " + valueType + " cannot set min/max token length");
@@ -544,6 +537,10 @@ public class FieldTypes {
// nocommit more checks
+ if (sortKey != null && valueType != ValueType.ATOM) {
+ illegalState(name, "sortKey can only be set for ATOM fields; got value type=" + valueType);
+ }
+
if (multiValued == Boolean.TRUE &&
(docValuesType == DocValuesType.NUMERIC ||
docValuesType == DocValuesType.SORTED ||
@@ -597,10 +594,10 @@ public class FieldTypes {
}
} else {
if (valueType != ValueType.TEXT && valueType != ValueType.SHORT_TEXT && indexAnalyzer != null) {
- illegalState(name, "can only setIndexAnalyzer for short text and large text fields; got valueType=" + valueType);
+ illegalState(name, "can only setIndexAnalyzer for short text and large text fields; got value type=" + valueType);
}
if (valueType != ValueType.TEXT && valueType != ValueType.SHORT_TEXT && queryAnalyzer != null) {
- illegalState(name, "can only setQueryAnalyzer for short text and large text fields; got valueType=" + valueType);
+ illegalState(name, "can only setQueryAnalyzer for short text and large text fields; got value type=" + valueType);
}
if (isUnique == Boolean.TRUE && indexOptions != IndexOptions.DOCS) {
illegalState(name, "unique fields should be indexed with IndexOptions.DOCS; got indexOptions=" + indexOptions);
@@ -638,7 +635,7 @@ public class FieldTypes {
if (highlighted == Boolean.TRUE) {
if (valueType != ValueType.TEXT && valueType != ValueType.SHORT_TEXT) {
- illegalState(name, "can only enable highlighting for TEXT or SHORT_TEXT fields; got valueType=" + valueType);
+ illegalState(name, "can only enable highlighting for TEXT or SHORT_TEXT fields; got value type=" + valueType);
}
if (indexOptions != IndexOptions.NONE && indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
illegalState(name, "must index with IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS when highlighting is enabled");
@@ -705,7 +702,7 @@ public class FieldTypes {
b.append("field \"");
b.append(name);
b.append("\":\n");
- b.append(" valueType: ");
+ b.append(" value type: ");
b.append(valueType);
b.append('\n');
@@ -931,7 +928,7 @@ public class FieldTypes {
out.writeByte((byte) 13);
break;
default:
- throw new AssertionError("missing ValueType in switch");
+ throw new AssertionError("missing value type in switch");
}
if (docValuesTypeSet == false) {
@@ -1074,7 +1071,7 @@ public class FieldTypes {
valueType = ValueType.INET_ADDRESS;
break;
default:
- throw new CorruptIndexException("invalid byte for ValueType: " + b, in);
+ throw new CorruptIndexException("invalid byte for value type: " + b, in);
}
b = in.readByte();
@@ -2406,7 +2403,7 @@ public class FieldTypes {
public synchronized void setDocValuesType(String fieldName, DocValuesType dvType) {
ensureWritable();
if (dvType == null) {
- throw new NullPointerException("docValueType cannot be null (field: \"" + fieldName + "\")");
+ throw new NullPointerException("docValuesType cannot be null (field: \"" + fieldName + "\")");
}
FieldType current = fields.get(fieldName);
if (current == null) {
@@ -2539,7 +2536,7 @@ public class FieldTypes {
}
}
- synchronized void recordLargeTextType(String fieldName, boolean allowStored, boolean indexed) {
+ synchronized void recordLargeTextType(String fieldName, boolean allowStored, boolean allowIndexed) {
ensureWritable();
indexedDocs = true;
FieldType current = fields.get(fieldName);
@@ -2547,14 +2544,14 @@ public class FieldTypes {
current = newFieldType(fieldName);
current.valueType = ValueType.TEXT;
fields.put(fieldName, current);
- setDefaults(current);
if (allowStored == false) {
current.stored = Boolean.FALSE;
}
- if (indexed == false) {
- current.indexOptions = IndexOptions.NONE;
+ if (allowIndexed == false) {
+ assert current.indexOptions == IndexOptions.NONE: "got " + current.indexOptions;
current.indexOptionsSet = true;
}
+ setDefaults(current);
changed();
} else if (current.valueType == ValueType.NONE) {
// This can happen if e.g. the app first calls FieldTypes.setStored(...)
@@ -2569,10 +2566,12 @@ public class FieldTypes {
current.stored = Boolean.FALSE;
}
}
- if (indexed == false) {
+ if (allowIndexed == false) {
if (current.indexOptionsSet == false) {
assert current.indexOptions == IndexOptions.NONE;
current.indexOptionsSet = true;
+ } else if (current.indexOptions != IndexOptions.NONE) {
+ illegalState(fieldName, "this field is already indexed with indexOptions=" + current.indexOptions);
}
}
current.validate();
@@ -2586,6 +2585,10 @@ public class FieldTypes {
changed();
} else if (current.valueType != ValueType.TEXT) {
illegalState(fieldName, "cannot change from value type " + current.valueType + " to " + ValueType.TEXT);
+ } else if (allowIndexed == false && current.indexOptionsSet && current.indexOptions != IndexOptions.NONE) {
+ illegalState(fieldName, "this field is already indexed with indexOptions=" + current.indexOptions);
+ } else if (allowStored == false && current.stored == Boolean.TRUE) {
+ illegalState(fieldName, "this field was already enabled for storing");
}
}
@@ -2602,7 +2605,25 @@ public class FieldTypes {
current.sortCollator = Collator.getInstance(locale);
changed();
}
+ }
+
+ public static interface SortKey {
+ Comparable getKey(Object o);
+ }
+ // nocommit enforce only ATOM:
+ /** NOTE: does not persist; you must set this each time you open a new reader. */
+ public void setSortKey(String fieldName, SortKey sortKey) {
+ FieldType current = fields.get(fieldName);
+ if (current == null) {
+ current = newFieldType(fieldName);
+ current.sortKey = sortKey;
+ fields.put(fieldName, current);
+ } else if (current.valueType == ValueType.ATOM) {
+ current.sortKey = sortKey;
+ } else {
+ illegalState(fieldName, "sortKey can only be set for ATOM fields; got value type=" + current.valueType);
+ }
}
public Locale getSortLocale(String fieldName) {
@@ -2979,10 +3000,10 @@ public class FieldTypes {
switch (fieldType.valueType) {
case INT:
- bytes = Document.intToBytes(token);
+ bytes = NumericUtils.intToBytes(token);
break;
default:
- illegalState(fieldName, "cannot create int term query when valueType=" + fieldType.valueType);
+ illegalState(fieldName, "cannot create int term query when value type=" + fieldType.valueType);
// Dead code but javac disagrees:
bytes = null;
}
@@ -3009,10 +3030,10 @@ public class FieldTypes {
switch (fieldType.valueType) {
case LONG:
- bytes = Document.longToBytes(token);
+ bytes = NumericUtils.longToBytes(token);
break;
default:
- illegalState(fieldName, "cannot create long term query when valueType=" + fieldType.valueType);
+ illegalState(fieldName, "cannot create long term query when value type=" + fieldType.valueType);
// Dead code but javac disagrees:
bytes = null;
}
@@ -3038,7 +3059,7 @@ public class FieldTypes {
// Field must be binary:
if (fieldType.valueType != ValueType.BINARY && fieldType.valueType != ValueType.ATOM) {
- illegalState(fieldName, "binary term query must have valueType BINARY or ATOM; got " + fieldType.valueType);
+ illegalState(fieldName, "binary term query must have value type BINARY or ATOM; got " + fieldType.valueType);
}
return new TermQuery(new Term(fieldName, new BytesRef(token)));
@@ -3056,7 +3077,7 @@ public class FieldTypes {
// Field must be text:
if (fieldType.valueType != ValueType.TEXT && fieldType.valueType != ValueType.SHORT_TEXT && fieldType.valueType != ValueType.ATOM) {
- illegalState(fieldName, "string term query must have valueType TEXT, SHORT_TEXT or ATOM; got " + fieldType.valueType);
+ illegalState(fieldName, "string term query must have value type TEXT, SHORT_TEXT or ATOM; got " + fieldType.valueType);
}
return new TermQuery(new Term(fieldName, token));
@@ -3074,7 +3095,7 @@ public class FieldTypes {
// Field must be boolean:
if (fieldType.valueType != ValueType.BOOLEAN) {
- illegalState(fieldName, "boolean term query must have valueType BOOLEAN; got " + fieldType.valueType);
+ illegalState(fieldName, "boolean term query must have value type BOOLEAN; got " + fieldType.valueType);
}
byte[] value = new byte[1];
@@ -3097,17 +3118,309 @@ public class FieldTypes {
// Field must be InetAddress:
if (fieldType.valueType != ValueType.INET_ADDRESS) {
- illegalState(fieldName, "inet address term query must have valueType INET_ADDRESS; got " + fieldType.valueType);
+ illegalState(fieldName, "inet address term query must have value type INET_ADDRESS; got " + fieldType.valueType);
}
return new TermQuery(new Term(fieldName, new BytesRef(token.getAddress())));
}
+ // nocommit More, Less?
+
+ private String getRangeFilterDesc(FieldType fieldType, Object min, boolean minInclusive, Object max, boolean maxInclusive) {
+ StringBuilder sb = new StringBuilder();
+ if (minInclusive) {
+ sb.append('[');
+ } else {
+ sb.append('{');
+ }
+ if (min == null) {
+ sb.append('*');
+ } else {
+ sb.append(min);
+ }
+ sb.append(" TO ");
+ if (max == null) {
+ sb.append('*');
+ } else {
+ sb.append(max);
+ }
+ if (maxInclusive) {
+ sb.append(']');
+ } else {
+ sb.append('}');
+ }
+ return sb.toString();
+ }
+
+ public Filter newDocValuesRangeFilter(String fieldName, Date min, boolean minInclusive, Date max, boolean maxInclusive) {
+ // Field must exist:
+ FieldType fieldType = getFieldType(fieldName);
+
+ // Field must be DATE type:
+ if (fieldType.valueType != ValueType.DATE) {
+ illegalState(fieldName, "cannot create doc values range query: this field was not indexed as value type DATE; got: " + fieldType.valueType);
+ }
+
+ // Field must have doc values:
+ if (fieldType.docValuesType != DocValuesType.NUMERIC) {
+ illegalState(fieldName, "cannot create doc values range query: this field was not indexed with NUMERIC doc values; got: " + fieldType.docValuesType);
+ }
+
+ return DocValuesRangeFilter.newLongRange(fieldName,
+ min == null ? null : min.getTime(),
+ max == null ? null : max.getTime(),
+ minInclusive, maxInclusive,
+ getRangeFilterDesc(fieldType, min, minInclusive, max, maxInclusive));
+ }
+
+ public Filter newDocValuesRangeFilter(String fieldName, Number min, boolean minInclusive, Number max, boolean maxInclusive) {
+ // Field must exist:
+ FieldType fieldType = getFieldType(fieldName);
+
+ // Field must have doc values:
+ if (fieldType.docValuesType != DocValuesType.NUMERIC) {
+ illegalState(fieldName, "cannot create doc values range query: this field was not indexed with NUMERIC doc values; got: " + fieldType.docValuesType);
+ }
+
+ String desc = getRangeFilterDesc(fieldType, min, minInclusive, max, maxInclusive);
+
+ switch (fieldType.valueType) {
+ case INT:
+ return DocValuesRangeFilter.newIntRange(fieldName,
+ min == null ? null : min.intValue(),
+ max == null ? null : max.intValue(),
+ minInclusive,
+ maxInclusive,
+ desc);
+ case LONG:
+ return DocValuesRangeFilter.newLongRange(fieldName,
+ min == null ? null : min.longValue(),
+ max == null ? null : max.longValue(),
+ minInclusive,
+ maxInclusive,
+ desc);
+ case FLOAT:
+ return DocValuesRangeFilter.newFloatRange(fieldName,
+ min == null ? null : min.floatValue(),
+ max == null ? null : max.floatValue(),
+ minInclusive,
+ maxInclusive,
+ desc);
+ case DOUBLE:
+ return DocValuesRangeFilter.newDoubleRange(fieldName,
+ min == null ? null : min.doubleValue(),
+ max == null ? null : max.doubleValue(),
+ minInclusive,
+ maxInclusive,
+ desc);
+ case HALF_FLOAT:
+ return DocValuesRangeFilter.newHalfFloatRange(fieldName,
+ min == null ? null : min.floatValue(),
+ max == null ? null : max.floatValue(),
+ minInclusive,
+ maxInclusive,
+ desc);
+
+ default:
+ illegalState(fieldName, "cannot create doc values range query: this field was not indexed with a numeric value type; got: " + fieldType.valueType);
+ return null;
+ }
+ }
+
+ public Filter newDocValuesRangeFilter(String fieldName, byte[] minTerm, boolean minInclusive, byte[] maxTerm, boolean maxInclusive) {
+ return newDocValuesRangeFilter(fieldName, minTerm == null ? null : new BytesRef(minTerm), minInclusive, maxTerm == null ? null : new BytesRef(maxTerm), maxInclusive);
+ }
+
+ public Filter newDocValuesRangeFilter(String fieldName, String minTerm, boolean minInclusive, String maxTerm, boolean maxInclusive) {
+ return newDocValuesRangeFilter(fieldName, minTerm == null ? null : new BytesRef(minTerm), minInclusive, maxTerm == null ? null : new BytesRef(maxTerm), maxInclusive);
+ }
+
+ public Filter newDocValuesRangeFilter(String fieldName, BytesRef minTerm, boolean minInclusive, BytesRef maxTerm, boolean maxInclusive) {
+
+ // Field must exist:
+ FieldType fieldType = getFieldType(fieldName);
+
+ // Field must have sorted doc values:
+ if (fieldType.docValuesType != DocValuesType.SORTED && fieldType.docValuesType != DocValuesType.SORTED_SET) {
+ illegalState(fieldName, "cannot create doc values range query: this field was not indexed with SORTED or SORTED_SET doc values; got: " + fieldType.docValuesType);
+ }
+
+ if (fieldType.valueType != ValueType.ATOM && fieldType.valueType != ValueType.BINARY) {
+ illegalState(fieldName, "cannot create doc values range query: this field was not indexed as value type ATOM or BINARY; got: " + fieldType.valueType);
+ }
+
+ return DocValuesRangeFilter.newBytesRefRange(fieldName, minTerm, maxTerm, minInclusive, maxInclusive,
+ getRangeFilterDesc(fieldType, minTerm, minInclusive, maxTerm, maxInclusive));
+ }
+
+ public Filter newDocValuesRangeFilter(String fieldName, InetAddress min, boolean minInclusive, InetAddress max, boolean maxInclusive) {
+ // Field must exist:
+ FieldType fieldType = getFieldType(fieldName);
+
+ // Field must be InetAddress type:
+ if (fieldType.valueType != ValueType.INET_ADDRESS) {
+ illegalState(fieldName, "cannot create doc values range query: this field was not indexed as value type INET_ADDRESS; got: " + fieldType.valueType);
+ }
+
+ // Field must have sorted doc values:
+ if (fieldType.docValuesType != DocValuesType.SORTED && fieldType.docValuesType != DocValuesType.SORTED_SET) {
+ illegalState(fieldName, "cannot create doc values range query: this field was not indexed with SORTED or SORTED_SET doc values; got: " + fieldType.docValuesType);
+ }
+
+ BytesRef minTerm = min == null ? null : new BytesRef(min.getAddress());
+ BytesRef maxTerm = max == null ? null : new BytesRef(max.getAddress());
+
+ return DocValuesRangeFilter.newBytesRefRange(fieldName, minTerm, maxTerm, minInclusive, maxInclusive,
+ getRangeFilterDesc(fieldType, min, minInclusive, max, maxInclusive));
+ }
+
// nocommit split to newInt/Float/etc./Range
- // nocommit More, Less?
+ public Filter newIntRangeFilter(String fieldName, Integer min, boolean minInclusive, Integer max, boolean maxInclusive) {
+ // Field must exist:
+ FieldType fieldType = getFieldType(fieldName);
+
+ // Field must be indexed:
+ if (fieldType.indexOptions == IndexOptions.NONE) {
+ illegalState(fieldName, "cannot create range filter: this field was not indexed");
+ }
+
+ if (fieldType.fastRanges != Boolean.TRUE) {
+ illegalState(fieldName, "cannot create range filter: this field was not indexed for fast ranges");
+ }
+
+ if (fieldType.valueType != ValueType.INT) {
+ illegalState(fieldName, "cannot create range filter: this field was not indexed as value type INT; got: " + fieldType.valueType);
+ }
+
+ BytesRef minTerm = min == null ? null : NumericUtils.intToBytes(min.intValue());
+ BytesRef maxTerm = max == null ? null : NumericUtils.intToBytes(max.intValue());
+
+ return new TermRangeFilter(fieldName, minTerm, maxTerm, minInclusive, maxInclusive,
+ getRangeFilterDesc(fieldType, min, minInclusive, max, maxInclusive));
+ }
+
+ public Filter newLongRangeFilter(String fieldName, Long min, boolean minInclusive, Long max, boolean maxInclusive) {
+ // Field must exist:
+ FieldType fieldType = getFieldType(fieldName);
+
+ // Field must be indexed:
+ if (fieldType.indexOptions == IndexOptions.NONE) {
+ illegalState(fieldName, "cannot create range filter: this field was not indexed");
+ }
+
+ if (fieldType.fastRanges != Boolean.TRUE) {
+ illegalState(fieldName, "cannot create range filter: this field was not indexed for fast ranges");
+ }
+
+ if (fieldType.valueType != ValueType.LONG) {
+ illegalState(fieldName, "cannot create range filter: this field was not indexed as value type LONG; got: " + fieldType.valueType);
+ }
- // nocommit not great that the toString of the filter returned here is ... not easy to understand
+ BytesRef minTerm = min == null ? null : NumericUtils.longToBytes(min.longValue());
+ BytesRef maxTerm = max == null ? null : NumericUtils.longToBytes(max.longValue());
+
+ return new TermRangeFilter(fieldName, minTerm, maxTerm, minInclusive, maxInclusive,
+ getRangeFilterDesc(fieldType, min, minInclusive, max, maxInclusive));
+ }
+
+ public Filter newBigIntRangeFilter(String fieldName, BigInteger min, boolean minInclusive, BigInteger max, boolean maxInclusive) {
+ // Field must exist:
+ FieldType fieldType = getFieldType(fieldName);
+
+ // Field must be indexed:
+ if (fieldType.indexOptions == IndexOptions.NONE) {
+ illegalState(fieldName, "cannot create range filter: this field was not indexed");
+ }
+
+ if (fieldType.fastRanges != Boolean.TRUE) {
+ illegalState(fieldName, "cannot create range filter: this field was not indexed for fast ranges");
+ }
+
+ if (fieldType.valueType != ValueType.BIG_INT) {
+ illegalState(fieldName, "cannot create range filter: this field was not indexed as value type BIG_INT; got: " + fieldType.valueType);
+ }
+
+ BytesRef minTerm = min == null ? null : NumericUtils.bigIntToBytes(min);
+ BytesRef maxTerm = max == null ? null : NumericUtils.bigIntToBytes(max);
+
+ return new LeftZeroPadTermRangeFilter(fieldName, minTerm, maxTerm, minInclusive, maxInclusive,
+ getRangeFilterDesc(fieldType, min, minInclusive, max, maxInclusive));
+ }
+
+ public Filter newHalfFloatRangeFilter(String fieldName, Float min, boolean minInclusive, Float max, boolean maxInclusive) {
+ // Field must exist:
+ FieldType fieldType = getFieldType(fieldName);
+
+ // Field must be indexed:
+ if (fieldType.indexOptions == IndexOptions.NONE) {
+ illegalState(fieldName, "cannot create range filter: this field was not indexed");
+ }
+
+ if (fieldType.fastRanges != Boolean.TRUE) {
+ illegalState(fieldName, "cannot create range filter: this field was not indexed for fast ranges");
+ }
+
+ if (fieldType.valueType != ValueType.HALF_FLOAT) {
+ illegalState(fieldName, "cannot create range filter: this field was not indexed as value type HALF_FLOAT; got: " + fieldType.valueType);
+ }
+
+ BytesRef minTerm = min == null ? null : NumericUtils.halfFloatToBytes(min.floatValue());
+ BytesRef maxTerm = max == null ? null : NumericUtils.halfFloatToBytes(max.floatValue());
+
+ return new TermRangeFilter(fieldName, minTerm, maxTerm, minInclusive, maxInclusive,
+ getRangeFilterDesc(fieldType, min, minInclusive, max, maxInclusive));
+ }
+
+ public Filter newFloatRangeFilter(String fieldName, Float min, boolean minInclusive, Float max, boolean maxInclusive) {
+ // Field must exist:
+ FieldType fieldType = getFieldType(fieldName);
+
+ // Field must be indexed:
+ if (fieldType.indexOptions == IndexOptions.NONE) {
+ illegalState(fieldName, "cannot create range filter: this field was not indexed");
+ }
+
+ if (fieldType.fastRanges != Boolean.TRUE) {
+ illegalState(fieldName, "cannot create range filter: this field was not indexed for fast ranges");
+ }
+
+ if (fieldType.valueType != ValueType.FLOAT) {
+ illegalState(fieldName, "cannot create range filter: this field was not indexed as value type FLOAT; got: " + fieldType.valueType);
+ }
+
+ BytesRef minTerm = min == null ? null : NumericUtils.floatToBytes(min.floatValue());
+ BytesRef maxTerm = max == null ? null : NumericUtils.floatToBytes(max.floatValue());
+
+ return new TermRangeFilter(fieldName, minTerm, maxTerm, minInclusive, maxInclusive,
+ getRangeFilterDesc(fieldType, min, minInclusive, max, maxInclusive));
+ }
+
+ public Filter newDoubleRangeFilter(String fieldName, Double min, boolean minInclusive, Double max, boolean maxInclusive) {
+ // Field must exist:
+ FieldType fieldType = getFieldType(fieldName);
+
+ // Field must be indexed:
+ if (fieldType.indexOptions == IndexOptions.NONE) {
+ illegalState(fieldName, "cannot create range filter: this field was not indexed");
+ }
+
+ if (fieldType.fastRanges != Boolean.TRUE) {
+ illegalState(fieldName, "cannot create range filter: this field was not indexed for fast ranges");
+ }
+
+ if (fieldType.valueType != ValueType.DOUBLE) {
+ illegalState(fieldName, "cannot create range filter: this field was not indexed as value type DOUBLE; got: " + fieldType.valueType);
+ }
+
+ BytesRef minTerm = min == null ? null : NumericUtils.doubleToBytes(min.doubleValue());
+ BytesRef maxTerm = max == null ? null : NumericUtils.doubleToBytes(max.doubleValue());
+
+ return new TermRangeFilter(fieldName, minTerm, maxTerm, minInclusive, maxInclusive,
+ getRangeFilterDesc(fieldType, min, minInclusive, max, maxInclusive));
+ }
+
+ /*
public Filter newRangeFilter(String fieldName, Number min, boolean minInclusive, Number max, boolean maxInclusive) {
// Field must exist:
@@ -3115,11 +3428,11 @@ public class FieldTypes {
// Field must be indexed:
if (fieldType.indexOptions == IndexOptions.NONE) {
- illegalState(fieldName, "cannot create range query: this field was not indexed");
+ illegalState(fieldName, "cannot create range filter: this field was not indexed");
}
if (fieldType.fastRanges != Boolean.TRUE) {
- illegalState(fieldName, "this field was not indexed for fast ranges");
+ illegalState(fieldName, "cannot create range filter: this field was not indexed for fast ranges");
}
// nocommit should we really take Number here? it's too weakly typed? you could ask for float range on an int field? should we
@@ -3131,89 +3444,85 @@ public class FieldTypes {
switch (fieldType.valueType) {
case INT:
- minTerm = min == null ? null : Document.intToBytes(min.intValue());
- maxTerm = max == null ? null : Document.intToBytes(max.intValue());
+ minTerm = min == null ? null : NumericUtils.intToBytes(min.intValue());
+ maxTerm = max == null ? null : NumericUtils.intToBytes(max.intValue());
break;
case HALF_FLOAT:
- minTerm = min == null ? null : Document.halfFloatToSortableBytes(min.floatValue());
- maxTerm = max == null ? null : Document.halfFloatToSortableBytes(max.floatValue());
+ minTerm = min == null ? null : NumericUtils.halfFloatToBytes(min.floatValue());
+ maxTerm = max == null ? null : NumericUtils.halfFloatToBytes(max.floatValue());
break;
case FLOAT:
- minTerm = min == null ? null : Document.floatToSortableBytes(min.floatValue());
- maxTerm = max == null ? null : Document.floatToSortableBytes(max.floatValue());
+ minTerm = min == null ? null : NumericUtils.floatToBytes(min.floatValue());
+ maxTerm = max == null ? null : NumericUtils.floatToBytes(max.floatValue());
break;
case LONG:
- minTerm = min == null ? null : Document.longToBytes(min.longValue());
- maxTerm = max == null ? null : Document.longToBytes(max.longValue());
+ minTerm = min == null ? null : NumericUtils.longToBytes(min.longValue());
+ maxTerm = max == null ? null : NumericUtils.longToBytes(max.longValue());
break;
case DOUBLE:
- minTerm = min == null ? null : Document.doubleToSortableBytes(min.doubleValue());
- maxTerm = max == null ? null : Document.doubleToSortableBytes(max.doubleValue());
+ minTerm = min == null ? null : NumericUtils.doubleToBytes(min.doubleValue());
+ maxTerm = max == null ? null : NumericUtils.doubleToBytes(max.doubleValue());
break;
case BIG_INT:
- minTerm = min == null ? null : new BytesRef(((BigInteger) min).toByteArray());
- maxTerm = max == null ? null : new BytesRef(((BigInteger) max).toByteArray());
+ minTerm = min == null ? null : NumericUtils.bigIntToBytes((BigInteger) min);
+ maxTerm = max == null ? null : NumericUtils.bigIntToBytes((BigInteger) max);
break;
default:
- illegalState(fieldName, "cannot create numeric range query on non-numeric field; got valueType=" + fieldType.valueType);
+ illegalState(fieldName, "cannot create numeric range filter on non-numeric field; got value type=" + fieldType.valueType);
// Dead code but javac disagrees:
return null;
}
- StringBuilder sb = new StringBuilder();
- sb.append(fieldType.valueType);
- sb.append(':');
- if (min != null) {
- sb.append(min);
- sb.append(" (");
- sb.append(minInclusive ? "incl" : "excl");
- sb.append(')');
- }
- sb.append(" to ");
- if (max != null) {
- sb.append(max);
- sb.append(" (");
- sb.append(maxInclusive ? "incl" : "excl");
- sb.append(')');
- }
- return new TermRangeFilter(fieldName, minTerm, maxTerm, minInclusive, maxInclusive, sb.toString());
+ if (fieldType.valueType == ValueType.BIG_INT) {
+ return new LeftZeroPadTermRangeFilter(fieldName, minTerm, maxTerm, minInclusive, maxInclusive,
+ getRangeFilterDesc(fieldType, min, minInclusive, max, maxInclusive));
+ } else {
+ return new TermRangeFilter(fieldName, minTerm, maxTerm, minInclusive, maxInclusive,
+ getRangeFilterDesc(fieldType, min, minInclusive, max, maxInclusive));
+ }
}
+ */
- public Filter newRangeFilter(String fieldName, byte[] minTerm, boolean minInclusive, byte[] maxTerm, boolean maxInclusive) {
- return newRangeFilter(fieldName, new BytesRef(minTerm), minInclusive, new BytesRef(maxTerm), maxInclusive);
+ public Filter newBinaryRangeFilter(String fieldName, byte[] minTerm, boolean minInclusive, byte[] maxTerm, boolean maxInclusive) {
+ return newBinaryRangeFilter(fieldName, minTerm == null ? null : new BytesRef(minTerm), minInclusive, maxTerm == null ? null : new BytesRef(maxTerm), maxInclusive);
}
- public Filter newRangeFilter(String fieldName, String minTerm, boolean minInclusive, String maxTerm, boolean maxInclusive) {
- return newRangeFilter(fieldName, new BytesRef(minTerm), minInclusive, new BytesRef(maxTerm), maxInclusive);
+ public Filter newStringRangeFilter(String fieldName, String minTerm, boolean minInclusive, String maxTerm, boolean maxInclusive) {
+ return newBinaryRangeFilter(fieldName, minTerm == null ? null : new BytesRef(minTerm), minInclusive, maxTerm == null ? null : new BytesRef(maxTerm), maxInclusive);
}
- public Filter newRangeFilter(String fieldName, BytesRef minTerm, boolean minInclusive, BytesRef maxTerm, boolean maxInclusive) {
+ public Filter newBinaryRangeFilter(String fieldName, BytesRef minTerm, boolean minInclusive, BytesRef maxTerm, boolean maxInclusive) {
// Field must exist:
FieldType fieldType = getFieldType(fieldName);
// Field must be indexed:
if (fieldType.indexOptions == IndexOptions.NONE) {
- illegalState(fieldName, "cannot create range query: this field was not indexed");
+ illegalState(fieldName, "cannot create range filter: this field was not indexed");
}
if (fieldType.fastRanges != Boolean.TRUE) {
- illegalState(fieldName, "this field was not indexed for fast ranges");
+ illegalState(fieldName, "cannot create range filter: this field was not indexed for fast ranges");
}
- // nocommit verify type is BINARY or ATOM?
+ if (fieldType.valueType != ValueType.ATOM && fieldType.valueType != ValueType.BINARY) {
+ illegalState(fieldName, "cannot create range filter: this field was not indexed as value type ATOM or BINARY; got: " + fieldType.valueType);
+ }
- return new TermRangeFilter(fieldName, minTerm, maxTerm, minInclusive, maxInclusive);
+ return new TermRangeFilter(fieldName, minTerm, maxTerm, minInclusive, maxInclusive,
+ getRangeFilterDesc(fieldType, minTerm, minInclusive, maxTerm, maxInclusive));
}
- // nocommit Date sugar for a range query matching a specific hour/day/month/year/etc.? need locale/timezone... should we use DateTools?
+ // nocommit newRangeFilter(Date)
+
+ // nocommit Date sugar for a range filter matching a specific hour/day/month/year/etc.? need locale/timezone... should we use DateTools?
public Filter newRangeFilter(String fieldName, Date min, boolean minInclusive, Date max, boolean maxInclusive) {
@@ -3222,21 +3531,22 @@ public class FieldTypes {
// Field must be indexed:
if (fieldType.indexOptions == IndexOptions.NONE) {
- illegalState(fieldName, "cannot create range query: this field was not indexed");
+ illegalState(fieldName, "cannot create range filter: this field was not indexed");
}
if (fieldType.valueType != ValueType.DATE) {
- illegalState(fieldName, "cannot create range query: expected valueType=DATE but got: " + fieldType.valueType);
+ illegalState(fieldName, "cannot create range filter: expected value type=DATE but got: " + fieldType.valueType);
}
if (fieldType.fastRanges != Boolean.TRUE) {
- illegalState(fieldName, "this field was not indexed for fast ranges");
+ illegalState(fieldName, "cannot create range filter: this field was not indexed for fast ranges");
}
- BytesRef minTerm = min == null ? null : Document.longToBytes(min.getTime());
- BytesRef maxTerm = max == null ? null : Document.longToBytes(max.getTime());
+ BytesRef minTerm = min == null ? null : NumericUtils.longToBytes(min.getTime());
+ BytesRef maxTerm = max == null ? null : NumericUtils.longToBytes(max.getTime());
- return new TermRangeFilter(fieldName, minTerm, maxTerm, minInclusive, maxInclusive);
+ return new TermRangeFilter(fieldName, minTerm, maxTerm, minInclusive, maxInclusive,
+ getRangeFilterDesc(fieldType, min, minInclusive, max, maxInclusive));
}
// nocommit also add "range filter using net mask" sugar version
@@ -3247,21 +3557,22 @@ public class FieldTypes {
// Field must be indexed:
if (fieldType.indexOptions == IndexOptions.NONE) {
- illegalState(fieldName, "cannot create range query: this field was not indexed");
+ illegalState(fieldName, "cannot create range filter: this field was not indexed");
}
if (fieldType.valueType != ValueType.INET_ADDRESS) {
- illegalState(fieldName, "cannot create range query: expected valueType=INET_ADDRESS but got: " + fieldType.valueType);
+ illegalState(fieldName, "cannot create range filter: expected value type=INET_ADDRESS but got: " + fieldType.valueType);
}
if (fieldType.fastRanges != Boolean.TRUE) {
- illegalState(fieldName, "this field was not indexed for fast ranges");
+ illegalState(fieldName, "cannot create range filter: this field was not indexed for fast ranges");
}
BytesRef minTerm = min == null ? null : new BytesRef(min.getAddress());
BytesRef maxTerm = max == null ? null : new BytesRef(max.getAddress());
- return new TermRangeFilter(fieldName, minTerm, maxTerm, minInclusive, maxInclusive);
+ return new TermRangeFilter(fieldName, minTerm, maxTerm, minInclusive, maxInclusive,
+ getRangeFilterDesc(fieldType, min, minInclusive, max, maxInclusive));
}
// nocommit newPhraseQuery?
@@ -3373,7 +3684,7 @@ public class FieldTypes {
sortField = new SortField(fieldName, compSource, reverse) {
@Override
public String toString() {
- return "<halffloat" + ": \"" + fieldName + "\" missingValue=" + missingValue + ">";
+ return "<halffloat" + ": \"" + fieldName + "\" missingLast=" + fieldType.sortMissingLast + ">";
}
};
@@ -3465,61 +3776,33 @@ public class FieldTypes {
case BIG_INT:
// nocommit fixme
+ throw new UnsupportedOperationException();
+
+ case SHORT_TEXT:
+ case ATOM:
+ case BINARY:
+ case BOOLEAN:
+ case INET_ADDRESS:
+ SortField sortField;
{
- SortField sortField;
if (fieldType.multiValued == Boolean.TRUE) {
- // nocommit todo
- throw new UnsupportedOperationException();
- } else {
-
- final Float missingValue;
-
- if (fieldType.sortMissingLast == Boolean.TRUE) {
- if (reverse.booleanValue()) {
- missingValue = Float.NEGATIVE_INFINITY;
- } else {
- missingValue = Float.POSITIVE_INFINITY;
- }
- } else {
- assert fieldType.sortMissingLast == Boolean.FALSE;
- if (reverse.booleanValue()) {
- missingValue = Float.POSITIVE_INFINITY;
- } else {
- missingValue = Float.NEGATIVE_INFINITY;
- }
- }
-
+ // nocommit need to be able to set selector...
+ sortField = new SortedSetSortField(fieldName, reverse);
+ } else if (fieldType.sortKey != null) {
FieldComparatorSource compSource = new FieldComparatorSource() {
@Override
- public FieldComparator<Float> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) {
- return new HalfFloatComparator(numHits, fieldName, missingValue);
+ public FieldComparator<BytesRef> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) {
+ return new SortKeyComparator(numHits, fieldName, fieldType.sortMissingLast == Boolean.TRUE, fieldType.sortKey);
}
};
sortField = new SortField(fieldName, compSource, reverse) {
@Override
public String toString() {
- return "<halffloat" + ": \"" + fieldName + "\" missingValue=" + missingValue + ">";
+ return "<custom-sort-key" + ": \"" + fieldName + "\" missingLast=" + fieldType.sortMissingLast + ">";
}
};
- // nocommit not needed?
- sortField.setMissingValue(missingValue);
- }
-
- return sortField;
- }
-
- case SHORT_TEXT:
- case ATOM:
- case BINARY:
- case BOOLEAN:
- case INET_ADDRESS:
- SortField sortField;
- {
- if (fieldType.multiValued == Boolean.TRUE) {
- // nocommit need to be able to set selector...
- sortField = new SortedSetSortField(fieldName, reverse);
} else if (fieldType.docValuesType == DocValuesType.BINARY) {
sortField = new SortField(fieldName, SortField.Type.STRING_VAL, reverse);
} else {
@@ -3545,7 +3828,7 @@ public class FieldTypes {
default:
// BUG
- illegalState(fieldName, "unhandled sort case, valueType=" + fieldType.valueType);
+ illegalState(fieldName, "unhandled sort case, value type=" + fieldType.valueType);
// Dead code but javac disagrees:
return null;
@@ -3577,8 +3860,16 @@ public class FieldTypes {
if (readOnly) {
throw new IllegalStateException("cannot make changes to a read-only FieldTypes (it was opened from an IndexReader, not an IndexWriter)");
}
+ if (closed) {
+ throw new AlreadyClosedException("this FieldTypes has been closed");
+ }
}
+ // nocommit make this private, ie only IW can invoke it
+ public void close() {
+ closed = true;
+ }
+
static void illegalState(String fieldName, String message) {
throw new IllegalStateException("field \"" + fieldName + "\": " + message);
}
@@ -3797,5 +4088,18 @@ public class FieldTypes {
}
}
+ /** Returns true if terms should be left-zero-padded (sorted as if they were right-justified). */
+ public boolean rightJustifyTerms(String fieldName) {
+ FieldType fieldType = fields.get(fieldName);
+ return fieldType != null && fieldType.valueType == ValueType.BIG_INT;
+ }
+
+ public ValueType getValueType(String fieldName) {
+ // Field must exist:
+ FieldType fieldType = getFieldType(fieldName);
+
+ return fieldType.valueType;
+ }
+
// nocommit add sugar to wrap long NDVs as float/double?
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/HalfFloatComparator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/HalfFloatComparator.java?rev=1643659&r1=1643658&r2=1643659&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/HalfFloatComparator.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/HalfFloatComparator.java Sun Dec 7 10:52:03 2014
@@ -18,6 +18,8 @@ package org.apache.lucene.document;
*/
import org.apache.lucene.search.FieldComparator;
+import org.apache.lucene.util.HalfFloat;
+import org.apache.lucene.util.NumericUtils;
// nocommit can't we do all numeric comparators this way? if we fix all numeric dv to write sortable versions?
class HalfFloatComparator extends FieldComparator.NumericComparator<Float> {
@@ -33,7 +35,7 @@ class HalfFloatComparator extends FieldC
public HalfFloatComparator(int numHits, String field, Float missingValue) {
super(field, missingValue);
values = new short[numHits];
- missingShortValue = (short) Document.sortableHalfFloatBits(HalfFloat.floatToIntBits(missingValue));
+ missingShortValue = (short) NumericUtils.sortableHalfFloatBits(HalfFloat.floatToShortBits(missingValue));
}
@Override
@@ -73,12 +75,12 @@ class HalfFloatComparator extends FieldC
@Override
public void setTopValue(Float value) {
- topValue = (short) Document.sortableHalfFloatBits(HalfFloat.floatToIntBits(value));
+ topValue = (short) NumericUtils.sortableHalfFloatBits(HalfFloat.floatToShortBits(value));
}
@Override
public Float value(int slot) {
- return Document.sortableShortToFloat(values[slot]);
+ return NumericUtils.shortToHalfFloat(values[slot]);
}
@Override
Added: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/SortKeyComparator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/SortKeyComparator.java?rev=1643659&view=auto
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/SortKeyComparator.java (added)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/SortKeyComparator.java Sun Dec 7 10:52:03 2014
@@ -0,0 +1,143 @@
+package org.apache.lucene.document;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.FieldComparator;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefBuilder;
+
+/** Sorts a field by a provided dereferenced sort key. */
+class SortKeyComparator extends FieldComparator<BytesRef> {
+
+ // TODO: we could cache the sort keys...
+ private final BytesRef[] values;
+ private final BytesRefBuilder[] tempBRs;
+ private BinaryDocValues docTerms;
+ private Bits docsWithField;
+ private final String field;
+ private BytesRef bottom;
+ private BytesRef topValue;
+ private final int missingSortCmp;
+ private final FieldTypes.SortKey sortKey;
+
+ /** Sole constructor. */
+ public SortKeyComparator(int numHits, String field, boolean sortMissingLast, FieldTypes.SortKey sortKey) {
+ values = new BytesRef[numHits];
+ tempBRs = new BytesRefBuilder[numHits];
+ this.sortKey = sortKey;
+ this.field = field;
+ missingSortCmp = sortMissingLast ? 1 : -1;
+ }
+
+ @Override
+ public int compare(int slot1, int slot2) {
+ return compareValues(values[slot1], values[slot2]);
+ }
+
+ @Override
+ public int compareBottom(int doc) {
+ final BytesRef comparableBytes = getComparableBytes(doc, docTerms.get(doc));
+ return compareValues(bottom, comparableBytes);
+ }
+
+ @Override
+ public void copy(int slot, int doc) {
+ final BytesRef comparableBytes = getComparableBytes(doc, docTerms.get(doc));
+ if (comparableBytes == null) {
+ values[slot] = null;
+ } else {
+ if (tempBRs[slot] == null) {
+ tempBRs[slot] = new BytesRefBuilder();
+ }
+ tempBRs[slot].copyBytes(comparableBytes);
+ values[slot] = tempBRs[slot].get();
+ }
+ }
+
+ /** Check whether the given value represents <tt>null</tt>. This can be
+ * useful if the {@link BinaryDocValues} returned by {@link #getBinaryDocValues}
+ * use a special value as a sentinel. The default implementation checks
+ * {@link #getDocsWithField}.
+ * <p>NOTE: The null value can only be an EMPTY {@link BytesRef}. */
+ protected boolean isNull(int doc, BytesRef term) {
+ return docsWithField != null && docsWithField.get(doc) == false;
+ }
+
+ @Override
+ public FieldComparator<BytesRef> setNextReader(LeafReaderContext context) throws IOException {
+ docTerms = DocValues.getBinary(context.reader(), field);
+ docsWithField = DocValues.getDocsWithField(context.reader(), field);
+ if (docsWithField instanceof Bits.MatchAllBits) {
+ docsWithField = null;
+ }
+ return this;
+ }
+
+ @Override
+ public void setBottom(final int bottom) {
+ this.bottom = values[bottom];
+ }
+
+ @Override
+ public void setTopValue(BytesRef value) {
+ // null is fine: it means the last doc of the prior
+ // search was missing this value
+ topValue = value;
+ }
+
+ @Override
+ public BytesRef value(int slot) {
+ return values[slot];
+ }
+
+ @Override
+ public int compareValues(BytesRef val1, BytesRef val2) {
+ if (val1 == null) {
+ if (val2 == null) {
+ return 0;
+ }
+ return missingSortCmp;
+ } else if (val2 == null) {
+ return -missingSortCmp;
+ }
+ return sortKey.getKey(val1).compareTo(sortKey.getKey(val2));
+ }
+
+ @Override
+ public int compareTop(int doc) {
+ final BytesRef comparableBytes = getComparableBytes(doc, docTerms.get(doc));
+ return compareValues(topValue, comparableBytes);
+ }
+
+ /**
+ * Given a document and a term, return the term itself if it exists or
+ * <tt>null</tt> otherwise.
+ */
+ private BytesRef getComparableBytes(int doc, BytesRef term) {
+ if (term.length == 0 && docsWithField != null && docsWithField.get(doc) == false) {
+ return null;
+ }
+ return term;
+ }
+}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java?rev=1643659&r1=1643658&r2=1643659&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java Sun Dec 7 10:52:03 2014
@@ -574,7 +574,7 @@ final class DefaultIndexingChain extends
void setInvertState() {
invertState = new FieldInvertState(fieldInfo.name);
- termsHashPerField = termsHash.addField(invertState, fieldInfo);
+ termsHashPerField = termsHash.addField(invertState, fieldInfo, docWriter.writer.rightJustifyTerms(fieldInfo.name));
if (fieldInfo.omitsNorms() == false) {
assert norms == null;
// Even if no documents actually succeed in setting a norm, we still write norms for this segment:
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java?rev=1643659&r1=1643658&r2=1643659&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java Sun Dec 7 10:52:03 2014
@@ -144,6 +144,7 @@ class FreqProxFields extends Fields {
final int[] sortedTermIDs;
final FreqProxPostingsArray postingsArray;
final BytesRef scratch = new BytesRef();
+ final byte[] scratchBytes;
final int numTerms;
final Bits liveDocs;
int ord;
@@ -155,6 +156,11 @@ class FreqProxFields extends Fields {
sortedTermIDs = terms.sortedTermIDs;
assert sortedTermIDs != null;
postingsArray = (FreqProxPostingsArray) terms.postingsArray;
+ if (terms.rightJustifyTerms) {
+ scratchBytes = new byte[terms.maxTermLength];
+ } else {
+ scratchBytes = null;
+ }
}
public void reset() {
@@ -172,6 +178,7 @@ class FreqProxFields extends Fields {
int mid = (lo + hi) >>> 1;
int textStart = postingsArray.textStarts[sortedTermIDs[mid]];
terms.bytePool.setBytesRef(scratch, textStart);
+ maybeLeftZeroPad();
int cmp = scratch.compareTo(text);
if (cmp < 0) {
lo = mid + 1;
@@ -192,15 +199,30 @@ class FreqProxFields extends Fields {
} else {
int textStart = postingsArray.textStarts[sortedTermIDs[ord]];
terms.bytePool.setBytesRef(scratch, textStart);
+ maybeLeftZeroPad();
assert term().compareTo(text) > 0;
return SeekStatus.NOT_FOUND;
}
}
+ private void maybeLeftZeroPad() {
+ if (terms.rightJustifyTerms) {
+ int prefix = terms.maxTermLength - scratch.length;
+ for(int i=0;i<prefix;i++) {
+ scratchBytes[i] = 0;
+ }
+ System.arraycopy(scratch.bytes, scratch.offset, scratchBytes, prefix, scratch.length);
+ scratch.bytes = scratchBytes;
+ scratch.offset = 0;
+ scratch.length = terms.maxTermLength;
+ }
+ }
+
public void seekExact(long ord) {
this.ord = (int) ord;
int textStart = postingsArray.textStarts[sortedTermIDs[this.ord]];
terms.bytePool.setBytesRef(scratch, textStart);
+ maybeLeftZeroPad();
}
@Override
@@ -211,6 +233,7 @@ class FreqProxFields extends Fields {
} else {
int textStart = postingsArray.textStarts[sortedTermIDs[ord]];
terms.bytePool.setBytesRef(scratch, textStart);
+ maybeLeftZeroPad();
return scratch;
}
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java?rev=1643659&r1=1643658&r2=1643659&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java Sun Dec 7 10:52:03 2014
@@ -121,7 +121,7 @@ final class FreqProxTermsWriter extends
}
@Override
- public TermsHashPerField addField(FieldInvertState invertState, FieldInfo fieldInfo) {
- return new FreqProxTermsWriterPerField(invertState, this, fieldInfo, nextTermsHash.addField(invertState, fieldInfo));
+ public TermsHashPerField addField(FieldInvertState invertState, FieldInfo fieldInfo, boolean rightJustifyTerms) {
+ return new FreqProxTermsWriterPerField(invertState, this, fieldInfo, nextTermsHash.addField(invertState, fieldInfo, rightJustifyTerms), rightJustifyTerms);
}
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java?rev=1643659&r1=1643658&r2=1643659&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java Sun Dec 7 10:52:03 2014
@@ -30,7 +30,6 @@ import org.apache.lucene.util.RamUsageEs
final class FreqProxTermsWriterPerField extends TermsHashPerField {
private FreqProxPostingsArray freqProxPostingsArray;
-
final boolean hasFreq;
final boolean hasProx;
final boolean hasOffsets;
@@ -46,8 +45,8 @@ final class FreqProxTermsWriterPerField
* segment. */
boolean sawPayloads;
- public FreqProxTermsWriterPerField(FieldInvertState invertState, TermsHash termsHash, FieldInfo fieldInfo, TermsHashPerField nextPerField) {
- super(fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 ? 2 : 1, invertState, termsHash, nextPerField, fieldInfo);
+ public FreqProxTermsWriterPerField(FieldInvertState invertState, TermsHash termsHash, FieldInfo fieldInfo, TermsHashPerField nextPerField, boolean rightJustifyTerms) {
+ super(fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 ? 2 : 1, invertState, termsHash, nextPerField, fieldInfo, rightJustifyTerms);
IndexOptions indexOptions = fieldInfo.getIndexOptions();
assert indexOptions != IndexOptions.NONE;
hasFreq = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexReader.java?rev=1643659&r1=1643658&r2=1643659&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexReader.java Sun Dec 7 10:52:03 2014
@@ -27,7 +27,7 @@ import java.util.WeakHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Document2StoredFieldVisitor;
+import org.apache.lucene.document.DocumentStoredFieldVisitor;
import org.apache.lucene.document.FieldTypes;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.util.Bits; // javadocs
@@ -374,7 +374,7 @@ public abstract class IndexReader implem
// Document returned here contains that class not
// IndexableField
public final Document document(int docID) throws IOException {
- final Document2StoredFieldVisitor visitor = new Document2StoredFieldVisitor(getFieldTypes());
+ final DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(getFieldTypes());
document(docID, visitor);
return visitor.getDocument();
}
@@ -386,7 +386,7 @@ public abstract class IndexReader implem
*/
public final Document document(int docID, Set<String> fieldsToLoad)
throws IOException {
- final Document2StoredFieldVisitor visitor = new Document2StoredFieldVisitor(getFieldTypes(), fieldsToLoad);
+ final DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(getFieldTypes(), fieldsToLoad);
document(docID, visitor);
return visitor.getDocument();
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java?rev=1643659&r1=1643658&r2=1643659&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java Sun Dec 7 10:52:03 2014
@@ -889,6 +889,7 @@ public class IndexWriter implements Clos
}
public Document newDocument() {
+ ensureOpen();
return new Document(fieldTypes);
}
@@ -1939,6 +1940,7 @@ public class IndexWriter implements Clos
bufferedUpdatesStream.clear();
docWriter.close(); // mark it as closed first to prevent subsequent indexing actions/flushes
docWriter.abort(this); // don't sync on IW here
+ fieldTypes.close();
synchronized(this) {
if (pendingCommit != null) {
@@ -4618,13 +4620,13 @@ public class IndexWriter implements Clos
// nocommit explore other optos once we know field is unique
- synchronized LiveUniqueValues getUniqueValues(String uidFieldName) {
+ synchronized LiveUniqueValues getUniqueValues(String fieldName) {
LiveUniqueValues v;
- if (fieldTypes.getIsUnique(uidFieldName)) {
- v = uniqueValues.get(uidFieldName);
+ if (fieldTypes.getIsUnique(fieldName)) {
+ v = uniqueValues.get(fieldName);
if (v == null) {
- v = new LiveUniqueValues(uidFieldName, readerManager);
- uniqueValues.put(uidFieldName, v);
+ v = new LiveUniqueValues(fieldName, readerManager);
+ uniqueValues.put(fieldName, v);
}
} else {
v = null;
@@ -4632,4 +4634,8 @@ public class IndexWriter implements Clos
return v;
}
+
+ boolean rightJustifyTerms(String fieldName) {
+ return fieldTypes.rightJustifyTerms(fieldName);
+ }
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java?rev=1643659&r1=1643658&r2=1643659&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java Sun Dec 7 10:52:03 2014
@@ -521,7 +521,7 @@ public class MultiDocValues {
slices[i] = new ReaderSlice(0, 0, i);
indexes[i] = new TermsEnumIndex(subs[segmentMap.newToOld(i)], i);
}
- MultiTermsEnum mte = new MultiTermsEnum(slices);
+ MultiTermsEnum mte = new MultiTermsEnum(slices, -1);
mte.reset(indexes);
long globalOrd = 0;
while (mte.next() != null) {
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/MultiFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/MultiFields.java?rev=1643659&r1=1643658&r2=1643659&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/MultiFields.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/MultiFields.java Sun Dec 7 10:52:03 2014
@@ -27,6 +27,7 @@ import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
+import org.apache.lucene.document.FieldTypes;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.MergedIterator;
@@ -79,7 +80,7 @@ public final class MultiFields extends F
return fields.get(0);
} else {
return new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
- slices.toArray(ReaderSlice.EMPTY_ARRAY));
+ slices.toArray(ReaderSlice.EMPTY_ARRAY));
}
}
}
@@ -185,6 +186,12 @@ public final class MultiFields extends F
this.subSlices = subSlices;
}
+ private FieldTypes fieldTypes;
+
+ public void setFieldTypes(FieldTypes fieldTypes) {
+ this.fieldTypes = fieldTypes;
+ }
+
@SuppressWarnings({"unchecked","rawtypes"})
@Override
public Iterator<String> iterator() {
@@ -201,7 +208,6 @@ public final class MultiFields extends F
if (result != null)
return result;
-
// Lazy init: first time this field is requested, we
// create & add to terms:
final List<Terms> subs2 = new ArrayList<>();
@@ -221,7 +227,7 @@ public final class MultiFields extends F
// is unbounded.
} else {
result = new MultiTerms(subs2.toArray(Terms.EMPTY_ARRAY),
- slices2.toArray(ReaderSlice.EMPTY_ARRAY));
+ slices2.toArray(ReaderSlice.EMPTY_ARRAY), fieldTypes != null && fieldTypes.rightJustifyTerms(field));
terms.put(field, result);
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java?rev=1643659&r1=1643658&r2=1643659&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java Sun Dec 7 10:52:03 2014
@@ -39,6 +39,9 @@ public final class MultiTerms extends Te
private final boolean hasOffsets;
private final boolean hasPositions;
private final boolean hasPayloads;
+ private final boolean rightJustifyTerms;
+ private final int[] termLengths;
+ private final int maxTermLength;
/** Sole constructor.
*
@@ -46,22 +49,30 @@ public final class MultiTerms extends Te
* @param subSlices A parallel array (matching {@code
* subs}) describing the sub-reader slices.
*/
- public MultiTerms(Terms[] subs, ReaderSlice[] subSlices) throws IOException {
+ public MultiTerms(Terms[] subs, ReaderSlice[] subSlices, boolean rightJustifyTerms) throws IOException {
this.subs = subs;
this.subSlices = subSlices;
+ this.rightJustifyTerms = rightJustifyTerms;
assert subs.length > 0 : "inefficient: don't use MultiTerms over one sub";
boolean _hasFreqs = true;
boolean _hasOffsets = true;
boolean _hasPositions = true;
boolean _hasPayloads = false;
+ termLengths = new int[subs.length];
+ int maxTermLength = Integer.MIN_VALUE;
for(int i=0;i<subs.length;i++) {
_hasFreqs &= subs[i].hasFreqs();
_hasOffsets &= subs[i].hasOffsets();
_hasPositions &= subs[i].hasPositions();
_hasPayloads |= subs[i].hasPayloads();
+ BytesRef minTerm = subs[i].getMin();
+ termLengths[i] = minTerm == null ? -1 : minTerm.length;
+ assert rightJustifyTerms == false || (termLengths[i] != -1 && termLengths[i] == subs[i].getMax().length);
+ maxTermLength = Math.max(maxTermLength, termLengths[i]);
}
+ this.maxTermLength = rightJustifyTerms ? maxTermLength : -1;
hasFreqs = _hasFreqs;
hasOffsets = _hasOffsets;
hasPositions = _hasPositions;
@@ -80,6 +91,7 @@ public final class MultiTerms extends Te
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
+ // nocommit doesn't work w/ rightJustifyTerms?
final List<MultiTermsEnum.TermsEnumIndex> termsEnums = new ArrayList<>();
for(int i=0;i<subs.length;i++) {
final TermsEnum termsEnum = subs[i].intersect(compiled, startTerm);
@@ -89,7 +101,7 @@ public final class MultiTerms extends Te
}
if (termsEnums.size() > 0) {
- return new MultiTermsEnum(subSlices).reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY));
+ return new MultiTermsEnum(subSlices, maxTermLength).reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY));
} else {
return TermsEnum.EMPTY;
}
@@ -133,7 +145,7 @@ public final class MultiTerms extends Te
}
if (termsEnums.size() > 0) {
- return new MultiTermsEnum(subSlices).reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY));
+ return new MultiTermsEnum(subSlices, maxTermLength).reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY));
} else {
return TermsEnum.EMPTY;
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java?rev=1643659&r1=1643658&r2=1643659&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java Sun Dec 7 10:52:03 2014
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Arrays;
+import java.util.Comparator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -39,6 +40,8 @@ public final class MultiTermsEnum extend
private final TermsEnumWithSlice[] top;
private final MultiDocsEnum.EnumWithSlice[] subDocs;
private final MultiDocsAndPositionsEnum.EnumWithSlice[] subDocsAndPositions;
+ private final int zeroPadTermLength;
+ private final Comparator<BytesRef> cmp;
private BytesRef lastSeek;
private boolean lastSeekExact;
@@ -47,6 +50,7 @@ public final class MultiTermsEnum extend
private int numTop;
private int numSubs;
private BytesRef current;
+ private BytesRef scratch;
static class TermsEnumIndex {
public final static TermsEnumIndex[] EMPTY_ARRAY = new TermsEnumIndex[0];
@@ -73,12 +77,19 @@ public final class MultiTermsEnum extend
/** Sole constructor.
* @param slices Which sub-reader slices we should
* merge. */
- public MultiTermsEnum(ReaderSlice[] slices) {
+ public MultiTermsEnum(ReaderSlice[] slices, int zeroPadTermLength) {
queue = new TermMergeQueue(slices.length);
top = new TermsEnumWithSlice[slices.length];
subs = new TermsEnumWithSlice[slices.length];
subDocs = new MultiDocsEnum.EnumWithSlice[slices.length];
subDocsAndPositions = new MultiDocsAndPositionsEnum.EnumWithSlice[slices.length];
+ this.zeroPadTermLength = zeroPadTermLength;
+ if (zeroPadTermLength != -1) {
+ scratch = new BytesRef(zeroPadTermLength);
+ scratch.length = zeroPadTermLength;
+ } else {
+ scratch = null;
+ }
for(int i=0;i<slices.length;i++) {
subs[i] = new TermsEnumWithSlice(i, slices[i]);
subDocs[i] = new MultiDocsEnum.EnumWithSlice();
@@ -87,6 +98,11 @@ public final class MultiTermsEnum extend
subDocsAndPositions[i].slice = slices[i];
}
currentSubs = new TermsEnumWithSlice[slices.length];
+ if (zeroPadTermLength == -1) {
+ cmp = BytesRef.getUTF8SortedAsUnicodeComparator();
+ } else {
+ cmp = BytesRef.getRightJustifiedComparator();
+ }
}
@Override
@@ -130,7 +146,7 @@ public final class MultiTermsEnum extend
numTop = 0;
boolean seekOpt = false;
- if (lastSeek != null && lastSeek.compareTo(term) <= 0) {
+ if (lastSeek != null && cmp.compare(lastSeek, term) <= 0) {
seekOpt = true;
}
@@ -148,10 +164,10 @@ public final class MultiTermsEnum extend
if (seekOpt) {
final BytesRef curTerm = currentSubs[i].current;
if (curTerm != null) {
- final int cmp = term.compareTo(curTerm);
- if (cmp == 0) {
+ final int x = cmp.compare(term, curTerm);
+ if (x == 0) {
status = true;
- } else if (cmp < 0) {
+ } else if (x < 0) {
status = false;
} else {
status = currentSubs[i].terms.seekExact(term);
@@ -166,6 +182,7 @@ public final class MultiTermsEnum extend
if (status) {
top[numTop++] = currentSubs[i];
current = currentSubs[i].current = currentSubs[i].terms.term();
+ maybeLeftZeroPad();
assert term.equals(currentSubs[i].current);
}
}
@@ -175,6 +192,18 @@ public final class MultiTermsEnum extend
return numTop > 0;
}
+ private void maybeLeftZeroPad() {
+ if (zeroPadTermLength != -1 && current != null) {
+ int prefix = zeroPadTermLength - current.length;
+ assert prefix >= 0: "prefix=" + prefix + " zeroPadTermLength=" + zeroPadTermLength + " vs " + current.length;
+ for(int i=0;i<prefix;i++) {
+ scratch.bytes[i] = 0;
+ }
+ System.arraycopy(current.bytes, current.offset, scratch.bytes, prefix, current.length);
+ current = scratch;
+ }
+ }
+
@Override
public SeekStatus seekCeil(BytesRef term) throws IOException {
queue.clear();
@@ -182,7 +211,7 @@ public final class MultiTermsEnum extend
lastSeekExact = false;
boolean seekOpt = false;
- if (lastSeek != null && lastSeek.compareTo(term) <= 0) {
+ if (lastSeek != null && cmp.compare(lastSeek, term) <= 0) {
seekOpt = true;
}
@@ -200,10 +229,10 @@ public final class MultiTermsEnum extend
if (seekOpt) {
final BytesRef curTerm = currentSubs[i].current;
if (curTerm != null) {
- final int cmp = term.compareTo(curTerm);
- if (cmp == 0) {
+ final int x = cmp.compare(term, curTerm);
+ if (x == 0) {
status = SeekStatus.FOUND;
- } else if (cmp < 0) {
+ } else if (x < 0) {
status = SeekStatus.NOT_FOUND;
} else {
status = currentSubs[i].terms.seekCeil(term);
@@ -218,6 +247,7 @@ public final class MultiTermsEnum extend
if (status == SeekStatus.FOUND) {
top[numTop++] = currentSubs[i];
current = currentSubs[i].current = currentSubs[i].terms.term();
+ maybeLeftZeroPad();
} else {
if (status == SeekStatus.NOT_FOUND) {
currentSubs[i].current = currentSubs[i].terms.term();
@@ -265,6 +295,7 @@ public final class MultiTermsEnum extend
}
}
current = top[0].current;
+ maybeLeftZeroPad();
}
private void pushTop() throws IOException {
@@ -498,16 +529,16 @@ public final class MultiTermsEnum extend
}
}
- private final static class TermMergeQueue extends PriorityQueue<TermsEnumWithSlice> {
+ private final class TermMergeQueue extends PriorityQueue<TermsEnumWithSlice> {
TermMergeQueue(int size) {
super(size);
}
@Override
protected boolean lessThan(TermsEnumWithSlice termsA, TermsEnumWithSlice termsB) {
- final int cmp = termsA.current.compareTo(termsB.current);
- if (cmp != 0) {
- return cmp < 0;
+ final int x = cmp.compare(termsA.current, termsB.current);
+ if (x != 0) {
+ return x < 0;
} else {
return termsA.subSlice.start < termsB.subSlice.start;
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/StoredFieldVisitor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/StoredFieldVisitor.java?rev=1643659&r1=1643658&r2=1643659&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/StoredFieldVisitor.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/StoredFieldVisitor.java Sun Dec 7 10:52:03 2014
@@ -19,7 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException;
-import org.apache.lucene.document.Document2StoredFieldVisitor; // javadocs
+import org.apache.lucene.document.DocumentStoredFieldVisitor; // javadocs
/**
* Expert: provides a low-level means of accessing the stored field
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java?rev=1643659&r1=1643658&r2=1643659&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java Sun Dec 7 10:52:03 2014
@@ -141,8 +141,8 @@ final class TermVectorsConsumer extends
}
@Override
- public TermsHashPerField addField(FieldInvertState invertState, FieldInfo fieldInfo) {
- return new TermVectorsConsumerPerField(invertState, this, fieldInfo);
+ public TermsHashPerField addField(FieldInvertState invertState, FieldInfo fieldInfo, boolean rightJustifyTerms) {
+ return new TermVectorsConsumerPerField(invertState, this, fieldInfo, rightJustifyTerms);
}
void addFieldToFlush(TermVectorsConsumerPerField fieldToFlush) {
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java?rev=1643659&r1=1643658&r2=1643659&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java Sun Dec 7 10:52:03 2014
@@ -39,9 +39,10 @@ final class TermVectorsConsumerPerField
OffsetAttribute offsetAttribute;
PayloadAttribute payloadAttribute;
boolean hasPayloads; // if enabled, and we actually saw any for this field
+ byte[] scratchBytes;
- public TermVectorsConsumerPerField(FieldInvertState invertState, TermVectorsConsumer termsWriter, FieldInfo fieldInfo) {
- super(2, invertState, termsWriter, null, fieldInfo);
+ public TermVectorsConsumerPerField(FieldInvertState invertState, TermVectorsConsumer termsWriter, FieldInfo fieldInfo, boolean rightJustifyTerms) {
+ super(2, invertState, termsWriter, null, fieldInfo, rightJustifyTerms);
this.termsWriter = termsWriter;
}
@@ -56,6 +57,24 @@ final class TermVectorsConsumerPerField
termsWriter.addFieldToFlush(this);
}
+ private void maybeLeftZeroPad(BytesRef flushTerm) {
+ if (rightJustifyTerms) {
+ // nocommit need to fix checkIndex to deal w/ this properly (it will be angry that sometimes term vectors terms are not left-padded
+ // "enough") since we may not have seen the max term yet when we write this doc:
+
+ // nocommit we could make this a "per document max term" instead?
+ int prefix = maxTermLength - flushTerm.length;
+ assert prefix >= 0;
+ for(int i=0;i<prefix;i++) {
+ scratchBytes[i] = 0;
+ }
+ System.arraycopy(flushTerm.bytes, flushTerm.offset, scratchBytes, prefix, flushTerm.length);
+ flushTerm.bytes = scratchBytes;
+ flushTerm.offset = 0;
+ flushTerm.length = maxTermLength;
+ }
+ }
+
void finishDocument() throws IOException {
if (doVectors == false) {
return;
@@ -63,6 +82,10 @@ final class TermVectorsConsumerPerField
doVectors = false;
+ if (rightJustifyTerms && (scratchBytes == null || scratchBytes.length < maxTermLength)) {
+ scratchBytes = new byte[maxTermLength];
+ }
+
final int numPostings = bytesHash.size();
final BytesRef flushTerm = termsWriter.flushTerm;
@@ -89,6 +112,8 @@ final class TermVectorsConsumerPerField
// Get BytesRef
termBytePool.setBytesRef(flushTerm, postings.textStarts[termID]);
+ maybeLeftZeroPad(flushTerm);
+
tv.startTerm(flushTerm, freq);
if (doVectorPositions || doVectorOffsets) {
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/TermsHash.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/TermsHash.java?rev=1643659&r1=1643658&r2=1643659&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/TermsHash.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/TermsHash.java Sun Dec 7 10:52:03 2014
@@ -86,7 +86,7 @@ abstract class TermsHash {
}
}
- abstract TermsHashPerField addField(FieldInvertState fieldInvertState, FieldInfo fieldInfo);
+ abstract TermsHashPerField addField(FieldInvertState fieldInvertState, FieldInfo fieldInfo, boolean rightJustifyTerms);
void finishDocument() throws IOException {
if (nextTermsHash != null) {