You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/11/30 12:07:19 UTC
svn commit: r1642535 [3/19] - in /lucene/dev/branches/lucene6005/lucene:
analysis/common/src/java/org/apache/lucene/collation/
analysis/common/src/test/org/apache/lucene/analysis/core/
analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/ ...
Copied: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document.java (from r1642229, lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document2.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document.java?p2=lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document.java&p1=lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document2.java&r1=1642229&r2=1642535&rev=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document2.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document.java Sun Nov 30 11:07:09 2014
@@ -19,6 +19,7 @@ package org.apache.lucene.document;
import java.io.IOException;
import java.io.Reader;
+import java.math.BigInteger;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Date;
@@ -43,7 +44,7 @@ import org.apache.lucene.util.BytesRef;
* that also tracks field properties implied by the
* fields being added. */
-public class Document2 implements Iterable<IndexableField> {
+public class Document implements Iterable<IndexableField> {
private static final float DEFAULT_BOOST = 1.0f;
@@ -139,12 +140,8 @@ public class Document2 implements Iterab
}
@Override
- public TokenStream tokenStream(Analyzer analyzerIn, TokenStream reuse) throws IOException {
+ public TokenStream tokenStream(TokenStream reuse) throws IOException {
Analyzer analyzer = fieldTypes.getIndexAnalyzer();
- if (analyzerIn != analyzer) {
- // TODO: remove analyzer from IW APIs
- throw new IllegalArgumentException("analyzer must be the instance from FieldTypes: got " + analyzerIn + " vs " + analyzer);
- }
assert fieldTypes.getIndexOptions(fieldName) != IndexOptions.NONE;
@@ -154,12 +151,16 @@ public class Document2 implements Iterab
switch (fieldType.valueType) {
case INT:
return getReusedBinaryTokenStream(intToBytes(((Number) value).intValue()), reuse);
+ case HALF_FLOAT:
+ return getReusedBinaryTokenStream(halfFloatToSortableBytes(((Number) value).floatValue()), reuse);
case FLOAT:
- return getReusedBinaryTokenStream(floatToBytes(((Number) value).floatValue()), reuse);
+ return getReusedBinaryTokenStream(floatToSortableBytes(((Number) value).floatValue()), reuse);
case LONG:
return getReusedBinaryTokenStream(longToBytes(((Number) value).longValue()), reuse);
case DOUBLE:
- return getReusedBinaryTokenStream(doubleToBytes(((Number) value).doubleValue()), reuse);
+ return getReusedBinaryTokenStream(doubleToSortableBytes(((Number) value).doubleValue()), reuse);
+ case BIG_INT:
+ return getReusedBinaryTokenStream(new BytesRef(((BigInteger) value).toByteArray()), reuse);
case DATE:
return getReusedBinaryTokenStream(longToBytes(((Date) value).getTime()), reuse);
case ATOM:
@@ -256,6 +257,7 @@ public class Document2 implements Iterab
switch (fieldType.valueType) {
case INT:
case LONG:
+ case HALF_FLOAT:
case FLOAT:
case DOUBLE:
return (Number) value;
@@ -282,6 +284,13 @@ public class Document2 implements Iterab
return (Number) value;
case LONG:
return (Number) value;
+ case HALF_FLOAT:
+ int shortBits = HalfFloat.floatToIntBits((Float) value);
+ // nocommit different from other numerics:
+ shortBits = sortableHalfFloatBits(shortBits);
+ assert shortBits >= 0 && shortBits <= Short.MAX_VALUE;
+ return Integer.valueOf(shortBits);
+ //return Integer.valueOf(Float.floatToRawIntBits((Float) value));
case FLOAT:
// nocommit i shouldn't do sortableFloatBits? but why does ot TestSortedNumericSortField.testFloat fail?
int intBits = Float.floatToIntBits((Float) value);
@@ -362,6 +371,8 @@ public class Document2 implements Iterab
return new BytesRef(bytes);
} else if (fieldType.valueType == FieldTypes.ValueType.INET_ADDRESS) {
return new BytesRef(((InetAddress) value).getAddress());
+ } else if (fieldType.valueType == FieldTypes.ValueType.BIG_INT) {
+ return new BytesRef(((BigInteger) value).toByteArray());
} else if (value instanceof BytesRef) {
return (BytesRef) value;
} else {
@@ -376,9 +387,22 @@ public class Document2 implements Iterab
} else if (fieldType.docValuesType == DocValuesType.BINARY || fieldType.docValuesType == DocValuesType.SORTED || fieldType.docValuesType == DocValuesType.SORTED_SET) {
if (fieldType.valueType == FieldTypes.ValueType.INET_ADDRESS) {
return new BytesRef(((InetAddress) value).getAddress());
+ } else if (fieldType.valueType == FieldTypes.ValueType.BIG_INT) {
+ return new BytesRef(((BigInteger) value).toByteArray());
} else if (value instanceof String) {
- // nocommit somewhat evil we utf8-encode your string?
- return new BytesRef((String) value);
+ String s = (String) value;
+ BytesRef br;
+ if (fieldType.sortCollator != null) {
+ // nocommit thread local clones?
+ synchronized (fieldType.sortCollator) {
+ br = new BytesRef(fieldType.sortCollator.getCollationKey(s).toByteArray());
+ }
+ } else {
+ // nocommit somewhat evil we utf8-encode your string?
+ br = new BytesRef(s);
+ }
+
+ return br;
}
}
@@ -395,11 +419,11 @@ public class Document2 implements Iterab
}
}
- public Document2(FieldTypes fieldTypes) {
+ public Document(FieldTypes fieldTypes) {
this(fieldTypes, true);
}
- public Document2(Document2 other) {
+ public Document(Document other) {
this.fieldTypes = other.fieldTypes;
this.changeSchema = other.changeSchema;
if (changeSchema) {
@@ -410,7 +434,7 @@ public class Document2 implements Iterab
addAll(other);
}
- Document2(FieldTypes fieldTypes, boolean changeSchema) {
+ Document(FieldTypes fieldTypes, boolean changeSchema) {
this.fieldTypes = fieldTypes;
this.changeSchema = changeSchema;
if (changeSchema) {
@@ -569,13 +593,21 @@ public class Document2 implements Iterab
/** Default: store this value. */
// nocommit testme, or remove?
public void addStoredInt(String fieldName, int value) {
- // nocommit akward we inferred large_text here?
if (changeSchema) {
fieldTypes.recordStoredValueType(fieldName, FieldTypes.ValueType.INT);
}
fields.add(new FieldValue(fieldName, value));
}
+ // nocommit throw exc if this field was already indexed/dvd?
+ /** Default: store this value. */
+ public void addStoredDouble(String fieldName, double value) {
+ if (changeSchema) {
+ fieldTypes.recordStoredValueType(fieldName, FieldTypes.ValueType.DOUBLE);
+ }
+ fields.add(new FieldValue(fieldName, value));
+ }
+
/** Default: store & DV this value. */
public void addBinary(String fieldName, BytesRef value) {
if (changeSchema) {
@@ -661,6 +693,15 @@ public class Document2 implements Iterab
fields.add(new FieldValue(fieldName, Float.valueOf(value)));
}
+ /** Adds half precision (2 bytes) float. Note that the value is stored with 2 bytes in doc values, but in stored fields it's stored as an
+ * ordinary 4 byte float. Default: support for range filtering/querying and sorting (using numeric doc values). */
+ public void addHalfFloat(String fieldName, float value) {
+ if (changeSchema) {
+ fieldTypes.recordValueType(fieldName, FieldTypes.ValueType.HALF_FLOAT);
+ }
+ fields.add(new FieldValue(fieldName, Float.valueOf(value)));
+ }
+
/** Default: support for range filtering/querying and sorting (using numeric doc values). */
public void addLong(String fieldName, long value) {
if (changeSchema) {
@@ -685,6 +726,14 @@ public class Document2 implements Iterab
fields.add(new FieldValue(fieldName, Double.valueOf(value)));
}
+ /** Default: support for range filtering/querying and sorting (using sorted doc values: SORTED, or SORTED_SET when multi-valued). */
+ public void addBigInteger(String fieldName, BigInteger value) {
+ if (changeSchema) {
+ fieldTypes.recordValueType(fieldName, FieldTypes.ValueType.BIG_INT);
+ }
+ fields.add(new FieldValue(fieldName, value));
+ }
+
public void addBoolean(String fieldName, boolean value) {
if (changeSchema) {
fieldTypes.recordValueType(fieldName, FieldTypes.ValueType.BOOLEAN);
@@ -715,11 +764,11 @@ public class Document2 implements Iterab
static {
// nocommit is there a cleaner/general way to detect missing enum value in case switch statically? must we use ecj?
- assert FieldTypes.ValueType.values().length == 12: "missing case for switch statement below";
+ assert FieldTypes.ValueType.values().length == 14: "missing case for switch statement below";
}
/** Note: this FieldTypes must already know about all the fields in the incoming doc. */
- public void addAll(Document2 other) {
+ public void addAll(Document other) {
// nocommit should we insist other.fieldTypes == this.fieldTypes? or, that they are "congruent"?
for (IndexableField indexableField : other.fields) {
String fieldName = indexableField.name();
@@ -745,6 +794,9 @@ public class Document2 implements Iterab
case INT:
addInt(fieldName, field.numericValue().intValue());
break;
+ case HALF_FLOAT:
+ addHalfFloat(fieldName, field.numericValue().floatValue());
+ break;
case FLOAT:
addFloat(fieldName, field.numericValue().floatValue());
break;
@@ -754,6 +806,9 @@ public class Document2 implements Iterab
case DOUBLE:
addDouble(fieldName, field.numericValue().doubleValue());
break;
+ case BIG_INT:
+ addBigInteger(fieldName, (BigInteger) field.value);
+ break;
case BINARY:
addStored(fieldName, field.binaryValue());
break;
@@ -802,10 +857,31 @@ public class Document2 implements Iterab
return token;
}
- public static BytesRef floatToBytes(float value) {
+ public static BytesRef shortToBytes(short v) {
+ int sortableBits = v ^ 0x8000;
+ BytesRef token = new BytesRef(2);
+ token.length = 2;
+ int index = 1;
+ while (index >= 0) {
+ token.bytes[index] = (byte) (sortableBits & 0xff);
+ index--;
+ sortableBits >>>= 8;
+ }
+ return token;
+ }
+
+ public static BytesRef floatToSortableBytes(float value) {
return intToBytes(sortableFloatBits(Float.floatToIntBits(value)));
}
+ public static BytesRef halfFloatToSortableBytes(float value) {
+ return shortToBytes((short) sortableHalfFloatBits(HalfFloat.floatToIntBits(value)));
+ }
+
+ public static int floatToSortableInt(float value) {
+ return sortableFloatBits(Float.floatToIntBits(value));
+ }
+
/** Converts numeric DV field back to double. */
public static double sortableLongToDouble(long v) {
return Double.longBitsToDouble(sortableDoubleBits(v));
@@ -826,6 +902,11 @@ public class Document2 implements Iterab
return Float.intBitsToFloat(v);
}
+ /** Converts a sortable half-float value (as stored in a numeric DV field) back to float. */
+ public static float sortableShortToFloat(short v) {
+ return HalfFloat.intBitsToFloat(sortableHalfFloatBits(v));
+ }
+
// nocommit move elsewhere?
public static int bytesToInt(BytesRef bytes) {
if (bytes.length != 4) {
@@ -839,6 +920,19 @@ public class Document2 implements Iterab
return sortableBits ^ 0x80000000;
}
+ // nocommit move elsewhere?
+ public static int bytesToShort(BytesRef bytes) {
+ if (bytes.length != 2) {
+ throw new IllegalArgumentException("incoming bytes should be length=2; got length=" + bytes.length);
+ }
+ int sortableBits = 0;
+ for(int i=0;i<2;i++) {
+ sortableBits = (sortableBits << 8) | bytes.bytes[bytes.offset + i] & 0xff;
+ }
+
+ return sortableBits ^ 0x8000;
+ }
+
public static BytesRef longToBytes(long v) {
long sortableBits = v ^ 0x8000000000000000L;
BytesRef token = new BytesRef(8);
@@ -852,10 +946,14 @@ public class Document2 implements Iterab
return token;
}
- public static BytesRef doubleToBytes(double value) {
+ public static BytesRef doubleToSortableBytes(double value) {
return longToBytes(sortableDoubleBits(Double.doubleToLongBits(value)));
}
+ public static long doubleToSortableLong(double value) {
+ return sortableDoubleBits(Double.doubleToLongBits(value));
+ }
+
// nocommit move elsewhere?
public static long bytesToLong(BytesRef bytes) {
if (bytes.length != 8) {
@@ -875,6 +973,11 @@ public class Document2 implements Iterab
}
// nocommit move elsewhere?
+ public static float bytesToHalfFloat(BytesRef bytes) {
+ return HalfFloat.intBitsToFloat(sortableHalfFloatBits(bytesToShort(bytes)));
+ }
+
+ // nocommit move elsewhere?
public static double bytesToDouble(BytesRef bytes) {
return Double.longBitsToDouble(sortableDoubleBits(bytesToLong(bytes)));
}
@@ -889,6 +992,12 @@ public class Document2 implements Iterab
return bits ^ (bits >> 31) & 0x7fffffff;
}
+ /** Converts IEEE 754 representation of a half float to sortable order (or back to the original) */
+ // nocommit short?
+ public static int sortableHalfFloatBits(int bits) {
+ return bits ^ (bits >> 15) & 0x7fff;
+ }
+
public Boolean getBoolean(String fieldName) {
// nocommit can we assert this is a known field and that its type is correct?
FieldValue fieldValue = getFirstFieldValue(fieldName);
@@ -921,6 +1030,16 @@ public class Document2 implements Iterab
}
}
+ public BigInteger getBigInteger(String fieldName) {
+ // nocommit can we assert this is a known field and that its type is correct?
+ FieldValue fieldValue = getFirstFieldValue(fieldName);
+ if (fieldValue == null) {
+ return null;
+ } else {
+ return (BigInteger) fieldValue.value;
+ }
+ }
+
public String getString(String fieldName) {
// nocommit can we assert this is a known field and that its type is correct?
FieldValue fieldValue = getFirstFieldValue(fieldName);
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document2StoredFieldVisitor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document2StoredFieldVisitor.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document2StoredFieldVisitor.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document2StoredFieldVisitor.java Sun Nov 30 11:07:09 2014
@@ -18,6 +18,7 @@ package org.apache.lucene.document;
*/
import java.io.IOException;
+import java.math.BigInteger;
import java.net.InetAddress;
import java.util.Date;
import java.util.HashSet;
@@ -38,7 +39,7 @@ import org.apache.lucene.util.BytesRef;
* @lucene.experimental */
public class Document2StoredFieldVisitor extends StoredFieldVisitor {
- private final Document2 doc;
+ private final Document doc;
private final Set<String> fieldsToAdd;
private final FieldTypes fieldTypes;
@@ -47,14 +48,14 @@ public class Document2StoredFieldVisitor
* @param fieldsToAdd Set of fields to load, or <code>null</code> (all fields).
*/
public Document2StoredFieldVisitor(FieldTypes fieldTypes, Set<String> fieldsToAdd) {
- doc = new Document2(fieldTypes, false);
+ doc = new Document(fieldTypes, false);
this.fieldTypes = fieldTypes;
this.fieldsToAdd = fieldsToAdd;
}
/** Load only fields named in the provided fields. */
public Document2StoredFieldVisitor(FieldTypes fieldTypes, String... fields) {
- doc = new Document2(fieldTypes, false);
+ doc = new Document(fieldTypes, false);
this.fieldTypes = fieldTypes;
fieldsToAdd = new HashSet<>(fields.length);
for(String field : fields) {
@@ -64,7 +65,7 @@ public class Document2StoredFieldVisitor
/** Load all stored fields. */
public Document2StoredFieldVisitor(FieldTypes fieldTypes) {
- doc = new Document2(fieldTypes, false);
+ doc = new Document(fieldTypes, false);
this.fieldTypes = fieldTypes;
this.fieldsToAdd = null;
}
@@ -84,6 +85,8 @@ public class Document2StoredFieldVisitor
FieldTypes.FieldType fieldType = getFieldType(fieldInfo.name);
if (fieldType != null && fieldType.valueType == FieldTypes.ValueType.INET_ADDRESS) {
doc.addInetAddress(fieldInfo.name, InetAddress.getByAddress(value));
+ } else if (fieldType != null && fieldType.valueType == FieldTypes.ValueType.BIG_INT) {
+ doc.addBigInteger(fieldInfo.name, new BigInteger(value));
} else {
doc.addBinary(fieldInfo.name, new BytesRef(value));
}
@@ -134,12 +137,12 @@ public class Document2StoredFieldVisitor
/**
* Retrieve the visited document.
- * @return {@link Document2} populated with stored fields. Note that only
+ * @return {@link Document} populated with stored fields. Note that only
* the stored information in the field instances is valid,
* data such as indexing options, term vector options,
* etc is not set.
*/
- public Document2 getDocument() {
+ public Document getDocument() {
return doc;
}
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/FieldTypes.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/FieldTypes.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/FieldTypes.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/FieldTypes.java Sun Nov 30 11:07:09 2014
@@ -18,12 +18,15 @@ package org.apache.lucene.document;
*/
import java.io.IOException;
+import java.math.BigInteger;
import java.net.InetAddress;
+import java.text.Collator;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
@@ -46,6 +49,8 @@ import org.apache.lucene.index.IndexWrit
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.Term;
+import org.apache.lucene.search.FieldComparator;
+import org.apache.lucene.search.FieldComparatorSource;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
@@ -98,6 +103,17 @@ import org.apache.lucene.util.Version;
// tie into faceting
// tie into index sorting
+// nocommit sugar API to retrieve values from DVs or stored fields or whatever?
+
+// nocommit how will future back-compat work? segment must store field types as of when it was written?
+
+// nocommit how to make this more extensible? e.g. so I can say "this field will facet, hierarchical, etc."
+
+
+// nocommit expose DocValuesRangeFilter?
+
+// nocommit PH should take this and validate highlighting was enabled?
+
// nocommit a segment should store the field type as of when it was written? on upgrade/reindex we can use that?
// nocommit addStored should take numbers too?
@@ -201,8 +217,6 @@ import org.apache.lucene.util.Version;
// nocommit fix all change methods to call validate / rollback
-// nocommit float16?
-
// nocommit can we move multi-field-ness out of IW? so IW only gets a single instance of each field
// nocommit nested/parent/child docs?
@@ -215,16 +229,19 @@ import org.apache.lucene.util.Version;
// nocommit required? not null?
+// nocommit BigInt?
+
+// nocommit BigDecimal?
+
/** Records how each field is indexed, stored, etc. This class persists
* its state using {@link IndexWriter#setCommitData}, using the
* {@link FieldTypes#FIELD_PROPERTIES_KEY} key. */
-// nocommit what about uniqueAtom number int/long? maybe break out isUnique? then, e.g. like norms, you could have unique set, but maybe
-// later turn it off
+public class FieldTypes {
-// nocommit IW should detect if incoming document's fieldTypes != its own
+ public static final int DEFAULT_POSITION_GAP = 0;
-public class FieldTypes {
+ public static final int DEFAULT_OFFSET_GAP = 1;
enum ValueType {
NONE,
@@ -232,9 +249,11 @@ public class FieldTypes {
SHORT_TEXT,
ATOM, // nocommit binary sort of overlaps w/ this?
INT,
+ HALF_FLOAT,
FLOAT,
LONG,
DOUBLE,
+ BIG_INT,
BINARY, // nocommit rename to bytes?
BOOLEAN,
DATE,
@@ -268,7 +287,7 @@ public class FieldTypes {
private long changeCount;
/** Just like current oal.document.FieldType, except for each setting it can also record "not-yet-set". */
- static class FieldType implements IndexableFieldType, Cloneable {
+ class FieldType implements IndexableFieldType, Cloneable {
private final String name;
// Lucene version when we were created:
@@ -392,14 +411,19 @@ public class FieldTypes {
private volatile Analyzer wrappedIndexAnalyzer;
private volatile Analyzer wrappedQueryAnalyzer;
+ Locale sortLocale;
+ Collator sortCollator;
+
boolean validate() {
switch (valueType) {
case NONE:
break;
case INT:
+ case HALF_FLOAT:
case FLOAT:
case LONG:
case DOUBLE:
+ case BIG_INT:
case DATE:
if (highlighted == Boolean.TRUE) {
illegalState(name, "type " + valueType + " cannot highlight");
@@ -410,8 +434,14 @@ public class FieldTypes {
if (queryAnalyzer != null) {
illegalState(name, "type " + valueType + " cannot have a queryAnalyzer");
}
- if (docValuesType != DocValuesType.NONE && (docValuesType != DocValuesType.NUMERIC && docValuesType != DocValuesType.SORTED_NUMERIC)) {
- illegalState(name, "type " + valueType + " must use NUMERIC docValuesType (got: " + docValuesType + ")");
+ if (valueType == ValueType.BIG_INT) {
+ if (docValuesType != DocValuesType.NONE && (docValuesType != DocValuesType.SORTED && docValuesType != DocValuesType.SORTED_SET)) {
+ illegalState(name, "type " + valueType + " must use SORTED or SORTED_SET docValuesType (got: " + docValuesType + ")");
+ }
+ } else {
+ if (docValuesType != DocValuesType.NONE && (docValuesType != DocValuesType.NUMERIC && docValuesType != DocValuesType.SORTED_NUMERIC)) {
+ illegalState(name, "type " + valueType + " must use NUMERIC or SORTED_NUMERIC docValuesType (got: " + docValuesType + ")");
+ }
}
if (indexOptions != IndexOptions.NONE && indexOptions.compareTo(IndexOptions.DOCS) > 0) {
illegalState(name, "type " + valueType + " cannot use indexOptions > DOCS (got indexOptions " + indexOptions + ")");
@@ -483,7 +513,7 @@ public class FieldTypes {
if (indexNorms == Boolean.TRUE) {
illegalState(name, "type " + valueType + " cannot index norms");
}
- if (indexOptions != IndexOptions.NONE && indexOptions.compareTo(IndexOptions.DOCS) > 0) {
+ if (indexOptions != IndexOptions.NONE && indexOptions.compareTo(IndexOptions.DOCS) > 0 && multiValued != Boolean.TRUE) {
illegalState(name, "type " + valueType + " can only be indexed as DOCS; got " + indexOptions);
}
if (maxTokenCount != null) {
@@ -537,6 +567,10 @@ public class FieldTypes {
illegalState(name, "cannot sort when DocValuesType=" + docValuesType);
}
+ if (sortable == Boolean.FALSE && sortLocale != null) {
+ illegalState(name, "cannot set sortLocale when field is not enabled for sorting");
+ }
+
if (indexOptionsSet) {
if (indexOptions == IndexOptions.NONE) {
if (blockTreeMinItemsInBlock != null) {
@@ -583,6 +617,7 @@ public class FieldTypes {
illegalState(name, "can only setAnalyzerPositionGap if the field is multi-valued");
}
}
+
if (analyzerOffsetGap != null) {
if (indexOptions == IndexOptions.NONE) {
illegalState(name, "can only setAnalyzerOffsetGap if the field is indexed");
@@ -867,27 +902,33 @@ public class FieldTypes {
case INT:
out.writeByte((byte) 4);
break;
- case FLOAT:
+ case HALF_FLOAT:
out.writeByte((byte) 5);
break;
- case LONG:
+ case FLOAT:
out.writeByte((byte) 6);
break;
- case DOUBLE:
+ case LONG:
out.writeByte((byte) 7);
break;
- case BINARY:
+ case DOUBLE:
out.writeByte((byte) 8);
break;
- case BOOLEAN:
+ case BIG_INT:
out.writeByte((byte) 9);
break;
- case DATE:
+ case BINARY:
out.writeByte((byte) 10);
break;
- case INET_ADDRESS:
+ case BOOLEAN:
out.writeByte((byte) 11);
break;
+ case DATE:
+ out.writeByte((byte) 12);
+ break;
+ case INET_ADDRESS:
+ out.writeByte((byte) 13);
+ break;
default:
throw new AssertionError("missing ValueType in switch");
}
@@ -943,6 +984,15 @@ public class FieldTypes {
writeNullableBoolean(out, storeTermVectorPayloads);
writeNullableBoolean(out, isUnique);
+ if (sortLocale != null) {
+ out.writeByte((byte) 1);
+ writeNullableString(out, sortLocale.getLanguage());
+ writeNullableString(out, sortLocale.getCountry());
+ writeNullableString(out, sortLocale.getVariant());
+ } else {
+ out.writeByte((byte) 0);
+ }
+
if (indexOptionsSet == false) {
assert indexOptions == IndexOptions.NONE;
out.writeByte((byte) 0);
@@ -973,66 +1023,6 @@ public class FieldTypes {
writeNullableBoolean(out, highlighted);
}
- private static void writeNullableInteger(DataOutput out, Integer value) throws IOException {
- if (value == null) {
- out.writeByte((byte) 0);
- } else {
- out.writeByte((byte) 1);
- out.writeVInt(value.intValue());
- }
- }
-
- private static Integer readNullableInteger(DataInput in) throws IOException {
- if (in.readByte() == 0) {
- return null;
- } else {
- return in.readVInt();
- }
- }
-
- private static void writeNullableBoolean(DataOutput out, Boolean value) throws IOException {
- if (value == null) {
- out.writeByte((byte) 0);
- } else if (value == Boolean.TRUE) {
- out.writeByte((byte) 1);
- } else {
- out.writeByte((byte) 2);
- }
- }
-
- private static Boolean readNullableBoolean(DataInput in) throws IOException {
- byte b = in.readByte();
- if (b == 0) {
- return null;
- } else if (b == 1) {
- return Boolean.TRUE;
- } else if (b == 2) {
- return Boolean.FALSE;
- } else {
- throw new CorruptIndexException("invalid byte for nullable boolean: " + b, in);
- }
- }
-
- private static void writeNullableString(DataOutput out, String value) throws IOException {
- if (value == null) {
- out.writeByte((byte) 0);
- } else {
- out.writeByte((byte) 1);
- out.writeString(value);
- }
- }
-
- private static String readNullableString(DataInput in) throws IOException {
- byte b = in.readByte();
- if (b == 0) {
- return null;
- } else if (b == 1) {
- return in.readString();
- } else {
- throw new CorruptIndexException("invalid byte for nullable string: " + b, in);
- }
- }
-
public FieldType(DataInput in) throws IOException {
// nocommit under codec control instead?
name = in.readString();
@@ -1056,24 +1046,30 @@ public class FieldTypes {
valueType = ValueType.INT;
break;
case 5:
- valueType = ValueType.FLOAT;
+ valueType = ValueType.HALF_FLOAT;
break;
case 6:
- valueType = ValueType.LONG;
+ valueType = ValueType.FLOAT;
break;
case 7:
- valueType = ValueType.DOUBLE;
+ valueType = ValueType.LONG;
break;
case 8:
- valueType = ValueType.BINARY;
+ valueType = ValueType.DOUBLE;
break;
case 9:
- valueType = ValueType.BOOLEAN;
+ valueType = ValueType.BIG_INT;
break;
case 10:
- valueType = ValueType.DATE;
+ valueType = ValueType.BINARY;
break;
case 11:
+ valueType = ValueType.BOOLEAN;
+ break;
+ case 12:
+ valueType = ValueType.DATE;
+ break;
+ case 13:
valueType = ValueType.INET_ADDRESS;
break;
default:
@@ -1136,7 +1132,17 @@ public class FieldTypes {
storeTermVectorOffsets = readNullableBoolean(in);
storeTermVectorPayloads = readNullableBoolean(in);
isUnique = readNullableBoolean(in);
-
+ b = in.readByte();
+ if (b == 1) {
+ String language = readNullableString(in);
+ String country = readNullableString(in);
+ String variant = readNullableString(in);
+ // nocommit this is not sufficient right? need to use the builder?
+ sortLocale = new Locale(language, country, variant);
+ sortCollator = Collator.getInstance(sortLocale);
+ } else if (b != 0) {
+ throw new CorruptIndexException("invalid byte for sortLocale: " + b, in);
+ }
b = in.readByte();
switch (b) {
case 0:
@@ -1171,6 +1177,32 @@ public class FieldTypes {
docValuesFormat = readNullableString(in);
highlighted = readNullableBoolean(in);
}
+
+ @Override
+ public int getPositionGap() {
+ if (analyzerPositionGap != null) {
+ return analyzerPositionGap;
+ } else if (indexAnalyzer != null) {
+ return indexAnalyzer.getPositionIncrementGap(name);
+ } else if (defaultIndexAnalyzer != null) {
+ return defaultIndexAnalyzer.getPositionIncrementGap(name);
+ } else {
+ return DEFAULT_POSITION_GAP;
+ }
+ }
+
+ @Override
+ public int getOffsetGap() {
+ if (analyzerOffsetGap != null) {
+ return analyzerOffsetGap;
+ } else if (indexAnalyzer != null) {
+ return indexAnalyzer.getOffsetGap(name);
+ } else if (defaultIndexAnalyzer != null) {
+ return defaultIndexAnalyzer.getOffsetGap(name);
+ } else {
+ return DEFAULT_OFFSET_GAP;
+ }
+ }
}
// nocommit need test that you cannot .addStored after already .addLargeText(TokenStream)?
@@ -1441,6 +1473,9 @@ public class FieldTypes {
@Override
public int getPositionIncrementGap(String fieldName) {
+ throw new UnsupportedOperationException();
+
+ /*
FieldType field = fields.get(fieldName);
if (field == null) {
if (defaultIndexAnalyzer == null) {
@@ -1459,10 +1494,13 @@ public class FieldTypes {
} else {
return defaultIndexAnalyzer.getPositionIncrementGap(fieldName);
}
+ */
}
@Override
public int getOffsetGap(String fieldName) {
+ throw new UnsupportedOperationException();
+ /*
FieldType field = fields.get(fieldName);
if (field == null) {
if (defaultIndexAnalyzer == null) {
@@ -1481,6 +1519,7 @@ public class FieldTypes {
} else {
return defaultIndexAnalyzer.getOffsetGap(fieldName);
}
+ */
}
// nocommit what about wrapReader?
@@ -2549,6 +2588,28 @@ public class FieldTypes {
}
}
+ public void setSortLocale(String fieldName, Locale locale) {
+ FieldType current = fields.get(fieldName);
+ if (current == null) {
+ current = newFieldType(fieldName);
+ current.sortLocale = locale;
+ current.sortCollator = Collator.getInstance(locale);
+ fields.put(fieldName, current);
+ changed();
+ } else if (current.sortLocale == null || locale.equals(current.sortLocale) == false) {
+ current.sortLocale = locale;
+ current.sortCollator = Collator.getInstance(locale);
+ changed();
+ }
+
+ }
+
+ public Locale getSortLocale(String fieldName) {
+ // Field must exist:
+ FieldType fieldType = getFieldType(fieldName);
+ return fieldType.sortLocale;
+ }
+
/** Each value in this field will be unique (never occur in more than one document). IndexWriter validates this. */
public void setIsUnique(String fieldName) {
FieldType current = fields.get(fieldName);
@@ -2575,9 +2636,11 @@ public class FieldTypes {
// bug
throw new AssertionError("valueType should not be NONE");
case INT:
+ case HALF_FLOAT:
case FLOAT:
case LONG:
case DOUBLE:
+ case BIG_INT:
case DATE:
if (field.highlighted == null) {
field.highlighted = Boolean.FALSE;
@@ -2586,10 +2649,18 @@ public class FieldTypes {
field.storeTermVectors = Boolean.FALSE;
}
if (field.sortable == null) {
- if (field.docValuesTypeSet == false || field.docValuesType == DocValuesType.NUMERIC || field.docValuesType == DocValuesType.SORTED_NUMERIC) {
- field.sortable = Boolean.TRUE;
+ if (field.valueType == ValueType.BIG_INT) {
+ if (field.docValuesTypeSet == false || field.docValuesType == DocValuesType.SORTED || field.docValuesType == DocValuesType.SORTED_SET) {
+ field.sortable = Boolean.TRUE;
+ } else {
+ field.sortable = Boolean.FALSE;
+ }
} else {
- field.sortable = Boolean.FALSE;
+ if (field.docValuesTypeSet == false || field.docValuesType == DocValuesType.NUMERIC || field.docValuesType == DocValuesType.SORTED_NUMERIC) {
+ field.sortable = Boolean.TRUE;
+ } else {
+ field.sortable = Boolean.FALSE;
+ }
}
}
if (field.multiValued == null) {
@@ -2604,10 +2675,18 @@ public class FieldTypes {
}
if (field.docValuesTypeSet == false) {
if (field.sortable == Boolean.TRUE) {
- if (field.multiValued == Boolean.TRUE) {
- field.docValuesType = DocValuesType.SORTED_NUMERIC;
+ if (field.valueType == ValueType.BIG_INT) {
+ if (field.multiValued == Boolean.TRUE) {
+ field.docValuesType = DocValuesType.SORTED_SET;
+ } else {
+ field.docValuesType = DocValuesType.SORTED;
+ }
} else {
- field.docValuesType = DocValuesType.NUMERIC;
+ if (field.multiValued == Boolean.TRUE) {
+ field.docValuesType = DocValuesType.SORTED_NUMERIC;
+ } else {
+ field.docValuesType = DocValuesType.NUMERIC;
+ }
}
}
field.docValuesTypeSet = true;
@@ -2899,7 +2978,7 @@ public class FieldTypes {
switch (fieldType.valueType) {
case INT:
- bytes = Document2.intToBytes(token);
+ bytes = Document.intToBytes(token);
break;
default:
illegalState(fieldName, "cannot create int term query when valueType=" + fieldType.valueType);
@@ -2929,7 +3008,7 @@ public class FieldTypes {
switch (fieldType.valueType) {
case LONG:
- bytes = Document2.longToBytes(token);
+ bytes = Document.longToBytes(token);
break;
default:
illegalState(fieldName, "cannot create long term query when valueType=" + fieldType.valueType);
@@ -3025,6 +3104,8 @@ public class FieldTypes {
// nocommit split to newInt/Float/etc./Range
+ // nocommit More, Less?
+
// nocommit not great that the toString of the filter returned here is ... not easy to understand
public Filter newRangeFilter(String fieldName, Number min, boolean minInclusive, Number max, boolean maxInclusive) {
@@ -3049,23 +3130,33 @@ public class FieldTypes {
switch (fieldType.valueType) {
case INT:
- minTerm = min == null ? null : Document2.intToBytes(min.intValue());
- maxTerm = max == null ? null : Document2.intToBytes(max.intValue());
+ minTerm = min == null ? null : Document.intToBytes(min.intValue());
+ maxTerm = max == null ? null : Document.intToBytes(max.intValue());
+ break;
+
+ case HALF_FLOAT:
+ minTerm = min == null ? null : Document.halfFloatToSortableBytes(min.floatValue());
+ maxTerm = max == null ? null : Document.halfFloatToSortableBytes(max.floatValue());
break;
case FLOAT:
- minTerm = min == null ? null : Document2.intToBytes(Document2.sortableFloatBits(Float.floatToIntBits(min.floatValue())));
- maxTerm = max == null ? null : Document2.intToBytes(Document2.sortableFloatBits(Float.floatToIntBits(max.floatValue())));
+ minTerm = min == null ? null : Document.floatToSortableBytes(min.floatValue());
+ maxTerm = max == null ? null : Document.floatToSortableBytes(max.floatValue());
break;
case LONG:
- minTerm = min == null ? null : Document2.longToBytes(min.longValue());
- maxTerm = max == null ? null : Document2.longToBytes(max.longValue());
+ minTerm = min == null ? null : Document.longToBytes(min.longValue());
+ maxTerm = max == null ? null : Document.longToBytes(max.longValue());
break;
case DOUBLE:
- minTerm = min == null ? null : Document2.longToBytes(Document2.sortableDoubleBits(Double.doubleToLongBits(min.doubleValue())));
- maxTerm = max == null ? null : Document2.longToBytes(Document2.sortableDoubleBits(Double.doubleToLongBits(max.doubleValue())));
+ minTerm = min == null ? null : Document.doubleToSortableBytes(min.doubleValue());
+ maxTerm = max == null ? null : Document.doubleToSortableBytes(max.doubleValue());
+ break;
+
+ case BIG_INT:
+ minTerm = min == null ? null : new BytesRef(((BigInteger) min).toByteArray());
+ maxTerm = max == null ? null : new BytesRef(((BigInteger) max).toByteArray());
break;
default:
@@ -3141,8 +3232,8 @@ public class FieldTypes {
illegalState(fieldName, "this field was not indexed for fast ranges");
}
- BytesRef minTerm = min == null ? null : Document2.longToBytes(min.getTime());
- BytesRef maxTerm = max == null ? null : Document2.longToBytes(max.getTime());
+ BytesRef minTerm = min == null ? null : Document.longToBytes(min.getTime());
+ BytesRef maxTerm = max == null ? null : Document.longToBytes(max.getTime());
return new TermRangeFilter(fieldName, minTerm, maxTerm, minInclusive, maxInclusive);
}
@@ -3235,7 +3326,8 @@ public class FieldTypes {
} else {
sortField.setMissingValue(Integer.MAX_VALUE);
}
- } else if (fieldType.sortMissingLast == Boolean.FALSE) {
+ } else {
+ assert fieldType.sortMissingLast == Boolean.FALSE;
if (reverse.booleanValue()) {
sortField.setMissingValue(Integer.MAX_VALUE);
} else {
@@ -3245,6 +3337,52 @@ public class FieldTypes {
return sortField;
}
+ case HALF_FLOAT:
+ {
+ SortField sortField;
+ if (fieldType.multiValued == Boolean.TRUE) {
+ // nocommit todo
+ throw new UnsupportedOperationException();
+ } else {
+
+ final Float missingValue;
+
+ if (fieldType.sortMissingLast == Boolean.TRUE) {
+ if (reverse.booleanValue()) {
+ missingValue = Float.NEGATIVE_INFINITY;
+ } else {
+ missingValue = Float.POSITIVE_INFINITY;
+ }
+ } else {
+ assert fieldType.sortMissingLast == Boolean.FALSE;
+ if (reverse.booleanValue()) {
+ missingValue = Float.POSITIVE_INFINITY;
+ } else {
+ missingValue = Float.NEGATIVE_INFINITY;
+ }
+ }
+
+ FieldComparatorSource compSource = new FieldComparatorSource() {
+ @Override
+ public FieldComparator<Float> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) {
+ return new HalfFloatComparator(numHits, fieldName, missingValue);
+ }
+ };
+
+ sortField = new SortField(fieldName, compSource, reverse) {
+ @Override
+ public String toString() {
+ return "<halffloat" + ": \"" + fieldName + "\" missingValue=" + missingValue + ">";
+ }
+ };
+
+ // nocommit not needed?
+ sortField.setMissingValue(missingValue);
+ }
+
+ return sortField;
+ }
+
case FLOAT:
{
SortField sortField;
@@ -3260,7 +3398,8 @@ public class FieldTypes {
} else {
sortField.setMissingValue(Float.POSITIVE_INFINITY);
}
- } else if (fieldType.sortMissingLast == Boolean.FALSE) {
+ } else {
+ assert fieldType.sortMissingLast == Boolean.FALSE;
if (reverse.booleanValue()) {
sortField.setMissingValue(Float.POSITIVE_INFINITY);
} else {
@@ -3286,7 +3425,8 @@ public class FieldTypes {
} else {
sortField.setMissingValue(Long.MAX_VALUE);
}
- } else if (fieldType.sortMissingLast == Boolean.FALSE) {
+ } else {
+ assert fieldType.sortMissingLast == Boolean.FALSE;
if (reverse.booleanValue()) {
sortField.setMissingValue(Long.MAX_VALUE);
} else {
@@ -3311,7 +3451,8 @@ public class FieldTypes {
} else {
sortField.setMissingValue(Double.POSITIVE_INFINITY);
}
- } else if (fieldType.sortMissingLast == Boolean.FALSE) {
+ } else {
+ assert fieldType.sortMissingLast == Boolean.FALSE;
if (reverse.booleanValue()) {
sortField.setMissingValue(Double.POSITIVE_INFINITY);
} else {
@@ -3321,6 +3462,53 @@ public class FieldTypes {
return sortField;
}
+ case BIG_INT:
+ // nocommit fixme
+ {
+ SortField sortField;
+ if (fieldType.multiValued == Boolean.TRUE) {
+ // nocommit todo
+ throw new UnsupportedOperationException();
+ } else {
+
+ final Float missingValue;
+
+ if (fieldType.sortMissingLast == Boolean.TRUE) {
+ if (reverse.booleanValue()) {
+ missingValue = Float.NEGATIVE_INFINITY;
+ } else {
+ missingValue = Float.POSITIVE_INFINITY;
+ }
+ } else {
+ assert fieldType.sortMissingLast == Boolean.FALSE;
+ if (reverse.booleanValue()) {
+ missingValue = Float.POSITIVE_INFINITY;
+ } else {
+ missingValue = Float.NEGATIVE_INFINITY;
+ }
+ }
+
+ FieldComparatorSource compSource = new FieldComparatorSource() {
+ @Override
+ public FieldComparator<Float> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) {
+ return new HalfFloatComparator(numHits, fieldName, missingValue);
+ }
+ };
+
+ sortField = new SortField(fieldName, compSource, reverse) {
+ @Override
+ public String toString() {
+ return "<halffloat" + ": \"" + fieldName + "\" missingValue=" + missingValue + ">";
+ }
+ };
+
+ // nocommit not needed?
+ sortField.setMissingValue(missingValue);
+ }
+
+ return sortField;
+ }
+
case SHORT_TEXT:
case ATOM:
case BINARY:
@@ -3343,7 +3531,8 @@ public class FieldTypes {
} else {
sortField.setMissingValue(SortField.STRING_LAST);
}
- } else if (fieldType.sortMissingLast == Boolean.FALSE) {
+ } else {
+ assert fieldType.sortMissingLast == Boolean.FALSE;
if (reverse.booleanValue()) {
sortField.setMissingValue(SortField.STRING_LAST);
} else {
@@ -3547,5 +3736,65 @@ public class FieldTypes {
addFieldNamesField();
}
+ /** Writes a possibly-null {@code Integer}: one marker byte (0 when {@code value} is null,
+  *  1 otherwise), followed by the value as a vInt only when it is non-null. */
+ static void writeNullableInteger(DataOutput out, Integer value) throws IOException {
+   final boolean present = value != null;
+   out.writeByte((byte) (present ? 1 : 0));
+   if (present) {
+     out.writeVInt(value.intValue());
+   }
+ }
+
+ /** Reads a possibly-null {@code Integer}: one marker byte (0 = null, 1 = a vInt value
+  *  follows), mirroring the marker handling of the nullable boolean/string readers.
+  *
+  *  @return the decoded value, or {@code null} when the marker byte is 0
+  *  @throws CorruptIndexException if the marker byte is neither 0 nor 1 */
+ static Integer readNullableInteger(DataInput in) throws IOException {
+   byte b = in.readByte();
+   if (b == 0) {
+     return null;
+   } else if (b == 1) {
+     return in.readVInt();
+   } else {
+     // Previously any non-zero marker was treated as "value present", which silently
+     // mis-read corrupt input instead of reporting it.
+     throw new CorruptIndexException("invalid byte for nullable integer: " + b, in);
+   }
+ }
+
+ /** Writes a possibly-null {@code Boolean} as a single byte: 0 for {@code null},
+  *  1 for true, 2 for false. */
+ static void writeNullableBoolean(DataOutput out, Boolean value) throws IOException {
+   if (value == null) {
+     out.writeByte((byte) 0);
+   } else if (value.booleanValue()) {
+     // Compare by value, not by identity with Boolean.TRUE: a Boolean instance that is
+     // true but not the canonical constant must still be written as 1.
+     out.writeByte((byte) 1);
+   } else {
+     out.writeByte((byte) 2);
+   }
+ }
+
+ /** Reads a single marker byte and decodes it as a possibly-null {@code Boolean}:
+  *  0 is {@code null}, 1 is {@code Boolean.TRUE}, 2 is {@code Boolean.FALSE}.
+  *
+  *  @throws CorruptIndexException for any other byte value */
+ static Boolean readNullableBoolean(DataInput in) throws IOException {
+   final byte marker = in.readByte();
+   switch (marker) {
+     case 0:
+       return null;
+     case 1:
+       return Boolean.TRUE;
+     case 2:
+       return Boolean.FALSE;
+     default:
+       throw new CorruptIndexException("invalid byte for nullable boolean: " + marker, in);
+   }
+ }
+
+ /** Writes a possibly-null {@code String}: marker byte 1 followed by the string when
+  *  {@code value} is non-null, otherwise just marker byte 0. */
+ static void writeNullableString(DataOutput out, String value) throws IOException {
+   if (value != null) {
+     out.writeByte((byte) 1);
+     out.writeString(value);
+     return;
+   }
+   out.writeByte((byte) 0);
+ }
+
+ /** Reads a marker byte and, when it is 1, the string that follows it; marker 0 yields
+  *  {@code null}.
+  *
+  *  @throws CorruptIndexException if the marker byte is neither 0 nor 1 */
+ static String readNullableString(DataInput in) throws IOException {
+   final byte marker = in.readByte();
+   switch (marker) {
+     case 0:
+       return null;
+     case 1:
+       return in.readString();
+     default:
+       throw new CorruptIndexException("invalid byte for nullable string: " + marker, in);
+   }
+ }
+
// nocommit add sugar to wrap long NDVs as float/double?
}
Added: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/HalfFloat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/HalfFloat.java?rev=1642535&view=auto
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/HalfFloat.java (added)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/HalfFloat.java Sun Nov 30 11:07:09 2014
@@ -0,0 +1,86 @@
+package org.apache.lucene.document;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// nocommit move to NumericUtils
+
+// nocommit need separate test here
+// nocommit does this match the standard?  Decoding (intBitsToFloat) is exact.  Encoding
+// (floatToIntBits) rounds to nearest by adding half a unit before truncating, so exact
+// ties always round up in magnitude rather than to even.
+
+/** Utility class converting between {@code float} and half-precision (16 bit)
+ *  floating-point bit patterns as laid out by IEEE 754-2008: 1 sign bit, 5 exponent
+ *  bits and 10 mantissa bits.  Both methods only look at / produce the bottom 16 bits
+ *  of the {@code int} carrying the half-float. */
+public final class HalfFloat {
+
+  // From https://github.com/ata4/ioutils/blob/master/src/info/ata4/io/util/HalfFloat.java, in turn from http://stackoverflow.com/a/6162687
+  private HalfFloat() {
+  }
+
+  /** Decodes a half-float bit pattern into the {@code float} with the same value.
+   *  Only uses bottom 16 bits.
+   *
+   *  @param hbits half-float bits; bits above the low 16 are ignored
+   *  @return the equivalent float (zeros, subnormals, Inf and NaN included) */
+  public static float intBitsToFloat(int hbits) {
+    int mant = hbits & 0x03ff;            // 10 bits mantissa
+    int exp = hbits & 0x7c00;             // 5 bits exponent
+    if (exp == 0x7c00) {                  // NaN/Inf
+      exp = 0x3fc00;                      // -> NaN/Inf
+    } else if (exp != 0) {                // normalized value
+      // Re-bias only.  The former "smooth transition" shortcut OR-ed 0x3ff into the
+      // float mantissa whenever the half mantissa was 0, so e.g. 1.0 decoded to
+      // ~1.000122 instead of exactly 1.0.
+      exp += 0x1c000;                     // exp - 15 + 127
+    } else if (mant != 0) {               // && exp==0 -> subnormal
+      exp = 0x1c400;                      // make it normal
+      do {
+        mant <<= 1;                       // mantissa * 2
+        exp -= 0x400;                     // decrease exp by 1
+      } while ((mant & 0x400) == 0);      // while not normal
+      mant &= 0x3ff;                      // discard subnormal bit
+    }                                     // else +/-0 -> +/-0
+    return Float.intBitsToFloat(          // combine all parts
+        (hbits & 0x8000) << 16            // sign << ( 31 - 15 )
+        | (exp | mant) << 13);            // value << ( 23 - 10 )
+  }
+
+  /** Encodes a {@code float} as half-float bits, rounding to the nearest representable
+   *  half-float.  Only uses bottom 16 bits.
+   *
+   *  @param fval value to encode
+   *  @return half-float bits in the low 16 bits; magnitudes too large for a half-float
+   *          become +/-Inf and magnitudes too small for a subnormal become +/-0 */
+  public static int floatToIntBits(float fval) {
+    int fbits = Float.floatToIntBits(fval);
+    int sign = fbits >>> 16 & 0x8000;           // sign only
+    int val = (fbits & 0x7fffffff) + 0x1000;    // rounded value
+    if (val >= 0x47800000) {                    // might be or become NaN/Inf
+      // avoid Inf due to rounding
+      if ((fbits & 0x7fffffff) >= 0x47800000) { // is or must become NaN/Inf
+        if (val < 0x7f800000) {                 // was value but too large
+          return sign | 0x7c00;                 // make it +/-Inf
+        }
+        return sign | 0x7c00 |                  // remains +/-Inf or NaN
+            (fbits & 0x007fffff) >>> 13;        // keep NaN (and Inf) bits
+      }
+      return sign | 0x7bff;                     // unrounded not quite Inf
+    }
+    if (val >= 0x38800000) {                    // remains normalized value
+      return sign | ((val - 0x38000000) >>> 13); // exp - 127 + 15
+    }
+    if (val < 0x33000000) {                     // too small for subnormal
+      return sign;                              // becomes +/-0
+    }
+    val = (fbits & 0x7fffffff) >>> 23;          // tmp exp for subnormal calc
+    int mantWithHiddenBit = (fbits & 0x7fffff) | 0x800000;  // add subnormal bit
+    int rounding = 0x800000 >>> (val - 102);    // round depending on cut off
+    // div by 2^(1-(exp-127+15)) and >> 13 | exp=0
+    return sign | ((mantWithHiddenBit + rounding) >>> (126 - val));
+  }
+}
+
Added: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/HalfFloatComparator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/HalfFloatComparator.java?rev=1642535&view=auto
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/HalfFloatComparator.java (added)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/HalfFloatComparator.java Sun Nov 30 11:07:09 2014
@@ -0,0 +1,94 @@
+package org.apache.lucene.document;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.FieldComparator;
+
+// nocommit can't we do all numeric comparators this way? if we fix all numeric dv to write sortable versions?
+class HalfFloatComparator extends FieldComparator.NumericComparator<Float> {
+  // One entry per competitive slot, held as the 16-bit sortable encoding so that plain
+  // integer comparison of two entries orders the hits.
+  private final short[] values;
+  // Sortable encoding substituted for documents that have no value.
+  private final short missingShortValue;
+  private short bottom;
+  private short topValue;
+
+  /**
+   * Creates a new comparator with {@code numHits} slots that orders documents by the
+   * sortable 16-bit encoding of their half-float value.  When a document has no value
+   * for the field, {@code missingValue} is substituted; it is unboxed here, so it must
+   * not be {@code null}.
+   */
+  public HalfFloatComparator(int numHits, String field, Float missingValue) {
+    super(field, missingValue);
+    values = new short[numHits];
+    missingShortValue = toSortableShort(missingValue);
+  }
+
+  /** Encodes a float as half-float bits, then as the sortable short form. */
+  private static short toSortableShort(Float value) {
+    return (short) Document.sortableHalfFloatBits(HalfFloat.floatToIntBits(value));
+  }
+
+  /** Returns the sortable value for {@code doc}, or the missing value when the doc has none. */
+  private short sortableValue(int doc) {
+    final short raw = (short) currentReaderValues.get(doc);
+    // Only a raw 0 can mean "no value", so the Bits.get call is skipped for the common
+    // case (doc has value and value is non-zero):
+    if (raw == 0 && docsWithField != null && !docsWithField.get(doc)) {
+      return missingShortValue;
+    }
+    return raw;
+  }
+
+  @Override
+  public int compare(int slot1, int slot2) {
+    // shorts widen to int, so the difference cannot overflow
+    return values[slot1] - values[slot2];
+  }
+
+  @Override
+  public int compareBottom(int doc) {
+    return bottom - sortableValue(doc);
+  }
+
+  @Override
+  public int compareTop(int doc) {
+    return topValue - sortableValue(doc);
+  }
+
+  @Override
+  public void copy(int slot, int doc) {
+    values[slot] = sortableValue(doc);
+  }
+
+  @Override
+  public void setBottom(final int bottom) {
+    this.bottom = values[bottom];
+  }
+
+  @Override
+  public void setTopValue(Float value) {
+    topValue = toSortableShort(value);
+  }
+
+  @Override
+  public Float value(int slot) {
+    return Document.sortableShortToFloat(values[slot]);
+  }
+}
Added: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/NumericType.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/NumericType.java?rev=1642535&view=auto
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/NumericType.java (added)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/NumericType.java Sun Nov 30 11:07:09 2014
@@ -0,0 +1,34 @@
+package org.apache.lucene.document;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// nocommit remove
+
+/** Data type of a numeric value: one of the four supported widths/kinds, namely
+ * 32-bit or 64-bit integer and 32-bit or 64-bit floating point.
+ * @since 3.2
+ */
+public enum NumericType {
+ /** 32-bit integer numeric type */
+ INT,
+ /** 64-bit long numeric type */
+ LONG,
+ /** 32-bit float numeric type */
+ FLOAT,
+ /** 64-bit double numeric type */
+ DOUBLE
+}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesFieldUpdates.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesFieldUpdates.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesFieldUpdates.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesFieldUpdates.java Sun Nov 30 11:07:09 2014
@@ -17,7 +17,6 @@ package org.apache.lucene.index;
* limitations under the License.
*/
-import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Sun Nov 30 11:07:09 2014
@@ -34,7 +34,7 @@ import java.util.Map;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.document.Document2;
+import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldTypes;
import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
import org.apache.lucene.search.DocIdSetIterator;
@@ -1739,7 +1739,7 @@ public class CheckIndex implements Close
for (int j = 0; j < reader.maxDoc(); ++j) {
// Intentionally pull even deleted documents to
// make sure they too are not corrupt:
- Document2 doc = reader.document(j);
+ Document doc = reader.document(j);
if (liveDocs == null || liveDocs.get(j)) {
status.docCount++;
status.totFields += doc.getFields().size();
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java Sun Nov 30 11:07:09 2014
@@ -599,18 +599,16 @@ final class DefaultIndexingChain extends
* if this is the first time we are seeing this field
* name in this document. */
public void invert(IndexableField field, boolean first, Term delTerm) throws IOException {
+ IndexableFieldType fieldType = field.fieldType();
if (first) {
// First time we're seeing this field (indexed) in
// this document:
invertState.reset();
- } else if (docState.analyzer != null) {
- // TODO: this "multi-field-ness" (and, Analyzer) should be outside of IW somehow
- invertState.position += docState.analyzer.getPositionIncrementGap(fieldInfo.name);
- invertState.offset += docState.analyzer.getOffsetGap(fieldInfo.name);
+ } else {
+ invertState.position += fieldType.getPositionGap();
+ invertState.offset += fieldType.getOffsetGap();
}
- IndexableFieldType fieldType = field.fieldType();
-
IndexOptions indexOptions = fieldType.indexOptions();
fieldInfo.setIndexOptions(indexOptions);
@@ -630,7 +628,7 @@ final class DefaultIndexingChain extends
*/
boolean aborting = false;
boolean succeededInProcessingField = false;
- try (TokenStream stream = tokenStream = field.tokenStream(docState.analyzer, tokenStream)) {
+ try (TokenStream stream = tokenStream = field.tokenStream(tokenStream)) {
// reset the TokenStream to the first token
stream.reset();
invertState.setAttributeSource(stream);
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DocValuesUpdate.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DocValuesUpdate.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DocValuesUpdate.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DocValuesUpdate.java Sun Nov 30 11:07:09 2014
@@ -23,7 +23,6 @@ import static org.apache.lucene.util.Ram
import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_HEADER;
import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF;
-import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java Sun Nov 30 11:07:09 2014
@@ -407,7 +407,7 @@ final class DocumentsWriter implements C
}
}
- boolean updateDocuments(final Iterable<? extends Iterable<? extends IndexableField>> docs, final Analyzer analyzer,
+ boolean updateDocuments(final Iterable<? extends Iterable<? extends IndexableField>> docs,
final Term delTerm) throws IOException {
boolean hasEvents = preUpdate();
@@ -424,7 +424,7 @@ final class DocumentsWriter implements C
final DocumentsWriterPerThread dwpt = perThread.dwpt;
final int dwptNumDocs = dwpt.getNumDocsInRAM();
try {
- dwpt.updateDocuments(docs, analyzer, delTerm);
+ dwpt.updateDocuments(docs, delTerm);
} finally {
// We don't know how many documents were actually
// counted as indexed, so we must subtract here to
@@ -447,8 +447,7 @@ final class DocumentsWriter implements C
return postUpdate(flushingDWPT, hasEvents);
}
- boolean updateDocument(final Iterable<? extends IndexableField> doc, final Analyzer analyzer,
- final Term delTerm) throws IOException {
+ boolean updateDocument(final Iterable<? extends IndexableField> doc, final Term delTerm) throws IOException {
boolean hasEvents = preUpdate();
@@ -465,7 +464,7 @@ final class DocumentsWriter implements C
final DocumentsWriterPerThread dwpt = perThread.dwpt;
final int dwptNumDocs = dwpt.getNumDocsInRAM();
try {
- dwpt.updateDocument(doc, analyzer, delTerm);
+ dwpt.updateDocument(doc, delTerm);
} finally {
// We don't know whether the document actually
// counted as being indexed, so we must subtract here to
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java Sun Nov 30 11:07:09 2014
@@ -24,9 +24,8 @@ import java.util.Locale;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;
-import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.document.Document2;
+import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldTypes;
import org.apache.lucene.index.DocumentsWriterDeleteQueue.DeleteSlice;
import org.apache.lucene.search.similarities.Similarity;
@@ -69,7 +68,6 @@ class DocumentsWriterPerThread {
static class DocState {
final DocumentsWriterPerThread docWriter;
- Analyzer analyzer;
InfoStream infoStream;
Similarity similarity;
int docID;
@@ -85,10 +83,7 @@ class DocumentsWriterPerThread {
}
public void clear() {
- // don't hold onto doc nor analyzer, in case it is
- // largish:
doc = null;
- analyzer = null;
}
}
@@ -225,17 +220,16 @@ class DocumentsWriterPerThread {
}
}
- public void updateDocument(Iterable<? extends IndexableField> doc, Analyzer analyzer, Term delTerm) throws IOException {
+ public void updateDocument(Iterable<? extends IndexableField> doc, Term delTerm) throws IOException {
testPoint("DocumentsWriterPerThread addDocument start");
- if (doc instanceof Document2) {
- Document2 doc2 = (Document2) doc;
+ if (doc instanceof Document) {
+ Document doc2 = (Document) doc;
if (doc2.getFieldTypes() != fieldTypes) {
throw new IllegalArgumentException("this document wasn't created by this writer (fieldTypes are different)");
}
}
assert deleteQueue != null;
docState.doc = doc;
- docState.analyzer = analyzer;
docState.docID = numDocsInRAM;
if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) {
infoStream.message("DWPT", Thread.currentThread().getName() + " update delTerm=" + delTerm + " docID=" + docState.docID + " seg=" + segmentInfo.name);
@@ -269,10 +263,9 @@ class DocumentsWriterPerThread {
finishDocument(delTerm);
}
- public int updateDocuments(Iterable<? extends Iterable<? extends IndexableField>> docs, Analyzer analyzer, Term delTerm) throws IOException {
+ public int updateDocuments(Iterable<? extends Iterable<? extends IndexableField>> docs, Term delTerm) throws IOException {
testPoint("DocumentsWriterPerThread addDocuments start");
assert deleteQueue != null;
- docState.analyzer = analyzer;
if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) {
infoStream.message("DWPT", Thread.currentThread().getName() + " update delTerm=" + delTerm + " docID=" + docState.docID + " seg=" + segmentInfo.name);
}
@@ -287,8 +280,8 @@ class DocumentsWriterPerThread {
// document, so the counter will be "wrong" in that case, but
// it's very hard to fix (we can't easily distinguish aborting
// vs non-aborting exceptions):
- if (doc instanceof Document2) {
- Document2 doc2 = (Document2) doc;
+ if (doc instanceof Document) {
+ Document doc2 = (Document) doc;
if (doc2.getFieldTypes() != fieldTypes) {
throw new IllegalArgumentException("this document wasn't created by this writer (fieldTypes are different)");
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexReader.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexReader.java Sun Nov 30 11:07:09 2014
@@ -26,7 +26,7 @@ import java.util.Set;
import java.util.WeakHashMap;
import java.util.concurrent.atomic.AtomicInteger;
-import org.apache.lucene.document.Document2;
+import org.apache.lucene.document.Document;
import org.apache.lucene.document.Document2StoredFieldVisitor;
import org.apache.lucene.document.FieldTypes;
import org.apache.lucene.store.AlreadyClosedException;
@@ -373,7 +373,7 @@ public abstract class IndexReader implem
// TODO: we need a separate StoredField, so that the
// Document returned here contains that class not
// IndexableField
- public final Document2 document(int docID) throws IOException {
+ public final Document document(int docID) throws IOException {
final Document2StoredFieldVisitor visitor = new Document2StoredFieldVisitor(getFieldTypes());
document(docID, visitor);
return visitor.getDocument();
@@ -384,7 +384,7 @@ public abstract class IndexReader implem
* fields. Note that this is simply sugar for {@link
* Document2StoredFieldVisitor#Document2StoredFieldVisitor(Set)}.
*/
- public final Document2 document(int docID, Set<String> fieldsToLoad)
+ public final Document document(int docID, Set<String> fieldsToLoad)
throws IOException {
final Document2StoredFieldVisitor visitor = new Document2StoredFieldVisitor(getFieldTypes(), fieldsToLoad);
document(docID, visitor);
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java Sun Nov 30 11:07:09 2014
@@ -39,12 +39,10 @@ import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
-import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.lucene50.Lucene50Codec;
-import org.apache.lucene.document.Document2;
-import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldTypes;
import org.apache.lucene.index.DocValuesUpdate.BinaryDocValuesUpdate;
import org.apache.lucene.index.DocValuesUpdate.NumericDocValuesUpdate;
@@ -248,7 +246,6 @@ public class IndexWriter implements Clos
volatile Throwable tragedy;
private final Directory directory; // where this index resides
- private final Analyzer analyzer; // how to analyze text
final FieldTypes fieldTypes; // schema
private volatile long changeCount; // increments every time a change is completed
@@ -864,8 +861,6 @@ public class IndexWriter implements Clos
messageState();
}
- analyzer = fieldTypes.getIndexAnalyzer();
-
// nocommit what to do here... cannot delegate codecs
if ((config.getCodec() instanceof Lucene50Codec) == false) {
codec = config.getCodec();
@@ -893,8 +888,8 @@ public class IndexWriter implements Clos
return fieldTypes;
}
- public Document2 newDocument() {
- return new Document2(fieldTypes);
+ public Document newDocument() {
+ return new Document(fieldTypes);
}
// reads latest field infos for the commit
@@ -1050,12 +1045,6 @@ public class IndexWriter implements Clos
return directory;
}
- /** Returns the analyzer used by this index. */
- public Analyzer getAnalyzer() {
- ensureOpen();
- return analyzer;
- }
-
/** Returns total number of docs in this index, including
* docs not yet flushed (still in the RAM buffer),
* not counting deletions.
@@ -1207,7 +1196,7 @@ public class IndexWriter implements Clos
try {
boolean success = false;
try {
- if (docWriter.updateDocuments(docs, analyzer, delTerm)) {
+ if (docWriter.updateDocuments(docs, delTerm)) {
processEvents(true, false);
}
success = true;
@@ -1357,7 +1346,7 @@ public class IndexWriter implements Clos
try {
boolean success = false;
try {
- if (docWriter.updateDocument(doc, analyzer, term)) {
+ if (docWriter.updateDocument(doc, term)) {
processEvents(true, false);
}
success = true;
@@ -2369,7 +2358,7 @@ public class IndexWriter implements Clos
SegmentInfos sis = SegmentInfos.readLatestCommit(dir); // read infos from dir
totalDocCount += sis.totalDocCount();
- fieldTypes.addAll(FieldTypes.getFieldTypes(sis.getUserData(), analyzer, fieldTypes.getSimilarity()));
+ fieldTypes.addAll(FieldTypes.getFieldTypes(sis.getUserData(), fieldTypes.getIndexAnalyzer(), fieldTypes.getSimilarity()));
for (SegmentCommitInfo info : sis) {
assert !infos.contains(info): "dup info dir=" + info.info.dir + " name=" + info.info.name;
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexableField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexableField.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexableField.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexableField.java Sun Nov 30 11:07:09 2014
@@ -34,7 +34,6 @@ import org.apache.lucene.util.BytesRef;
public interface IndexableField {
-
/** Field name */
public String name();
@@ -45,10 +44,9 @@ public interface IndexableField {
* Creates the TokenStream used for indexing this field. If appropriate,
* implementations should use the given Analyzer to create the TokenStreams.
*
- * @param analyzer Analyzer that should be used to create the TokenStreams from
* @param reuse TokenStream for a previous instance of this field <b>name</b>. This allows
* custom field types (like StringField and NumericField) that do not use
- * the analyzer to still have good performance. Note: the passed-in type
+ * an analyzer to still have good performance. Note: the passed-in type
* may be inappropriate, for example if you mix up different types of Fields
* for the same field name. So it's the responsibility of the implementation to
* check.
@@ -56,7 +54,9 @@ public interface IndexableField {
* a non-null value if the field is to be indexed
* @throws IOException Can be thrown while creating the TokenStream
*/
- public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) throws IOException;
+ default public TokenStream tokenStream(TokenStream reuse) throws IOException {
+ return null;
+ }
/**
* Returns the field's index-time boost.
@@ -78,20 +78,32 @@ public interface IndexableField {
* @see Similarity#computeNorm(FieldInvertState)
* @see DefaultSimilarity#encodeNormValue(float)
*/
- public float boost();
+ default public float boost() {
+ return 1.0f;
+ }
/** Non-null if this field has a stored binary value */
- public BytesRef binaryValue();
+ default public BytesRef binaryValue() {
+ return null;
+ }
/** Non-null if this field has a binary doc value */
- public BytesRef binaryDocValue();
+ default public BytesRef binaryDocValue() {
+ return null;
+ }
/** Non-null if this field has a string value */
- public String stringValue();
+ default public String stringValue() {
+ return null;
+ }
/** Non-null if this field has a numeric value */
- public Number numericValue();
+ default public Number numericValue() {
+ return null;
+ }
/** Non-null if this field has a numeric doc value */
- public Number numericDocValue();
+ default public Number numericDocValue() {
+ return null;
+ }
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexableFieldType.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexableFieldType.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexableFieldType.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexableFieldType.java Sun Nov 30 11:07:09 2014
@@ -26,7 +26,9 @@ import org.apache.lucene.analysis.Analyz
public interface IndexableFieldType {
/** True if the field's value should be stored */
- public boolean stored();
+ default public boolean stored() {
+ return false;
+ }
/**
* True if this field's indexed form should be also stored
@@ -39,7 +41,9 @@ public interface IndexableFieldType {
* This option is illegal if {@link #indexOptions()} returns
* IndexOptions.NONE.
*/
- public boolean storeTermVectors();
+ default public boolean storeTermVectors() {
+ return false;
+ }
/**
* True if this field's token character offsets should also
@@ -48,7 +52,9 @@ public interface IndexableFieldType {
* This option is illegal if term vectors are not enabled for the field
* ({@link #storeTermVectors()} is false)
*/
- public boolean storeTermVectorOffsets();
+ default public boolean storeTermVectorOffsets() {
+ return false;
+ }
/**
* True if this field's token positions should also be stored
@@ -57,7 +63,9 @@ public interface IndexableFieldType {
* This option is illegal if term vectors are not enabled for the field
* ({@link #storeTermVectors()} is false).
*/
- public boolean storeTermVectorPositions();
+ default public boolean storeTermVectorPositions() {
+ return false;
+ }
/**
* True if this field's token payloads should also be stored
@@ -66,7 +74,9 @@ public interface IndexableFieldType {
* This option is illegal if term vector positions are not enabled
* for the field ({@link #storeTermVectorPositions()} is false).
*/
- public boolean storeTermVectorPayloads();
+ default public boolean storeTermVectorPayloads() {
+ return false;
+ }
/**
* True if normalization values should be omitted for the field.
@@ -74,15 +84,31 @@ public interface IndexableFieldType {
* This saves memory, but at the expense of scoring quality (length normalization
* will be disabled), and if you omit norms, you cannot use index-time boosts.
*/
- public boolean omitNorms();
+ default public boolean omitNorms() {
+ return false;
+ }
/** {@link IndexOptions}, describing what should be
* recorded into the inverted index */
- public IndexOptions indexOptions();
+ default public IndexOptions indexOptions() {
+ return IndexOptions.NONE;
+ }
/**
* DocValues {@link DocValuesType}: how the field's value will be indexed
* into docValues.
*/
- public DocValuesType docValuesType();
+ default public DocValuesType docValuesType() {
+ return DocValuesType.NONE;
+ }
+
+ /** Returns the gap to insert between multi-valued, tokenized fields */
+ default public int getPositionGap() {
+ return 1;
+ }
+
+ /** Returns the gap offset to insert between multi-valued, tokenized fields */
+ default public int getOffsetGap() {
+ return 0;
+ }
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesFieldUpdates.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesFieldUpdates.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesFieldUpdates.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesFieldUpdates.java Sun Nov 30 11:07:09 2014
@@ -1,6 +1,5 @@
package org.apache.lucene.index;
-import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.packed.PackedInts;
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java Sun Nov 30 11:07:09 2014
@@ -31,8 +31,6 @@ import org.apache.lucene.codecs.DocValue
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.LiveDocsFormat;
-import org.apache.lucene.document.BinaryDocValuesField;
-import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FlushInfo;
import org.apache.lucene.store.IOContext;
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/StoredFieldVisitor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/StoredFieldVisitor.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/StoredFieldVisitor.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/StoredFieldVisitor.java Sun Nov 30 11:07:09 2014
@@ -19,7 +19,6 @@ package org.apache.lucene.index;
import java.io.IOException;
-import org.apache.lucene.document.Document;
import org.apache.lucene.document.Document2StoredFieldVisitor; // javadocs
/**
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/TrackingIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/TrackingIndexWriter.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/TrackingIndexWriter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/TrackingIndexWriter.java Sun Nov 30 11:07:09 2014
@@ -20,8 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicLong;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.document.Document2;
+import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldTypes;
import org.apache.lucene.search.ControlledRealTimeReopenThread; // javadocs
import org.apache.lucene.search.Query;
@@ -49,7 +48,7 @@ public class TrackingIndexWriter {
this.writer = writer;
}
- public Document2 newDocument() {
+ public Document newDocument() {
return writer.newDocument();
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/DocValuesRangeFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/DocValuesRangeFilter.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/DocValuesRangeFilter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/DocValuesRangeFilter.java Sun Nov 30 11:07:09 2014
@@ -18,13 +18,13 @@ package org.apache.lucene.search;
import java.io.IOException;
-import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.document.Document;
import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.NumericUtils;
/**
* A range filter built on top of numeric doc values field
@@ -285,14 +285,14 @@ public abstract class DocValuesRangeFilt
@Override
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
// we transform the floating point numbers to sortable integers
- // using NumericUtils to easier find the next bigger/lower value
+ // using Document to more easily find the next bigger/lower value
final float inclusiveLowerPoint, inclusiveUpperPoint;
if (lowerVal != null) {
float f = lowerVal.floatValue();
if (!includeUpper && f > 0.0f && Float.isInfinite(f))
return null;
- int i = NumericUtils.floatToSortableInt(f);
- inclusiveLowerPoint = NumericUtils.sortableIntToFloat( includeLower ? i : (i + 1) );
+ int i = Document.floatToSortableInt(f);
+ inclusiveLowerPoint = Document.sortableIntToFloat(includeLower ? i : (i + 1));
} else {
inclusiveLowerPoint = Float.NEGATIVE_INFINITY;
}
@@ -300,8 +300,8 @@ public abstract class DocValuesRangeFilt
float f = upperVal.floatValue();
if (!includeUpper && f < 0.0f && Float.isInfinite(f))
return null;
- int i = NumericUtils.floatToSortableInt(f);
- inclusiveUpperPoint = NumericUtils.sortableIntToFloat( includeUpper ? i : (i - 1) );
+ int i = Document.floatToSortableInt(f);
+ inclusiveUpperPoint = Document.sortableIntToFloat(includeUpper ? i : (i - 1));
} else {
inclusiveUpperPoint = Float.POSITIVE_INFINITY;
}
@@ -331,14 +331,14 @@ public abstract class DocValuesRangeFilt
@Override
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
// we transform the floating point numbers to sortable integers
- // using NumericUtils to easier find the next bigger/lower value
+ // using Document to more easily find the next bigger/lower value
final double inclusiveLowerPoint, inclusiveUpperPoint;
if (lowerVal != null) {
double f = lowerVal.doubleValue();
if (!includeUpper && f > 0.0 && Double.isInfinite(f))
return null;
- long i = NumericUtils.doubleToSortableLong(f);
- inclusiveLowerPoint = NumericUtils.sortableLongToDouble( includeLower ? i : (i + 1L) );
+ long i = Document.doubleToSortableLong(f);
+ inclusiveLowerPoint = Document.sortableLongToDouble(includeLower ? i : (i + 1L));
} else {
inclusiveLowerPoint = Double.NEGATIVE_INFINITY;
}
@@ -346,8 +346,8 @@ public abstract class DocValuesRangeFilt
double f = upperVal.doubleValue();
if (!includeUpper && f < 0.0 && Double.isInfinite(f))
return null;
- long i = NumericUtils.doubleToSortableLong(f);
- inclusiveUpperPoint = NumericUtils.sortableLongToDouble( includeUpper ? i : (i - 1L) );
+ long i = Document.doubleToSortableLong(f);
+ inclusiveUpperPoint = Document.sortableLongToDouble(includeUpper ? i : (i - 1L));
} else {
inclusiveUpperPoint = Double.POSITIVE_INFINITY;
}