You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/11/30 12:07:19 UTC
svn commit: r1642535 [3/19] - in /lucene/dev/branches/lucene6005/lucene: analysis/common/src/java/org/apache/lucene/collation/ analysis/common/src/test/org/apache/lucene/analysis/core/ analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/ ...

Copied: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document.java (from r1642229, lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document2.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document.java?p2=lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document.java&p1=lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document2.java&r1=1642229&r2=1642535&rev=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document2.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document.java Sun Nov 30 11:07:09 2014
@@ -19,6 +19,7 @@ package org.apache.lucene.document;
 
 import java.io.IOException;
 import java.io.Reader;
+import java.math.BigInteger;
 import java.net.InetAddress;
 import java.util.ArrayList;
 import java.util.Date;
@@ -43,7 +44,7 @@ import org.apache.lucene.util.BytesRef;
  *  that also tracks field properties implied by the
  *  fields being added. */
 
-public class Document2 implements Iterable<IndexableField> {
+public class Document implements Iterable<IndexableField> {
 
   private static final float DEFAULT_BOOST = 1.0f;
 
@@ -139,12 +140,8 @@ public class Document2 implements Iterab
     }
 
     @Override
-    public TokenStream tokenStream(Analyzer analyzerIn, TokenStream reuse) throws IOException {
+    public TokenStream tokenStream(TokenStream reuse) throws IOException {
       Analyzer analyzer = fieldTypes.getIndexAnalyzer();
-      if (analyzerIn != analyzer) {
-        // TODO: remove analyzer from IW APIs
-        throw new IllegalArgumentException("analyzer must be the instance from FieldTypes: got " + analyzerIn + " vs " + analyzer);
-      }
 
       assert fieldTypes.getIndexOptions(fieldName) != IndexOptions.NONE;
 
@@ -154,12 +151,16 @@ public class Document2 implements Iterab
       switch (fieldType.valueType) {
       case INT:
         return getReusedBinaryTokenStream(intToBytes(((Number) value).intValue()), reuse);
+      case HALF_FLOAT:
+        return getReusedBinaryTokenStream(halfFloatToSortableBytes(((Number) value).floatValue()), reuse);
       case FLOAT:
-        return getReusedBinaryTokenStream(floatToBytes(((Number) value).floatValue()), reuse);
+        return getReusedBinaryTokenStream(floatToSortableBytes(((Number) value).floatValue()), reuse);
       case LONG:
         return getReusedBinaryTokenStream(longToBytes(((Number) value).longValue()), reuse);
       case DOUBLE:
-        return getReusedBinaryTokenStream(doubleToBytes(((Number) value).doubleValue()), reuse);
+        return getReusedBinaryTokenStream(doubleToSortableBytes(((Number) value).doubleValue()), reuse);
+      case BIG_INT:
+        return getReusedBinaryTokenStream(new BytesRef(((BigInteger) value).toByteArray()), reuse);
       case DATE:
         return getReusedBinaryTokenStream(longToBytes(((Date) value).getTime()), reuse);
       case ATOM:
@@ -256,6 +257,7 @@ public class Document2 implements Iterab
       switch (fieldType.valueType) {
       case INT:
       case LONG:
+      case HALF_FLOAT:
       case FLOAT:
       case DOUBLE:
         return (Number) value;
@@ -282,6 +284,13 @@ public class Document2 implements Iterab
         return (Number) value;
       case LONG:
         return (Number) value;
+      case HALF_FLOAT:
+        int shortBits = HalfFloat.floatToIntBits((Float) value);
+        // nocommit different from other numerics:
+        shortBits = sortableHalfFloatBits(shortBits);
+        assert shortBits >= 0 && shortBits <= Short.MAX_VALUE;
+        return Integer.valueOf(shortBits);
+        //return Integer.valueOf(Float.floatToRawIntBits((Float) value));
       case FLOAT:
         // nocommit i shouldn't do sortableFloatBits?  but why does ot TestSortedNumericSortField.testFloat fail?
         int intBits = Float.floatToIntBits((Float) value);
@@ -362,6 +371,8 @@ public class Document2 implements Iterab
         return new BytesRef(bytes);
       } else if (fieldType.valueType == FieldTypes.ValueType.INET_ADDRESS) {
         return new BytesRef(((InetAddress) value).getAddress());
+      } else if (fieldType.valueType == FieldTypes.ValueType.BIG_INT) {
+        return new BytesRef(((BigInteger) value).toByteArray());
       } else if (value instanceof BytesRef) {
         return (BytesRef) value;
       } else {
@@ -376,9 +387,22 @@ public class Document2 implements Iterab
       } else if (fieldType.docValuesType == DocValuesType.BINARY || fieldType.docValuesType == DocValuesType.SORTED || fieldType.docValuesType == DocValuesType.SORTED_SET) {
         if (fieldType.valueType == FieldTypes.ValueType.INET_ADDRESS) {
           return new BytesRef(((InetAddress) value).getAddress());
+        } else if (fieldType.valueType == FieldTypes.ValueType.BIG_INT) {
+          return new BytesRef(((BigInteger) value).toByteArray());
         } else if (value instanceof String) {
-          // nocommit somewhat evil we utf8-encode your string?
-          return new BytesRef((String) value);
+          String s = (String) value;
+          BytesRef br;
+          if (fieldType.sortCollator != null) {
+            // nocommit thread local clones?
+            synchronized (fieldType.sortCollator) {
+              br = new BytesRef(fieldType.sortCollator.getCollationKey(s).toByteArray());
+            }
+          } else {
+            // nocommit somewhat evil we utf8-encode your string?
+            br = new BytesRef(s);
+          }
+
+          return br;
         }
       }
 
@@ -395,11 +419,11 @@ public class Document2 implements Iterab
     }
   }
 
-  public Document2(FieldTypes fieldTypes) {
+  public Document(FieldTypes fieldTypes) {
     this(fieldTypes, true);
   }
 
-  public Document2(Document2 other) {
+  public Document(Document other) {
     this.fieldTypes = other.fieldTypes;
     this.changeSchema = other.changeSchema;
     if (changeSchema) {
@@ -410,7 +434,7 @@ public class Document2 implements Iterab
     addAll(other);
   }
 
-  Document2(FieldTypes fieldTypes, boolean changeSchema) {
+  Document(FieldTypes fieldTypes, boolean changeSchema) {
     this.fieldTypes = fieldTypes;
     this.changeSchema = changeSchema;
     if (changeSchema) {
@@ -569,13 +593,21 @@ public class Document2 implements Iterab
   /** Default: store this value. */
   // nocommit testme, or remove?
   public void addStoredInt(String fieldName, int value) {
-    // nocommit akward we inferred large_text here?
     if (changeSchema) {
       fieldTypes.recordStoredValueType(fieldName, FieldTypes.ValueType.INT);
     }
     fields.add(new FieldValue(fieldName, value));
   }
 
+  // nocommit throw exc if this field was already indexed/dvd?
+  /** Adds a double value for {@code fieldName}.  Default: store this value. */
+  public void addStoredDouble(String fieldName, double value) {
+    if (changeSchema) {  // the value type is only recorded when this document is allowed to change the schema
+      fieldTypes.recordStoredValueType(fieldName, FieldTypes.ValueType.DOUBLE);
+    }
+    fields.add(new FieldValue(fieldName, value));
+  }
+
   /** Default: store & DV this value. */
   public void addBinary(String fieldName, BytesRef value) {
     if (changeSchema) {
@@ -661,6 +693,15 @@ public class Document2 implements Iterab
     fields.add(new FieldValue(fieldName, Float.valueOf(value)));
   }
 
+  /** Adds half precision (2 bytes) float.  Note that the value is stored with 2 bytes in doc values, but in stored fields it's stored as an
+   *  ordinary 4 byte float.  Default: support for range filtering/querying and sorting (using numeric doc values). */
+  public void addHalfFloat(String fieldName, float value) {
+    if (changeSchema) {  // HALF_FLOAT is only recorded as the value type when this document is allowed to change the schema
+      fieldTypes.recordValueType(fieldName, FieldTypes.ValueType.HALF_FLOAT);
+    }
+    fields.add(new FieldValue(fieldName, Float.valueOf(value)));  // kept as an ordinary boxed Float on the document
+  }
+
   /** Default: support for range filtering/querying and sorting (using numeric doc values). */
   public void addLong(String fieldName, long value) {
     if (changeSchema) {
@@ -685,6 +726,14 @@ public class Document2 implements Iterab
     fields.add(new FieldValue(fieldName, Double.valueOf(value)));
   }
 
+  /** Adds a {@link BigInteger} value.  Default: support for range filtering/querying and sorting.  NOTE(review): FieldType.validate() only accepts SORTED or SORTED_SET doc values for BIG_INT, so the "numeric doc values" wording used by the sibling add methods does not apply here; confirm the actual defaults. */
+  public void addBigInteger(String fieldName, BigInteger value) {
+    if (changeSchema) {  // BIG_INT is only recorded as the value type when this document is allowed to change the schema
+      fieldTypes.recordValueType(fieldName, FieldTypes.ValueType.BIG_INT);
+    }
+    fields.add(new FieldValue(fieldName, value));
+  }
+
   public void addBoolean(String fieldName, boolean value) {
     if (changeSchema) {
       fieldTypes.recordValueType(fieldName, FieldTypes.ValueType.BOOLEAN);
@@ -715,11 +764,11 @@ public class Document2 implements Iterab
 
   static {
     // nocommit is there a cleaner/general way to detect missing enum value in case switch statically?  must we use ecj?
-    assert FieldTypes.ValueType.values().length == 12: "missing case for switch statement below";
+    assert FieldTypes.ValueType.values().length == 14: "missing case for switch statement below";
   }
 
   /** Note: this FieldTypes must already know about all the fields in the incoming doc. */
-  public void addAll(Document2 other) {
+  public void addAll(Document other) {
     // nocommit should we insist other.fieldTypes == this.fieldTypes?  or, that they are "congruent"?
     for (IndexableField indexableField : other.fields) {
       String fieldName = indexableField.name();
@@ -745,6 +794,9 @@ public class Document2 implements Iterab
         case INT:
           addInt(fieldName, field.numericValue().intValue());
           break;
+        case HALF_FLOAT:
+          addHalfFloat(fieldName, field.numericValue().floatValue());
+          break;
         case FLOAT:
           addFloat(fieldName, field.numericValue().floatValue());
           break;
@@ -754,6 +806,9 @@ public class Document2 implements Iterab
         case DOUBLE:
           addDouble(fieldName, field.numericValue().doubleValue());
           break;
+        case BIG_INT:
+          addBigInteger(fieldName, (BigInteger) field.value);
+          break;
         case BINARY:
           addStored(fieldName, field.binaryValue());
           break;
@@ -802,10 +857,31 @@ public class Document2 implements Iterab
     return token;
   }
 
-  public static BytesRef floatToBytes(float value) {
+  public static BytesRef shortToBytes(short v) {  // encodes v as 2 big-endian bytes whose unsigned byte order matches signed short order
+    int sortableBits = v ^ 0x8000;  // flip the sign bit (bit 15); only the low 16 bits are written below
+    BytesRef token = new BytesRef(2);
+    token.length = 2;
+    int index = 1;
+    while (index >= 0) {  // fill from the last byte backwards, lowest-order byte first
+      token.bytes[index] = (byte) (sortableBits & 0xff);
+      index--;
+      sortableBits >>>= 8;
+    }
+    return token;
+  }
+
+  public static BytesRef floatToSortableBytes(float value) {
     return intToBytes(sortableFloatBits(Float.floatToIntBits(value)));
   }
 
+  public static BytesRef halfFloatToSortableBytes(float value) {  // float -> half-float bits -> sortable bits -> 2 bytes via shortToBytes
+    return shortToBytes((short) sortableHalfFloatBits(HalfFloat.floatToIntBits(value)));
+  }
+
+  public static int floatToSortableInt(float value) {  // same sortable bits that floatToSortableBytes encodes, returned as an int instead of bytes
+    return sortableFloatBits(Float.floatToIntBits(value));
+  }
+
   /** Converts numeric DV field back to double. */
   public static double sortableLongToDouble(long v) {
     return Double.longBitsToDouble(sortableDoubleBits(v));
@@ -826,6 +902,11 @@ public class Document2 implements Iterab
     return Float.intBitsToFloat(v);
   }
 
+  /** Converts numeric DV field back to float: undoes {@link #sortableHalfFloatBits} on {@code v}, then decodes the half-float bits via HalfFloat.intBitsToFloat. */
+  public static float sortableShortToFloat(short v) {
+    return HalfFloat.intBitsToFloat(sortableHalfFloatBits(v));  // v is widened (sign-extended) to int for the call
+  }
+
   // nocommit move elsewhere?
   public static int bytesToInt(BytesRef bytes) {
     if (bytes.length != 4) {
@@ -839,6 +920,19 @@ public class Document2 implements Iterab
     return sortableBits ^ 0x80000000;
   }
 
+  // nocommit move elsewhere?  Decodes the 2-byte big-endian form written by shortToBytes back to the signed short value (widened to int); throws IllegalArgumentException unless bytes.length == 2.
+  public static int bytesToShort(BytesRef bytes) {
+    if (bytes.length != 2) {
+      throw new IllegalArgumentException("incoming bytes should be length=2; got length=" + bytes.length);
+    }
+    int sortableBits = 0;
+    for(int i=0;i<2;i++) {
+      sortableBits = (sortableBits << 8) | bytes.bytes[bytes.offset + i] & 0xff;  // & binds tighter than |, so each byte is masked to 0..255 before being merged
+    }
+    // Undo the sign-bit flip, then narrow to short so negative values come back sign-extended (previously -1 decoded as 65535):
+    return (short) (sortableBits ^ 0x8000);
+  }
+
   public static BytesRef longToBytes(long v) {
     long sortableBits = v ^ 0x8000000000000000L;
     BytesRef token = new BytesRef(8);
@@ -852,10 +946,14 @@ public class Document2 implements Iterab
     return token;
   }
 
-  public static BytesRef doubleToBytes(double value) {
+  public static BytesRef doubleToSortableBytes(double value) {
     return longToBytes(sortableDoubleBits(Double.doubleToLongBits(value)));
   }
 
+  public static long doubleToSortableLong(double value) {  // same sortable bits that doubleToSortableBytes encodes, returned as a long instead of bytes
+    return sortableDoubleBits(Double.doubleToLongBits(value));
+  }
+
   // nocommit move elsewhere?
   public static long bytesToLong(BytesRef bytes) {
     if (bytes.length != 8) {
@@ -875,6 +973,11 @@ public class Document2 implements Iterab
   }
 
   // nocommit move elsewhere?
+  public static float bytesToHalfFloat(BytesRef bytes) {  // reverses halfFloatToSortableBytes: 2 bytes -> sortable bits -> half-float bits -> float
+    return HalfFloat.intBitsToFloat(sortableHalfFloatBits(bytesToShort(bytes)));
+  }
+
+  // nocommit move elsewhere?
   public static double bytesToDouble(BytesRef bytes) {
     return Double.longBitsToDouble(sortableDoubleBits(bytesToLong(bytes)));
   }
@@ -889,6 +992,12 @@ public class Document2 implements Iterab
     return bits ^ (bits >> 31) & 0x7fffffff;
   }
 
+  /** Converts IEEE 754 representation of a half float (the low 16 bits of {@code bits}) to sortable order (or back to the original; the mapping is its own inverse).  The upper 16 bits of {@code bits} are returned unchanged. */
+  // nocommit short?  Until then we sign-extend from bit 15 ourselves: on a zero-extended 16-bit pattern, bits >> 15 is 1 rather than all ones, so negative values were never flipped.
+  public static int sortableHalfFloatBits(int bits) {
+    return (bits & 0xffff0000) | (((short) bits ^ ((short) bits >> 15) & 0x7fff) & 0xffff);
+  }
+
   public Boolean getBoolean(String fieldName) {
     // nocommit can we assert this is a known field and that its type is correct?
     FieldValue fieldValue = getFirstFieldValue(fieldName);
@@ -921,6 +1030,16 @@ public class Document2 implements Iterab
     }
   }
 
+  public BigInteger getBigInteger(String fieldName) {  // returns the first value added under fieldName, or null when there is none
+    // nocommit can we assert this is a known field and that its type is correct?
+    FieldValue fieldValue = getFirstFieldValue(fieldName);
+    if (fieldValue == null) {
+      return null;
+    } else {
+      return (BigInteger) fieldValue.value;  // unchecked against the field's value type: fails with ClassCastException if the value is not a BigInteger
+    }
+  }
+
   public String getString(String fieldName) {
     // nocommit can we assert this is a known field and that its type is correct?
     FieldValue fieldValue = getFirstFieldValue(fieldName);

Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document2StoredFieldVisitor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document2StoredFieldVisitor.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document2StoredFieldVisitor.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document2StoredFieldVisitor.java Sun Nov 30 11:07:09 2014
@@ -18,6 +18,7 @@ package org.apache.lucene.document;
  */
 
 import java.io.IOException;
+import java.math.BigInteger;
 import java.net.InetAddress;
 import java.util.Date;
 import java.util.HashSet;
@@ -38,7 +39,7 @@ import org.apache.lucene.util.BytesRef;
  * @lucene.experimental */
 
 public class Document2StoredFieldVisitor extends StoredFieldVisitor {
-  private final Document2 doc;
+  private final Document doc;
   private final Set<String> fieldsToAdd;
   private final FieldTypes fieldTypes;
 
@@ -47,14 +48,14 @@ public class Document2StoredFieldVisitor
    * @param fieldsToAdd Set of fields to load, or <code>null</code> (all fields).
    */
   public Document2StoredFieldVisitor(FieldTypes fieldTypes, Set<String> fieldsToAdd) {
-    doc = new Document2(fieldTypes, false);
+    doc = new Document(fieldTypes, false);
     this.fieldTypes = fieldTypes;
     this.fieldsToAdd = fieldsToAdd;
   }
 
   /** Load only fields named in the provided fields. */
   public Document2StoredFieldVisitor(FieldTypes fieldTypes, String... fields) {
-    doc = new Document2(fieldTypes, false);
+    doc = new Document(fieldTypes, false);
     this.fieldTypes = fieldTypes;
     fieldsToAdd = new HashSet<>(fields.length);
     for(String field : fields) {
@@ -64,7 +65,7 @@ public class Document2StoredFieldVisitor
 
   /** Load all stored fields. */
   public Document2StoredFieldVisitor(FieldTypes fieldTypes) {
-    doc = new Document2(fieldTypes, false);
+    doc = new Document(fieldTypes, false);
     this.fieldTypes = fieldTypes;
     this.fieldsToAdd = null;
   }
@@ -84,6 +85,8 @@ public class Document2StoredFieldVisitor
     FieldTypes.FieldType fieldType = getFieldType(fieldInfo.name);
     if (fieldType != null && fieldType.valueType == FieldTypes.ValueType.INET_ADDRESS) {
       doc.addInetAddress(fieldInfo.name, InetAddress.getByAddress(value));
+    } else if (fieldType != null && fieldType.valueType == FieldTypes.ValueType.BIG_INT) {
+      doc.addBigInteger(fieldInfo.name, new BigInteger(value));
     } else {
       doc.addBinary(fieldInfo.name, new BytesRef(value));
     }
@@ -134,12 +137,12 @@ public class Document2StoredFieldVisitor
 
   /**
    * Retrieve the visited document.
-   * @return {@link Document2} populated with stored fields. Note that only
+   * @return {@link Document} populated with stored fields. Note that only
    *         the stored information in the field instances is valid,
    *         data such as indexing options, term vector options,
    *         etc is not set.
    */
-  public Document2 getDocument() {
+  public Document getDocument() {
     return doc;
   }
 }

Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/FieldTypes.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/FieldTypes.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/FieldTypes.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/FieldTypes.java Sun Nov 30 11:07:09 2014
@@ -18,12 +18,15 @@ package org.apache.lucene.document;
  */
 
 import java.io.IOException;
+import java.math.BigInteger;
 import java.net.InetAddress;
+import java.text.Collator;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 
 import org.apache.lucene.analysis.Analyzer;
@@ -46,6 +49,8 @@ import org.apache.lucene.index.IndexWrit
 import org.apache.lucene.index.IndexableFieldType;
 import org.apache.lucene.index.SegmentInfos;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.search.FieldComparator;
+import org.apache.lucene.search.FieldComparatorSource;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
@@ -98,6 +103,17 @@ import org.apache.lucene.util.Version;
 // tie into faceting
 // tie into index sorting
 
+// nocommit sugar API to retrieve values from DVs or stored fields or whatever?
+
+// nocommit how will future back-compat work?  segment must store field types as of when it was written?
+
+// nocommit how to make this more extensible?  e.g. so I can say "this field will facet, hierarchical, etc."
+
+
+// nocommit expose DocValuesRangeFilter?
+
+// nocommit PH should take this and validate highlighting was enabled?
+
 // nocommit a segment should store the field type as of when it was written?  on upgrade/reindex we can use that?
 
 // nocommit addStored should take numbers too?
@@ -201,8 +217,6 @@ import org.apache.lucene.util.Version;
 
 // nocommit fix all change methods to call validate / rollback
 
-// nocommit float16?
-
 // nocommit can we move multi-field-ness out of IW?  so IW only gets a single instance of each field
 
 // nocommit nested/parent/child docs?
@@ -215,16 +229,19 @@ import org.apache.lucene.util.Version;
 
 // nocommit required?  not null?
 
+// nocommit BigInt?
+
+// nocommit BigDecimal?
+
 /** Records how each field is indexed, stored, etc.  This class persists
  *  its state using {@link IndexWriter#setCommitData}, using the
  *  {@link FieldTypes#FIELD_PROPERTIES_KEY} key. */
 
-// nocommit what about uniqueAtom number int/long?  maybe break out isUnique?  then, e.g. like norms, you could have unique set, but maybe
-// later turn it off
+public class FieldTypes {
 
-// nocommit IW should detect if incoming document's fieldTypes != its own
+  public static final int DEFAULT_POSITION_GAP = 0;
 
-public class FieldTypes {
+  public static final int DEFAULT_OFFSET_GAP = 1;
 
   enum ValueType {
     NONE,
@@ -232,9 +249,11 @@ public class FieldTypes {
     SHORT_TEXT,
     ATOM,  // nocommit binary sort of overlaps w/ this?
     INT,
+    HALF_FLOAT,
     FLOAT,
     LONG,
     DOUBLE,
+    BIG_INT,
     BINARY, // nocommit rename to bytes?
     BOOLEAN,
     DATE,
@@ -268,7 +287,7 @@ public class FieldTypes {
   private long changeCount;
 
   /** Just like current oal.document.FieldType, except for each setting it can also record "not-yet-set". */
-  static class FieldType implements IndexableFieldType, Cloneable {
+  class FieldType implements IndexableFieldType, Cloneable {
     private final String name;
 
     // Lucene version when we were created:
@@ -392,14 +411,19 @@ public class FieldTypes {
     private volatile Analyzer wrappedIndexAnalyzer;
     private volatile Analyzer wrappedQueryAnalyzer;
 
+    Locale sortLocale;
+    Collator sortCollator;
+
     boolean validate() {
       switch (valueType) {
       case NONE:
         break;
       case INT:
+      case HALF_FLOAT:
       case FLOAT:
       case LONG:
       case DOUBLE:
+      case BIG_INT:
       case DATE:
         if (highlighted == Boolean.TRUE) {
           illegalState(name, "type " + valueType + " cannot highlight");
@@ -410,8 +434,14 @@ public class FieldTypes {
         if (queryAnalyzer != null) {
           illegalState(name, "type " + valueType + " cannot have a queryAnalyzer");
         }
-        if (docValuesType != DocValuesType.NONE && (docValuesType != DocValuesType.NUMERIC && docValuesType != DocValuesType.SORTED_NUMERIC)) {
-          illegalState(name, "type " + valueType + " must use NUMERIC docValuesType (got: " + docValuesType + ")");
+        if (valueType == ValueType.BIG_INT) {
+          if (docValuesType != DocValuesType.NONE && (docValuesType != DocValuesType.SORTED && docValuesType != DocValuesType.SORTED_SET)) {
+            illegalState(name, "type " + valueType + " must use SORTED or SORTED_SET docValuesType (got: " + docValuesType + ")");
+          }
+        } else {
+          if (docValuesType != DocValuesType.NONE && (docValuesType != DocValuesType.NUMERIC && docValuesType != DocValuesType.SORTED_NUMERIC)) {
+            illegalState(name, "type " + valueType + " must use NUMERIC or SORTED_NUMERIC docValuesType (got: " + docValuesType + ")");
+          }
         }
         if (indexOptions != IndexOptions.NONE && indexOptions.compareTo(IndexOptions.DOCS) > 0) {
           illegalState(name, "type " + valueType + " cannot use indexOptions > DOCS (got indexOptions " + indexOptions + ")");
@@ -483,7 +513,7 @@ public class FieldTypes {
         if (indexNorms == Boolean.TRUE) {
           illegalState(name, "type " + valueType + " cannot index norms");
         }
-        if (indexOptions != IndexOptions.NONE && indexOptions.compareTo(IndexOptions.DOCS) > 0) {
+        if (indexOptions != IndexOptions.NONE && indexOptions.compareTo(IndexOptions.DOCS) > 0 && multiValued != Boolean.TRUE) {
           illegalState(name, "type " + valueType + " can only be indexed as DOCS; got " + indexOptions);
         }
         if (maxTokenCount != null) {
@@ -537,6 +567,10 @@ public class FieldTypes {
         illegalState(name, "cannot sort when DocValuesType=" + docValuesType);
       }
 
+      if (sortable == Boolean.FALSE && sortLocale != null) {
+        illegalState(name, "cannot set sortLocale when field is not enabled for sorting");
+      }
+
       if (indexOptionsSet) {
         if (indexOptions == IndexOptions.NONE) {
           if (blockTreeMinItemsInBlock != null) {
@@ -583,6 +617,7 @@ public class FieldTypes {
           illegalState(name, "can only setAnalyzerPositionGap if the field is multi-valued");
         }
       }
+
       if (analyzerOffsetGap != null) {
         if (indexOptions == IndexOptions.NONE) {
           illegalState(name, "can only setAnalyzerOffsetGap if the field is indexed");
@@ -867,27 +902,33 @@ public class FieldTypes {
       case INT:
         out.writeByte((byte) 4);
         break;
-      case FLOAT:
+      case HALF_FLOAT:
         out.writeByte((byte) 5);
         break;
-      case LONG:
+      case FLOAT:
         out.writeByte((byte) 6);
         break;
-      case DOUBLE:
+      case LONG:
         out.writeByte((byte) 7);
         break;
-      case BINARY:
+      case DOUBLE:
         out.writeByte((byte) 8);
         break;
-      case BOOLEAN:
+      case BIG_INT:
         out.writeByte((byte) 9);
         break;
-      case DATE:
+      case BINARY:
         out.writeByte((byte) 10);
         break;
-      case INET_ADDRESS:
+      case BOOLEAN:
         out.writeByte((byte) 11);
         break;
+      case DATE:
+        out.writeByte((byte) 12);
+        break;
+      case INET_ADDRESS:
+        out.writeByte((byte) 13);
+        break;
       default:
         throw new AssertionError("missing ValueType in switch");
       }
@@ -943,6 +984,15 @@ public class FieldTypes {
       writeNullableBoolean(out, storeTermVectorPayloads);
       writeNullableBoolean(out, isUnique);
 
+      if (sortLocale != null) {
+        out.writeByte((byte) 1);
+        writeNullableString(out, sortLocale.getLanguage());
+        writeNullableString(out, sortLocale.getCountry());
+        writeNullableString(out, sortLocale.getVariant());
+      } else {
+        out.writeByte((byte) 0);
+      }
+
       if (indexOptionsSet == false) {
         assert indexOptions == IndexOptions.NONE;
         out.writeByte((byte) 0);
@@ -973,66 +1023,6 @@ public class FieldTypes {
       writeNullableBoolean(out, highlighted);
     }
 
-    private static void writeNullableInteger(DataOutput out, Integer value) throws IOException {
-      if (value == null) {
-        out.writeByte((byte) 0);
-      } else {
-        out.writeByte((byte) 1);
-        out.writeVInt(value.intValue());
-      }
-    }
-
-    private static Integer readNullableInteger(DataInput in) throws IOException {
-      if (in.readByte() == 0) {
-        return null;
-      } else {
-        return in.readVInt();
-      }
-    }
-
-    private static void writeNullableBoolean(DataOutput out, Boolean value) throws IOException {
-      if (value == null) {
-        out.writeByte((byte) 0);
-      } else if (value == Boolean.TRUE) {
-        out.writeByte((byte) 1);
-      } else {
-        out.writeByte((byte) 2);
-      }
-    }
-
-    private static Boolean readNullableBoolean(DataInput in) throws IOException {
-      byte b = in.readByte();
-      if (b == 0) {
-        return null;
-      } else if (b == 1) {
-        return Boolean.TRUE;
-      } else if (b == 2) {
-        return Boolean.FALSE;
-      } else {
-        throw new CorruptIndexException("invalid byte for nullable boolean: " + b, in);
-      }
-    }
-
-    private static void writeNullableString(DataOutput out, String value) throws IOException {
-      if (value == null) {
-        out.writeByte((byte) 0);
-      } else {
-        out.writeByte((byte) 1);
-        out.writeString(value);
-      }
-    }
-
-    private static String readNullableString(DataInput in) throws IOException {
-      byte b = in.readByte();
-      if (b == 0) {
-        return null;
-      } else if (b == 1) {
-        return in.readString();
-      } else {
-        throw new CorruptIndexException("invalid byte for nullable string: " + b, in);
-      }
-    }
-
     public FieldType(DataInput in) throws IOException {
       // nocommit under codec control instead?
       name = in.readString();
@@ -1056,24 +1046,30 @@ public class FieldTypes {
         valueType = ValueType.INT;
         break;
       case 5:
-        valueType = ValueType.FLOAT;
+        valueType = ValueType.HALF_FLOAT;
         break;
       case 6:
-        valueType = ValueType.LONG;
+        valueType = ValueType.FLOAT;
         break;
       case 7:
-        valueType = ValueType.DOUBLE;
+        valueType = ValueType.LONG;
         break;
       case 8:
-        valueType = ValueType.BINARY;
+        valueType = ValueType.DOUBLE;
         break;
       case 9:
-        valueType = ValueType.BOOLEAN;
+        valueType = ValueType.BIG_INT;
         break;
       case 10:
-        valueType = ValueType.DATE;
+        valueType = ValueType.BINARY;
         break;
       case 11:
+        valueType = ValueType.BOOLEAN;
+        break;
+      case 12:
+        valueType = ValueType.DATE;
+        break;
+      case 13:
         valueType = ValueType.INET_ADDRESS;
         break;
       default:
@@ -1136,7 +1132,17 @@ public class FieldTypes {
       storeTermVectorOffsets = readNullableBoolean(in);
       storeTermVectorPayloads = readNullableBoolean(in);
       isUnique = readNullableBoolean(in);
-
+      b = in.readByte();
+      if (b == 1) {
+        String language = readNullableString(in);
+        String country = readNullableString(in);
+        String variant = readNullableString(in);
+        // nocommit this is not sufficient right?  need to use the builder?
+        sortLocale = new Locale(language, country, variant);
+        sortCollator = Collator.getInstance(sortLocale);
+      } else if (b != 0) {
+        throw new CorruptIndexException("invalid byte for sortLocale: " + b, in);        
+      }
       b = in.readByte();
       switch (b) {
       case 0:
@@ -1171,6 +1177,32 @@ public class FieldTypes {
       docValuesFormat = readNullableString(in);
       highlighted = readNullableBoolean(in);
     }
+
+    @Override
+    public int getPositionGap() {  // resolution order: explicit per-field gap, this field's index analyzer, the default index analyzer, DEFAULT_POSITION_GAP
+      if (analyzerPositionGap != null) {
+        return analyzerPositionGap;
+      } else if (indexAnalyzer != null) {
+        return indexAnalyzer.getPositionIncrementGap(name);
+      } else if (defaultIndexAnalyzer != null) {
+        return defaultIndexAnalyzer.getPositionIncrementGap(name);
+      } else {
+        return DEFAULT_POSITION_GAP;
+      }
+    }
+
+    @Override
+    public int getOffsetGap() {  // resolution order: explicit per-field gap, this field's index analyzer, the default index analyzer, DEFAULT_OFFSET_GAP
+      if (analyzerOffsetGap != null) {
+        return analyzerOffsetGap;
+      } else if (indexAnalyzer != null) {
+        return indexAnalyzer.getOffsetGap(name);
+      } else if (defaultIndexAnalyzer != null) {
+        return defaultIndexAnalyzer.getOffsetGap(name);
+      } else {
+        return DEFAULT_OFFSET_GAP;
+      }
+    }
   }
 
   // nocommit need test that you cannot .addStored after already .addLargeText(TokenStream)?
@@ -1441,6 +1473,9 @@ public class FieldTypes {
 
     @Override
     public int getPositionIncrementGap(String fieldName) {
+      throw new UnsupportedOperationException();
+
+      /*
       FieldType field = fields.get(fieldName);
       if (field == null) {
         if (defaultIndexAnalyzer == null) {
@@ -1459,10 +1494,13 @@ public class FieldTypes {
       } else {
         return defaultIndexAnalyzer.getPositionIncrementGap(fieldName);
       }
+      */
     }
 
     @Override
     public int getOffsetGap(String fieldName) {
+      throw new UnsupportedOperationException();
+      /*
       FieldType field = fields.get(fieldName);
       if (field == null) {
         if (defaultIndexAnalyzer == null) {
@@ -1481,6 +1519,7 @@ public class FieldTypes {
       } else {
         return defaultIndexAnalyzer.getOffsetGap(fieldName);
       }
+      */
     }
 
     // nocommit what about wrapReader?
@@ -2549,6 +2588,28 @@ public class FieldTypes {
     }
   }
 
+  /** Sets the locale, and a matching {@link Collator}, used to sort {@code fieldName}; creates the field type if needed. */
+  public void setSortLocale(String fieldName, Locale locale) {
+    if (locale == null) {
+      throw new NullPointerException("locale must not be null (field \"" + fieldName + "\")");
+    }
+    FieldType current = fields.get(fieldName);
+    if (current == null) {
+      fields.put(fieldName, current = newFieldType(fieldName));
+    } else if (locale.equals(current.sortLocale)) {
+      return;  // same locale already configured: nothing changed
+    }
+    current.sortLocale = locale;
+    current.sortCollator = Collator.getInstance(locale);
+    changed();
+  }
+
+  /** Returns the sort locale currently recorded on the field type of {@code fieldName}. */
+  public Locale getSortLocale(String fieldName) {
+    // Field must exist (presumably enforced by getFieldType):
+    return getFieldType(fieldName).sortLocale;
+  }
+
   /** Each value in this field will be unique (never occur in more than one document).  IndexWriter validates this.  */
   public void setIsUnique(String fieldName) {
     FieldType current = fields.get(fieldName);
@@ -2575,9 +2636,11 @@ public class FieldTypes {
       // bug
       throw new AssertionError("valueType should not be NONE");
     case INT:
+    case HALF_FLOAT:
     case FLOAT:
     case LONG:
     case DOUBLE:
+    case BIG_INT:
     case DATE:
       if (field.highlighted == null) {
         field.highlighted = Boolean.FALSE;
@@ -2586,10 +2649,18 @@ public class FieldTypes {
         field.storeTermVectors = Boolean.FALSE;
       }
       if (field.sortable == null) {
-        if (field.docValuesTypeSet == false || field.docValuesType == DocValuesType.NUMERIC || field.docValuesType == DocValuesType.SORTED_NUMERIC) {
-          field.sortable = Boolean.TRUE;
+        if (field.valueType == ValueType.BIG_INT) {
+          if (field.docValuesTypeSet == false || field.docValuesType == DocValuesType.SORTED || field.docValuesType == DocValuesType.SORTED_SET) {
+            field.sortable = Boolean.TRUE;
+          } else {
+            field.sortable = Boolean.FALSE;
+          }
         } else {
-          field.sortable = Boolean.FALSE;
+          if (field.docValuesTypeSet == false || field.docValuesType == DocValuesType.NUMERIC || field.docValuesType == DocValuesType.SORTED_NUMERIC) {
+            field.sortable = Boolean.TRUE;
+          } else {
+            field.sortable = Boolean.FALSE;
+          }
         }
       }
       if (field.multiValued == null) {
@@ -2604,10 +2675,18 @@ public class FieldTypes {
       }
       if (field.docValuesTypeSet == false) {
         if (field.sortable == Boolean.TRUE) {
-          if (field.multiValued == Boolean.TRUE) {
-            field.docValuesType = DocValuesType.SORTED_NUMERIC;
+          if (field.valueType == ValueType.BIG_INT) {
+            if (field.multiValued == Boolean.TRUE) {
+              field.docValuesType = DocValuesType.SORTED_SET;
+            } else {
+              field.docValuesType = DocValuesType.SORTED;
+            }
           } else {
-            field.docValuesType = DocValuesType.NUMERIC;
+            if (field.multiValued == Boolean.TRUE) {
+              field.docValuesType = DocValuesType.SORTED_NUMERIC;
+            } else {
+              field.docValuesType = DocValuesType.NUMERIC;
+            }
           }
         }
         field.docValuesTypeSet = true;
@@ -2899,7 +2978,7 @@ public class FieldTypes {
 
     switch (fieldType.valueType) {
     case INT:
-      bytes = Document2.intToBytes(token);
+      bytes = Document.intToBytes(token);
       break;
     default:
       illegalState(fieldName, "cannot create int term query when valueType=" + fieldType.valueType);
@@ -2929,7 +3008,7 @@ public class FieldTypes {
 
     switch (fieldType.valueType) {
     case LONG:
-      bytes = Document2.longToBytes(token);
+      bytes = Document.longToBytes(token);
       break;
     default:
       illegalState(fieldName, "cannot create long term query when valueType=" + fieldType.valueType);
@@ -3025,6 +3104,8 @@ public class FieldTypes {
 
   // nocommit split to newInt/Float/etc./Range
 
+  // nocommit More, Less?
+
   // nocommit not great that the toString of the filter returned here is ... not easy to understand
   public Filter newRangeFilter(String fieldName, Number min, boolean minInclusive, Number max, boolean maxInclusive) {
 
@@ -3049,23 +3130,33 @@ public class FieldTypes {
     
     switch (fieldType.valueType) {
     case INT:
-      minTerm = min == null ? null : Document2.intToBytes(min.intValue());
-      maxTerm = max == null ? null : Document2.intToBytes(max.intValue());
+      minTerm = min == null ? null : Document.intToBytes(min.intValue());
+      maxTerm = max == null ? null : Document.intToBytes(max.intValue());
+      break;
+
+    case HALF_FLOAT:
+      minTerm = min == null ? null : Document.halfFloatToSortableBytes(min.floatValue());
+      maxTerm = max == null ? null : Document.halfFloatToSortableBytes(max.floatValue());
       break;
 
     case FLOAT:
-      minTerm = min == null ? null : Document2.intToBytes(Document2.sortableFloatBits(Float.floatToIntBits(min.floatValue())));
-      maxTerm = max == null ? null : Document2.intToBytes(Document2.sortableFloatBits(Float.floatToIntBits(max.floatValue())));
+      minTerm = min == null ? null : Document.floatToSortableBytes(min.floatValue());
+      maxTerm = max == null ? null : Document.floatToSortableBytes(max.floatValue());
       break;
 
     case LONG:
-      minTerm = min == null ? null : Document2.longToBytes(min.longValue());
-      maxTerm = max == null ? null : Document2.longToBytes(max.longValue());
+      minTerm = min == null ? null : Document.longToBytes(min.longValue());
+      maxTerm = max == null ? null : Document.longToBytes(max.longValue());
       break;
 
     case DOUBLE:
-      minTerm = min == null ? null : Document2.longToBytes(Document2.sortableDoubleBits(Double.doubleToLongBits(min.doubleValue())));
-      maxTerm = max == null ? null : Document2.longToBytes(Document2.sortableDoubleBits(Double.doubleToLongBits(max.doubleValue())));
+      minTerm = min == null ? null : Document.doubleToSortableBytes(min.doubleValue());
+      maxTerm = max == null ? null : Document.doubleToSortableBytes(max.doubleValue());
+      break;
+
+    case BIG_INT:
+      minTerm = min == null ? null : new BytesRef(((BigInteger) min).toByteArray());
+      maxTerm = max == null ? null :  new BytesRef(((BigInteger) max).toByteArray());
       break;
 
     default:
@@ -3141,8 +3232,8 @@ public class FieldTypes {
       illegalState(fieldName, "this field was not indexed for fast ranges");
     }
 
-    BytesRef minTerm = min == null ? null : Document2.longToBytes(min.getTime());
-    BytesRef maxTerm = max == null ? null : Document2.longToBytes(max.getTime());
+    BytesRef minTerm = min == null ? null : Document.longToBytes(min.getTime());
+    BytesRef maxTerm = max == null ? null : Document.longToBytes(max.getTime());
 
     return new TermRangeFilter(fieldName, minTerm, maxTerm, minInclusive, maxInclusive);
   }
@@ -3235,7 +3326,8 @@ public class FieldTypes {
           } else {
             sortField.setMissingValue(Integer.MAX_VALUE);
           }
-        } else if (fieldType.sortMissingLast == Boolean.FALSE) {
+        } else {
+          assert fieldType.sortMissingLast == Boolean.FALSE;
           if (reverse.booleanValue()) {
             sortField.setMissingValue(Integer.MAX_VALUE);
           } else {
@@ -3245,6 +3337,52 @@ public class FieldTypes {
         return sortField;
       }
 
+    case HALF_FLOAT:
+      {
+        SortField sortField;
+        if (fieldType.multiValued == Boolean.TRUE) {
+          // nocommit todo
+          throw new UnsupportedOperationException();
+        } else {
+
+          final Float missingValue;
+
+          if (fieldType.sortMissingLast == Boolean.TRUE) {
+            if (reverse.booleanValue()) {
+              missingValue = Float.NEGATIVE_INFINITY;
+            } else {
+              missingValue = Float.POSITIVE_INFINITY;
+            }
+          } else {
+            assert fieldType.sortMissingLast == Boolean.FALSE;
+            if (reverse.booleanValue()) {
+              missingValue = Float.POSITIVE_INFINITY;
+            } else {
+              missingValue = Float.NEGATIVE_INFINITY;
+            }
+          }
+
+          FieldComparatorSource compSource = new FieldComparatorSource() {
+              @Override
+              public FieldComparator<Float> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) {
+                return new HalfFloatComparator(numHits, fieldName, missingValue);
+              }
+            };
+
+          sortField = new SortField(fieldName, compSource, reverse) {
+              @Override
+              public String toString() {
+                return "<halffloat" + ": \"" + fieldName + "\" missingValue=" + missingValue + ">";
+              }
+            };
+
+          // nocommit not needed?
+          sortField.setMissingValue(missingValue);
+        }
+
+        return sortField;
+      }
+
     case FLOAT:
       {
         SortField sortField;
@@ -3260,7 +3398,8 @@ public class FieldTypes {
           } else {
             sortField.setMissingValue(Float.POSITIVE_INFINITY);
           }
-        } else if (fieldType.sortMissingLast == Boolean.FALSE) {
+        } else {
+          assert fieldType.sortMissingLast == Boolean.FALSE;
           if (reverse.booleanValue()) {
             sortField.setMissingValue(Float.POSITIVE_INFINITY);
           } else {
@@ -3286,7 +3425,8 @@ public class FieldTypes {
           } else {
             sortField.setMissingValue(Long.MAX_VALUE);
           }
-        } else if (fieldType.sortMissingLast == Boolean.FALSE) {
+        } else {
+          assert fieldType.sortMissingLast == Boolean.FALSE;
           if (reverse.booleanValue()) {
             sortField.setMissingValue(Long.MAX_VALUE);
           } else {
@@ -3311,7 +3451,8 @@ public class FieldTypes {
           } else {
             sortField.setMissingValue(Double.POSITIVE_INFINITY);
           }
-        } else if (fieldType.sortMissingLast == Boolean.FALSE) {
+        } else {
+          assert fieldType.sortMissingLast == Boolean.FALSE;
           if (reverse.booleanValue()) {
             sortField.setMissingValue(Double.POSITIVE_INFINITY);
           } else {
@@ -3321,6 +3462,53 @@ public class FieldTypes {
         return sortField;
       }
 
+    case BIG_INT:
+      // nocommit fixme
+      {
+        SortField sortField;
+        if (fieldType.multiValued == Boolean.TRUE) {
+          // nocommit todo
+          throw new UnsupportedOperationException();
+        } else {
+
+          final Float missingValue;
+
+          if (fieldType.sortMissingLast == Boolean.TRUE) {
+            if (reverse.booleanValue()) {
+              missingValue = Float.NEGATIVE_INFINITY;
+            } else {
+              missingValue = Float.POSITIVE_INFINITY;
+            }
+          } else {
+            assert fieldType.sortMissingLast == Boolean.FALSE;
+            if (reverse.booleanValue()) {
+              missingValue = Float.POSITIVE_INFINITY;
+            } else {
+              missingValue = Float.NEGATIVE_INFINITY;
+            }
+          }
+
+          FieldComparatorSource compSource = new FieldComparatorSource() {
+              @Override
+              public FieldComparator<Float> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) {
+                return new HalfFloatComparator(numHits, fieldName, missingValue);
+              }
+            };
+
+          sortField = new SortField(fieldName, compSource, reverse) {
+              @Override
+              public String toString() {
+                return "<halffloat" + ": \"" + fieldName + "\" missingValue=" + missingValue + ">";
+              }
+            };
+
+          // nocommit not needed?
+          sortField.setMissingValue(missingValue);
+        }
+
+        return sortField;
+      }
+
     case SHORT_TEXT:
     case ATOM:
     case BINARY:
@@ -3343,7 +3531,8 @@ public class FieldTypes {
           } else {
             sortField.setMissingValue(SortField.STRING_LAST);
           }
-        } else if (fieldType.sortMissingLast == Boolean.FALSE) {
+        } else {
+          assert fieldType.sortMissingLast == Boolean.FALSE;
           if (reverse.booleanValue()) {
             sortField.setMissingValue(SortField.STRING_LAST);
           } else {
@@ -3547,5 +3736,65 @@ public class FieldTypes {
     addFieldNamesField();
   }
 
+  /** Writes marker byte 0 for null, otherwise marker byte 1 followed by the value as a VInt. */
+  static void writeNullableInteger(DataOutput out, Integer value) throws IOException {
+    out.writeByte((byte) (value == null ? 0 : 1));
+    if (value != null) {
+      out.writeVInt(value.intValue());
+    }
+  }
+
+  /** Reads a value written by {@link #writeNullableInteger}; any marker other than 0 or 1 is reported as corruption. */
+  static Integer readNullableInteger(DataInput in) throws IOException {
+    byte b = in.readByte();
+    if (b != 0 && b != 1) {
+      throw new CorruptIndexException("invalid byte for nullable integer: " + b, in);
+    }
+    return b == 0 ? null : Integer.valueOf(in.readVInt());
+  }
+
+  /** Writes marker byte 0 for null, 1 for true and 2 for false. */
+  static void writeNullableBoolean(DataOutput out, Boolean value) throws IOException {
+    if (value == null) {
+      out.writeByte((byte) 0);
+    } else {
+      // Compare by value, not identity: a non-canonical Boolean(true) must not be written as false.
+      out.writeByte((byte) (value.booleanValue() ? 1 : 2));
+    }
+  }
+
+  /** Reads one marker byte: 0 is null, 1 is true, 2 is false; anything else is reported as corruption. */
+  static Boolean readNullableBoolean(DataInput in) throws IOException {
+    byte marker = in.readByte();
+    if (marker < 0 || marker > 2) {
+      throw new CorruptIndexException("invalid byte for nullable boolean: " + marker, in);
+    }
+    if (marker == 0) {
+      return null;
+    }
+    // marker is 1 (true) or 2 (false) here:
+    return marker == 1 ? Boolean.TRUE : Boolean.FALSE;
+  }
+
+  /** Writes marker byte 0 for null, otherwise marker byte 1 followed by the string itself. */
+  static void writeNullableString(DataOutput out, String value) throws IOException {
+    boolean present = value != null;
+    out.writeByte((byte) (present ? 1 : 0));
+    if (present) {
+      out.writeString(value);
+    }
+  }
+
+  /**
+   * Reads one marker byte: 0 yields null, 1 is followed by the string; anything else is reported as corruption.
+   */
+  static String readNullableString(DataInput in) throws IOException {
+    byte marker = in.readByte();
+    if (marker != 0 && marker != 1) {
+      throw new CorruptIndexException("invalid byte for nullable string: " + marker, in);
+    }
+    return marker == 0 ? null : in.readString();
+  }
+
   // nocommit add sugar to wrap long NDVs as float/double?
 }

Added: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/HalfFloat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/HalfFloat.java?rev=1642535&view=auto
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/HalfFloat.java (added)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/HalfFloat.java Sun Nov 30 11:07:09 2014
@@ -0,0 +1,86 @@
+package org.apache.lucene.document;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// nocommit move to NumericUtils
+
+/** Utility class to convert half-precision 16 bit floating-point number according
+ *  to IEEE 754-2008. */
+
+// nocommit need separate test here
+// nocommit does this match the standard?
+public final class HalfFloat {
+
+  // From https://github.com/ata4/ioutils/blob/master/src/info/ata4/io/util/HalfFloat.java, in turn from http://stackoverflow.com/a/6162687
+  private HalfFloat() {
+  }
+
+  /**
+   * Decodes a half-precision bit pattern into the {@code float} with exactly the same value; only
+   * the bottom 16 bits of {@code hbits} are used.  Every half value is exactly representable as a
+   * float, so no low-order mantissa bits are padded in: {@code 0x3c00} decodes to exactly {@code 1.0f}.
+   */
+  public static float intBitsToFloat(int hbits) {
+    int mant = hbits & 0x03ff;          // 10 bits mantissa
+    int exp = hbits & 0x7c00;           // 5 bits exponent
+    if (exp == 0x7c00) {                // NaN/Inf
+      exp = 0x3fc00;                  // -> NaN/Inf
+    } else if (exp != 0) {              // normalized value
+      exp += 0x1c000;                 // exp - 15 + 127
+    } else if (mant != 0) {             // && exp==0 -> subnormal
+      exp = 0x1c400;                  // make it normal
+      do {
+        mant <<= 1;                 // mantissa * 2
+        exp -= 0x400;               // decrease exp by 1
+      } while ((mant & 0x400) == 0);  // while not normal
+      mant &= 0x3ff;                  // discard subnormal bit
+    }                                   // else +/-0 -> +/-0
+    return Float.intBitsToFloat(        // combine all parts
+                                (hbits & 0x8000) << 16      // sign  << ( 31 - 15 )
+                                | (exp | mant) << 13);      // value << ( 23 - 10 )
+  }
+
+  /** Encodes {@code fval} as a half-precision bit pattern held in the bottom 16 bits of the result. */
+  public static int floatToIntBits(float fval) {
+    int fbits = Float.floatToIntBits(fval);
+    int sign = fbits >>> 16 & 0x8000;           // sign only
+    int val = (fbits & 0x7fffffff) + 0x1000;    // rounded value
+    if (val >= 0x47800000) {                    // might be or become NaN/Inf
+      // avoid Inf due to rounding
+      if ((fbits & 0x7fffffff) >= 0x47800000) { // is or must become NaN/Inf
+        if (val < 0x7f800000) {             // was value but too large
+          return sign | 0x7c00;           // make it +/-Inf
+        }
+        return sign | 0x7c00 |              // remains +/-Inf or NaN
+          (fbits & 0x007fffff) >>> 13; // keep NaN (and Inf) bits
+      }
+      return sign | 0x7bff;                   // unrounded not quite Inf
+    }
+    if (val >= 0x38800000) {                    // remains normalized value
+      return sign | val - 0x38000000 >>> 13;  // exp - 127 + 15
+    }
+    if (val < 0x33000000) {                     // too small for subnormal
+      return sign;                            // becomes +/-0
+    }
+    val = (fbits & 0x7fffffff) >>> 23;          // tmp exp for subnormal calc
+    return sign | ((fbits & 0x7fffff | 0x800000) // add subnormal bit
+                   + (0x800000 >>> val - 102)          // round depending on cut off
+                   >>> 126 - val); // div by 2^(1-(exp-127+15)) and >> 13 | exp=0
+  }
+}
+

Added: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/HalfFloatComparator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/HalfFloatComparator.java?rev=1642535&view=auto
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/HalfFloatComparator.java (added)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/HalfFloatComparator.java Sun Nov 30 11:07:09 2014
@@ -0,0 +1,94 @@
+package org.apache.lucene.document;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.FieldComparator;
+
+// nocommit can't we do all numeric comparators this way?  if we fix all numeric dv to write sortable versions?
+/**
+ * Sorts half-float doc values by comparing their sortable 16-bit encodings directly,
+ * decoding back to {@code Float} only in {@link #value}.
+ */
+class HalfFloatComparator extends FieldComparator.NumericComparator<Float> {
+  private final short[] values;
+  private final short missingShortValue;
+  private short bottom;
+  private short topValue;
+
+  /**
+   * Creates a comparator holding {@code numHits} slots.
+   * When a document has no value for the field, the sortable encoding of {@code missingValue} is substituted.
+   */
+  public HalfFloatComparator(int numHits, String field, Float missingValue) {
+    super(field, missingValue);
+    values = new short[numHits];
+    missingShortValue = (short) Document.sortableHalfFloatBits(HalfFloat.floatToIntBits(missingValue));
+  }
+
+  @Override
+  public int compare(int slot1, int slot2) {
+    // Operands are shorts promoted to int, so the difference cannot overflow.
+    return values[slot1] - values[slot2];
+  }
+
+  @Override
+  public int compareBottom(int doc) {
+    return bottom - sortableValue(doc);
+  }
+
+  @Override
+  public void copy(int slot, int doc) {
+    values[slot] = sortableValue(doc);
+  }
+
+  @Override
+  public void setBottom(final int bottom) {
+    this.bottom = values[bottom];
+  }
+
+  @Override
+  public void setTopValue(Float value) {
+    // Encode once so compareTop can compare raw shorts:
+    topValue = (short) Document.sortableHalfFloatBits(HalfFloat.floatToIntBits(value));
+  }
+
+  @Override
+  public Float value(int slot) {
+    // Decode the sortable encoding back to a float:
+    return Document.sortableShortToFloat(values[slot]);
+  }
+
+  @Override
+  public int compareTop(int doc) {
+    return topValue - sortableValue(doc);
+  }
+
+  /**
+   * Returns the sortable short stored for {@code doc}, or the missing value's encoding when
+   * the document has no value for this field.
+   */
+  private short sortableValue(int doc) {
+    short v = (short) currentReaderValues.get(doc);
+    // Test for v == 0 first to save the Bits.get method call for the common case
+    // (doc has value and value is non-zero):
+    if (v == 0 && docsWithField != null && !docsWithField.get(doc)) {
+      return missingShortValue;
+    }
+    return v;
+  }
+}

Added: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/NumericType.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/NumericType.java?rev=1642535&view=auto
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/NumericType.java (added)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/NumericType.java Sun Nov 30 11:07:09 2014
@@ -0,0 +1,34 @@
+package org.apache.lucene.document;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// nocommit remove
+
+/** Data type of the numeric value
+ * @since 3.2
+ */
+public enum NumericType {
+  /** Values are 32-bit integers. */
+  INT, 
+  /** Values are 64-bit longs. */
+  LONG, 
+  /** Values are 32-bit floats. */
+  FLOAT, 
+  /** Values are 64-bit doubles. */
+  DOUBLE
+}

Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesFieldUpdates.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesFieldUpdates.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesFieldUpdates.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesFieldUpdates.java Sun Nov 30 11:07:09 2014
@@ -17,7 +17,6 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
-import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;

Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Sun Nov 30 11:07:09 2014
@@ -34,7 +34,7 @@ import java.util.Map;
 
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.document.Document2;
+import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldTypes;
 import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
 import org.apache.lucene.search.DocIdSetIterator;
@@ -1739,7 +1739,7 @@ public class CheckIndex implements Close
       for (int j = 0; j < reader.maxDoc(); ++j) {
         // Intentionally pull even deleted documents to
         // make sure they too are not corrupt:
-        Document2 doc = reader.document(j);
+        Document doc = reader.document(j);
         if (liveDocs == null || liveDocs.get(j)) {
           status.docCount++;
           status.totFields += doc.getFields().size();

Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java Sun Nov 30 11:07:09 2014
@@ -599,18 +599,16 @@ final class DefaultIndexingChain extends
      *  if this is the first time we are seeing this field
      *  name in this document. */
     public void invert(IndexableField field, boolean first, Term delTerm) throws IOException {
+      IndexableFieldType fieldType = field.fieldType();
       if (first) {
         // First time we're seeing this field (indexed) in
         // this document:
         invertState.reset();
-      } else if (docState.analyzer != null) {
-        // TODO: this "multi-field-ness" (and, Analyzer) should be outside of IW somehow
-        invertState.position += docState.analyzer.getPositionIncrementGap(fieldInfo.name);
-        invertState.offset += docState.analyzer.getOffsetGap(fieldInfo.name);
+      } else {
+        invertState.position += fieldType.getPositionGap();
+        invertState.offset += fieldType.getOffsetGap();
       }
 
-      IndexableFieldType fieldType = field.fieldType();
-
       IndexOptions indexOptions = fieldType.indexOptions();
       fieldInfo.setIndexOptions(indexOptions);
 
@@ -630,7 +628,7 @@ final class DefaultIndexingChain extends
        */
       boolean aborting = false;
       boolean succeededInProcessingField = false;
-      try (TokenStream stream = tokenStream = field.tokenStream(docState.analyzer, tokenStream)) {
+      try (TokenStream stream = tokenStream = field.tokenStream(tokenStream)) {
         // reset the TokenStream to the first token
         stream.reset();
         invertState.setAttributeSource(stream);

Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DocValuesUpdate.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DocValuesUpdate.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DocValuesUpdate.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DocValuesUpdate.java Sun Nov 30 11:07:09 2014
@@ -23,7 +23,6 @@ import static org.apache.lucene.util.Ram
 import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_HEADER;
 import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF;
 
-import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 

Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java Sun Nov 30 11:07:09 2014
@@ -407,7 +407,7 @@ final class DocumentsWriter implements C
     }
   }
 
-  boolean updateDocuments(final Iterable<? extends Iterable<? extends IndexableField>> docs, final Analyzer analyzer,
+  boolean updateDocuments(final Iterable<? extends Iterable<? extends IndexableField>> docs,
                           final Term delTerm) throws IOException {
     boolean hasEvents = preUpdate();
 
@@ -424,7 +424,7 @@ final class DocumentsWriter implements C
       final DocumentsWriterPerThread dwpt = perThread.dwpt;
       final int dwptNumDocs = dwpt.getNumDocsInRAM();
       try {
-        dwpt.updateDocuments(docs, analyzer, delTerm);
+        dwpt.updateDocuments(docs, delTerm);
       } finally {
         // We don't know how many documents were actually
         // counted as indexed, so we must subtract here to
@@ -447,8 +447,7 @@ final class DocumentsWriter implements C
     return postUpdate(flushingDWPT, hasEvents);
   }
 
-  boolean updateDocument(final Iterable<? extends IndexableField> doc, final Analyzer analyzer,
-      final Term delTerm) throws IOException {
+  boolean updateDocument(final Iterable<? extends IndexableField> doc, final Term delTerm) throws IOException {
 
     boolean hasEvents = preUpdate();
 
@@ -465,7 +464,7 @@ final class DocumentsWriter implements C
       final DocumentsWriterPerThread dwpt = perThread.dwpt;
       final int dwptNumDocs = dwpt.getNumDocsInRAM();
       try {
-        dwpt.updateDocument(doc, analyzer, delTerm); 
+        dwpt.updateDocument(doc, delTerm); 
       } finally {
         // We don't know whether the document actually
         // counted as being indexed, so we must subtract here to

Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java Sun Nov 30 11:07:09 2014
@@ -24,9 +24,8 @@ import java.util.Locale;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicLong;
 
-import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.document.Document2;
+import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldTypes;
 import org.apache.lucene.index.DocumentsWriterDeleteQueue.DeleteSlice;
 import org.apache.lucene.search.similarities.Similarity;
@@ -69,7 +68,6 @@ class DocumentsWriterPerThread {
 
   static class DocState {
     final DocumentsWriterPerThread docWriter;
-    Analyzer analyzer;
     InfoStream infoStream;
     Similarity similarity;
     int docID;
@@ -85,10 +83,7 @@ class DocumentsWriterPerThread {
     }
 
     public void clear() {
-      // don't hold onto doc nor analyzer, in case it is
-      // largish:
       doc = null;
-      analyzer = null;
     }
   }
 
@@ -225,17 +220,16 @@ class DocumentsWriterPerThread {
     }
   }
 
-  public void updateDocument(Iterable<? extends IndexableField> doc, Analyzer analyzer, Term delTerm) throws IOException {
+  public void updateDocument(Iterable<? extends IndexableField> doc, Term delTerm) throws IOException {
     testPoint("DocumentsWriterPerThread addDocument start");
-    if (doc instanceof Document2) {
-      Document2 doc2 = (Document2) doc;
+    if (doc instanceof Document) {
+      Document doc2 = (Document) doc;
       if (doc2.getFieldTypes() != fieldTypes) {
         throw new IllegalArgumentException("this document wasn't created by this writer (fieldTypes are different)");
       }
     }
     assert deleteQueue != null;
     docState.doc = doc;
-    docState.analyzer = analyzer;
     docState.docID = numDocsInRAM;
     if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) {
       infoStream.message("DWPT", Thread.currentThread().getName() + " update delTerm=" + delTerm + " docID=" + docState.docID + " seg=" + segmentInfo.name);
@@ -269,10 +263,9 @@ class DocumentsWriterPerThread {
     finishDocument(delTerm);
   }
 
-  public int updateDocuments(Iterable<? extends Iterable<? extends IndexableField>> docs, Analyzer analyzer, Term delTerm) throws IOException {
+  public int updateDocuments(Iterable<? extends Iterable<? extends IndexableField>> docs, Term delTerm) throws IOException {
     testPoint("DocumentsWriterPerThread addDocuments start");
     assert deleteQueue != null;
-    docState.analyzer = analyzer;
     if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) {
       infoStream.message("DWPT", Thread.currentThread().getName() + " update delTerm=" + delTerm + " docID=" + docState.docID + " seg=" + segmentInfo.name);
     }
@@ -287,8 +280,8 @@ class DocumentsWriterPerThread {
         // document, so the counter will be "wrong" in that case, but
         // it's very hard to fix (we can't easily distinguish aborting
         // vs non-aborting exceptions):
-        if (doc instanceof Document2) {
-          Document2 doc2 = (Document2) doc;
+        if (doc instanceof Document) {
+          Document doc2 = (Document) doc;
           if (doc2.getFieldTypes() != fieldTypes) {
             throw new IllegalArgumentException("this document wasn't created by this writer (fieldTypes are different)");
           }

Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexReader.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexReader.java Sun Nov 30 11:07:09 2014
@@ -26,7 +26,7 @@ import java.util.Set;
 import java.util.WeakHashMap;
 import java.util.concurrent.atomic.AtomicInteger;
 
-import org.apache.lucene.document.Document2;
+import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Document2StoredFieldVisitor;
 import org.apache.lucene.document.FieldTypes;
 import org.apache.lucene.store.AlreadyClosedException;
@@ -373,7 +373,7 @@ public abstract class IndexReader implem
   // TODO: we need a separate StoredField, so that the
   // Document returned here contains that class not
   // IndexableField
-  public final Document2 document(int docID) throws IOException {
+  public final Document document(int docID) throws IOException {
     final Document2StoredFieldVisitor visitor = new Document2StoredFieldVisitor(getFieldTypes());
     document(docID, visitor);
     return visitor.getDocument();
@@ -384,7 +384,7 @@ public abstract class IndexReader implem
    * fields.  Note that this is simply sugar for {@link
    * Document2StoredFieldVisitor#Document2StoredFieldVisitor(Set)}.
    */
-  public final Document2 document(int docID, Set<String> fieldsToLoad)
+  public final Document document(int docID, Set<String> fieldsToLoad)
       throws IOException {
     final Document2StoredFieldVisitor visitor = new Document2StoredFieldVisitor(getFieldTypes(), fieldsToLoad);
     document(docID, visitor);

Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java Sun Nov 30 11:07:09 2014
@@ -39,12 +39,10 @@ import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
 
-import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.FieldInfosFormat;
 import org.apache.lucene.codecs.lucene50.Lucene50Codec;
-import org.apache.lucene.document.Document2;
-import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldTypes;
 import org.apache.lucene.index.DocValuesUpdate.BinaryDocValuesUpdate;
 import org.apache.lucene.index.DocValuesUpdate.NumericDocValuesUpdate;
@@ -248,7 +246,6 @@ public class IndexWriter implements Clos
   volatile Throwable tragedy;
 
   private final Directory directory;  // where this index resides
-  private final Analyzer analyzer;    // how to analyze text
   final FieldTypes fieldTypes; // schema
 
   private volatile long changeCount; // increments every time a change is completed
@@ -864,8 +861,6 @@ public class IndexWriter implements Clos
         messageState();
       }
 
-      analyzer = fieldTypes.getIndexAnalyzer();
-
       // nocommit what to do here... cannot delegate codecs
       if ((config.getCodec() instanceof Lucene50Codec) == false) {
         codec = config.getCodec();
@@ -893,8 +888,8 @@ public class IndexWriter implements Clos
     return fieldTypes;
   }
 
-  public Document2 newDocument() {
-    return new Document2(fieldTypes);
+  public Document newDocument() {
+    return new Document(fieldTypes);
   }
   
   // reads latest field infos for the commit
@@ -1050,12 +1045,6 @@ public class IndexWriter implements Clos
     return directory;
   }
 
-  /** Returns the analyzer used by this index. */
-  public Analyzer getAnalyzer() {
-    ensureOpen();
-    return analyzer;
-  }
-
   /** Returns total number of docs in this index, including
    *  docs not yet flushed (still in the RAM buffer),
    *  not counting deletions.
@@ -1207,7 +1196,7 @@ public class IndexWriter implements Clos
     try {
       boolean success = false;
       try {
-        if (docWriter.updateDocuments(docs, analyzer, delTerm)) {
+        if (docWriter.updateDocuments(docs, delTerm)) {
           processEvents(true, false);
         }
         success = true;
@@ -1357,7 +1346,7 @@ public class IndexWriter implements Clos
     try {
       boolean success = false;
       try {
-        if (docWriter.updateDocument(doc, analyzer, term)) {
+        if (docWriter.updateDocument(doc, term)) {
           processEvents(true, false);
         }
         success = true;
@@ -2369,7 +2358,7 @@ public class IndexWriter implements Clos
           SegmentInfos sis = SegmentInfos.readLatestCommit(dir); // read infos from dir
           totalDocCount += sis.totalDocCount();
 
-          fieldTypes.addAll(FieldTypes.getFieldTypes(sis.getUserData(), analyzer, fieldTypes.getSimilarity()));
+          fieldTypes.addAll(FieldTypes.getFieldTypes(sis.getUserData(), fieldTypes.getIndexAnalyzer(), fieldTypes.getSimilarity()));
 
           for (SegmentCommitInfo info : sis) {
             assert !infos.contains(info): "dup info dir=" + info.info.dir + " name=" + info.info.name;

Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexableField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexableField.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexableField.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexableField.java Sun Nov 30 11:07:09 2014
@@ -34,7 +34,6 @@ import org.apache.lucene.util.BytesRef;
 
 public interface IndexableField {
 
-
   /** Field name */
   public String name();
 
@@ -45,10 +44,9 @@ public interface IndexableField {
    * Creates the TokenStream used for indexing this field.  If appropriate,
    * implementations should use the given Analyzer to create the TokenStreams.
    *
-   * @param analyzer Analyzer that should be used to create the TokenStreams from
    * @param reuse TokenStream for a previous instance of this field <b>name</b>. This allows
    *              custom field types (like StringField and NumericField) that do not use
-   *              the analyzer to still have good performance. Note: the passed-in type
+   *              an analyzer to still have good performance. Note: the passed-in type
    *              may be inappropriate, for example if you mix up different types of Fields
    *              for the same field name. So its the responsibility of the implementation to
    *              check.
@@ -56,7 +54,9 @@ public interface IndexableField {
    *         a non-null value if the field is to be indexed
    * @throws IOException Can be thrown while creating the TokenStream
    */
-  public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) throws IOException;
+  default public TokenStream tokenStream(TokenStream reuse) throws IOException {
+    return null;
+  }
 
   /** 
    * Returns the field's index-time boost.
@@ -78,20 +78,32 @@ public interface IndexableField {
    * @see Similarity#computeNorm(FieldInvertState)
    * @see DefaultSimilarity#encodeNormValue(float)
    */
-  public float boost();
+  default public float boost() {
+    return 1.0f;
+  }
 
   /** Non-null if this field has a stored binary value */
-  public BytesRef binaryValue();
+  default public BytesRef binaryValue() {
+    return null;
+  }
 
   /** Non-null if this field has a binary doc value */
-  public BytesRef binaryDocValue();
+  default public BytesRef binaryDocValue() {
+    return null;
+  }
 
   /** Non-null if this field has a string value */
-  public String stringValue();
+  default public String stringValue() {
+    return null;
+  }
 
   /** Non-null if this field has a numeric value */
-  public Number numericValue(); 
+  default public Number numericValue() { 
+    return null;
+  }
 
   /** Non-null if this field has a numeric doc value */
-  public Number numericDocValue(); 
+  default public Number numericDocValue() {
+    return null;
+  }
 }

Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexableFieldType.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexableFieldType.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexableFieldType.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexableFieldType.java Sun Nov 30 11:07:09 2014
@@ -26,7 +26,9 @@ import org.apache.lucene.analysis.Analyz
 public interface IndexableFieldType {
 
   /** True if the field's value should be stored */
-  public boolean stored();
+  default public boolean stored() {
+    return false;
+  }
   
   /** 
    * True if this field's indexed form should be also stored 
@@ -39,7 +41,9 @@ public interface IndexableFieldType {
    * This option is illegal if {@link #indexOptions()} returns
    * IndexOptions.NONE.
    */
-  public boolean storeTermVectors();
+  default public boolean storeTermVectors() {
+    return false;
+  }
 
   /** 
    * True if this field's token character offsets should also
@@ -48,7 +52,9 @@ public interface IndexableFieldType {
    * This option is illegal if term vectors are not enabled for the field
    * ({@link #storeTermVectors()} is false)
    */
-  public boolean storeTermVectorOffsets();
+  default public boolean storeTermVectorOffsets() {
+    return false;
+  }
 
   /** 
    * True if this field's token positions should also be stored
@@ -57,7 +63,9 @@ public interface IndexableFieldType {
    * This option is illegal if term vectors are not enabled for the field
    * ({@link #storeTermVectors()} is false). 
    */
-  public boolean storeTermVectorPositions();
+  default public boolean storeTermVectorPositions() {
+    return false;
+  }
   
   /** 
    * True if this field's token payloads should also be stored
@@ -66,7 +74,9 @@ public interface IndexableFieldType {
    * This option is illegal if term vector positions are not enabled 
    * for the field ({@link #storeTermVectors()} is false).
    */
-  public boolean storeTermVectorPayloads();
+  default public boolean storeTermVectorPayloads() {
+    return false;
+  }
 
   /**
    * True if normalization values should be omitted for the field.
@@ -74,15 +84,31 @@ public interface IndexableFieldType {
    * This saves memory, but at the expense of scoring quality (length normalization
    * will be disabled), and if you omit norms, you cannot use index-time boosts. 
    */
-  public boolean omitNorms();
+  default public boolean omitNorms() {
+    return false;
+  }
 
   /** {@link IndexOptions}, describing what should be
    *  recorded into the inverted index */
-  public IndexOptions indexOptions();
+  default public IndexOptions indexOptions() {
+    return IndexOptions.NONE;
+  }
 
   /** 
    * DocValues {@link DocValuesType}: how the field's value will be indexed
    * into docValues.
    */
-  public DocValuesType docValuesType();  
+  default public DocValuesType docValuesType() {
+    return DocValuesType.NONE;
+  }
+
+  /** Returns the position gap to insert between the values of a multi-valued, tokenized field */
+  default public int getPositionGap() {
+    return 1;
+  }
+
+  /** Returns the offset gap to insert between the values of a multi-valued, tokenized field */
+  default public int getOffsetGap() {
+    return 0;
+  }
 }

Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesFieldUpdates.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesFieldUpdates.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesFieldUpdates.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesFieldUpdates.java Sun Nov 30 11:07:09 2014
@@ -1,6 +1,5 @@
 package org.apache.lucene.index;
 
-import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.InPlaceMergeSorter;
 import org.apache.lucene.util.packed.PackedInts;

Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java Sun Nov 30 11:07:09 2014
@@ -31,8 +31,6 @@ import org.apache.lucene.codecs.DocValue
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.FieldInfosFormat;
 import org.apache.lucene.codecs.LiveDocsFormat;
-import org.apache.lucene.document.BinaryDocValuesField;
-import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FlushInfo;
 import org.apache.lucene.store.IOContext;

Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/StoredFieldVisitor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/StoredFieldVisitor.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/StoredFieldVisitor.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/StoredFieldVisitor.java Sun Nov 30 11:07:09 2014
@@ -19,7 +19,6 @@ package org.apache.lucene.index;
 
 import java.io.IOException;
 
-import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Document2StoredFieldVisitor; // javadocs
 
 /**

Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/TrackingIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/TrackingIndexWriter.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/TrackingIndexWriter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/TrackingIndexWriter.java Sun Nov 30 11:07:09 2014
@@ -20,8 +20,7 @@ package org.apache.lucene.index;
 import java.io.IOException;
 import java.util.concurrent.atomic.AtomicLong;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.document.Document2;
+import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldTypes;
 import org.apache.lucene.search.ControlledRealTimeReopenThread; // javadocs
 import org.apache.lucene.search.Query;
@@ -49,7 +48,7 @@ public class TrackingIndexWriter {
     this.writer = writer;
   }
 
-  public Document2 newDocument() {
+  public Document newDocument() {
     return writer.newDocument();
   }
 

Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/DocValuesRangeFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/DocValuesRangeFilter.java?rev=1642535&r1=1642534&r2=1642535&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/DocValuesRangeFilter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/DocValuesRangeFilter.java Sun Nov 30 11:07:09 2014
@@ -18,13 +18,13 @@ package org.apache.lucene.search;
 
 import java.io.IOException;
 
-import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.document.Document;
 import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.NumericUtils;
 
 /**
  * A range filter built on top of numeric doc values field 
@@ -285,14 +285,14 @@ public abstract class DocValuesRangeFilt
       @Override
       public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
         // we transform the floating point numbers to sortable integers
-        // using NumericUtils to easier find the next bigger/lower value
+        // using Document to more easily find the next higher/lower value
         final float inclusiveLowerPoint, inclusiveUpperPoint;
         if (lowerVal != null) {
           float f = lowerVal.floatValue();
           if (!includeUpper && f > 0.0f && Float.isInfinite(f))
             return null;
-          int i = NumericUtils.floatToSortableInt(f);
-          inclusiveLowerPoint = NumericUtils.sortableIntToFloat( includeLower ?  i : (i + 1) );
+          int i = Document.floatToSortableInt(f);
+          inclusiveLowerPoint = Document.sortableIntToFloat(includeLower ? i : (i + 1));
         } else {
           inclusiveLowerPoint = Float.NEGATIVE_INFINITY;
         }
@@ -300,8 +300,8 @@ public abstract class DocValuesRangeFilt
           float f = upperVal.floatValue();
           if (!includeUpper && f < 0.0f && Float.isInfinite(f))
             return null;
-          int i = NumericUtils.floatToSortableInt(f);
-          inclusiveUpperPoint = NumericUtils.sortableIntToFloat( includeUpper ? i : (i - 1) );
+          int i = Document.floatToSortableInt(f);
+          inclusiveUpperPoint = Document.sortableIntToFloat(includeUpper ? i : (i - 1));
         } else {
           inclusiveUpperPoint = Float.POSITIVE_INFINITY;
         }
@@ -331,14 +331,14 @@ public abstract class DocValuesRangeFilt
       @Override
       public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
         // we transform the floating point numbers to sortable integers
-        // using NumericUtils to easier find the next bigger/lower value
+        // using Document to more easily find the next higher/lower value
         final double inclusiveLowerPoint, inclusiveUpperPoint;
         if (lowerVal != null) {
           double f = lowerVal.doubleValue();
           if (!includeUpper && f > 0.0 && Double.isInfinite(f))
             return null;
-          long i = NumericUtils.doubleToSortableLong(f);
-          inclusiveLowerPoint = NumericUtils.sortableLongToDouble( includeLower ?  i : (i + 1L) );
+          long i = Document.doubleToSortableLong(f);
+          inclusiveLowerPoint = Document.sortableLongToDouble(includeLower ? i : (i + 1L));
         } else {
           inclusiveLowerPoint = Double.NEGATIVE_INFINITY;
         }
@@ -346,8 +346,8 @@ public abstract class DocValuesRangeFilt
           double f = upperVal.doubleValue();
           if (!includeUpper && f < 0.0 && Double.isInfinite(f))
             return null;
-          long i = NumericUtils.doubleToSortableLong(f);
-          inclusiveUpperPoint = NumericUtils.sortableLongToDouble( includeUpper ? i : (i - 1L) );
+          long i = Document.doubleToSortableLong(f);
+          inclusiveUpperPoint = Document.sortableLongToDouble(includeUpper ? i : (i - 1L));
         } else {
           inclusiveUpperPoint = Double.POSITIVE_INFINITY;
         }