You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/10/21 10:45:45 UTC
svn commit: r1633314 [1/2] - in /lucene/dev/branches/lucene6005/lucene:
analysis/common/src/java/org/apache/lucene/analysis/core/
classification/src/java/org/apache/lucene/classification/utils/
core/src/java/org/apache/lucene/codecs/blocktree/ core/src...
Author: mikemccand
Date: Tue Oct 21 08:45:44 2014
New Revision: 1633314
URL: http://svn.apache.org/r1633314
Log:
LUCENE-6005: work in progress
Added:
lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/BinaryTokenStream.java (with props)
lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/CoreKeywordTokenizer.java (with props)
lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document2.java (with props)
lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/FieldTypes.java (with props)
lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/StringTokenStream.java (with props)
lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/document/TestDocument2.java (with props)
Modified:
lucene/dev/branches/lucene6005/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java
lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document.java
lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Field.java
lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexableFieldType.java
lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/StorableField.java
lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java
lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java
lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReader.java
lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/index/TestDocsAndPositions.java
lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/index/TestIndexableField.java
lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/index/TestOmitNorms.java
lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/index/TestTermVectorsWriter.java
lucene/dev/branches/lucene6005/lucene/misc/src/java/org/apache/lucene/document/LazyDocument.java
Modified: lucene/dev/branches/lucene6005/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java?rev=1633314&r1=1633313&r2=1633314&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java (original)
+++ lucene/dev/branches/lucene6005/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java Tue Oct 21 08:45:44 2014
@@ -34,7 +34,7 @@ public final class KeywordTokenizer exte
private boolean done = false;
private int finalOffset;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
public KeywordTokenizer() {
this(DEFAULT_BUFFER_SIZE);
Modified: lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java?rev=1633314&r1=1633313&r2=1633314&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java Tue Oct 21 08:45:44 2014
@@ -97,9 +97,7 @@ public class DatasetSplitter {
}
} else {
for (StorableField storableField : originalIndex.document(scoreDoc.doc).getFields()) {
- if (storableField.readerValue() != null) {
- doc.add(new Field(storableField.name(), storableField.readerValue(), ft));
- } else if (storableField.binaryValue() != null) {
+ if (storableField.binaryValue() != null) {
doc.add(new Field(storableField.name(), storableField.binaryValue(), ft));
} else if (storableField.stringValue() != null) {
doc.add(new Field(storableField.name(), storableField.stringValue(), ft));
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java?rev=1633314&r1=1633313&r2=1633314&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java Tue Oct 21 08:45:44 2014
@@ -293,18 +293,7 @@ public final class BlockTreeTermsWriter
int maxItemsInBlock)
throws IOException
{
- if (minItemsInBlock <= 1) {
- throw new IllegalArgumentException("minItemsInBlock must be >= 2; got " + minItemsInBlock);
- }
- if (maxItemsInBlock <= 0) {
- throw new IllegalArgumentException("maxItemsInBlock must be >= 1; got " + maxItemsInBlock);
- }
- if (minItemsInBlock > maxItemsInBlock) {
- throw new IllegalArgumentException("maxItemsInBlock must be >= minItemsInBlock; got maxItemsInBlock=" + maxItemsInBlock + " minItemsInBlock=" + minItemsInBlock);
- }
- if (2*(minItemsInBlock-1) > maxItemsInBlock) {
- throw new IllegalArgumentException("maxItemsInBlock must be at least 2*(minItemsInBlock-1); got maxItemsInBlock=" + maxItemsInBlock + " minItemsInBlock=" + minItemsInBlock);
- }
+ validateSettings(minItemsInBlock, maxItemsInBlock);
maxDoc = state.segmentInfo.getDocCount();
@@ -359,6 +348,23 @@ public final class BlockTreeTermsWriter
indexOut.writeLong(dirStart);
}
+ // nocommit refactor the other forks to share this:
+
+ /** Throws {@code IllegalArgumentException} if any of these settings
+ * is invalid. */
+ public static void validateSettings(int minItemsInBlock,
+ int maxItemsInBlock) {
+ if (minItemsInBlock <= 1) {
+ throw new IllegalArgumentException("minItemsInBlock must be >= 2; got " + minItemsInBlock);
+ }
+ if (minItemsInBlock > maxItemsInBlock) {
+ throw new IllegalArgumentException("maxItemsInBlock must be >= minItemsInBlock; got maxItemsInBlock=" + maxItemsInBlock + " minItemsInBlock=" + minItemsInBlock);
+ }
+ if (2*(minItemsInBlock-1) > maxItemsInBlock) {
+ throw new IllegalArgumentException("maxItemsInBlock must be at least 2*(minItemsInBlock-1); got maxItemsInBlock=" + maxItemsInBlock + " minItemsInBlock=" + minItemsInBlock);
+ }
+ }
+
@Override
public void write(Fields fields) throws IOException {
Added: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/BinaryTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/BinaryTokenStream.java?rev=1633314&view=auto
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/BinaryTokenStream.java (added)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/BinaryTokenStream.java Tue Oct 21 08:45:44 2014
@@ -0,0 +1,86 @@
+package org.apache.lucene.document;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
+import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.BytesRef;
+
+final class BinaryTokenStream extends TokenStream {
+ private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
+ private boolean available = true;
+
+ public BinaryTokenStream() {
+ }
+
+ public void setValue(BytesRef value) {
+ bytesAtt.setBytesRef(value);
+ }
+
+ @Override
+ public boolean incrementToken() {
+ if (available) {
+ clearAttributes();
+ available = false;
+ return true;
+ }
+ return false;
+ }
+
+ @Override
+ public void reset() {
+ available = true;
+ }
+
+ public interface ByteTermAttribute extends TermToBytesRefAttribute {
+ public void setBytesRef(BytesRef bytes);
+ }
+
+ public static class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute, TermToBytesRefAttribute {
+ private BytesRef bytes;
+
+ @Override
+ public void fillBytesRef() {
+ // no-op: the bytes were already set by our owner via setBytesRef
+ }
+
+ @Override
+ public BytesRef getBytesRef() {
+ return bytes;
+ }
+
+ @Override
+ public void setBytesRef(BytesRef bytes) {
+ this.bytes = bytes;
+ }
+
+ @Override
+ public void clear() {
+ // nocommit must null bytes here, and reset should re-instate it
+ }
+
+ @Override
+ public void copyTo(AttributeImpl target) {
+ ByteTermAttributeImpl other = (ByteTermAttributeImpl) target;
+ other.bytes = bytes;
+ }
+ }
+}
Added: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/CoreKeywordTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/CoreKeywordTokenizer.java?rev=1633314&view=auto
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/CoreKeywordTokenizer.java (added)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/CoreKeywordTokenizer.java Tue Oct 21 08:45:44 2014
@@ -0,0 +1,81 @@
+package org.apache.lucene.document;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+
+// nocommit ... this is a fork of KeywordTokenizer ... what to do ... must Document2 live outside Lucene core ...
+final class CoreKeywordTokenizer extends Tokenizer {
+ /** Default read buffer size */
+ public static final int DEFAULT_BUFFER_SIZE = 256;
+
+ private boolean done = false;
+ private int finalOffset;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+
+ public CoreKeywordTokenizer() {
+ this(DEFAULT_BUFFER_SIZE);
+ }
+
+ CoreKeywordTokenizer(int bufferSize) {
+ if (bufferSize <= 0) {
+ throw new IllegalArgumentException("bufferSize must be > 0");
+ }
+ termAtt.resizeBuffer(bufferSize);
+ }
+
+ @Override
+ public final boolean incrementToken() throws IOException {
+ if (!done) {
+ clearAttributes();
+ done = true;
+ int upto = 0;
+ char[] buffer = termAtt.buffer();
+ while (true) {
+ final int length = input.read(buffer, upto, buffer.length-upto);
+ if (length == -1) break;
+ upto += length;
+ if (upto == buffer.length)
+ buffer = termAtt.resizeBuffer(1+buffer.length);
+ }
+ termAtt.setLength(upto);
+ finalOffset = correctOffset(upto);
+ offsetAtt.setOffset(correctOffset(0), finalOffset);
+ return true;
+ }
+ return false;
+ }
+
+ @Override
+ public final void end() throws IOException {
+ super.end();
+ // set final offset
+ offsetAtt.setOffset(finalOffset, finalOffset);
+ }
+
+ @Override
+ public void reset() throws IOException {
+ super.reset();
+ this.done = false;
+ }
+}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document.java?rev=1633314&r1=1633313&r2=1633314&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document.java Tue Oct 21 08:45:44 2014
@@ -66,9 +66,6 @@ public final class Document implements I
if (newField.fieldsData == null) {
newField.fieldsData = field.binaryValue();
}
- if (newField.fieldsData == null) {
- newField.fieldsData = field.readerValue();
- }
add(newField);
}
Added: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document2.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document2.java?rev=1633314&view=auto
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document2.java (added)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document2.java Tue Oct 21 08:45:44 2014
@@ -0,0 +1,389 @@
+package org.apache.lucene.document;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.NumericTokenStream;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.FieldTypes.FieldType;
+import org.apache.lucene.index.FieldInfo.DocValuesType;
+import org.apache.lucene.index.IndexDocument;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.IndexableFieldType;
+import org.apache.lucene.index.StorableField;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FilterIterator;
+
+/** A simpler API for building a document for indexing,
+ * that also tracks field properties implied by the
+ * fields being added. */
+
+public class Document2 implements IndexDocument {
+
+ private static final float DEFAULT_BOOST = 1.0f;
+
+ private final FieldTypes fieldTypes;
+ private final List<FieldValue> fields = new ArrayList<>();
+
+ private class FieldValue implements IndexableField, StorableField {
+ final String fieldName;
+ final Object value;
+ final float boost;
+ final FieldType fieldType;
+
+ public FieldValue(String name, Object value) {
+ this(name, value, DEFAULT_BOOST);
+ }
+
+ public FieldValue(String fieldName, Object value, float boost) {
+ this.fieldName = fieldName;
+ this.value = value;
+ this.boost = boost;
+ this.fieldType = fieldTypes.getFieldType(fieldName);
+ }
+
+ @Override
+ public String name() {
+ return fieldName;
+ }
+
+ @Override
+ public IndexableFieldType fieldType() {
+ return fieldType;
+ }
+
+ @Override
+ public float boost() {
+ return boost;
+ }
+
+ @Override
+ public TokenStream tokenStream(Analyzer analyzerIn, TokenStream reuse) throws IOException {
+ Analyzer analyzer = fieldTypes.getAnalyzer();
+ if (analyzerIn != analyzer) {
+ // TODO: remove analyzer from IW APIs
+ throw new IllegalArgumentException("analyzer must be the instance from FieldTypes");
+ }
+
+ FieldTypes.FieldType fieldType = fieldTypes.getFieldType(fieldName);
+ switch (fieldType.valueType) {
+ case INT:
+ case FLOAT:
+ case LONG:
+ case DOUBLE:
+ NumericTokenStream nts;
+ if (reuse != null) {
+ if (reuse instanceof NumericTokenStream == false) {
+ FieldTypes.illegalState(fieldName, "should have had NumericTokenStream for reuse, but got " + reuse);
+ }
+ nts = (NumericTokenStream) reuse;
+ if (fieldType.numericPrecisionStep == null || nts.getPrecisionStep() != fieldType.numericPrecisionStep.intValue()) {
+ FieldTypes.illegalState(fieldName, "reused NumericTokenStream has precisionStep " + nts.getPrecisionStep() + ", which is different from FieldType's " + fieldType.numericPrecisionStep);
+ }
+ } else {
+ nts = new NumericTokenStream(fieldType.numericPrecisionStep);
+ }
+ // initialize value in TokenStream
+ final Number number = (Number) value;
+ switch (fieldType.valueType) {
+ case INT:
+ nts.setIntValue(number.intValue());
+ break;
+ case LONG:
+ nts.setLongValue(number.longValue());
+ break;
+ case FLOAT:
+ nts.setFloatValue(number.floatValue());
+ break;
+ case DOUBLE:
+ nts.setDoubleValue(number.doubleValue());
+ break;
+ default:
+ throw new AssertionError("Should never get here");
+ }
+ return nts;
+
+ case ATOM:
+ if (value instanceof String) {
+ StringTokenStream sts;
+ if (reuse != null) {
+ if (reuse instanceof StringTokenStream == false) {
+ FieldTypes.illegalState(fieldName, "should have had StringTokenStream for reuse, but got " + reuse);
+ }
+ sts = (StringTokenStream) reuse;
+ } else {
+ sts = new StringTokenStream();
+ }
+ sts.setValue((String) value);
+ return sts;
+ } else {
+ assert value instanceof byte[];
+ BinaryTokenStream bts;
+ if (reuse != null) {
+ if (reuse instanceof BinaryTokenStream == false) {
+ FieldTypes.illegalState(fieldName, "should have had BinaryTokenStream for reuse, but got " + reuse);
+ }
+ bts = (BinaryTokenStream) reuse;
+ } else {
+ bts = new BinaryTokenStream();
+ }
+ bts.setValue(new BytesRef((byte[]) value));
+ return bts;
+ }
+
+ case BINARY:
+ assert value instanceof byte[];
+ BinaryTokenStream bts;
+ if (reuse != null) {
+ if (reuse instanceof BinaryTokenStream == false) {
+ FieldTypes.illegalState(fieldName, "should have had BinaryTokenStream for reuse, but got " + reuse);
+ }
+ bts = (BinaryTokenStream) reuse;
+ } else {
+ bts = new BinaryTokenStream();
+ }
+ bts.setValue(new BytesRef((byte[]) value));
+ return bts;
+
+ case SHORT_TEXT:
+ case TEXT:
+ if (value instanceof TokenStream) {
+ return (TokenStream) value;
+ } else if (value instanceof Reader) {
+ return analyzer.tokenStream(name(), (Reader) value);
+ } else {
+ return analyzer.tokenStream(name(), (String) value);
+ }
+
+ default:
+ FieldTypes.illegalState(fieldName, "valueType=" + fieldType.valueType + " cannot be indexed");
+
+ // Dead code but javac disagrees:
+ return null;
+ }
+ }
+
+ @Override
+ public Number numericValue() {
+ switch (fieldType.valueType) {
+ case INT:
+ case LONG:
+ case FLOAT:
+ case DOUBLE:
+ return (Number) value;
+ default:
+ return null;
+ }
+ }
+
+ @Override
+ public Number numericDocValue() {
+ switch (fieldType.valueType) {
+ case INT:
+ return (Number) value;
+ case LONG:
+ return (Number) value;
+ case FLOAT:
+ return Integer.valueOf(Float.floatToIntBits((Float) value));
+ case DOUBLE:
+ return Long.valueOf(Double.doubleToLongBits((Double) value));
+ default:
+ return null;
+ }
+ }
+
+ @Override
+ public String stringValue() {
+ switch (fieldType.valueType) {
+ case SHORT_TEXT:
+ case TEXT:
+ return (String) value;
+ case ATOM:
+ if (value instanceof String) {
+ return (String) value;
+ } else {
+ return null;
+ }
+ default:
+ return null;
+ }
+ }
+
+ @Override
+ public BytesRef binaryValue() {
+ if (value instanceof byte[]) {
+ return new BytesRef((byte[]) value);
+ } else {
+ return null;
+ }
+ }
+
+ @Override
+ public BytesRef binaryDocValue() {
+ if (value instanceof byte[]) {
+ return new BytesRef((byte[]) value);
+ } else if (value instanceof String && (fieldType.docValuesType == DocValuesType.BINARY || fieldType.docValuesType == DocValuesType.SORTED || fieldType.docValuesType == DocValuesType.SORTED_SET)) {
+ // nocommit somewhat evil we utf8-encode your string?
+ return new BytesRef((String) value);
+ }
+
+ return null;
+ }
+ }
+
+ public Document2(FieldTypes fieldTypes) {
+ this.fieldTypes = fieldTypes;
+ }
+
+ @Override
+ public Iterable<IndexableField> indexableFields() {
+ return new Iterable<IndexableField>() {
+ @Override
+ public Iterator<IndexableField> iterator() {
+ return Document2.this.indexedFieldsIterator();
+ }
+ };
+ }
+
+ @Override
+ public Iterable<StorableField> storableFields() {
+ return new Iterable<StorableField>() {
+ @Override
+ public Iterator<StorableField> iterator() {
+ return Document2.this.storedFieldsIterator();
+ }
+ };
+ }
+
+ private Iterator<StorableField> storedFieldsIterator() {
+ return new FilterIterator<StorableField,FieldValue>(fields.iterator()) {
+ @Override
+ protected boolean predicateFunction(FieldValue field) {
+ return field.fieldType.stored() || field.fieldType.docValueType() != null;
+ }
+ };
+ }
+
+ private Iterator<IndexableField> indexedFieldsIterator() {
+ return new FilterIterator<IndexableField,FieldValue>(fields.iterator()) {
+ @Override
+ protected boolean predicateFunction(FieldValue field) {
+ return field.fieldType.indexOptions() != null;
+ }
+ };
+ }
+
+ /** E.g. a "country" field. Default: indexes this value as a single token, and disables norms and freqs, and also enables sorting (indexes doc values) and stores it. */
+ public void addAtom(String fieldName, String value) {
+ fieldTypes.recordValueType(fieldName, FieldTypes.ValueType.ATOM);
+ fields.add(new FieldValue(fieldName, value));
+ }
+
+ /** E.g. an "id" (primary key) field. Default: indexes this value as a single token, and disables norms and freqs. */
+ public void addAtom(String fieldName, byte[] value) {
+ fieldTypes.recordValueType(fieldName, FieldTypes.ValueType.ATOM);
+ fields.add(new FieldValue(fieldName, value));
+ }
+
+ /** E.g. a "title" field. Default: indexes this value as multiple tokens from analyzer, and disables norms and freqs, and also enables
+ * sorting (indexes sorted doc values). */
+ public void addShortText(String fieldName, String value) {
+ fieldTypes.recordValueType(fieldName, FieldTypes.ValueType.SHORT_TEXT);
+ fields.add(new FieldValue(fieldName, value));
+ }
+
+ /** Default: store this value. */
+ public void addStored(String fieldName, byte[] value) {
+ // nocommit awkward we inferred binary here?
+ fieldTypes.recordValueType(fieldName, FieldTypes.ValueType.BINARY);
+ fields.add(new FieldValue(fieldName, value));
+ }
+
+ /** Default: store this value. */
+ public void addStored(String fieldName, String value) {
+ // nocommit awkward we inferred large_text here?
+ fieldTypes.recordLargeTextType(fieldName, true);
+ fields.add(new FieldValue(fieldName, value));
+ }
+
+ /** E.g. a "body" field. Default: indexes this value as multiple tokens from analyzer and stores the value. */
+ public void addLargeText(String fieldName, String value) {
+ addLargeText(fieldName, value, DEFAULT_BOOST);
+ }
+
+ /** E.g. a "body" field. Default: indexes this value as multiple tokens from analyzer and stores the value. */
+ public void addLargeText(String fieldName, String value, float boost) {
+ fieldTypes.recordLargeTextType(fieldName, true);
+ fields.add(new FieldValue(fieldName, value, boost));
+ }
+
+ /** E.g. a "body" field. Default: indexes this value as multiple tokens from analyzer. */
+ public void addLargeText(String fieldName, TokenStream value) {
+ addLargeText(fieldName, value, DEFAULT_BOOST);
+ }
+
+ /** E.g. a "body" field. Default: indexes this value as multiple tokens from analyzer. */
+ public void addLargeText(String fieldName, TokenStream value, float boost) {
+ fieldTypes.recordLargeTextType(fieldName, false);
+ fields.add(new FieldValue(fieldName, value, boost));
+ }
+
+ /** E.g. a "body" field. Default: indexes this value as multiple tokens from analyzer. */
+ public void addLargeText(String fieldName, Reader reader) {
+ addLargeText(fieldName, reader, DEFAULT_BOOST);
+ }
+
+ /** E.g. a "body" field. Default: indexes this value as multiple tokens from analyzer. */
+ public void addLargeText(String fieldName, Reader value, float boost) {
+ fieldTypes.recordLargeTextType(fieldName, false);
+ fields.add(new FieldValue(fieldName, value, boost));
+ }
+
+ // addLongArray, addIntArray
+
+ // nocommit don't use overloading here ... change to addLong, addFloat, etc.
+ /** Default: support for range filtering/querying and sorting (using numeric doc values). */
+ public void addNumber(String fieldName, int value) {
+ fieldTypes.recordValueType(fieldName, FieldTypes.ValueType.INT);
+ fields.add(new FieldValue(fieldName, Integer.valueOf(value)));
+ }
+
+ /** Default: support for range filtering/querying and sorting (using numeric doc values). */
+ public void addNumber(String fieldName, float value) {
+ fieldTypes.recordValueType(fieldName, FieldTypes.ValueType.FLOAT);
+ fields.add(new FieldValue(fieldName, Float.valueOf(value)));
+ }
+
+ /** Default: support for range filtering/querying and sorting (using numeric doc values). */
+ public void addNumber(String fieldName, long value) {
+ fieldTypes.recordValueType(fieldName, FieldTypes.ValueType.LONG);
+ fields.add(new FieldValue(fieldName, Long.valueOf(value)));
+ }
+
+ /** Default: support for range filtering/querying and sorting (using numeric doc values). */
+ public void addNumber(String fieldName, double value) {
+ fieldTypes.recordValueType(fieldName, FieldTypes.ValueType.DOUBLE);
+ fields.add(new FieldValue(fieldName, Double.valueOf(value)));
+ }
+}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Field.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Field.java?rev=1633314&r1=1633313&r2=1633314&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Field.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Field.java Tue Oct 21 08:45:44 2014
@@ -253,7 +253,7 @@ public class Field implements IndexableF
/**
* The value of the field as a String, or null. If null, the Reader value or
* binary value is used. Exactly one of stringValue(), readerValue(), and
- * getBinaryValue() must be set.
+ * binaryValue() must be set.
*/
@Override
public String stringValue() {
@@ -267,9 +267,8 @@ public class Field implements IndexableF
/**
* The value of the field as a Reader, or null. If null, the String value or
* binary value is used. Exactly one of stringValue(), readerValue(), and
- * getBinaryValue() must be set.
+ * binaryValue() must be set.
*/
- @Override
public Reader readerValue() {
return fieldsData instanceof Reader ? (Reader) fieldsData : null;
}
@@ -416,7 +415,7 @@ public class Field implements IndexableF
/**
* Expert: sets the token stream to be used for indexing and causes
* isIndexed() and isTokenized() to return true. May be combined with stored
- * values from stringValue() or getBinaryValue()
+ * values from stringValue() or binaryValue()
*/
public void setTokenStream(TokenStream tokenStream) {
if (type.indexOptions() == null || !type.tokenized()) {
@@ -469,6 +468,15 @@ public class Field implements IndexableF
}
@Override
+ public Number numericDocValue() {
+ if (fieldsData instanceof Number) {
+ return (Number) fieldsData;
+ } else {
+ return null;
+ }
+ }
+
+ @Override
public BytesRef binaryValue() {
if (fieldsData instanceof BytesRef) {
return (BytesRef) fieldsData;
@@ -476,6 +484,15 @@ public class Field implements IndexableF
return null;
}
}
+
+ @Override
+ public BytesRef binaryDocValue() {
+ if (fieldsData instanceof BytesRef) {
+ return (BytesRef) fieldsData;
+ } else {
+ return null;
+ }
+ }
/** Prints a Field for human consumption. */
@Override
@@ -497,6 +514,7 @@ public class Field implements IndexableF
/** Returns the {@link FieldType} for this field. */
@Override
public FieldType fieldType() {
+ // nocommit shouldn't we make sure type is frozen at this point?
return type;
}
@@ -559,54 +577,6 @@ public class Field implements IndexableF
throw new IllegalArgumentException("Field must have either TokenStream, String, Reader or Number value; got " + this);
}
-
- static final class StringTokenStream extends TokenStream {
- private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
- private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
- private boolean used = false;
- private String value = null;
-
- /** Creates a new TokenStream that returns a String as single token.
- * <p>Warning: Does not initialize the value, you must call
- * {@link #setValue(String)} afterwards!
- */
- StringTokenStream() {
- }
-
- /** Sets the string value. */
- void setValue(String value) {
- this.value = value;
- }
-
- @Override
- public boolean incrementToken() {
- if (used) {
- return false;
- }
- clearAttributes();
- termAttribute.append(value);
- offsetAttribute.setOffset(0, value.length());
- used = true;
- return true;
- }
-
- @Override
- public void end() throws IOException {
- super.end();
- final int finalOffset = value.length();
- offsetAttribute.setOffset(finalOffset, finalOffset);
- }
-
- @Override
- public void reset() {
- used = false;
- }
-
- @Override
- public void close() {
- value = null;
- }
- }
/** Specifies whether and how a field should be stored. */
public static enum Store {
Added: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/FieldTypes.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/FieldTypes.java?rev=1633314&view=auto
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/FieldTypes.java (added)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/FieldTypes.java Tue Oct 21 08:45:44 2014
@@ -0,0 +1,1498 @@
+package org.apache.lucene.document;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
+import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
+import org.apache.lucene.codecs.lucene50.Lucene50Codec;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.FieldInfo.DocValuesType;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexableFieldType;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.NumericRangeQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.SortedNumericSortField;
+import org.apache.lucene.search.SortedSetSortField;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefBuilder;
+import org.apache.lucene.util.NumericUtils;
+
+// TODO
+// - explore what it'd be like to add other higher level types?
+// - BigInt, BigDecimal, IPV6
+// - setters for posinc/offset gaps?
+// - can we remove analyzer from IW?
+// - what about sparse fields... anything for us to do...
+// - payloads just stay write once in their own way?
+// - how to handle old indices w/ no field types yet?
+// - can we simplify how "low level schema" (FieldInfo) merges itself?
+// - only if this new API is the only way to add docs to IndexWriter ... hmm
+// - CheckIndex could optionally do more validation
+// - e.g. that the terms in a numeric field are correct (lead w/ prefix, 7 bit clean)
+// - or that terms in an ipv6 field always have 16 bytes
+// - add query time integration
+// - query parsers
+// - exc if invalid field name asked for
+// - numeric range queries "just work"
+// - creating SortField
+// - creating queries, catching invalid field names, no positions indexed, etc.
+// - SortField should verify FieldTypes.sortable is set for that field
+// - prox queries can verify field was indexed w/ positions
+// - normal queries can verify field was even indexed
+// - move analyzer out of IW/IWC into Field/FieldType/s only?
+// - we could go back to allowing pulling the Document from a reader, updating, re-indexing? e.g. we can know all fields were stored, and
+// throw exc if not
+// - get save/load working
+// - why does STS fill offset...
+// - no more field reuse right?
+
+// Lucene's secret schemas
+// FieldInfos
+// SortField.type
+// DocValuesType
+// subclassing QueryParsers
+// PerFieldPF/DVF
+// PerFieldSimilarityWrapper
+// PerFieldAnalyzerWrapper
+// oal.document
+
+// nocommit make ValueType public? add setter so you can set that too?
+
+// language for the field? (to default collator)
+
+// nocommit sort order, sort options (e.g. for collation)
+// case sensitive, punc sensitive, accent sensitive
+// can we fold in ICUCollationDocValuesField somehow...
+
+// nocommit need index vs search time analysis?
+
+// nocommit suggesters
+
+// nocommit how to change block tree's block settings?
+
+// nocommit index-time sorting should be here too
+
+// nocommit sort by languages
+
+// nocommit can we require use of analyzer factories?
+
+// nocommit what schema options does solr offer
+
+// nocommit accent removal and lowercasing for wildcards should just work
+
+// separate analyzer for phrase queries in suggesters
+
+// go through the process of adding a "new high schema type"
+
+// nocommit Index class? enforcing unique id, xlog?
+
+// nocommit how to randomize IWC? RIW?
+
+// nocommit add .getSort method
+
+// nocommit add .getXXXQuery? method
+
+// nocommit maybe we need change IW's setCommitData API to be "add/remove key/value from commit data"?
+
+// nocommit just persist as FieldInfos? but that's per-segment, and ... it enforces the low-level constraints?
+
+// nocommit unique/primary key ?
+
+// nocommit must document / make sugar for creating IndexSearcher w/ sim from this class
+
+// nocommit fix all change methods to call validate / rollback
+
+// nocommit boolean, float16
+
+// nocommit can we move multi-field-ness out of IW? so IW only gets a single instance of each field
+
+/** Records how each field is indexed, stored, etc. This class persists
+ * its state using {@link IndexWriter#setCommitData}, using the
+ * {@link FieldTypes#FIELD_PROPERTIES_KEY} key. */
+
+public class FieldTypes {
+
+ /** Key used to store the field types inside {@link IndexWriter#setCommitData}. */
+ public static final String FIELD_PROPERTIES_KEY = "field_properties";
+
+ // nocommit reduce to just number, text, atom?
+ enum ValueType {
+ TEXT,
+ SHORT_TEXT,
+ ATOM,
+ INT,
+ FLOAT,
+ LONG,
+ DOUBLE,
+ BINARY,
+ // nocommit primary_key?
+ }
+
+ private final IndexReader reader;
+
+ // nocommit: messy: this is non-final because of circular dependency issues...
+ private IndexWriter writer;
+
+ private final Map<String,FieldType> fields = new HashMap<>();
+
+ private final Analyzer defaultAnalyzer;
+ private final Similarity defaultSimilarity;
+
+ /** Just like current oal.document.FieldType, except for each setting it can also record "not-yet-set". */
+ static class FieldType implements IndexableFieldType {
+ private final String name;
+
+ public FieldType(String name) {
+ this.name = name;
+ }
+
+ // nocommit volatile for all these instance vars:
+ volatile ValueType valueType;
+ volatile DocValuesType docValuesType;
+
+ // Expert: settings we pass to BlockTree to control how many terms are allowed in each block.
+ volatile Integer blockTreeMinItemsInBlock;
+ volatile Integer blockTreeMaxItemsInBlock;
+
+ volatile Integer numericPrecisionStep;
+
+ // Whether this field's values are stored, or null if it's not yet set:
+ private volatile Boolean stored;
+
+ // Whether this field's values should be indexed as doc values for sorting:
+ private volatile Boolean sortable;
+
+ // Whether this field may appear more than once per document:
+ private volatile Boolean multiValued;
+
+ // Whether this field's norms are indexed:
+ private volatile Boolean indexNorms;
+
+ private volatile Boolean storeTermVectors;
+ private volatile Boolean storeTermVectorPositions;
+ private volatile Boolean storeTermVectorOffsets;
+ private volatile Boolean storeTermVectorPayloads;
+
+ // Field is indexed if this != null:
+ private volatile IndexOptions indexOptions;
+
+ // nocommit: not great that we can't also set other formats... but we need per-field wrappers to do this, or we need to move
+ // "per-field-ness" of these formats into here, or something:
+ private volatile String postingsFormat;
+ private volatile String docValuesFormat;
+
+ // NOTE: not persisted, because we don't have API for persisting any analyzer :(
+ private volatile Analyzer analyzer;
+ private volatile Similarity similarity;
+
+ private boolean validate() {
+ if (valueType != null) {
+ switch (valueType) {
+ case INT:
+ case FLOAT:
+ case LONG:
+ case DOUBLE:
+ if (analyzer != null) {
+ illegalState(name, "type " + valueType + " cannot have an analyzer");
+ }
+ if (docValuesType != null && (docValuesType != DocValuesType.NUMERIC && docValuesType != DocValuesType.SORTED_NUMERIC)) {
+ illegalState(name, "type " + valueType + " must use NUMERIC docValuesType (got: " + docValuesType + ")");
+ }
+ if (indexOptions != null && indexOptions.compareTo(IndexOptions.DOCS_ONLY) > 0) {
+ illegalState(name, "type " + valueType + " cannot use indexOptions > DOCS_ONLY (got indexOptions " + indexOptions + ")");
+ }
+ break;
+ case TEXT:
+ if (sortable == Boolean.TRUE) {
+ illegalState(name, "type " + valueType + " cannot sort");
+ }
+ if (docValuesType != null) {
+ illegalState(name, "type " + valueType + " cannot use docValuesType " + docValuesType);
+ }
+ break;
+ case SHORT_TEXT:
+ if (docValuesType != null && docValuesType != DocValuesType.BINARY && docValuesType != DocValuesType.SORTED && docValuesType != DocValuesType.SORTED_SET) {
+ illegalState(name, "type " + valueType + " cannot use docValuesType " + docValuesType);
+ }
+ break;
+ case BINARY:
+ if (analyzer != null) {
+ illegalState(name, "type " + valueType + " cannot have an analyzer");
+ }
+ if (docValuesType != null && docValuesType != DocValuesType.BINARY && docValuesType != DocValuesType.SORTED && docValuesType != DocValuesType.SORTED_SET) {
+ illegalState(name, "type " + valueType + " must use BINARY docValuesType (got: " + docValuesType + ")");
+ }
+ break;
+ case ATOM:
+ if (analyzer != null) {
+ illegalState(name, "type " + valueType + " cannot have an analyzer");
+ }
+ // nocommit make sure norms are disabled?
+ if (indexOptions != null && indexOptions.compareTo(IndexOptions.DOCS_ONLY) > 0) {
+ // nocommit too anal?
+ illegalState(name, "type " + valueType + " can only be indexed as DOCS_ONLY; got " + indexOptions);
+ }
+ break;
+ default:
+ throw new AssertionError("missing value type in switch");
+ }
+ // nocommit more checks
+ }
+
+ if (multiValued == Boolean.TRUE &&
+ (docValuesType == DocValuesType.NUMERIC ||
+ docValuesType == DocValuesType.SORTED ||
+ docValuesType == DocValuesType.BINARY)) {
+ illegalState(name, "DocValuesType=" + docValuesType + " cannot be multi-valued");
+ }
+
+ if (indexOptions == null && blockTreeMinItemsInBlock != null) {
+ illegalState(name, "can only setTermsDictBlockSize if the field is indexed");
+ }
+
+ if (postingsFormat != null && blockTreeMinItemsInBlock != null) {
+ illegalState(name, "cannot use both setTermsDictBlockSize and setPostingsFormat");
+ }
+
+ return true;
+ }
+
+ /** Returns a multi-line, human-readable summary of this field: its name, valueType,
+  *  numericPrecisionStep (only for INT, FLOAT, LONG and DOUBLE value types), stored,
+  *  sortable, docValuesType and indexOptions. Any setting that is still null is
+  *  printed as "unset". */
+ @Override
+ public String toString() {
+   StringBuilder b = new StringBuilder();
+   b.append("field \"");
+   b.append(name);
+   b.append("\":\n");
+   b.append(" valueType: ");
+   if (valueType != null) {
+     b.append(valueType);
+   } else {
+     b.append("unset");
+   }
+   b.append('\n');
+
+   if (valueType == ValueType.INT ||
+       valueType == ValueType.FLOAT ||
+       valueType == ValueType.LONG ||
+       valueType == ValueType.DOUBLE) {
+     b.append(" numericPrecisionStep: ");
+     // Print the value when it IS set; the original test was inverted and printed
+     // "null" for unset fields and "unset" for fields that had a value.
+     if (numericPrecisionStep != null) {
+       b.append(numericPrecisionStep);
+     } else {
+       b.append("unset");
+     }
+     b.append('\n');
+   }
+
+   b.append(" stored: ");
+   if (stored != null) {
+     b.append(stored);
+   } else {
+     b.append("unset");
+   }
+   b.append('\n');
+
+   b.append(" sortable: ");
+   if (sortable != null) {
+     b.append(sortable);
+   } else {
+     b.append("unset");
+   }
+   b.append('\n');
+
+   b.append(" docValuesType: ");
+   if (docValuesType != null) {
+     b.append(docValuesType);
+   } else {
+     b.append("unset");
+   }
+   b.append('\n');
+
+   b.append(" indexOptions: ");
+   if (indexOptions != null) {
+     b.append(indexOptions);
+   } else {
+     b.append("unset");
+   }
+   b.append('\n');
+
+   return b.toString();
+ }
+
+ @Override
+ public boolean stored() {
+ return stored == Boolean.TRUE;
+ }
+
+ @Override
+ public boolean storeTermVectors() {
+ return storeTermVectors == Boolean.TRUE;
+ }
+
+ @Override
+ public boolean storeTermVectorOffsets() {
+ return storeTermVectorOffsets == Boolean.TRUE;
+ }
+
+ @Override
+ public boolean storeTermVectorPositions() {
+ return storeTermVectorPositions == Boolean.TRUE;
+ }
+
+ @Override
+ public boolean storeTermVectorPayloads() {
+ return storeTermVectorPayloads == Boolean.TRUE;
+ }
+
+ @Override
+ public boolean omitNorms() {
+ return indexNorms == null || indexNorms.booleanValue() == false;
+ }
+
+ @Override
+ public IndexOptions indexOptions() {
+ return indexOptions;
+ }
+
+ @Override
+ public DocValuesType docValueType() {
+ return docValuesType;
+ }
+ }
+
+ /** Create a new index-time (writable) instance using the specified default analyzer, and {@link IndexSearcher#getDefaultSimilarity}
+ * similarity. Note that you must call {@link #setIndexWriter} before changing any types. */
+ public FieldTypes(Analyzer defaultAnalyzer) {
+ this(defaultAnalyzer, IndexSearcher.getDefaultSimilarity());
+ }
+
+ /** Create a new index-time (writable) instance using the specified default analyzer and similarity. Note that you must call {@link
+ * #setIndexWriter} before changing any types. */
+ public FieldTypes(Analyzer defaultAnalyzer, Similarity defaultSimilarity) {
+ this.reader = null;
+ this.defaultAnalyzer = defaultAnalyzer;
+ this.defaultSimilarity = defaultSimilarity;
+ }
+
+ /** Create a new search-time (read-only) instance using the specified default analyzer, and {@link IndexSearcher#getDefaultSimilarity}
+ * similarity. */
+ public FieldTypes(DirectoryReader reader, Analyzer defaultAnalyzer) throws IOException {
+ this(reader, defaultAnalyzer, IndexSearcher.getDefaultSimilarity());
+ }
+
+ /** Create a new search-time (read-only) instance using the specified default analyzer. */
+ public FieldTypes(DirectoryReader reader, Analyzer defaultAnalyzer, Similarity defaultSimilarity) throws IOException {
+ this.reader = reader;
+ this.defaultAnalyzer = defaultAnalyzer;
+ this.defaultSimilarity = defaultSimilarity;
+ loadFields(reader.getIndexCommit().getUserData());
+ }
+
+ /** Attaches the {@link IndexWriter} this instance works with and passes the writer's
+  *  commit data to {@code loadFields}. May be called at most once, and only on an
+  *  instance that was not created from an {@link IndexReader}.
+  *
+  *  @throws IllegalStateException if a writer was already set, or if this instance has a reader */
+ public synchronized void setIndexWriter(IndexWriter writer) throws IOException {
+   if (this.writer != null) {
+     throw new IllegalStateException("setIndexWriter was already called");
+   }
+   if (this.reader != null) {
+     throw new IllegalStateException("this FieldTypes is read-only (has an IndexReader already)");
+   }
+   this.writer = writer;
+   loadFields(writer.getCommitData());
+ }
+
+ private void loadFields(Map<String,String> commitUserData) {
+ // nocommit must deserialize current fields from commit data
+ }
+
+ public synchronized void setPostingsFormat(String fieldName, String postingsFormat) {
+ // Will throw exception if this postingsFormat is unrecognized:
+ PostingsFormat.forName(postingsFormat);
+
+ FieldType current = fields.get(fieldName);
+ if (current == null) {
+ current = new FieldType(fieldName);
+ current.postingsFormat = postingsFormat;
+ fields.put(fieldName, current);
+ changed();
+ } else {
+ current.postingsFormat = postingsFormat;
+ changed();
+ }
+ }
+
+ synchronized FieldType getFieldType(String fieldName) {
+ FieldType current = fields.get(fieldName);
+ fieldMustExist(fieldName, current);
+ return current;
+ }
+
+ public synchronized String getPostingsFormat(String fieldName) {
+ FieldType current = fields.get(fieldName);
+ fieldMustExist(fieldName, current);
+ return current.postingsFormat;
+ }
+
+ public synchronized void setDocValuesFormat(String fieldName, String docValuesFormat) {
+ // Will throw exception if this docValuesFormat is unrecognized:
+ DocValuesFormat.forName(docValuesFormat);
+
+ FieldType current = fields.get(fieldName);
+ if (current == null) {
+ current = new FieldType(fieldName);
+ current.docValuesFormat = docValuesFormat;
+ fields.put(fieldName, current);
+ changed();
+ } else {
+ current.docValuesFormat = docValuesFormat;
+ changed();
+ }
+ }
+
+ synchronized FieldType getField(String fieldName) {
+ return fields.get(fieldName);
+ }
+
+ // nocommit sugar to getIndexWriter? and it takes care of linking together this & IW?
+
+ // nocommit but how can we randomized IWC for tests?
+
+ /** Returns a new default {@link IndexWriterConfig}, with {@link Analyzer}, {@link Similarity} and {@link Codec} pre-set. */
+ public IndexWriterConfig getDefaultIndexWriterConfig() {
+ IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
+ iwc.setSimilarity(similarity);
+ iwc.setCodec(codec);
+
+ return iwc;
+ }
+
+ private final Similarity similarity = new PerFieldSimilarityWrapper() {
+ @Override
+ public Similarity get(String fieldName) {
+ // Field must exist:
+ FieldType field = getFieldType(fieldName);
+ if (field.similarity != null) {
+ return field.similarity;
+ } else {
+ return FieldTypes.this.defaultSimilarity;
+ }
+ }
+ };
+
+ // nocommit but how can we randomized Codec in tests?
+ private final Codec codec = new Lucene50Codec() {
+ // nocommit: too bad we can't just set every format here? what if we fix this schema to record which format per field, and then
+ // remove PerFieldXXXFormat...?
+ @Override
+ public PostingsFormat getPostingsFormatForField(String fieldName) {
+ // Field must exist:
+ FieldType field = getFieldType(fieldName);
+ if (field != null) {
+ if (field.postingsFormat != null) {
+ return PostingsFormat.forName(field.postingsFormat);
+ } else if (field.blockTreeMinItemsInBlock != null) {
+ assert field.blockTreeMaxItemsInBlock != null;
+ // nocommit do we now have cleaner API for this? Ie "get me default PF, changing these settings"...
+ return new Lucene41PostingsFormat(field.blockTreeMinItemsInBlock.intValue(),
+ field.blockTreeMaxItemsInBlock.intValue());
+ }
+ }
+ return super.getPostingsFormatForField(fieldName);
+ }
+
+ @Override
+ public DocValuesFormat getDocValuesFormatForField(String fieldName) {
+ // Field must exist:
+ FieldType field = getFieldType(fieldName);
+ if (field != null && field.docValuesFormat != null) {
+ return DocValuesFormat.forName(field.docValuesFormat);
+ }
+ return super.getDocValuesFormatForField(fieldName);
+ }
+ };
+
+ private static final Analyzer KEYWORD_ANALYZER = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(final String fieldName) {
+ return new TokenStreamComponents(new CoreKeywordTokenizer());
+ }
+ };
+
+ private final Analyzer analyzer = new DelegatingAnalyzerWrapper(Analyzer.PER_FIELD_REUSE_STRATEGY) {
+ @Override
+ protected Analyzer getWrappedAnalyzer(String fieldName) {
+ // Field must exist:
+ FieldType field = getFieldType(fieldName);
+ if (field.analyzer != null) {
+ return field.analyzer;
+ } else if (field.valueType == ValueType.ATOM) {
+ // nocommit need test showing that if you index an ATOM and search field:"XXX YYY" with that atom, it works
+ return KEYWORD_ANALYZER;
+ }
+ return FieldTypes.this.defaultAnalyzer;
+ }
+
+ // nocommit what about wrapReader?
+ };
+
+ /** Returns {@link Similarity} that returns the per-field Similarity. */
+ public Similarity getSimilarity() {
+ return similarity;
+ }
+
+ /** Returns {@link Codec} that returns the per-field formats. */
+ public Codec getCodec() {
+ return codec;
+ }
+
+ /** Returns {@link Analyzer} that returns the per-field analyzer. */
+ public Analyzer getAnalyzer() {
+ return analyzer;
+ }
+
+ /** NOTE: analyzer does not persist, so each time you create {@code FieldTypes} from
+ * {@link IndexWriter} or {@link IndexReader} you must set all per-field analyzers again. */
+ public synchronized void setAnalyzer(String fieldName, Analyzer analyzer) {
+ FieldType current = fields.get(fieldName);
+ if (current == null) {
+ current = new FieldType(fieldName);
+ current.analyzer = analyzer;
+ fields.put(fieldName, current);
+ changed();
+ } else {
+ current.analyzer = analyzer;
+ changed();
+ }
+ }
+
+ public synchronized Analyzer getAnalyzer(String fieldName) {
+ FieldType current = fields.get(fieldName);
+ fieldMustExist(fieldName, current);
+ return current.analyzer;
+ }
+
+ /** NOTE: similarity does not persist, so each time you create {@code FieldTypes} from
+ * {@link IndexWriter} or {@link IndexReader} you must set all per-field similarities again. */
+ public synchronized void setSimilarity(String fieldName, Similarity similarity) {
+ FieldType current = fields.get(fieldName);
+ if (current == null) {
+ current = new FieldType(fieldName);
+ current.similarity = similarity;
+ fields.put(fieldName, current);
+ changed();
+ } else {
+ // nocommit should we not allow this...
+ current.similarity = similarity;
+ changed();
+ }
+ }
+
+ public synchronized Similarity getSimilarity(String fieldName) {
+ FieldType current = fields.get(fieldName);
+ fieldMustExist(fieldName, current);
+ return current.similarity;
+ }
+
+ /** Notes that this field may have more than one value per document. */
+ public synchronized void setMultiValued(String fieldName) {
+ FieldType current = fields.get(fieldName);
+ if (current == null) {
+ current = new FieldType(fieldName);
+ current.multiValued = Boolean.TRUE;
+ fields.put(fieldName, current);
+ changed();
+ } else if (current.multiValued == null) {
+ boolean success = false;
+ try {
+ current.multiValued = Boolean.TRUE;
+ current.validate();
+ success = true;
+ } finally {
+ if (success == false) {
+ current.multiValued = null;
+ }
+ }
+ changed();
+ } else if (current.multiValued == Boolean.FALSE) {
+ illegalState(fieldName, "multiValued was already set to False");
+ }
+ }
+
+ /** Returns true if this field may have more than one value per document. */
+ public synchronized boolean getMultiValued(String fieldName) {
+ FieldType current = fields.get(fieldName);
+ fieldMustExist(fieldName, current);
+ return current.multiValued == Boolean.TRUE;
+ }
+
+ /** Sets the minimum number of terms in each term block in the terms dictionary. These can be changed at any time, but changes only take
+ * effect for newly written (flushed or merged) segments. The default is 25; higher values make fewer, larger blocks, which require less
+ * heap in the IndexReader but slows down term lookups. */
+ public synchronized void setTermsDictBlockSize(String fieldName, int minItemsPerBlock) {
+ setTermsDictBlockSize(fieldName, minItemsPerBlock, 2*(minItemsPerBlock-1));
+ }
+
+ /** Sets the minimum and maximum number of terms in each term block in the terms dictionary. These can be changed at any time, but changes only take
+  *  effect for newly written (flushed or merged) segments. The default is 25 and 48; higher values make fewer, larger blocks, which require less
+  *  heap in the IndexReader but slow down term lookups.
+  *
+  *  <p>For a field that already exists, the new values are validated against its other settings; if validation fails, the previous block
+  *  sizes (possibly unset) are restored before the exception propagates. */
+ public synchronized void setTermsDictBlockSize(String fieldName, int minItemsPerBlock, int maxItemsPerBlock) {
+   ensureWritable();
+
+   try {
+     BlockTreeTermsWriter.validateSettings(minItemsPerBlock, maxItemsPerBlock);
+   } catch (IllegalArgumentException iae) {
+     illegalState(fieldName, iae.getMessage());
+   }
+
+   FieldType current = fields.get(fieldName);
+   if (current == null) {
+     // New field: record the settings without calling validate(), as the other setters do for new fields.
+     current = new FieldType(fieldName);
+     current.blockTreeMinItemsInBlock = minItemsPerBlock;
+     current.blockTreeMaxItemsInBlock = maxItemsPerBlock;
+     fields.put(fieldName, current);
+     changed();
+   } else {
+     // Remember the previous values (null if never set) so a failed validation leaves the field untouched:
+     Integer oldMinItems = current.blockTreeMinItemsInBlock;
+     Integer oldMaxItems = current.blockTreeMaxItemsInBlock;
+     boolean success = false;
+     try {
+       current.blockTreeMinItemsInBlock = minItemsPerBlock;
+       current.blockTreeMaxItemsInBlock = maxItemsPerBlock;
+       current.validate();
+       success = true;
+     } finally {
+       if (success == false) {
+         current.blockTreeMinItemsInBlock = oldMinItems;
+         current.blockTreeMaxItemsInBlock = oldMaxItems;
+       }
+     }
+     // Report the change even when block sizes had been set before; the original skipped this in that case.
+     changed();
+   }
+ }
+
+ /** Enables sorting for this field, using doc values of the appropriate type. */
+ // nocommit either rename this, or rename enableStored, or both (they are the same letters just shuffled!)
+ public synchronized void enableSorted(String fieldName) {
+ FieldType current = fields.get(fieldName);
+ if (current == null) {
+ current = new FieldType(fieldName);
+ current.sortable = Boolean.TRUE;
+ fields.put(fieldName, current);
+ changed();
+ } else if (current.sortable == null) {
+ boolean success = false;
+ try {
+ current.sortable = Boolean.TRUE;
+ current.validate();
+ success = true;
+ } finally {
+ if (success == false) {
+ current.sortable = null;
+ }
+ }
+ changed();
+ } else if (current.sortable == Boolean.FALSE) {
+ illegalState(fieldName, "sorting was already disabled");
+ }
+ }
+
+ /** Disables sorting for this field. */
+ public synchronized void disableSorted(String fieldName) {
+ FieldType current = fields.get(fieldName);
+ if (current == null) {
+ current = new FieldType(fieldName);
+ current.sortable = Boolean.FALSE;
+ fields.put(fieldName, current);
+ changed();
+ } else if (current.sortable != Boolean.FALSE) {
+ // nocommit ok to allow this?
+ // nocommit should we validate?
+ current.sortable = Boolean.FALSE;
+ changed();
+ }
+ }
+
+ public synchronized boolean getSorted(String fieldName) {
+ FieldType current = fields.get(fieldName);
+ fieldMustExist(fieldName, current);
+ return current.sortable == Boolean.TRUE;
+ }
+
+ // nocommit too ambitious?
+ public synchronized void enableHighlighted(String fieldName) {
+ }
+
+ // nocommit too ambitious?
+ public synchronized void disableHighlighted(String fieldName) {
+ }
+
+ // nocommit too ambitious?
+ public synchronized boolean getHighlighted(String fieldName) {
+ return false;
+ }
+
+ /** Enables norms for this field. This is only allowed if norms were not already disabled.
+  *  If the field already exists with norms unset, the change is validated against the field's
+  *  other settings and undone if validation fails. Calling this when norms are already enabled
+  *  does nothing. */
+ public synchronized void enableNorms(String fieldName) {
+   FieldType current = fields.get(fieldName);
+   if (current == null) {
+     current = new FieldType(fieldName);
+     current.indexNorms = Boolean.TRUE;
+     fields.put(fieldName, current);
+     changed();
+   } else if (current.indexNorms == null) {
+     boolean success = false;
+     try {
+       current.indexNorms = Boolean.TRUE;
+       // Without this call the rollback in the finally clause could never trigger:
+       current.validate();
+       success = true;
+     } finally {
+       if (success == false) {
+         current.indexNorms = null;
+       }
+     }
+     changed();
+   } else if (current.indexNorms == Boolean.FALSE) {
+     illegalState(fieldName, "cannot enable norms that were already disabled");
+   }
+ }
+
+ /** Disable norms for this field. */
+ public synchronized void disableNorms(String fieldName) {
+ FieldType current = fields.get(fieldName);
+ if (current == null) {
+ current = new FieldType(fieldName);
+ current.indexNorms = Boolean.FALSE;
+ fields.put(fieldName, current);
+ changed();
+ } else if (current.indexNorms != Boolean.FALSE) {
+ current.indexNorms = Boolean.FALSE;
+ changed();
+ }
+ }
+
+ public synchronized boolean getNorms(String fieldName) {
+ FieldType current = fields.get(fieldName);
+ fieldMustExist(fieldName, current);
+ return current.indexNorms == Boolean.TRUE;
+ }
+
+ /** Store values for this field. This can be changed at any time.
+  *  Synchronized like the other setters, because it reads and updates the shared {@code fields} map. */
+ public synchronized void enableStored(String fieldName) {
+   FieldType current = fields.get(fieldName);
+   if (current == null) {
+     current = new FieldType(fieldName);
+     current.stored = Boolean.TRUE;
+     fields.put(fieldName, current);
+     changed();
+   } else if (current.stored != Boolean.TRUE) {
+     // nocommit should this change not be allowed...
+     current.stored = Boolean.TRUE;
+     changed();
+   }
+ }
+
+ /** Do not store values for this field. This can be changed at any time. */
+ public synchronized void disableStored(String fieldName) {
+ FieldType current = fields.get(fieldName);
+ if (current == null) {
+ current = new FieldType(fieldName);
+ current.stored = Boolean.FALSE;
+ fields.put(fieldName, current);
+ changed();
+ } else if (current.stored == null || current.stored == Boolean.TRUE) {
+ // nocommit should this change not be allowed...
+ current.stored = Boolean.FALSE;
+ changed();
+ }
+ }
+
+ /** Whether this field's value is stored. */
+ public synchronized boolean getStored(String fieldName) {
+ FieldType current = fields.get(fieldName);
+ fieldMustExist(fieldName, current);
+ return current.stored == Boolean.TRUE;
+ }
+
+ // nocommit iterator over all fields / types?
+
+ public synchronized void setNumericPrecisionStep(String fieldName, int precStep) {
+ FieldType current = fields.get(fieldName);
+ if (current == null) {
+ current = new FieldType(fieldName);
+ fields.put(fieldName, current);
+ current.numericPrecisionStep = precStep;
+ changed();
+ } else if (current.numericPrecisionStep == null) {
+ current.numericPrecisionStep = precStep;
+ changed();
+ } else if (current.numericPrecisionStep.intValue() != precStep) {
+ illegalState(fieldName, "cannot change numericPrecisionStep from " + current.numericPrecisionStep + " to " + precStep);
+ }
+ }
+
+ /** @throws IllegalStateException if this field is unknown, or is not a numeric field. */
+ public synchronized int getNumericPrecisionStep(String fieldName) {
+ FieldType current = fields.get(fieldName);
+ fieldMustExist(fieldName, current);
+ if (current.numericPrecisionStep == null) {
+ illegalState(fieldName, "no numericPrecisionStep is set");
+ }
+ return current.numericPrecisionStep;
+ }
+
+ // nocommit should we make a single method to enable the different combinations...?
+ public synchronized void enableTermVectors(String fieldName) {
+ FieldType current = fields.get(fieldName);
+ if (current == null) {
+ current = new FieldType(fieldName);
+ current.storeTermVectors = Boolean.TRUE;
+ fields.put(fieldName, current);
+ changed();
+ } else if (current.storeTermVectors != Boolean.TRUE) {
+ // nocommit should this change not be allowed...
+ current.storeTermVectors = Boolean.TRUE;
+ changed();
+ }
+ }
+
+ public synchronized void disableTermVectors(String fieldName) {
+ FieldType current = fields.get(fieldName);
+ if (current == null) {
+ current = new FieldType(fieldName);
+ current.storeTermVectors = Boolean.FALSE;
+ fields.put(fieldName, current);
+ changed();
+ } else if (current.storeTermVectors != Boolean.FALSE) {
+ // nocommit should this change not be allowed...
+ current.storeTermVectors = Boolean.FALSE;
+ changed();
+ }
+ }
+
+ public synchronized boolean getTermVectors(String fieldName) {
+ FieldType current = fields.get(fieldName);
+ fieldMustExist(fieldName, current);
+ return current.storeTermVectors == Boolean.TRUE;
+ }
+
+ /** Enables offsets in this field's term vectors. The field is checked with
+  *  {@code fieldMustExist}, and term vectors must already be enabled for it.
+  *  Synchronized like the other setters, because it reads the shared {@code fields} map. */
+ public synchronized void enableTermVectorOffsets(String fieldName) {
+   FieldType current = fields.get(fieldName);
+   fieldMustExist(fieldName, current);
+   if (current.storeTermVectors != Boolean.TRUE) {
+     illegalState(fieldName, "cannot enable termVectorOffsets when termVectors haven't been enabled");
+   }
+   if (current.storeTermVectorOffsets != Boolean.TRUE) {
+     // nocommit should this change not be allowed...
+     current.storeTermVectorOffsets = Boolean.TRUE;
+     changed();
+   }
+ }
+
+ /** Disables offsets in this field's term vectors. The field is checked with
+  *  {@code fieldMustExist}; nothing changes unless offsets are currently enabled.
+  *  Synchronized like the other setters, because it reads the shared {@code fields} map. */
+ public synchronized void disableTermVectorOffsets(String fieldName) {
+   FieldType current = fields.get(fieldName);
+   fieldMustExist(fieldName, current);
+   if (current.storeTermVectorOffsets == Boolean.TRUE) {
+     // nocommit should this change not be allowed...
+     current.storeTermVectorOffsets = Boolean.FALSE;
+     changed();
+   }
+ }
+
+ /** Returns true if offsets are enabled in this field's term vectors. The field is checked
+  *  with {@code fieldMustExist}. Synchronized because it reads the shared {@code fields} map. */
+ public synchronized boolean getTermVectorOffsets(String fieldName) {
+   FieldType current = fields.get(fieldName);
+   fieldMustExist(fieldName, current);
+   return current.storeTermVectorOffsets == Boolean.TRUE;
+ }
+
+ /** Enables positions in the term vectors of this field.  Does nothing if
+  *  they are already enabled.
+  *
+  *  @throws IllegalArgumentException if the field is not recognized
+  *  @throws IllegalStateException if term vectors are not enabled for the field,
+  *          or a change is needed but this instance is read-only */
+ public synchronized void enableTermVectorPositions(String fieldName) {
+   FieldType current = fields.get(fieldName);
+   fieldMustExist(fieldName, current);
+   if (current.storeTermVectors != Boolean.TRUE) {
+     illegalState(fieldName, "cannot enable termVectorPositions when termVectors haven't been enabled");
+   }
+   if (current.storeTermVectorPositions != Boolean.TRUE) {
+     // nocommit should this change not be allowed...
+     // Check writability first, so a read-only instance is never modified:
+     ensureWritable();
+     current.storeTermVectorPositions = Boolean.TRUE;
+     changed();
+   }
+ }
+
+ /** Disables positions in the term vectors of this field.  Does nothing
+  *  unless they are currently enabled.
+  *
+  *  @throws IllegalArgumentException if the field is not recognized
+  *  @throws IllegalStateException if a change is needed but this instance is read-only */
+ public synchronized void disableTermVectorPositions(String fieldName) {
+   FieldType current = fields.get(fieldName);
+   fieldMustExist(fieldName, current);
+   if (current.storeTermVectorPositions == Boolean.TRUE) {
+     // nocommit should this change not be allowed...
+     // Check writability first, so a read-only instance is never modified:
+     ensureWritable();
+     current.storeTermVectorPositions = Boolean.FALSE;
+     changed();
+   }
+ }
+
+ /** Returns true if term vector positions are enabled for this field.
+  *
+  *  @throws IllegalArgumentException if the field is not recognized */
+ public synchronized boolean getTermVectorPositions(String fieldName) {
+   // synchronized to match getTermVectors and the other accessors of the fields map
+   FieldType current = fields.get(fieldName);
+   fieldMustExist(fieldName, current);
+   return current.storeTermVectorPositions == Boolean.TRUE;
+ }
+
+ /** Enables payloads in the term vectors of this field.  Does nothing if
+  *  they are already enabled.
+  *
+  *  @throws IllegalArgumentException if the field is not recognized
+  *  @throws IllegalStateException if term vectors are not enabled for the field,
+  *          or a change is needed but this instance is read-only */
+ public synchronized void enableTermVectorPayloads(String fieldName) {
+   FieldType current = fields.get(fieldName);
+   fieldMustExist(fieldName, current);
+   if (current.storeTermVectors != Boolean.TRUE) {
+     illegalState(fieldName, "cannot enable termVectorPayloads when termVectors haven't been enabled");
+   }
+   if (current.storeTermVectorPayloads != Boolean.TRUE) {
+     // nocommit should this change not be allowed...
+     // Check writability first, so a read-only instance is never modified:
+     ensureWritable();
+     current.storeTermVectorPayloads = Boolean.TRUE;
+     changed();
+   }
+ }
+
+ /** Disables payloads in the term vectors of this field.  Does nothing
+  *  unless they are currently enabled.
+  *
+  *  @throws IllegalArgumentException if the field is not recognized
+  *  @throws IllegalStateException if a change is needed but this instance is read-only */
+ public synchronized void disableTermVectorPayloads(String fieldName) {
+   FieldType current = fields.get(fieldName);
+   fieldMustExist(fieldName, current);
+   if (current.storeTermVectorPayloads == Boolean.TRUE) {
+     // nocommit should this change not be allowed...
+     // Check writability first, so a read-only instance is never modified:
+     ensureWritable();
+     current.storeTermVectorPayloads = Boolean.FALSE;
+     changed();
+   }
+ }
+
+ /** Returns true if term vector payloads are enabled for this field.
+  *
+  *  @throws IllegalArgumentException if the field is not recognized */
+ public synchronized boolean getTermVectorPayloads(String fieldName) {
+   // synchronized to match getTermVectors and the other accessors of the fields map
+   FieldType current = fields.get(fieldName);
+   fieldMustExist(fieldName, current);
+   return current.storeTermVectorPayloads == Boolean.TRUE;
+ }
+
+ /** Changes index options for this field.  This can be set to any
+  *  value if it's not already set for the provided field; otherwise
+  *  it can only be downgraded as low as DOCS_ONLY but never unset
+  *  entirely (once indexed, always indexed).
+  *
+  *  @throws IllegalStateException if this instance is read-only, if the
+  *          change would upgrade the existing index options, or if it
+  *          would unset them (null) on an already indexed field */
+ public synchronized void setIndexOptions(String fieldName, IndexOptions indexOptions) {
+   ensureWritable();
+   FieldType current = fields.get(fieldName);
+   if (current == null) {
+     current = new FieldType(fieldName);
+     current.indexOptions = indexOptions;
+     fields.put(fieldName, current);
+     changed();
+   } else if (current.indexOptions == null) {
+     // Tentatively apply the new value and roll it back if validation rejects it:
+     boolean success = false;
+     try {
+       current.indexOptions = indexOptions;
+       current.validate();
+       success = true;
+     } finally {
+       if (success == false) {
+         current.indexOptions = null;
+       }
+     }
+     changed();
+   } else if (current.indexOptions != indexOptions) {
+     // Never allow un-indexing; without this check compareTo(null) below would throw NullPointerException:
+     if (indexOptions == null) {
+       illegalState(fieldName, "cannot unset indexOptions (currently " + current.indexOptions + "): once indexed, always indexed");
+     }
+     // Only allow downgrading IndexOptions:
+     if (current.indexOptions.compareTo(indexOptions) < 0) {
+       illegalState(fieldName, "cannot upgrade indexOptions from " + current.indexOptions + " to " + indexOptions);
+     }
+     current.indexOptions = indexOptions;
+     changed();
+   }
+ }
+
+ /** Returns the index options recorded for this field, or null if the field
+  *  is unknown or has no index options set. */
+ public synchronized IndexOptions getIndexOptions(String fieldName) {
+   // synchronized to match getDocValuesType and the other accessors of the fields map
+   FieldType current = fields.get(fieldName);
+   if (current == null) {
+     return null;
+   } else {
+     return current.indexOptions;
+   }
+ }
+
+ /** Sets the doc values type for this field, registering the field first if
+  *  it is not yet known.  Once set, the type cannot be changed to a
+  *  different value.
+  *
+  *  @throws IllegalStateException if this instance is read-only, or the
+  *          field already has a different doc values type */
+ public synchronized void setDocValuesType(String fieldName, DocValuesType dvType) {
+   ensureWritable();
+   FieldType current = fields.get(fieldName);
+   if (current == null) {
+     current = new FieldType(fieldName);
+     current.docValuesType = dvType;
+     fields.put(fieldName, current);
+     changed();
+   } else if (current.docValuesType == null) {
+     // Tentatively apply the new value and roll it back if validation rejects it:
+     boolean success = false;
+     try {
+       current.docValuesType = dvType;
+       current.validate();
+       success = true;
+     } finally {
+       if (success == false) {
+         current.docValuesType = null;
+       }
+     }
+     changed();
+   } else if (current.docValuesType != dvType) {
+     illegalState(fieldName, "cannot change from docValuesType " + current.docValuesType + " to docValuesType " + dvType);
+   }
+ }
+
+ /** Returns the doc values type recorded for this field, or null if the
+  *  field is unknown or has no doc values type set. */
+ public synchronized DocValuesType getDocValuesType(String fieldName) {
+   FieldType current = fields.get(fieldName);
+   if (current == null) {
+     return null;
+   } else {
+     return current.docValuesType;
+   }
+ }
+
+ /** Same as {@link #getDocValuesType(String)}; the {@code dvType} argument
+  *  is ignored and this signature is kept only so existing callers still compile. */
+ public synchronized DocValuesType getDocValuesType(String fieldName, DocValuesType dvType) {
+   return getDocValuesType(fieldName);
+ }
+
+ /** Records the value type seen for this field, registering the field if
+  *  needed, and fills in type-specific defaults.  A field whose value type
+  *  is already set must keep that same type.
+  *
+  *  @throws IllegalStateException if this instance is read-only, or the
+  *          field already has a different value type */
+ synchronized void recordValueType(String fieldName, ValueType valueType) {
+   ensureWritable();
+   FieldType fieldType = fields.get(fieldName);
+
+   // Value type already recorded: it may only be re-recorded unchanged.
+   if (fieldType != null && fieldType.valueType != null) {
+     if (fieldType.valueType != valueType) {
+       illegalState(fieldName, "cannot change from value type " + fieldType.valueType + " to " + valueType);
+     }
+     return;
+   }
+
+   if (fieldType == null) {
+     // First time we see this field:
+     fieldType = new FieldType(fieldName);
+     fieldType.valueType = valueType;
+     fields.put(fieldName, fieldType);
+   } else {
+     // This can happen if e.g. the app first calls FieldTypes.setStored(...)
+     boolean validated = false;
+     try {
+       fieldType.valueType = valueType;
+       fieldType.validate();
+       validated = true;
+     } finally {
+       // Roll back the tentative value type when validation rejected it:
+       if (validated == false) {
+         fieldType.valueType = null;
+       }
+     }
+   }
+
+   setDefaults(fieldType);
+   changed();
+ }
+
+ /** Records that this field holds large text (value type TEXT), registering
+  *  the field if needed, and fills in type-specific defaults.  When
+  *  {@code allowStored} is false the field ends up not stored.
+  *
+  *  @throws IllegalStateException if this instance is read-only, if the
+  *          field already has a different value type, or if
+  *          {@code allowStored} is false but the field was already set to stored */
+ synchronized void recordLargeTextType(String fieldName, boolean allowStored) {
+   ensureWritable();
+   FieldType current = fields.get(fieldName);
+   if (current == null) {
+     current = new FieldType(fieldName);
+     current.valueType = ValueType.TEXT;
+     fields.put(fieldName, current);
+     setDefaults(current);
+     if (allowStored == false) {
+       current.stored = Boolean.FALSE;
+     }
+     changed();
+   } else if (current.valueType == null) {
+     // This can happen if e.g. the app first calls FieldTypes.setStored(...)
+     boolean success = false;
+     try {
+       current.valueType = ValueType.TEXT;
+       current.validate();
+       if (allowStored == false && current.stored == Boolean.TRUE) {
+         illegalState(fieldName, "can only store String large text fields");
+       }
+       success = true;
+     } finally {
+       if (success == false) {
+         current.valueType = null;
+       }
+     }
+     if (allowStored == false) {
+       // Pin stored off before applying defaults; otherwise setDefaults would turn it on
+       // for a field whose stored flag was never set, unlike the new-field branch above:
+       current.stored = Boolean.FALSE;
+     }
+     setDefaults(current);
+     changed();
+   } else if (current.valueType != ValueType.TEXT) {
+     illegalState(fieldName, "cannot change from value type " + current.valueType + " to " + ValueType.TEXT);
+   }
+ }
+
+ private void setDefaults(FieldType field) {
+ switch (field.valueType) {
+
+ case INT:
+ case FLOAT:
+ case LONG:
+ case DOUBLE:
+ // By default, numbers are trie-indexed as DOCS_ONLY without norms, and enabled for sorting (numeric doc values)
+ if (field.sortable == null) {
+ field.sortable = Boolean.TRUE;
+ }
+ if (field.multiValued == null) {
+ field.multiValued = Boolean.FALSE;
+ }
+ if (field.indexOptions == null) {
+ field.indexOptions = IndexOptions.DOCS_ONLY;
+ }
+ if (field.sortable == Boolean.TRUE && field.docValuesType == null) {
+ if (field.multiValued == Boolean.TRUE) {
+ field.docValuesType = DocValuesType.SORTED_NUMERIC;
+ } else {
+ field.docValuesType = DocValuesType.NUMERIC;
+ }
+ }
+ if (field.indexNorms == null) {
+ field.indexNorms = Boolean.FALSE;
+ }
+ if (field.numericPrecisionStep == null) {
+ if (field.valueType == ValueType.INT || field.valueType == ValueType.FLOAT) {
+ field.numericPrecisionStep = 8;
+ } else {
+ field.numericPrecisionStep = 16;
+ }
+ }
+ break;
+
+ case SHORT_TEXT:
+ // By default, short text is indexed as DOCS_ONLY without norms, and enabled for sorting (sorted doc values)
+ if (field.sortable == null) {
+ field.sortable = Boolean.TRUE;
+ }
+ if (field.multiValued == null) {
+ field.multiValued = Boolean.FALSE;
+ }
+ if (field.sortable == Boolean.TRUE && field.docValuesType == null) {
+ if (field.multiValued == Boolean.TRUE) {
+ field.docValuesType = DocValuesType.SORTED_SET;
+ } else {
+ field.docValuesType = DocValuesType.SORTED;
+ }
+ }
+ if (field.indexOptions == null) {
+ field.indexOptions = IndexOptions.DOCS_ONLY;
+ }
+ if (field.indexNorms == null) {
+ field.indexNorms = Boolean.FALSE;
+ }
+ break;
+
+ case ATOM:
+ if (field.sortable == null) {
+ field.sortable = Boolean.FALSE;
+ }
+ if (field.multiValued == null) {
+ field.multiValued = Boolean.FALSE;
+ }
+ if (field.sortable == Boolean.TRUE && field.docValuesType == null) {
+ if (field.multiValued == Boolean.TRUE) {
+ field.docValuesType = DocValuesType.SORTED_SET;
+ } else {
+ field.docValuesType = DocValuesType.SORTED;
+ }
+ }
+ if (field.indexOptions == null) {
+ field.indexOptions = IndexOptions.DOCS_ONLY;
+ }
+ if (field.indexNorms == null) {
+ field.indexNorms = Boolean.FALSE;
+ }
+ break;
+
+ case BINARY:
+ // By default, binary is just a stored blob:
+ if (field.sortable == null) {
+ field.sortable = Boolean.FALSE;
+ }
+ if (field.multiValued == null) {
+ field.multiValued = Boolean.FALSE;
+ }
+ if (field.stored == null) {
+ field.stored = Boolean.TRUE;
+ }
+ break;
+
+ case TEXT:
+ if (field.stored == null) {
+ field.stored = Boolean.TRUE;
+ }
+ if (field.multiValued == null) {
+ field.multiValued = Boolean.FALSE;
+ }
+ if (field.sortable == null) {
+ field.sortable = Boolean.FALSE;
+ }
+ if (field.indexOptions == null) {
+ field.indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
+ }
+ if (field.indexNorms == null) {
+ field.indexNorms = Boolean.TRUE;
+ }
+ break;
+
+ default:
+ throw new AssertionError("missing value type in switch");
+ }
+ }
+
+ /** Returns a query matching all documents that have this int term.
+  *
+  *  @throws IllegalStateException if the field was not indexed, or its
+  *          value type is neither INT nor LONG */
+ public Query newTermQuery(String fieldName, int token) {
+   // nocommit should we take Number?
+
+   // Field must exist:
+   final FieldType type = getFieldType(fieldName);
+
+   // Field must be indexed:
+   if (type.indexOptions == null) {
+     illegalState(fieldName, "cannot create term query: this field was not indexed");
+   }
+
+   // Encode the token the same way the matching numeric type is prefix-coded (shift 0):
+   final BytesRefBuilder encoded = new BytesRefBuilder();
+   switch (type.valueType) {
+   case LONG:
+     NumericUtils.longToPrefixCodedBytes(token, 0, encoded);
+     break;
+   case INT:
+     NumericUtils.intToPrefixCodedBytes(token, 0, encoded);
+     break;
+   default:
+     illegalState(fieldName, "cannot create int term query when valueType=" + type.valueType);
+   }
+
+   final Term term = new Term(fieldName, encoded.get());
+   return new TermQuery(term);
+ }
+
+ /** Returns a query matching all documents that have this long term.
+  *
+  *  @throws IllegalStateException if the field was not indexed, or its
+  *          value type is not LONG */
+ public Query newTermQuery(String fieldName, long token) {
+
+   // Field must exist:
+   final FieldType type = getFieldType(fieldName);
+
+   // Field must be indexed:
+   if (type.indexOptions == null) {
+     illegalState(fieldName, "cannot create term query: this field was not indexed");
+   }
+
+   final BytesRefBuilder encoded = new BytesRefBuilder();
+
+   // Only LONG fields accept a long token:
+   switch (type.valueType) {
+   case LONG:
+     NumericUtils.longToPrefixCodedBytes(token, 0, encoded);
+     break;
+   default:
+     illegalState(fieldName, "cannot create long term query when valueType=" + type.valueType);
+   }
+
+   final Term term = new Term(fieldName, encoded.get());
+   return new TermQuery(term);
+ }
+
+ /** Returns a query matching all documents that have this binary token.
+  *
+  *  @throws IllegalStateException if the field was not indexed, or its
+  *          value type is neither BINARY nor ATOM */
+ public Query newTermQuery(String fieldName, byte[] token) {
+
+   // Field must exist:
+   final FieldType type = getFieldType(fieldName);
+
+   // Field must be indexed:
+   if (type.indexOptions == null) {
+     illegalState(fieldName, "cannot create term query: this field was not indexed");
+   }
+
+   // Field must be binary:
+   final boolean acceptsBytes = type.valueType == ValueType.BINARY || type.valueType == ValueType.ATOM;
+   if (acceptsBytes == false) {
+     illegalState(fieldName, "binary term query must have valueType BINARY or ATOM; got " + type.valueType);
+   }
+
+   final Term term = new Term(fieldName, new BytesRef(token));
+   return new TermQuery(term);
+ }
+
+ /** Returns a query matching all documents that have this text token.
+  *
+  *  @throws IllegalStateException if the field was not indexed, or its
+  *          value type is not TEXT, SHORT_TEXT or ATOM */
+ public Query newTermQuery(String fieldName, String token) {
+   // Field must exist:
+   FieldType fieldType = getFieldType(fieldName);
+
+   // Field must be indexed:
+   if (fieldType.indexOptions == null) {
+     illegalState(fieldName, "cannot create term query: this field was not indexed");
+   }
+
+   // Field must be text:
+   if (fieldType.valueType != ValueType.TEXT && fieldType.valueType != ValueType.SHORT_TEXT && fieldType.valueType != ValueType.ATOM) {
+     illegalState(fieldName, "text term query must have valueType TEXT, SHORT_TEXT or ATOM; got " + fieldType.valueType);
+   }
+
+   return new TermQuery(new Term(fieldName, token));
+ }
+
+ /** Returns a numeric range query over this field; a null {@code min} or
+  *  {@code max} is passed through as null for that end.  The bounds are
+  *  converted to the field's own numeric type.
+  *
+  *  @throws IllegalStateException if the field was not indexed, or its
+  *          value type is not INT, FLOAT, LONG or DOUBLE */
+ public Query newRangeQuery(String fieldName, Number min, boolean minInclusive, Number max, boolean maxInclusive) {
+
+   // Field must exist:
+   final FieldType type = getFieldType(fieldName);
+
+   // Field must be indexed:
+   if (type.indexOptions == null) {
+     illegalState(fieldName, "cannot create range query: this field was not indexed");
+   }
+
+   // nocommit should we really take Number here?  it's too weakly typed?  you could ask for float range on an int field?  should we
+   // instead make separate methods for each atomic type?  or should we "type check" the incoming Number?  taking Number is more
+   // convenient for query parsers...?
+
+   switch (type.valueType) {
+   case INT: {
+     final Integer lower = min == null ? null : min.intValue();
+     final Integer upper = max == null ? null : max.intValue();
+     return NumericRangeQuery.newIntRange(fieldName, lower, upper, minInclusive, maxInclusive);
+   }
+   case FLOAT: {
+     final Float lower = min == null ? null : min.floatValue();
+     final Float upper = max == null ? null : max.floatValue();
+     return NumericRangeQuery.newFloatRange(fieldName, lower, upper, minInclusive, maxInclusive);
+   }
+   case LONG: {
+     final Long lower = min == null ? null : min.longValue();
+     final Long upper = max == null ? null : max.longValue();
+     return NumericRangeQuery.newLongRange(fieldName, lower, upper, minInclusive, maxInclusive);
+   }
+   case DOUBLE: {
+     final Double lower = min == null ? null : min.doubleValue();
+     final Double upper = max == null ? null : max.doubleValue();
+     return NumericRangeQuery.newDoubleRange(fieldName, lower, upper, minInclusive, maxInclusive);
+   }
+   // nocommit termRangeQuery?  but we should add enableRangeQueries and check against that?
+   default:
+     illegalState(fieldName, "cannot create numeric range query on non-numeric field; got valueType=" + type.valueType);
+
+     // Dead code but javac disagrees:
+     return null;
+   }
+ }
+
+ /** Builds a sort from arbitrary list of fieldName, reversed pairs.  The
+  *  arguments must alternate String (field name) and Boolean (reversed);
+  *  the final Boolean may be omitted, in which case that field is not reversed.
+  *
+  *  @throws IllegalArgumentException if no arguments are given or they do
+  *          not alternate String, Boolean */
+ public Sort newSort(Object... fields) {
+   if (fields.length == 0) {
+     throw new IllegalArgumentException("must sort by at least one field; got nothing");
+   }
+
+   int upto = 0;
+   // One SortField per (name, reversed) pair; rounds up for a trailing name without a Boolean:
+   SortField[] sortFields = new SortField[(fields.length+1)/2];
+
+   while (upto < fields.length) {
+     if ((fields[upto] instanceof String) == false) {
+       throw new IllegalArgumentException("arguments must alternate String, Boolean; expected String but got: " + fields[upto]);
+     }
+     String fieldName = (String) fields[upto];
+     boolean reversed;
+     if (fields.length <= upto+1) {
+       reversed = false;
+     } else if ((fields[upto+1] instanceof Boolean) == false) {
+       // Report the offending argument (the one after the field name), not the field name itself:
+       throw new IllegalArgumentException("arguments must alternate String, Boolean; expected Boolean but got: " + fields[upto+1]);
+     } else {
+       reversed = ((Boolean) fields[upto+1]).booleanValue();
+     }
+     sortFields[upto/2] = newSortField(fieldName, reversed);
+     upto += 2;
+   }
+
+   return new Sort(sortFields);
+ }
+
+ /** Returns the non-reversed SortField for this field. */
+ public SortField newSortField(String fieldName) {
+   final boolean reverse = false;
+   return newSortField(fieldName, reverse);
+ }
+
+ /** Returns the SortField for this field, optionally reversed.
+  *
+  *  @throws IllegalStateException if the field is not sortable, or its
+  *          value type has no sort handling here */
+ public SortField newSortField(String fieldName, boolean reverse) {
+
+   // Field must exist:
+   final FieldType type = getFieldType(fieldName);
+   if (type.sortable != Boolean.TRUE) {
+     illegalState(fieldName, "this field was not indexed for sorting");
+   }
+
+   final boolean multiValued = type.multiValued == Boolean.TRUE;
+
+   // Numeric value types only pick the sort type here; the SortField is built after the switch.
+   final SortField.Type numericType;
+   switch (type.valueType) {
+   case INT:
+     numericType = SortField.Type.INT;
+     break;
+   case FLOAT:
+     numericType = SortField.Type.FLOAT;
+     break;
+   case LONG:
+     numericType = SortField.Type.LONG;
+     break;
+   case DOUBLE:
+     numericType = SortField.Type.DOUBLE;
+     break;
+   case SHORT_TEXT:
+   case ATOM:
+   case BINARY:
+     if (multiValued) {
+       // nocommit need to be able to set selector...
+       return new SortedSetSortField(fieldName, reverse);
+     }
+     return new SortField(fieldName, SortField.Type.STRING, reverse);
+   default:
+     // BUG
+     illegalState(fieldName, "unhandled sort case, valueType=" + type.valueType);
+
+     // Dead code but javac disagrees:
+     return null;
+   }
+
+   if (multiValued) {
+     // nocommit need to be able to set selector...
+     return new SortedNumericSortField(fieldName, numericType, reverse);
+   }
+   return new SortField(fieldName, numericType, reverse);
+ }
+
+ /** Called after a field setting was modified.  Currently this only checks
+  *  that the instance is writable; persisting the change is still to be done
+  *  (see the nocommit below).
+  *
+  *  @throws IllegalStateException if this instance is read-only */
+ private synchronized void changed() {
+ ensureWritable();
+ // Push to IW's commit data
+ // ensureWritable() above has already thrown if writer were null:
+ assert writer != null;
+ // nocommit must serialize current fields to IW's commit data, but this is O(N^2)... hmm
+ }
+
+ /** Verifies that a writer is attached, i.e. that this instance may be modified.
+  *
+  *  @throws IllegalStateException if no writer has been set */
+ private synchronized void ensureWritable() {
+   if (writer == null) {
+     throw new IllegalStateException("FieldTypes is read-only (setIndexWriter was not called)");
+   }
+ }
+
+ static void illegalState(String fieldName, String message) {
+ throw new IllegalStateException("field \"" + fieldName + "\": " + message);
+ }
+
+ static void fieldMustExist(String fieldName, FieldType valueType) {
+ if (valueType == null) {
+ throw new IllegalArgumentException("field \"" + fieldName + "\" is not recognized");
+ }
+ }
+}