You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/10/22 13:19:08 UTC
svn commit: r1633597 - in /lucene/dev/branches/lucene6005/lucene/core/src:
java/org/apache/lucene/document/ java/org/apache/lucene/index/
java/org/apache/lucene/search/ test/org/apache/lucene/document/
Author: mikemccand
Date: Wed Oct 22 11:19:07 2014
New Revision: 1633597
URL: http://svn.apache.org/r1633597
Log:
LUCENE-6005: add default sort order; don't use polymorphism with native types; add pos/offset gap; add highlighting; break out query and index analyzer
Modified:
lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document2.java
lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/FieldTypes.java
lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/document/TestDocument2.java
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document2.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document2.java?rev=1633597&r1=1633596&r2=1633597&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document2.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document2.java Wed Oct 22 11:19:07 2014
@@ -80,7 +80,7 @@ public class Document2 implements IndexD
@Override
public TokenStream tokenStream(Analyzer analyzerIn, TokenStream reuse) throws IOException {
- Analyzer analyzer = fieldTypes.getAnalyzer();
+ Analyzer analyzer = fieldTypes.getIndexAnalyzer();
if (analyzerIn != analyzer) {
// TODO: remove analyzer from IW APIs
throw new IllegalArgumentException("analyzer must be the instance from FieldTypes");
@@ -360,29 +360,28 @@ public class Document2 implements IndexD
fields.add(new FieldValue(fieldName, value, boost));
}
- // addLongArray, addIntArray
+ // nocommit: addLongArray, addIntArray
- // nocommit don't use overloadign here ... change to addLong, addFloat, etc.
/** Default: support for range filtering/querying and sorting (using numeric doc values). */
- public void addNumber(String fieldName, int value) {
+ public void addInt(String fieldName, int value) {
fieldTypes.recordValueType(fieldName, FieldTypes.ValueType.INT);
fields.add(new FieldValue(fieldName, Integer.valueOf(value)));
}
/** Default: support for range filtering/querying and sorting (using numeric doc values). */
- public void addNumber(String fieldName, float value) {
+ public void addFloat(String fieldName, float value) {
fieldTypes.recordValueType(fieldName, FieldTypes.ValueType.FLOAT);
fields.add(new FieldValue(fieldName, Float.valueOf(value)));
}
/** Default: support for range filtering/querying and sorting (using numeric doc values). */
- public void addNumber(String fieldName, long value) {
+ public void addLong(String fieldName, long value) {
fieldTypes.recordValueType(fieldName, FieldTypes.ValueType.LONG);
fields.add(new FieldValue(fieldName, Long.valueOf(value)));
}
/** Default: support for range filtering/querying and sorting (using numeric doc values). */
- public void addNumber(String fieldName, double value) {
+ public void addDouble(String fieldName, double value) {
fieldTypes.recordValueType(fieldName, FieldTypes.ValueType.DOUBLE);
fields.add(new FieldValue(fieldName, Double.valueOf(value)));
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/FieldTypes.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/FieldTypes.java?rev=1633597&r1=1633596&r2=1633597&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/FieldTypes.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/FieldTypes.java Wed Oct 22 11:19:07 2014
@@ -18,7 +18,9 @@ package org.apache.lucene.document;
*/
import java.io.IOException;
+import java.util.ArrayList;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
@@ -90,6 +92,12 @@ import org.apache.lucene.util.NumericUti
// PerFieldAnalyzerWrapper
// oal.document
+// nocommit maybe have an across-the-board default for "stored"?
+
+// nocommit should we validate field names here?
+
+// nocommit can we somehow always store a "source"?
+
// nocommit make ValueType public? add setter so you can set that too?
// language for the field? (to default collator)
@@ -102,8 +110,6 @@ import org.apache.lucene.util.NumericUti
// nocommit suggesters
-// nocommit how to change block tree's block settings?
-
// nocommit index-time sorting should be here too
// nocommit sort by languages
@@ -122,10 +128,6 @@ import org.apache.lucene.util.NumericUti
// nocommit how to randomize IWC? RIW?
-// nocommit add .getSort method
-
-// nocommit add .getXXXQuery? method
-
// nocommit maybe we need change IW's setCommitData API to be "add/remove key/value from commit data"?
// nocommit just persist as FieldInfos? but that's per-segment, and ... it enforces the low-level constraints?
@@ -138,6 +140,8 @@ import org.apache.lucene.util.NumericUti
// nocommit boolean, float16
+// nocommit collapse IndexableField/Type
+
// nocommit can we move multi-field-ness out of IW? so IW only gets a single instance of each field
/** Records how each field is indexed, stored, etc. This class persists
@@ -169,7 +173,8 @@ public class FieldTypes {
private final Map<String,FieldType> fields = new HashMap<>();
- private final Analyzer defaultAnalyzer;
+ private final Analyzer defaultIndexAnalyzer;
+ private final Analyzer defaultQueryAnalyzer;
private final Similarity defaultSimilarity;
/** Just like current oal.document.FieldType, except for each setting it can also record "not-yet-set". */
@@ -188,6 +193,11 @@ public class FieldTypes {
volatile Integer blockTreeMinItemsInBlock;
volatile Integer blockTreeMaxItemsInBlock;
+ // Gaps to add between multiple values of the same field; if these are not set, we fallback to the Analyzer for that field.
+ volatile Integer analyzerPositionGap;
+ volatile Integer analyzerOffsetGap;
+
+ // If the field is numeric, this is the precision step we use:
volatile Integer numericPrecisionStep;
// Whether this field's values are stored, or null if it's not yet set:
@@ -195,6 +205,7 @@ public class FieldTypes {
// Whether this field's values should be indexed as doc values for sorting:
private volatile Boolean sortable;
+ private volatile Boolean sortReversed;
// Whether this field may appear more than once per document:
private volatile Boolean multiValued;
@@ -215,19 +226,26 @@ public class FieldTypes {
private volatile String postingsFormat;
private volatile String docValuesFormat;
- // NOTE: not persisted, because we don't have API for persisting any analyzer :(
- private volatile Analyzer analyzer;
+ private volatile Boolean highlighted;
+
+ // NOTE: not persisted, because we don't have API for persisting arbitrary analyzers, or maybe we require AnalysisFactory is always used
+ // (which we can serialize)?
+ private volatile Analyzer queryAnalyzer;
+ private volatile Analyzer indexAnalyzer;
private volatile Similarity similarity;
- private boolean validate() {
+ boolean validate() {
if (valueType != null) {
switch (valueType) {
case INT:
case FLOAT:
case LONG:
case DOUBLE:
- if (analyzer != null) {
- illegalState(name, "type " + valueType + " cannot have an analyzer");
+ if (indexAnalyzer != null) {
+ illegalState(name, "type " + valueType + " cannot have an indexAnalyzer");
+ }
+ if (queryAnalyzer != null) {
+ illegalState(name, "type " + valueType + " cannot have a queryAnalyzer");
}
if (docValuesType != null && (docValuesType != DocValuesType.NUMERIC && docValuesType != DocValuesType.SORTED_NUMERIC)) {
illegalState(name, "type " + valueType + " must use NUMERIC docValuesType (got: " + docValuesType + ")");
@@ -250,16 +268,22 @@ public class FieldTypes {
}
break;
case BINARY:
- if (analyzer != null) {
- illegalState(name, "type " + valueType + " cannot have an analyzer");
+ if (indexAnalyzer != null) {
+ illegalState(name, "type " + valueType + " cannot have an indexAnalyzer");
+ }
+ if (queryAnalyzer != null) {
+ illegalState(name, "type " + valueType + " cannot have a queryAnalyzer");
}
if (docValuesType != null && docValuesType != DocValuesType.BINARY && docValuesType != DocValuesType.SORTED && docValuesType != DocValuesType.SORTED_SET) {
illegalState(name, "type " + valueType + " must use BINARY docValuesType (got: " + docValuesType + ")");
}
break;
case ATOM:
- if (analyzer != null) {
- illegalState(name, "type " + valueType + " cannot have an analyzer");
+ if (indexAnalyzer != null) {
+ illegalState(name, "type " + valueType + " cannot have an indexAnalyzer");
+ }
+ if (queryAnalyzer != null) {
+ illegalState(name, "type " + valueType + " cannot have a queryAnalyzer");
}
// nocommit make sure norms are disabled?
if (indexOptions != null && indexOptions.compareTo(IndexOptions.DOCS_ONLY) > 0) {
@@ -280,14 +304,63 @@ public class FieldTypes {
illegalState(name, "DocValuesType=" + docValuesType + " cannot be multi-valued");
}
- if (indexOptions == null && blockTreeMinItemsInBlock != null) {
- illegalState(name, "can only setTermsDictBlockSize if the field is indexed");
+ // nocommit we need a separate "doc values disabled" setting?
+ /*
+ if (sortable == Boolean.TRUE && (docValuesType == null || docValuesType == DocValuesType.BINARY)) {
+ illegalState(name, "cannot sort when DocValuesType=" + docValuesType);
+ }
+ */
+
+
+ if (indexOptions == null) {
+ if (blockTreeMinItemsInBlock != null) {
+ illegalState(name, "can only setTermsDictBlockSize if the field is indexed");
+ }
+ if (indexAnalyzer != null) {
+ illegalState(name, "can only setIndexAnalyzer if the field is indexed");
+ }
+ if (queryAnalyzer != null) {
+ illegalState(name, "can only setQueryAnalyzer if the field is indexed");
+ }
+ } else {
+ if (valueType != ValueType.TEXT && valueType != ValueType.SHORT_TEXT && indexAnalyzer != null) {
+ illegalState(name, "can only setIndexAnalyzer for short text and large text fields; got valueType=" + valueType);
+ }
+ if (valueType != ValueType.TEXT && valueType != ValueType.SHORT_TEXT && queryAnalyzer != null) {
+ illegalState(name, "can only setQueryAnalyzer for short text and large text fields; got valueType=" + valueType);
+ }
+ }
+
+ if (analyzerPositionGap != null) {
+ if (indexOptions == null) {
+ illegalState(name, "can only setAnalyzerPositionGap if the field is indexed");
+ }
+ if (multiValued != Boolean.TRUE) {
+ illegalState(name, "can only setAnalyzerPositionGap if the field is multi-valued");
+ }
+ }
+ if (analyzerOffsetGap != null) {
+ if (indexOptions == null) {
+ illegalState(name, "can only setAnalyzerOffsetGap if the field is indexed");
+ }
+ if (multiValued != Boolean.TRUE) {
+ illegalState(name, "can only setAnalyzerOffsetGap if the field is multi-valued");
+ }
}
if (postingsFormat != null && blockTreeMinItemsInBlock != null) {
illegalState(name, "cannot use both setTermsDictBlockSize and setPostingsFormat");
}
+ if (highlighted == Boolean.TRUE) {
+ if (valueType != ValueType.TEXT && valueType != ValueType.SHORT_TEXT) {
+ illegalState(name, "can only enable highlighting for TEXT or SHORT_TEXT fields; got valueType=" + valueType);
+ }
+ if (indexOptions != null && indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
+ illegalState(name, "must index with IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS when highlighting is enabled");
+ }
+ }
+
return true;
}
@@ -397,14 +470,15 @@ public class FieldTypes {
/** Create a new index-time (writable) instance using the specified default analyzer, and {@link IndexSearcher#getDefaultSimilarity}
* similarity. Note that you must call {@link #setIndexWriter} before changing any types. */
public FieldTypes(Analyzer defaultAnalyzer) {
- this(defaultAnalyzer, IndexSearcher.getDefaultSimilarity());
+ this(defaultAnalyzer, defaultAnalyzer, IndexSearcher.getDefaultSimilarity());
}
/** Create a new index-time (writable) instance using the specified default index-time analyzer, default query-time analyzer and similarity. Note that you must call {@link
* #setIndexWriter} before changing any types. */
- public FieldTypes(Analyzer defaultAnalyzer, Similarity defaultSimilarity) {
+ public FieldTypes(Analyzer defaultIndexAnalyzer, Analyzer defaultQueryAnalyzer, Similarity defaultSimilarity) {
this.reader = null;
- this.defaultAnalyzer = defaultAnalyzer;
+ this.defaultIndexAnalyzer = defaultIndexAnalyzer;
+ this.defaultQueryAnalyzer = defaultQueryAnalyzer;
this.defaultSimilarity = defaultSimilarity;
}
@@ -415,9 +489,10 @@ public class FieldTypes {
}
/** Create a new search-time (read-only) instance using the specified default query analyzer and similarity. */
- public FieldTypes(DirectoryReader reader, Analyzer defaultAnalyzer, Similarity defaultSimilarity) throws IOException {
+ public FieldTypes(DirectoryReader reader, Analyzer defaultQueryAnalyzer, Similarity defaultSimilarity) throws IOException {
this.reader = reader;
- this.defaultAnalyzer = defaultAnalyzer;
+ this.defaultIndexAnalyzer = null;
+ this.defaultQueryAnalyzer = defaultQueryAnalyzer;
this.defaultSimilarity = defaultSimilarity;
loadFields(reader.getIndexCommit().getUserData());
}
@@ -493,7 +568,7 @@ public class FieldTypes {
/** Returns a new default {@link IndexWriterConfig}, with {@link Analyzer}, {@link Similarity} and {@link Codec}) pre-set. */
public IndexWriterConfig getDefaultIndexWriterConfig() {
- IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
+ IndexWriterConfig iwc = new IndexWriterConfig(indexAnalyzer);
iwc.setSimilarity(similarity);
iwc.setCodec(codec);
@@ -552,21 +627,67 @@ public class FieldTypes {
}
};
- private final Analyzer analyzer = new DelegatingAnalyzerWrapper(Analyzer.PER_FIELD_REUSE_STRATEGY) {
+ private abstract class FieldTypeAnalyzer extends DelegatingAnalyzerWrapper {
+ public FieldTypeAnalyzer() {
+ super(Analyzer.PER_FIELD_REUSE_STRATEGY);
+ }
+
+ @Override
+ public int getPositionIncrementGap(String fieldName) {
+ // Field must exist:
+ FieldType field = getFieldType(fieldName);
+ if (field.analyzerPositionGap != null) {
+ return field.analyzerPositionGap.intValue();
+ } else if (field.indexAnalyzer != null) {
+ return field.indexAnalyzer.getPositionIncrementGap(fieldName);
+ } else {
+ return defaultIndexAnalyzer.getPositionIncrementGap(fieldName);
+ }
+ }
+
+ @Override
+ public int getOffsetGap(String fieldName) {
+ // Field must exist:
+ FieldType field = getFieldType(fieldName);
+ if (field.analyzerOffsetGap != null) {
+ return field.analyzerOffsetGap.intValue();
+ } else if (field.indexAnalyzer != null) {
+ return field.indexAnalyzer.getOffsetGap(fieldName);
+ } else {
+ return defaultIndexAnalyzer.getOffsetGap(fieldName);
+ }
+ }
+
+ // nocommit what about wrapReader?
+ }
+
+ private final Analyzer indexAnalyzer = new FieldTypeAnalyzer() {
@Override
protected Analyzer getWrappedAnalyzer(String fieldName) {
// Field must exist:
FieldType field = getFieldType(fieldName);
- if (field.analyzer != null) {
- return field.analyzer;
+ if (field.indexAnalyzer != null) {
+ return field.indexAnalyzer;
} else if (field.valueType == ValueType.ATOM) {
- // nocommit need test showing that if you index an ATOM and search field:"XXX YYY" with that atom, it works
- return KEYWORD_ANALYZER;
+ // BUG
+ illegalState(fieldName, "ATOM fields should not be analyzed during indexing");
}
- return FieldTypes.this.defaultAnalyzer;
+ return defaultIndexAnalyzer;
}
+ };
- // nocommit what about wrapReader?
+ private final Analyzer queryAnalyzer = new FieldTypeAnalyzer() {
+ @Override
+ protected Analyzer getWrappedAnalyzer(String fieldName) {
+ // Field must exist:
+ FieldType field = getFieldType(fieldName);
+ if (field.queryAnalyzer != null) {
+ return field.queryAnalyzer;
+ } else if (field.valueType == ValueType.ATOM) {
+ return KEYWORD_ANALYZER;
+ }
+ return defaultQueryAnalyzer;
+ }
};
/** Returns {@link Similarity} that returns the per-field Similarity. */
@@ -579,30 +700,91 @@ public class FieldTypes {
return codec;
}
- /** Returns {@link Analyzer} that returns the per-field analyzer. */
- public Analyzer getAnalyzer() {
- return analyzer;
+ /** Returns {@link Analyzer} that returns the per-field analyzer for use during indexing. */
+ public Analyzer getIndexAnalyzer() {
+ if (writer == null) {
+ return null;
+ } else {
+ return indexAnalyzer;
+ }
+ }
+
+ /** Returns {@link Analyzer} that returns the per-field analyzer for use during searching. */
+ public Analyzer getQueryAnalyzer() {
+ return queryAnalyzer;
}
+ // nocommit we should note that the field has a specific analyzer set, and then throw exc if it didn't get set again after load
+
/** NOTE: analyzer does not persist, so each time you create {@code FieldTypes} from
* {@link IndexWriter} or {@link IndexReader} you must set all per-field analyzers again. */
public synchronized void setAnalyzer(String fieldName, Analyzer analyzer) {
+ setIndexAnalyzer(fieldName, analyzer);
+ setQueryAnalyzer(fieldName, analyzer);
+ }
+
+ /** NOTE: analyzer does not persist, so each time you create {@code FieldTypes} from
+ * {@link IndexWriter} or {@link IndexReader} you must set all per-field analyzers again. */
+ public synchronized void setIndexAnalyzer(String fieldName, Analyzer analyzer) {
FieldType current = fields.get(fieldName);
if (current == null) {
current = new FieldType(fieldName);
- current.analyzer = analyzer;
+ current.indexAnalyzer = analyzer;
fields.put(fieldName, current);
changed();
+ } else if (current.indexAnalyzer == null) {
+ boolean success = false;
+ try {
+ current.indexAnalyzer = analyzer;
+ current.validate();
+ success = true;
+ } finally {
+ if (success == false) {
+ current.indexAnalyzer = null;
+ }
+ }
+ changed();
} else {
- current.analyzer = analyzer;
+ illegalState(fieldName, "analyzer was already set");
+ }
+ }
+
+ public synchronized Analyzer getIndexAnalyzer(String fieldName) {
+ FieldType current = fields.get(fieldName);
+ fieldMustExist(fieldName, current);
+ return current.indexAnalyzer;
+ }
+
+ /** NOTE: analyzer does not persist, so each time you create {@code FieldTypes} from
+ * {@link IndexWriter} or {@link IndexReader} you must set all per-field analyzers again. */
+ public synchronized void setQueryAnalyzer(String fieldName, Analyzer analyzer) {
+ FieldType current = fields.get(fieldName);
+ if (current == null) {
+ current = new FieldType(fieldName);
+ current.queryAnalyzer = analyzer;
+ fields.put(fieldName, current);
+ changed();
+ } else if (current.queryAnalyzer == null) {
+ boolean success = false;
+ try {
+ current.queryAnalyzer = analyzer;
+ current.validate();
+ success = true;
+ } finally {
+ if (success == false) {
+ current.queryAnalyzer = null;
+ }
+ }
changed();
+ } else {
+ illegalState(fieldName, "analyzer was already set");
}
}
- public synchronized Analyzer getAnalyzer(String fieldName) {
+ public synchronized Analyzer getQueryAnalyzer(String fieldName) {
FieldType current = fields.get(fieldName);
fieldMustExist(fieldName, current);
- return current.analyzer;
+ return current.queryAnalyzer;
}
/** NOTE: similarity does not persist, so each time you create {@code FieldTypes} from
@@ -659,6 +841,58 @@ public class FieldTypes {
return current.multiValued == Boolean.TRUE;
}
+ /** The gap that should be added to token positions between each multi-valued field. */
+ public synchronized void setAnalyzerPositionGap(String fieldName, int gap) {
+ FieldType current = fields.get(fieldName);
+ if (current == null) {
+ current = new FieldType(fieldName);
+ current.analyzerPositionGap = gap;
+ fields.put(fieldName, current);
+ changed();
+ } else if (current.analyzerPositionGap == null) {
+ Integer oldValue = current.analyzerPositionGap;
+ boolean success = false;
+ try {
+ current.analyzerPositionGap = gap;
+ current.validate();
+ success = true;
+ } finally {
+ if (success == false) {
+ current.analyzerPositionGap = oldValue;
+ }
+ }
+ changed();
+ } else if (current.analyzerPositionGap.intValue() != gap) {
+ illegalState(fieldName, "analyzerPositionGap was already set to " + current.analyzerPositionGap + "; cannot change again to " + gap);
+ }
+ }
+
+ /** The gap that should be added to token offsets between each multi-valued field. */
+ public synchronized void setAnalyzerOffsetGap(String fieldName, int gap) {
+ FieldType current = fields.get(fieldName);
+ if (current == null) {
+ current = new FieldType(fieldName);
+ current.analyzerOffsetGap = gap;
+ fields.put(fieldName, current);
+ changed();
+ } else if (current.analyzerOffsetGap == null) {
+ Integer oldValue = current.analyzerOffsetGap;
+ boolean success = false;
+ try {
+ current.analyzerOffsetGap = gap;
+ current.validate();
+ success = true;
+ } finally {
+ if (success == false) {
+ current.analyzerOffsetGap = oldValue;
+ }
+ }
+ changed();
+ } else if (current.analyzerOffsetGap.intValue() != gap) {
+ illegalState(fieldName, "analyzerOffsetGap was already set to " + current.analyzerOffsetGap + "; cannot change again to " + gap);
+ }
+ }
+
/** Sets the minimum number of terms in each term block in the terms dictionary. These can be changed at any time, but changes only take
* effect for newly written (flushed or merged) segments. The default is 25; higher values make fewer, larger blocks, which require less
* heap in the IndexReader but slows down term lookups. */
@@ -708,32 +942,43 @@ public class FieldTypes {
/** Enables sorting for this field, using doc values of the appropriate type. */
// nocommit either rename this, or rename enableStored, or both (they are the same letters just shuffled!)
- public synchronized void enableSorted(String fieldName) {
+ public synchronized void enableSorting(String fieldName) {
+ enableSorting(fieldName, false);
+ }
+
+ public synchronized void enableSorting(String fieldName, boolean reversed) {
FieldType current = fields.get(fieldName);
if (current == null) {
current = new FieldType(fieldName);
current.sortable = Boolean.TRUE;
+ current.sortReversed = reversed;
fields.put(fieldName, current);
changed();
} else if (current.sortable == null) {
+ assert current.sortReversed == null;
boolean success = false;
try {
current.sortable = Boolean.TRUE;
+ current.sortReversed = reversed;
current.validate();
success = true;
} finally {
if (success == false) {
current.sortable = null;
+ current.sortReversed = null;
}
}
changed();
} else if (current.sortable == Boolean.FALSE) {
illegalState(fieldName, "sorting was already disabled");
+ } else if (current.sortReversed == null || current.sortReversed.booleanValue() != reversed) { // sortReversed stays null when sortable was only defaulted; a bare Boolean != boolean would unbox null and throw NPE
+ current.sortReversed = reversed;
+ changed();
}
}
/** Disables sorting for this field. */
- public synchronized void disableSorted(String fieldName) {
+ public synchronized void disableSorting(String fieldName) {
FieldType current = fields.get(fieldName);
if (current == null) {
current = new FieldType(fieldName);
@@ -741,9 +986,11 @@ public class FieldTypes {
fields.put(fieldName, current);
changed();
} else if (current.sortable != Boolean.FALSE) {
+ // nocommit don't we need to ... turn off DocValues if they were only on because of sorting?
// nocommit ok to allow this?
// nocommit should we validate?
current.sortable = Boolean.FALSE;
+ current.sortReversed = null;
changed();
}
}
@@ -754,17 +1001,59 @@ public class FieldTypes {
return current.sortable == Boolean.TRUE;
}
- // nocommit too ambitious?
- public synchronized void enableHighlighted(String fieldName) {
+ /** Enables highlighting for this field, using postings highlighter. */
+ public synchronized void enableHighlighting(String fieldName) {
+ FieldType current = fields.get(fieldName);
+ if (current == null) {
+ current = new FieldType(fieldName);
+ current.highlighted = Boolean.TRUE;
+ fields.put(fieldName, current);
+ changed();
+ } else if (current.highlighted == null) {
+ boolean success = false;
+ try {
+ current.highlighted = Boolean.TRUE;
+ current.validate();
+ success = true;
+ } finally {
+ if (success == false) {
+ current.highlighted = null;
+ }
+ }
+ changed();
+ } else if (current.highlighted == Boolean.FALSE) {
+ illegalState(fieldName, "cannot enable highlighting: it was already disabled");
+ }
}
- // nocommit too ambitious?
- public synchronized void disableHighlighted(String fieldName) {
+ /** Disables highlighting for this field. */
+ public synchronized void disableHighlighting(String fieldName) {
+ FieldType current = fields.get(fieldName);
+ if (current == null) {
+ current = new FieldType(fieldName);
+ current.highlighted = Boolean.FALSE;
+ fields.put(fieldName, current);
+ changed();
+ } else if (current.highlighted == null) {
+ Boolean currentValue = current.highlighted;
+ boolean success = false;
+ try {
+ current.highlighted = Boolean.FALSE;
+ current.validate();
+ success = true;
+ } finally {
+ if (success == false) {
+ current.highlighted = currentValue;
+ }
+ }
+ changed();
+ }
}
- // nocommit too ambitious?
public synchronized boolean getHighlighted(String fieldName) {
- return false;
+ FieldType current = fields.get(fieldName);
+ fieldMustExist(fieldName, current);
+ return current.highlighted == Boolean.TRUE;
}
/** Enables norms for this field. This is only allowed if norms were not already disabled. */
@@ -1144,15 +1433,17 @@ public class FieldTypes {
case FLOAT:
case LONG:
case DOUBLE:
- // By default, numbers are trie-indexed as DOCS_ONLY without norms, and enabled for sorting (numeric doc values)
+ if (field.highlighted == null) {
+ field.highlighted = Boolean.FALSE;
+ }
if (field.sortable == null) {
field.sortable = Boolean.TRUE;
}
if (field.multiValued == null) {
field.multiValued = Boolean.FALSE;
}
- if (field.indexOptions == null) {
- field.indexOptions = IndexOptions.DOCS_ONLY;
+ if (field.stored == null) {
+ field.stored = Boolean.TRUE;
}
if (field.sortable == Boolean.TRUE && field.docValuesType == null) {
if (field.multiValued == Boolean.TRUE) {
@@ -1161,6 +1452,9 @@ public class FieldTypes {
field.docValuesType = DocValuesType.NUMERIC;
}
}
+ if (field.indexOptions == null) {
+ field.indexOptions = IndexOptions.DOCS_ONLY;
+ }
if (field.indexNorms == null) {
field.indexNorms = Boolean.FALSE;
}
@@ -1174,13 +1468,18 @@ public class FieldTypes {
break;
case SHORT_TEXT:
- // By default, short text is indexed as DOCS_ONLY without norms, and enabled for sorting (sorted doc values)
+ if (field.highlighted == null) {
+ field.highlighted = Boolean.TRUE;
+ }
if (field.sortable == null) {
field.sortable = Boolean.TRUE;
}
if (field.multiValued == null) {
field.multiValued = Boolean.FALSE;
}
+ if (field.stored == null) {
+ field.stored = Boolean.TRUE;
+ }
if (field.sortable == Boolean.TRUE && field.docValuesType == null) {
if (field.multiValued == Boolean.TRUE) {
field.docValuesType = DocValuesType.SORTED_SET;
@@ -1189,7 +1488,11 @@ public class FieldTypes {
}
}
if (field.indexOptions == null) {
- field.indexOptions = IndexOptions.DOCS_ONLY;
+ if (field.highlighted) {
+ field.indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
+ } else {
+ field.indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
+ }
}
if (field.indexNorms == null) {
field.indexNorms = Boolean.FALSE;
@@ -1197,12 +1500,21 @@ public class FieldTypes {
break;
case ATOM:
+ if (field.highlighted == null) {
+ field.highlighted = Boolean.FALSE;
+ }
if (field.sortable == null) {
field.sortable = Boolean.FALSE;
}
if (field.multiValued == null) {
field.multiValued = Boolean.FALSE;
}
+ if (field.stored == null) {
+ field.stored = Boolean.TRUE;
+ }
+ if (field.indexOptions == null) {
+ field.indexOptions = IndexOptions.DOCS_ONLY;
+ }
if (field.sortable == Boolean.TRUE && field.docValuesType == null) {
if (field.multiValued == Boolean.TRUE) {
field.docValuesType = DocValuesType.SORTED_SET;
@@ -1210,16 +1522,15 @@ public class FieldTypes {
field.docValuesType = DocValuesType.SORTED;
}
}
- if (field.indexOptions == null) {
- field.indexOptions = IndexOptions.DOCS_ONLY;
- }
if (field.indexNorms == null) {
field.indexNorms = Boolean.FALSE;
}
break;
case BINARY:
- // By default, binary is just a stored blob:
+ if (field.highlighted == null) {
+ field.highlighted = Boolean.FALSE;
+ }
if (field.sortable == null) {
field.sortable = Boolean.FALSE;
}
@@ -1229,20 +1540,30 @@ public class FieldTypes {
if (field.stored == null) {
field.stored = Boolean.TRUE;
}
+ if (field.indexNorms == null) {
+ field.indexNorms = Boolean.FALSE;
+ }
break;
case TEXT:
- if (field.stored == null) {
- field.stored = Boolean.TRUE;
+ if (field.highlighted == null) {
+ field.highlighted = Boolean.TRUE;
+ }
+ if (field.sortable == null) {
+ field.sortable = Boolean.FALSE;
}
if (field.multiValued == null) {
field.multiValued = Boolean.FALSE;
}
- if (field.sortable == null) {
- field.sortable = Boolean.FALSE;
+ if (field.stored == null) {
+ field.stored = Boolean.TRUE;
}
if (field.indexOptions == null) {
- field.indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
+ if (field.highlighted) {
+ field.indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
+ } else {
+ field.indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
+ }
}
if (field.indexNorms == null) {
field.indexNorms = Boolean.TRUE;
@@ -1252,6 +1573,13 @@ public class FieldTypes {
default:
throw new AssertionError("missing value type in switch");
}
+
+ assert field.highlighted != null;
+ assert field.stored != null;
+ assert field.multiValued != null;
+ assert field.sortable != null;
+ assert field.indexOptions == null || field.indexNorms != null;
+ assert field.validate();
}
/** Returns a query matching all documents that have this int term. */
@@ -1385,6 +1713,8 @@ public class FieldTypes {
}
}
+ // nocommit newPhraseQuery?
+
/** Builds a sort from arbitrary list of fieldName, reversed pairs. */
public Sort newSort(Object... fields) {
if (fields.length == 0) {
@@ -1392,26 +1722,24 @@ public class FieldTypes {
}
int upto = 0;
- SortField[] sortFields = new SortField[(fields.length+1)/2];
+ List<SortField> sortFields = new ArrayList<>();
while (upto < fields.length) {
if ((fields[upto] instanceof String) == false) {
- throw new IllegalArgumentException("arguments must alternate String, Boolean; expected String but got: " + fields[upto]);
+ throw new IllegalArgumentException("arguments must (String [Boolean])+; expected String but got: " + (fields[upto] == null ? null : fields[upto].getClass())); // null-safe: a null argument must yield this exception, not an NPE
}
- String fieldName = (String) fields[upto];
- boolean reversed;
- if (fields.length <= upto+1) {
- reversed = false;
- } else if ((fields[upto+1] instanceof Boolean) == false) {
- throw new IllegalArgumentException("arguments must alternate String, Boolean; expected Boolean but got: " + fields[upto]);
+ String fieldName = (String) fields[upto++];
+ Boolean reversed;
+ if (upto == fields.length || (fields[upto] instanceof Boolean) == false) {
+ reversed = null;
} else {
- reversed = ((Boolean) fields[upto+1]).booleanValue();
+ reversed = (Boolean) fields[upto]; // upto was already advanced past the field name, so it indexes the optional Boolean itself
+ upto++;
}
- sortFields[upto/2] = newSortField(fieldName, reversed);
- upto += 2;
+ sortFields.add(newSortField(fieldName, reversed));
}
- return new Sort(sortFields);
+ return new Sort(sortFields.toArray(new SortField[sortFields.size()]));
}
/** Returns the SortField for this field. */
@@ -1419,14 +1747,20 @@ public class FieldTypes {
return newSortField(fieldName, false);
}
- /** Returns the SortField for this field, optionally reversed. */
- public SortField newSortField(String fieldName, boolean reverse) {
+ /** Returns the SortField for this field, optionally reversed. If reverse is null, we use the default for the field. */
+ public SortField newSortField(String fieldName, Boolean reverse) {
// Field must exist:
FieldType fieldType = getFieldType(fieldName);
if (fieldType.sortable != Boolean.TRUE) {
illegalState(fieldName, "this field was not indexed for sorting");
}
+ if (reverse == null) {
+ reverse = fieldType.sortReversed;
+ }
+ if (reverse == null) {
+ reverse = Boolean.FALSE;
+ }
switch (fieldType.valueType) {
case INT:
if (fieldType.multiValued == Boolean.TRUE) {
@@ -1478,6 +1812,7 @@ public class FieldTypes {
// Push to IW's commit data
assert writer != null;
// nocommit must serialize current fields to IW's commit data, but this is O(N^2)... hmm
+ // nocommit the schema format itself needs to be versioned too
}
private synchronized void ensureWritable() {
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java?rev=1633597&r1=1633596&r2=1633597&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java Wed Oct 22 11:19:07 2014
@@ -559,6 +559,10 @@ final class DefaultIndexingChain extends
// First time we're seeing this field (indexed) in
// this document:
invertState.reset();
+ } else if (docState.analyzer != null) {
+ // TODO: this "multi-field-ness" (and, Analyzer) should be outside of IW somehow
+ invertState.position += docState.analyzer.getPositionIncrementGap(fieldInfo.name);
+ invertState.offset += docState.analyzer.getOffsetGap(fieldInfo.name);
}
IndexableFieldType fieldType = field.fieldType();
@@ -662,12 +666,6 @@ final class DefaultIndexingChain extends
}
}
- // TODO: this "multi-field-ness" (and, Analyzer) should be outside of IW somehow
- if (docState.analyzer != null) {
- invertState.position += docState.analyzer.getPositionIncrementGap(fieldInfo.name);
- invertState.offset += docState.analyzer.getOffsetGap(fieldInfo.name);
- }
-
invertState.boost *= field.boost();
}
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java?rev=1633597&r1=1633596&r2=1633597&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java Wed Oct 22 11:19:07 2014
@@ -83,8 +83,9 @@ public class PhraseQuery extends Query {
*/
public void add(Term term) {
int position = 0;
- if(positions.size() > 0)
- position = positions.get(positions.size()-1).intValue() + 1;
+ if (positions.size() > 0) {
+ position = positions.get(positions.size()-1).intValue() + 1;
+ }
add(term, position);
}
@@ -117,10 +118,10 @@ public class PhraseQuery extends Query {
* Returns the relative positions of terms in this phrase.
*/
public int[] getPositions() {
- int[] result = new int[positions.size()];
- for(int i = 0; i < positions.size(); i++)
- result[i] = positions.get(i).intValue();
- return result;
+ int[] result = new int[positions.size()];
+ for(int i = 0; i < positions.size(); i++)
+ result[i] = positions.get(i).intValue();
+ return result;
}
@Override
Modified: lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/document/TestDocument2.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/document/TestDocument2.java?rev=1633597&r1=1633596&r2=1633597&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/document/TestDocument2.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/document/TestDocument2.java Wed Oct 22 11:19:07 2014
@@ -33,12 +33,15 @@ import org.apache.lucene.index.IndexRead
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiDocValues;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StoredDocument;
+import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
@@ -66,7 +69,7 @@ public class TestDocument2 extends Lucen
doc.addShortText("title", "a title");
doc.addAtom("id", "29jafnn");
doc.addStored("bytes", new byte[7]);
- doc.addNumber("number", 17);
+ doc.addInt("int", 17);
w.addDocument(doc);
w.close();
dir.close();
@@ -99,7 +102,8 @@ public class TestDocument2 extends Lucen
IndexWriter w = new IndexWriter(dir, types.getDefaultIndexWriterConfig());
types.setIndexWriter(w);
types.enableStored("id");
- types.enableSorted("binary");
+ // Sort reverse by default:
+ types.enableSorting("binary", true);
Document2 doc = new Document2(types);
byte[] value = new byte[5];
@@ -117,8 +121,8 @@ public class TestDocument2 extends Lucen
IndexSearcher s = newSearcher(r);
TopDocs hits = s.search(new MatchAllDocsQuery(), 2, types.newSort("binary"));
assertEquals(2, hits.scoreDocs.length);
- assertEquals("1", r.document(hits.scoreDocs[0].doc).get("id"));
- assertEquals("0", r.document(hits.scoreDocs[1].doc).get("id"));
+ assertEquals("0", r.document(hits.scoreDocs[0].doc).get("id"));
+ assertEquals("1", r.document(hits.scoreDocs[1].doc).get("id"));
r.close();
w.close();
dir.close();
@@ -189,9 +193,9 @@ public class TestDocument2 extends Lucen
types.setDocValuesType("sortednumeric", DocValuesType.SORTED_NUMERIC);
Document2 doc = new Document2(types);
- doc.addNumber("sortednumeric", 3);
- doc.addNumber("sortednumeric", 1);
- doc.addNumber("sortednumeric", 2);
+ doc.addInt("sortednumeric", 3);
+ doc.addInt("sortednumeric", 1);
+ doc.addInt("sortednumeric", 2);
w.addDocument(doc);
IndexReader r = DirectoryReader.open(w, true);
SortedNumericDocValues sndv = MultiDocValues.getSortedNumericValues(r, "sortednumeric");
@@ -214,21 +218,21 @@ public class TestDocument2 extends Lucen
IndexWriter w = new IndexWriter(dir, types.getDefaultIndexWriterConfig());
types.setIndexWriter(w);
types.enableStored("id");
- types.enableSorted("id");
+ types.enableSorting("id");
//System.out.println("id type: " + types.getFieldType("id"));
Document2 doc = new Document2(types);
- doc.addNumber("float", 3f);
+ doc.addFloat("float", 3f);
doc.addAtom("id", "one");
w.addDocument(doc);
doc = new Document2(types);
- doc.addNumber("float", 2f);
+ doc.addFloat("float", 2f);
doc.addAtom("id", "two");
w.addDocument(doc);
doc = new Document2(types);
- doc.addNumber("float", 7f);
+ doc.addFloat("float", 7f);
doc.addAtom("id", "three");
w.addDocument(doc);
@@ -282,16 +286,8 @@ public class TestDocument2 extends Lucen
types.setIndexWriter(w);
Document2 doc = new Document2(types);
- doc.addNumber("int", 3);
+ doc.addInt("int", 3);
w.addDocument(doc);
-
- doc = new Document2(types);
- try {
- doc.addNumber("int", 2.0);
- fail("did not hit exception");
- } catch (IllegalStateException ise) {
- // expected
- }
w.close();
dir.close();
}
@@ -305,15 +301,15 @@ public class TestDocument2 extends Lucen
types.setIndexWriter(w);
Document2 doc = new Document2(types);
- doc.addNumber("int", 3);
+ doc.addInt("int", 3);
w.addDocument(doc);
doc = new Document2(types);
- doc.addNumber("int", 2);
+ doc.addInt("int", 2);
w.addDocument(doc);
doc = new Document2(types);
- doc.addNumber("int", 7);
+ doc.addInt("int", 7);
w.addDocument(doc);
IndexReader r = DirectoryReader.open(w, true);
@@ -344,10 +340,8 @@ public class TestDocument2 extends Lucen
FieldTypes types = new FieldTypes(a);
IndexWriter w = new IndexWriter(dir, types.getDefaultIndexWriterConfig());
types.setIndexWriter(w);
- types.setAnalyzer("atom", a);
- Document2 doc = new Document2(types);
try {
- doc.addAtom("atom", "blahblah");
+ types.setAnalyzer("atom", a);
fail("did not hit expected exception");
} catch (IllegalStateException ise) {
// expected
@@ -366,7 +360,7 @@ public class TestDocument2 extends Lucen
types.setDocValuesType("string", DocValuesType.SORTED);
Document2 doc = new Document2(types);
try {
- doc.addNumber("string", 17);
+ doc.addInt("string", 17);
fail("did not hit expected exception");
} catch (IllegalStateException ise) {
// expected
@@ -387,7 +381,7 @@ public class TestDocument2 extends Lucen
types.setDocValuesType("binary", DocValuesType.BINARY);
Document2 doc = new Document2(types);
try {
- doc.addNumber("binary", 17);
+ doc.addInt("binary", 17);
fail("did not hit expected exception");
} catch (IllegalStateException ise) {
// expected
@@ -448,7 +442,7 @@ public class TestDocument2 extends Lucen
types.setIndexWriter(w);
// Normally sorting is not enabled for atom fields:
- types.enableSorted("id");
+ types.enableSorting("id", true);
types.enableStored("id");
Document2 doc = new Document2(types);
@@ -462,8 +456,8 @@ public class TestDocument2 extends Lucen
IndexSearcher s = newSearcher(r);
TopDocs hits = s.search(new MatchAllDocsQuery(), 2, types.newSort("id"));
assertEquals(2, hits.scoreDocs.length);
- assertEquals("one", r.document(hits.scoreDocs[0].doc).get("id"));
- assertEquals("two", r.document(hits.scoreDocs[1].doc).get("id"));
+ assertEquals("two", r.document(hits.scoreDocs[0].doc).get("id"));
+ assertEquals("one", r.document(hits.scoreDocs[1].doc).get("id"));
r.close();
w.close();
dir.close();
@@ -477,18 +471,18 @@ public class TestDocument2 extends Lucen
types.setIndexWriter(w);
types.setMultiValued("numbers");
- types.enableSorted("numbers");
+ types.enableSorting("numbers");
types.enableStored("id");
Document2 doc = new Document2(types);
- doc.addNumber("numbers", 1);
- doc.addNumber("numbers", 2);
+ doc.addInt("numbers", 1);
+ doc.addInt("numbers", 2);
doc.addAtom("id", "one");
w.addDocument(doc);
doc = new Document2(types);
- doc.addNumber("numbers", -10);
- doc.addNumber("numbers", -20);
+ doc.addInt("numbers", -10);
+ doc.addInt("numbers", -20);
doc.addAtom("id", "two");
w.addDocument(doc);
@@ -511,7 +505,7 @@ public class TestDocument2 extends Lucen
types.setIndexWriter(w);
types.setMultiValued("strings");
- types.enableSorted("strings");
+ types.enableSorting("strings");
types.enableStored("id");
Document2 doc = new Document2(types);
@@ -597,7 +591,7 @@ public class TestDocument2 extends Lucen
}
assertFalse(types.getMultiValued("numeric"));
Document2 doc = new Document2(types);
- doc.addNumber("numeric", 17);
+ doc.addInt("numeric", 17);
w.addDocument(doc);
w.close();
dir.close();
@@ -644,7 +638,7 @@ public class TestDocument2 extends Lucen
types.setIndexWriter(w);
Document2 doc = new Document2(types);
- doc.addNumber("id", 1L);
+ doc.addLong("id", 1L);
w.addDocument(doc);
IndexReader r = DirectoryReader.open(w, true);
@@ -665,7 +659,7 @@ public class TestDocument2 extends Lucen
types.setIndexWriter(w);
Document2 doc = new Document2(types);
- doc.addNumber("id", 1);
+ doc.addInt("id", 1);
w.addDocument(doc);
IndexReader r = DirectoryReader.open(w, true);
@@ -677,6 +671,26 @@ public class TestDocument2 extends Lucen
dir.close();
}
+ public void testNumericPrecisionStep() throws Exception {
+ Directory dir = newDirectory();
+ Analyzer a = new MockAnalyzer(random());
+ FieldTypes types = new FieldTypes(a);
+ IndexWriterConfig iwc = types.getDefaultIndexWriterConfig();
+ IndexWriter w = new IndexWriter(dir, iwc);
+ types.setIndexWriter(w);
+ types.setNumericPrecisionStep("long", 4);
+
+ Document2 doc = new Document2(types);
+ doc.addLong("long", 17);
+ w.addDocument(doc);
+
+ IndexReader r = DirectoryReader.open(w, true);
+ assertEquals(16, MultiFields.getTerms(r, "long").size());
+ r.close();
+ w.close();
+ dir.close();
+ }
+
public void testBinaryTermQuery() throws Exception {
Directory dir = newDirectory();
Analyzer a = new MockAnalyzer(random());
@@ -709,7 +723,7 @@ public class TestDocument2 extends Lucen
types.setDocValuesFormat("id", "Memory");
types.enableStored("id");
- types.enableSorted("id");
+ types.enableSorting("id");
Document2 doc = new Document2(types);
doc.addAtom("id", "1");
@@ -815,7 +829,71 @@ public class TestDocument2 extends Lucen
Document2 doc = new Document2(types);
doc.addAtom("id", "foo bar");
w.addDocument(doc);
- BaseTokenStreamTestCase.assertTokenStreamContents(types.getAnalyzer().tokenStream("id", "foo bar"), new String[] {"foo bar"}, new int[1], new int[] {7});
+ BaseTokenStreamTestCase.assertTokenStreamContents(types.getQueryAnalyzer().tokenStream("id", "foo bar"), new String[] {"foo bar"}, new int[1], new int[] {7});
+ w.close();
+ dir.close();
+ }
+
+ public void testHighlight() throws Exception {
+ Directory dir = newDirectory();
+ Analyzer a = new MockAnalyzer(random());
+ FieldTypes types = new FieldTypes(a);
+ IndexWriterConfig iwc = types.getDefaultIndexWriterConfig();
+ IndexWriter w = new IndexWriter(dir, iwc);
+ types.setIndexWriter(w);
+ types.disableHighlighting("no_highlight");
+
+ Document2 doc = new Document2(types);
+ doc.addLargeText("highlight", "here is some content");
+ doc.addLargeText("no_highlight", "here is some content");
+ w.addDocument(doc);
+
+ // nocommit: we can't actually run highlighter ... w/o being outside core ... maybe this test should be elsewhere?
+ IndexReader r = DirectoryReader.open(w, true);
+ assertTrue(MultiFields.getTerms(r, "highlight").hasOffsets());
+ assertFalse(MultiFields.getTerms(r, "no_highlight").hasOffsets());
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testAnalyzerPositionGap() throws Exception {
+ Directory dir = newDirectory();
+ Analyzer a = new MockAnalyzer(random());
+ FieldTypes types = new FieldTypes(a);
+ IndexWriterConfig iwc = types.getDefaultIndexWriterConfig();
+ IndexWriter w = new IndexWriter(dir, iwc);
+ types.setIndexWriter(w);
+ types.setIndexOptions("nogap", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
+ types.setMultiValued("nogap");
+ types.disableHighlighting("nogap");
+ types.setAnalyzerPositionGap("nogap", 0);
+
+ types.setIndexOptions("onegap", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
+ types.setMultiValued("onegap");
+ types.disableHighlighting("onegap");
+ types.setAnalyzerPositionGap("onegap", 1);
+
+ Document2 doc = new Document2(types);
+ doc.addLargeText("nogap", "word1");
+ doc.addLargeText("nogap", "word2");
+ doc.addLargeText("onegap", "word1");
+ doc.addLargeText("onegap", "word2");
+ w.addDocument(doc);
+
+ IndexReader r = DirectoryReader.open(w, true);
+ IndexSearcher s = newSearcher(r);
+
+ PhraseQuery q = new PhraseQuery();
+ q.add(new Term("nogap", "word1"));
+ q.add(new Term("nogap", "word2"));
+ assertEquals(1, s.search(q, 1).totalHits);
+
+ q = new PhraseQuery();
+ q.add(new Term("onegap", "word1"));
+ q.add(new Term("onegap", "word2"));
+ assertEquals(0, s.search(q, 1).totalHits);
+ r.close();
w.close();
dir.close();
}
@@ -827,4 +905,6 @@ public class TestDocument2 extends Lucen
// nocommit test for pre-analyzed
// nocommit test multi-valued
+
+ // nocommit test serialize
}