You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2019/04/07 12:31:08 UTC
[lucene-solr] 28/34: Improve sample documents indexing: term vectors, index options, doc values
This is an automated email from the ASF dual-hosted git repository.
uschindler pushed a commit to branch jira/lucene-2562-luke-swing-3
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
commit 3bbf0f868d2a0b8ee484066d84bd55c0ce524b29
Author: Tomoko Uchida <to...@gmail.com>
AuthorDate: Sun Apr 7 13:28:31 2019 +0900
Improve sample documents indexing: term vectors, index options, doc values
---
.../luke/models/util/twentynewsgroups/Message.java | 28 ++++++++++++++++++++--
1 file changed, 26 insertions(+), 2 deletions(-)
diff --git a/lucene/luke/src/java/org/apache/lucene/luke/models/util/twentynewsgroups/Message.java b/lucene/luke/src/java/org/apache/lucene/luke/models/util/twentynewsgroups/Message.java
index 85aaaa6..e62d2c0 100644
--- a/lucene/luke/src/java/org/apache/lucene/luke/models/util/twentynewsgroups/Message.java
+++ b/lucene/luke/src/java/org/apache/lucene/luke/models/util/twentynewsgroups/Message.java
@@ -27,11 +27,15 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.UAX29URLEmailAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.SortedNumericDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.util.BytesRef;
/** Data holder class for a newsgroups message */
public class Message {
@@ -119,11 +123,12 @@ public class Message {
if (Objects.nonNull(getNewsgroups())) {
for (String newsgroup : getNewsgroups()) {
doc.add(new StringField("newsgroup", newsgroup, Field.Store.YES));
+ doc.add(new SortedSetDocValuesField("newsgroup_sort", new BytesRef(newsgroup)));
}
}
if (Objects.nonNull(getSubject())) {
- doc.add(new TextField("subject", getSubject(), Field.Store.YES));
+ doc.add(new Field("subject", getSubject(), SUBJECT_FIELD_TYPE));
}
if (Objects.nonNull(getMessageId())) {
@@ -144,7 +149,7 @@ public class Message {
doc.add(new StoredField("lines_raw", String.valueOf(getLines())));
if (Objects.nonNull(getBody())) {
- doc.add(new TextField("body", getBody(), Field.Store.YES));
+ doc.add(new Field("body", getBody(), BODY_FIELD_TYPE));
}
return doc;
@@ -155,4 +160,23 @@ public class Message {
map.put("from", new UAX29URLEmailAnalyzer());
return new PerFieldAnalyzerWrapper(new StandardAnalyzer(), map);
}
+
+ private final static FieldType SUBJECT_FIELD_TYPE;
+
+ private final static FieldType BODY_FIELD_TYPE;
+
+ static {
+ SUBJECT_FIELD_TYPE = new FieldType();
+ SUBJECT_FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+ SUBJECT_FIELD_TYPE.setTokenized(true);
+ SUBJECT_FIELD_TYPE.setStored(true);
+
+ BODY_FIELD_TYPE = new FieldType();
+ BODY_FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
+ BODY_FIELD_TYPE.setTokenized(true);
+ BODY_FIELD_TYPE.setStored(true);
+ BODY_FIELD_TYPE.setStoreTermVectors(true);
+ BODY_FIELD_TYPE.setStoreTermVectorPositions(true);
+ BODY_FIELD_TYPE.setStoreTermVectorOffsets(true);
+ }
}