You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by sh...@apache.org on 2015/07/12 05:30:18 UTC
svn commit: r1690423 - in
/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src:
main/java/org/apache/manifoldcf/agents/output/lucene/
main/native2ascii/org/apache/manifoldcf/agents/output/lucene/
main/resources/org/apache/manifoldcf/age...
Author: shinichiro
Date: Sun Jul 12 03:30:17 2015
New Revision: 1690423
URL: http://svn.apache.org/r1690423
Log:
add term_vector option, addTextField() using Reader and addStoredField() using BytesRef
Modified:
manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClient.java
manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClientManager.java
manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConfig.java
manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConnector.java
manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneDocument.java
manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/lucene/common_en_US.properties
manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/editConfiguration.js
manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/editConfiguration_Parameters.html
manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/viewConfiguration.html
manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/test/java/org/apache/manifoldcf/agents/output/lucene/tests/LuceneClientTest.java
Modified: manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClient.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClient.java?rev=1690423&r1=1690422&r2=1690423&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClient.java (original)
+++ manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClient.java Sun Jul 12 03:30:17 2015
@@ -24,6 +24,7 @@ import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
import java.util.concurrent.TimeUnit;
@@ -67,7 +68,7 @@ public class LuceneClient implements Clo
private final Map<String,Map<String,Object>> fieldsInfo;
private final String idField;
private final String contentField;
- private final Long maximumDocumentLength;
+ private final Long maxDocumentLength;
private final String versionString;
@@ -97,24 +98,36 @@ public class LuceneClient implements Clo
public static final String ATTR_STORE = "store";
public static final String ATTR_INDEX_ANALYZER = "index_analyzer";
public static final String ATTR_QUERY_ANALYZER = "query_analyzer";
+ public static final String ATTR_TERM_VECTOR = "term_vector";
public static final String ATTR_COPY_TO = "copy_to";
- public static final String FIELDTYPE_STRING = "string";
- public static final String FIELDTYPE_TEXT = "text";
+ public static enum FieldType {
+ STRING, TEXT;
+ @Override public String toString() {
+ return name().toLowerCase(Locale.ROOT);
+ }
+ }
+
+ public static enum TermVector {
+ NO, YES, WITH_POSITIONS, WITH_OFFSETS, WITH_POSITIONS_OFFSETS;
+ @Override public String toString() {
+ return name().toLowerCase(Locale.ROOT);
+ }
+ }
public LuceneClient(Path path) throws IOException {
this(path,
LuceneClient.defaultCharfilters(), LuceneClient.defaultTokenizers(), LuceneClient.defaultFilters(),
LuceneClient.defaultAnalyzers(), LuceneClient.defaultFields(),
LuceneClient.defaultIdField(), LuceneClient.defaultContentField(),
- LuceneClient.defaultMaximumDocumentLength());
+ LuceneClient.defaultMaxDocumentLength());
}
public LuceneClient(Path path,
String charfilters, String tokenizers, String filters,
String analyzers, String fields,
String idField, String contentField,
- Long maximumDocumentLength) throws IOException {
+ Long maxDocumentLength) throws IOException {
this.path = Preconditions.checkNotNull(path);
this.charfiltersInfo = parseAsMap(Preconditions.checkNotNull(charfilters));
this.tokenizersInfo = parseAsMap(Preconditions.checkNotNull(tokenizers));
@@ -123,9 +136,9 @@ public class LuceneClient implements Clo
this.fieldsInfo = parseAsMap(Preconditions.checkNotNull(fields));
this.idField = Preconditions.checkNotNull(idField);
this.contentField = Preconditions.checkNotNull(contentField);
- this.maximumDocumentLength = Preconditions.checkNotNull(maximumDocumentLength);
+ this.maxDocumentLength = Preconditions.checkNotNull(maxDocumentLength);
- this.versionString = createVersionString(path, charfiltersInfo, tokenizersInfo, filtersInfo, analyzersInfo, fieldsInfo, idField, contentField, maximumDocumentLength);
+ this.versionString = createVersionString(path, charfiltersInfo, tokenizersInfo, filtersInfo, analyzersInfo, fieldsInfo, idField, contentField, maxDocumentLength);
Map<String,Analyzer> analyzersMap = createAnalyzersMap();
Map<String,Analyzer> fieldIndexAnalyzers = createFieldAnalyzers(analyzersMap, ATTR_INDEX_ANALYZER);
@@ -221,7 +234,7 @@ public class LuceneClient implements Clo
private Map<String,Analyzer> createFieldAnalyzers(Map<String,Analyzer> analyzersMap, String target) {
Map<String,Analyzer> fieldAnalyzers = Maps.newHashMap();
for (Map.Entry<String,Map<String,Object>> e : fieldsInfo.entrySet()) {
- if (e.getValue().get(ATTR_FIELDTYPE).toString().equals(FIELDTYPE_TEXT)) {
+ if (e.getValue().get(ATTR_FIELDTYPE).toString().equals(FieldType.TEXT.toString())) {
String field = e.getKey();
String analyzer = e.getValue().get(target).toString();
fieldAnalyzers.put(field, analyzersMap.get(analyzer));
@@ -256,8 +269,8 @@ public class LuceneClient implements Clo
return contentField;
}
- public Long maximumDocumentLength() {
- return maximumDocumentLength;
+ public Long maxDocumentLength() {
+ return maxDocumentLength;
}
public String versionString() {
@@ -272,7 +285,7 @@ public class LuceneClient implements Clo
Map<String,Map<String,Object>> analyzersInfo,
Map<String,Map<String,Object>> fieldsInfo,
String idField,String contentField,
- Long maximumDocumentLength) {
+ Long maxDocumentLength) {
return LuceneConfig.PARAM_PATH + ":" + path.toString() + "+"
+ LuceneConfig.PARAM_CHARFILTERS + ":" + Joiner.on(",").withKeyValueSeparator("=").join(charfiltersInfo) + "+"
+ LuceneConfig.PARAM_TOKENIZERS + ":" + Joiner.on(",").withKeyValueSeparator("=").join(tokenizersInfo) + "+"
@@ -281,7 +294,7 @@ public class LuceneClient implements Clo
+ LuceneConfig.PARAM_FIELDS + ":" + Joiner.on(",").withKeyValueSeparator("=").join(fieldsInfo) + "+"
+ LuceneConfig.PARAM_IDFIELD + ":" + idField + "+"
+ LuceneConfig.PARAM_CONTENTFIELD + ":" + contentField + "+"
- + LuceneConfig.PARAM_MAXIMUMDOCUMENTLENGTH + ":" + maximumDocumentLength.toString();
+ + LuceneConfig.PARAM_MAXDOCUMENTLENGTH + ":" + maxDocumentLength.toString();
}
public void refresh() throws IOException {
@@ -427,12 +440,12 @@ public class LuceneClient implements Clo
public static String defaultFields() {
String fields =
"{" + "\n"
- + " \"id\":{\""+ATTR_FIELDTYPE+"\":\""+FIELDTYPE_STRING+"\", \""+ATTR_STORE+"\":true},"+ "\n"
- + " \"cat\":{\""+ATTR_FIELDTYPE+"\":\""+FIELDTYPE_STRING+"\", \""+ATTR_STORE+"\":true},"+ "\n"
- + " \"author\":{\""+ATTR_FIELDTYPE+"\":\""+FIELDTYPE_STRING+"\", \""+ATTR_STORE+"\":true},"+ "\n"
- + " \"content\":{\""+ATTR_FIELDTYPE+"\":\""+FIELDTYPE_TEXT+"\", \""+ATTR_STORE+"\":true,\""+ATTR_INDEX_ANALYZER+"\":\"text_general\",\""+ATTR_QUERY_ANALYZER+"\":\"text_general\",\""+ATTR_COPY_TO+"\":[\"content_ws\", \"content_ngram\"]}," + "\n"
- + " \"content_ws\":{\""+ATTR_FIELDTYPE+"\":\""+FIELDTYPE_TEXT+"\", \""+ATTR_STORE+"\":false,\""+ATTR_INDEX_ANALYZER+"\":\"text_ws\",\""+ATTR_QUERY_ANALYZER+"\":\"text_ws\"}," + "\n"
- + " \"content_ngram\":{\""+ATTR_FIELDTYPE+"\":\""+FIELDTYPE_TEXT+"\", \""+ATTR_STORE+"\":false,\""+ATTR_INDEX_ANALYZER+"\":\"text_ngram\",\""+ATTR_QUERY_ANALYZER+"\":\"text_ngram\"}" + "\n"
+ + " \"id\":{\""+ATTR_FIELDTYPE+"\":\""+FieldType.STRING.toString()+"\", \""+ATTR_STORE+"\":true},"+ "\n"
+ + " \"cat\":{\""+ATTR_FIELDTYPE+"\":\""+FieldType.STRING.toString()+"\", \""+ATTR_STORE+"\":true},"+ "\n"
+ + " \"author\":{\""+ATTR_FIELDTYPE+"\":\""+FieldType.STRING.toString()+"\", \""+ATTR_STORE+"\":true},"+ "\n"
+ + " \"content\":{\""+ATTR_FIELDTYPE+"\":\""+FieldType.TEXT.toString()+"\", \""+ATTR_STORE+"\":true,\""+ATTR_INDEX_ANALYZER+"\":\"text_general\",\""+ATTR_QUERY_ANALYZER+"\":\"text_general\",\""+ ATTR_TERM_VECTOR +"\":\""+ TermVector.WITH_POSITIONS_OFFSETS.toString() +"\",\""+ATTR_COPY_TO+"\":[\"content_ws\", \"content_ngram\"]}," + "\n"
+ + " \"content_ws\":{\""+ATTR_FIELDTYPE+"\":\""+FieldType.TEXT.toString()+"\", \""+ATTR_STORE+"\":false,\""+ATTR_INDEX_ANALYZER+"\":\"text_ws\",\""+ATTR_QUERY_ANALYZER+"\":\"text_ws\"}," + "\n"
+ + " \"content_ngram\":{\""+ATTR_FIELDTYPE+"\":\""+FieldType.TEXT.toString()+"\", \""+ATTR_STORE+"\":false,\""+ATTR_INDEX_ANALYZER+"\":\"text_ngram\",\""+ATTR_QUERY_ANALYZER+"\":\"text_ngram\"}" + "\n"
+ "}";
return fields;
}
@@ -445,7 +458,7 @@ public class LuceneClient implements Clo
return "content";
}
- public static Long defaultMaximumDocumentLength() {
+ public static Long defaultMaxDocumentLength() {
return new Long(700000000L);
}
Modified: manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClientManager.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClientManager.java?rev=1690423&r1=1690422&r2=1690423&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClientManager.java (original)
+++ manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClientManager.java Sun Jul 12 03:30:17 2015
@@ -16,17 +16,17 @@ public class LuceneClientManager {
String charfilters, String tokenizers, String filters,
String analyzers, String fields,
String idField, String contentField,
- Long maximumDocumentLength) throws Exception
+ Long maxDocumentLength) throws Exception
{
LuceneClient client = clients.get(path);
if (client == null) {
- return newClient(path, charfilters, tokenizers, filters, analyzers, fields, idField, contentField, maximumDocumentLength);
+ return newClient(path, charfilters, tokenizers, filters, analyzers, fields, idField, contentField, maxDocumentLength);
}
if (client != null) {
if (!client.isOpen()) {
- return newClient(path, charfilters, tokenizers, filters, analyzers, fields, idField, contentField, maximumDocumentLength);
+ return newClient(path, charfilters, tokenizers, filters, analyzers, fields, idField, contentField, maxDocumentLength);
}
String latestVersion = LuceneClient.createVersionString(
new File(path).toPath(),
@@ -35,7 +35,7 @@ public class LuceneClientManager {
LuceneClient.parseAsMap(filters),
LuceneClient.parseAsMap(analyzers),
LuceneClient.parseAsMap(fields),
- idField, contentField, maximumDocumentLength);
+ idField, contentField, maxDocumentLength);
String activeVersion = client.versionString();
if (!activeVersion.equals(latestVersion)) {
throw new IllegalStateException("The connection on this path is active. Can not update to the latest settings."
@@ -51,11 +51,11 @@ public class LuceneClientManager {
String charfilters, String tokenizers, String filters,
String analyzers, String fields,
String idField, String contentField,
- Long maximumDocumentLength) throws Exception
+ Long maxDocumentLength) throws Exception
{
LuceneClient client = new LuceneClient(new File(path).toPath(),
charfilters, tokenizers, filters, analyzers, fields,
- idField, contentField, maximumDocumentLength);
+ idField, contentField, maxDocumentLength);
clients.put(path, client);
return client;
}
Modified: manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConfig.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConfig.java?rev=1690423&r1=1690422&r2=1690423&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConfig.java (original)
+++ manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConfig.java Sun Jul 12 03:30:17 2015
@@ -29,5 +29,5 @@ public class LuceneConfig
public static final String PARAM_IDFIELD = "idfield";
public static final String PARAM_CONTENTFIELD = "contentfield";
- public static final String PARAM_MAXIMUMDOCUMENTLENGTH = "maximumdocumentlength";
+ public static final String PARAM_MAXDOCUMENTLENGTH = "maxdocumentlength";
}
Modified: manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConnector.java?rev=1690423&r1=1690422&r2=1690423&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConnector.java Sun Jul 12 03:30:17 2015
@@ -17,9 +17,6 @@
package org.apache.manifoldcf.agents.output.lucene;
import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.Reader;
-import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
@@ -162,16 +159,16 @@ public class LuceneConnector extends org
if (contentField == null)
throw new ManifoldCFException("content field not configured");
- final String maxDocumentLength = params.getParameter(LuceneConfig.PARAM_MAXIMUMDOCUMENTLENGTH);
- if (maxDocumentLength == null)
- throw new ManifoldCFException("maximum document length not configured");
- Long maximumDocumentLength = new Long(maxDocumentLength);
+ final String maxDocLength = params.getParameter(LuceneConfig.PARAM_MAXDOCUMENTLENGTH);
+ if (maxDocLength == null)
+ throw new ManifoldCFException("max document length not configured");
+ Long maxDocumentLength = new Long(maxDocLength);
try
{
client = LuceneClientManager.getClient(path,
charfilters, tokenizers, filters, analyzers, fields,
- idField, contentField, maximumDocumentLength);
+ idField, contentField, maxDocumentLength);
}
catch (Exception e)
{
@@ -279,7 +276,7 @@ public class LuceneConnector extends org
long length, IOutputCheckActivity activities)
throws ManifoldCFException, ServiceInterruption {
getSession();
- if (length > client.maximumDocumentLength())
+ if (length > client.maxDocumentLength())
return false;
return true;
}
@@ -316,12 +313,14 @@ public class LuceneConnector extends org
* only if there's a stream error reading the document data.
*/
@Override
- public int addOrReplaceDocumentWithException(String documentURI, VersionContext pipelineDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
- throws ManifoldCFException, ServiceInterruption, IOException
+ public int addOrReplaceDocumentWithException(String documentURI,
+ VersionContext pipelineDescription, RepositoryDocument document,
+ String authorityNameString, IOutputAddActivity activities)
+ throws ManifoldCFException, ServiceInterruption, IOException
{
getSession();
- if (client.maximumDocumentLength() != null && document.getBinaryLength() > client.maximumDocumentLength().longValue()){
+ if (document.getBinaryLength() > client.maxDocumentLength().longValue()){
activities.recordActivity(null, INGEST_ACTIVITY, null, documentURI, activities.EXCLUDED_LENGTH, "Lucene connector rejected document due to its big size: ('"+document.getBinaryLength()+"')");
return DOCUMENTSTATUS_REJECTED;
}
@@ -349,17 +348,7 @@ public class LuceneConnector extends org
try
{
- Reader r = new InputStreamReader(document.getBinaryStream(), StandardCharsets.UTF_8);
- StringBuilder sb = new StringBuilder((int)document.getBinaryLength());
- char[] buffer = new char[65536];
- while (true)
- {
- int amt = r.read(buffer,0,buffer.length);
- if (amt == -1)
- break;
- sb.append(buffer,0,amt);
- }
- doc = LuceneDocument.addField(doc, client.contentField(), sb.toString(), client.fieldsInfo());
+ doc = LuceneDocument.addField(doc, client.contentField(), document.getBinaryStream(), client.fieldsInfo());
} catch (Exception e) {
if (e instanceof IOException) {
Logging.connectors.error("[Parsing Content]Content is not text plain, verify you are properly using Apache Tika Transformer " + documentURI, e);
@@ -407,8 +396,9 @@ public class LuceneConnector extends org
* processing activity.
*/
@Override
- public void removeDocument(String documentURI, String outputDescription, IOutputRemoveActivity activities)
- throws ManifoldCFException, ServiceInterruption
+ public void removeDocument(String documentURI, String outputDescription,
+ IOutputRemoveActivity activities)
+ throws ManifoldCFException, ServiceInterruption
{
getSession();
@@ -533,10 +523,10 @@ public class LuceneConnector extends org
contentField = LuceneClient.defaultContentField();
map.put(LuceneConfig.PARAM_CONTENTFIELD, contentField);
- String maximumDocumentLength = configParams.getParameter(LuceneConfig.PARAM_MAXIMUMDOCUMENTLENGTH);
- if (maximumDocumentLength == null)
- maximumDocumentLength = LuceneClient.defaultMaximumDocumentLength().toString();
- map.put(LuceneConfig.PARAM_MAXIMUMDOCUMENTLENGTH, maximumDocumentLength);
+ String maxDocumentLength = configParams.getParameter(LuceneConfig.PARAM_MAXDOCUMENTLENGTH);
+ if (maxDocumentLength == null)
+ maxDocumentLength = LuceneClient.defaultMaxDocumentLength().toString();
+ map.put(LuceneConfig.PARAM_MAXDOCUMENTLENGTH, maxDocumentLength);
return map;
}
@@ -595,9 +585,9 @@ public class LuceneConnector extends org
String contentFields = variableContext.getParameter(LuceneConfig.PARAM_CONTENTFIELD);
if (contentFields != null)
parameters.setParameter(LuceneConfig.PARAM_CONTENTFIELD, contentFields);
- String maximumDocumentLength = variableContext.getParameter(LuceneConfig.PARAM_MAXIMUMDOCUMENTLENGTH);
- if (maximumDocumentLength != null)
- parameters.setParameter(LuceneConfig.PARAM_MAXIMUMDOCUMENTLENGTH, maximumDocumentLength);
+ String maxDocumentLength = variableContext.getParameter(LuceneConfig.PARAM_MAXDOCUMENTLENGTH);
+ if (maxDocumentLength != null)
+ parameters.setParameter(LuceneConfig.PARAM_MAXDOCUMENTLENGTH, maxDocumentLength);
return null;
}
Modified: manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneDocument.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneDocument.java?rev=1690423&r1=1690422&r2=1690423&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneDocument.java (original)
+++ manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneDocument.java Sun Jul 12 03:30:17 2015
@@ -16,6 +16,12 @@
*/
package org.apache.manifoldcf.agents.output.lucene;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.io.StringReader;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
@@ -26,64 +32,204 @@ import org.apache.lucene.document.Field.
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.util.BytesRef;
+import org.apache.manifoldcf.agents.output.lucene.LuceneClient.TermVector;
+
import com.google.common.base.Objects;
+import com.google.common.io.ByteArrayDataInput;
+import com.google.common.io.ByteSource;
+import com.google.common.io.ByteStreams;
public class LuceneDocument {
private Document doc;
- private static final FieldType TYPE_STORED_WITH_TV = new FieldType(TextField.TYPE_STORED);
+ private static final FieldType STORED = new FieldType();
+ static {
+ STORED.setOmitNorms(false);
+ STORED.setIndexOptions(IndexOptions.NONE);
+ STORED.setTokenized(false);
+ STORED.setStored(true);
+ STORED.freeze();
+ }
+
+ private static final FieldType STRING_NOT_STORED = new FieldType();
+ static {
+ STRING_NOT_STORED.setOmitNorms(true);
+ STRING_NOT_STORED.setIndexOptions(IndexOptions.DOCS);
+ STRING_NOT_STORED.setTokenized(false);
+ STRING_NOT_STORED.setStored(false);
+ STRING_NOT_STORED.freeze();
+ }
+
+ private static final FieldType TEXT_NOT_STORED = new FieldType();
+ static {
+ TEXT_NOT_STORED.setOmitNorms(false);
+ TEXT_NOT_STORED.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
+ TEXT_NOT_STORED.setTokenized(true);
+ TEXT_NOT_STORED.setStored(false);
+ TEXT_NOT_STORED.freeze();
+ }
+
+ @Deprecated
+ private static final FieldType TEXT_STORED_WITH_TV = new FieldType(TextField.TYPE_STORED);
+ static {
+ TEXT_STORED_WITH_TV.setStoreTermVectors(true);
+ TEXT_STORED_WITH_TV.setStoreTermVectorOffsets(true);
+ TEXT_STORED_WITH_TV.setStoreTermVectorPositions(true);
+ TEXT_STORED_WITH_TV.freeze();
+ }
+
+ @Deprecated
+ private static final FieldType TEXT_NOT_STORED_WITH_TV = new FieldType(TEXT_NOT_STORED);
+ static {
+ TEXT_NOT_STORED_WITH_TV.setStoreTermVectors(true);
+ TEXT_NOT_STORED_WITH_TV.setStoreTermVectorOffsets(true);
+ TEXT_NOT_STORED_WITH_TV.setStoreTermVectorPositions(true);
+ TEXT_NOT_STORED_WITH_TV.freeze();
+ }
+
+ private static final FieldType TEXT_NOT_STORED_WITH_TV_YES = new FieldType(TEXT_NOT_STORED);
+ static {
+ TEXT_NOT_STORED_WITH_TV_YES.setStoreTermVectors(true);
+ TEXT_NOT_STORED_WITH_TV_YES.freeze();
+ }
+
+ private static final FieldType TEXT_NOT_STORED_WITH_TV_POSITIONS = new FieldType(TEXT_NOT_STORED);
+ static {
+ TEXT_NOT_STORED_WITH_TV_POSITIONS.setStoreTermVectors(true);
+ TEXT_NOT_STORED_WITH_TV_POSITIONS.setStoreTermVectorPositions(true);
+ TEXT_NOT_STORED_WITH_TV_POSITIONS.freeze();
+ }
+
+ private static final FieldType TEXT_NOT_STORED_WITH_TV_OFFSETS = new FieldType(TEXT_NOT_STORED);
static {
- TYPE_STORED_WITH_TV.setStoreTermVectors(true);
- TYPE_STORED_WITH_TV.setStoreTermVectorOffsets(true);
- TYPE_STORED_WITH_TV.setStoreTermVectorPositions(true);
- TYPE_STORED_WITH_TV.freeze();
+ TEXT_NOT_STORED_WITH_TV_OFFSETS.setStoreTermVectors(true);
+ TEXT_NOT_STORED_WITH_TV_OFFSETS.setStoreTermVectorOffsets(true);
+ TEXT_NOT_STORED_WITH_TV_OFFSETS.freeze();
}
- private static final FieldType TYPE_NOT_STORED_WITH_TV = new FieldType(TextField.TYPE_NOT_STORED);
+ private static final FieldType TEXT_NOT_STORED_WITH_TV_POSITIONS_OFFSETS = new FieldType(TEXT_NOT_STORED);
static {
- TYPE_NOT_STORED_WITH_TV.setStoreTermVectors(true);
- TYPE_NOT_STORED_WITH_TV.setStoreTermVectorOffsets(true);
- TYPE_NOT_STORED_WITH_TV.setStoreTermVectorPositions(true);
- TYPE_NOT_STORED_WITH_TV.freeze();
+ TEXT_NOT_STORED_WITH_TV_POSITIONS_OFFSETS.setStoreTermVectors(true);
+ TEXT_NOT_STORED_WITH_TV_POSITIONS_OFFSETS.setStoreTermVectorPositions(true);
+ TEXT_NOT_STORED_WITH_TV_POSITIONS_OFFSETS.setStoreTermVectorOffsets(true);
+ TEXT_NOT_STORED_WITH_TV_POSITIONS_OFFSETS.freeze();
}
public LuceneDocument() {
doc = new Document();
}
+ @Deprecated
public LuceneDocument addStringField(String name, String value, boolean store) {
Store stored = (store) ? Field.Store.YES : Field.Store.NO;
doc.add(new StringField(name, value, stored));
return this;
}
+ @Deprecated
public LuceneDocument addTextField(String name, String value, boolean store) {
- FieldType type = (store) ? TYPE_STORED_WITH_TV : TYPE_NOT_STORED_WITH_TV;
+ FieldType type = (store) ? TEXT_STORED_WITH_TV : TEXT_NOT_STORED_WITH_TV;
doc.add(new Field(name, value, type));
return this;
}
+ public LuceneDocument addStringField(String name, BytesRef value) {
+ doc.add(new Field(name, value, STRING_NOT_STORED));
+ return this;
+ }
+
+ public LuceneDocument addTextField(String name, Reader value, String termvector) {
+ FieldType ftype = null;
+ if (termvector.equals(TermVector.NO.toString())) {
+ ftype = TEXT_NOT_STORED;
+ } else if (termvector.equals(TermVector.YES.toString())) {
+ ftype = TEXT_NOT_STORED_WITH_TV_YES;
+ } else if (termvector.equals(TermVector.WITH_POSITIONS.toString())) {
+ ftype = TEXT_NOT_STORED_WITH_TV_POSITIONS;
+ } else if (termvector.equals(TermVector.WITH_OFFSETS.toString())) {
+ ftype = TEXT_NOT_STORED_WITH_TV_OFFSETS;
+ } else if (termvector.equals(TermVector.WITH_POSITIONS_OFFSETS.toString())) {
+ ftype = TEXT_NOT_STORED_WITH_TV_POSITIONS_OFFSETS;
+ }
+ doc.add(new Field(name, value, ftype));
+ return this;
+ }
+
+ public LuceneDocument addStoredField(String name, BytesRef value) {
+ doc.add(new Field(name, value, STORED));
+ return this;
+ }
+
public Document toDocument() {
return doc;
}
- public static LuceneDocument addField(LuceneDocument from, String field, String value, Map<String,Map<String,Object>> fieldsInfo) {
+ @Deprecated
+ public static LuceneDocument addFieldDeprecated(LuceneDocument from, String field, String value, Map<String,Map<String,Object>> fieldsInfo) {
String fieldtype = (String)fieldsInfo.get(field).get(LuceneClient.ATTR_FIELDTYPE);
boolean store = (boolean)Objects.firstNonNull(fieldsInfo.get(field).get(LuceneClient.ATTR_STORE), false);
- if (fieldtype.equals(LuceneClient.FIELDTYPE_TEXT)) {
+ if (fieldtype.equals(LuceneClient.FieldType.TEXT.toString())) {
from.addTextField(field, value, store);
- } else if (fieldtype.equals(LuceneClient.FIELDTYPE_STRING)) {
+ } else if (fieldtype.equals(LuceneClient.FieldType.STRING.toString())) {
from.addStringField(field, value, store);
}
@SuppressWarnings("unchecked")
List<String> copyFields = (List<String>)Objects.firstNonNull(fieldsInfo.get(field).get(LuceneClient.ATTR_COPY_TO), new ArrayList<String>());
for (String tofield : copyFields) {
- from = addField(from, tofield, value, fieldsInfo);
+ from = addFieldDeprecated(from, tofield, value, fieldsInfo);
}
return from;
}
+ public static LuceneDocument addField(LuceneDocument from, String field, Object value, Map<String,Map<String,Object>> fieldsInfo) throws IOException {
+ String type = (String)fieldsInfo.get(field).get(LuceneClient.ATTR_FIELDTYPE);
+ boolean store = (boolean)Objects.firstNonNull(fieldsInfo.get(field).get(LuceneClient.ATTR_STORE), false);
+ String termvector = (String)Objects.firstNonNull(fieldsInfo.get(field).get(LuceneClient.ATTR_TERM_VECTOR), TermVector.NO.toString());
+ @SuppressWarnings("unchecked")
+ List<String> copyFields = (List<String>)Objects.firstNonNull(fieldsInfo.get(field).get(LuceneClient.ATTR_COPY_TO), new ArrayList<String>());
+
+ if (value instanceof InputStream) {
+ byte[] b = ByteStreams.toByteArray((InputStream)value);
+ BytesRef br = new BytesRef(b);
+
+ if (type.equals(LuceneClient.FieldType.TEXT.toString())) {
+ from.addTextField(field, ByteSource.wrap(BytesRef.deepCopyOf(br).bytes).asCharSource(StandardCharsets.UTF_8).openBufferedStream(), termvector);
+ } else if (type.equals(LuceneClient.FieldType.STRING.toString())) {
+ from.addStringField(field, BytesRef.deepCopyOf(br));
+ }
+ if (store) {
+ from.addStoredField(field, BytesRef.deepCopyOf(br));
+ }
+ for (String tofield : copyFields) {
+ InputStream toValue = new ByteArrayInputStream(BytesRef.deepCopyOf(br).bytes);
+ from = addField(from, tofield, toValue, fieldsInfo);
+ }
+ }
+
+ if (value instanceof String) {
+ byte[] b = value.toString().getBytes(StandardCharsets.UTF_8);
+ BytesRef br = new BytesRef(b);
+
+ if (type.equals(LuceneClient.FieldType.TEXT.toString())) {
+ from.addTextField(field, new StringReader(BytesRef.deepCopyOf(br).utf8ToString()), termvector);
+ } else if (type.equals(LuceneClient.FieldType.STRING.toString())) {
+ from.addStringField(field, BytesRef.deepCopyOf(br));
+ }
+ if (store) {
+ from.addStoredField(field, BytesRef.deepCopyOf(br));
+ }
+ for (String tofield : copyFields) {
+ String toValue = new String(BytesRef.deepCopyOf(br).bytes, StandardCharsets.UTF_8);
+ from = addField(from, tofield, toValue, fieldsInfo);
+ }
+ }
+
+ return from;
+ }
+
}
Modified: manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/lucene/common_en_US.properties
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/lucene/common_en_US.properties?rev=1690423&r1=1690422&r2=1690423&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/lucene/common_en_US.properties (original)
+++ manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/lucene/common_en_US.properties Sun Jul 12 03:30:17 2015
@@ -25,7 +25,7 @@ LuceneConnector.Analyzers=Analyzers
LuceneConnector.Fields=Fields
LuceneConnector.Idfield=id field name
LuceneConnector.Contentfield=content field name
-LuceneConnector.Maximumdocumentlength=Maximum document length
+LuceneConnector.Maxdocumentlength=Max document length
LuceneConnector.PleaseSupplyValidPath=Path can't be empty. Please supply a valid path
@@ -36,6 +36,6 @@ LuceneConnector.PleaseSupplyValidAnalyze
LuceneConnector.PleaseSupplyValidFields=Fields can't be empty. Please supply a valid fields
LuceneConnector.PleaseSupplyValidIdfield=Idfield can't be empty. Please supply a valid idfield
LuceneConnector.PleaseSupplyValidContentfield=Contentfield can't be empty. Please supply a valid contentfield
-LuceneConnector.PleaseSupplyValidMaximumdocumentlength=Maximum document length can't be empty. Please supply a valid maximum document length
-LuceneConnector.MaximumDocumentLengthMustBeAnInteger=Maximum document length must be an integer
+LuceneConnector.PleaseSupplyValidMaxdocumentlength=Max document length can't be empty. Please supply a valid max document length
+LuceneConnector.MaxDocumentLengthMustBeAnInteger=Max document length must be an integer
Modified: manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/editConfiguration.js
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/editConfiguration.js?rev=1690423&r1=1690422&r2=1690423&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/editConfiguration.js (original)
+++ manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/editConfiguration.js Sun Jul 12 03:30:17 2015
@@ -74,15 +74,15 @@ function checkConfig() {
return false;
}
}
- if (editconnection.maximumdocumentlength) {
- if (editconnection.maximumdocumentlength.value == "") {
- alert("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('LuceneConnector.PleaseSupplyValidMaximumdocumentlength'))");
- editconnection.maximumdocumentlength.focus();
+ if (editconnection.maxdocumentlength) {
+ if (editconnection.maxdocumentlength.value == "") {
+ alert("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('LuceneConnector.PleaseSupplyValidMaxdocumentlength'))");
+ editconnection.maxdocumentlength.focus();
return false;
}
- if (editconnection.maximumdocumentlength.value != "" && !isInteger(editconnection.maximumdocumentlength.value)) {
- alert("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('LuceneConnector.MaximumDocumentLengthMustBeAnInteger'))");
- editconnection.maximumdocumentlength.focus();
+ if (editconnection.maxdocumentlength.value != "" && !isInteger(editconnection.maxdocumentlength.value)) {
+ alert("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('LuceneConnector.MaxDocumentLengthMustBeAnInteger'))");
+ editconnection.maxdocumentlength.focus();
return false;
}
}
@@ -154,17 +154,17 @@ function checkConfigForSave() {
return false;
}
}
- if (editconnection.maximumdocumentlength) {
- if (editconnection.maximumdocumentlength.value == "") {
- alert("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('LuceneConnector.PleaseSupplyValidMaximumdocumentlength'))");
+ if (editconnection.maxdocumentlength) {
+ if (editconnection.maxdocumentlength.value == "") {
+ alert("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('LuceneConnector.PleaseSupplyValidMaxdocumentlength'))");
SelectTab("$Encoder.javascriptBodyEscape($ResourceBundle.getString('LuceneConnector.Parameters'))");
- editconnection.maximumdocumentlength.focus();
+ editconnection.maxdocumentlength.focus();
return false;
}
- if (editconnection.maximumdocumentlength.value != "" && !isInteger(editconnection.maximumdocumentlength.value)) {
- alert("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('LuceneConnector.MaximumDocumentLengthMustBeAnInteger'))");
+ if (editconnection.maxdocumentlength.value != "" && !isInteger(editconnection.maxdocumentlength.value)) {
+ alert("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('LuceneConnector.MaxDocumentLengthMustBeAnInteger'))");
SelectTab("$Encoder.javascriptBodyEscape($ResourceBundle.getString('LuceneConnector.Parameters'))");
- editconnection.maximumdocumentlength.focus();
+ editconnection.maxdocumentlength.focus();
return false;
}
}
Modified: manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/editConfiguration_Parameters.html
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/editConfiguration_Parameters.html?rev=1690423&r1=1690422&r2=1690423&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/editConfiguration_Parameters.html (original)
+++ manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/editConfiguration_Parameters.html Sun Jul 12 03:30:17 2015
@@ -84,10 +84,10 @@
</tr>
<tr>
<td class="description">
- $Encoder.bodyEscape($ResourceBundle.getString('LuceneConnector.Maximumdocumentlength'))
+ $Encoder.bodyEscape($ResourceBundle.getString('LuceneConnector.Maxdocumentlength'))
</td>
- <td class="value"><input name="maximumdocumentlength" type="text"
- value="$Encoder.attributeEscape($MAXIMUMDOCUMENTLENGTH)" size="48" />
+ <td class="value"><input name="maxdocumentlength" type="text"
+ value="$Encoder.attributeEscape($MAXDOCUMENTLENGTH)" size="48" />
</td>
</tr>
</table>
@@ -102,6 +102,6 @@
<input type="hidden" name="fields" value="$Encoder.attributeEscape($FIELDS)" />
<input type="hidden" name="idfield" value="$Encoder.attributeEscape($IDFIELD)" />
<input type="hidden" name="contentfield" value="$Encoder.attributeEscape($CONTENTFIELD)" />
-<input type="hidden" name="maximumdocumentlength" value="$Encoder.attributeEscape($MAXIMUMDOCUMENTLENGTH)" />
+<input type="hidden" name="maxdocumentlength" value="$Encoder.attributeEscape($MAXDOCUMENTLENGTH)" />
#end
Modified: manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/viewConfiguration.html
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/viewConfiguration.html?rev=1690423&r1=1690422&r2=1690423&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/viewConfiguration.html (original)
+++ manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/viewConfiguration.html Sun Jul 12 03:30:17 2015
@@ -49,7 +49,7 @@
<td class="value">$Encoder.bodyEscape($CONTENTFIELD)</td>
</tr>
<tr>
- <td class="description">$Encoder.bodyEscape($ResourceBundle.getString('LuceneConnector.Maximumdocumentlength'))</td>
- <td class="value">$Encoder.bodyEscape($MAXIMUMDOCUMENTLENGTH)</td>
+ <td class="description">$Encoder.bodyEscape($ResourceBundle.getString('LuceneConnector.Maxdocumentlength'))</td>
+ <td class="value">$Encoder.bodyEscape($MAXDOCUMENTLENGTH)</td>
</tr>
</table>
\ No newline at end of file
Modified: manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/test/java/org/apache/manifoldcf/agents/output/lucene/tests/LuceneClientTest.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/test/java/org/apache/manifoldcf/agents/output/lucene/tests/LuceneClientTest.java?rev=1690423&r1=1690422&r2=1690423&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/test/java/org/apache/manifoldcf/agents/output/lucene/tests/LuceneClientTest.java (original)
+++ manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/test/java/org/apache/manifoldcf/agents/output/lucene/tests/LuceneClientTest.java Sun Jul 12 03:30:17 2015
@@ -18,6 +18,8 @@ package org.apache.manifoldcf.agents.out
import java.io.File;
import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
@@ -42,6 +44,7 @@ import org.junit.Before;
import org.junit.Test;
import com.google.common.base.StandardSystemProperty;
+import com.google.common.io.ByteSource;
import static org.junit.Assert.*;
import static org.hamcrest.CoreMatchers.*;
@@ -104,8 +107,8 @@ public class LuceneClientTest {
IndexSearcher searcher = client.newSearcher();
assertThat(searcher.count(new MatchAllDocsQuery()), is(0));
- IndexSearcher realtimeSearcher = client.newRealtimeSearcher();
- assertThat(realtimeSearcher.count(new MatchAllDocsQuery()), is(0));
+
+ assertThat(client.newRealtimeSearcher().count(new MatchAllDocsQuery()), is(0));
client.close();
}
@@ -115,12 +118,12 @@ public class LuceneClientTest {
LuceneClient client1 =
LuceneClientManager.getClient(path, LuceneClient.defaultCharfilters(), LuceneClient.defaultTokenizers(), LuceneClient.defaultFilters(), LuceneClient.defaultAnalyzers(), LuceneClient.defaultFields(),
- LuceneClient.defaultIdField(), LuceneClient.defaultContentField(), LuceneClient.defaultMaximumDocumentLength());
+ LuceneClient.defaultIdField(), LuceneClient.defaultContentField(), LuceneClient.defaultMaxDocumentLength());
assertThat(client1.isOpen(), is(true));
LuceneClient client2 =
LuceneClientManager.getClient(path, LuceneClient.defaultCharfilters(), LuceneClient.defaultTokenizers(), LuceneClient.defaultFilters(), LuceneClient.defaultAnalyzers(), LuceneClient.defaultFields(),
- "id", "content", LuceneClient.defaultMaximumDocumentLength());
+ "id", "content", LuceneClient.defaultMaxDocumentLength());
assertThat(client2.isOpen(), is(true));
assertThat(client1, is(client2));
@@ -129,7 +132,7 @@ public class LuceneClientTest {
try {
client3 =
LuceneClientManager.getClient(path, LuceneClient.defaultCharfilters(), LuceneClient.defaultTokenizers(), LuceneClient.defaultFilters(), LuceneClient.defaultAnalyzers(), LuceneClient.defaultFields(),
- "dummy_id", "dummy_content", LuceneClient.defaultMaximumDocumentLength());
+ "dummy_id", "dummy_content", LuceneClient.defaultMaxDocumentLength());
fail("Should not get here");
} catch (Exception e) {
assert e instanceof IllegalStateException;
@@ -142,7 +145,7 @@ public class LuceneClientTest {
client3 =
LuceneClientManager.getClient(path, LuceneClient.defaultCharfilters(), LuceneClient.defaultTokenizers(), LuceneClient.defaultFilters(), LuceneClient.defaultAnalyzers(), LuceneClient.defaultFields(),
- "dummy_id", "dummy_content", LuceneClient.defaultMaximumDocumentLength());
+ "dummy_id", "dummy_content", LuceneClient.defaultMaxDocumentLength());
assertThat(client3.isOpen(), is(true));
assertThat(client3, not(client1));
@@ -154,43 +157,43 @@ public class LuceneClientTest {
String path = testDir.getAbsolutePath()+sep+"tmp"+sep+"addorreplace-index";
try (LuceneClient client = new LuceneClient(new File(path).toPath())) {
// add
- LuceneDocument doc1 = new LuceneDocument()
- .addStringField(ID, "/repo/001", true)
- .addTextField(CONTENT, "green", true);
+ LuceneDocument doc1 = new LuceneDocument();
+ doc1 = LuceneDocument.addField(doc1, ID, "/repo/001", client.fieldsInfo());
+ doc1 = LuceneDocument.addField(doc1, CONTENT, ByteSource.wrap("green".getBytes(StandardCharsets.UTF_8)).openBufferedStream(), client.fieldsInfo());
client.addOrReplace("/repo/001", doc1);
- LuceneDocument doc2 = new LuceneDocument()
- .addStringField(ID, "/repo/002", true)
- .addTextField(CONTENT, "yellow", true);
+ LuceneDocument doc2 = new LuceneDocument();
+ doc2 = LuceneDocument.addField(doc2, ID, "/repo/002", client.fieldsInfo());
+ doc2 = LuceneDocument.addField(doc2, CONTENT, ByteSource.wrap("yellow".getBytes(StandardCharsets.UTF_8)).openBufferedStream(), client.fieldsInfo());
client.addOrReplace("/repo/002", doc2);
- client.optimize();
- IndexSearcher searcher = client.newSearcher();
- assertThat(searcher.count(new TermQuery(new Term(CONTENT, "green"))), is(1));
- assertThat(searcher.count(new TermQuery(new Term(CONTENT, "yellow"))), is(1));
-
- // update
- LuceneDocument updateDoc = new LuceneDocument()
- .addStringField(ID, "/repo/001", true)
- .addTextField(CONTENT, "yellow", true);
- client.addOrReplace("/repo/001", updateDoc);
-
- client.optimize();
- searcher = client.newSearcher();
- assertThat(searcher.count(new TermQuery(new Term(CONTENT, "green"))), is(0));
- assertThat(searcher.count(new TermQuery(new Term(CONTENT, "yellow"))), is(2));
-
- // add
- LuceneDocument addDoc = new LuceneDocument()
- .addStringField(ID, "/repo/100", true)
- .addTextField(CONTENT, "red", true);
- client.addOrReplace("/repo/100", addDoc);
-
- client.optimize();
- searcher = client.newSearcher();
- assertThat(searcher.count(new TermQuery(new Term(CONTENT, "green"))), is(0));
- assertThat(searcher.count(new TermQuery(new Term(CONTENT, "yellow"))), is(2));
- assertThat(searcher.count(new TermQuery(new Term(CONTENT, "red"))), is(1));
+ client.optimize();
+ IndexSearcher searcher = client.newSearcher();
+ assertThat(searcher.count(new TermQuery(new Term(CONTENT, "green"))), is(1));
+ assertThat(searcher.count(new TermQuery(new Term(CONTENT, "yellow"))), is(1));
+
+ // update
+ LuceneDocument updateDoc = new LuceneDocument();
+ updateDoc = LuceneDocument.addField(updateDoc, ID, "/repo/001", client.fieldsInfo());
+ updateDoc = LuceneDocument.addField(updateDoc, CONTENT, ByteSource.wrap("yellow".getBytes(StandardCharsets.UTF_8)).openBufferedStream(), client.fieldsInfo());
+ client.addOrReplace("/repo/001", updateDoc);
+
+ client.optimize();
+ searcher = client.newSearcher();
+ assertThat(searcher.count(new TermQuery(new Term(CONTENT, "green"))), is(0));
+ assertThat(searcher.count(new TermQuery(new Term(CONTENT, "yellow"))), is(2));
+
+ // add
+ LuceneDocument addDoc = new LuceneDocument();
+ addDoc = LuceneDocument.addField(addDoc, ID, "/repo/100", client.fieldsInfo());
+ addDoc = LuceneDocument.addField(addDoc, CONTENT, ByteSource.wrap("red".getBytes(StandardCharsets.UTF_8)).openBufferedStream(), client.fieldsInfo());
+ client.addOrReplace("/repo/100", addDoc);
+
+ client.optimize();
+ searcher = client.newSearcher();
+ assertThat(searcher.count(new TermQuery(new Term(CONTENT, "green"))), is(0));
+ assertThat(searcher.count(new TermQuery(new Term(CONTENT, "yellow"))), is(2));
+ assertThat(searcher.count(new TermQuery(new Term(CONTENT, "red"))), is(1));
}
}
@@ -199,14 +202,14 @@ public class LuceneClientTest {
String path = testDir.getAbsolutePath()+sep+"tmp"+sep+"remove-index";
try (LuceneClient client = new LuceneClient(new File(path).toPath())) {
- LuceneDocument doc1 = new LuceneDocument()
- .addStringField(ID, "/repo/001", true)
- .addTextField(CONTENT, "Apache", true);
+ LuceneDocument doc1 = new LuceneDocument();
+ doc1 = LuceneDocument.addField(doc1, ID, "/repo/001", client.fieldsInfo());
+ doc1 = LuceneDocument.addField(doc1, CONTENT, ByteSource.wrap("Apache".getBytes(StandardCharsets.UTF_8)).openBufferedStream(), client.fieldsInfo());
client.addOrReplace("/repo/001", doc1);
- LuceneDocument doc2 = new LuceneDocument()
- .addStringField(ID, "/repo/002", true)
- .addTextField(CONTENT, "Apache", true);
+ LuceneDocument doc2 = new LuceneDocument();
+ doc2 = LuceneDocument.addField(doc2, ID, "/repo/002", client.fieldsInfo());
+ doc2 = LuceneDocument.addField(doc2, CONTENT, ByteSource.wrap("Apache".getBytes(StandardCharsets.UTF_8)).openBufferedStream(), client.fieldsInfo());
client.addOrReplace("/repo/002", doc2);
client.optimize();
@@ -227,21 +230,21 @@ public class LuceneClientTest {
try (LuceneClient client = new LuceneClient(new File(path).toPath())) {
String content1 = "Apache ManifoldCF, Apache Lucene";
- LuceneDocument doc1 = new LuceneDocument()
- .addStringField(ID, "/repo/001", true)
- .addTextField(CONTENT, content1, true)
- .addTextField("content_ws", content1, false)
- .addTextField("content_ngram", content1, false);
+ LuceneDocument doc1 = new LuceneDocument();
+ doc1 = LuceneDocument.addField(doc1, ID, "/repo/001", client.fieldsInfo());
+ doc1 = LuceneDocument.addField(doc1, CONTENT, ByteSource.wrap(content1.getBytes(StandardCharsets.UTF_8)).openBufferedStream(), client.fieldsInfo());
client.addOrReplace("/repo/001", doc1);
- LuceneDocument doc2 = new LuceneDocument()
- .addStringField(ID, "/repo/002", true)
- .addTextField(CONTENT, "This is stop word. apache software.", true);
+ String content2 = "This is stop word. apache software.";
+ LuceneDocument doc2 = new LuceneDocument();
+ doc2 = LuceneDocument.addField(doc2, ID, "/repo/002", client.fieldsInfo());
+ doc2 = LuceneDocument.addField(doc2, CONTENT, ByteSource.wrap(content2.getBytes(StandardCharsets.UTF_8)).openBufferedStream(), client.fieldsInfo());
client.addOrReplace("/repo/002", doc2);
- LuceneDocument doc3 = new LuceneDocument()
- .addStringField(ID, "/repo/003", true)
- .addTextField(CONTENT, "Apache Solr", true);
+ String content3 = "Apache Solr";
+ LuceneDocument doc3 = new LuceneDocument();
+ doc3 = LuceneDocument.addField(doc3, ID, "/repo/003", client.fieldsInfo());
+ doc3 = LuceneDocument.addField(doc3, CONTENT, ByteSource.wrap(content3.getBytes(StandardCharsets.UTF_8)).openBufferedStream(), client.fieldsInfo());
client.addOrReplace("/repo/003", doc3);
client.optimize();
@@ -272,17 +275,23 @@ public class LuceneClientTest {
}
assertThat(client.reader().docFreq(new Term(CONTENT, br)), is(3));
+ assertThat(client.reader().getTermVector(docID, "content_ws"), is(nullValue()));
+ assertThat(client.reader().getTermVector(docID, "content_ngram"), is(nullValue()));
+
hits = searcher.search(client.newQuery("id:\\/repo\\/003"), 1);
Document storedDocument = searcher.doc(hits.scoreDocs[0].doc);
- assertThat(storedDocument.getField(CONTENT).stringValue(), is("Apache Solr"));
+ assertThat(storedDocument.getField(CONTENT).binaryValue().utf8ToString(), is("Apache Solr"));
+ assertThat(storedDocument.getField(CONTENT).stringValue(), is(nullValue()));
String nrt = "near-real-time";
- LuceneDocument doc4 = new LuceneDocument()
- .addStringField(ID, nrt, true);
+ LuceneDocument doc4 = new LuceneDocument();
+ doc4 = LuceneDocument.addField(doc4, ID, nrt, client.fieldsInfo());
+ doc4 = LuceneDocument.addField(doc4, CONTENT, ByteSource.wrap(nrt.getBytes(StandardCharsets.UTF_8)).openBufferedStream(), client.fieldsInfo());
client.addOrReplace(nrt, doc4);
ManifoldCF.sleep(1500L);
assertThat(searcher.count(client.newQuery(ID+":"+nrt)), is(0));
- assertThat(client.newSearcher().count(client.newQuery(ID+":"+nrt)), is(0));
+ IndexSearcher searcher2 = client.newSearcher();
+ assertThat(searcher2.count(client.newQuery(ID+":"+nrt)), is(0));
assertThat(client.newRealtimeSearcher().count(client.newQuery(ID+":"+nrt)), is(1));
}
}
@@ -290,10 +299,14 @@ public class LuceneClientTest {
@Test
public void testIndexRepositoryDocument() throws IOException, ManifoldCFException {
String documentURI = "file://dummy/rd";
+ String content = "Classification, categorization, and tagging using Lucene";
+
RepositoryDocument rd = new RepositoryDocument();
rd.addField("cat", "foo");
rd.addField("author", new String[]{ "abe", "obama" });
- rd.addField(CONTENT, "Classification, categorization, and tagging using Lucene");
+ byte[] b = content.getBytes(StandardCharsets.UTF_8);
+ InputStream in = ByteSource.wrap(b).openBufferedStream();
+ rd.setBinary(in, b.length);
String path = testDir.getAbsolutePath()+sep+"tmp"+sep+"rd-index";
try (LuceneClient client = new LuceneClient(new File(path).toPath())) {
@@ -301,6 +314,8 @@ public class LuceneClientTest {
doc = LuceneDocument.addField(doc, client.idField(), documentURI, client.fieldsInfo());
+ doc = LuceneDocument.addField(doc, client.contentField(), rd.getBinaryStream(), client.fieldsInfo());
+
Iterator<String> it = rd.getFields();
while (it.hasNext()) {
String rdField = it.next();