You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by sh...@apache.org on 2015/07/12 05:30:18 UTC

svn commit: r1690423 - in /manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src: main/java/org/apache/manifoldcf/agents/output/lucene/ main/native2ascii/org/apache/manifoldcf/agents/output/lucene/ main/resources/org/apache/manifoldcf/agents/output/lucene/ test/java/org/apache/manifoldcf/agents/output/lucene/tests/

Author: shinichiro
Date: Sun Jul 12 03:30:17 2015
New Revision: 1690423

URL: http://svn.apache.org/r1690423
Log:
add term_vector option, addTextField() using Reader and addStoredField() using BytesRef

Modified:
    manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClient.java
    manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClientManager.java
    manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConfig.java
    manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConnector.java
    manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneDocument.java
    manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/lucene/common_en_US.properties
    manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/editConfiguration.js
    manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/editConfiguration_Parameters.html
    manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/viewConfiguration.html
    manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/test/java/org/apache/manifoldcf/agents/output/lucene/tests/LuceneClientTest.java

Modified: manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClient.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClient.java?rev=1690423&r1=1690422&r2=1690423&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClient.java (original)
+++ manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClient.java Sun Jul 12 03:30:17 2015
@@ -24,6 +24,7 @@ import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.concurrent.TimeUnit;
 
@@ -67,7 +68,7 @@ public class LuceneClient implements Clo
   private final Map<String,Map<String,Object>> fieldsInfo;
   private final String idField;
   private final String contentField;
-  private final Long maximumDocumentLength;
+  private final Long maxDocumentLength;
 
   private final String versionString;
 
@@ -97,24 +98,36 @@ public class LuceneClient implements Clo
   public static final String ATTR_STORE = "store";
   public static final String ATTR_INDEX_ANALYZER = "index_analyzer";
   public static final String ATTR_QUERY_ANALYZER = "query_analyzer";
+  public static final String ATTR_TERM_VECTOR = "term_vector";
   public static final String ATTR_COPY_TO = "copy_to";
 
-  public static final String FIELDTYPE_STRING = "string";
-  public static final String FIELDTYPE_TEXT = "text";
+  public static enum FieldType {
+    STRING, TEXT;
+    @Override public String toString() {
+      return name().toLowerCase(Locale.ROOT);
+    }
+  }
+
+  public static enum TermVector {
+    NO, YES, WITH_POSITIONS, WITH_OFFSETS, WITH_POSITIONS_OFFSETS;
+    @Override public String toString() {
+      return name().toLowerCase(Locale.ROOT);
+    }
+  }
 
   public LuceneClient(Path path) throws IOException {
     this(path,
          LuceneClient.defaultCharfilters(), LuceneClient.defaultTokenizers(), LuceneClient.defaultFilters(),
          LuceneClient.defaultAnalyzers(), LuceneClient.defaultFields(),
          LuceneClient.defaultIdField(), LuceneClient.defaultContentField(),
-         LuceneClient.defaultMaximumDocumentLength());
+         LuceneClient.defaultMaxDocumentLength());
   }
 
   public LuceneClient(Path path,
                       String charfilters, String tokenizers, String filters,
                       String analyzers, String fields,
                       String idField, String contentField,
-                      Long maximumDocumentLength) throws IOException {
+                      Long maxDocumentLength) throws IOException {
     this.path = Preconditions.checkNotNull(path);
     this.charfiltersInfo = parseAsMap(Preconditions.checkNotNull(charfilters));
     this.tokenizersInfo = parseAsMap(Preconditions.checkNotNull(tokenizers));
@@ -123,9 +136,9 @@ public class LuceneClient implements Clo
     this.fieldsInfo = parseAsMap(Preconditions.checkNotNull(fields));
     this.idField = Preconditions.checkNotNull(idField);
     this.contentField = Preconditions.checkNotNull(contentField);
-    this.maximumDocumentLength = Preconditions.checkNotNull(maximumDocumentLength);
+    this.maxDocumentLength = Preconditions.checkNotNull(maxDocumentLength);
 
-    this.versionString = createVersionString(path, charfiltersInfo, tokenizersInfo, filtersInfo, analyzersInfo, fieldsInfo, idField, contentField, maximumDocumentLength);
+    this.versionString = createVersionString(path, charfiltersInfo, tokenizersInfo, filtersInfo, analyzersInfo, fieldsInfo, idField, contentField, maxDocumentLength);
 
     Map<String,Analyzer> analyzersMap = createAnalyzersMap();
     Map<String,Analyzer> fieldIndexAnalyzers = createFieldAnalyzers(analyzersMap, ATTR_INDEX_ANALYZER);
@@ -221,7 +234,7 @@ public class LuceneClient implements Clo
   private Map<String,Analyzer> createFieldAnalyzers(Map<String,Analyzer> analyzersMap, String target) {
     Map<String,Analyzer> fieldAnalyzers = Maps.newHashMap();
     for (Map.Entry<String,Map<String,Object>> e : fieldsInfo.entrySet()) {
-      if (e.getValue().get(ATTR_FIELDTYPE).toString().equals(FIELDTYPE_TEXT)) {
+      if (e.getValue().get(ATTR_FIELDTYPE).toString().equals(FieldType.TEXT.toString())) {
         String field = e.getKey();
         String analyzer = e.getValue().get(target).toString();
         fieldAnalyzers.put(field, analyzersMap.get(analyzer));
@@ -256,8 +269,8 @@ public class LuceneClient implements Clo
     return contentField;
   }
 
-  public Long maximumDocumentLength() {
-    return maximumDocumentLength;
+  public Long maxDocumentLength() {
+    return maxDocumentLength;
   }
 
   public String versionString() {
@@ -272,7 +285,7 @@ public class LuceneClient implements Clo
     Map<String,Map<String,Object>> analyzersInfo,
     Map<String,Map<String,Object>> fieldsInfo,
     String idField,String contentField,
-    Long maximumDocumentLength) {
+    Long maxDocumentLength) {
     return LuceneConfig.PARAM_PATH + ":" + path.toString() + "+"
          + LuceneConfig.PARAM_CHARFILTERS + ":" + Joiner.on(",").withKeyValueSeparator("=").join(charfiltersInfo) + "+"
          + LuceneConfig.PARAM_TOKENIZERS + ":" + Joiner.on(",").withKeyValueSeparator("=").join(tokenizersInfo) + "+"
@@ -281,7 +294,7 @@ public class LuceneClient implements Clo
          + LuceneConfig.PARAM_FIELDS + ":" + Joiner.on(",").withKeyValueSeparator("=").join(fieldsInfo) + "+"
          + LuceneConfig.PARAM_IDFIELD + ":" + idField + "+"
          + LuceneConfig.PARAM_CONTENTFIELD + ":" + contentField + "+"
-         + LuceneConfig.PARAM_MAXIMUMDOCUMENTLENGTH + ":" + maximumDocumentLength.toString();
+         + LuceneConfig.PARAM_MAXDOCUMENTLENGTH + ":" + maxDocumentLength.toString();
   }
 
   public void refresh() throws IOException {
@@ -427,12 +440,12 @@ public class LuceneClient implements Clo
   public static String defaultFields() {
     String fields =
         "{" + "\n"
-          + "  \"id\":{\""+ATTR_FIELDTYPE+"\":\""+FIELDTYPE_STRING+"\", \""+ATTR_STORE+"\":true},"+ "\n"
-          + "  \"cat\":{\""+ATTR_FIELDTYPE+"\":\""+FIELDTYPE_STRING+"\", \""+ATTR_STORE+"\":true},"+ "\n"
-          + "  \"author\":{\""+ATTR_FIELDTYPE+"\":\""+FIELDTYPE_STRING+"\", \""+ATTR_STORE+"\":true},"+ "\n"
-          + "  \"content\":{\""+ATTR_FIELDTYPE+"\":\""+FIELDTYPE_TEXT+"\", \""+ATTR_STORE+"\":true,\""+ATTR_INDEX_ANALYZER+"\":\"text_general\",\""+ATTR_QUERY_ANALYZER+"\":\"text_general\",\""+ATTR_COPY_TO+"\":[\"content_ws\", \"content_ngram\"]}," + "\n"
-          + "  \"content_ws\":{\""+ATTR_FIELDTYPE+"\":\""+FIELDTYPE_TEXT+"\", \""+ATTR_STORE+"\":false,\""+ATTR_INDEX_ANALYZER+"\":\"text_ws\",\""+ATTR_QUERY_ANALYZER+"\":\"text_ws\"}," + "\n"
-          + "  \"content_ngram\":{\""+ATTR_FIELDTYPE+"\":\""+FIELDTYPE_TEXT+"\", \""+ATTR_STORE+"\":false,\""+ATTR_INDEX_ANALYZER+"\":\"text_ngram\",\""+ATTR_QUERY_ANALYZER+"\":\"text_ngram\"}" + "\n"
+          + "  \"id\":{\""+ATTR_FIELDTYPE+"\":\""+FieldType.STRING.toString()+"\", \""+ATTR_STORE+"\":true},"+ "\n"
+          + "  \"cat\":{\""+ATTR_FIELDTYPE+"\":\""+FieldType.STRING.toString()+"\", \""+ATTR_STORE+"\":true},"+ "\n"
+          + "  \"author\":{\""+ATTR_FIELDTYPE+"\":\""+FieldType.STRING.toString()+"\", \""+ATTR_STORE+"\":true},"+ "\n"
+          + "  \"content\":{\""+ATTR_FIELDTYPE+"\":\""+FieldType.TEXT.toString()+"\", \""+ATTR_STORE+"\":true,\""+ATTR_INDEX_ANALYZER+"\":\"text_general\",\""+ATTR_QUERY_ANALYZER+"\":\"text_general\",\""+ ATTR_TERM_VECTOR +"\":\""+ TermVector.WITH_POSITIONS_OFFSETS.toString() +"\",\""+ATTR_COPY_TO+"\":[\"content_ws\", \"content_ngram\"]}," + "\n"
+          + "  \"content_ws\":{\""+ATTR_FIELDTYPE+"\":\""+FieldType.TEXT.toString()+"\", \""+ATTR_STORE+"\":false,\""+ATTR_INDEX_ANALYZER+"\":\"text_ws\",\""+ATTR_QUERY_ANALYZER+"\":\"text_ws\"}," + "\n"
+          + "  \"content_ngram\":{\""+ATTR_FIELDTYPE+"\":\""+FieldType.TEXT.toString()+"\", \""+ATTR_STORE+"\":false,\""+ATTR_INDEX_ANALYZER+"\":\"text_ngram\",\""+ATTR_QUERY_ANALYZER+"\":\"text_ngram\"}" + "\n"
       + "}";
     return fields;
   }
@@ -445,7 +458,7 @@ public class LuceneClient implements Clo
     return "content";
   }
 
-  public static Long defaultMaximumDocumentLength() {
+  public static Long defaultMaxDocumentLength() {
     return new Long(700000000L);
   }
 

Modified: manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClientManager.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClientManager.java?rev=1690423&r1=1690422&r2=1690423&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClientManager.java (original)
+++ manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneClientManager.java Sun Jul 12 03:30:17 2015
@@ -16,17 +16,17 @@ public class LuceneClientManager {
                       String charfilters, String tokenizers, String filters,
                       String analyzers, String fields,
                       String idField, String contentField,
-                      Long maximumDocumentLength) throws Exception
+                      Long maxDocumentLength) throws Exception
   {
     LuceneClient client = clients.get(path);
 
     if (client == null) {
-      return newClient(path, charfilters, tokenizers, filters, analyzers, fields, idField, contentField, maximumDocumentLength);
+      return newClient(path, charfilters, tokenizers, filters, analyzers, fields, idField, contentField, maxDocumentLength);
     }
 
     if (client != null) {
       if (!client.isOpen()) {
-        return newClient(path, charfilters, tokenizers, filters, analyzers, fields, idField, contentField, maximumDocumentLength);
+        return newClient(path, charfilters, tokenizers, filters, analyzers, fields, idField, contentField, maxDocumentLength);
       }
       String latestVersion = LuceneClient.createVersionString(
           new File(path).toPath(),
@@ -35,7 +35,7 @@ public class LuceneClientManager {
           LuceneClient.parseAsMap(filters),
           LuceneClient.parseAsMap(analyzers),
           LuceneClient.parseAsMap(fields),
-          idField, contentField, maximumDocumentLength);
+          idField, contentField, maxDocumentLength);
       String activeVersion = client.versionString();
       if (!activeVersion.equals(latestVersion)) {
         throw new IllegalStateException("The connection on this path is active. Can not update to the latest settings."
@@ -51,11 +51,11 @@ public class LuceneClientManager {
           String charfilters, String tokenizers, String filters,
           String analyzers, String fields,
           String idField, String contentField,
-          Long maximumDocumentLength) throws Exception
+          Long maxDocumentLength) throws Exception
   {
     LuceneClient client =  new LuceneClient(new File(path).toPath(),
                            charfilters, tokenizers, filters, analyzers, fields,
-                           idField, contentField, maximumDocumentLength);
+                           idField, contentField, maxDocumentLength);
     clients.put(path, client);
     return client;
   }

Modified: manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConfig.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConfig.java?rev=1690423&r1=1690422&r2=1690423&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConfig.java (original)
+++ manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConfig.java Sun Jul 12 03:30:17 2015
@@ -29,5 +29,5 @@ public class LuceneConfig
   public static final String PARAM_IDFIELD = "idfield";
   public static final String PARAM_CONTENTFIELD = "contentfield";
 
-  public static final String PARAM_MAXIMUMDOCUMENTLENGTH = "maximumdocumentlength";
+  public static final String PARAM_MAXDOCUMENTLENGTH = "maxdocumentlength";
 }

Modified: manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConnector.java?rev=1690423&r1=1690422&r2=1690423&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneConnector.java Sun Jul 12 03:30:17 2015
@@ -17,9 +17,6 @@
 package org.apache.manifoldcf.agents.output.lucene;
 
 import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.Reader;
-import java.nio.charset.StandardCharsets;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
@@ -162,16 +159,16 @@ public class LuceneConnector extends org
       if (contentField == null)
         throw new ManifoldCFException("content field not configured");
 
-      final String maxDocumentLength = params.getParameter(LuceneConfig.PARAM_MAXIMUMDOCUMENTLENGTH);
-      if (maxDocumentLength == null)
-        throw new ManifoldCFException("maximum document length not configured");
-      Long maximumDocumentLength = new Long(maxDocumentLength);
+      final String maxDocLength = params.getParameter(LuceneConfig.PARAM_MAXDOCUMENTLENGTH);
+      if (maxDocLength == null)
+        throw new ManifoldCFException("max document length not configured");
+      Long maxDocumentLength = new Long(maxDocLength);
 
       try
       {
         client = LuceneClientManager.getClient(path,
                    charfilters, tokenizers, filters, analyzers, fields,
-                   idField, contentField, maximumDocumentLength);
+                   idField, contentField, maxDocumentLength);
       }
       catch (Exception e)
       {
@@ -279,7 +276,7 @@ public class LuceneConnector extends org
       long length, IOutputCheckActivity activities)
       throws ManifoldCFException, ServiceInterruption {
     getSession();
-    if (length > client.maximumDocumentLength())
+    if (length > client.maxDocumentLength())
       return false;
     return true;
   }
@@ -316,12 +313,14 @@ public class LuceneConnector extends org
    *             only if there's a stream error reading the document data.
    */
   @Override
-  public int addOrReplaceDocumentWithException(String documentURI, VersionContext pipelineDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
-    throws ManifoldCFException, ServiceInterruption, IOException
+  public int addOrReplaceDocumentWithException(String documentURI,
+      VersionContext pipelineDescription, RepositoryDocument document,
+      String authorityNameString, IOutputAddActivity activities)
+      throws ManifoldCFException, ServiceInterruption, IOException
   {
     getSession();
 
-    if (client.maximumDocumentLength() != null && document.getBinaryLength() > client.maximumDocumentLength().longValue()){
+    if (document.getBinaryLength() > client.maxDocumentLength().longValue()){
       activities.recordActivity(null, INGEST_ACTIVITY, null, documentURI, activities.EXCLUDED_LENGTH, "Lucene connector rejected document due to its big size: ('"+document.getBinaryLength()+"')");
       return DOCUMENTSTATUS_REJECTED;
     }
@@ -349,17 +348,7 @@ public class LuceneConnector extends org
 
     try
     {
-      Reader r = new InputStreamReader(document.getBinaryStream(), StandardCharsets.UTF_8);
-      StringBuilder sb = new StringBuilder((int)document.getBinaryLength());
-      char[] buffer = new char[65536];
-      while (true)
-      {
-        int amt = r.read(buffer,0,buffer.length);
-        if (amt == -1)
-          break;
-        sb.append(buffer,0,amt);
-      }
-      doc = LuceneDocument.addField(doc, client.contentField(), sb.toString(), client.fieldsInfo());
+      doc = LuceneDocument.addField(doc, client.contentField(), document.getBinaryStream(), client.fieldsInfo());
     } catch (Exception e) {
       if (e instanceof IOException) {
         Logging.connectors.error("[Parsing Content]Content is not text plain, verify you are properly using Apache Tika Transformer " + documentURI, e);
@@ -407,8 +396,9 @@ public class LuceneConnector extends org
    *            processing activity.
    */
   @Override
-  public void removeDocument(String documentURI, String outputDescription, IOutputRemoveActivity activities)
-    throws ManifoldCFException, ServiceInterruption
+  public void removeDocument(String documentURI, String outputDescription,
+      IOutputRemoveActivity activities)
+      throws ManifoldCFException, ServiceInterruption
   {
     getSession();
 
@@ -533,10 +523,10 @@ public class LuceneConnector extends org
       contentField = LuceneClient.defaultContentField();
     map.put(LuceneConfig.PARAM_CONTENTFIELD, contentField);
 
-    String maximumDocumentLength = configParams.getParameter(LuceneConfig.PARAM_MAXIMUMDOCUMENTLENGTH);
-    if (maximumDocumentLength == null)
-      maximumDocumentLength = LuceneClient.defaultMaximumDocumentLength().toString();
-    map.put(LuceneConfig.PARAM_MAXIMUMDOCUMENTLENGTH, maximumDocumentLength);
+    String maxDocumentLength = configParams.getParameter(LuceneConfig.PARAM_MAXDOCUMENTLENGTH);
+    if (maxDocumentLength == null)
+      maxDocumentLength = LuceneClient.defaultMaxDocumentLength().toString();
+    map.put(LuceneConfig.PARAM_MAXDOCUMENTLENGTH, maxDocumentLength);
 
     return map;
   }
@@ -595,9 +585,9 @@ public class LuceneConnector extends org
     String contentFields = variableContext.getParameter(LuceneConfig.PARAM_CONTENTFIELD);
     if (contentFields != null)
       parameters.setParameter(LuceneConfig.PARAM_CONTENTFIELD, contentFields);
-    String maximumDocumentLength = variableContext.getParameter(LuceneConfig.PARAM_MAXIMUMDOCUMENTLENGTH);
-    if (maximumDocumentLength != null)
-      parameters.setParameter(LuceneConfig.PARAM_MAXIMUMDOCUMENTLENGTH, maximumDocumentLength);
+    String maxDocumentLength = variableContext.getParameter(LuceneConfig.PARAM_MAXDOCUMENTLENGTH);
+    if (maxDocumentLength != null)
+      parameters.setParameter(LuceneConfig.PARAM_MAXDOCUMENTLENGTH, maxDocumentLength);
     return null;
   }
 

Modified: manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneDocument.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneDocument.java?rev=1690423&r1=1690422&r2=1690423&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneDocument.java (original)
+++ manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/java/org/apache/manifoldcf/agents/output/lucene/LuceneDocument.java Sun Jul 12 03:30:17 2015
@@ -16,6 +16,12 @@
 */
 package org.apache.manifoldcf.agents.output.lucene;
 
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.io.StringReader;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
@@ -26,64 +32,204 @@ import org.apache.lucene.document.Field.
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.util.BytesRef;
+import org.apache.manifoldcf.agents.output.lucene.LuceneClient.TermVector;
+
 import com.google.common.base.Objects;
+import com.google.common.io.ByteArrayDataInput;
+import com.google.common.io.ByteSource;
+import com.google.common.io.ByteStreams;
 
 public class LuceneDocument {
 
   private Document doc;
 
-  private static final FieldType TYPE_STORED_WITH_TV = new FieldType(TextField.TYPE_STORED);
+  private static final FieldType STORED = new FieldType();
+  static {
+    STORED.setOmitNorms(false);
+    STORED.setIndexOptions(IndexOptions.NONE);
+    STORED.setTokenized(false);
+    STORED.setStored(true);
+    STORED.freeze();
+  }
+
+  private static final FieldType STRING_NOT_STORED = new FieldType();
+  static {
+    STRING_NOT_STORED.setOmitNorms(true);
+    STRING_NOT_STORED.setIndexOptions(IndexOptions.DOCS);
+    STRING_NOT_STORED.setTokenized(false);
+    STRING_NOT_STORED.setStored(false);
+    STRING_NOT_STORED.freeze();
+  }
+
+  private static final FieldType TEXT_NOT_STORED = new FieldType();
+  static {
+    TEXT_NOT_STORED.setOmitNorms(false);
+    TEXT_NOT_STORED.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
+    TEXT_NOT_STORED.setTokenized(true);
+    TEXT_NOT_STORED.setStored(false);
+    TEXT_NOT_STORED.freeze();
+  }
+
+  @Deprecated
+  private static final FieldType TEXT_STORED_WITH_TV = new FieldType(TextField.TYPE_STORED);
+  static {
+    TEXT_STORED_WITH_TV.setStoreTermVectors(true);
+    TEXT_STORED_WITH_TV.setStoreTermVectorOffsets(true);
+    TEXT_STORED_WITH_TV.setStoreTermVectorPositions(true);
+    TEXT_STORED_WITH_TV.freeze();
+  }
+
+  @Deprecated
+  private static final FieldType TEXT_NOT_STORED_WITH_TV = new FieldType(TEXT_NOT_STORED);
+  static {
+    TEXT_NOT_STORED_WITH_TV.setStoreTermVectors(true);
+    TEXT_NOT_STORED_WITH_TV.setStoreTermVectorOffsets(true);
+    TEXT_NOT_STORED_WITH_TV.setStoreTermVectorPositions(true);
+    TEXT_NOT_STORED_WITH_TV.freeze();
+  }
+
+  private static final FieldType TEXT_NOT_STORED_WITH_TV_YES = new FieldType(TEXT_NOT_STORED);
+  static {
+    TEXT_NOT_STORED_WITH_TV_YES.setStoreTermVectors(true);
+    TEXT_NOT_STORED_WITH_TV_YES.freeze();
+  }
+
+  private static final FieldType TEXT_NOT_STORED_WITH_TV_POSITIONS = new FieldType(TEXT_NOT_STORED);
+  static {
+    TEXT_NOT_STORED_WITH_TV_POSITIONS.setStoreTermVectors(true);
+    TEXT_NOT_STORED_WITH_TV_POSITIONS.setStoreTermVectorPositions(true);
+    TEXT_NOT_STORED_WITH_TV_POSITIONS.freeze();
+  }
+
+  private static final FieldType TEXT_NOT_STORED_WITH_TV_OFFSETS = new FieldType(TEXT_NOT_STORED);
   static {
-    TYPE_STORED_WITH_TV.setStoreTermVectors(true);
-    TYPE_STORED_WITH_TV.setStoreTermVectorOffsets(true);
-    TYPE_STORED_WITH_TV.setStoreTermVectorPositions(true);
-    TYPE_STORED_WITH_TV.freeze();
+    TEXT_NOT_STORED_WITH_TV_OFFSETS.setStoreTermVectors(true);
+    TEXT_NOT_STORED_WITH_TV_OFFSETS.setStoreTermVectorOffsets(true);
+    TEXT_NOT_STORED_WITH_TV_OFFSETS.freeze();
   }
 
-  private static final FieldType TYPE_NOT_STORED_WITH_TV = new FieldType(TextField.TYPE_NOT_STORED);
+  private static final FieldType TEXT_NOT_STORED_WITH_TV_POSITIONS_OFFSETS = new FieldType(TEXT_NOT_STORED);
   static {
-    TYPE_NOT_STORED_WITH_TV.setStoreTermVectors(true);
-    TYPE_NOT_STORED_WITH_TV.setStoreTermVectorOffsets(true);
-    TYPE_NOT_STORED_WITH_TV.setStoreTermVectorPositions(true);
-    TYPE_NOT_STORED_WITH_TV.freeze();
+    TEXT_NOT_STORED_WITH_TV_POSITIONS_OFFSETS.setStoreTermVectors(true);
+    TEXT_NOT_STORED_WITH_TV_POSITIONS_OFFSETS.setStoreTermVectorPositions(true);
+    TEXT_NOT_STORED_WITH_TV_POSITIONS_OFFSETS.setStoreTermVectorOffsets(true);
+    TEXT_NOT_STORED_WITH_TV_POSITIONS_OFFSETS.freeze();
   }
 
   public LuceneDocument() {
     doc = new Document();
   }
 
+  @Deprecated
   public LuceneDocument addStringField(String name, String value, boolean store) {
     Store stored = (store) ? Field.Store.YES : Field.Store.NO;
     doc.add(new StringField(name, value, stored));
     return this;
   }
 
+  @Deprecated
   public LuceneDocument addTextField(String name, String value, boolean store) {
-    FieldType type = (store) ? TYPE_STORED_WITH_TV : TYPE_NOT_STORED_WITH_TV;
+    FieldType type = (store) ? TEXT_STORED_WITH_TV : TEXT_NOT_STORED_WITH_TV;
     doc.add(new Field(name, value, type));
     return this;
   }
 
+  public LuceneDocument addStringField(String name, BytesRef value) {
+    doc.add(new Field(name, value, STRING_NOT_STORED));
+    return this;
+  }
+
+  public LuceneDocument addTextField(String name, Reader value, String termvector) {
+    FieldType ftype = null;
+    if (termvector.equals(TermVector.NO.toString())) {
+      ftype = TEXT_NOT_STORED;
+    } else if (termvector.equals(TermVector.YES.toString())) {
+      ftype = TEXT_NOT_STORED_WITH_TV_YES;
+    } else if (termvector.equals(TermVector.WITH_POSITIONS.toString())) {
+      ftype = TEXT_NOT_STORED_WITH_TV_POSITIONS;
+    } else if (termvector.equals(TermVector.WITH_OFFSETS.toString())) {
+      ftype = TEXT_NOT_STORED_WITH_TV_OFFSETS;
+    } else if (termvector.equals(TermVector.WITH_POSITIONS_OFFSETS.toString())) {
+      ftype = TEXT_NOT_STORED_WITH_TV_POSITIONS_OFFSETS;
+    }
+    doc.add(new Field(name, value, ftype));
+    return this;
+  }
+
+  public LuceneDocument addStoredField(String name, BytesRef value) {
+    doc.add(new Field(name, value, STORED));
+    return this;
+  }
+
   public Document toDocument() {
     return doc;
   }
 
-  public static LuceneDocument addField(LuceneDocument from, String field, String value, Map<String,Map<String,Object>> fieldsInfo) {
+  @Deprecated
+  public static LuceneDocument addFieldDeprecated(LuceneDocument from, String field, String value, Map<String,Map<String,Object>> fieldsInfo) {
     String fieldtype = (String)fieldsInfo.get(field).get(LuceneClient.ATTR_FIELDTYPE);
     boolean store = (boolean)Objects.firstNonNull(fieldsInfo.get(field).get(LuceneClient.ATTR_STORE), false);
 
-    if (fieldtype.equals(LuceneClient.FIELDTYPE_TEXT)) {
+    if (fieldtype.equals(LuceneClient.FieldType.TEXT.toString())) {
       from.addTextField(field, value, store);
-    } else if (fieldtype.equals(LuceneClient.FIELDTYPE_STRING)) {
+    } else if (fieldtype.equals(LuceneClient.FieldType.STRING.toString())) {
       from.addStringField(field, value, store);
     }
 
     @SuppressWarnings("unchecked")
     List<String> copyFields = (List<String>)Objects.firstNonNull(fieldsInfo.get(field).get(LuceneClient.ATTR_COPY_TO), new ArrayList<String>());
     for (String tofield : copyFields) {
-      from = addField(from, tofield, value, fieldsInfo);
+      from = addFieldDeprecated(from, tofield, value, fieldsInfo);
     }
     return from;
   }
 
+  public static LuceneDocument addField(LuceneDocument from, String field, Object value, Map<String,Map<String,Object>> fieldsInfo) throws IOException {
+    String type = (String)fieldsInfo.get(field).get(LuceneClient.ATTR_FIELDTYPE);
+    boolean store = (boolean)Objects.firstNonNull(fieldsInfo.get(field).get(LuceneClient.ATTR_STORE), false);
+    String termvector = (String)Objects.firstNonNull(fieldsInfo.get(field).get(LuceneClient.ATTR_TERM_VECTOR), TermVector.NO.toString());
+    @SuppressWarnings("unchecked")
+    List<String> copyFields = (List<String>)Objects.firstNonNull(fieldsInfo.get(field).get(LuceneClient.ATTR_COPY_TO), new ArrayList<String>());
+
+    if (value instanceof InputStream) {
+      byte[] b = ByteStreams.toByteArray((InputStream)value);
+      BytesRef br = new BytesRef(b);
+
+      if (type.equals(LuceneClient.FieldType.TEXT.toString())) {
+        from.addTextField(field, ByteSource.wrap(BytesRef.deepCopyOf(br).bytes).asCharSource(StandardCharsets.UTF_8).openBufferedStream(), termvector);
+      } else if (type.equals(LuceneClient.FieldType.STRING.toString())) {
+        from.addStringField(field, BytesRef.deepCopyOf(br));
+      }
+      if (store) {
+        from.addStoredField(field, BytesRef.deepCopyOf(br));
+      }
+      for (String tofield : copyFields) {
+        InputStream toValue = new ByteArrayInputStream(BytesRef.deepCopyOf(br).bytes);
+        from = addField(from, tofield, toValue, fieldsInfo);
+      }
+    }
+
+    if (value instanceof String) {
+      byte[] b = value.toString().getBytes(StandardCharsets.UTF_8);
+      BytesRef br = new BytesRef(b);
+
+      if (type.equals(LuceneClient.FieldType.TEXT.toString())) {
+        from.addTextField(field, new StringReader(BytesRef.deepCopyOf(br).utf8ToString()), termvector);
+      } else if (type.equals(LuceneClient.FieldType.STRING.toString())) {
+        from.addStringField(field, BytesRef.deepCopyOf(br));
+      }
+      if (store) {
+        from.addStoredField(field, BytesRef.deepCopyOf(br));
+      }
+      for (String tofield : copyFields) {
+        String toValue = new String(BytesRef.deepCopyOf(br).bytes, StandardCharsets.UTF_8);
+        from = addField(from, tofield, toValue, fieldsInfo);
+      }
+    }
+
+    return from;
+  }
+
 }

Modified: manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/lucene/common_en_US.properties
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/lucene/common_en_US.properties?rev=1690423&r1=1690422&r2=1690423&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/lucene/common_en_US.properties (original)
+++ manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/lucene/common_en_US.properties Sun Jul 12 03:30:17 2015
@@ -25,7 +25,7 @@ LuceneConnector.Analyzers=Analyzers
 LuceneConnector.Fields=Fields
 LuceneConnector.Idfield=id field name
 LuceneConnector.Contentfield=content field name
-LuceneConnector.Maximumdocumentlength=Maximum document length
+LuceneConnector.Maxdocumentlength=Max document length
 
 
 LuceneConnector.PleaseSupplyValidPath=Path can't be empty. Please supply a valid path
@@ -36,6 +36,6 @@ LuceneConnector.PleaseSupplyValidAnalyze
 LuceneConnector.PleaseSupplyValidFields=Fields can't be empty. Please supply a valid fields
 LuceneConnector.PleaseSupplyValidIdfield=Idfield can't be empty. Please supply a valid idfield
 LuceneConnector.PleaseSupplyValidContentfield=Contentfield can't be empty. Please supply a valid contentfield
-LuceneConnector.PleaseSupplyValidMaximumdocumentlength=Maximum document length can't be empty. Please supply a valid maximum document length
-LuceneConnector.MaximumDocumentLengthMustBeAnInteger=Maximum document length must be an integer
+LuceneConnector.PleaseSupplyValidMaxdocumentlength=Max document length can't be empty. Please supply a valid max document length
+LuceneConnector.MaxDocumentLengthMustBeAnInteger=Max document length must be an integer
 

Modified: manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/editConfiguration.js
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/editConfiguration.js?rev=1690423&r1=1690422&r2=1690423&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/editConfiguration.js (original)
+++ manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/editConfiguration.js Sun Jul 12 03:30:17 2015
@@ -74,15 +74,15 @@ function checkConfig() {
       return false;
     }
   }
-  if (editconnection.maximumdocumentlength) {
-    if (editconnection.maximumdocumentlength.value == "") {
-      alert("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('LuceneConnector.PleaseSupplyValidMaximumdocumentlength'))");
-      editconnection.maximumdocumentlength.focus();
+  if (editconnection.maxdocumentlength) {
+    if (editconnection.maxdocumentlength.value == "") {
+      alert("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('LuceneConnector.PleaseSupplyValidMaxdocumentlength'))");
+      editconnection.maxdocumentlength.focus();
       return false;
     }
-    if (editconnection.maximumdocumentlength.value != "" && !isInteger(editconnection.maximumdocumentlength.value)) {
-      alert("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('LuceneConnector.MaximumDocumentLengthMustBeAnInteger'))");
-      editconnection.maximumdocumentlength.focus();
+    if (editconnection.maxdocumentlength.value != "" && !isInteger(editconnection.maxdocumentlength.value)) {
+      alert("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('LuceneConnector.MaxDocumentLengthMustBeAnInteger'))");
+      editconnection.maxdocumentlength.focus();
       return false;
     }
   }
@@ -154,17 +154,17 @@ function checkConfigForSave() {
       return false;
     }
   }
-  if (editconnection.maximumdocumentlength) {
-    if (editconnection.maximumdocumentlength.value == "") {
-      alert("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('LuceneConnector.PleaseSupplyValidMaximumdocumentlength'))");
+  if (editconnection.maxdocumentlength) {
+    if (editconnection.maxdocumentlength.value == "") {
+      alert("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('LuceneConnector.PleaseSupplyValidMaxdocumentlength'))");
       SelectTab("$Encoder.javascriptBodyEscape($ResourceBundle.getString('LuceneConnector.Parameters'))");
-      editconnection.maximumdocumentlength.focus();
+      editconnection.maxdocumentlength.focus();
       return false;
     }
-    if (editconnection.maximumdocumentlength.value != "" && !isInteger(editconnection.maximumdocumentlength.value)) {
-      alert("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('LuceneConnector.MaximumDocumentLengthMustBeAnInteger'))");
+    if (editconnection.maxdocumentlength.value != "" && !isInteger(editconnection.maxdocumentlength.value)) {
+      alert("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('LuceneConnector.MaxDocumentLengthMustBeAnInteger'))");
       SelectTab("$Encoder.javascriptBodyEscape($ResourceBundle.getString('LuceneConnector.Parameters'))");
-      editconnection.maximumdocumentlength.focus();
+      editconnection.maxdocumentlength.focus();
       return false;
     }
   }

Modified: manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/editConfiguration_Parameters.html
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/editConfiguration_Parameters.html?rev=1690423&r1=1690422&r2=1690423&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/editConfiguration_Parameters.html (original)
+++ manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/editConfiguration_Parameters.html Sun Jul 12 03:30:17 2015
@@ -84,10 +84,10 @@
   </tr>
   <tr>
     <td class="description">
-      $Encoder.bodyEscape($ResourceBundle.getString('LuceneConnector.Maximumdocumentlength'))
+      $Encoder.bodyEscape($ResourceBundle.getString('LuceneConnector.Maxdocumentlength'))
     </td>
-    <td class="value"><input name="maximumdocumentlength" type="text"
-      value="$Encoder.attributeEscape($MAXIMUMDOCUMENTLENGTH)" size="48" />
+    <td class="value"><input name="maxdocumentlength" type="text"
+      value="$Encoder.attributeEscape($MAXDOCUMENTLENGTH)" size="48" />
     </td>
   </tr>
 </table>
@@ -102,6 +102,6 @@
 <input type="hidden" name="fields" value="$Encoder.attributeEscape($FIELDS)" />
 <input type="hidden" name="idfield" value="$Encoder.attributeEscape($IDFIELD)" />
 <input type="hidden" name="contentfield" value="$Encoder.attributeEscape($CONTENTFIELD)" />
-<input type="hidden" name="maximumdocumentlength" value="$Encoder.attributeEscape($MAXIMUMDOCUMENTLENGTH)" />
+<input type="hidden" name="maxdocumentlength" value="$Encoder.attributeEscape($MAXDOCUMENTLENGTH)" />
 
 #end

Modified: manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/viewConfiguration.html
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/viewConfiguration.html?rev=1690423&r1=1690422&r2=1690423&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/viewConfiguration.html (original)
+++ manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/main/resources/org/apache/manifoldcf/agents/output/lucene/viewConfiguration.html Sun Jul 12 03:30:17 2015
@@ -49,7 +49,7 @@
     <td class="value">$Encoder.bodyEscape($CONTENTFIELD)</td>
   </tr>
   <tr>
-    <td class="description">$Encoder.bodyEscape($ResourceBundle.getString('LuceneConnector.Maximumdocumentlength'))</td>
-    <td class="value">$Encoder.bodyEscape($MAXIMUMDOCUMENTLENGTH)</td>
+    <td class="description">$Encoder.bodyEscape($ResourceBundle.getString('LuceneConnector.Maxdocumentlength'))</td>
+    <td class="value">$Encoder.bodyEscape($MAXDOCUMENTLENGTH)</td>
   </tr>
 </table>
\ No newline at end of file

Modified: manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/test/java/org/apache/manifoldcf/agents/output/lucene/tests/LuceneClientTest.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/test/java/org/apache/manifoldcf/agents/output/lucene/tests/LuceneClientTest.java?rev=1690423&r1=1690422&r2=1690423&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/test/java/org/apache/manifoldcf/agents/output/lucene/tests/LuceneClientTest.java (original)
+++ manifoldcf/branches/CONNECTORS-1219/connectors/lucene/connector/src/test/java/org/apache/manifoldcf/agents/output/lucene/tests/LuceneClientTest.java Sun Jul 12 03:30:17 2015
@@ -18,6 +18,8 @@ package org.apache.manifoldcf.agents.out
 
 import java.io.File;
 import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 import java.util.Iterator;
 import java.util.List;
@@ -42,6 +44,7 @@ import org.junit.Before;
 import org.junit.Test;
 
 import com.google.common.base.StandardSystemProperty;
+import com.google.common.io.ByteSource;
 
 import static org.junit.Assert.*;
 import static org.hamcrest.CoreMatchers.*;
@@ -104,8 +107,8 @@ public class LuceneClientTest {
 
     IndexSearcher searcher = client.newSearcher();
     assertThat(searcher.count(new MatchAllDocsQuery()), is(0));
-    IndexSearcher realtimeSearcher = client.newRealtimeSearcher();
-    assertThat(realtimeSearcher.count(new MatchAllDocsQuery()), is(0));
+
+    assertThat(client.newRealtimeSearcher().count(new MatchAllDocsQuery()), is(0));
     client.close();
   }
 
@@ -115,12 +118,12 @@ public class LuceneClientTest {
 
     LuceneClient client1 =
       LuceneClientManager.getClient(path, LuceneClient.defaultCharfilters(), LuceneClient.defaultTokenizers(), LuceneClient.defaultFilters(), LuceneClient.defaultAnalyzers(), LuceneClient.defaultFields(),
-        LuceneClient.defaultIdField(), LuceneClient.defaultContentField(), LuceneClient.defaultMaximumDocumentLength());
+        LuceneClient.defaultIdField(), LuceneClient.defaultContentField(), LuceneClient.defaultMaxDocumentLength());
     assertThat(client1.isOpen(), is(true));
 
     LuceneClient client2 =
       LuceneClientManager.getClient(path, LuceneClient.defaultCharfilters(), LuceneClient.defaultTokenizers(), LuceneClient.defaultFilters(), LuceneClient.defaultAnalyzers(), LuceneClient.defaultFields(),
-        "id", "content", LuceneClient.defaultMaximumDocumentLength());
+        "id", "content", LuceneClient.defaultMaxDocumentLength());
     assertThat(client2.isOpen(), is(true));
 
     assertThat(client1, is(client2));
@@ -129,7 +132,7 @@ public class LuceneClientTest {
     try {
       client3 =
         LuceneClientManager.getClient(path, LuceneClient.defaultCharfilters(), LuceneClient.defaultTokenizers(), LuceneClient.defaultFilters(), LuceneClient.defaultAnalyzers(), LuceneClient.defaultFields(),
-          "dummy_id", "dummy_content", LuceneClient.defaultMaximumDocumentLength());
+          "dummy_id", "dummy_content", LuceneClient.defaultMaxDocumentLength());
       fail("Should not get here");
     } catch (Exception e) {
       assert e instanceof IllegalStateException;
@@ -142,7 +145,7 @@ public class LuceneClientTest {
 
     client3 =
       LuceneClientManager.getClient(path, LuceneClient.defaultCharfilters(), LuceneClient.defaultTokenizers(), LuceneClient.defaultFilters(), LuceneClient.defaultAnalyzers(), LuceneClient.defaultFields(),
-        "dummy_id", "dummy_content", LuceneClient.defaultMaximumDocumentLength());
+        "dummy_id", "dummy_content", LuceneClient.defaultMaxDocumentLength());
     assertThat(client3.isOpen(), is(true));
 
     assertThat(client3, not(client1));
@@ -154,43 +157,43 @@ public class LuceneClientTest {
     String path = testDir.getAbsolutePath()+sep+"tmp"+sep+"addorreplace-index";
     try (LuceneClient client = new LuceneClient(new File(path).toPath())) {
       // add
-      LuceneDocument doc1 = new LuceneDocument()
-        .addStringField(ID, "/repo/001", true)
-        .addTextField(CONTENT, "green", true);
+      LuceneDocument doc1 = new LuceneDocument();
+      doc1 = LuceneDocument.addField(doc1, ID, "/repo/001", client.fieldsInfo());
+      doc1 = LuceneDocument.addField(doc1, CONTENT, ByteSource.wrap("green".getBytes(StandardCharsets.UTF_8)).openBufferedStream(), client.fieldsInfo());
       client.addOrReplace("/repo/001", doc1);
 
-      LuceneDocument doc2 = new LuceneDocument()
-        .addStringField(ID, "/repo/002", true)
-        .addTextField(CONTENT, "yellow", true);
+      LuceneDocument doc2 = new LuceneDocument();
+      doc2 = LuceneDocument.addField(doc2, ID, "/repo/002", client.fieldsInfo());
+      doc2 = LuceneDocument.addField(doc2, CONTENT, ByteSource.wrap("yellow".getBytes(StandardCharsets.UTF_8)).openBufferedStream(), client.fieldsInfo());
       client.addOrReplace("/repo/002", doc2);
 
-     client.optimize();
-     IndexSearcher searcher = client.newSearcher();
-     assertThat(searcher.count(new TermQuery(new Term(CONTENT, "green"))), is(1));
-     assertThat(searcher.count(new TermQuery(new Term(CONTENT, "yellow"))), is(1));
-
-     // update
-     LuceneDocument updateDoc = new LuceneDocument()
-       .addStringField(ID, "/repo/001", true)
-       .addTextField(CONTENT, "yellow", true);
-     client.addOrReplace("/repo/001", updateDoc);
-
-     client.optimize();
-     searcher = client.newSearcher();
-     assertThat(searcher.count(new TermQuery(new Term(CONTENT, "green"))), is(0));
-     assertThat(searcher.count(new TermQuery(new Term(CONTENT, "yellow"))), is(2));
-
-     // add
-     LuceneDocument addDoc = new LuceneDocument()
-       .addStringField(ID, "/repo/100", true)
-       .addTextField(CONTENT, "red", true);
-     client.addOrReplace("/repo/100", addDoc);
-
-     client.optimize();
-     searcher = client.newSearcher();
-     assertThat(searcher.count(new TermQuery(new Term(CONTENT, "green"))), is(0));
-     assertThat(searcher.count(new TermQuery(new Term(CONTENT, "yellow"))), is(2));
-     assertThat(searcher.count(new TermQuery(new Term(CONTENT, "red"))), is(1));
+      client.optimize();
+      IndexSearcher searcher = client.newSearcher();
+      assertThat(searcher.count(new TermQuery(new Term(CONTENT, "green"))), is(1));
+      assertThat(searcher.count(new TermQuery(new Term(CONTENT, "yellow"))), is(1));
+
+      // update
+      LuceneDocument updateDoc = new LuceneDocument();
+      updateDoc = LuceneDocument.addField(updateDoc, ID, "/repo/001", client.fieldsInfo());
+      updateDoc = LuceneDocument.addField(updateDoc, CONTENT, ByteSource.wrap("yellow".getBytes(StandardCharsets.UTF_8)).openBufferedStream(), client.fieldsInfo());
+      client.addOrReplace("/repo/001", updateDoc);
+
+      client.optimize();
+      searcher = client.newSearcher();
+      assertThat(searcher.count(new TermQuery(new Term(CONTENT, "green"))), is(0));
+      assertThat(searcher.count(new TermQuery(new Term(CONTENT, "yellow"))), is(2));
+
+      // add
+      LuceneDocument addDoc = new LuceneDocument();
+      addDoc = LuceneDocument.addField(addDoc, ID, "/repo/100", client.fieldsInfo());
+      addDoc = LuceneDocument.addField(addDoc, CONTENT, ByteSource.wrap("red".getBytes(StandardCharsets.UTF_8)).openBufferedStream(), client.fieldsInfo());
+      client.addOrReplace("/repo/100", addDoc);
+
+      client.optimize();
+      searcher = client.newSearcher();
+      assertThat(searcher.count(new TermQuery(new Term(CONTENT, "green"))), is(0));
+      assertThat(searcher.count(new TermQuery(new Term(CONTENT, "yellow"))), is(2));
+      assertThat(searcher.count(new TermQuery(new Term(CONTENT, "red"))), is(1));
     }
   }
 
@@ -199,14 +202,14 @@ public class LuceneClientTest {
     String path = testDir.getAbsolutePath()+sep+"tmp"+sep+"remove-index";
     try (LuceneClient client = new LuceneClient(new File(path).toPath())) {
 
-      LuceneDocument doc1 = new LuceneDocument()
-        .addStringField(ID, "/repo/001", true)
-        .addTextField(CONTENT, "Apache", true);
+      LuceneDocument doc1 = new LuceneDocument();
+      doc1 = LuceneDocument.addField(doc1, ID, "/repo/001", client.fieldsInfo());
+      doc1 = LuceneDocument.addField(doc1, CONTENT, ByteSource.wrap("Apache".getBytes(StandardCharsets.UTF_8)).openBufferedStream(), client.fieldsInfo());
       client.addOrReplace("/repo/001", doc1);
 
-      LuceneDocument doc2 = new LuceneDocument()
-        .addStringField(ID, "/repo/002", true)
-        .addTextField(CONTENT, "Apache", true);
+      LuceneDocument doc2 = new LuceneDocument();
+      doc2 = LuceneDocument.addField(doc2, ID, "/repo/002", client.fieldsInfo());
+      doc2 = LuceneDocument.addField(doc2, CONTENT, ByteSource.wrap("Apache".getBytes(StandardCharsets.UTF_8)).openBufferedStream(), client.fieldsInfo());
       client.addOrReplace("/repo/002", doc2);
 
       client.optimize();
@@ -227,21 +230,21 @@ public class LuceneClientTest {
     try (LuceneClient client = new LuceneClient(new File(path).toPath())) {
 
       String content1 = "Apache ManifoldCF, Apache Lucene";
-      LuceneDocument doc1 = new LuceneDocument()
-        .addStringField(ID, "/repo/001", true)
-        .addTextField(CONTENT, content1, true)
-        .addTextField("content_ws", content1, false)
-        .addTextField("content_ngram", content1, false);
+      LuceneDocument doc1 = new LuceneDocument();
+      doc1 = LuceneDocument.addField(doc1, ID, "/repo/001", client.fieldsInfo());
+      doc1 = LuceneDocument.addField(doc1, CONTENT, ByteSource.wrap(content1.getBytes(StandardCharsets.UTF_8)).openBufferedStream(), client.fieldsInfo());
       client.addOrReplace("/repo/001", doc1);
 
-      LuceneDocument doc2 = new LuceneDocument()
-        .addStringField(ID, "/repo/002", true)
-        .addTextField(CONTENT, "This is stop word. apache software.", true);
+      String content2 = "This is stop word. apache software.";
+      LuceneDocument doc2 = new LuceneDocument();
+      doc2 = LuceneDocument.addField(doc2, ID, "/repo/002", client.fieldsInfo());
+      doc2 = LuceneDocument.addField(doc2, CONTENT, ByteSource.wrap(content2.getBytes(StandardCharsets.UTF_8)).openBufferedStream(), client.fieldsInfo());
       client.addOrReplace("/repo/002", doc2);
 
-      LuceneDocument doc3 = new LuceneDocument()
-        .addStringField(ID, "/repo/003", true)
-        .addTextField(CONTENT, "Apache Solr", true);
+      String content3 = "Apache Solr";
+      LuceneDocument doc3 = new LuceneDocument();
+      doc3 = LuceneDocument.addField(doc3, ID, "/repo/003", client.fieldsInfo());
+      doc3 = LuceneDocument.addField(doc3, CONTENT, ByteSource.wrap(content3.getBytes(StandardCharsets.UTF_8)).openBufferedStream(), client.fieldsInfo());
       client.addOrReplace("/repo/003", doc3);
 
       client.optimize();
@@ -272,17 +275,23 @@ public class LuceneClientTest {
       }
       assertThat(client.reader().docFreq(new Term(CONTENT, br)), is(3));
 
+      assertThat(client.reader().getTermVector(docID, "content_ws"), is(nullValue()));
+      assertThat(client.reader().getTermVector(docID, "content_ngram"), is(nullValue()));
+
       hits = searcher.search(client.newQuery("id:\\/repo\\/003"), 1);
       Document storedDocument = searcher.doc(hits.scoreDocs[0].doc);
-      assertThat(storedDocument.getField(CONTENT).stringValue(), is("Apache Solr"));
+      assertThat(storedDocument.getField(CONTENT).binaryValue().utf8ToString(), is("Apache Solr"));
+      assertThat(storedDocument.getField(CONTENT).stringValue(), is(nullValue()));
 
       String nrt = "near-real-time";
-      LuceneDocument doc4 = new LuceneDocument()
-        .addStringField(ID, nrt, true);
+      LuceneDocument doc4 = new LuceneDocument();
+      doc4 = LuceneDocument.addField(doc4, ID, nrt, client.fieldsInfo());
+      doc4 = LuceneDocument.addField(doc4, CONTENT, ByteSource.wrap(nrt.getBytes(StandardCharsets.UTF_8)).openBufferedStream(), client.fieldsInfo());
       client.addOrReplace(nrt, doc4);
       ManifoldCF.sleep(1500L);
       assertThat(searcher.count(client.newQuery(ID+":"+nrt)), is(0));
-      assertThat(client.newSearcher().count(client.newQuery(ID+":"+nrt)), is(0));
+      IndexSearcher searcher2 = client.newSearcher();
+      assertThat(searcher2.count(client.newQuery(ID+":"+nrt)), is(0));
       assertThat(client.newRealtimeSearcher().count(client.newQuery(ID+":"+nrt)), is(1));
     }
   }
@@ -290,10 +299,14 @@ public class LuceneClientTest {
   @Test
   public void testIndexRepositoryDocument() throws IOException, ManifoldCFException {
     String documentURI = "file://dummy/rd";
+    String content = "Classification, categorization, and tagging using Lucene";
+
     RepositoryDocument rd = new RepositoryDocument();
     rd.addField("cat", "foo");
     rd.addField("author", new String[]{ "abe", "obama" });
-    rd.addField(CONTENT, "Classification, categorization, and tagging using Lucene");
+    byte[] b = content.getBytes(StandardCharsets.UTF_8);
+    InputStream in = ByteSource.wrap(b).openBufferedStream();
+    rd.setBinary(in, b.length);
 
     String path = testDir.getAbsolutePath()+sep+"tmp"+sep+"rd-index";
     try (LuceneClient client = new LuceneClient(new File(path).toPath())) {
@@ -301,6 +314,8 @@ public class LuceneClientTest {
 
       doc = LuceneDocument.addField(doc, client.idField(), documentURI, client.fieldsInfo());
 
+      doc = LuceneDocument.addField(doc, client.contentField(), rd.getBinaryStream(), client.fieldsInfo());
+
       Iterator<String> it = rd.getFields();
       while (it.hasNext()) {
         String rdField = it.next();